Code Example #1
File: evaluate.py Project: rfarmaha/MSCI_541
def calculate_measures_results_files(qrel, results_files, output_location):
    if not os.path.exists(output_location):
        os.makedirs(output_location)  # output_location is expected to end with '/'
    csv_name = output_location + 'average_measures.csv'
    with open(csv_name, 'w') as csvfile:
        fieldnames = [
            'Run Name', 'Mean Average Precision', 'Mean P@10', 'Mean NDCG@10',
            'Mean NDCG@1000', 'Mean TBG'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

    for r in results_files:
        filename = r.split("/")[-1]
        name = filename.split(".")[0]
        print("Computing measures for: {}".format(name))
        try:
            results = ResultsParser(r).parse()
            measures = Measures(qrel, results[0], results[1])
            measures_to_csv(name, measures, output_location)
            append_to_average_file(csv_name, name, measures)
        except (ResultsParser.ResultsParseError, ValueError,
                FileNotFoundError) as e:
            print("Cannot parse {}: bad format ({})".format(name, e))
            bad_format_run(name, output_location)
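The helper append_to_average_file is not shown in this snippet; a minimal sketch of what it might look like, assuming it appends one row per run matching the header written above (the Measures attribute names below are hypothetical):

def append_to_average_file(csv_name, name, measures):
    # Hypothetical sketch: the attribute names are assumptions, not the project's API.
    with open(csv_name, 'a') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([name, measures.mean_average_precision, measures.mean_p_at_10,
                         measures.mean_ndcg_at_10, measures.mean_ndcg_at_1000, measures.mean_tbg])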
Code Example #2
File: conf_manager.py Project: magnet-clip/bot
    def unmute_notification(self, uuid, name: str):
        uuid = str(uuid)
        if not self.is_user_allowed(uuid):
            return False

        name = name.lower()
        if name != "all":
            var_name = Measures.find_var_by_name(name)
            if not var_name:
                return True  # unknown variable name: nothing to unmute

            mutes = self._config.get(uuid, "mute", fallback="")
            if mutes == "all":
                # copy ALL_VARS so removing one name does not mutate the class attribute
                all_vars = list(Measures.ALL_VARS)
                all_vars.remove(var_name)
                self._config[uuid]["mute"] = ";".join(all_vars)
            else:
                current_vars = mutes.split(";")
                if var_name in current_vars:
                    # list.remove() mutates in place and returns None, so don't reassign its result
                    current_vars.remove(var_name)
                self._config[uuid]["mute"] = ";".join(current_vars)

        else:
            self._config[uuid]["mute"] = ""

        self.save()
        return True
Code Example #3
 def compare_acc_procars(self, dir_path):
     accuracies = {}
     genome = self.parse(dir_path)
     anc_genomes = Handler.parse_genomes_in_grimm_file(dir_path + '/ancestral.txt')
     for j in anc_genomes:
         print(j.get_name())
         accuracies[j.get_name()] = Measures.calculate_accuracy_measure(genome, j)
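     # min() over a dict iterates its keys, so this returns the accuracy for the
     # lexicographically smallest ancestor name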
     return accuracies[min(accuracies)]
Code Example #4
File: conf_manager.py Project: magnet-clip/bot
    def add_notification(self, uuid, name, op, value):
        uuid = str(uuid)
        value = str(value)
        if not self.is_user_allowed(uuid):
            return False

        op_name = Measures.parse_op(op)
        if not op_name:
            return False

        var_name = Measures.find_var_by_name(name)
        if not var_name:
            return False

        record_name = "{0} {1}".format(var_name, op_name)
        self._config[uuid][record_name] = value
        self.save()
        return True
Code Example #5
 def compare_acc_GASTS(self, dir_path):
     accuracies = {}
     genomes = self.parse(dir_path)
     anc_genomes = Handler.parse_genomes_in_grimm_file(dir_path + '/ancestral.txt')
     for i in genomes:
         for j in anc_genomes:
             # anc_genomes holds genome objects, so compare dict keys against get_name()
             # (mirrors compare_acc_Infercarspro below; comparing i == j never matches)
             if i == j.get_name():
                 accuracies[i] = Measures.calculate_accuracy_measure(genomes[i], j)
     return accuracies
Code Example #6
 def compare_acc_Infercarspro(self, dir_path):
     accuracies = {}
     genomes = self.parse(dir_path)
     anc_genomes = Handler.parse_genomes_in_grimm_file(dir_path + '/ancestral.txt')
     for i in genomes:
         for j in anc_genomes:
             print(j.get_name())
             if i == j.get_name():
                 accuracies[i] = Measures.calculate_accuracy_measure(genomes[i], j)
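     # min() over a dict iterates its keys, so this returns the accuracy for the
     # lexicographically smallest genome name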
     return accuracies[min(accuracies)]
Code Example #7
File: conf_manager.py Project: magnet-clip/bot
    def is_notification_enabled(self, uuid, name):
        uuid = str(uuid)
        if not self.has_notification(uuid, name):
            return False

        var_name = Measures.find_var_by_name(name.lower())
        if not var_name:
            return False

        mutes = self._config.get(uuid, "mute", fallback="")
        return mutes != "all" and var_name not in mutes.split(";")
Code Example #8
def newInteration(lines, learnFactor, logger):
  execution = Execution(learnFactor, logger)

  linesTraining, linesTeste = execution.separateTrainingTestesLines(lines)

  execution.training(linesTraining)
  results = execution.testing(linesTeste)
  print(results)

  tableResults = Measures.getTableResults(results)

  accuracy = Measures.accuracy(tableResults)
  precision = Measures.precision(tableResults)
  recall = Measures.recall(tableResults)
  f1_score = Measures.f1_score(precision, recall)

  print('\naccuracy: ' + str(accuracy) + '% ' + 'precision: ' + str(precision)
        + '% ' + 'recall: ' + str(recall) + '% ' + 'f1_score: ' + str(f1_score) + '%')

  return accuracy, precision, recall, f1_score
Code Example #9
def print_cpp_info(conn, project_info: ProjectInfo):
    print("\tC++ (files = {0}):".format(db.get_number_of_files(conn, "C++", project_info)
                                        + db.get_number_of_files(conn, "C/C++ Header", project_info)))
    print("\t\tCyclomatic complexity per function:")
    print("\t\t" + str(Measures.from_data(db.get_all_functions_ccn(conn, "C++", project_info)
                                          + db.get_all_functions_ccn(conn, "C/C++ Header", project_info))))

    print("\t\tCode lines per function:")
    print("\t\t" + str(Measures.from_data(db.get_all_functions_code_lines(conn, "C++", project_info)
                                          + db.get_all_functions_code_lines(conn, "C/C++ Header", project_info))))

    print("\t\tComment lines per function:")
    print("\t\t" + str(Measures.from_data(db.get_all_functions_comment_lines(conn, "C++", project_info)
                                          + db.get_all_functions_comment_lines(conn, "C/C++ Header", project_info))))

    print("\t\tCode lines per class:")
    print("\t\t" + str(Measures.from_data(db.get_all_classes_code_lines(conn, "C++", project_info)
                                          + db.get_all_classes_code_lines(conn, "C/C++ Header", project_info))))

    print("\t\tComment lines per class:")
    print("\t\t" + str(Measures.from_data(db.get_all_classes_comment_lines(conn, "C++", project_info)
                                          + db.get_all_classes_comment_lines(conn, "C/C++ Header", project_info))))

    print("\t\tMethods per class:")
    print("\t\t" + str(Measures.from_data(db.get_all_classes_n_methods(conn, "C++", project_info)
                                          + db.get_all_classes_n_methods(conn, "C/C++ Header", project_info))))
    print()
Code Example #10
def print_cs_info(conn):
    print("Overall C# (files = {0}):".format(db.get_number_of_files(
        conn, "C#")))
    print("\tCyclomatic complexity per method:")
    print("\t" + str(Measures.from_data(db.get_all_functions_ccn(conn, "C#"))))

    print("\tCode lines per method:")
    print("\t" +
          str(Measures.from_data(db.get_all_functions_code_lines(conn, "C#"))))

    print("\tComment lines per method:")
    print("\t" + str(
        Measures.from_data(db.get_all_functions_comment_lines(conn, "C#"))))

    print("\tCode lines per class:")
    print("\t" +
          str(Measures.from_data(db.get_all_classes_code_lines(conn, "C#"))))

    print("\tComment lines per class:")
    print(
        "\t" +
        str(Measures.from_data(db.get_all_classes_comment_lines(conn, "C#"))))

    print("\tMethods per class:")
    print("\t" +
          str(Measures.from_data(db.get_all_classes_n_methods(conn, "C#"))))
    print()
Code Example #11
File: conf_manager.py Project: magnet-clip/bot
    def has_notification(self, uuid, name):
        uuid = str(uuid)
        if not self._config.has_section(uuid):
            return False

        var_name = Measures.find_var_by_name(name)
        if not var_name:
            return False

        return any(x.startswith(var_name) for x in self._config.options(uuid))
Code Example #12
File: conf_manager.py Project: magnet-clip/bot
    def remove_notification(self, uuid, name):
        uuid = str(uuid)
        if not self.is_user_allowed(uuid):
            return False

        var_name = Measures.find_var_by_name(name)
        if not var_name:
            return False

        # find every record that starts with this variable name and remove it
        for record_name in [x for x in self._config.options(uuid) if x.startswith(var_name)]:
            self._config.remove_option(uuid, record_name)

        self.save()
        return True
Code Example #13
File: conf_manager.py Project: magnet-clip/bot
    def mute_notification(self, uuid, name: str):
        uuid = str(uuid)
        if not self.is_user_allowed(uuid):
            return False

        name = name.lower()
        if name != "all":
            var_name = Measures.find_var_by_name(name)
            if not var_name:
                return False

            mutes = self._config.get(uuid, "mute", fallback="")
            if mutes != "all" and var_name not in mutes.split(";"):
                # avoid a leading ";" when the mute list is still empty
                self._config[uuid]["mute"] = var_name if not mutes else mutes + ";" + var_name
        else:
            self._config[uuid]["mute"] = "all"

        self.save()
        return True
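Taken together, mute_notification and unmute_notification store muted variable names as a semicolon-separated string under each user's "mute" key, with "all" as a special value. A minimal usage sketch, assuming the methods live on a class named ConfManager and that "temp" is a variable known to Measures (both names are assumptions):

cm = ConfManager()                        # assumed class name; not shown in the snippets
cm.mute_notification(1234, "temp")        # "mute" becomes "temp"
cm.is_notification_enabled(1234, "temp")  # -> False while muted
cm.unmute_notification(1234, "temp")      # "temp" is removed from the list
cm.mute_notification(1234, "all")         # "mute" becomes "all", silencing everything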
Code Example #14
def main(learnFactor):
  dateNow = datetime.now()
  dateNowFormated = dateNow.strftime('%Y-%m-%d %H:%M:%S')
  print('Start at: ' + dateNowFormated)

  # dirResutsName = dateNow.strftime('%Y-%m-%d_%H:%M')
  dirName = './results/' + 'predict2-' + str(learnFactor)

  try:
    os.mkdir(dirName)
    print("Directory", dirName, "created")
  except OSError:
    print("Directory", dirName, "create error")

  # read the corpus and drop the header row
  with open("./corpus.tsv", "r") as planilhaDeCorpusTreino:
    lines = planilhaDeCorpusTreino.readlines()
  lines.pop(0)

  interationLogFile = Logger(dirName + '/interation_L01_I01.txt')
  bateryLogFile = Logger(dirName + '/batery_tests.tsv')
  learFactorLogFile = Logger(dirName + '/learFactor_tests.tsv')
  # resumeLogFile = Logger(dirName + '/resume_tests.txt')

  bateryTestsString = ("Taxa_aprendizado" + "\t" + "Interação" + "\t" + "Acuracia" + "\t"
        + "Precisão" + "\t" + "Recall" + "\t" + "F1_score")
  bateryLogFile.write(bateryTestsString)

  learnTestsString = ("learnFactor" + "\t" + "acuracia_media" + "\t"
        + "acuracia_dispersion" + "\t" + "acuracia_defaultError" + "\t" + "precision_media" + "\t"
        + "precision_dispersion" + "\t" + "precision_defaultError" + "\t" + "recall_media" + "\t"
        + "recall_dispersion" + "\t" + "recall_defaultError" + "\t" + "f1_score_media" + "\t"
        + "f1_score_dispersion" + "\t" + "f1_score_defaultError")
  learFactorLogFile.write(learnTestsString)

  learFactorPass = 0.2
  learnFactorMax = learnFactor + 0.1

  while learnFactor < learnFactorMax:
    learnPercents = 100 / ((learnFactorMax - learnFactor) / learFactorPass)
    print('learn is in ' + str(learnPercents) + '%')
    accuracyValues = []
    precisionValues = []
    recallValues = []
    scoresValues = []

    for index in range(5):
      interationPercents = (100 * index) / float(5)
      print('interaction is in ' + str(interationPercents) + '%')
      print('learn: ' + str(learnFactor) + ' interation: ' + str(index))
      accuracy, precision, recall, f1_score = newInteration(lines, learnFactor, interationLogFile)
      bateryTestsString = (str(learnFactor) + "\t" + str(index) + "\t" + str(accuracy) + "\t"
        + str(precision) + "\t" + str(recall) + "\t" + str(f1_score))
      bateryLogFile.write(bateryTestsString)

      accuracyValues.append(accuracy)
      precisionValues.append(precision)
      recallValues.append(recall)
      scoresValues.append(f1_score)

    acuracia_media, acuracia_dispersion, acuracia_defaultError = Measures.valueStatistic(accuracyValues)
    precision_media, precision_dispersion, precision_defaultError = Measures.valueStatistic(precisionValues)
    recall_media, recall_dispersion, recall_defaultError = Measures.valueStatistic(recallValues)
    f1_score_media, f1_score_dispersion, f1_score_defaultError = Measures.valueStatistic(scoresValues)

    learnTestsString = (str(learnFactor) + "\t" + str(acuracia_media) + "\t"
        + str(acuracia_dispersion) + "\t" + str(acuracia_defaultError) + "\t" + str(precision_media) + "\t"
        + str(precision_dispersion) + "\t" + str(precision_defaultError) + "\t" + str(recall_media) + "\t"
        + str(recall_dispersion) + "\t" + str(recall_defaultError) + "\t" + str(f1_score_media) + "\t"
        + str(f1_score_dispersion) + "\t" + str(f1_score_defaultError))

    learFactorLogFile.write(learnTestsString)
    learnFactor += learFactorPass

  interationLogFile.close()
  bateryLogFile.close()
  learFactorLogFile.close()

  dateNow2 = datetime.now()
  passTime = dateNow2 - dateNow  # end minus start, so the delta is positive
  passTimeInSec = passTime.total_seconds()
  passDays = divmod(passTimeInSec, 86400)
  passHours = divmod(passDays[1], 3600)    # was divmod(days[1], ...): an undefined name
  passMinutes = divmod(passHours[1], 60)
  passSeconds = divmod(passMinutes[1], 1)

  print('Runtime, calculated at %d days, %d hours, %d minutes and %d seconds' %
    (passDays[0], passHours[0], passMinutes[0], passSeconds[0]))
Code Example #15
import gc
import machine

from wifi import wifi_connect
from api import api_send
from logs import logs_error
from measures import Measures
from settings import WIFI_SSID, WIFI_PASSWORD, COUNT_FOR_AVERAGE, SENDING_DELTA

wdt = machine.WDT(timeout=180000)

try:
    wifi_connect(WIFI_SSID, WIFI_PASSWORD)
    while True:
        measures = Measures(COUNT_FOR_AVERAGE, SENDING_DELTA)
        data = measures.get_averaged_measures()
        del measures
        gc.collect()
        api_send(data)
        wdt.feed()
        gc.collect()
except Exception as e:
    logs_error(str(e))
Code Example #16
### Evaluation script used for evaluating baselines on the MultiRC dataset
# The evaluation script expects the questions and the predicted answers in separate JSON files.
# The predicted answers should be 1s and 0s (no real-valued scores).
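# A hypothetical predictions file might look like the following; the shape is
# inferred from the lookup in eval() below, which keys on "pid"/"qid" and reads
# a list of binary "scores" per answer option:
# [
#   {"pid": "Fiction/1", "qid": "0", "scores": [1, 0, 0, 1]},
#   ...
# ]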

import json
from measures import Measures

# this is the location of your data; has to be downloaded from http://cogcomp.org/multirc/
inputFile = '../data/dev_83-fixedIds.json'

measures = Measures()


def main():
    eval('../baseline-scores/human-01.json')
    # eval('../baseline-scores/allOnes.json')
    # eval('../baseline-scores/allZeros.json')
    # eval('../baseline-scores/simpleLR.json')
    # eval('../baseline-scores/lucene_world.json')
    # eval('../baseline-scores/lucene_paragraphs.json')


# the input to the `eval` function is the file which contains the binary predictions per question-id
# (note: the function name shadows Python's built-in eval)
def eval(outFile):
    input = json.load(open(inputFile))
    output = json.load(open(outFile))
    output_map = {a["pid"] + "==" + a["qid"]: a["scores"] for a in output}

    assert len(output_map) == len(
        output), "You probably have redundancies in your keys"
Code Example #17
File: project.py Project: UroBs17/ACDA
        layer1_dot_layer2 = np.dot(activate_i_dot_l1, self.weights_layer_2)
        return activations.sigmoid(layer1_dot_layer2)
if __name__ == '__main__':
    
    for i in range(7):
        f = open("data" + str(i + 1) + ".txt", "r")
        inputs = []
        targets = []
        n = int(f.readline())
        for line in range(n):
            l = list(map(int, f.readline().strip().split()))
            inputs.append(l[:-1])
            targets.append([l[-1]])
        f.close()  # close the data file once all lines are read
        inputs = np.array(inputs)
        targets = np.array(targets)
        nn = network(inputs,targets)
        nn.fit(10000)
        pred = nn.predict(inputs)
        print(pred)
        print(targets)
        m = Measures(targets,pred)
        print("precision", m.precision())
        print("recall", m.recall())
        print("f1", m.f1())
        print("accuracy", m.accuracy())
        print("confusion matrix")
        for row in m.cmatrix():  # 'row' avoids shadowing the outer loop variable i
            print(row)
           
        print("---------------")
        
Code Example #18
File: neuron.py Project: UroBs17/ACDA
    dataset = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

    weights = np.array([1, 1])
    theta = 1

    McCullochPitts = Neuron(weights, bias=False, theta=theta)

    real = np.array([0, 1, 1, 1])
    pred = np.array([])

    for data in dataset:
        McCullochPitts.process(data)
        McCullochPitts.activate(lambda v, t: 1 if v >= t else 0)
        pred = np.append(pred, McCullochPitts.z)

    m = Measures(real, pred)
    print("precision %s" % m.precision())
    print("recall %s" % m.recall())
    print("f1 %s" % m.f1())
    print("accuracy %s" % m.accuracy())

    Miscellaneous.printcmatrix(m.cmatrix())

    x1 = np.linspace(-3, 3, 10)
    x2 = np.linspace(-3, 3, 10)

    v = -(weights[0] / weights[1]) * x1 + theta / weights[1]

    plt.fill_between(x1, v, 3, where=v < 3, color='g', alpha=0.5)

    ones = dataset[np.where(real > 0)]
Code Example #19
File: ner.py Project: chrispool/PTA
class Ner():
	
	def __init__(self, argv):
		classifier = "ner/classifiers/" + "wikification.ser.gz"
		jar = "ner/stanford-ner-3.4.jar"
		self.tagger = NERTagger(classifier, jar)
		self.testfile = open(argv[1])  # use the argv passed in instead of reading sys.argv again
		with open('html/htmlheader.txt', 'r') as h:
			self.htmlHeader = h.read()
		with open('html/htmlfooter.txt', 'r') as f:
			self.htmlFooter = f.read()
		
		self.measures = Measures()
		self.classify()
	
	def cleanData(self, line):
		#function to clean wrong annotated data
		if len(line) > 6:	
			if line[6] == '-':
				line[6] = ''
		if len(line) > 7:
			if line[7] == '-':
				line[7] = ''
		return line

	def classify(self):
		#create test data with as key document and tuple of word and label
		testdata = defaultdict(list)
		tokens = defaultdict(list)
		for line in self.testfile:
			e = self.cleanData(line.strip().split())
			if len(e) == 6:
				testdata[e[0]].append([e[0],e[1],e[2],e[3], e[4], e[5], 'O', ''])
			elif len(e) == 7:
				testdata[e[0]].append([e[0],e[1],e[2],e[3],e[4], e[5], e[6], ''])
			elif len(e) == 8:
				testdata[e[0]].append([e[0],e[1],e[2],e[3],e[4], e[5], e[6],e[7]])
			
			if len(e) >= 5:  # e[4] is only safe when the line has at least five fields
				tokens[e[0]].append(e[4])  #store tokens of this document
		#add classification
		for doc in testdata:
			
			taggedDoc = self.tagger.tag(tokens[doc])
			taggedTokens = []
			for sentence in taggedDoc:
				taggedTokens.extend(sentence)

			for i,line in enumerate(testdata[doc]):
				expectedCategory = taggedTokens[i][1]
				testdata[doc][i].append(expectedCategory)

			#use entire doc for getting wiki links
			
			wikiLinks = self.getWikiLinks(testdata[doc])
			for i,line in enumerate(testdata[doc]):
				expectedLink = wikiLinks[i]
				testdata[doc][i].append(expectedLink)

		self.saveFile(testdata)
		self.measures.calculate(testdata) #use the measures script
		self.saveHTML(testdata)

	def saveFile(self, testdata):
		if os.path.exists("data/output.txt"):
			os.remove("data/output.txt")
		with open("data/output.txt", "a") as outputFile:
			for doc in testdata:
				for e in testdata[doc]:
					lineString = "{} {} {}".format(" ".join(e[0:6]), e[8], e[9])
					outputFile.write(lineString.strip() + '\n')


	def saveHTML(self, testdata):
		for html in glob.glob("html/*.html"):
			os.remove(html)
		docs = []
		for doc in testdata:
			filename = doc + ".html"
			with open("html/" + filename, "a") as htmlfile:
				docs.append((filename, doc))
				htmlfile.write(self.htmlHeader)
				htmlfile.write('<h1>Document '+ doc +'</h1>\n')
				for line in testdata[doc]:
					if line[9] != '':
						url = line[9].split(",")
						htmlfile.write('<a data-toggle="tooltip" data-placement="top" title="Category: ' + line[8] + '" href="' + url[0] + '" target="_blank" class="' + line[8] + '">' + line[4] + ' </a>')
					else:
						htmlfile.write(line[4] + " ")
				
				htmlfile.write('<br /><br /><a href="index.html" class="btn btn-default">Back</a>\n')
				htmlfile.write(self.htmlFooter)

		
		with open("html/index.html", "a") as htmlfile:
			htmlfile.write(self.htmlHeader)
			htmlfile.write('<h1>List of documents</h1>\n')
			for link in docs:
				htmlfile.write('<li><a href="'+link[0]+'">'+link[1]+'</a></li>\n')
			htmlfile.write(self.htmlFooter)
		

		with open("html/classify.html", "a") as htmlfile:
			htmlfile.write(self.htmlHeader)
			htmlfile.write('<h1>Classify</h1>\n')
			htmlfile.write('''<form action="../htmlClassifier.py" method="post">
							First Name: <input type="text" name="first_name"><br />
							Last Name: <input type="text" name="last_name" />

							<input type="submit" value="Submit" />
							</form>''')
			htmlfile.write(self.htmlFooter)
		

	def getWikiLinks(self, doc):
		#make word pairs, for example new york as one query
		currentToken = []
		lastToken = 'O'
		keywords = []
		result = [''] * len(doc) #make list with default NONE tag
		for i, token in enumerate(doc):				
			if token[8] == lastToken:
				currentToken.append(i)		
			else:
				if lastToken != 'O':  # 'is not' compares identity; string values need !=
					keywords.append(currentToken)
				currentToken = [i]
			lastToken = token[8]

		for keyword in keywords:
			query = ''
			for token in keyword:
				query += doc[token][4] + "%20"
			

			url = 'http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch='+query[:-3]+'&format=json'
			
			with urllib.request.urlopen(url) as response:
				str_response = response.read().decode('utf-8')
				data = json.loads(str_response)
			
			links = []
			for d in data:
				for r in data[d]:
					if r == 'search':
						for s in data[d][r]:
							
							if 'snippet' in s:
								
								links.append('http://en.wikipedia.org/wiki/' + s['title'].replace(" ", "_"))

			if len(links) > 0:

				link = links[0] #todo, check if other links are better
			else:
				link = 'NONE'
			for token in keyword:
				result[token] = link+",1"
		
		return result
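The query URL in getWikiLinks is assembled by hand with "%20" separators; as a sketch, the same request could use urllib.parse.quote, which also escapes characters other than spaces (a hypothetical standalone helper, not part of the original class):

from urllib.parse import quote
import urllib.request
import json

def wiki_search(term):
    # Same MediaWiki search endpoint as above, with proper URL escaping.
    url = ('http://en.wikipedia.org/w/api.php?action=query&list=search'
           '&srsearch=' + quote(term) + '&format=json')
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode('utf-8'))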