def calculate_measures_results_files(qrel, results_files, output_location):
    # output_location is assumed to end with a path separator
    if not os.path.exists(output_location):
        os.makedirs(output_location[:-1])
    csv_name = output_location + 'average_measures.csv'
    with open(csv_name, 'w') as csvfile:
        fieldnames = [
            'Run Name', 'Mean Average Precision', 'Mean P@10',
            'Mean NDCG@10', 'Mean NDCG@1000', 'Mean TBG'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
    for r in results_files:
        # keep only the file name, without directories or extension
        name = os.path.basename(r).split(".")[0]
        print("Computing measures for: {}".format(name))
        try:
            results = ResultsParser(r).parse()
            measures = Measures(qrel, results[0], results[1])
            measures_to_csv(name, measures, output_location)
            append_to_average_file(csv_name, name, measures)
        except (ResultsParser.ResultsParseError, ValueError, FileNotFoundError):
            print("Cannot parse: {}, bad format".format(name))
            bad_format_run(name, output_location)
            continue
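# A hedged invocation sketch for the function above. The qrel object and the
# run-file layout are assumptions based on the parameter names; QrelParser is
# a hypothetical helper, not something defined in this codebase.
import glob

qrel = QrelParser("qrels.txt").parse()                     # hypothetical helper
runs = glob.glob("runs/*.results")
calculate_measures_results_files(qrel, runs, "measures/")  # trailing slash required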
def unmute_notification(self, uuid, name: str):
    uuid = str(uuid)
    if not self.is_user_allowed(uuid):
        return False
    name = name.lower()
    if name != "all":
        var_name = Measures.find_var_by_name(name)
        if not var_name:
            return True
        mutes = self._config.get(uuid, "mute", fallback="")
        if mutes == "all":
            # copy first so the class-level list is not mutated
            all_vars = list(Measures.ALL_VARS)
            all_vars.remove(var_name)
            self._config[uuid]["mute"] = ";".join(all_vars)
        else:
            current_vars = mutes.split(";")
            if var_name in current_vars:
                # list.remove() mutates in place and returns None,
                # so its result must not be assigned back
                current_vars.remove(var_name)
            self._config[uuid]["mute"] = ";".join(current_vars)
    else:
        self._config[uuid]["mute"] = ""
    self.save()
    return True
def compare_acc_procars(self, dir_path):
    accuracies = {}
    genome = self.parse(dir_path)
    anc_genomes = Handler.parse_genomes_in_grimm_file(dir_path + '/ancestral.txt')
    for j in anc_genomes:
        print(j.get_name())
        accuracies[j.get_name()] = Measures.calculate_accuracy_measure(genome, j)
    # note: min() over a dict iterates its keys, so this returns the accuracy
    # of the alphabetically first ancestor name, not the smallest accuracy
    return accuracies[min(accuracies)]
def add_notification(self, uuid, name, op, value):
    uuid = str(uuid)
    value = str(value)
    if not self.is_user_allowed(uuid):
        return False
    op_name = Measures.parse_op(op)
    if not op_name:
        return False
    var_name = Measures.find_var_by_name(name)
    if not var_name:
        return False
    record_name = "{0} {1}".format(var_name, op_name)
    self._config[uuid][record_name] = value
    self.save()
    return True
def compare_acc_GASTS(self, dir_path):
    accuracies = {}
    genomes = self.parse(dir_path)
    anc_genomes = Handler.parse_genomes_in_grimm_file(dir_path + '/ancestral.txt')
    for i in genomes:
        for j in anc_genomes:
            if i == j:
                accuracies[i] = Measures.calculate_accuracy_measure(genomes[i], anc_genomes[j])
    return accuracies
def compare_acc_Infercarspro(self, dir_path):
    accuracies = {}
    genomes = self.parse(dir_path)
    anc_genomes = Handler.parse_genomes_in_grimm_file(dir_path + '/ancestral.txt')
    for i in genomes:
        for j in anc_genomes:
            print(j.get_name())
            if i == j.get_name():
                accuracies[i] = Measures.calculate_accuracy_measure(genomes[i], j)
    # as in compare_acc_procars, min() picks the first key alphabetically,
    # not the smallest accuracy value
    return accuracies[min(accuracies)]
def is_notification_enabled(self, uuid, name):
    uuid = str(uuid)
    if not self.has_notification(uuid, name):
        return False
    var_name = Measures.find_var_by_name(name.lower())
    if not var_name:
        return False
    mutes = self._config.get(uuid, "mute", fallback="")
    return mutes != "all" and var_name not in mutes.split(";")
def newInteration(lines, learnFactor, logger):
    execution = Execution(learnFactor, logger)
    linesTraining, linesTeste = execution.separateTrainingTestesLines(lines)
    execution.training(linesTraining)
    results = execution.testing(linesTeste)
    print(results)
    tableResults = Measures.getTableResults(results)
    accuracy = Measures.accuracy(tableResults)
    precision = Measures.precision(tableResults)
    recall = Measures.recall(tableResults)
    f1_score = Measures.f1_score(precision, recall)
    print('\naccuracy: ' + str(accuracy) + '% '
          + 'precision: ' + str(precision) + '% '
          + 'recall: ' + str(recall) + '% '
          + 'f1_score: ' + str(f1_score) + '%')
    return accuracy, precision, recall, f1_score
def print_cpp_info(conn, project_info: ProjectInfo):
    print("\tC++ (files = {0}):".format(
        db.get_number_of_files(conn, "C++", project_info)
        + db.get_number_of_files(conn, "C/C++ Header", project_info)))
    print("\t\tCyclomatic complexity per function:")
    print("\t\t" + str(Measures.from_data(
        db.get_all_functions_ccn(conn, "C++", project_info)
        + db.get_all_functions_ccn(conn, "C/C++ Header", project_info))))
    print("\t\tCode lines per function:")
    print("\t\t" + str(Measures.from_data(
        db.get_all_functions_code_lines(conn, "C++", project_info)
        + db.get_all_functions_code_lines(conn, "C/C++ Header", project_info))))
    print("\t\tComment lines per function:")
    print("\t\t" + str(Measures.from_data(
        db.get_all_functions_comment_lines(conn, "C++", project_info)
        + db.get_all_functions_comment_lines(conn, "C/C++ Header", project_info))))
    print("\t\tCode lines per class:")
    print("\t\t" + str(Measures.from_data(
        db.get_all_classes_code_lines(conn, "C++", project_info)
        + db.get_all_classes_code_lines(conn, "C/C++ Header", project_info))))
    print("\t\tComment lines per class:")
    print("\t\t" + str(Measures.from_data(
        db.get_all_classes_comment_lines(conn, "C++", project_info)
        + db.get_all_classes_comment_lines(conn, "C/C++ Header", project_info))))
    print("\t\tMethods per class:")
    print("\t\t" + str(Measures.from_data(
        db.get_all_classes_n_methods(conn, "C++", project_info)
        + db.get_all_classes_n_methods(conn, "C/C++ Header", project_info))))
    print()
def print_cs_info(conn):
    print("Overall C# (files = {0}):".format(db.get_number_of_files(conn, "C#")))
    print("\tCyclomatic complexity per method:")
    print("\t" + str(Measures.from_data(db.get_all_functions_ccn(conn, "C#"))))
    print("\tCode lines per method:")
    print("\t" + str(Measures.from_data(db.get_all_functions_code_lines(conn, "C#"))))
    print("\tComment lines per method:")
    print("\t" + str(Measures.from_data(db.get_all_functions_comment_lines(conn, "C#"))))
    print("\tCode lines per class:")
    print("\t" + str(Measures.from_data(db.get_all_classes_code_lines(conn, "C#"))))
    print("\tComment lines per class:")
    print("\t" + str(Measures.from_data(db.get_all_classes_comment_lines(conn, "C#"))))
    print("\tMethods per class:")
    print("\t" + str(Measures.from_data(db.get_all_classes_n_methods(conn, "C#"))))
    print()
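# The Measures.from_data helper used by the two reporting functions above is
# only ever converted to a string, so its implementation is not visible here.
# A minimal sketch of a compatible summary class, assuming it reduces a list of
# per-function/per-class numbers to min/mean/median/max; the real class may
# track different statistics.
import statistics

class Measures:
    """Hypothetical reconstruction of a distribution summary."""

    def __init__(self, minimum, mean, median, maximum):
        self.minimum = minimum
        self.mean = mean
        self.median = median
        self.maximum = maximum

    @classmethod
    def from_data(cls, data):
        # empty inputs yield an all-zero summary rather than raising
        if not data:
            return cls(0, 0.0, 0.0, 0)
        return cls(min(data), statistics.mean(data), statistics.median(data), max(data))

    def __str__(self):
        return "min={0} mean={1:.2f} median={2} max={3}".format(
            self.minimum, self.mean, self.median, self.maximum)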
def has_notification(self, uuid, name):
    uuid = str(uuid)
    if not self._config.has_section(uuid):
        return False
    var_name = Measures.find_var_by_name(name)
    if not var_name:
        return False
    # any option that starts with the variable name counts as a notification
    return any(x.startswith(var_name) for x in self._config.options(uuid))
def remove_notification(self, uuid, name):
    uuid = str(uuid)
    if not self.is_user_allowed(uuid):
        return False
    var_name = Measures.find_var_by_name(name)
    if not var_name:
        return False
    # find all records which start with this variable name and remove them
    for record_name in [x for x in self._config.options(uuid) if x.startswith(var_name)]:
        self._config.remove_option(uuid, record_name)
    self.save()
    return True
def mute_notification(self, uuid, name: str):
    uuid = str(uuid)
    if not self.is_user_allowed(uuid):
        return False
    name = name.lower()
    if name != "all":
        var_name = Measures.find_var_by_name(name)
        if not var_name:
            return False
        mutes = self._config.get(uuid, "mute", fallback="")
        if mutes != "all" and var_name not in mutes.split(";"):
            # avoid a leading ";" when the mute list is still empty
            self._config[uuid]["mute"] = mutes + ";" + var_name if mutes else var_name
    else:
        self._config[uuid]["mute"] = "all"
    self.save()
    return True
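# Taken together, add_notification, mute_notification, unmute_notification,
# and is_notification_enabled form a small per-user mute list stored in a
# configparser section keyed by uuid. A hedged usage sketch; the Notifications
# wrapper class, the user id, and the "temperature" variable name are all
# illustrative, and the user is assumed to already pass is_user_allowed.
notifications = Notifications()  # hypothetical class wrapping the methods above
user = "42"

notifications.add_notification(user, "temperature", ">", 30)
assert notifications.is_notification_enabled(user, "temperature")

notifications.mute_notification(user, "temperature")
assert not notifications.is_notification_enabled(user, "temperature")

notifications.unmute_notification(user, "temperature")
assert notifications.is_notification_enabled(user, "temperature")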
def main(learnFactor):
    dateNow = datetime.now()
    dateNowFormated = dateNow.strftime('%Y-%m-%d %H:%M:%S')
    print('Start at: ' + dateNowFormated)

    # dirResutsName = dateNow.strftime('%Y-%m-%d_%H:%M')
    dirName = './results/' + 'predict2-' + str(learnFactor)
    try:
        os.mkdir(dirName)
        print("Directory", dirName, "created")
    except OSError:
        print("Directory", dirName, "create error")

    planilhaDeCorpusTreino = open("./corpus.tsv", "r")
    lines = planilhaDeCorpusTreino.readlines()
    lines.pop(0)  # drop the header row

    interationLogFile = Logger(dirName + '/interation_L01_I01.txt')
    bateryLogFile = Logger(dirName + '/batery_tests.tsv')
    learFactorLogFile = Logger(dirName + '/learFactor_tests.tsv')
    # resumeLogFile = Logger(dirName + '/resume_tests.txt')

    # TSV headers kept in Portuguese to match the variable names below
    bateryTestsString = ("Taxa_aprendizado" + "\t" + "Interação" + "\t" + "Acuracia"
                         + "\t" + "Precisão" + "\t" + "Recall" + "\t" + "F1_score")
    bateryLogFile.write(bateryTestsString)

    learnTestsString = ("learnFactor" + "\t" + "acuracia_media" + "\t"
                        + "acuracia_dispersion" + "\t" + "acuracia_defaultError" + "\t"
                        + "precision_media" + "\t" + "precision_dispersion" + "\t"
                        + "precision_defaultError" + "\t" + "recall_media" + "\t"
                        + "recall_dispersion" + "\t" + "recall_defaultError" + "\t"
                        + "f1_score_media" + "\t" + "f1_score_dispersion" + "\t"
                        + "f1_score_defaultError")
    learFactorLogFile.write(learnTestsString)

    learFactorPass = 0.2
    learnFactorMax = learnFactor + 0.1
    while learnFactor < learnFactorMax:
        learnPercents = 100 / ((learnFactorMax - learnFactor) / learFactorPass)
        print('learn is in ' + str(learnPercents) + '%')
        accuracyValues = []
        precisionValues = []
        recallValues = []
        scoresValues = []
        for index in range(5):
            interationPercents = (100 * index) / float(5)
            print('iteration is in ' + str(interationPercents) + '%')
            print('learn: ' + str(learnFactor) + ' iteration: ' + str(index))
            accuracy, precision, recall, f1_score = newInteration(lines, learnFactor, interationLogFile)
            bateryTestsString = (str(learnFactor) + "\t" + str(index) + "\t"
                                 + str(accuracy) + "\t" + str(precision) + "\t"
                                 + str(recall) + "\t" + str(f1_score))
            bateryLogFile.write(bateryTestsString)
            accuracyValues.append(accuracy)
            precisionValues.append(precision)
            recallValues.append(recall)
            scoresValues.append(f1_score)
        acuracia_media, acuracia_dispersion, acuracia_defaultError = Measures.valueStatistic(accuracyValues)
        precision_media, precision_dispersion, precision_defaultError = Measures.valueStatistic(precisionValues)
        recall_media, recall_dispersion, recall_defaultError = Measures.valueStatistic(recallValues)
        f1_score_media, f1_score_dispersion, f1_score_defaultError = Measures.valueStatistic(scoresValues)
        learnTestsString = (str(learnFactor) + "\t" + str(acuracia_media) + "\t"
                            + str(acuracia_dispersion) + "\t" + str(acuracia_defaultError) + "\t"
                            + str(precision_media) + "\t" + str(precision_dispersion) + "\t"
                            + str(precision_defaultError) + "\t" + str(recall_media) + "\t"
                            + str(recall_dispersion) + "\t" + str(recall_defaultError) + "\t"
                            + str(f1_score_media) + "\t" + str(f1_score_dispersion) + "\t"
                            + str(f1_score_defaultError))
        learFactorLogFile.write(learnTestsString)
        learnFactor += learFactorPass

    interationLogFile.close()
    bateryLogFile.close()
    learFactorLogFile.close()

    dateNow2 = datetime.now()
    passTime = dateNow2 - dateNow  # end minus start, so the duration is positive
    passTimeInSec = passTime.total_seconds()
    passDays = divmod(passTimeInSec, 86400)
    passHours = divmod(passDays[1], 3600)    # was divmod(days[1], ...): undefined name
    passMinutes = divmod(passHours[1], 60)   # was divmod(hours[1], ...): undefined name
    passSeconds = divmod(passMinutes[1], 1)  # was divmod(minutes[1], ...): undefined name
    print('Runtime: %d days, %d hours, %d minutes and %d seconds'
          % (passDays[0], passHours[0], passMinutes[0], passSeconds[0]))
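# main() unpacks three values per metric from Measures.valueStatistic. A
# minimal sketch of such a helper, assuming the three return values are the
# mean ("media"), the population standard deviation ("dispersion"), and the
# standard error of the mean ("defaultError"); the real implementation may
# differ, e.g. by using the sample standard deviation.
import math

def valueStatistic(values):
    """Hypothetical reconstruction: mean, standard deviation, standard error."""
    n = len(values)
    mean = sum(values) / n
    variance = sum((v - mean) ** 2 for v in values) / n
    deviation = math.sqrt(variance)
    default_error = deviation / math.sqrt(n)
    return mean, deviation, default_error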
import gc
import machine

from wifi import wifi_connect
from api import api_send
from logs import logs_error
from measures import Measures
from settings import WIFI_SSID, WIFI_PASSWORD, COUNT_FOR_AVERAGE, SENDING_DELTA

# hardware watchdog: resets the board if the loop stalls for 3 minutes
wdt = machine.WDT(timeout=180000)

try:
    wifi_connect(WIFI_SSID, WIFI_PASSWORD)
    while True:
        measures = Measures(COUNT_FOR_AVERAGE, SENDING_DELTA)
        data = measures.get_averaged_measures()
        # free the measurement object before sending to keep RAM pressure low
        del measures
        gc.collect()
        api_send(data)
        wdt.feed()
        gc.collect()
except Exception as e:
    logs_error(str(e))
### Evaluation script used for evaluation of baselines for MultiRC dataset
# The evaluation script expects the questions and predicted answers from separate json files.
# The predicted answers should be 1s and 0s (no real-valued scores)
import json
from measures import Measures

# this is the location of your data; has to be downloaded from http://cogcomp.org/multirc/
inputFile = '../data/dev_83-fixedIds.json'
measures = Measures()


def main():
    eval('../baseline-scores/human-01.json')
    # eval('../baseline-scores/allOnes.json')
    # eval('../baseline-scores/allZeros.json')
    # eval('../baseline-scores/simpleLR.json')
    # eval('../baseline-scores/lucene_world.json')
    # eval('../baseline-scores/lucene_paragraphs.json')


# the input to the `eval` function is the file which contains the binary predictions per question-id
def eval(outFile):
    input = json.load(open(inputFile))
    output = json.load(open(outFile))
    output_map = dict([[a["pid"] + "==" + a["qid"], a["scores"]] for a in output])
    assert len(output_map) == len(output), "You probably have redundancies in your keys"
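# From the keying code above, each entry in the predictions file evidently
# carries a paragraph id, a question id, and a list of binary per-answer
# scores. An illustrative sketch of the expected shape; the field values here
# are made up, only the keys "pid", "qid", and "scores" come from the code.
predictions = [
    {"pid": "Paragraph_1", "qid": "0", "scores": [0, 1, 0, 1]},
]
# keys in output_map then look like "Paragraph_1==0"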
        # fragment: end of the network's forward pass (used via nn.predict below)
        layer1_dot_layer2 = np.dot(activate_i_dot_l1, self.weights_layer_2)
        return activations.sigmoid(layer1_dot_layer2)


if __name__ == '__main__':
    for i in range(7):
        f = open("data" + str(i + 1) + ".txt", "r")
        inputs = []
        targets = []
        n = int(f.readline())
        for line in range(n):
            l = list(map(int, f.readline().strip().split()))
            inputs.append(l[:-1])   # all columns but the last are features
            targets.append([l[-1]])  # the last column is the target
        inputs = np.array(inputs)
        targets = np.array(targets)
        nn = network(inputs, targets)
        nn.fit(10000)
        pred = nn.predict(inputs)
        print(pred)
        print(targets)
        m = Measures(targets, pred)
        print("precision", m.precision())
        print("recall", m.recall())
        print("f1", m.f1())
        print("accuracy", m.accuracy())
        print("confusion matrix")
        for row in m.cmatrix():  # renamed from i, which shadowed the outer loop variable
            print(row)
        print("---------------")
# truth table inputs for a two-input logical OR (real = [0, 1, 1, 1])
dataset = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
weights = np.array([1, 1])
theta = 1

McCullochPitts = Neuron(weights, bias=False, theta=theta)
real = np.array([0, 1, 1, 1])
pred = np.array([])
for data in dataset:
    McCullochPitts.process(data)
    # step activation: fire (1) when the weighted sum reaches the threshold
    McCullochPitts.activate(lambda v, t: 1 if v >= t else 0)
    pred = np.append(pred, McCullochPitts.z)

m = Measures(real, pred)
print("precision %s" % m.precision())
print("recall %s" % m.recall())
print("f1 %s" % m.f1())
print("accuracy %s" % m.accuracy())
Miscellaneous.printcmatrix(m.cmatrix())

# decision boundary: w1*x1 + w2*x2 = theta, rewritten as x2 in terms of x1
x1 = np.linspace(-3, 3, 10)
x2 = np.linspace(-3, 3, 10)
v = -(weights[0] / weights[1]) * x1 + theta / weights[1]
plt.fill_between(x1, v, 3, where=v < 3, color='g', alpha=0.5)
ones = dataset[np.where(real > 0)]
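# Both scripts above rely on a Measures(real, pred) helper whose implementation
# is not shown. A minimal sketch of a compatible binary-classification version,
# assuming targets and predictions are already 0/1 arrays (real-valued sigmoid
# outputs would need thresholding first); the project's actual class may differ.
import numpy as np

class Measures:
    """Hypothetical reconstruction of the binary-classification metrics used above."""

    def __init__(self, real, pred):
        real = np.asarray(real).ravel()
        pred = np.asarray(pred).ravel()
        self.tp = int(np.sum((real == 1) & (pred == 1)))
        self.tn = int(np.sum((real == 0) & (pred == 0)))
        self.fp = int(np.sum((real == 0) & (pred == 1)))
        self.fn = int(np.sum((real == 1) & (pred == 0)))

    def precision(self):
        return self.tp / (self.tp + self.fp) if self.tp + self.fp else 0.0

    def recall(self):
        return self.tp / (self.tp + self.fn) if self.tp + self.fn else 0.0

    def f1(self):
        p, r = self.precision(), self.recall()
        return 2 * p * r / (p + r) if p + r else 0.0

    def accuracy(self):
        total = self.tp + self.tn + self.fp + self.fn
        return (self.tp + self.tn) / total if total else 0.0

    def cmatrix(self):
        # rows: actual [0, 1]; columns: predicted [0, 1]
        return [[self.tn, self.fp], [self.fn, self.tp]]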
class Ner():
    def __init__(self, argv):
        classifier = "ner/classifiers/" + "wikification.ser.gz"
        jar = "ner/stanford-ner-3.4.jar"
        self.tagger = NERTagger(classifier, jar)
        self.testfile = open(argv[1])  # was sys.argv[1]; use the argv that was passed in
        with open('html/htmlheader.txt', 'r') as h:
            self.htmlHeader = h.read()
        with open('html/htmlfooter.txt', 'r') as f:
            self.htmlFooter = f.read()
        self.measures = Measures()
        self.classify()

    def cleanData(self, line):
        # function to clean wrongly annotated data
        if len(line) > 6:
            if line[6] == '-':
                line[6] = ''
        if len(line) > 7:
            if line[7] == '-':
                line[7] = ''
        return line

    def classify(self):
        # create test data with the document as key and word/label fields as values
        testdata = defaultdict(list)
        tokens = defaultdict(list)
        for line in self.testfile:
            e = self.cleanData(line.strip().split())
            if len(e) == 6:
                testdata[e[0]].append([e[0], e[1], e[2], e[3], e[4], e[5], 'O', ''])
            elif len(e) == 7:
                testdata[e[0]].append([e[0], e[1], e[2], e[3], e[4], e[5], e[6], ''])
            elif len(e) == 8:
                testdata[e[0]].append([e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7]])
            if len(e) < 4:
                pass
            else:
                tokens[e[0]].append(e[4])  # store tokens of this document

        # add classification
        for doc in testdata:
            taggedDoc = self.tagger.tag(tokens[doc])
            taggedTokens = []
            for sentence in taggedDoc:
                taggedTokens.extend(sentence)
            for i, line in enumerate(testdata[doc]):
                expectedCategory = taggedTokens[i][1]
                testdata[doc][i].append(expectedCategory)
            # use the entire doc for getting wiki links
            wikiLinks = self.getWikiLinks(testdata[doc])
            for i, line in enumerate(testdata[doc]):
                expectedLink = wikiLinks[i]
                testdata[doc][i].append(expectedLink)

        self.saveFile(testdata)
        self.measures.calculate(testdata)  # use the measures script
        self.saveHTML(testdata)

    def saveFile(self, testdata):
        if os.path.exists("data/output.txt"):  # os.remove raises if the file is missing
            os.remove("data/output.txt")
        with open("data/output.txt", "a") as outputFile:
            for doc in testdata:
                for e in testdata[doc]:
                    lineString = "{} {} {}".format(" ".join(e[0:6]), e[8], e[9])
                    outputFile.write(lineString.strip() + '\n')

    def saveHTML(self, testdata):
        for html in glob.glob("html/*.html"):
            os.remove(html)
        docs = []
        for doc in testdata:
            filename = doc + ".html"
            with open("html/" + filename, "a") as htmlfile:
                docs.append((filename, doc))
                htmlfile.write(self.htmlHeader)
                htmlfile.write('<h1>Document ' + doc + '</h1>\n')
                for line in testdata[doc]:
                    if line[9] != '':
                        url = line[9].split(",")
                        htmlfile.write('<a data-toggle="tooltip" data-placement="top" title="Category: '
                                       + line[8] + '" href="' + url[0] + '" target="_blank" class="'
                                       + line[8] + '">' + line[4] + ' </a>')
                    else:
                        htmlfile.write(line[4] + " ")
                htmlfile.write('<br /><br /><a href="index.html" class="btn btn-default">Back</a>\n')
                htmlfile.write(self.htmlFooter)
                # the redundant close() is gone: the with block closes the file
        with open("html/index.html", "a") as htmlfile:
            htmlfile.write(self.htmlHeader)
            htmlfile.write('<h1>List of documents</h1>\n')
            for link in docs:
                htmlfile.write('<li><a href="' + link[0] + '">' + link[1] + '</a></li>\n')
            htmlfile.write(self.htmlFooter)
        with open("html/classify.html", "a") as htmlfile:
            htmlfile.write(self.htmlHeader)
            htmlfile.write('<h1>Classify</h1>\n')
            htmlfile.write('''<form action="../htmlClassifier.py" method="post">
First Name: <input type="text" name="first_name"><br />
Last Name: <input type="text" name="last_name" />
<input type="submit" value="Submit" />
</form>''')
            htmlfile.write(self.htmlFooter)

    def getWikiLinks(self, doc):
        # group consecutive tokens with the same tag, e.g. "new york", into one query
        currentToken = []
        lastToken = 'O'
        keywords = []
        result = [''] * len(doc)  # make list with default NONE tag
        for i, token in enumerate(doc):
            if token[8] == lastToken:
                currentToken.append(i)
            else:
                if lastToken != 'O':  # "is not" compared identity, not equality
                    keywords.append(currentToken)
                currentToken = [i]
                lastToken = token[8]
        for keyword in keywords:
            query = ''
            for token in keyword:
                query += doc[token][4] + "%20"
            url = ('http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch='
                   + query[:-3] + '&format=json')
            with urllib.request.urlopen(url) as response:
                str_response = response.read().decode('utf-8')  # read(), not readall()
            data = json.loads(str_response)
            links = []
            for d in data:
                for r in data[d]:
                    if r == 'search':
                        for s in data[d][r]:
                            if 'snippet' in s:
                                links.append('http://en.wikipedia.org/wiki/'
                                              + s['title'].replace(" ", "_"))
            if len(links) > 0:
                link = links[0]  # todo: check if other links are better
            else:
                link = 'NONE'
            for token in keyword:
                result[token] = link + ",1"
        return result
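# A minimal invocation sketch for the Ner class above; the original script's
# entry point is not shown, so this wiring is an assumption. The constructor
# expects the path to the annotated test file as the first command-line argument.
if __name__ == '__main__':
    import sys
    Ner(sys.argv)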