def generate_answer():
    """Flask view: answer the question submitted through the web form.

    Reads ``question`` from the POSTed form; on an empty question renders
    the form template with nothing to show.  Otherwise runs the QA
    pipeline (``get_answer``), highlights question words that did not make
    it into the keyword set, and renders the answer page.

    Fix over the original: the local variable was misspelled
    ``higlighted_question``; renamed (local-only, no behavior change).
    """
    question = request.form['question']
    if question == '':
        return render_template('form_action.html', content='none')
    a = get_answer(question)
    print("FOUND: %d (<<%s>> -> %s)" % (len(a.sources), question, a.text))
    # Mark every question word that was not used as a search keyword.
    highlighted_question = a.q.text
    for word in a.q.not_in_kw:
        highlighted_question = highlight_question_wrong(highlighted_question, word)
    sources = create_sources(a)
    return render_template('form_action.html', content='block',
                           sources=sources, question=highlighted_question,
                           answer=a.text, query=a.q.query)
def regenerate(splitname): # TODO: remove irrelevant printouts, remove sentence, url, headline,.. from outfile from argus.main_frame import get_answer from separate_relevance import relevance_load from argus.features import feature_list_official as flo q_num = 0 info_files = [ open('tests/feature_prints/%s/%s.tsv' % (splitname, i_f), 'wb') for i_f in flo ] writers = [ csv.writer(info_file, delimiter='\t') for info_file in info_files ] info_all = open('tests/feature_prints/%s/all_features.tsv' % (splitname, ), 'wb') writer_all = csv.writer(info_all, delimiter='\t') info_rel = open( 'tests/feature_prints/%s/all_features_rel.tsv' % (splitname, ), 'wb') writer_rel = csv.writer(info_rel, delimiter='\t') info_turk = open( 'tests/feature_prints/%s/turk_sentences.tsv' % (splitname, ), 'wb') writer_turk = csv.writer(info_turk, delimiter=',') first = False r = relevance_load() with open('tests/f%s.tsv' % (splitname, ), 'wb') as featfile: writer = csv.writer(featfile, delimiter='\t') with open('tests/q%s.tsv' % (splitname, )) as qfile: i = 0 for line in csv.reader(qfile, delimiter='\t'): qorigin, qrunid, qtopic, qtext, qgsans, qsrc = line if i == 0: # CSV header i += 1 info = [ 'HITID', 'Question', 'TurkAnswer', 'OurAnswer', 'OurKeywords', 'FoundSentence', 'OurHeadline', 'TurkTopic', 'TurkURL', 'OurURL', 'Source', 'info' ] info += flo if q_num == 0: writer.writerow(info) writer_turk.writerow(['question', 'sentence']) first = True continue q_num += 1 # Generate answer from question; this implies generating # various question features ouranswer = get_answer(qtext) # Toggle comment to keep only sources that were manually # annotated as relevant at mturk # filter_sources(ouranswer) # Write details to various auxiliary csv files url = '' headline = '' sentence = '' source = '' feat = '' info = [] if len(ouranswer.sources) != 0: feature_print_all(writer_all, ouranswer, first, qgsans) feature_print_rel(writer_rel, ouranswer, r, first) feature_print(writers, ouranswer) 
turk_print(writer_turk, ouranswer) url = ouranswer.sources[0].url headline = ouranswer.sources[0].headline sentence = ouranswer.sources[0].sentence source = ouranswer.sources[0].source for j in range(len(flo)): for s in ouranswer.sources: feat += str(s.features[j].get_value()) + ":" feat = feat[:-1] info.append(feat) feat = '' # Write details to the output.tsv info = [ qrunid, qtext, qgsans, ouranswer.text, ouranswer.q.summary(), sentence, headline, qtopic, qsrc, url, source, ouranswer.info ] + info info = [field.encode('utf-8') for field in info] writer.writerow(info) ############### if q_num % 10 == 0: print 'answering question', splitname, q_num for i_f in info_files: i_f.close()
def evaluate(): # TODO: remove irrelevant printouts, remove sentence, url, headline,.. from outfile from argus.main_frame import get_answer from separate_relevance import relevance_load from argus.features import feature_list_official as flo q_num = 0 info_files = [open('tests/feature_prints/' + i_f + '.tsv', 'wb') for i_f in flo] writers = [csv.writer(info_file, delimiter='\t') for info_file in info_files] info_all = open('tests/feature_prints/all_features.tsv', 'wb') writer_all = csv.writer(info_all, delimiter='\t') info_rel = open('tests/feature_prints/all_features_rel.tsv', 'wb') writer_rel = csv.writer(info_rel, delimiter='\t') info_turk = open('tests/feature_prints/turk_sentences.tsv', 'wb') writer_turk = csv.writer(info_turk, delimiter=',') first = False r = relevance_load() npy_rel = [] with open(OUTFILE, 'wb') as csv_file: writer = csv.writer(csv_file, delimiter='\t') for csvfile in os.listdir(CSV_FOLDER): if not csvfile.endswith(".csv"): continue i = 0 for line in csv.reader(open(CSV_FOLDER + '/' + csvfile), delimiter=',', skipinitialspace=True): if i == 0: # CSV header i += 1 info = ['HITID', 'Question', 'TurkAnswer', 'OurAnswer', 'OurKeywords', 'FoundSentence', 'OurHeadline', 'TurkTopic', 'TurkURL', 'OurURL', 'Source', 'info'] info += flo if q_num == 0: writer.writerow(info) writer_turk.writerow(['question', 'sentence']) first = True continue if line[16] == 'Rejected': continue q_num += 1 # Generate answer from question ouranswer = get_answer(line[30]) # Toggle comment to keep only sources that were manually # annotated as relevant at mturk # filter_sources(ouranswer) # Write details to various auxiliary csv files url = '' headline = '' sentence = '' source = '' feat = '' info = [] turk_answer = line[28] if len(ouranswer.sources) != 0: feature_print_all(writer_all, ouranswer, first, turk_answer) feature_print_rel(writer_rel, ouranswer, r, first) feature_print(writers, ouranswer) turk_print(writer_turk, ouranswer) url = ouranswer.sources[0].url headline 
= ouranswer.sources[0].headline sentence = ouranswer.sources[0].sentence source = ouranswer.sources[0].source for j in range(len(flo)): for s in ouranswer.sources: feat += str(s.features[j].get_value()) + ":" feat = feat[:-1] info.append(feat) feat = '' # Write details to the output.tsv info = [line[0], line[30], turk_answer, ouranswer.text, ouranswer.q.query, sentence, headline, line[31], line[29], url, source, ouranswer.info] + info info = [field.encode('utf-8') for field in info] writer.writerow(info) # Store relevance features + gs for possible separate classifier # training for triplet in r: if ouranswer.q.text == triplet[0]: for s in ouranswer.sources: if s.sentence == triplet[1]: fs = [f.get_value() for f in s.features if '@' in f.get_type()] if len(npy_rel) == 0: npy_rel = np.array(fs + [triplet[-1] / 2]) else: npy_rel = np.vstack((npy_rel, np.array(fs + [triplet[-1] / 2]))) ############### if q_num % 10 == 0: print 'answering question', q_num for i_f in info_files: i_f.close() np.save('tests/batches/relevance/npy_rel', npy_rel)
def regenerate(splitname):
    """Re-run the QA pipeline over the ``tests/q<splitname>.tsv`` question
    split and regenerate the feature dump files for that split.

    Writes ``tests/f<splitname>.tsv`` (one row per question: answer,
    metadata, then one colon-joined feature column per entry of
    ``feature_list_official``) and per-feature / aggregate / turk-sentence
    TSVs under ``tests/feature_prints/<splitname>/``.

    NOTE(review): ``info_all``, ``info_rel`` and ``info_turk`` are opened
    here but never closed — only ``info_files`` are closed at the end.
    """
    # TODO: remove irrelevant printouts, remove sentence, url, headline,.. from outfile
    from argus.main_frame import get_answer
    from separate_relevance import relevance_load
    from argus.features import feature_list_official as flo
    q_num = 0
    # One dump file + csv writer per official feature.
    info_files = [open('tests/feature_prints/%s/%s.tsv' % (splitname, i_f), 'wb')
                  for i_f in flo]
    writers = [csv.writer(info_file, delimiter='\t') for info_file in info_files]
    info_all = open('tests/feature_prints/%s/all_features.tsv' % (splitname,), 'wb')
    writer_all = csv.writer(info_all, delimiter='\t')
    info_rel = open('tests/feature_prints/%s/all_features_rel.tsv' % (splitname,), 'wb')
    writer_rel = csv.writer(info_rel, delimiter='\t')
    info_turk = open('tests/feature_prints/%s/turk_sentences.tsv' % (splitname,), 'wb')
    writer_turk = csv.writer(info_turk, delimiter=',')
    first = False
    # Gold-standard relevance triplets; presumably (question, sentence, label)
    # — TODO confirm against separate_relevance.relevance_load().
    r = relevance_load()
    with open('tests/f%s.tsv' % (splitname,), 'wb') as featfile:
        writer = csv.writer(featfile, delimiter='\t')
        with open('tests/q%s.tsv' % (splitname,)) as qfile:
            i = 0
            for line in csv.reader(qfile, delimiter='\t'):
                qorigin, qrunid, qtopic, qtext, qgsans, qsrc = line
                if i == 0:  # CSV header
                    i += 1
                    info = ['HITID', 'Question', 'TurkAnswer', 'OurAnswer',
                            'OurKeywords', 'FoundSentence', 'OurHeadline',
                            'TurkTopic', 'TurkURL', 'OurURL', 'Source', 'info']
                    info += flo
                    # Emit the header only once, for the first file/split pass.
                    if q_num == 0:
                        writer.writerow(info)
                        writer_turk.writerow(['question', 'sentence'])
                        first = True
                    continue
                q_num += 1
                # Generate answer from question; this implies generating
                # various question features
                ouranswer = get_answer(qtext)
                # Toggle comment to keep only sources that were manually
                # annotated as relevant at mturk
                # filter_sources(ouranswer)
                # Write details to various auxiliary csv files
                url = ''
                headline = ''
                sentence = ''
                source = ''
                feat = ''
                info = []
                if len(ouranswer.sources) != 0:
                    feature_print_all(writer_all, ouranswer, first, qgsans)
                    feature_print_rel(writer_rel, ouranswer, r, first)
                    feature_print(writers, ouranswer)
                    turk_print(writer_turk, ouranswer)
                    # Details of the top-ranked source only.
                    url = ouranswer.sources[0].url
                    headline = ouranswer.sources[0].headline
                    sentence = ouranswer.sources[0].sentence
                    source = ouranswer.sources[0].source
                # Build one colon-joined column per feature, spanning all
                # sources; with no sources each column stays ''.
                for j in range(len(flo)):
                    for s in ouranswer.sources:
                        feat += str(s.features[j].get_value()) + ":"
                    feat = feat[:-1]
                    info.append(feat)
                    feat = ''
                # Write details to the output.tsv
                info = [qrunid, qtext, qgsans, ouranswer.text,
                        ouranswer.q.summary(), sentence, headline, qtopic,
                        qsrc, url, source, ouranswer.info] + info
                info = [field.encode('utf-8') for field in info]
                writer.writerow(info)
                ###############
                # Progress report every 10 questions.
                if q_num % 10 == 0:
                    print 'answering question', splitname, q_num
    for i_f in info_files:
        i_f.close()
def evaluate():
    """Run the QA pipeline over every mturk ``.csv`` in ``CSV_FOLDER``,
    writing one evaluation row per (non-rejected) question to ``OUTFILE``
    plus per-feature / aggregate / turk-sentence TSVs under
    ``tests/feature_prints/``.

    Relevance feature vectors matched against the ``relevance_load()``
    gold standard are stacked into an array and saved to
    ``tests/batches/relevance/npy_rel.npy``.

    NOTE(review): ``info_all``/``info_rel``/``info_turk`` and the per-file
    handle opened inline in ``csv.reader(open(...))`` are never closed.
    Column indices (16, 28, 30, 31, ...) presumably follow the mturk
    results CSV layout — TODO confirm against the input files.
    """
    # TODO: remove irrelevant printouts, remove sentence, url, headline,.. from outfile
    from argus.main_frame import get_answer
    from separate_relevance import relevance_load
    from argus.features import feature_list_official as flo
    q_num = 0
    # One dump file + csv writer per official feature.
    info_files = [open('tests/feature_prints/' + i_f + '.tsv', 'wb')
                  for i_f in flo]
    writers = [csv.writer(info_file, delimiter='\t')
               for info_file in info_files]
    info_all = open('tests/feature_prints/all_features.tsv', 'wb')
    writer_all = csv.writer(info_all, delimiter='\t')
    info_rel = open('tests/feature_prints/all_features_rel.tsv', 'wb')
    writer_rel = csv.writer(info_rel, delimiter='\t')
    info_turk = open('tests/feature_prints/turk_sentences.tsv', 'wb')
    writer_turk = csv.writer(info_turk, delimiter=',')
    first = False
    r = relevance_load()
    npy_rel = []
    with open(OUTFILE, 'wb') as csv_file:
        writer = csv.writer(csv_file, delimiter='\t')
        for csvfile in os.listdir(CSV_FOLDER):
            if not csvfile.endswith(".csv"):
                continue
            i = 0
            for line in csv.reader(open(CSV_FOLDER + '/' + csvfile),
                                   delimiter=',', skipinitialspace=True):
                if i == 0:  # CSV header
                    i += 1
                    info = ['HITID', 'Question', 'TurkAnswer', 'OurAnswer',
                            'OurKeywords', 'FoundSentence', 'OurHeadline',
                            'TurkTopic', 'TurkURL', 'OurURL', 'Source', 'info']
                    info += flo
                    # Emit the header only once, on the very first file.
                    if q_num == 0:
                        writer.writerow(info)
                        writer_turk.writerow(['question', 'sentence'])
                        first = True
                    continue
                # Skip assignments rejected at mturk.
                if line[16] == 'Rejected':
                    continue
                q_num += 1
                # Generate answer from question
                ouranswer = get_answer(line[30])
                # Toggle comment to keep only sources that were manually
                # annotated as relevant at mturk
                # filter_sources(ouranswer)
                # Write details to various auxiliary csv files
                url = ''
                headline = ''
                sentence = ''
                source = ''
                feat = ''
                info = []
                turk_answer = line[28]
                if len(ouranswer.sources) != 0:
                    feature_print_all(writer_all, ouranswer, first, turk_answer)
                    feature_print_rel(writer_rel, ouranswer, r, first)
                    feature_print(writers, ouranswer)
                    turk_print(writer_turk, ouranswer)
                    # Details of the top-ranked source only.
                    url = ouranswer.sources[0].url
                    headline = ouranswer.sources[0].headline
                    sentence = ouranswer.sources[0].sentence
                    source = ouranswer.sources[0].source
                # Build one colon-joined column per feature, spanning all
                # sources; with no sources each column stays ''.
                for j in range(len(flo)):
                    for s in ouranswer.sources:
                        feat += str(s.features[j].get_value()) + ":"
                    feat = feat[:-1]
                    info.append(feat)
                    feat = ''
                # Write details to the output.tsv
                info = [line[0], line[30], turk_answer, ouranswer.text,
                        ouranswer.q.query, sentence, headline, line[31],
                        line[29], url, source, ouranswer.info] + info
                info = [field.encode('utf-8') for field in info]
                writer.writerow(info)
                # Store relevance features + gs for possible separate classifier
                # training
                for triplet in r:
                    if ouranswer.q.text == triplet[0]:
                        for s in ouranswer.sources:
                            if s.sentence == triplet[1]:
                                # Only relevance features (type tagged '@').
                                fs = [f.get_value() for f in s.features
                                      if '@' in f.get_type()]
                                if len(npy_rel) == 0:
                                    npy_rel = np.array(fs + [triplet[-1] / 2])
                                else:
                                    npy_rel = np.vstack(
                                        (npy_rel, np.array(fs + [triplet[-1] / 2])))
                ###############
                # Progress report every 10 questions.
                if q_num % 10 == 0:
                    print 'answering question', q_num
    for i_f in info_files:
        i_f.close()
    np.save('tests/batches/relevance/npy_rel', npy_rel)