Code Example #1
File: web_interface.py  Project: ittailup/argus
# Assumed context for this Flask view: request/render_template come from Flask;
# get_answer is imported from argus.main_frame elsewhere in the project, and
# create_sources / highlight_question_wrong are helpers defined in web_interface.py.
from flask import request, render_template

def generate_answer():
    question = request.form['question']
    if question == '':
        return render_template('form_action.html', content='none')

    a = get_answer(question)
    print("FOUND: %d (<<%s>> -> %s)" % (len(a.sources), question, a.text))

    # Highlight the question words that are not among the keywords (not_in_kw)
    highlighted_question = a.q.text
    for word in a.q.not_in_kw:
        highlighted_question = highlight_question_wrong(highlighted_question, word)

    sources = create_sources(a)

    return render_template('form_action.html', content='block', sources=sources,
                           question=highlighted_question, answer=a.text, query=a.q.query)
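
A minimal, self-contained sketch of how a view like this is typically wired into a Flask app; the '/answer' route, the app object, and the stubbed-out body are illustrative assumptions, not taken from the project:

# Hypothetical wiring for a view like generate_answer(); only the
# request.form / render_template usage mirrors the example above.
from flask import Flask, request, render_template

app = Flask(__name__)

@app.route('/answer', methods=['POST'])  # assumed endpoint name
def answer_view():
    question = request.form.get('question', '')
    if question == '':
        return render_template('form_action.html', content='none')
    # ... call get_answer(question) and build the full template context here ...
    return render_template('form_action.html', content='block')

if __name__ == '__main__':
    app.run(debug=True)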
Code Example #2
# Python 2 code: files are opened in 'wb' for csv.writer and print is a
# statement. Assumed module-level context: the csv import below, plus the
# feature_print* / turk_print helpers defined elsewhere in this module.
import csv

def regenerate(splitname):
    # TODO: remove irrelevant printouts; remove sentence, url, headline, ... from outfile
    from argus.main_frame import get_answer
    from separate_relevance import relevance_load
    from argus.features import feature_list_official as flo
    q_num = 0
    info_files = [open('tests/feature_prints/%s/%s.tsv' % (splitname, i_f), 'wb') for i_f in flo]
    writers = [csv.writer(info_file, delimiter='\t') for info_file in info_files]
    info_all = open('tests/feature_prints/%s/all_features.tsv' % (splitname,), 'wb')
    writer_all = csv.writer(info_all, delimiter='\t')
    info_rel = open('tests/feature_prints/%s/all_features_rel.tsv' % (splitname,), 'wb')
    writer_rel = csv.writer(info_rel, delimiter='\t')
    info_turk = open('tests/feature_prints/%s/turk_sentences.tsv' % (splitname,), 'wb')
    writer_turk = csv.writer(info_turk, delimiter=',')
    first = False
    r = relevance_load()
    with open('tests/f%s.tsv' % (splitname,), 'wb') as featfile:
        writer = csv.writer(featfile, delimiter='\t')
        with open('tests/q%s.tsv' % (splitname,)) as qfile:
            i = 0
            for line in csv.reader(qfile, delimiter='\t'):
                qorigin, qrunid, qtopic, qtext, qgsans, qsrc = line

                if i == 0:
                    # CSV header
                    i += 1
                    info = ['HITID', 'Question', 'TurkAnswer', 'OurAnswer',
                            'OurKeywords', 'FoundSentence', 'OurHeadline',
                            'TurkTopic', 'TurkURL', 'OurURL', 'Source', 'info']
                    info += flo
                    if q_num == 0:
                        writer.writerow(info)
                        writer_turk.writerow(['question', 'sentence'])
                        first = True
                    continue
                q_num += 1

                # Generate answer from question; this implies generating
                # various question features
                ouranswer = get_answer(qtext)

                # Toggle comment to keep only sources that were manually
                # annotated as relevant at mturk
                # filter_sources(ouranswer)

                # Write details to various auxiliary csv files
                url = ''
                headline = ''
                sentence = ''
                source = ''
                feat = ''
                info = []
                if len(ouranswer.sources) != 0:
                    feature_print_all(writer_all, ouranswer, first, qgsans)
                    feature_print_rel(writer_rel, ouranswer, r, first)
                    feature_print(writers, ouranswer)
                    turk_print(writer_turk, ouranswer)
                    url = ouranswer.sources[0].url
                    headline = ouranswer.sources[0].headline
                    sentence = ouranswer.sources[0].sentence
                    source = ouranswer.sources[0].source
                    # One column per feature: its value in every source, joined with ':'
                    for j in range(len(flo)):
                        for s in ouranswer.sources:
                            feat += str(s.features[j].get_value()) + ":"
                        feat = feat[:-1]
                        info.append(feat)
                        feat = ''

                # Write details to the output .tsv
                info = [qrunid, qtext, qgsans,
                        ouranswer.text, ouranswer.q.summary(), sentence,
                        headline, qtopic, qsrc, url, source,
                        ouranswer.info] + info
                info = [field.encode('utf-8') for field in info]
                writer.writerow(info)

                if q_num % 10 == 0:
                    print 'answering question', splitname, q_num

    for i_f in info_files:
        i_f.close()
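
The inner loop above builds each per-feature column by string concatenation followed by trimming the trailing ':'. A minimal equivalent sketch using str.join; the stub classes exist only to make it self-contained:

# Equivalent, more idiomatic construction of the 'v1:v2:...:vN' columns.
class _Feature(object):
    def __init__(self, value):
        self._value = value
    def get_value(self):
        return self._value

class _Source(object):
    def __init__(self, values):
        self.features = [_Feature(v) for v in values]

def feature_columns(sources, n_features):
    # One column per feature: its value in every source, joined with ':'
    return [':'.join(str(s.features[j].get_value()) for s in sources)
            for j in range(n_features)]

sources = [_Source([1, 0.5]), _Source([0, 0.25])]
print(feature_columns(sources, 2))  # ['1:0', '0.5:0.25']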
Code Example #3
File: batch_test.py  Project: ittailup/argus
# Python 2 code: files are opened in 'wb' for csv.writer and print is a
# statement. Assumed module-level context: the imports below, plus the
# OUTFILE / CSV_FOLDER constants and the feature_print* / turk_print
# helpers defined elsewhere in batch_test.py.
import csv
import os
import numpy as np

def evaluate():
    # TODO: remove irrelevant printouts; remove sentence, url, headline, ... from outfile
    from argus.main_frame import get_answer
    from separate_relevance import relevance_load
    from argus.features import feature_list_official as flo
    q_num = 0
    info_files = [open('tests/feature_prints/' + i_f + '.tsv', 'wb') for i_f in flo]
    writers = [csv.writer(info_file, delimiter='\t') for info_file in info_files]
    info_all = open('tests/feature_prints/all_features.tsv', 'wb')
    writer_all = csv.writer(info_all, delimiter='\t')
    info_rel = open('tests/feature_prints/all_features_rel.tsv', 'wb')
    writer_rel = csv.writer(info_rel, delimiter='\t')
    info_turk = open('tests/feature_prints/turk_sentences.tsv', 'wb')
    writer_turk = csv.writer(info_turk, delimiter=',')
    first = False
    r = relevance_load()
    npy_rel = []
    with open(OUTFILE, 'wb') as csv_file:
        writer = csv.writer(csv_file, delimiter='\t')
        for csvfile in os.listdir(CSV_FOLDER):
            if not csvfile.endswith(".csv"):
                continue
            i = 0
            for line in csv.reader(open(CSV_FOLDER + '/' + csvfile), delimiter=',', skipinitialspace=True):
                if i == 0:
                    # CSV header
                    i += 1
                    info = ['HITID', 'Question', 'TurkAnswer', 'OurAnswer',
                            'OurKeywords', 'FoundSentence', 'OurHeadline',
                            'TurkTopic', 'TurkURL', 'OurURL', 'Source', 'info']
                    info += flo
                    if q_num == 0:
                        writer.writerow(info)
                        writer_turk.writerow(['question', 'sentence'])
                        first = True
                    continue
                # Skip HITs whose assignment status is 'Rejected'
                if line[16] == 'Rejected':
                    continue
                q_num += 1

                # Generate answer from question (column 30 holds the question text)
                ouranswer = get_answer(line[30])

                # Toggle comment to keep only sources that were manually
                # annotated as relevant at mturk
                # filter_sources(ouranswer)

                # Write details to various auxiliary csv files
                url = ''
                headline = ''
                sentence = ''
                source = ''
                feat = ''
                info = []
                turk_answer = line[28]
                if len(ouranswer.sources) != 0:
                    feature_print_all(writer_all, ouranswer, first, turk_answer)
                    feature_print_rel(writer_rel, ouranswer, r, first)
                    feature_print(writers, ouranswer)
                    turk_print(writer_turk, ouranswer)
                    url = ouranswer.sources[0].url
                    headline = ouranswer.sources[0].headline
                    sentence = ouranswer.sources[0].sentence
                    source = ouranswer.sources[0].source
                    # One column per feature: its value in every source, joined with ':'
                    for j in range(len(flo)):
                        for s in ouranswer.sources:
                            feat += str(s.features[j].get_value()) + ":"
                        feat = feat[:-1]
                        info.append(feat)
                        feat = ''

                # Write details to the output .tsv
                info = [line[0], line[30], turk_answer,
                        ouranswer.text, ouranswer.q.query, sentence,
                        headline, line[31], line[29], url, source,
                        ouranswer.info] + info
                info = [field.encode('utf-8') for field in info]
                writer.writerow(info)

                # Store relevance features + gold standard for possible
                # separate classifier training
                for triplet in r:
                    if ouranswer.q.text == triplet[0]:
                        for s in ouranswer.sources:
                            if s.sentence == triplet[1]:
                                fs = [f.get_value() for f in s.features if '@' in f.get_type()]
                                if len(npy_rel) == 0:
                                    npy_rel = np.array(fs + [triplet[-1] / 2])
                                else:
                                    npy_rel = np.vstack((npy_rel, np.array(fs + [triplet[-1] / 2])))

                if q_num % 10 == 0:
                    print 'answering question', q_num

    for i_f in info_files:
        i_f.close()
    np.save('tests/batches/relevance/npy_rel', npy_rel)
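
One design note on the relevance-feature accumulation above: np.vstack inside a loop copies the whole array on every append, which is quadratic in the number of rows. The usual alternative is to collect rows in a plain list and stack once; a minimal sketch with made-up feature rows:

# Collect rows in a list, convert to an array once at the end.
import numpy as np

rows = []
for fs, gs in [([0.1, 0.7], 1), ([0.4, 0.2], 0)]:  # (features, gold standard)
    rows.append(fs + [gs / 2.0])
npy_rel = np.array(rows)  # one allocation instead of n stacked copies
print(npy_rel.shape)      # (2, 3)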