Example #1
from fact_verification_system.search.wiki_search_admin import WikiSearchAdmin


def seed():
    # Connect through the admin wrapper and sanity-check the cluster.
    es = WikiSearchAdmin(
        config_path='fact_verification_system/search/config.yaml').es
    print(es)
    print(dir(es))
    print(es.ping())  # ping() returns True if the cluster is reachable
    print(es.info())
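
These examples only touch the .es attribute; the wrapper class itself is not shown. A minimal sketch of what WikiSearchAdmin could look like, assuming the YAML config holds an Elasticsearch host list under a hosts key (that key is an assumption):

import yaml
from elasticsearch import Elasticsearch


class WikiSearchAdmin:
    # Hypothetical reconstruction: loads config.yaml and exposes the client as .es,
    # matching the `WikiSearchAdmin(config).es` usage throughout these examples.
    def __init__(self, config_path):
        with open(config_path) as f:
            cfg = yaml.safe_load(f)
        self.es = Elasticsearch(hosts=cfg.get('hosts', ['localhost:9200']))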
Example #2
import pprint

from fact_verification_system.search.wiki_search_admin import WikiSearchAdmin
from fact_verification_system.search.wiki_search_query import WikiSearchQuery as wsq

pp = pprint.PrettyPrinter(indent=2)


def main():
    wsa = WikiSearchAdmin(
        config_path='fact_verification_system/search/config.yaml')

    # mock_input_claim = "Robert Downey jr is famous for playing Ironman."
    mock_input_claim = "Wildcats"

    res = wsq.query(wsa.es, mock_input_claim)
    # pp.pprint(res.results)
    pp.pprint(res.get_hits(limit=10))
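
Neither wsq.query nor its result object is shown in these examples. A rough sketch consistent with the calls used here and in the eval() examples below (get_hits, get_sentences, get_page_id_sent_idx), assuming a 'wiki' index whose documents carry page_id, sent_idx, and sentence fields:

# Hypothetical result wrapper and query class; the index schema is an assumption.
class SearchResult:
    def __init__(self, raw):
        self.results = raw  # raw Elasticsearch response dict

    def get_hits(self, limit=10):
        return self.results['hits']['hits'][:limit]

    def get_sentences(self, limit=10):
        return [h['_source']['sentence'] for h in self.get_hits(limit)]

    def get_page_id_sent_idx(self, limit=10):
        # a set would also fit the .intersection() call in the second eval() example
        return [(h['_source']['page_id'], h['_source']['sent_idx'])
                for h in self.get_hits(limit)]


class WikiSearchQuery:
    @staticmethod
    def query(es, claim):
        # full-text match of the claim against stored sentences
        return SearchResult(es.search(index='wiki',
                                      body={'query': {'match': {'sentence': claim}}}))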
Example #3

import json

from fact_verification_system.search.wiki_search_admin import WikiSearchAdmin
from fact_verification_system.search.wiki_search_query import WikiSearchQuery as wsq
from fact_verification_system.sentence_selection.sentence_selection import SentenceSelection


def eval(args):
    config = 'fact_verification_system/search/config.yaml'
    ss = SentenceSelection()
    es = WikiSearchAdmin(config).es

    total = 0
    total_matched = 0

    with open(
            '../dataset/devset.json', 'r'
    ) as fp:  # NOTE: change to train.json when you receive the dataset
        train_json = json.load(fp)

        for i, data in enumerate(train_json.values()):
            claim = data.get('claim')
            # get RETRIEVED (page_id, sent_idx) tuples from returned search results
            limit = 10
            res = wsq.query(es, claim)
            rel_pageid_sentidx = res.get_page_id_sent_idx(limit=limit)
            sentences = res.get_sentences(limit=limit)

            # sentence selection
            filtered_sentences = ss.filtered_sentences(claim, sentences)
            # use a set, not a generator: membership is tested repeatedly below,
            # and a generator would be exhausted after the first test
            indices = {i for (i, sent) in filtered_sentences}

            # modified retrieved data
            # keep only the retrieved tuples whose rank survived sentence selection;
            # store the (page_id, sent_idx) pairs themselves so the intersection
            # with true_pageid_sentidx below compares like with like
            filtered_rel = set(
                pair for rank, pair in enumerate(rel_pageid_sentidx)
                if rank in indices)  # pair is (page_id, sent_idx)

            # get TRUE (page_id, sent_idx) tuples from devset.json
            true_pageid_sentidx = set([(ev[0], ev[1])
                                       for ev in data.get('evidence')])

            matched = len(filtered_rel.intersection(true_pageid_sentidx))
            total_matched += matched

            # recall denominator counts TRUE evidence tuples only
            total += len(data.get('evidence'))

            if args.debug:
                print("retrieved:\n{}".format(rel_pageid_sentidx))
                print("filtered_sentences:\n{}".format(filtered_sentences))
                print("filtered:\n{}".format(filtered_rel))
                print("true:\n{}".format(true_pageid_sentidx))
                if i >= args.debug - 1:
                    print("Eval ended. (DEBUG MODE)")
                    return

    # recall = (retrieved & true) / true
    recall = float(total_matched) / float(total)
    print("Recall: {:.2f}".format(recall))
Example #4

from elasticsearch.exceptions import RequestError

from fact_verification_system.search.wiki_search_admin import WikiSearchAdmin


def setup():
    print("Setting up elasticsearch database...")
    index_name = 'wiki'
    config = 'fact_verification_system/search/config.yaml'
    wsa = WikiSearchAdmin(config)
    try:
        print("Creating index: {}".format(index_name))
        wsa.es.indices.create(index=index_name)
    except RequestError as e:
        print(e)
        print("[setup.py] Index: {} already exists.".format(index_name))
        return None

    except Exception as e:
        print("[setup.py] Error while setting up elasticsearch index.")
        raise e    
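
setup() leans on RequestError to detect an index that already exists; a variant that checks first (using the same indices API seen in the seeding example below) avoids the exception entirely:

    # alternative sketch: create the index only if it is missing
    if not wsa.es.indices.exists(index=index_name):
        wsa.es.indices.create(index=index_name)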
Example #5

import multiprocessing

from elasticsearch.helpers import parallel_bulk

from fact_verification_system.search.wiki_search_admin import WikiSearchAdmin


def seed():
    index_name = 'wiki'

    config = 'fact_verification_system/search/config.yaml'
    es = WikiSearchAdmin(config).es

    prompt_str = "Are you sure you want to seed the elastic search database?\n\
(This will delete the existing 'wiki' index if it exists)\n\
Enter y/n: "

    confirm_prompt = input(prompt_str)
    
    if confirm_prompt != 'y':
        exit("Exited.")

    # ES 7.0 has removed mapping types, API calls are now Typeless
    print("Creating new index: {}...".format(index_name))
    if es.indices.exists(index=index_name):
        es.indices.delete(index=index_name)
    es.indices.create(index=index_name)
    
    print("Seeding newly created index: {}...".format(index_name))
    cpu_count = multiprocessing.cpu_count()
    
    # Alternative: drain the generator with collections.deque(..., maxlen=0)
    # if per-document error reporting is not needed:
    # deque(parallel_bulk(client=es,
    #                     actions=_iter_wiki_data_for_es(index_name),
    #                     thread_count=cpu_count), maxlen=0)

    pb = parallel_bulk(
        client=es,
        actions=_iter_wiki_data_for_es(index_name),
        thread_count=cpu_count)

    for success, info in pb:
        if not success:
            print("A document failed: {}".format(info))

    es.indices.refresh()

    print("Bulk insert complete.")
Example #6

import json

from tqdm import tqdm

from fact_verification_system.search.wiki_search_admin import WikiSearchAdmin
from fact_verification_system.search.wiki_search_query import WikiSearchQuery as wsq


def eval(args):
    config = 'fact_verification_system/search/config.yaml'
    print("Running evaluation script for search engine...")
    print("Using elasticsearch config path: {}".format(config))

    total = 0
    total_matched = 0
    es = WikiSearchAdmin(config_path=config).es
    with open(
            '../dataset/devset.json', 'r'
    ) as fp:  # NOTE: change to train.json when you receive the dataset
        train_json = json.load(fp)

        for i, data in tqdm(enumerate(train_json.values())):
            claim = data.get('claim')
            # get RETRIEVED (page_id, sent_idx) tuples from returned search results
            rel_pageid_sentidx = wsq.query(
                es, claim).get_page_id_sent_idx(limit=10)

            # get TRUE (page_id, sent_idx) tuples from devset.json
            true_pageid_sentidx = set([(ev[0], ev[1])
                                       for ev in data.get('evidence')])

            matched = len(rel_pageid_sentidx.intersection(true_pageid_sentidx))
            total_matched += matched

            # recall denominator counts TRUE evidence tuples only
            total += len(data.get('evidence'))

            if args.debug:
                print("retrieved:\n {}".format(rel_pageid_sentidx))
                print("true:\n {}".format(true_pageid_sentidx))
                if i >= args.debug:
                    print("Eval ended. (DEBUG MODE)")
                    return

    # recall = (retrieved & true) / true
    recall = float(total_matched) / float(total)
    print("Recall: {:.2f}".format(recall))
Example #7

from flask import Flask, request, abort, Response
from flask_restplus import Resource, Api, fields

import pprint
pp = pprint.PrettyPrinter(indent=2).pprint  # pp is the bound pprint function

from fact_verification_system.search.wiki_search_admin import WikiSearchAdmin
from fact_verification_system.search.wiki_search_query import WikiSearchQuery as wsq
from fact_verification_system.sentence_selection.sentence_selection import SentenceSelection
from fact_verification_system.classifier.tfx.predict import TFXPredict

app = Flask(__name__)  # `app` is not defined in this snippet; the original module may import it from elsewhere
api = Api(app)

config = 'fact_verification_system/search/config.yaml'
es = WikiSearchAdmin(config).es
ss = SentenceSelection()

tfxp = TFXPredict()
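
# `formattedResponse` is used by the routes below but not defined in this
# snippet; flask_restplus resources may return a (body, status) tuple, so a
# minimal hypothetical helper could look like this:
def formattedResponse(data, message):
    return {'data': data, 'message': message}, 200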


@api.route('/health')
class Health(Resource):
    def get(self):
        # TODO: send a query to es, sentence selection then classifier.
        return formattedResponse(None, "Fact Verification System is active.")


@api.route('/evidence')
class Evidence(Resource):
    def post(self):