Ejemplo n.º 1
0
from common.dataset.qald_7_ml import Qald_7_ml
from common.kb.dbpedia import DBpedia

if __name__ == '__main__':
    # Build one-hop entity neighborhoods for the selected dataset and cache
    # them to disk as a pickle.  NOTE(review): this chunk uses names not
    # imported here (argparse, os, pk, tqdm, config, LC_QuAD) — presumably
    # imported earlier in the full file; confirm.
    parser = argparse.ArgumentParser(description='Generate one hop ')
    parser.add_argument('--dataset',
                        default='lc_quad',
                        help='`lc_quad` or `qald_7_ml`')
    # NOTE(review): --max_length is parsed but not used in the visible code —
    # presumably consumed further down; confirm.
    parser.add_argument('--max_length', default=3, type=int)
    args = parser.parse_args()

    # Pick the cache file and dataset loader matching the requested dataset.
    dataset_name = args.dataset
    if dataset_name == 'lc_quad':
        file_name = config['lc_quad']['entity_one_hop']
        dataset = LC_QuAD(config['lc_quad']['train'],
                          config['lc_quad']['test'],
                          config['lc_quad']['vocab'], False, True)
    elif dataset_name == 'qald_7_ml':
        file_name = config['qald_7_ml']['entity_one_hop']
        dataset = Qald_7_ml(config['qald_7_ml']['train'],
                            config['qald_7_ml']['test'],
                            config['qald_7_ml']['vocab'], False, False)
    # NOTE(review): no else branch — an unrecognized --dataset value leaves
    # `file_name`/`dataset` unbound and raises NameError below; confirm
    # whether an explicit error is wanted.
    kb = DBpedia()

    # Resume from an existing cache so previously fetched entities are kept.
    if os.path.exists(file_name):
        with open(file_name, 'rb') as f:
            one_hop = pk.load(f)
    else:
        one_hop = {}
    # Walk every question row (test + train) and visit its SPARQL entities.
    for qa_row in tqdm(dataset.test_set + dataset.train_set):
        for entity in qa_row.sparql.entities:
Ejemplo n.º 2
0
                     bounds=dict(min=0.0000001, max=0.1)),
                # dict(name='positive_reward', type='int', bounds=dict(min=1, max=10)),
                # NOTE(review): 'negetive_reward' is misspelled ("negative") —
                # kept as-is since the name is part of the experiment spec and
                # presumably matched elsewhere; confirm before renaming.
                dict(name='negetive_reward',
                     type='int',
                     bounds=dict(min=-10, max=0)),
            ],
            # Single optimization target reported per observation.
            metrics=[dict(name='function_value')],
            parallel_bandwidth=1,
            # Define an Observation Budget for your experiment
            observation_budget=100,
        )
        print("Created experiment: https://app.sigopt.com/experiment/" +
              experiment.id)

    args = parse_args()
    # Load LC-QuAD train/test splits; entity and stop-word removal are
    # controlled by CLI flags.
    lc_quad = LC_QuAD(config['lc_quad']['train'], config['lc_quad']['test'],
                      config['lc_quad']['vocab'], args.remove_entity,
                      args.remove_stop_words)

    # Run the Optimization Loop until the Observation Budget is exhausted
    while experiment.progress.observation_count < experiment.observation_budget:
        # Ask SigOpt for the next hyper-parameter assignment to evaluate.
        suggestion = conn.experiments(experiment.id).suggestions().create()
        value = evaluate_model(lc_quad, args, suggestion.assignments)
        # Report the observed metric back so the optimizer can update its model.
        conn.experiments(experiment.id).observations().create(
            suggestion=suggestion.id,
            value=value,
        )

        # Update the experiment object
        experiment = conn.experiments(experiment.id).fetch()
Ejemplo n.º 3
0
    except RuntimeError as expt:
        # Map runtime failures to HTTP 408 with a JSON error body.
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 408
    except Exception as expt:
        # Any other failure is reported as HTTP 422 (unprocessable request).
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 422


@app.errorhandler(404)
def not_found(error):
    """Answer unknown routes with a JSON 404 payload."""
    body = flask.jsonify({'error': 'Command Not found'})
    return flask.make_response(body, 404)


if __name__ == '__main__':
    # Bootstrap the linking HTTP service: load the dataset and trained model,
    # then serve the Flask app over gevent's WSGIServer.
    logger = logging.getLogger(__name__)
    Utils.setup_logging()
    args = parse_args()

    # Entity removal is fixed off (False); stop-word removal comes from the CLI.
    dataset = LC_QuAD(config['lc_quad']['train'], config['lc_quad']['test'], config['lc_quad']['vocab'],
                      False, args.remove_stop_words)

    runner = Runner(dataset, args)
    runner.load_checkpoint()
    # NOTE(review): nulling these presumably makes the environment fall back
    # to the model's own linking instead of external linkers — confirm.
    runner.environment.entity_linker = None
    runner.environment.relation_linker = None

    # Smoke-test the loaded model with one sample question before serving.
    print(runner.link("Who has been married to both Penny Lancaster and Alana Stewart?", k=10, e=0.1))
    logger.info("Starting the HTTP server")
    # Bind on all interfaces at the configured port.
    http_server = WSGIServer(('', args.port), app)
    http_server.serve_forever()
Ejemplo n.º 4
0
from config import config
from common.vocab import Vocab
from common.word_vectorizer.glove import Glove
from common.dataset.lc_quad import LC_QuAD
from common.dataset.qald_7_ml import Qald_7_ml
from common.dataset.qald_6_ml import Qald_6_ml
from common.dataset.simple_dbpedia_qa import SimpleDBpediaQA

if __name__ == '__main__':
    # Build a shared token vocabulary over several QA datasets.
    print('Create Vocab')
    datasets = [
        Qald_7_ml(config['qald_7_ml']['train'], config['qald_7_ml']['test'],
                  config['qald_7_ml']['vocab'], False, False),
        Qald_6_ml(config['qald_6_ml']['train'], config['qald_6_ml']['test'],
                  config['qald_6_ml']['vocab'], False, False),
        LC_QuAD(config['lc_quad']['train'], config['lc_quad']['test'],
                config['lc_quad']['vocab'], False, False)
    ]
    # Only (re)build when no cached vocab file exists.
    # NOTE(review): the datasets above are constructed even when the cached
    # vocab already exists — wasted work if loading is expensive; confirm.
    if not os.path.exists(config['vocab']):
        #         SimpleDBpediaQA(config['SimpleDBpediaQA']['train'], config['SimpleDBpediaQA']['test'],
        #                         config['SimpleDBpediaQA']['vocab'],
        #                         False, False)]
        vocab = set()
        for dataset in tqdm(datasets):
            # Collect every token appearing in the question corpus.
            lines = dataset.corpus
            for tokens in lines:
                vocab |= set(tokens)
            # Also include tokens from one-hop URI labels when available.
            if dataset.one_hop is not None:
                for entity, uris in dataset.one_hop.items():
                    for idx in range(len(uris)):
                        # uris[idx][0] is presumably the raw URI string — confirm.
                        uri = URI(uris[idx][0])
                        vocab |= set(uri.tokens)