Esempio n. 1
0
    def __init__(self):
        """Prepare configuration, data, model, evaluator and session.

        Reads the global ``flags.FLAGS`` object, derives the output directory
        and the ``max_*`` limits from it, loads the initial data through
        ``self.data_ready(update=True)``, then builds the model(s), a
        ``ForwardEvaluator`` on the first model and a TensorFlow session that
        the graph handler initializes.
        """
        cfg = flags.FLAGS
        cfg.out_dir = os.path.join(
            cfg.out_base_dir,
            cfg.model_name,
            str(cfg.run_id).zfill(2),  # run id left-padded to two characters
        )

        # Each ``<name>_th`` threshold flag is copied onto ``max_<name>``.
        for stem in ("sent_size", "num_sents", "ques_size", "word_size",
                     "para_size"):
            setattr(cfg, "max_" + stem, getattr(cfg, stem + "_th"))

        self.config = cfg
        self.test_data = None
        self.data_ready(update=True)

        # Re-read the attribute: data_ready() is free to rebind self.config.
        cfg = self.config

        set_dirs(cfg)
        first_model = get_multi_gpu_models(cfg)[0]

        # The model's tensor dict is only handed over when cfg.vis is set.
        if cfg.vis:
            vis_tensors = first_model.tensor_dict
        else:
            vis_tensors = None
        self.evaluator = ForwardEvaluator(cfg, first_model,
                                          tensor_dict=vis_tensors)

        session_options = tf.ConfigProto(allow_soft_placement=True)
        self.sess = tf.Session(config=session_options)
        self.graph_handler = GraphHandler(cfg, first_model)
        self.graph_handler.initialize(self.sess)
        self.config = cfg
Esempio n. 2
0
def _forward(config):
    """Evaluate a loaded model on the ``config.forward_name`` data set.

    Reads the data, updates ``config`` from it, optionally builds the extra
    embedding matrix for out-of-vocabulary words, runs the evaluator over the
    batches, prints the evaluation and optionally dumps answers and/or the
    evaluation itself.

    Args:
        config: flags object; ``config.load`` must be truthy. Reads
            ``forward_name``, ``use_glove_for_unk``, ``lower_word``,
            ``batch_size``, ``test_num_batches``, ``dump_answer``,
            ``answer_path``, ``dump_eval`` and ``eval_path``; may set
            ``config.new_emb_mat``.

    Raises:
        AssertionError: if ``config.load`` is falsy.
    """
    assert config.load
    test_data = read_data(config, config.forward_name, True)
    update_config(config, [test_data])

    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = (test_data.shared['lower_word2vec']
                         if config.lower_word
                         else test_data.shared['word2vec'])
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word]
                        for word, idx in new_word2idx_dict.items()}
        # Rows are ordered by index 0..N-1, so every index in that range must
        # be present in new_word2idx (a gap raises KeyError here).
        new_emb_mat = np.array(
            [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
            dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = ForwardEvaluator(config, model)
    # controls all tensors and variables in the graph, including loading /saving
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    try:
        graph_handler.initialize(sess)

        num_batches = math.ceil(test_data.num_examples / config.batch_size)
        # A positive test_num_batches caps the number of evaluated batches.
        if 0 < config.test_num_batches < num_batches:
            num_batches = config.test_num_batches
        batches = test_data.get_batches(config.batch_size,
                                        num_batches=num_batches)
        e = evaluator.get_evaluation_from_batches(
            sess, tqdm(batches, total=num_batches))
        print(e)
        if config.dump_answer:
            print("dumping answer ...")
            graph_handler.dump_answer(e, path=config.answer_path)
        if config.dump_eval:
            print("dumping eval ...")
            graph_handler.dump_eval(e, path=config.eval_path)
    finally:
        # Release the session's resources even when evaluation or dumping
        # fails; the session is not used after this function returns.
        sess.close()
Esempio n. 3
0
class Demo(object):
    """Holds a loaded model and session and answers one 'demo' input at a time.

    Construction reads ``flags.FLAGS``, loads the initial 'demo' data, builds
    the model(s) and opens a TensorFlow session. ``run`` evaluates new data
    with the already-initialized graph; ``close`` releases the session.
    """

    def __init__(self):
        """Configure from ``flags.FLAGS`` and build model, evaluator, session."""
        config = flags.FLAGS
        config.out_dir = os.path.join(config.out_base_dir, config.model_name,
                                      str(config.run_id).zfill(2))
        # The *_th threshold flags are used directly as the max_* limits.
        config.max_sent_size = config.sent_size_th
        config.max_num_sents = config.num_sents_th
        config.max_ques_size = config.ques_size_th
        config.max_word_size = config.word_size_th
        config.max_para_size = config.para_size_th

        self.config = config
        self.test_data = None
        self.data_ready(update=True)

        # Re-read in case an overriding data_ready() rebinds self.config.
        config = self.config

        set_dirs(config)
        models = get_multi_gpu_models(config)
        self.evaluator = ForwardEvaluator(
            config,
            models[0],
            tensor_dict=models[0].tensor_dict if config.vis else None)

        self.sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True))
        self.graph_handler = GraphHandler(config, models[0])
        self.graph_handler.initialize(self.sess)

    def data_ready(self, data=None, update=False):
        """Load 'demo' data into ``self.test_data``.

        Args:
            data: forwarded to ``read_data`` as ``data=``; ``None`` by default.
            update: when true, also call ``update_config`` with the new data
                and, if ``config.use_glove_for_unk`` is set, rebuild
                ``config.new_emb_mat`` from the data's shared word vectors.

        Side effects:
            Forces ``config.batch_size`` to 1 on every call.
        """
        config = self.config
        config.batch_size = 1
        test_data = read_data(config, 'demo', True, data=data,
                              data_set=self.test_data)

        if update:
            update_config(config, [test_data])
            if config.use_glove_for_unk:
                word2vec_dict = (test_data.shared['lower_word2vec']
                                 if config.lower_word
                                 else test_data.shared['word2vec'])
                new_word2idx_dict = test_data.shared['new_word2idx']
                idx2vec_dict = {idx: word2vec_dict[word]
                                for word, idx in new_word2idx_dict.items()}
                # Rows are ordered by index 0..N-1; a missing index raises
                # KeyError here.
                config.new_emb_mat = np.array(
                    [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
                    dtype='float32')
        self.test_data = test_data

    def run(self, data):
        """Evaluate ``data`` and return the entry under key 0 of the answers.

        Args:
            data: passed to ``data_ready`` and from there to ``read_data``.

        Returns:
            ``e.id2answer_dict[0]`` of the accumulated evaluation; presumably
            the answer for the single demo example (confirm against
            ``ForwardEvaluator``).

        Raises:
            ValueError: if the data yields no batch, so nothing was evaluated.
        """
        self.data_ready(data=data)
        config = self.config
        e = None
        batches = self.test_data.get_batches(
            config.batch_size, num_batches=1, cluster=config.cluster)
        for multi_batch in batches:
            ei = self.evaluator.get_evaluation(self.sess, multi_batch)
            e = ei if e is None else e + ei
        if e is None:
            # Previously this surfaced as an AttributeError on None.
            raise ValueError("no batch was produced for the given data; "
                             "nothing to evaluate")
        return e.id2answer_dict[0]

    def close(self):
        """Close the TensorFlow session opened by ``__init__``."""
        self.sess.close()
Esempio n. 4
0
def _server(config):
    """Serve the loaded model over HTTP with a single ``POST /query`` route.

    The model, session and all GloVe vectors are loaded once up front. Each
    request's JSON body is written to a temporary file, preprocessed in
    'single' mode into a temporary directory, evaluated, and passed through
    the ensemble code to extract predictions.

    Args:
        config: flags object; ``config.load`` must be truthy. ``data_dir`` is
            pointed at a per-request temporary directory while a request is
            being handled and restored afterwards.

    Raises:
        AssertionError: if ``config.load`` is falsy.
    """
    import bottle

    # Pre-load model
    assert config.load
    server_update_config(config)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = ForwardEvaluator(config, model)
    # controls all tensors and variables in the graph, including loading /saving
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    try:
        graph_handler.initialize(sess)

        # Pre-load all GloVe vectors
        glove_path = 'glove/glove.6B.100d.txt'  # hard code GloVe file
        num_lines = 400000  # only used as the progress-bar total
        glove_dict = {}
        print('Reading all GloVe vectors from %s' % glove_path)
        with open(glove_path, 'r', encoding='utf-8') as fh:
            for line in tqdm(fh, total=num_lines):
                # Each line is "<word> <v1> <v2> ..." separated by spaces.
                array = line.strip().split(" ")
                word = array[0]
                vector = list(map(float, array[1:]))
                glove_dict[word] = vector

        # Create the app
        orig_data_dir = config.data_dir
        app = bottle.Bottle()

        @app.route('/query', method='post')
        def query():
            # Receive data; bottle yields None when the body is not JSON.
            data = bottle.request.json
            if data is None:
                bottle.abort(400, 'Expected a JSON request body.')

            try:
                with tempfile.TemporaryDirectory(
                        dir=orig_data_dir) as inter_dir:
                    # Process the request data
                    config.data_dir = inter_dir
                    with tempfile.NamedTemporaryFile(
                            'w', suffix='.json',
                            dir=orig_data_dir) as data_file:
                        json.dump(data, data_file)
                        # prepro reopens the file by name, so the content
                        # must be on disk first.
                        data_file.flush()
                        prepro_args = prepro.get_args([
                            '--mode', 'single', '--single_path',
                            data_file.name, '-pm', '--target_dir', inter_dir
                        ])
                        prepro.prepro(prepro_args, glove_dict=glove_dict)
                    test_data = read_data(config, config.forward_name, True)
                    num_batches = math.ceil(
                        test_data.num_examples / config.batch_size)
                    # A positive eval_num_batches caps the batch count.
                    if 0 < config.eval_num_batches < num_batches:
                        num_batches = config.eval_num_batches

                    # Run model on data
                    e = evaluator.get_evaluation_from_batches(
                        sess,
                        tqdm(test_data.get_batches(config.batch_size,
                                                   num_batches=num_batches),
                             total=num_batches))
                    eval_path = os.path.join(inter_dir, 'eval.pkl.gz')
                    graph_handler.dump_eval(e, path=eval_path)

                    # Extract predictions through the ensemble code
                    data_path = os.path.join(inter_dir, 'data_single.json')
                    with open(data_path) as f:
                        data_single_obj = json.load(f)
                    shared_path = os.path.join(inter_dir,
                                               'shared_single.json')
                    with open(shared_path) as f:
                        shared_single_obj = json.load(f)
                    with tempfile.NamedTemporaryFile(
                            'w', suffix='.json',
                            dir=orig_data_dir) as target_file:
                        # Only the name is used: ensemble receives the path
                        # via -o and the result is read back by name.
                        target_path = target_file.name
                        ensemble_args = ensemble.get_args([
                            '--data_path', data_path, '--shared_path',
                            shared_path, '-o', target_path, eval_path
                        ])
                        ensemble.ensemble(ensemble_args)
                        with open(target_path, 'r') as f:
                            pred_obj = json.load(f)
            finally:
                # The temporary directory is gone once the request ends, so
                # never leave config.data_dir pointing at it.
                config.data_dir = orig_data_dir

            return {
                'data_single': data_single_obj,
                'eval': e.dict,
                'shared_single': shared_single_obj,
                'predictions': pred_obj
            }

        # Run the app
        bottle.run(app, host='localhost', port=SERVER_PORT)
    finally:
        # Release the session when the server stops or startup fails.
        sess.close()