import json
import math
import os
import tempfile
from pprint import pprint

import numpy as np
import tensorflow as tf
from tqdm import tqdm

# Project-local dependencies (read_data, update_config, _config_debug,
# get_multi_gpu_models, ForwardEvaluator, GraphHandler, set_dirs,
# server_update_config, prepro, ensemble, SERVER_PORT) are assumed to be
# defined in, or importable from, the surrounding package.

flags = tf.app.flags  # assumed: the usual TF-1.x flags setup


def _forward(config):
    assert config.load
    test_data = read_data(config, config.forward_name, True)
    update_config(config, [test_data])
    _config_debug(config)

    if config.use_glove_for_unk:
        # Build an embedding matrix for words outside the training vocabulary
        # but present in GloVe, so unknown tokens can fall back to
        # pre-trained vectors.
        word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
        new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = ForwardEvaluator(config, model)
    # GraphHandler controls all tensors and variables in the graph,
    # including loading/saving.
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_batches = math.ceil(test_data.num_examples / config.batch_size)
    if 0 < config.test_num_batches < num_batches:
        num_batches = config.test_num_batches

    e = evaluator.get_evaluation_from_batches(
        sess, tqdm(test_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e, path=config.answer_path)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e, path=config.eval_path)
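# A hedged sketch of how _forward might be wired into a flags-driven entry
# point. The `mode` flag and the tf.app.run() dispatch are assumptions
# modeled on the usual TF-1.x pattern; the surrounding repo may route this
# differently.
def _main_forward(_):
    config = flags.FLAGS
    if config.mode == 'forward':  # assumed flag name
        _forward(config)

# Illustration only; enabling this mid-module would skip the definitions below:
# if __name__ == "__main__":
#     tf.app.run(main=_main_forward)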
class Demo(object):
    def __init__(self):
        config = flags.FLAGS
        config.out_dir = os.path.join(config.out_base_dir, config.model_name, str(config.run_id).zfill(2))
        config.max_sent_size = config.sent_size_th
        config.max_num_sents = config.num_sents_th
        config.max_ques_size = config.ques_size_th
        config.max_word_size = config.word_size_th
        config.max_para_size = config.para_size_th
        self.config = config
        self.test_data = None
        self.data_ready(update=True)

        config = self.config
        set_dirs(config)
        models = get_multi_gpu_models(config)
        self.evaluator = ForwardEvaluator(
            config, models[0], tensor_dict=models[0].tensor_dict if config.vis else None)
        self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        self.graph_handler = GraphHandler(config, models[0])
        self.graph_handler.initialize(self.sess)
        self.config = config

    def data_ready(self, data=None, update=False):
        config = self.config
        config.batch_size = 1
        test_data = read_data(self.config, 'demo', True, data=data, data_set=self.test_data)
        if update:
            update_config(self.config, [test_data])
            if config.use_glove_for_unk:
                # Same out-of-vocabulary GloVe fallback as in _forward.
                word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
                new_word2idx_dict = test_data.shared['new_word2idx']
                idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
                new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
                config.new_emb_mat = new_emb_mat
        self.config = config
        self.test_data = test_data

    def run(self, data):
        self.data_ready(data=data)
        test_data = self.test_data
        config = self.config
        e = None
        for multi_batch in test_data.get_batches(config.batch_size, num_batches=1, cluster=config.cluster):
            ei = self.evaluator.get_evaluation(self.sess, multi_batch)
            e = ei if e is None else e + ei
        return e.id2answer_dict[0]
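# Hedged usage sketch for Demo: construct it once (this builds the graph and
# restores the checkpoint), then call run() per query. `sample.json` and its
# payload shape are assumptions here -- run() forwards `data` to
# read_data(..., data=data), which expects the repo's preprocessed
# single-example format.
def _demo_example(sample_path='sample.json'):
    with open(sample_path) as f:
        sample = json.load(f)
    demo = Demo()
    return demo.run(sample)  # best-answer string for the single example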
def _server(config):
    import bottle

    # Pre-load model
    assert config.load
    server_update_config(config)
    pprint(config.__flags, indent=2)

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = ForwardEvaluator(config, model)
    # GraphHandler controls all tensors and variables in the graph,
    # including loading/saving.
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Pre-load all GloVe vectors
    glove_path = 'glove/glove.6B.100d.txt'  # hard-coded GloVe file
    num_lines = 400000
    glove_dict = {}
    print('Reading all GloVe vectors from %s' % glove_path)
    with open(glove_path, 'r', encoding='utf-8') as fh:
        for line in tqdm(fh, total=num_lines):
            array = line.strip().split(" ")
            word = array[0]
            vector = list(map(float, array[1:]))
            glove_dict[word] = vector

    # Create the app
    orig_data_dir = config.data_dir
    app = bottle.Bottle()

    @app.route('/query', method='post')
    def query():
        with tempfile.TemporaryDirectory(dir=orig_data_dir) as inter_dir:
            # Receive data and preprocess it
            data = bottle.request.json
            config.data_dir = inter_dir
            with tempfile.NamedTemporaryFile('w', suffix='.json', dir=orig_data_dir) as data_file:
                json.dump(data, data_file)
                data_file.flush()
                prepro_args = prepro.get_args([
                    '--mode', 'single', '--single_path', data_file.name, '-pm',
                    '--target_dir', inter_dir
                ])
                prepro.prepro(prepro_args, glove_dict=glove_dict)
            test_data = read_data(config, config.forward_name, True)
            num_batches = math.ceil(test_data.num_examples / config.batch_size)
            if 0 < config.eval_num_batches < num_batches:
                num_batches = config.eval_num_batches

            # Run model on data
            e = evaluator.get_evaluation_from_batches(
                sess, tqdm(test_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
            eval_path = os.path.join(inter_dir, 'eval.pkl.gz')
            graph_handler.dump_eval(e, path=eval_path)

            # Extract predictions through the ensemble code
            data_path = os.path.join(inter_dir, 'data_single.json')
            with open(data_path) as f:
                data_single_obj = json.load(f)
            shared_path = os.path.join(inter_dir, 'shared_single.json')
            with open(shared_path) as f:
                shared_single_obj = json.load(f)
            with tempfile.NamedTemporaryFile('w', suffix='.json', dir=orig_data_dir) as target_file:
                target_path = target_file.name
                ensemble_args = ensemble.get_args([
                    '--data_path', data_path, '--shared_path', shared_path,
                    '-o', target_path, eval_path
                ])
                ensemble.ensemble(ensemble_args)
                target_file.flush()
                with open(target_path, 'r') as f:
                    pred_obj = json.load(f)
            return {
                'data_single': data_single_obj,
                'eval': e.dict,
                'shared_single': shared_single_obj,
                'predictions': pred_obj
            }

    # Run the app
    bottle.run(app, host='localhost', port=SERVER_PORT)
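# A client-side sketch for the /query endpoint above, using the third-party
# `requests` library. SERVER_PORT and the payload schema are assumptions:
# the server hands the posted JSON straight to `prepro --mode single`, so
# the payload must already match what that preprocessing step expects.
def query_server(payload, port=None):
    """POST a prepro-formatted payload and return the server's predictions."""
    import requests  # third-party; pip install requests
    port = SERVER_PORT if port is None else port
    resp = requests.post('http://localhost:%d/query' % port, json=payload)
    resp.raise_for_status()
    return resp.json()['predictions']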