Ejemplo n.º 1
0
def test(config):
    """Run the model over the test records and persist predictions and metrics.

    Reads the word/character embedding matrices, the evaluation file and the
    metadata named by ``config``, builds a non-trainable ``Model`` on a
    one-shot iterator over ``config.test_record_file`` and runs
    ``meta["total"] // config.batch_size + 1`` inference steps.

    Outputs:
        * ``config.answer_file + "_" + str(config.load_step)``: JSON dump of
          the remapped answers returned by ``convert_tokens``.
        * ``<config.result_path>/<model_name>_<run_id>.pklz``: gzip-compressed
          pickle holding, per question id, the start/stop logits, plus the
          mean loss and the metrics returned by ``evaluate``.

    Args:
        config: Settings object. Attributes read here: ``word_emb_file``,
            ``char_emb_file``, ``test_eval_file``, ``test_meta``,
            ``test_record_file``, ``batch_size``, ``answer_file``,
            ``load_step``, ``model_name``, ``run_id`` and ``result_path``.
    """
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    graph_handler = GraphHandler(config, model)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        # NOTE(review): presumably this restores the saved weights (it stands
        # where an explicit Saver.restore used to be) - confirm in GraphHandler.
        graph_handler.initialize(sess)
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        ensember_dict = {}
        # "+ 1" so that a trailing, partially filled batch is visited as well.
        for _ in tqdm(range(total // config.batch_size + 1)):
            start_logits, stop_logits, qa_id, loss, yp1, yp2 = sess.run([
                model.start_logits, model.stop_logits, model.qa_id, model.loss,
                model.yp1, model.yp2
            ])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
            # The logits are stored exactly as returned by sess.run; the former
            # bare ``.tolist()`` calls discarded their result and were no-ops.
            for question_id, start, stop in zip(qa_id, start_logits,
                                                stop_logits):
                ensember_dict[str(question_id)] = {'yp1': start, 'yp2': stop}
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        answer_path = config.answer_file + "_" + str(config.load_step)
        with open(answer_path, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))
        ensember_dict['loss'] = loss
        # NOTE(review): the key is spelled 'exact_math' (sic); it is kept
        # because readers of the pickle may depend on it.
        ensember_dict['exact_math'] = metrics['exact_match']
        ensember_dict['f1'] = metrics['f1']
        file_name = config.model_name + '_' + config.run_id + '.pklz'
        save_path = os.path.join(config.result_path, file_name)
        with gzip.open(save_path, 'wb', compresslevel=3) as fh:
            pickle.dump(ensember_dict, fh)
Ejemplo n.º 2
0
def test(config):
    """Decode the test batches, log each prediction and append a BLEU score.

    For every batch produced by ``get_batch_for_test()`` the inferencer's
    predictions are mapped back to tokens through the English index-to-word
    table, cut at the first ``"</S>"`` and written next to the source and
    expected sentences into ``<config.eval_dir>/<config.model_name>``.
    Sentence pairs where both reference and hypothesis have more than three
    tokens are collected and scored with ``corpus_bleu``.

    Args:
        config: Settings object; ``eval_dir`` and ``model_name`` are read
            here, the rest is forwarded to the model and helper classes.
    """
    _config_test(config)

    # Only ``idx2en`` is used below; the German vocabulary is still loaded so
    # the function touches the same resources as before.
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()

    model = ConvSeq2Seq(config)
    graph_handler = GraphHandler(config)
    inferencer = Inferencer(config, model)
    sess = tf.Session()
    try:
        graph_handler.initialize(sess)

        refs = []
        hypotheses = []
        out_path = os.path.join(config.eval_dir, config.model_name)
        with codecs.open(out_path, "w", "utf-8") as fout:
            for _, batch in tqdm(enumerate(get_batch_for_test())):
                preds = inferencer.run(sess, batch)
                sources = batch['source']
                targets = batch['target']
                for source, target, pred in zip(sources, targets, preds):
                    got = " ".join(
                        idx2en[idx] for idx in pred).split("</S>")[0].strip()
                    fout.write("- source: " + source + "\n")
                    fout.write("- expected: " + target + "\n")
                    fout.write("- got: " + got + "\n\n")
                    # Flushed per sentence so the log can be followed live.
                    fout.flush()

                    ref = target.split()
                    hypothesis = got.split()
                    # Very short pairs are left out of the BLEU computation.
                    if len(ref) > 3 and len(hypothesis) > 3:
                        refs.append([ref])
                        hypotheses.append(hypothesis)

            score = corpus_bleu(refs, hypotheses)
            fout.write("Bleu Score = " + str(100*score))
    finally:
        # The session was previously never closed; release it even when
        # decoding fails half-way.
        sess.close()
Ejemplo n.º 3
0
def _test(config):
    """Evaluate the multi-GPU models on the 'test' split.

    Reads the test data, optionally installs an embedding matrix for the
    "new" words on ``config.new_emb_mat`` (when ``config.use_glove_for_unk``
    is set), then sums the per-batch evaluations returned by
    ``MultiGPUEvaluator``. Accuracy and loss of the summed evaluation are
    printed; answers and/or the evaluation are dumped through the graph
    handler when ``config.dump_answer`` / ``config.dump_eval`` are set.
    """
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_debug(config)

    if config.use_glove_for_unk:
        # Choose the lower-cased or the case-sensitive vector table.
        if config.lower_word:
            word2vec_dict = test_data.shared['lower_word2vec']
        else:
            word2vec_dict = test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {}
        for word, idx in new_word2idx_dict.items():
            idx2vec_dict[idx] = word2vec_dict[word]
        # Rows are ordered by index 0..len-1.
        rows = [idx2vec_dict[row] for row in range(len(idx2vec_dict))]
        config.new_emb_mat = np.array(rows, dtype='float32')

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    vis_tensors = models[0].tensor_dict if config.vis else None
    evaluator = MultiGPUEvaluator(config, models, tensor_dict=vis_tensors)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    examples_per_step = config.batch_size * config.num_gpus
    num_steps = math.ceil(test_data.num_examples / examples_per_step)
    # A positive ``test_num_batches`` caps the number of evaluated steps.
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    multi_batches = test_data.get_multi_batches(
        config.batch_size,
        config.num_gpus,
        num_steps=num_steps,
        cluster=config.cluster)
    e = None
    for multi_batch in tqdm(multi_batches, total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        if e is None:
            e = ei
        else:
            e = e + ei
        if not config.vis:
            continue
        # Visualisation mode: dump each batch evaluation into its own file
        # under a "<data_type>-<global_step>" sub-directory.
        step_tag = str(ei.global_step).zfill(6)
        eval_subdir = os.path.join(config.eval_dir,
                                   "{}-{}".format(ei.data_type, step_tag))
        if not os.path.exists(eval_subdir):
            os.mkdir(eval_subdir)
        path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
        graph_handler.dump_eval(ei, path=path)

    print("test acc: %f, loss: %f" % (e.acc, e.loss))
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
Ejemplo n.º 4
0
def _check(config):
    """Debug helper: print the shapes of a few model tensors per training batch.

    Builds the embedding matrix from ``data/word2idx_new.json`` and
    ``data/word2vec_<config.pretrain_from>.json`` (indices without a
    pre-trained vector share one random vector), stores it together with the
    vocabulary and data-set sizes on ``config``, loads the train/test data
    for ``config.data_from`` and then, for every training batch, runs
    ``model.check``, ``model.xx_final`` and ``model.xx_context`` and prints
    their shapes.

    Args:
        config: Settings object; it is also mutated (``emb_mat``,
            ``vocab_size``, ``train_size`` and ``dev_size`` are set).

    Raises:
        ValueError: If ``config.data_from`` is neither ``"reuters"`` nor
            ``"20newsgroup"`` (this used to surface later as an
            ``UnboundLocalError``).
    """
    # Context managers close the JSON files, which were left open before.
    with open("data/word2idx_new.json", "r") as fh:
        word2idx = Counter(json.load(fh)["word2idx"])
    vocab_size = len(word2idx)
    with open("data/word2vec_{}.json".format(config.pretrain_from),
              "r") as fh:
        word2vec = Counter(json.load(fh)["word2vec"])
    idx2vec = {
        word2idx[word]: vec
        for word, vec in word2vec.items() if word in word2idx and word != "UNK"
    }
    # One random vector shared by every index lacking a pre-trained vector.
    unk_embedding = np.random.multivariate_normal(
        np.zeros(config.word_embedding_size),
        np.eye(config.word_embedding_size))
    config.emb_mat = np.array(
        [idx2vec.get(idx, unk_embedding) for idx in range(vocab_size)])
    config.vocab_size = vocab_size
    print("emb_mat:", config.emb_mat.shape)

    if config.data_from == "reuters":
        train_data = read_reuters(config, data_type="train", word2idx=word2idx)
        dev_data = read_reuters(config, data_type="test", word2idx=word2idx)
    elif config.data_from == "20newsgroup":
        train_data = read_news(config, data_type="train", word2idx=word2idx)
        # Was ``read_newss`` - assumed to be a typo for the ``read_news``
        # reader used on the line above.
        dev_data = read_news(config, data_type="test", word2idx=word2idx)
    else:
        raise ValueError(
            "unsupported data_from: {!r}".format(config.data_from))
    config.train_size = train_data.get_data_size()
    config.dev_size = dev_data.get_data_size()
    pprint(config.__flags, indent=2)
    model = get_model(config)
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # An explicit ``config.num_batches`` wins; otherwise cover ``num_epochs``
    # passes over the training data.
    num_batches = config.num_batches or int(
        math.ceil(
            train_data.num_examples / config.batch_size)) * config.num_epochs

    for batch in tqdm(train_data.get_batches(config.batch_size,
                                             num_batches=num_batches,
                                             shuffle=True,
                                             cluster=config.cluster),
                      total=num_batches):
        feed_dict = model.get_feed_dict(batch, config)
        check, xx_final, xx_context = sess.run(
            [model.check, model.xx_final, model.xx_context],
            feed_dict=feed_dict)
        print("check:", check.shape, type(check), xx_final.shape,
              xx_context.shape)
Ejemplo n.º 5
0
def test(config):
    """Run the model on the test records and report EM / F1 / ROUGE-L.

    The session is pinned to GPU device "2". Embeddings, the evaluation file
    and the metadata are loaded from the paths in ``config``; the loop runs
    ``meta["total"] // config.batch_size + 1`` steps, prints each batch loss
    and, for batches with a loss above 50, the stored answer together with
    the predicted start/end positions. The remapped answers are written as
    JSON to ``config.answer_file``.
    """

    def read_json(path):
        # Small local helper: parse one JSON file and close it again.
        with open(path, "r") as fh:
            return json.load(fh)

    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(visible_device_list="2"))
    sess_config.gpu_options.allow_growth = True

    word_mat = np.array(read_json(config.word_emb_file), dtype=np.float32)
    char_mat = np.array(read_json(config.char_emb_file), dtype=np.float32)
    eval_file = read_json(config.test_eval_file)
    meta = read_json(config.test_meta)

    total = meta["total"]

    print("Loading model...")
    record_parser = get_record_parser(config, is_test=True)
    test_dataset = get_dataset(config.test_record_file, record_parser, config)
    test_batch = test_dataset.make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    graph_handler = GraphHandler(config, model)

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        graph_handler.initialize(sess)
        eval_mode = tf.assign(model.is_train, tf.constant(False, dtype=tf.bool))
        sess.run(eval_mode)

        batch_losses = []
        answer_dict = {}
        remapped_dict = {}
        fetches = [model.qa_id, model.loss, model.yp1, model.yp2]
        num_steps = total // config.batch_size + 1
        for _ in tqdm(range(num_steps)):
            qa_id, loss, yp1, yp2 = sess.run(fetches)
            batch_answers, batch_remapped, _outlier = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(batch_answers)
            remapped_dict.update(batch_remapped)
            batch_losses.append(loss)
            print("\n", loss)
            if loss > 50:
                # Unusually high loss: show what was predicted for the batch.
                triples = zip(qa_id.tolist(), yp1.tolist(), yp2.tolist())
                for question_id, start, end in triples:
                    print(answer_dict[str(question_id)], start, end)
        loss = np.mean(batch_losses)
        # Metrics are computed from answer_dict, while only remapped_dict is
        # saved (and used by evaluate-v1.1.py); the two dicts differ slightly.
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        report = "Exact Match: {}, F1: {} Rouge-l-f: {} Rouge-l-p: {} Rouge-l-r: {}".format(
            metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'],
            metrics['rouge-l-p'], metrics['rouge-l-r'])
        print(report)
Ejemplo n.º 6
0
def test(model_params):
    """Evaluate a model on the test file and log its loss and accuracy.

    ``model_params`` is updated in place (decode batch size, load flag and,
    when given, the load path from ``FLAGS``). The test examples and the
    dictionaries are read from JSON files whose names are derived from
    ``FLAGS.dataset_type``; a ``Model_Selector`` is built for the
    ``'multiple'`` data set type and a ``Model`` otherwise.
    """
    model_params.batch_size = _DECODE_BATCH_SIZE
    model_params.load_model = FLAGS.is_load
    if FLAGS.load_path:
        model_params.load_path = FLAGS.load_path

    # Both file names end in "_<dataset_type>.<json tag>".
    suffix = '_' + FLAGS.dataset_type + '.' + _JSON_TAG

    test_file = os.path.join(FLAGS.data_dir, _TEST_TAG + suffix)
    raw_examples = utils_file.read_file(test_file, _JSON_TAG)

    dicts_path = os.path.join(model_params.dict_dir, 'dicts' + suffix)
    dicts = utils_file.read_file(dicts_path, _JSON_TAG)
    assert dicts is not None

    is_binary = FLAGS.dataset_type == 'binary'

    test_data = dataset.VTTExample(raw_examples, 'test', model_params, dicts,
                                   is_binary)

    emb_mat_token = test_data.emb_mat_token
    emb_mat_glove = test_data.emb_mat_glove
    token_count = len(test_data.dicts['token'])
    char_count = len(test_data.dicts['char'])

    with tf.variable_scope('model') as scope:
        if FLAGS.dataset_type == 'multiple':
            model = Model_Selector(emb_mat_token, emb_mat_glove, token_count,
                                   char_count, test_data.max_token_size,
                                   test_data.max_ans_size, model_params,
                                   scope=scope.name)
        else:
            model = Model(emb_mat_token, emb_mat_glove, token_count,
                          char_count, test_data.max_token_size, model_params,
                          scope=scope.name)

    graphHandler = GraphHandler(model, model_params)
    evaluator = Evaluator(model, model_params, is_binary)

    graph_config = tf.ConfigProto(gpu_options=tf.GPUOptions(),
                                  allow_soft_placement=True)
    sess = tf.Session(config=graph_config)

    graphHandler.initialize(sess)

    # The third returned item is not needed here.
    test_loss, test_acc, _ = evaluator.get_evaluation(sess, test_data)

    tf.logging.info('test loss : %.4f accuracy %.4f' % (test_loss, test_acc))
Ejemplo n.º 7
0
def load_model(data, model_params):
    """Build a ``Model_Selector`` and return it with an initialised session.

    The model is created inside the ``'model'`` variable scope from the
    embedding matrices, dictionary sizes and maximum sizes stored on
    ``data``. The session is created with GPU memory growth enabled and is
    handed to ``GraphHandler.initialize``.

    Args:
        data: Data set object providing ``emb_mat_token``, ``emb_mat_glove``,
            ``dicts`` (with ``'token'`` and ``'char'`` entries),
            ``max_token_size`` and ``max_ans_size``.
        model_params: Parameter object forwarded to the model and to
            ``GraphHandler``.

    Returns:
        tuple: ``(model, sess)``. The session stays open; closing it is the
        caller's responsibility.
    """
    with tf.compat.v1.variable_scope('model') as scope:
        model = Model_Selector(data.emb_mat_token, data.emb_mat_glove,
                               len(data.dicts['token']),
                               len(data.dicts['char']), data.max_token_size,
                               data.max_ans_size, model_params, scope.name)

    graphHandler = GraphHandler(model, model_params)

    # Use the compat.v1 alias like the rest of this function: the top-level
    # ``tf.ConfigProto`` does not exist in TensorFlow 2.x.
    graph_config = tf.compat.v1.ConfigProto()
    graph_config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=graph_config)

    graphHandler.initialize(sess)

    return model, sess
Ejemplo n.º 8
0
def _test(config):
    """Evaluate the first multi-GPU model on the 'test' split.

    Reads the test data, builds the models, sums the per-batch evaluations
    returned by ``AccuracyEvaluator`` and prints the accuracy of the summed
    evaluation. The evaluation and/or the answers are dumped through the
    graph handler when ``config.dump_eval`` / ``config.dump_answer`` are set.

    Args:
        config: Settings object; attributes read here include
            ``test_num_can``, ``vis``, ``batch_size``, ``num_gpus``,
            ``cluster``, ``dump_eval`` and ``dump_answer``.
    """
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_debug(config)

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = AccuracyEvaluator(config.test_num_can, config, model,
                                  tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))

    # Running sum of the per-batch evaluations; stays None without batches.
    e = None
    multi_batches = test_data.get_multi_batches(
        config.batch_size, config.num_gpus, num_steps=num_steps,
        cluster=config.cluster)
    for multi_batch in tqdm(multi_batches, total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei

    print(e.acc)

    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
    if config.dump_answer:
        print("dumping answers ...")
        graph_handler.dump_answer(e)
Ejemplo n.º 9
0
def _test(config):
    """Evaluate the model on the test/dev data of ``config.data_from``.

    Builds the embedding matrix from the vocabulary and word-vector JSON
    files under ``../data/<data_from>/`` (indices without a pre-trained
    vector share one random vector), loads the evaluation data - from a
    pre-built JSON file when it exists, otherwise through the data set's
    reader - and runs ``Evaluator.get_evaluation_from_batches`` over it.

    Args:
        config: Settings object; it is also mutated (``test_batch_size`` for
            "20newsgroup", ``emb_mat``, ``vocab_size`` and ``dev_size``).

    Raises:
        ValueError: If ``config.data_from`` is not one of ``"reuters"``,
            ``"20newsgroup"`` or ``"ice"`` (this used to surface as an
            ``UnboundLocalError``).
    """
    if config.data_from == "20newsgroup":
        config.test_batch_size = 281

    # The vocabulary file holds both mappings; parse it once and close it.
    vocab_path = "../data/{}/word2idx_{}.json".format(config.data_from,
                                                      config.data_from)
    with open(vocab_path, "r") as fh:
        vocab = json.load(fh)
    word2idx = Counter(vocab["word2idx"])
    idx2word = vocab["idx2word"]
    # Sanity check: the two mappings must agree (spot-checked on 10 entries).
    assert len(word2idx) == len(idx2word)
    for i in range(10):
        assert word2idx[idx2word[i]] == i
    vocab_size = len(word2idx)

    vec_path = "../data/{}/word2vec_{}.json".format(config.data_from,
                                                    config.pretrain_from)
    with open(vec_path, "r") as fh:
        word2vec = Counter(json.load(fh)["word2vec"])
    idx2vec = {
        word2idx[word]: vec
        for word, vec in word2vec.items() if word in word2idx
    }
    # One random vector shared by every index lacking a pre-trained vector.
    unk_embedding = np.random.multivariate_normal(
        np.zeros(config.word_embedding_size),
        np.eye(config.word_embedding_size))
    config.emb_mat = np.array(
        [idx2vec.get(idx, unk_embedding) for idx in range(vocab_size)])
    config.vocab_size = vocab_size

    test_dict = {}
    test_path = "../data/{}/{}_{}{}.json".format(
        config.data_from, config.data_from, config.dev_type, config.clftype)
    if os.path.exists(test_path):
        with open(test_path, "r") as fh:
            test_dict = json.load(fh)

    if config.data_from == "reuters":
        dev_data = DataSet(test_dict, "test") if test_dict else read_reuters(
            config, data_type="test", word2idx=word2idx)
    elif config.data_from == "20newsgroup":
        dev_data = DataSet(test_dict, "test") if test_dict else read_news(
            config, data_type="test", word2idx=word2idx)
    elif config.data_from == "ice":
        dev_data = DataSet(test_dict, config.dev_type)
    else:
        raise ValueError(
            "unsupported data_from: {!r}".format(config.data_from))

    config.dev_size = dev_data.get_data_size()
    pprint(config.__flags, indent=2)
    model = get_model(config)
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    dev_evaluate = Evaluator(config, model)
    # floor(): a trailing partial batch is not evaluated.
    num_steps = math.floor(dev_data.num_examples / config.test_batch_size)
    # A positive ``val_num_batches`` caps the number of evaluated batches.
    if 0 < config.val_num_batches < num_steps:
        num_steps = config.val_num_batches
    dev_evaluate.get_evaluation_from_batches(
        sess,
        tqdm(dev_data.get_batches(config.test_batch_size,
                                  num_batches=num_steps),
             total=num_steps))
def main(unused_argv):
    """Train the auto-encoder model for ``model_config.num_steps`` steps.

    Creates the training directory when missing, builds the model in a fresh
    graph, starts the queue runners and then loops: every
    ``model_config.period`` global steps the merged summaries are written,
    every ``model_config.checkpoint`` global steps a checkpoint named
    ``<model_name>_<global_step>.ckpt`` is saved under
    ``model_config.save_dir``.

    Args:
        unused_argv: Ignored.
    """
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.train_dir, "--train_dir is required"
    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern
    # Create training directory.
    train_dir = FLAGS.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = auto_encoder_model.Auto_Encoder_Model(model_config,
                                                      mode="train")
        model.build()
        graph_handler = GraphHandler(model_config, model)
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            queue_runner = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                graph_handler.initialize(sess)
                # Build the merged summary op once. Calling merge_all() inside
                # the loop added a new op to the graph on every logging step.
                summary_op = tf.summary.merge_all()
                for _ in tqdm(range(1, model_config.num_steps + 1)):
                    global_step = sess.run(model.global_step) + 1
                    # Same fetches as before; the returned values are unused.
                    sess.run(
                        [model.total_loss, model.train_op, model.outputs_])
                    if global_step % model_config.period == 0:
                        summaries = sess.run(summary_op)
                        graph_handler.writer.add_summary(
                            summaries, global_step)
                        graph_handler.writer.flush()
                    if global_step % model_config.checkpoint == 0:
                        filename = os.path.join(
                            model_config.save_dir,
                            "{}_{}.ckpt".format(model_config.model_name,
                                                global_step))
                        graph_handler.save(sess, filename)
            finally:
                # Ask the queue-runner threads to stop before joining them;
                # join() alone waits for threads that may never finish.
                coord.request_stop()
                coord.join(queue_runner)
Ejemplo n.º 11
0
def train(config):
    """Train the ``ConvSeq2Seq`` model over the batches from ``get_batch``.

    For every batch one trainer step is run and its loss/accuracy printed.
    Summaries are added every ``config.log_period`` global steps and the
    model is saved every ``config.save_period`` global steps; a final save
    is made after the loop when the last step was not itself a save step.

    Args:
        config: Settings object; ``num_epoch``, ``log_period`` and
            ``save_period`` are read here.
    """
    model = ConvSeq2Seq(config)
    trainer = Trainer(config, model)
    graph_handler = GraphHandler(config)
    sess = tf.Session()
    try:
        graph_handler.initialize(sess)

        # Defined up front so the final check also works when no batch is
        # produced (0 counts as "already saved", so nothing is written).
        global_step = 0
        for _, batch in tqdm(enumerate(get_batch(num_epoch=config.num_epoch))):
            global_step = sess.run(model.global_step) + 1
            loss, acc, summary = trainer.run_step(sess, batch)
            # Single parenthesised argument: prints identically on Python 2
            # and is valid syntax on Python 3.
            print("global_step: %d,    loss: %f,     acc: %f" % (global_step, loss, acc))

            if global_step % config.log_period == 0:
                graph_handler.add_summary(summary, global_step)

            if global_step % config.save_period == 0:
                graph_handler.save_model(sess, global_step)

        if global_step % config.save_period != 0:
            graph_handler.save_model(sess)
    finally:
        # The session was previously never closed.
        sess.close()
Ejemplo n.º 12
0
def train(model_params):
    """Train a model on the data set selected by ``FLAGS.dataset_type``.

    Reads the train/validation/test files, loads the dictionaries (or builds
    and writes them when ``FLAGS.use_dicts`` is false), builds a
    ``Model_Selector`` for the ``'multiple'`` data set type and a ``Model``
    otherwise, and then runs the training loop. Every
    ``model_params.eval_period`` global steps (and in the last epoch) the
    model is evaluated on the training and validation data; training stops
    when training accuracy exceeds validation accuracy by more than 0.02 or
    when the epoch counter reaches ``FLAGS.epochs``.
    """
    # All data and dictionary files share the "_<dataset_type>.<json tag>" ending.
    suffix = '_' + FLAGS.dataset_type + '.' + _JSON_TAG
    filepaths = [
        os.path.join(FLAGS.data_dir, tag + suffix)
        for tag in (_TRAIN_TAG, _VALIDATION_TAG, _TEST_TAG)
    ]

    # The test split is read along with the others but not used below.
    raw_train, raw_validation, _raw_test = utils_file.get_spilited_data(filepaths)

    dicts_file = os.path.join(model_params.dict_dir, 'dicts' + suffix)

    if not FLAGS.use_dicts:
        dicts = preprocess_data.build_dicts(raw_train, model_params.glove_path, FLAGS.dataset_type)
        utils_file.write_file(dicts_file, dicts, _JSON_TAG)
    else:
        dicts = utils_file.read_file(dicts_file, _JSON_TAG)

    is_binary = FLAGS.dataset_type == 'binary'

    train_data = dataset.VTTExample(raw_train, 'train', model_params, dicts, is_binary)
    valid_data = dataset.VTTExample(raw_validation, 'validation', model_params, dicts, is_binary)

    emb_mat_token = train_data.emb_mat_token
    emb_mat_glove = train_data.emb_mat_glove
    token_count = len(train_data.dicts['token'])
    char_count = len(train_data.dicts['char'])

    with tf.variable_scope('model') as scope:
        if FLAGS.dataset_type == 'multiple':
            model = Model_Selector(emb_mat_token, emb_mat_glove, token_count, char_count,
                                   train_data.max_token_size, train_data.max_ans_size,
                                   model_params, scope.name)
        else:
            model = Model(emb_mat_token, emb_mat_glove, token_count, char_count,
                          train_data.max_token_size, model_params=model_params,
                          scope=scope.name)

    model_params.load_model = FLAGS.is_load
    if FLAGS.load_path:
        model_params.load_path = FLAGS.load_path

    graphHandler = GraphHandler(model, model_params)
    evaluator = Evaluator(model, model_params, is_binary)
    perform_recoder = PerformRecoder(model_params)

    graph_config = tf.ConfigProto(gpu_options=tf.GPUOptions(), allow_soft_placement=True)
    sess = tf.Session(config=graph_config)

    graphHandler.initialize(sess)

    steps_per_epoch = int(math.ceil((train_data.sample_num / model_params.batch_size)))
    # An explicit FLAGS.num_steps wins over the epoch-derived step count.
    num_steps = FLAGS.num_steps or FLAGS.epochs * steps_per_epoch

    for sample_batch, _batch_num, epoch, _batch_idx in train_data.generate_batch_sample(num_steps):
        global_step = sess.run(model.global_step) + 1

        want_summary = global_step % (model_params.log_period) == 0
        loss, summary, _train_op = model.step(sess, sample_batch, get_summary=want_summary)

        if global_step % 100 == 0:
            tf.logging.info('global_steps : %d' % global_step)
            tf.logging.info('loss : %.4f' % loss)

        if want_summary:
            graphHandler.add_summary(summary, global_step)

        final_epoch = epoch == FLAGS.epochs

        if global_step % model_params.eval_period == 0 or final_epoch:
            train_loss, train_acc, _train_dict = evaluator.get_evaluation(sess, train_data, global_step)
            tf.logging.info('train loss : %.4f accuracy %.4f' % (train_loss, train_acc))

            dev_loss, dev_acc, _dev_dict = evaluator.get_evaluation(sess, valid_data, global_step)
            tf.logging.info('validation loss : %.4f accuracy %.4f' % (dev_loss, dev_acc))

            _is_in_top, _deleted_step = perform_recoder.update_top_list(global_step, dev_acc, sess)

            # Stop once training accuracy runs ahead of validation accuracy
            # by more than 0.02.
            if train_acc - dev_acc > 0.02:
                break

        if final_epoch:
            break
Ejemplo n.º 13
0
def train(config):
    """Train the model, evaluating and checkpointing periodically.

    The session is pinned to GPU device "2". Every ``config.period`` global
    steps the training loss is added as a summary. Every
    ``config.checkpoint`` global steps the model is evaluated on training
    and dev data, the learning rate is halved once the dev loss has failed
    to improve ``config.patience`` evaluations in a row, and a checkpoint
    named ``<model_name>_<global_step>.ckpt`` is saved in
    ``config.save_dir``.
    """

    def read_json(path):
        # Parse one JSON file and close it again.
        with open(path, "r") as fh:
            return json.load(fh)

    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(visible_device_list="2"))
    sess_config.gpu_options.allow_growth = True

    word_mat = np.array(read_json(config.word_emb_file), dtype=np.float32)
    char_mat = np.array(read_json(config.char_emb_file), dtype=np.float32)
    train_eval_file = read_json(config.train_eval_file)
    dev_eval_file = read_json(config.dev_eval_file)
    meta = read_json(config.dev_meta)

    dev_total = meta["total"]
    print("Building model...")
    parser = get_record_parser(config)
    train_dataset = get_batch_dataset(config.train_record_file, parser, config)
    dev_dataset = get_dataset(config.dev_record_file, parser, config)
    # The model reads from one handle-driven iterator; the fed string handle
    # selects the training or the dev iterator.
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_dataset.output_types, train_dataset.output_shapes)
    train_iterator = train_dataset.make_one_shot_iterator()
    dev_iterator = dev_dataset.make_one_shot_iterator()

    model = Model(config, iterator, word_mat, char_mat)
    # Controls all tensors and variables in the graph, including loading /
    # saving.
    graph_handler = GraphHandler(config, model)

    loss_save = 100.0
    patience = 0
    lr = config.init_lr

    with tf.Session(config=sess_config) as sess:

        def set_train_mode(flag):
            # Assign the model's is_train variable.
            sess.run(
                tf.assign(model.is_train, tf.constant(flag, dtype=tf.bool)))

        def set_learning_rate(value):
            # Assign the model's learning-rate variable.
            sess.run(
                tf.assign(model.lr, tf.constant(value, dtype=tf.float32)))

        sess.run(tf.global_variables_initializer())
        graph_handler.initialize(sess)
        train_handle = sess.run(train_iterator.string_handle())
        dev_handle = sess.run(dev_iterator.string_handle())
        set_train_mode(True)
        set_learning_rate(lr)
        print("Started training")
        for _ in tqdm(range(1, config.num_steps + 1)):
            global_step = sess.run(model.global_step) + 1
            loss, _train_op = sess.run([model.loss, model.train_op],
                                       feed_dict={handle: train_handle})
            if global_step % config.period == 0:
                loss_value = tf.Summary.Value(tag="model/loss",
                                              simple_value=loss)
                graph_handler.add_summary(tf.Summary(value=[loss_value]),
                                          global_step)
            if global_step % config.checkpoint != 0:
                continue

            # Checkpoint step: evaluate with training mode switched off.
            set_train_mode(False)
            _, train_summ = evaluate_batch(model, config.val_num_batches,
                                           train_eval_file, sess, "train",
                                           handle, train_handle)
            for summary in train_summ:
                graph_handler.add_summary(summary, global_step)
            dev_steps = dev_total // config.batch_size + 1
            metrics, summ = evaluate_batch(model, dev_steps, dev_eval_file,
                                           sess, "dev", handle, dev_handle)
            set_train_mode(True)

            dev_loss = metrics["loss"]
            if dev_loss >= loss_save:
                patience += 1
            else:
                loss_save = dev_loss
                patience = 0
            if patience >= config.patience:
                # No improvement for long enough: halve the learning rate and
                # restart the patience count from the current dev loss.
                lr /= 2.0
                loss_save = dev_loss
                patience = 0
            set_learning_rate(lr)
            graph_handler.add_summaries(summ, global_step)
            graph_handler.writer.flush()
            checkpoint_name = "{}_{}.ckpt".format(config.model_name,
                                                  global_step)
            graph_handler.save(sess,
                               os.path.join(config.save_dir, checkpoint_name))
Ejemplo n.º 14
0
def _train(config):
    """Train the document classifier described by ``config``.

    Builds the embedding matrix from the word2idx / word2vec JSON files under
    ``data/<config.data_from>/``, loads the cached train/test splits when they
    exist (otherwise reads them through ``read_reuters`` / ``read_news``) and
    runs the training loop, writing summaries every ``config.log_period``
    steps, saving every ``config.save_period`` steps and, when ``config.eval``
    is set, evaluating on the dev split every ``config.eval_period`` steps.

    Args:
        config: flags object. ``emb_mat``, ``vocab_size``, ``train_size``,
            ``dev_size`` and (when above 2000) ``max_docs_length`` are
            written back onto it.

    Raises:
        ValueError: if ``config.data_from`` is neither ``"reuters"`` nor
            ``"20newsgroup"``.
    """
    data_from = config.data_from

    def load_json(path):
        # Context manager so the handle is released as soon as parsing ends.
        with open(path, "r") as fh:
            return json.load(fh)

    def load_split(split):
        # Cached pre-processed split, or {} when no cache file exists.
        path = "data/{}/{}_{}.json".format(data_from, data_from, split)
        return load_json(path) if os.path.exists(path) else {}

    word2idx = Counter(
        load_json("data/{}/word2idx_{}.json".format(data_from,
                                                    data_from))["word2idx"])
    vocab_size = len(word2idx)
    print("vocab_size", vocab_size)
    word2vec = Counter(
        load_json("data/{}/word2vec_{}.json".format(
            data_from, config.pretrain_from))["word2vec"])

    # "UNK" is deliberately left out so that it receives the shared random
    # vector below, like every other word without a pre-trained embedding.
    idx2vec = {
        word2idx[word]: vec
        for word, vec in word2vec.items() if word in word2idx and word != "UNK"
    }
    unk_embedding = np.random.multivariate_normal(
        np.zeros(config.word_embedding_size),
        np.eye(config.word_embedding_size))
    config.emb_mat = np.array(
        [idx2vec.get(idx, unk_embedding) for idx in range(vocab_size)])
    config.vocab_size = vocab_size
    print("emb_mat:", config.emb_mat.shape)

    train_dict = load_split("train")
    test_dict = load_split("test")

    if data_from == "reuters":
        train_data = DataSet(train_dict, "train") if train_dict else \
            read_reuters(config, data_type="train", word2idx=word2idx)
        dev_data = DataSet(test_dict, "test") if test_dict else \
            read_reuters(config, data_type="test", word2idx=word2idx)
    elif data_from == "20newsgroup":
        train_data = DataSet(train_dict, "train") if train_dict else \
            read_news(config, data_type="train", word2idx=word2idx)
        dev_data = DataSet(test_dict, "test") if test_dict else \
            read_news(config, data_type="test", word2idx=word2idx)
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError("unsupported data_from: {!r}".format(data_from))

    config.train_size = train_data.get_data_size()
    config.dev_size = dev_data.get_data_size()
    print("train/dev:", config.train_size, config.dev_size)
    if config.max_docs_length > 2000:
        config.max_docs_length = 2000
    pprint(config.__flags, indent=2)
    model = get_model(config)
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_batches = config.num_batches or int(
        math.ceil(
            train_data.num_examples / config.batch_size)) * config.num_epochs
    global_step = 0

    dev_evaluate = Evaluator(config, model)

    for batch in tqdm(train_data.get_batches(config.batch_size,
                                             num_batches=num_batches,
                                             shuffle=True,
                                             cluster=config.cluster),
                      total=num_batches):
        # +1 because the step counter is read before train_op runs.
        global_step = sess.run(model.global_step) + 1
        # Log only on multiples of log_period (the comparison with 0 was
        # missing, which logged on every step *except* those).
        get_summary = global_step % config.log_period == 0
        feed_dict = model.get_feed_dict(batch, config)
        loss, summary, _ = sess.run(
            [model.loss, model.summary, model.train_op],
            feed_dict=feed_dict)
        print("loss:", loss)
        if get_summary:
            graph_handler.add_summary(summary, global_step)
        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)
        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  config.test_batch_size)
            # A positive val_num_batches caps the number of dev batches.
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_dev = dev_evaluate.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_batches(config.test_batch_size,
                                          num_batches=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
Ejemplo n.º 15
0
def _train(config):
    """Run multi-GPU training with periodic train/dev evaluation.

    Reads the train and dev data, builds the word-embedding matrix, constructs
    one model per GPU and trains for ``config.num_steps`` steps (or
    ``config.num_epochs`` epochs when ``num_steps`` is falsy). Every
    ``config.eval_period`` steps the train and dev losses are evaluated,
    reported through ``slack.notify`` and used for early stopping.

    NOTE(review): ``header`` and ``slack`` are not defined in this function;
    they are expected to exist at module level.

    Args:
        config: flags object; ``emb_mat`` is written back onto it.
    """
    np.set_printoptions(threshold=np.inf)
    train_data = read_data(config, 'train', config.load)
    dev_data = read_data(config, 'dev', True)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    # Indices without a pre-trained vector each get their own random draw.
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat

    def make_idx2word():
        """Return the inverse vocabulary: a dict mapping index -> word.

        When ``config.use_glove_for_unk`` is set, the entries of
        ``new_word2idx`` are appended after the base vocabulary by offsetting
        their indices with ``len(word2idx)``.
        """
        idx2word = {}
        d = train_data.shared['word2idx']
        for word, idx in d.items():
            # NOTE(review): prints every vocabulary entry; looks like
            # leftover debug output, kept to preserve behavior.
            print(word)
            idx2word[idx] = word
        if config.use_glove_for_unk:
            d2 = train_data.shared['new_word2idx']
            for word, idx in d2.items():
                print(word)
                idx2word[idx + len(d)] = word
        return idx2word

    idx2word = make_idx2word()
    # Total number of words in this dictionary: GloVe words plus the extra
    # tokens (UNK, POS, ...).
    print("size of config.id2word len:", len(idx2word))
    print("size of config.total_word_vocab_size:",
          config.total_word_vocab_size)

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUEvaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    # Controls all tensors and variables in the graph, including
    # loading / saving.
    graph_handler = GraphHandler(config, model)

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    # Best dev result so far and the number of evaluations since it improved.
    min_val = {'loss': 100.0, 'acc': 0, 'step': 0, 'patience': 0}

    # Initialised up front so the final save check below still works when the
    # batch generator yields nothing.
    global_step = 0

    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        # +1 because all calculations are done after step
        global_step = sess.run(model.global_step) + 1
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            # Separate name so the total training step count in `num_steps`
            # is not overwritten by the evaluation step count.
            eval_steps = math.ceil(dev_data.num_examples /
                                   (config.batch_size * config.num_gpus))

            # A positive val_num_batches smaller than the number of steps
            # needed for the whole dev set caps the evaluation length.
            if 0 < config.val_num_batches < eval_steps:
                eval_steps = config.val_num_batches

            # The train loss is computed over the same number of batches as
            # the dev loss.
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=eval_steps),
                     total=eval_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)

            # This e_dev may differ from the dev set used at test time
            # because some data is filtered out here.
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=eval_steps),
                     total=eval_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            print("%s e_train: loss=%.4f" % (header, e_train.loss))
            print("%s e_dev: loss=%.4f" % (header, e_dev.loss))
            print()
            if min_val['loss'] > e_dev.loss:
                min_val['loss'] = e_dev.loss
                min_val['step'] = global_step
                min_val['patience'] = 0
            else:
                min_val['patience'] = min_val['patience'] + 1
                # Early stopping after 1000 evaluations without improvement.
                if min_val['patience'] >= 1000:
                    slack.notify(
                        text="%s patience reached %d. early stopping." %
                        (header, min_val['patience']))
                    break

            slack.notify(text="%s e_dev: loss=%.4f" % (header, e_dev.loss))

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    slack.notify(
        text=
        "%s <@U024BE7LH|insikk> Train is finished. e_dev: loss=%.4f at step=%d\nPlease assign another task to get more research result"
        % (header, min_val['loss'], min_val['step']))

    # Save the final state unless the last step was already checkpointed.
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
Ejemplo n.º 16
0
def _train(config):
    """Train the classifier and checkpoint whenever the dev F-score improves.

    Loads the vocabulary and pre-trained vectors from
    ``../data/<config.data_from>/``, builds the embedding matrix, loads the
    cached train and test/dev splits when present, then trains while
    evaluating on the dev split every ``config.eval_period`` steps (only when
    ``config.eval`` is set). A checkpoint is saved each time ``e_dev.fv``
    exceeds the best value seen so far (starting from 0.50).

    Args:
        config: flags object. ``emb_mat``, ``vocab_size``, ``train_size``,
            ``dev_size`` and (when above 2000) ``max_docs_length`` are
            written back onto it.

    Raises:
        ValueError: if ``config.data_from`` is not ``"reuters"``,
            ``"20newsgroup"`` or ``"ice"``.
    """
    data_from = config.data_from

    def load_json(path):
        # Context manager so the handle is released as soon as parsing ends.
        with open(path, "r") as fh:
            return json.load(fh)

    # The vocabulary file holds both mappings; parse it once.
    vocab = load_json(
        "../data/{}/word2idx_{}.json".format(data_from, data_from))
    word2idx = Counter(vocab["word2idx"])
    idx2word = vocab["idx2word"]
    assert len(word2idx) == len(idx2word)
    # Spot-check that the two mappings agree on the first few entries.
    for i, word in enumerate(idx2word[:10]):
        assert word2idx[word] == i
    vocab_size = len(word2idx)
    print("vocab_size", vocab_size, idx2word[:10])
    word2vec = Counter(
        load_json("../data/{}/word2vec_{}.json".format(
            data_from, config.pretrain_from))["word2vec"])
    idx2vec = {
        word2idx[word]: vec
        for word, vec in word2vec.items() if word in word2idx
    }
    print("no unk words:", len(idx2vec))

    # Every word without a pre-trained vector shares one random embedding.
    unk_embedding = np.random.multivariate_normal(
        np.zeros(config.word_embedding_size),
        np.eye(config.word_embedding_size))
    config.emb_mat = np.array(
        [idx2vec.get(idx, unk_embedding) for idx in range(vocab_size)])
    config.vocab_size = vocab_size
    print("emb_mat:", config.emb_mat.shape)

    # The "ice" corpus is evaluated on its dev split, the others on test.
    test_type = "dev" if data_from == "ice" else "test"
    ice_flat = ""
    if data_from == "ice" and config.model_name.endswith("flat"):
        ice_flat = "_flat"

    def load_split(split):
        # Cached pre-processed split, or {} when no cache file exists.
        path = "../data/{}/{}_{}{}{}.json".format(data_from, data_from, split,
                                                  ice_flat, config.clftype)
        return load_json(path) if os.path.exists(path) else {}

    train_dict = load_split("train")
    test_dict = load_split(test_type)

    # check
    for key, val in train_dict.items():
        if isinstance(val[0], list) and len(val[0]) > 10:
            print(key, val[0][:50])
        else:
            print(key, val[0:4])
    print("train:", len(train_dict))
    print("test:", len(test_dict))
    if data_from == "reuters":
        train_data = DataSet(train_dict, "train") if train_dict else \
            read_reuters(config, data_type="train", word2idx=word2idx)
        dev_data = DataSet(test_dict, "test") if test_dict else \
            read_reuters(config, data_type="test", word2idx=word2idx)
    elif data_from == "20newsgroup":
        train_data = DataSet(train_dict, "train") if train_dict else \
            read_news(config, data_type="train", word2idx=word2idx)
        dev_data = DataSet(test_dict, "test") if test_dict else \
            read_news(config, data_type="test", word2idx=word2idx)
    elif data_from == "ice":
        train_data = DataSet(train_dict, "train")
        dev_data = DataSet(test_dict, "dev")
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError("unsupported data_from: {!r}".format(data_from))

    config.train_size = train_data.get_data_size()
    config.dev_size = dev_data.get_data_size()
    print("train/dev:", config.train_size, config.dev_size)

    # Average document length. The cached dict may be empty (data came from a
    # reader instead), so "x_len" is looked up defensively.
    x_lens = train_dict.get("x_len", [])
    if x_lens and config.train_size:
        avg_len = float(sum(x_lens)) / config.train_size
    else:
        avg_len = 0
    print("avg_len at train:", avg_len)

    if config.max_docs_length > 2000:
        config.max_docs_length = 2000
    pprint(config.__flags, indent=2)
    model = get_model(config)
    trainer = Trainer(config, model)
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_batches = config.num_batches or int(
        math.ceil(
            train_data.num_examples / config.batch_size)) * config.num_epochs
    global_step = 0

    dev_evaluate = Evaluator(config, model)

    # Checkpoints are only written once the dev score beats this floor.
    best_f1 = 0.50
    for batch in tqdm(train_data.get_batches(config.batch_size,
                                             num_batches=num_batches,
                                             shuffle=True,
                                             cluster=config.cluster),
                      total=num_batches):
        # +1 because the step counter is read before the training step runs.
        global_step = sess.run(model.global_step) + 1
        # Log only on multiples of log_period (the comparison with 0 was
        # missing, which logged on every step *except* those).
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batch, get_summary)

        if get_summary:
            graph_handler.add_summary(summary, global_step)
        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  config.test_batch_size)
            # A positive val_num_batches caps the number of dev batches.
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_dev = dev_evaluate.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_batches(config.test_batch_size,
                                          num_batches=num_steps),
                     total=num_steps))
            if e_dev.fv > best_f1:
                best_f1 = e_dev.fv
                graph_handler.save(sess, global_step=global_step)
            graph_handler.add_summaries(e_dev.summaries, global_step)
    print("f1:", best_f1)
Ejemplo n.º 17
0
def _train(config):
    """Train on the 'val_train' split and keep the best 'val_val' checkpoint.

    Builds the embedding matrix, creates the multi-GPU models, then trains
    for ``config.num_steps`` steps (or ``config.num_epochs`` epochs when
    ``num_steps`` is falsy). When ``config.eval`` is set, both splits are
    fully evaluated every ``config.eval_period`` steps and a checkpoint is
    saved each time the dev accuracy improves.

    Args:
        config: flags object; ``emb_mat`` is written back onto it, and
            ``save_period`` / ``eval_period`` are set to 50 at an evaluation
            that happens after step 700.
    """
    train_data = read_data(config, 'val_train', config.load)
    dev_data = read_data(config, 'val_val', True)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    # Pick the (optionally lower-cased) pre-trained vectors.
    if config.lower_word:
        word2vec_dict = train_data.shared['lower_word2vec']
    else:
        word2vec_dict = train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']

    idx2vec_dict = {}
    for token, vector in word2vec_dict.items():
        if token in word2idx_dict:
            idx2vec_dict[word2idx_dict[token]] = vector

    # Indices without a pre-trained vector each get their own random draw.
    embedding_rows = []
    for index in range(config.word_vocab_size):
        if index in idx2vec_dict:
            embedding_rows.append(idx2vec_dict[index])
        else:
            embedding_rows.append(
                np.random.multivariate_normal(np.zeros(config.word_emb_size),
                                              np.eye(config.word_emb_size)))
    config.emb_mat = np.array(embedding_rows)

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    vis_tensors = model.tensor_dict if config.vis else None
    evaluator = AccuracyEvaluator(config.train_num_can, config, model,
                                  tensor_dict=vis_tensors)
    # Controls all tensors and variables in the graph, including
    # loading / saving.
    graph_handler = GraphHandler(config, model)

    # Variables
    session_config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=session_config)
    graph_handler.initialize(sess)

    # Begin training
    total_steps = config.num_steps
    if not total_steps:
        steps_per_epoch = int(math.ceil(
            train_data.num_examples / (config.batch_size * config.num_gpus)))
        total_steps = steps_per_epoch * config.num_epochs
    global_step = 0
    # [best dev accuracy, step it was reached at(, train accuracy then)]
    best_dev = [0, 0]

    training_batches = train_data.get_multi_batches(
        config.batch_size, config.num_gpus, num_steps=total_steps,
        shuffle=False, cluster=config.cluster)
    for batches in tqdm(training_batches, total=total_steps):
        # +1 because all calculations are done after step
        global_step = sess.run(model.global_step) + 1
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches,
                                               get_summary=get_summary)

        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # Nothing more to do unless this is an evaluation step.
        if not config.eval or global_step % config.eval_period != 0:
            continue

        num_steps_dev = math.ceil(
            dev_data.num_examples / (config.batch_size * config.num_gpus))
        num_steps_train = math.ceil(
            train_data.num_examples / (config.batch_size * config.num_gpus))

        train_eval_batches = train_data.get_multi_batches(
            config.batch_size, config.num_gpus, num_steps=num_steps_train)
        e_train = evaluator.get_evaluation_from_batches(
            sess, tqdm(train_eval_batches, total=num_steps_train))

        dev_eval_batches = dev_data.get_multi_batches(
            config.batch_size, config.num_gpus, num_steps=num_steps_dev)
        e_dev = evaluator.get_evaluation_from_batches(
            sess, tqdm(dev_eval_batches, total=num_steps_dev))

        print('train step:{}  loss:{}  acc:{}'.format(
            global_step, e_train.loss, e_train.acc))
        print('val step:{}  loss:{}  acc:{}'.format(
            global_step, e_dev.loss, e_dev.acc))

        # Past step 700, switch both periods to 50 steps.
        if global_step > 700:
            config.save_period = 50
            config.eval_period = 50

        if best_dev[0] < e_dev.acc:
            best_dev = [e_dev.acc, global_step, e_train.acc]
            graph_handler.save(sess, global_step=global_step)

    # Save the final state when the last step is not a multiple of
    # save_period.
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
    print (best_dev)
    print ("you can test on test data set and set load setp is {}".format(best_dev[1]))
Ejemplo n.º 18
0
        all_fixed_noise_samples = Generator(BATCH_SIZE,
                                            labels,
                                            noise=fixed_noise)

        def generate_image(iteration):
            samples = session.run(all_fixed_noise_samples)
            samples = ((samples + 1.) * (255.99 / 2)).astype('int32')
            lib.save_images.save_images(
                samples.reshape((BATCH_SIZE, 3, 64, 64)),
                'samples_{}.png'.format(iteration))

        coord = tf.train.Coordinator()
        queue_runner = tf.train.start_queue_runners(sess=session, coord=coord)
        # Train loop
        graph_handler = GraphHandler(config)
        graph_handler.initialize(session)
        session.run(tf.global_variables_initializer())
        # 训练
        session.run(tf.assign(is_train, tf.constant(True, dtype=tf.bool)))

        for _ in range(ITERS):
            start_time = time.time()
            iteration = session.run(global_step) + 1
            # Train generator
            if iteration > 0:
                _ = session.run(gen_train_op)
            # Train critic
            # model hot start
            if (MODE == 'dcgan') or (MODE == 'lsgan'):
                disc_iters = 1
            else: