Example 1
def get_data_loader(args):
    data_loader_options = {
        'model_type': 'translation',
        'source_file': args.source_file,
        'target_file': args.target_file,
        'bucket_quant': args.bucket_quant,
    }
    dl = data_loader.Data_Loader(data_loader_options)
    return dl
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1,
                        help='Batch Size')
    parser.add_argument('--sample_every',
                        type=int,
                        default=500,
                        help='Sample generator output every x steps')
    parser.add_argument('--summary_every',
                        type=int,
                        default=50,
                        help='Write summaries every x steps')
    parser.add_argument('--save_model_every',
                        type=int,
                        default=1500,
                        help='Save model every x steps')
    parser.add_argument('--sample_size',
                        type=int,
                        default=300,
                        help='Sampled output size')
    parser.add_argument('--top_k',
                        type=int,
                        default=5,
                        help='Sample from top k predictions')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=1000,
                        help='Max Epochs')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Momentum for Adam Update')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--text_dir',
                        type=str,
                        default='Data/generator_training_data',
                        help='Directory containing text files')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data Directory')
    parser.add_argument('--seed',
                        type=str,
                        default='All',
                        help='Seed for text generation')

    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )
    config = model_config.predictor_config

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.text_dir
    })
    text_samples, vocab = dl.load_generator_data(config['sample_size'])
    print text_samples.shape

    model_options = {
        'vocab_size': len(vocab),
        'residual_channels': config['residual_channels'],
        'dilations': config['dilations'],
        'filter_width': config['filter_width'],
    }

    generator_model = generator.ByteNet_Generator(model_options)
    generator_model.build_model()

    optim = tf.train.AdamOptimizer(
        args.learning_rate, beta1=args.beta1).minimize(generator_model.loss)

    generator_model.build_generator(reuse=True)
    merged_summary = tf.summary.merge_all()

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    shutil.rmtree('Data/tb_summaries/generator_model', ignore_errors=True)
    train_writer = tf.summary.FileWriter('Data/tb_summaries/generator_model',
                                         sess.graph)

    step = 0
    for epoch in range(args.max_epochs):
        batch_no = 0
        batch_size = args.batch_size
        while (batch_no + 1) * batch_size < text_samples.shape[0]:

            start = time.clock()

            text_batch = text_samples[batch_no * batch_size:(batch_no + 1) *
                                      batch_size, :]
            _, loss, prediction = sess.run([
                optim, generator_model.loss, generator_model.arg_max_prediction
            ],
                                           feed_dict={
                                               generator_model.t_sentence:
                                               text_batch
                                           })
            end = time.clock()
            print "-------------------------------------------------------"
            print "LOSS: {}\tEPOCH: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".format(
                loss, epoch, batch_no, step,
                text_samples.shape[0] / args.batch_size)
            print "TIME FOR BATCH", end - start
            print "TIME FOR EPOCH (mins)", (end - start) * (
                text_samples.shape[0] / args.batch_size) / 60.0

            batch_no += 1
            step += 1

            if step % args.summary_every == 0:
                [summary] = sess.run(
                    [merged_summary],
                    feed_dict={generator_model.t_sentence: text_batch})
                train_writer.add_summary(summary, step)
                print dl.inidices_to_string(prediction, vocab)

            print "********************************************************"

            if step % args.sample_every == 0:
                seed_sentence = np.array(
                    [dl.string_to_indices(args.seed, vocab)], dtype='int32')

                for col in range(args.sample_size):
                    [probs] = sess.run([generator_model.g_probs],
                                       feed_dict={
                                           generator_model.seed_sentence:
                                           seed_sentence
                                       })

                    curr_preds = []
                    for bi in range(probs.shape[0]):
                        pred_word = utils.sample_top(probs[bi][-1],
                                                     top_k=args.top_k)
                        curr_preds.append(pred_word)

                    seed_sentence = np.insert(seed_sentence,
                                              seed_sentence.shape[1],
                                              curr_preds,
                                              axis=1)
                    print col, dl.inidices_to_string(seed_sentence[0], vocab)

                f = open('Data/generator_sample.txt', 'wb')
                f.write(dl.inidices_to_string(seed_sentence[0], vocab))
                f.close()

            if step % args.save_model_every == 0:
                save_path = saver.save(
                    sess,
                    "Data/Models/generation_model/model_epoch_{}_{}.ckpt".
                    format(epoch, step))
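The sampling loop above relies on `utils.sample_top`, which is not shown in this snippet. A minimal sketch of what such a top-k sampler presumably does (the name and exact behavior in `utils` are assumptions, not the repo's implementation):

import numpy as np

def sample_top_sketch(p, top_k=5):
    # keep only the top_k most probable indices, renormalize, and sample one
    idx = np.argsort(p)[-top_k:]
    probs = p[idx] / np.sum(p[idx])
    return int(np.random.choice(idx, p=probs))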
Example 3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1,
                        help='Batch Size')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=1000,
                        help='Max Epochs')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Momentum for Adam Update')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data Directory')

    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )

    config = model_config.predictor_config

    model_options = {
        'n_source_quant': config['n_source_quant'],
        'n_target_quant': config['n_target_quant'],
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'sample_size': config['sample_size'],
        'decoder_filter_width': config['decoder_filter_width'],
        'batch_size': args.batch_size,
    }

    byte_net = model.Byte_net_model(model_options)
    bn_tensors = byte_net.build_prediction_model()

    optim = tf.train.AdamOptimizer(args.learning_rate,
                                   beta1=args.beta1).minimize(
                                       bn_tensors['loss'],
                                       var_list=bn_tensors['variables'])

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.data_dir
    })
    text_samples = dl.load_generator_data(config['sample_size'])
    print text_samples.shape

    for i in range(args.max_epochs):
        batch_no = 0
        batch_size = args.batch_size
        while (batch_no + 1) * batch_size < text_samples.shape[0]:
            text_batch = text_samples[batch_no * batch_size:(batch_no + 1) *
                                      batch_size, :]
            _, loss, prediction = sess.run(
                [optim, bn_tensors['loss'], bn_tensors['prediction']],
                feed_dict={bn_tensors['sentence']: text_batch})
            print "-------------------------------------------------------"
            print utils.list_to_string(prediction)
            print "Loss", i, batch_no, loss
            print "********************************************************"
            # print prediction
            batch_no += 1

            if (batch_no % 500) == 0:
                save_path = saver.save(
                    sess, "Data/Models/model_epoch_{}.ckpt".format(i))
Example 4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1,
                        help='Batch Size')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=1000,
                        help='Max Epochs')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Momentum for Adam Update')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data Directory')
    parser.add_argument('--log_dir',
                        type=str,
                        default='logs',
                        help='Path to TensorBoard logs')

    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )

    config = model_config.predictor_config

    model_options = {
        'n_source_quant': config['n_source_quant'],
        'n_target_quant': config['n_target_quant'],
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'sample_size': config['sample_size'],
        'decoder_filter_width': config['decoder_filter_width'],
        'batch_size': args.batch_size,
    }

    byte_net = model.Byte_net_model(model_options)
    bn_tensors = byte_net.build_prediction_model()

    # Set up logging for TensorBoard
    writer = tf.summary.FileWriter(args.log_dir, graph=tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    optim = tf.train.AdamOptimizer(args.learning_rate, beta1 = args.beta1) \
     .minimize(bn_tensors['loss'], var_list=bn_tensors['variables'])

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.data_dir
    })
    text_samples = dl.load_generator_data(config['sample_size'])
    print(text_samples.shape)

    models_path = "Data/Models/"
    if not os.path.exists(models_path): os.makedirs(models_path)

    for epoch in range(args.max_epochs):
        step = 0
        batch_size = args.batch_size
        while (step + 1) * batch_size < text_samples.shape[0]:
            text_batch = text_samples[step * batch_size:(step + 1) *
                                      batch_size, :]
            _, summary, loss, prediction = sess.run([
                optim, summaries, bn_tensors['loss'], bn_tensors['prediction']
            ],
                                                    feed_dict={
                                                        bn_tensors['sentence']:
                                                        text_batch
                                                    })

            print("-------------------------------------------------------")
            print(utils.list_to_string(prediction))
            print("Epoch", epoch, "  Step", step, "  Loss", loss)
            print("********************************************************")

            writer.add_summary(summary, step)
            writer.add_run_metadata(
                run_metadata, 'epoch_{:04d}, step_{:04d}'.format(epoch, step))

            step += 1

            if step % 500 == 0:
                saver.save(sess,
                           models_path + "model_epoch_{}.ckpt".format(epoch))
Example 5
                    help='choose which split of data to use '
                    '(`train` or `valid`)')
parser.add_argument('--num_layers', type=int, default=15, help='num of layers')

args = parser.parse_args()

model_path = args.model_path

data_loader_options = {
    'model_type': 'translation',
    'source_file': args.source_file,
    'target_file': args.target_file,
    'bucket_quant': args.bucket_quant,
}

dl = data_loader.Data_Loader(data_loader_options, split=args.split, vocab=None)
buckets, source_vocab, target_vocab = dl.load_translation_data()
config = model_config.translator_config

model_options = {
    'source_vocab_size': len(source_vocab),
    'target_vocab_size': len(target_vocab),
    'residual_channels': config['residual_channels'],
    'decoder_dilations': config['decoder_dilations'],
    'encoder_dilations': config['encoder_dilations'],
    'decoder_filter_width': config['decoder_filter_width'],
    'encoder_filter_width': config['encoder_filter_width'],
    'layer_norm': config['layer_norm']
}

translator_model = translator.ByteNet_Translator(model_options)
Example 6
def main():
    args, config = get_args_and_config()

    source_sentences = None
    with open('Data/MachineTranslation/news-commentary-v11.de-en.de') as f:
        source_sentences = f.read().decode("utf-8").split('\n')

    with open('Data/MachineTranslation/news-commentary-v11.de-en.en') as f:
        target_sentences = f.read().decode("utf-8").split('\n')

    idx = 0
    for i in range(len(source_sentences)):
        if 'NEW YORK' in target_sentences[i][0:40]:
            print(target_sentences[i])
            idx = i
            break

    source_sentences = source_sentences[idx:idx + 1]
    target_sentences = target_sentences[idx:idx + 1]

    print(source_sentences)
    print(target_sentences)

    data_loader_options = {
        'model_type': 'translation',
        'source_file': 'Data/MachineTranslation/news-commentary-v11.de-en.de',
        'target_file': 'Data/MachineTranslation/news-commentary-v11.de-en.en',
        'bucket_quant': 25,
    }

    dl = data_loader.Data_Loader(data_loader_options)
    # buckets, source_vocab, target_vocab, frequent_keys = dl.load_translation_data()

    source, target = prepare_source_target_arrays(args, dl, source_sentences)

    model_options = {
        'n_source_quant': len(dl.source_vocab),
        'n_target_quant': len(dl.target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'sample_size': 10,
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
        'batch_size': 1,
        'source_mask_chars': [dl.source_vocab['padding']],
        'target_mask_chars': [dl.target_vocab['padding']]
    }

    byte_net = model.Byte_net_model(model_options)
    translator = byte_net.build_translation_model(args.translator_max_length)

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    saver.restore(sess, args.model_path)

    input_batch = target
    print("INPUT", input_batch)
    print("Source", source)

    for i in range(0, 1000):
        prediction, probs = sess.run(
            [translator['prediction'], translator['probs']],
            feed_dict={
                translator['source_sentence']: source,
                translator['target_sentence']: input_batch,
            })
        # prediction = prediction[0]
        last_prediction = np.array([utils.weighted_pick(probs[i])])
        last_prediction = last_prediction.reshape([1, -1])

        input_batch[:, i + 1] = last_prediction[:, 0]
        res = dl.inidices_to_string(input_batch[0], dl.target_vocab)
        print("RES")
        print(res)
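`utils.weighted_pick` is not shown in this snippet; it presumably draws a single index from a probability vector. A minimal sketch under that assumption (name and signature are illustrative):

import numpy as np

def weighted_pick_sketch(p):
    # inverse-CDF sampling over a 1-D probability vector
    cdf = np.cumsum(p)
    return int(np.searchsorted(cdf, np.random.rand() * cdf[-1]))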
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--top_k',
                        type=int,
                        default=5,
                        help='sample from top k predictions')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.9,
                        help='beta1 hyperparameter for Adam')
    parser.add_argument('--datapath',
                        type=str,
                        default="Data/coldrec/rec50_pretrain.csv",
                        help='data path')
    parser.add_argument('--save_dir',
                        type=str,
                        default="Models/coldrec_baseline_4_emb64_bs256",
                        help='save dir path')
    parser.add_argument('--eval_iter',
                        type=int,
                        default=1000,
                        help='evaluate every x steps')
    parser.add_argument('--early_stop',
                        type=int,
                        default=10,
                        help='stop after x evaluations without improvement')
    parser.add_argument('--step',
                        type=int,
                        default=400000,
                        help='maximum number of training steps')
    parser.add_argument('--tt_percentage',
                        type=float,
                        default=0.2,
                        help='0.2 means 80% training 20% testing')
    parser.add_argument('--data_ratio',
                        type=float,
                        default=1,
                        help='fraction of training data actually used')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='learning rate')
    parser.add_argument('--L2',
                        type=float,
                        default=0.001,
                        help='L2 regularization')
    parser.add_argument('--dilation_count',
                        type=int,
                        default=4,
                        help='number of [1, 4] dilation blocks')
    parser.add_argument('--method',
                        type=str,
                        default="from_scratch",
                        help='from_scratch, random_init, stack')
    parser.add_argument('--load_model',
                        type=ast.literal_eval,
                        default=False,
                        help='whether to load a pretrained model')
    parser.add_argument('--copy_softmax',
                        type=ast.literal_eval,
                        default=True,
                        help='whether to copy the softmax parameters')
    parser.add_argument('--copy_layernorm',
                        type=ast.literal_eval,
                        default=True,
                        help='whether to copy the layer-norm parameters')
    parser.add_argument('--model_path',
                        type=str,
                        default="Models/",
                        help='load model path')
    parser.add_argument('--padid', type=int, default=0, help='pad id')
    args = parser.parse_args()

    print(args)

    dl = data_loader.Data_Loader({
        'dir_name': args.datapath,
        'padid': args.padid
    })
    all_samples = dl.item
    print(all_samples.shape)
    items = dl.item_dict
    print("len(items)", len(items))

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(all_samples)))
    all_samples = all_samples[shuffle_indices]

    # Split train/test set
    dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples)))
    train_set, valid_set = all_samples[:dev_sample_index], all_samples[
        dev_sample_index:]

    random.seed(10)
    ratio = args.data_ratio
    train_set_len = len(train_set)
    train_index_set = set(list(range(train_set_len)))
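    # The branches below subsample the training set to `data_ratio` of its
    # original size. With random.seed(10) fixed, the staged draws (0.2 of the
    # indices, then a quarter of the remainder, and so on) appear intended to
    # keep the smaller subsets nested inside the larger ones across runs.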

    if ratio == 0.2:
        train_ratio = int(ratio * float(train_set_len))
        real_train_index_set = random.sample(list(train_index_set),
                                             train_ratio)
        real_train_set = train_set[real_train_index_set]
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.4:
        last_ratio = ratio - 0.2
        last_train_ratio = int(last_ratio * float(train_set_len))
        last_train_index_set = random.sample(list(train_index_set),
                                             last_train_ratio)
        last_train_set = train_set[last_train_index_set]

        remain_train_index_set = train_index_set - set(last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 4.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]

        real_train_set = np.concatenate((last_train_set, new_train_set),
                                        axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.6:
        last_last_ratio = ratio - 0.2 - 0.2
        last_last_train_ratio = int(last_last_ratio * float(train_set_len))
        last_last_train_index_set = random.sample(list(train_index_set),
                                                  last_last_train_ratio)
        last_last_train_set = train_set[last_last_train_index_set]

        remain_train_index_set = train_index_set - set(
            last_last_train_index_set)
        remain_len = len(remain_train_index_set)
        last_train_index_set = random.sample(
            list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len)))
        last_train_set = train_set[last_train_index_set]
        real_train_set = np.concatenate((last_last_train_set, last_train_set),
                                        axis=0)

        remain_train_index_set = remain_train_index_set - set(
            last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 3.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]

        real_train_set = np.concatenate((real_train_set, new_train_set),
                                        axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.8:
        last_last_ratio = ratio - 0.2 - 0.2 - 0.2
        last_last_train_ratio = int(last_last_ratio * float(train_set_len))
        last_last_train_index_set = random.sample(list(train_index_set),
                                                  last_last_train_ratio)
        last_last_train_set = train_set[last_last_train_index_set]

        remain_train_index_set = train_index_set - set(
            last_last_train_index_set)
        remain_len = len(remain_train_index_set)
        last_train_index_set = random.sample(
            list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len)))
        last_train_set = train_set[last_train_index_set]
        real_train_set = np.concatenate((last_last_train_set, last_train_set),
                                        axis=0)

        remain_train_index_set = remain_train_index_set - set(
            last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 3.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((real_train_set, new_train_set),
                                        axis=0)

        remain_train_index_set = remain_train_index_set - set(
            new_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 2.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]

        real_train_set = np.concatenate((real_train_set, new_train_set),
                                        axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 1:
        train_set = np.array(train_set)
        print("real train len", len(train_set))
    else:
        train_ratio = int(ratio * float(train_set_len))
        real_train_index_set = random.sample(list(train_index_set),
                                             train_ratio)
        real_train_set = train_set[real_train_index_set]
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))

    model_para = {
        'item_size': len(items),
        'dilated_channels': 64,
        'dilations': [1, 4] * args.dilation_count,
        'step': args.step,
        'kernel_size': 3,
        'learning_rate': args.learning_rate,
        'L2': args.L2,
        'batch_size': 256,
        'load_model': args.load_model,
        'model_path': args.model_path,
        'copy_softmax': args.copy_softmax,
        'copy_layernorm': args.copy_layernorm,
        'method': args.method
    }

    print(model_para)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    itemrec = generator_deep.NextItNet_Decoder(model_para)
    itemrec.train_graph()
    optimizer = tf.train.AdamOptimizer(model_para['learning_rate'],
                                       beta1=args.beta1).minimize(itemrec.loss)
    itemrec.predict_graph(reuse=True)

    tf.add_to_collection("dilate_input", itemrec.dilate_input)
    tf.add_to_collection("context_embedding", itemrec.context_embedding)

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    saver = tf.train.Saver(max_to_keep=1)

    #writer=tf.summary.FileWriter('./stack_graph',sess.graph)

    numIters = 1
    max_mrr = 0
    break_stick = 0
    early_stop = 0
    while (1):
        if break_stick == 1:
            break

        batch_no = 0
        batch_size = model_para['batch_size']

        while (batch_no + 1) * batch_size < train_set.shape[0]:

            start = time.time()

            item_batch = train_set[batch_no * batch_size:(batch_no + 1) *
                                   batch_size, :]
            _, loss = sess.run([optimizer, itemrec.loss],
                               feed_dict={itemrec.itemseq_input: item_batch})
            end = time.time()
            if numIters % args.eval_iter == 0:
                print(
                    "-------------------------------------------------------train"
                )
                print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".
                      format(loss, batch_no, numIters,
                             train_set.shape[0] / batch_size))
                print("TIME FOR BATCH", end - start)
                print("TIME FOR EPOCH (mins)",
                      (end - start) * (train_set.shape[0] / batch_size) / 60.0)

            batch_no += 1

            if numIters % args.eval_iter == 0:
                print(
                    "-------------------------------------------------------test"
                )
                batch_no_test = 0
                batch_size_test = batch_size * 1
                curr_preds_5 = []
                rec_preds_5 = []
                ndcg_preds_5 = []
                curr_preds_10 = []
                rec_preds_10 = []
                ndcg_preds_10 = []
                while (batch_no_test +
                       1) * batch_size_test < valid_set.shape[0]:
                    item_batch = valid_set[batch_no_test *
                                           batch_size_test:(batch_no_test +
                                                            1) *
                                           batch_size_test, :]
                    [probs_10, probs_5] = sess.run(
                        [itemrec.top_10, itemrec.top_5],
                        feed_dict={itemrec.input_predict: item_batch})
                    #print(probs_10[1].shape) #(256,1,10)
                    for bi in range(batch_size_test):
                        pred_items_10 = probs_10[1][bi][-1]
                        pred_items_5 = probs_5[1][bi][-1]

                        true_item = item_batch[bi][-1]
                        predictmap_5 = {
                            ch: i
                            for i, ch in enumerate(pred_items_5)
                        }
                        pred_items_10 = {
                            ch: i
                            for i, ch in enumerate(pred_items_10)
                        }

                        rank_5 = predictmap_5.get(true_item)
                        rank_10 = pred_items_10.get(true_item)
                        if rank_5 == None:
                            curr_preds_5.append(0.0)
                            rec_preds_5.append(0.0)
                            ndcg_preds_5.append(0.0)
                        else:
                            MRR_5 = 1.0 / (rank_5 + 1)
                            Rec_5 = 1.0
                            ndcg_5 = 1.0 / math.log(rank_5 + 2, 2)
                            curr_preds_5.append(MRR_5)
                            rec_preds_5.append(Rec_5)
                            ndcg_preds_5.append(ndcg_5)
                        if rank_10 == None:
                            curr_preds_10.append(0.0)
                            rec_preds_10.append(0.0)
                            ndcg_preds_10.append(0.0)
                        else:
                            MRR_10 = 1.0 / (rank_10 + 1)
                            Rec_10 = 1.0
                            ndcg_10 = 1.0 / math.log(rank_10 + 2, 2)
                            curr_preds_10.append(MRR_10)
                            rec_preds_10.append(Rec_10)
                            ndcg_preds_10.append(ndcg_10)

                    batch_no_test += 1

                mrr = sum(curr_preds_5) / float(len(curr_preds_5))
                mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10))
                hit = sum(rec_preds_5) / float(len(rec_preds_5))
                hit_10 = sum(rec_preds_10) / float(len(rec_preds_10))
                ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5))
                ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10))

                if mrr > max_mrr:
                    max_mrr = mrr

                    print("Save model!  mrr_5:", mrr)
                    print("Save model!  mrr_10:", mrr_10)
                    print("Save model!  hit_5:", hit)
                    print("Save model!  hit_10:", hit_10)
                    print("Save model!  ndcg_5:", ndcg)
                    print("Save model!  ndcg_10:", ndcg_10)
                    early_stop = 0
                    saver.save(
                        sess, args.save_dir + "/{}_{}_{}_{}.ckpt".format(
                            args.dilation_count, args.learning_rate,
                            args.data_ratio, args.step))
                else:
                    print("mrr_5:", mrr)
                    print("mrr_10:", mrr_10)
                    print("hit_5:", hit)
                    print("hit_10:", hit_10)
                    print("ndcg_5:", ndcg)
                    print("ndcg_10:", ndcg_10)
                    early_stop += 1

            if numIters >= model_para['step']:
                break_stick = 1
                break
            if early_stop >= args.early_stop:
                break_stick = 1
                print("early stop!")
                break

            numIters += 1
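The evaluation block above computes MRR, hit rate and NDCG at 5 and 10 inline. A small helper equivalent to those per-example updates (rank is the 0-based position of the true item in the top-k list, or None if it is absent):

import math

def ranking_metrics(rank):
    # returns the (MRR, hit, NDCG) contributions for one example
    if rank is None:
        return 0.0, 0.0, 0.0
    return 1.0 / (rank + 1), 1.0, 1.0 / math.log(rank + 2, 2)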
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--batch_size',
                        type=int,
                        default=8,
                        help='Batch Size')
    parser.add_argument('--bucket_quant',
                        type=int,
                        default=50,
                        help='Bucket Quantization')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=1000,
                        help='Max Epochs')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Momentum for Adam Update')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument(
        '--source_file',
        type=str,
        default='Data/MachineTranslation/news-commentary-v11.de-en.de',
        help='Source File')
    parser.add_argument(
        '--target_file',
        type=str,
        default='Data/MachineTranslation/news-commentary-v11.de-en.en',
        help='Target File')
    parser.add_argument('--sample_every',
                        type=int,
                        default=500,
                        help='Sample generator output every x steps')
    parser.add_argument('--summary_every',
                        type=int,
                        default=50,
                        help='Write summaries every x steps')
    parser.add_argument('--top_k',
                        type=int,
                        default=5,
                        help='Sample from top k predictions')
    parser.add_argument('--resume_from_bucket',
                        type=int,
                        default=0,
                        help='Resume From Bucket')
    args = parser.parse_args()

    data_loader_options = {
        'model_type': 'translation',
        'source_file': args.source_file,
        'target_file': args.target_file,
        'bucket_quant': args.bucket_quant,
    }

    dl = data_loader.Data_Loader(data_loader_options)
    buckets, source_vocab, target_vocab = dl.load_translation_data()
    print "Number Of Buckets", len(buckets)

    config = model_config.translator_config
    model_options = {
        'source_vocab_size': len(source_vocab),
        'target_vocab_size': len(target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
    }

    translator_model = translator.ByteNet_Translator(model_options)
    translator_model.build_model()

    optim = tf.train.AdamOptimizer(
        args.learning_rate, beta1=args.beta1).minimize(translator_model.loss)

    translator_model.build_translator(reuse=True)
    merged_summary = tf.summary.merge_all()

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    shutil.rmtree('Data/tb_summaries/translator_model', ignore_errors=True)
    train_writer = tf.summary.FileWriter('Data/tb_summaries/translator_model',
                                         sess.graph)

    bucket_sizes = [bucket_size for bucket_size in buckets]
    bucket_sizes.sort()

    step = 0
    batch_size = args.batch_size
    for epoch in range(args.max_epochs):
        for bucket_size in bucket_sizes:
            if epoch == 0 and bucket_size < args.resume_from_bucket:
                continue

            batch_no = 0
            while (batch_no + 1) * batch_size < len(buckets[bucket_size]):
                start = time.clock()
                source, target = dl.get_batch_from_pairs(
                    buckets[bucket_size][batch_no * batch_size:(batch_no + 1) *
                                         batch_size])

                _, loss, prediction = sess.run(
                    [
                        optim, translator_model.loss,
                        translator_model.arg_max_prediction
                    ],
                    feed_dict={
                        translator_model.source_sentence: source,
                        translator_model.target_sentence: target,
                    })
                end = time.clock()

                print "LOSS: {}\tEPOCH: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}\t bucket_size:{}".format(
                    loss, epoch, batch_no, step,
                    len(buckets[bucket_size]) / args.batch_size, bucket_size)
                print "TIME FOR BATCH", end - start
                print "TIME FOR BUCKET (mins)", (end - start) * (
                    len(buckets[bucket_size]) / args.batch_size) / 60.0

                batch_no += 1
                step += 1

                if step % args.summary_every == 0:
                    [summary] = sess.run(
                        [merged_summary],
                        feed_dict={
                            translator_model.source_sentence: source,
                            translator_model.target_sentence: target,
                        })
                    train_writer.add_summary(summary, step)

                    print "******"
                    print "Source ", dl.inidices_to_string(
                        source[0], source_vocab)
                    print "---------"
                    print "Target ", dl.inidices_to_string(
                        target[0], target_vocab)
                    print "----------"
                    print "Prediction ", dl.inidices_to_string(
                        prediction[0:bucket_size], target_vocab)
                    print "******"

                if step % args.sample_every == 0:
                    log_file = open('Data/translator_sample.txt', 'wb')
                    generated_target = target[:, 0:1]
                    for col in range(bucket_size):
                        [probs] = sess.run(
                            [translator_model.t_probs],
                            feed_dict={
                                translator_model.t_source_sentence:
                                source,
                                translator_model.t_target_sentence:
                                generated_target,
                            })

                        curr_preds = []
                        for bi in range(probs.shape[0]):
                            pred_word = utils.sample_top(probs[bi][-1],
                                                         top_k=args.top_k)
                            curr_preds.append(pred_word)

                        generated_target = np.insert(generated_target,
                                                     generated_target.shape[1],
                                                     curr_preds,
                                                     axis=1)

                        for bi in range(probs.shape[0]):

                            print col, dl.inidices_to_string(
                                generated_target[bi], target_vocab)
                            print col, dl.inidices_to_string(
                                target[bi], target_vocab)
                            print "***************"

                            if col == bucket_size - 1:
                                try:
                                    log_file.write("Predicted: " +
                                                   dl.inidices_to_string(
                                                       generated_target[bi],
                                                       target_vocab) + '\n')
                                    log_file.write(
                                        "Actual Target: " +
                                        dl.inidices_to_string(
                                            target[bi], target_vocab) + '\n')
                                    log_file.write(
                                        "Actual Source: " +
                                        dl.inidices_to_string(
                                            source[bi], source_vocab) +
                                        '\n *******')
                                except:
                                    pass
                                print "***************"
                    log_file.close()

            save_path = saver.save(
                sess,
                "Data/Models/translation_model/model_epoch_{}_{}.ckpt".format(
                    epoch, bucket_size))
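For reference, the way `buckets` is used above implies the following layout; this is inferred from the calls in the snippet, not from the Data_Loader source:

# buckets: dict mapping a bucket length to a list of (source, target)
#          sentence pairs, so buckets[bucket_size] can be sliced into batches
# dl.get_batch_from_pairs(pairs): stacks such a slice into two padded integer
#          arrays of shape [len(pairs), bucket_size], returned as (source, target)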
Example 9
def main():
    parser = argparse.ArgumentParser()
    
    parser.add_argument('--bucket_quant', type=int, default=50,
                       help='Bucket Quantization')
    parser.add_argument('--model_path', type=str, default=None,
                       help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--source_file', type=str, default='Data/MachineTranslation/news-commentary-v11.de-en.de',
                       help='Source File')
    parser.add_argument('--target_file', type=str, default='Data/MachineTranslation/news-commentary-v11.de-en.en',
                       help='Target File')
    parser.add_argument('--top_k', type=int, default=5,
                       help='Sample from top k predictions')
    parser.add_argument('--batch_size', type=int, default=16,
                       help='Batch Size')
    parser.add_argument('--bucket_size', type=int, default=None,
                       help='Bucket Size')
    args = parser.parse_args()
    
    data_loader_options = {
        'model_type' : 'translation',
        'source_file' : args.source_file,
        'target_file' : args.target_file,
        'bucket_quant' : args.bucket_quant,
    }

    dl = data_loader.Data_Loader(data_loader_options)
    buckets, source_vocab, target_vocab = dl.load_translation_data()
    print "Number Of Buckets", len(buckets)

    config = model_config.translator_config
    model_options = {
        'source_vocab_size' : len(source_vocab),
        'target_vocab_size' : len(target_vocab),
        'residual_channels' : config['residual_channels'],
        'decoder_dilations' : config['decoder_dilations'],
        'encoder_dilations' : config['encoder_dilations'],
        'decoder_filter_width' : config['decoder_filter_width'],
        'encoder_filter_width' : config['encoder_filter_width'],
    }

    translator_model = translator.ByteNet_Translator( model_options )
    translator_model.build_translator()
    
    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.model_path:
        saver.restore(sess, args.model_path)

    
    
    bucket_sizes = [bucket_size for bucket_size in buckets]
    bucket_sizes.sort()

    if not args.bucket_size:
        bucket_size = random.choice(bucket_sizes)
    else:
        bucket_size = args.bucket_size

    source, target = dl.get_batch_from_pairs( 
        random.sample(buckets[bucket_size], args.batch_size)
    )
    
    log_file = open('Data/translator_sample.txt', 'wb')
    generated_target = target[:,0:1]
    for col in range(bucket_size):
        [probs] = sess.run([translator_model.t_probs], 
            feed_dict = {
                translator_model.t_source_sentence : source,
                translator_model.t_target_sentence : generated_target,
            })

        curr_preds = []
        for bi in range(probs.shape[0]):
            pred_word = utils.sample_top(probs[bi][-1], top_k = args.top_k )
            curr_preds.append(pred_word)

        generated_target = np.insert(generated_target, generated_target.shape[1], curr_preds, axis = 1)
        

        for bi in range(probs.shape[0]):

            print col, dl.inidices_to_string(generated_target[bi], target_vocab)
            print col, dl.inidices_to_string(target[bi], target_vocab)
            print "***************"

            if col == bucket_size - 1:
                try:
                    log_file.write("Predicted: " + dl.inidices_to_string(generated_target[bi], target_vocab) + '\n')
                    log_file.write("Actual Target: " + dl.inidices_to_string(target[bi], target_vocab) + '\n')
                    log_file.write("Actual Source: " + dl.inidices_to_string(source[bi], source_vocab) + '\n *******')
                except:
                    pass
                
    log_file.close()
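The decoding helper `dl.inidices_to_string` is not included in these snippets. A minimal sketch of the assumed behavior, inverting the vocabulary and joining the looked-up tokens (names are illustrative, and a character-level vocabulary is assumed):

def indices_to_string_sketch(indices, vocab):
    # vocab maps token -> index; build the inverse map and decode
    inverse_vocab = {i: tok for tok, i in vocab.items()}
    return ''.join(inverse_vocab.get(int(i), '') for i in indices)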
Example 10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--top_k',
                        type=int,
                        default=5,
                        help='sample from top k predictions')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.9,
                        help='beta1 hyperparameter for Adam')
    parser.add_argument('--datapath',
                        type=str,
                        default="Data/movielen_20/movielen_20.csv",
                        help='data path')
    parser.add_argument('--save_dir',
                        type=str,
                        default="Models/ml20",
                        help='save dir path')
    parser.add_argument('--eval_iter',
                        type=int,
                        default=1000,
                        help='evaluate every x steps')
    parser.add_argument('--early_stop',
                        type=int,
                        default=10,
                        help='stop after x evaluations without improvement')
    parser.add_argument('--step',
                        type=int,
                        default=400000,
                        help='maximum number of training steps')
    parser.add_argument('--tt_percentage',
                        type=float,
                        default=0.2,
                        help='0.2 means 80% training 20% testing')
    parser.add_argument('--data_ratio',
                        type=float,
                        default=1,
                        help='fraction of training data actually used')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='learning rate')
    parser.add_argument('--L2',
                        type=float,
                        default=0,
                        help='L2 regularization')
    parser.add_argument('--dilation_count',
                        type=int,
                        default=16,
                        help='number of [1, 4] dilation blocks')
    parser.add_argument('--method',
                        type=str,
                        default="from_scratch",
                        help='from_scratch, StackR, stackC, stackA')
    parser.add_argument('--load_model',
                        type=ast.literal_eval,
                        default=False,
                        help='whether to load a pretrained model')
    parser.add_argument('--model_path',
                        type=str,
                        default="Models/",
                        help='load model path')
    parser.add_argument('--padid', type=int, default=0, help='pad id')

    parser.add_argument('--masked_lm_prob',
                        type=float,
                        default=0.2,
                        help='0.2 means 20% of items are masked')
    parser.add_argument('--max_predictions_per_seq',
                        type=int,
                        default=50,
                        help='maximum number of masked tokens')
    parser.add_argument(
        '--max_position',
        type=int,
        default=100,
        help=
        'maximum number of positions for the positional embedding; it must be larger than the sequence length'
    )
    parser.add_argument(
        '--has_positionalembedding',
        type=bool,
        default=False,
        help='whether to add positional embeddings before the CNN layers')

    args = parser.parse_args()

    print(args)

    dl = data_loader.Data_Loader({
        'dir_name': args.datapath,
        'padid': args.padid
    })
    all_samples = dl.item
    print(all_samples.shape)
    items = dl.item_dict
    itemlist = list(items.values())
    item_size = len(items)
    print("len(items)", item_size)

    max_predictions_per_seq = args.max_predictions_per_seq
    masked_lm_prob = args.masked_lm_prob

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(all_samples)))
    all_samples = all_samples[shuffle_indices]

    # Split train/test set
    dev_sample_index = -1 * int(args.tt_percentage * float(len(all_samples)))
    train_set, valid_set = all_samples[:dev_sample_index], all_samples[
        dev_sample_index:]

    random.seed(10)
    ratio = args.data_ratio
    train_set_len = len(train_set)
    train_index_set = set(list(range(train_set_len)))

    if ratio == 0.2:
        train_ratio = int(ratio * float(train_set_len))
        real_train_index_set = random.sample(list(train_index_set),
                                             train_ratio)
        real_train_set = train_set[real_train_index_set]
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.4:
        last_ratio = ratio - 0.2
        last_train_ratio = int(last_ratio * float(train_set_len))
        last_train_index_set = random.sample(list(train_index_set),
                                             last_train_ratio)
        last_train_set = train_set[last_train_index_set]

        remain_train_index_set = train_index_set - set(last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 4.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]

        real_train_set = np.concatenate((last_train_set, new_train_set),
                                        axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.6:
        last_last_ratio = ratio - 0.2 - 0.2
        last_last_train_ratio = int(last_last_ratio * float(train_set_len))
        last_last_train_index_set = random.sample(list(train_index_set),
                                                  last_last_train_ratio)
        last_last_train_set = train_set[last_last_train_index_set]

        remain_train_index_set = train_index_set - set(
            last_last_train_index_set)
        remain_len = len(remain_train_index_set)
        last_train_index_set = random.sample(
            list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len)))
        last_train_set = train_set[last_train_index_set]
        real_train_set = np.concatenate((last_last_train_set, last_train_set),
                                        axis=0)

        remain_train_index_set = remain_train_index_set - set(
            last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 3.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]

        real_train_set = np.concatenate((real_train_set, new_train_set),
                                        axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 0.8:
        last_last_ratio = ratio - 0.2 - 0.2 - 0.2
        last_last_train_ratio = int(last_last_ratio * float(train_set_len))
        last_last_train_index_set = random.sample(list(train_index_set),
                                                  last_last_train_ratio)
        last_last_train_set = train_set[last_last_train_index_set]

        remain_train_index_set = train_index_set - set(
            last_last_train_index_set)
        remain_len = len(remain_train_index_set)
        last_train_index_set = random.sample(
            list(remain_train_index_set), int(1.0 / 4.0 * float(remain_len)))
        last_train_set = train_set[last_train_index_set]
        real_train_set = np.concatenate((last_last_train_set, last_train_set),
                                        axis=0)

        remain_train_index_set = remain_train_index_set - set(
            last_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 3.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]
        real_train_set = np.concatenate((real_train_set, new_train_set),
                                        axis=0)

        remain_train_index_set = remain_train_index_set - set(
            new_train_index_set)
        remain_len = len(remain_train_index_set)
        new_train_index_set = random.sample(list(remain_train_index_set),
                                            int(1.0 / 2.0 * float(remain_len)))
        new_train_set = train_set[new_train_index_set]

        real_train_set = np.concatenate((real_train_set, new_train_set),
                                        axis=0)
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))
    elif ratio == 1:
        train_set = np.array(train_set)
        print("real train len", len(train_set))
    else:
        train_ratio = int(ratio * float(train_set_len))
        real_train_index_set = random.sample(list(train_index_set),
                                             train_ratio)
        real_train_set = train_set[real_train_index_set]
        train_set = np.array(real_train_set)
        print("real train len", len(train_set))

    model_para = {
        'item_size': len(items),
        'dilated_channels': 64,
        'dilations': [1, 4] * args.dilation_count,
        'step': args.step,
        'kernel_size': 3,
        'learning_rate': args.learning_rate,
        'L2': args.L2,
        'batch_size': 1024,
        'load_model': args.load_model,
        'model_path': args.model_path,
        'method': args.method,
        'max_position': args.max_position,
        # maximum number of positions for the positional embedding; must be larger than the sequence length
        'has_positionalembedding': args.has_positionalembedding
    }

    print(model_para)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    itemrec = generator_deep.NextItNet_Decoder(model_para)
    itemrec.train_graph()
    optimizer = tf.train.AdamOptimizer(model_para['learning_rate'],
                                       beta1=args.beta1).minimize(itemrec.loss)
    itemrec.predict_graph(reuse=True)

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    saver = tf.train.Saver(max_to_keep=1)

    init = tf.global_variables_initializer()
    sess.run(init)

    # writer=tf.summary.FileWriter('./stack_graph',sess.graph)

    numIters = 1
    max_mrr = 0
    break_stick = 0
    early_stop = 0
    while True:
        if break_stick == 1:
            break

        batch_no = 0
        batch_size = model_para['batch_size']

        while (batch_no + 1) * batch_size < train_set.shape[0]:

            start = time.time()

            item_batch = train_set[batch_no * batch_size:(batch_no + 1) *
                                   batch_size, :]

            output_tokens_batch, maskedpositions_batch, maskedlabels_batch, masked_lm_weights_batch = create_masked_lm_predictions_frombatch(
                item_batch,
                masked_lm_prob,
                max_predictions_per_seq,
                items=itemlist,
                rng=None,
                item_size=item_size)
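            # create_masked_lm_predictions_frombatch is defined elsewhere in this
            # repo; it is assumed to mask random item positions in each sequence
            # (BERT-style), returning the corrupted sequences, the masked positions,
            # the original items at those positions, and per-position loss weights.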

            _, loss = sess.run(
                [optimizer, itemrec.loss],
                feed_dict={
                    itemrec.itemseq_output: item_batch[:,
                                                       1:],  # 2 3 4 5 6 7 8 9
                    itemrec.itemseq_input:
                    output_tokens_batch,  # 1 2 0 4 5 0 7 8 9
                    itemrec.masked_position: maskedpositions_batch,  # [1 4]
                    itemrec.masked_items: maskedlabels_batch,  # [3,6]
                    itemrec.label_weights:
                    masked_lm_weights_batch  # per-position weights, e.g. [1.0, 1.0, ...]; unused in practice
                })
            end = time.time()
            if numIters % args.eval_iter == 0:
                print(
                    "-------------------------------------------------------train"
                )
                print("LOSS: {}\tBATCH_NO: {}\t STEP:{}\t total_batches:{}".
                      format(loss, batch_no, numIters,
                             train_set.shape[0] / batch_size))
                print("TIME FOR BATCH", end - start)
                print("TIME FOR EPOCH (mins)",
                      (end - start) * (train_set.shape[0] / batch_size) / 60.0)

            batch_no += 1

            if numIters % args.eval_iter == 0:
                print(
                    "-------------------------------------------------------test"
                )
                batch_no_test = 0
                batch_size_test = batch_size * 1
                curr_preds_5 = []
                rec_preds_5 = []
                ndcg_preds_5 = []
                curr_preds_10 = []
                rec_preds_10 = []
                ndcg_preds_10 = []
                while (batch_no_test +
                       1) * batch_size_test < valid_set.shape[0]:
                    item_batch = valid_set[batch_no_test *
                                           batch_size_test:(batch_no_test +
                                                            1) *
                                           batch_size_test, :]
                    [probs_10, probs_5] = sess.run(
                        [itemrec.top_10, itemrec.top_5],
                        feed_dict={itemrec.itemseq_input: item_batch[:, 0:-1]})
                    # print(probs_10[1].shape) #(256,1,10)
                    for bi in range(batch_size_test):
                        pred_items_10 = probs_10[1][bi][-1]
                        pred_items_5 = probs_5[1][bi][-1]

                        true_item = item_batch[bi][-1]
                        predictmap_5 = {
                            ch: i
                            for i, ch in enumerate(pred_items_5)
                        }
                        predictmap_10 = {
                            ch: i
                            for i, ch in enumerate(pred_items_10)
                        }

                        rank_5 = predictmap_5.get(true_item)
                        rank_10 = predictmap_10.get(true_item)
                        if rank_5 is None:
                            curr_preds_5.append(0.0)
                            rec_preds_5.append(0.0)
                            ndcg_preds_5.append(0.0)
                        else:
                            MRR_5 = 1.0 / (rank_5 + 1)
                            Rec_5 = 1.0
                            ndcg_5 = 1.0 / math.log(rank_5 + 2, 2)
                            curr_preds_5.append(MRR_5)
                            rec_preds_5.append(Rec_5)
                            ndcg_preds_5.append(ndcg_5)
                        if rank_10 is None:
                            curr_preds_10.append(0.0)
                            rec_preds_10.append(0.0)
                            ndcg_preds_10.append(0.0)
                        else:
                            MRR_10 = 1.0 / (rank_10 + 1)
                            Rec_10 = 1.0
                            ndcg_10 = 1.0 / math.log(rank_10 + 2, 2)
                            curr_preds_10.append(MRR_10)
                            rec_preds_10.append(Rec_10)
                            ndcg_preds_10.append(ndcg_10)

                    batch_no_test += 1

                mrr = sum(curr_preds_5) / float(len(curr_preds_5))
                mrr_10 = sum(curr_preds_10) / float(len(curr_preds_10))
                hit = sum(rec_preds_5) / float(len(rec_preds_5))
                hit_10 = sum(rec_preds_10) / float(len(rec_preds_10))
                ndcg = sum(ndcg_preds_5) / float(len(ndcg_preds_5))
                ndcg_10 = sum(ndcg_preds_10) / float(len(ndcg_preds_10))

                if mrr > max_mrr:
                    max_mrr = mrr

                    print("Save model!  mrr_5:", mrr)
                    print("Save model!  mrr_10:", mrr_10)
                    print("Save model!  hit_5:", hit)
                    print("Save model!  hit_10:", hit_10)
                    print("Save model!  ndcg_5:", ndcg)
                    print("Save model!  ndcg_10:", ndcg_10)
                    early_stop = 0
                    saver.save(
                        sess, args.save_dir + "/{}_{}_{}_{}.ckpt".format(
                            args.dilation_count, args.learning_rate,
                            args.data_ratio, args.step))
                else:
                    print("mrr_5:", mrr)
                    print("mrr_10:", mrr_10)
                    print("hit_5:", hit)
                    print("hit_10:", hit_10)
                    print("ndcg_5:", ndcg)
                    print("ndcg_10:", ndcg_10)
                    early_stop += 1

            if numIters >= model_para['step']:
                break_stick = 1
                break
            if early_stop >= args.early_stop:
                break_stick = 1
                print("early stop!")
                break

            numIters += 1
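
# Hedged reference sketch (not part of the original training loop): per test
# example, the evaluation above reduces to MRR@k, HR@k and NDCG@k for a single
# true item against a ranked top-k prediction list.
def rank_metrics(ranked_items, true_item):
    ranked_items = list(ranked_items)
    if true_item not in ranked_items:
        return 0.0, 0.0, 0.0  # MRR, HR and NDCG are all zero on a miss
    rank = ranked_items.index(true_item)
    return 1.0 / (rank + 1), 1.0, 1.0 / math.log(rank + 2, 2)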
Example No. 11
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--batch_size',
                        type=int,
                        default=16,
                        help='Batch Size')
    parser.add_argument('--bucket_quant',
                        type=int,
                        default=25,
                        help='Bucket Quantization')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=1000,
                        help='Max Epochs')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Momentum for Adam Update')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument(
        '--source_file',
        type=str,
        default='Data/MachineTranslation/news-commentary-v11.de-en.de',
        help='Source File')
    parser.add_argument(
        '--target_file',
        type=str,
        default='Data/MachineTranslation/news-commentary-v11.de-en.en',
        help='Target File')

    args = parser.parse_args()

    data_loader_options = {
        'model_type': 'translation',
        'source_file': args.source_file,
        'target_file': args.target_file,
        'bucket_quant': args.bucket_quant,
    }

    dl = data_loader.Data_Loader(data_loader_options)
    buckets, source_vocab, target_vocab, frequent_keys = dl.load_translation_data(
    )
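    # Hedged sketch (assumed behaviour, not taken from Data_Loader, and not used
    # below): bucketing typically groups sentence pairs by a padded length rounded
    # up to the next multiple of bucket_quant, so each bucket can be trained with
    # a fixed-size graph (sample_size=key further down).
    def _bucket_key(source_len, target_len, quant):
        padded = max(source_len, target_len)
        return ((padded + quant - 1) // quant) * quant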

    config = model_config.translator_config

    model_options = {
        'n_source_quant': len(source_vocab),
        'n_target_quant': len(target_vocab),
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'encoder_dilations': config['encoder_dilations'],
        'sample_size': 10,
        'decoder_filter_width': config['decoder_filter_width'],
        'encoder_filter_width': config['encoder_filter_width'],
        'batch_size': args.batch_size,
        'source_mask_chars': [source_vocab['padding']],
        'target_mask_chars': [target_vocab['padding']]
    }

    last_saved_model_path = None
    if args.resume_model:
        last_saved_model_path = args.resume_model

    print "Number Of Buckets", len(buckets)

    for i in range(1, args.max_epochs):
        cnt = 0
        for _, key in frequent_keys:
            cnt += 1

            print "KEY", cnt, key
            if key > 400:
                continue

            if len(buckets[key]) < args.batch_size:
                print "BUCKET TOO SMALL", key
                continue

            sess = tf.InteractiveSession()

            batch_no = 0
            batch_size = args.batch_size

            byte_net = model.Byte_net_model(model_options)
            bn_tensors = byte_net.build_translation_model(sample_size=key)

            adam = tf.train.AdamOptimizer(args.learning_rate, beta1=args.beta1)

            optim = adam.minimize(bn_tensors['loss'],
                                  var_list=bn_tensors['variables'])

            train_writer = tf.train.SummaryWriter('logs/', sess.graph)
            tf.initialize_all_variables().run()

            saver = tf.train.Saver()
            if last_saved_model_path:
                saver.restore(sess, last_saved_model_path)

            while (batch_no + 1) * batch_size < len(buckets[key]):
                source, target = dl.get_batch_from_pairs(
                    buckets[key][batch_no * batch_size:(batch_no + 1) *
                                 batch_size])

                _, loss, prediction, summary, source_gradient, target_gradient = sess.run(
                    [
                        optim, bn_tensors['loss'], bn_tensors['prediction'],
                        bn_tensors['merged_summary'],
                        bn_tensors['source_gradient'],
                        bn_tensors['target_gradient']
                    ],
                    feed_dict={
                        bn_tensors['source_sentence']: source,
                        bn_tensors['target_sentence']: target,
                    })

                train_writer.add_summary(summary, batch_no * (cnt + 1))
                print "Loss", loss, batch_no, len(
                    buckets[key]) / batch_size, i, cnt, key

                print "******"
                print "Source ", dl.inidices_to_string(source[0], source_vocab)
                print "---------"
                print "Target ", dl.inidices_to_string(target[0], target_vocab)
                print "----------"
                print "Prediction ", dl.inidices_to_string(
                    prediction[0:key], target_vocab)
                print "******"

                batch_no += 1
                if batch_no % 1000 == 0:
                    save_path = saver.save(
                        sess, "Data/Models/model_translation_epoch_{}_{}.ckpt".
                        format(i, cnt))
                    last_saved_model_path = "Data/Models/model_translation_epoch_{}_{}.ckpt".format(
                        i, cnt)

            save_path = saver.save(
                sess, "Data/Models/model_translation_epoch_{}.ckpt".format(i))
            last_saved_model_path = "Data/Models/model_translation_epoch_{}.ckpt".format(
                i)

            tf.reset_default_graph()
            sess.close()
Example No. 12
0
    def __init__(self, task):
        bucket_quant = 10

        current_path = '/'.join(os.path.realpath(__file__).split('/')[:-1])
        translator_root_path = join(current_path, 'pretrained_models')

        model_path = {
                'en-de-news': join(translator_root_path, 'en-de-news', 'model_epoch_4_145000.ckpt'),
                'en-fr-news': join(translator_root_path, 'en-fr-news', 'model_epoch_4_90000.ckpt'),
                'en-cs-news': join(translator_root_path, 'en-cs-news', 'model_epoch_4_70000.ckpt'),
                'en-de-europarl': join(translator_root_path, 'en-de-europarl', 'model_epoch_1_440000.ckpt')
                }

        data_root_path = join(current_path, 'Data', 'translator_training_data')
        source_file = {
                'en-de-europarl': join(data_root_path, 'europarl-v7.de-en.en'),
                'en-de-news': join(data_root_path, 'news-commentary-v12.de-en.en'),
                'en-fr-news': join(data_root_path, 'news-commentary-v9.fr-en.en'),
                'en-cs-news': join(data_root_path, 'news-commentary-v9.cs-en.en')
                }

        target_file = {
                'en-de-europarl': join(data_root_path, 'europarl-v7.de-en.de'),
                'en-de-news': join(data_root_path, 'news-commentary-v12.de-en.de'),
                'en-fr-news': join(data_root_path, 'news-commentary-v9.fr-en.fr'),
                'en-cs-news': join(data_root_path, 'news-commentary-v9.cs-en.cs')
                }

        data_loader_options = {
                'model_type': 'translation',
                'source_file': source_file[task],
                'target_file': target_file[task],
                'bucket_quant': bucket_quant
                }


        self.dl = data_loader.Data_Loader(data_loader_options)
        self.buckets, self.source_vocab, self.target_vocab = self.dl.load_translation_data()

        config = model_config.translator_config

        model_options = {
            'source_vocab_size' : len(self.source_vocab),
            'target_vocab_size' : len(self.target_vocab),
            'residual_channels' : config['residual_channels'],
            'decoder_dilations' : config['decoder_dilations'],
            'encoder_dilations' : config['encoder_dilations'],
            'decoder_filter_width' : config['decoder_filter_width'],
            'encoder_filter_width' : config['encoder_filter_width'],
            'layer_norm': config['layer_norm']
        }

        self.translator_model = translator.ByteNet_Translator( model_options )
        self.translator_model.build_model()
        self.translator_model.build_translator(reuse=True)

        self.sess = tf.Session()
        saver = tf.train.Saver()

        if model_path[task]:
            saver.restore(self.sess, model_path[task])

        self.features = {}
        for layer_index in range(15):
            dilation = int(math.pow(2, layer_index % 5))

            layer_tensor_name = "bytenet_encoder_layer_%d_%d/add:0" % (layer_index, dilation)
            layer_name = "bytenet_encoder_layer_%d_%d" % (layer_index, dilation)
            self.features[layer_name] = tf.get_default_graph().get_tensor_by_name(layer_tensor_name)
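
    # Hedged usage sketch (not part of the original class): assuming the translator
    # model exposes a placeholder for the source sentence (the attribute name
    # `source_sentence` below is hypothetical), the cached encoder-layer tensors
    # could be evaluated for one encoded sentence like this.
    def extract_features(self, source_indices):
        # source_indices: int32 array of shape [1, sentence_length]
        return self.sess.run(
            self.features,
            feed_dict={self.translator_model.source_sentence: source_indices})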
Example No. 13
0
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--sample_size',
                        type=int,
                        default=300,
                        help='Sampled output size')
    parser.add_argument('--top_k',
                        type=int,
                        default=5,
                        help='Sample from top k predictions')
    parser.add_argument('--model_path',
                        type=str,
                        default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--text_dir',
                        type=str,
                        default='Data/generator_training_data',
                        help='Directory containing text files')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data Directory')
    parser.add_argument('--seed',
                        type=str,
                        default='All',
                        help='Seed for text generation')

    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )
    config = model_config.predictor_config

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.text_dir
    })
    _, vocab = dl.load_generator_data(config['sample_size'])

    model_options = {
        'vocab_size': len(vocab),
        'residual_channels': config['residual_channels'],
        'dilations': config['dilations'],
        'filter_width': config['filter_width'],
    }

    generator_model = generator.ByteNet_Generator(model_options)
    generator_model.build_generator()

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.model_path:
        saver.restore(sess, args.model_path)

    seed_sentence = np.array([dl.string_to_indices(args.seed, vocab)],
                             dtype='int32')

    for col in range(args.sample_size):
        [probs
         ] = sess.run([generator_model.g_probs],
                      feed_dict={generator_model.seed_sentence: seed_sentence})

        curr_preds = []
        for bi in range(probs.shape[0]):
            pred_word = utils.sample_top(probs[bi][-1], top_k=args.top_k)
            curr_preds.append(pred_word)

        seed_sentence = np.insert(seed_sentence,
                                  seed_sentence.shape[1],
                                  curr_preds,
                                  axis=1)
        print col, dl.inidices_to_string(seed_sentence[0], vocab)

        f = open('Data/generator_sample.txt', 'wb')
        f.write(dl.inidices_to_string(seed_sentence[0], vocab))
        f.close()
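
# Hedged sketch of top-k sampling (the actual utils.sample_top may differ): keep
# the k highest-probability indices, renormalise their probabilities, draw one.
def sample_top_k(probs, top_k=5):
    top_idx = np.argsort(probs)[-top_k:]
    top_probs = probs[top_idx]
    top_probs = top_probs / np.sum(top_probs)
    return np.random.choice(top_idx, p=top_probs)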