Example 1
def train(ae_type, latent_dim=2, epochs=100, lr=1e-4, batch_size=1000):

    if ae_type == "AE" or ae_type == "DAE":
        model = AE(latent_dim)
    elif ae_type == "VAE":
        model = VAE(latent_dim)
    elif ae_type == "CVAE":
        model = CVAE(latent_dim)
    elif ae_type == "BetaVAE":
        model = BetaVAE(latent_dim)
    else:
        raise ValueError("Unknown ae_type: %s" % ae_type)

    # load train and test data
    train_dataset, test_dataset = data.load_dataset(ae_type, batch_size=batch_size)
    # initialize Adam optimizer
    optimizer = tf.keras.optimizers.Adam(lr)

    for epoch in range(1, epochs + 1):
        last_loss = 0

        for train_x, train_y in train_dataset:
            gradients, loss = compute_gradients(model, train_x, train_y, ae_type)
            apply_gradients(optimizer, gradients, model.trainable_variables)
            last_loss = loss

        if epoch % 2 == 0:
            print('Epoch {}, Loss: {}'.format(epoch, last_loss))

    return model
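The helpers compute_gradients and apply_gradients are not shown in this snippet. A minimal sketch of what they might look like in TF 2.x, assuming a per-model loss helper (compute_loss here is hypothetical, not part of the original):

# Sketch only: gradient helpers as the training loop above expects them.
import tensorflow as tf

def compute_gradients(model, x, y, ae_type):
    # compute_loss is a hypothetical helper that dispatches on ae_type (AE/DAE/VAE/CVAE/BetaVAE).
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, ae_type)
    return tape.gradient(loss, model.trainable_variables), loss

def apply_gradients(optimizer, gradients, variables):
    optimizer.apply_gradients(zip(gradients, variables))
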
Example 2
def train_autoencoder(X_dir, Y_dir, batch_size, dim, X_channels, Y_channels,
                      log_dir, shuffle, **kwargs):
    # Dataset
    pairs_filename = load_dataset(X_dir, Y_dir)
    partition = partition_dataset(pairs_filename)
    # Generators
    training_generator = DataGenerator(partition['train'], batch_size, dim,
                                       X_channels, Y_channels, shuffle)
    validation_generator = DataGenerator(partition['validation'], batch_size,
                                         dim, X_channels, Y_channels, shuffle)
    # Design model
    input_img = Input(shape=(*dim, X_channels))
    encoder_img = encoder(n_features=8)
    decoder_lbl = decoder(n_output_features=Y_channels, n_features=8)
    latent_img = encoder_img(input_img)
    latent_lbl = latent_img  # TODO Put res_net here for image to label translation
    restored_lbl = decoder_lbl(latent_lbl)
    img2lbl = Model(input_img, restored_lbl)
    img2lbl.compile(optimizer='adadelta', loss='mean_squared_error')
    # Print summary
    img2lbl.summary()
    print('Model contains a total of %d trainable weight tensors.\n' %
          len(img2lbl.trainable_weights))
    # Train model
    tbi_callback = TensorBoardImage(log_dir=log_dir,
                                    validation_data=validation_generator)
    tb_callback = TensorBoard(log_dir=log_dir)
    img2lbl.fit_generator(generator=training_generator,
                          validation_data=validation_generator,
                          epochs=50,
                          callbacks=[tb_callback, tbi_callback],
                          use_multiprocessing=True,
                          workers=2)
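A hypothetical invocation (directory names, image size, and channel counts are placeholders, not taken from the original):

train_autoencoder(X_dir='data/images', Y_dir='data/labels',
                  batch_size=16, dim=(256, 256), X_channels=1, Y_channels=3,
                  log_dir='logs/img2lbl', shuffle=True)
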
Example 3
def train_model(max_items=-1):
    """train the classifier with up to the given amount of items. Returns the feature extractor and the trained classifier"""
    extractor, (X_train,
                Y_train), (X_test,
                           Y_test) = data.load_dataset("games.json",
                                                       max_items=max_items)
    classifier = classifiers.RandomForest()
    classifier.train(X_train, Y_train)
    test_model(classifier, X_test, Y_test)
    return extractor, classifier
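A hypothetical call (the item cap is a placeholder); the returned extractor and classifier can then be reused for inference:

extractor, classifier = train_model(max_items=10000)
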
Example 4
def evaluate_file(sess,
                  args,
                  eval_model,
                  vocab,
                  files,
                  batch_size=100,
                  print_logs=False):
    eval_iterator = load_dataset(files,
                                 vocab,
                                 constants.EVAL,
                                 batch_size=batch_size,
                                 min_seq_len=args.min_seq_len,
                                 max_seq_len=args.max_seq_len,
                                 has_source=True)
    eval_next_op = eval_iterator.get_next()

    sess.run([eval_iterator.initializer])
    n_batch = 0
    t0 = time.time()
    losses = []
    while True:
        try:
            data = sess.run(eval_next_op)  # get real data!!
            feed_dict = {
                eval_model.x: data['ids'],
                eval_model.y: data['senti'],
                eval_model.sequence_length: data['length']
            }

            ops = [eval_model.loss]
            res = sess.run(ops, feed_dict=feed_dict)
            losses.append(res[0])
            n_batch += 1
        except tf.errors.OutOfRangeError as e:  # next epoch
            if print_logs:
                print("Test---Total N batch:{}\tCost time:{}".format(
                    n_batch,
                    time.time() - t0))
            break
    del eval_iterator
    del eval_next_op

    return np.mean(losses)
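A hypothetical call (the file name is a placeholder), returning the mean per-batch loss over the evaluation set:

dev_loss = evaluate_file(sess, args, eval_model, vocab,
                         files=["data/dev.txt"], batch_size=100, print_logs=True)
print("Mean eval loss: %.4f" % dev_loss)
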
Example 5
def run_experiment(epochs, model_name, training_type, configs):
    """ Runs the basic experiment"""

    print(epochs, "CONFIGS: ", configs)

    # set seed for reproducibility.
    seed = configs.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # gpu training specific seed settings.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # load data
    loaders = load_dataset(configs)

    # load model
    model = load_model(model_name, training_type, configs)

    # loss
    criterion = nn.CrossEntropyLoss().cuda()

    # optimizer
    # optimizer = optim.SGD(model.parameters(), configs.lr,
    #                       momentum=configs.momentum,
    #                       weight_decay=configs.weight_decay)
    optimizer = optim.Adam(model.parameters(), configs.lr)

    # get tracking dictionaries
    model_weights, layer_dict = setup_delta_tracking(model)

    # train model
    rmae_delta_dict, train_acc_arr, test_acc_arr = training(
        epochs, loaders, model, optimizer, criterion, model_weights,
        layer_dict, configs)

    return rmae_delta_dict, train_acc_arr, test_acc_arr
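A hypothetical driver (the configs fields shown are only the ones this function reads directly; load_dataset and load_model will likely expect more):

from argparse import Namespace

configs = Namespace(seed=42, lr=1e-3, momentum=0.9, weight_decay=5e-4)
rmae_delta_dict, train_acc_arr, test_acc_arr = run_experiment(
    epochs=100, model_name="resnet18", training_type="baseline", configs=configs)
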
Example 6
    # Input pipeline should always be placed on the CPU.
    with tf.device("/cpu:0"):
        print("Use x'->y to update model f(x->y)")
        train_iterator = load_paired_dataset(args.tsf_train_data[B],
                                             args.train_data[B],
                                             src_vocab,
                                             tgt_vocab,
                                             batch_size=args.batch_size)
        dev_iterator = load_paired_dataset(args.tsf_dev_data[B],
                                           args.dev_data[B],
                                           src_vocab,
                                           tgt_vocab,
                                           batch_size=args.batch_size)

        src_test_iterator = load_dataset(args.test_data[A],
                                         src_vocab,
                                         mode=constants.INFER)

        train_next_op = train_iterator.get_next()
        dev_next_op = dev_iterator.get_next()
        src_test_next_op = src_test_iterator.get_next()

    # === Create session
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.Session(config=tf_config)

    # === Train
    if args.mode == "train":
        # Prepare for model saver
Example 7
def main():
    # === Load arguments
    args = load_dual_arguments()
    dump_args_to_yaml(args, args.final_model_save_dir)

    cls_args = load_args_from_yaml(args.cls_model_save_dir)
    nmt_args = load_args_from_yaml(os.path.join(args.nmt_model_save_dir,
                                                '0-1'))
    nmt_args.learning_rate = args.learning_rate  # a smaller learning rate for RL
    min_seq_len = min(int(max(re.findall("\d", cls_args.filter_sizes))),
                      args.min_seq_len)

    # === Load global vocab
    word2id, word2id_size = load_vocab_dict(args.global_vocab_file)
    global_vocab, global_vocab_size = load_vocab(args.global_vocab_file)
    print("Global_vocab_size: %s" % global_vocab_size)
    global_vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
        args.global_vocab_file,
        vocab_size=global_vocab_size - constants.NUM_OOV_BUCKETS,
        default_value=constants.UNKNOWN_TOKEN)
    src_vocab = tgt_vocab = global_vocab
    src_vocab_size = tgt_vocab_size = global_vocab_size
    src_vocab_rev = tgt_vocab_rev = global_vocab_rev

    # === Create session
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.Session(config=tf_config)

    # === Initial and build model
    cls = cls_create_model(sess,
                           cls_args,
                           global_vocab_size,
                           mode=constants.EVAL,
                           load_pretrained_model=True)

    nmts_train = []
    nmts_random_infer = []
    nmts_greedy_infer = []
    train_data_next = []
    dev_data_next = []
    test_data_next = []
    train_iterators = []
    test_iterators = []
    paired_train_iterators = []
    paired_train_data_next = []
    final_model_save_paths = []

    # === Define nmt model
    for A, B in [(0, 1), (1, 0)]:
        with tf.device("/cpu:0"
                       ):  # Input pipeline should always be placed on the CPU.
            src_train_iterator = load_dataset(args.train_data[A],
                                              src_vocab,
                                              mode=constants.TRAIN,
                                              batch_size=args.batch_size,
                                              min_seq_len=min_seq_len)
            src_dev_iterator = load_dataset(args.dev_data[A],
                                            src_vocab,
                                            mode=constants.EVAL,
                                            batch_size=500)
            src_test_iterator = load_dataset(args.test_data[A],
                                             src_vocab,
                                             mode=constants.EVAL,
                                             batch_size=500)
            # Use (X', Y) to produce pseudo parallel data
            paired_src_train_iterator = load_paired_dataset(
                args.tsf_train_data[B],
                args.train_data[B],
                src_vocab,
                tgt_vocab,
                batch_size=args.batch_size,
                min_seq_len=min_seq_len)

            # To avoid frequent calls of `Iterator.get_next()`
            src_train_next_op = src_train_iterator.get_next()
            src_dev_next_op = src_dev_iterator.get_next()
            src_test_next_op = src_test_iterator.get_next()
            src_paired_train_next_op = paired_src_train_iterator.get_next()

            train_data_next.append(src_train_next_op)
            dev_data_next.append(src_dev_next_op)
            test_data_next.append(src_test_next_op)
            paired_train_data_next.append(src_paired_train_next_op)

            train_iterators.append(src_train_iterator)
            test_iterators.append(src_test_iterator)
            paired_train_iterators.append(paired_src_train_iterator)

        direction = "%s-%s" % (A, B)
        nmt_args.sampling_probability = 0.5

        # == Define train model
        nmt_train = nmt_create_model(sess,
                                     nmt_args,
                                     src_vocab_size,
                                     tgt_vocab_size,
                                     src_vocab_rev,
                                     tgt_vocab_rev,
                                     mode=constants.TRAIN,
                                     direction=direction,
                                     load_pretrained_model=True)

        # == Define inference model
        decode_type_before = nmt_args.decode_type

        nmt_args.decode_type = constants.RANDOM
        nmt_random_infer = nmt_create_model(sess,
                                            nmt_args,
                                            src_vocab_size,
                                            tgt_vocab_size,
                                            src_vocab_rev,
                                            tgt_vocab_rev,
                                            mode=constants.INFER,
                                            direction=direction,
                                            reuse=True)

        nmt_args.decode_type = constants.GREEDY
        nmt_greedy_infer = nmt_create_model(sess,
                                            nmt_args,
                                            src_vocab_size,
                                            tgt_vocab_size,
                                            src_vocab_rev,
                                            tgt_vocab_rev,
                                            mode=constants.INFER,
                                            direction=direction,
                                            reuse=True)

        nmt_args.decode_type = decode_type_before  # restore to previous setting

        nmts_train.append(nmt_train)
        nmts_random_infer.append(nmt_random_infer)
        nmts_greedy_infer.append(nmt_greedy_infer)

        # == Prepare for model saver
        print("Prepare for model saver")
        final_model_save_path = "%s/%s-%s/" % (args.final_model_save_dir, A, B)
        if not os.path.exists(final_model_save_path):
            os.makedirs(final_model_save_path)
        print("Model save path:", final_model_save_path)
        final_model_save_paths.append(final_model_save_path)

    # === Start train
    n_batch = -1
    global_step = -1
    A = 1
    B = 0
    G_scores = []

    for i in range(args.n_epoch):
        print("Epoch:%s" % i)
        sess.run([train_iterators[A].initializer])
        sess.run([train_iterators[B].initializer])
        sess.run([paired_train_iterators[A].initializer])
        sess.run([paired_train_iterators[B].initializer])

        while True:
            n_batch += 1
            global_step += 1
            if n_batch % args.eval_step == 0:
                print(
                    '===== Start (N_batch: %s, Steps: %s): Evaluate on test datasets ===== '
                    % (n_batch, global_step))
                _, dst_f_A = inference(nmts_greedy_infer[A],
                                       sess=sess,
                                       args=nmt_args,
                                       A=A,
                                       B=B,
                                       src_test_iterator=test_iterators[A],
                                       src_test_next=test_data_next[A],
                                       src_vocab_rev=src_vocab_rev,
                                       result_dir=args.final_tsf_result_dir,
                                       step=global_step)
                _, dst_f_B = inference(nmts_greedy_infer[B],
                                       sess=sess,
                                       args=nmt_args,
                                       A=B,
                                       B=A,
                                       src_test_iterator=test_iterators[B],
                                       src_test_next=test_data_next[B],
                                       src_vocab_rev=src_vocab_rev,
                                       result_dir=args.final_tsf_result_dir,
                                       step=global_step)
                t0 = time.time()
                # calculate accuracy score
                senti_acc = cls_evaluate_file(sess,
                                              cls_args,
                                              word2id,
                                              cls, [dst_f_A, dst_f_B],
                                              index_list=[B, A])
                # calculate bleu score
                bleu_score_A = bleu_evaluator.score(args.reference[A], dst_f_A)
                bleu_score_B = bleu_evaluator.score(args.reference[B], dst_f_B)
                bleu_score = (bleu_score_A + bleu_score_B) / 2

                G_score = np.sqrt(senti_acc * bleu_score)
                H_score = 2 / (1 / senti_acc + 1 / bleu_score)
                G_scores.append(G_score)
                print(
                    "%s-%s_Test(Batch:%d)\tSenti:%.3f\tBLEU(4ref):%.3f(A:%.3f+B:%.3f)"
                    "\tG-score:%.3f\tH-score:%.3f\tCost time:%.2f" %
                    (A, B, n_batch, senti_acc, bleu_score, bleu_score_A,
                     bleu_score_B, G_score, H_score, time.time() - t0))
                print(
                    '=====  End (N_batch: %s, Steps: %s): Evaluate on test datasets ====== '
                    % (n_batch, global_step))

            if n_batch % args.save_per_step == 0:
                print("=== Save model at dir:", final_model_save_paths[A],
                      final_model_save_paths[B])
                nmts_train[A].saver.save(sess,
                                         final_model_save_paths[A],
                                         global_step=global_step)
                nmts_train[B].saver.save(sess,
                                         final_model_save_paths[B],
                                         global_step=global_step)

            if n_batch % args.change_per_step == 0:
                A, B = B, A
                print(
                    "============= Change to train model {}-{} at {} steps =============="
                    .format(A, B, global_step))

            try:
                t0 = time.time()
                src = sess.run(train_data_next[A])  # get real data!!
                batch_size = np.shape(src["ids"])[0]
                decode_width = nmt_args.decode_width

                tile_src_ids = np.repeat(src["ids"], decode_width,
                                         axis=0)  # [batch_size*sample_size],
                tile_src_length = np.repeat(src['length'],
                                            decode_width,
                                            axis=0)
                tile_src_ids_in = np.repeat(src["ids_in"],
                                            decode_width,
                                            axis=0)
                tile_src_ids_out = np.repeat(src["ids_out"],
                                             decode_width,
                                             axis=0)
                tile_src_ids_in_out = np.repeat(src["ids_in_out"],
                                                decode_width,
                                                axis=0)

                random_predictions = sess.run(
                    nmts_random_infer[A].predictions,
                    feed_dict={
                        nmts_random_infer[A].input_ids: src['ids'],
                        nmts_random_infer[A].input_length: src['length']
                    })
                assert np.shape(
                    random_predictions["ids"])[1] == nmt_args.decode_width
                mid_ids_log_prob = np.reshape(random_predictions["log_probs"],
                                              -1)
                mid_ids, mid_ids_in, mid_ids_out, mid_ids_in_out, mid_ids_length = \
                    process_mid_ids(random_predictions["ids"], random_predictions["length"],
                                   min_seq_len, global_vocab_size)

                greedy_predictions = sess.run(
                    nmts_greedy_infer[A].predictions,
                    feed_dict={
                        nmts_greedy_infer[A].input_ids: src['ids'],
                        nmts_greedy_infer[A].input_length: src['length']
                    })
                assert np.shape(greedy_predictions["ids"])[1] == 1
                mid_ids_bs, mid_ids_in_bs, mid_ids_out_bs, mid_ids_in_out_bs, mid_ids_length_bs = \
                    process_mid_ids(greedy_predictions["ids"], greedy_predictions["length"],
                                   min_seq_len, global_vocab_size)

                # Get style reward from classifier
                cls_probs = sess.run(cls.probs,
                                     feed_dict={
                                         cls.x: mid_ids,
                                         cls.dropout: 1
                                     })
                y_hat = [p > 0.5 for p in cls_probs]  # 1 or 0
                cls_acu = [p == B for p in y_hat]  # accuracy: count the number of style B
                style_reward = np.array(cls_acu, dtype=np.float32)

                # Get content reward from backward reconstruction
                feed_dict = {
                    nmts_train[B].input_ids: mid_ids,
                    nmts_train[B].input_length: mid_ids_length,
                    nmts_train[B].target_ids_in: tile_src_ids_in,
                    nmts_train[B].target_ids_out: tile_src_ids_out,
                    nmts_train[B].target_length: tile_src_length
                }
                nmtB_loss = sess.run(
                    nmts_train[B].loss_per_sequence,
                    feed_dict=feed_dict)  # nmtB_loss = -log(prob)
                nmtB_reward = nmtB_loss * (-1)  # reward = log(prob) ==> bigger is better

                # Get baseline reward from backward reconstruction
                feed_dict = {
                    nmts_train[B].input_ids: mid_ids_bs,
                    nmts_train[B].input_length: mid_ids_length_bs,
                    nmts_train[B].target_ids_in: src["ids_in"],
                    nmts_train[B].target_ids_out: src["ids_out"],
                    nmts_train[B].target_length: src["length"]
                }
                nmtB_loss_bs = sess.run(nmts_train[B].loss_per_sequence,
                                        feed_dict=feed_dict)
                nmtB_reward_bs = nmtB_loss_bs * (-1)  # nmt baseline reward

                def norm(x):
                    x = np.array(x)
                    x = (x - x.mean()) / (x.std() + safe_divide_constant)
                    # x = x - x.min()  # to make sure > 0
                    return x

                def sigmoid(x,
                            x_trans=0.0,
                            x_scale=1.0,
                            max_y=1,
                            do_norm=False):
                    value = max_y / (1 + np.exp(-(x - x_trans) * x_scale))
                    if do_norm:
                        value = norm(value)
                    return value

                def norm_nmt_reward(x, baseline=None, scale=False):
                    x = np.reshape(x, (batch_size, -1))  # x is in [-16, 0]
                    dim1 = np.shape(x)[1]

                    if baseline is not None:
                        x_baseline = baseline  # [batch_size]
                    else:
                        x_baseline = np.mean(x, axis=1)  # [batch_size]
                    x_baseline = np.repeat(x_baseline,
                                           dim1)  # [batch_size*dim1]
                    x_baseline = np.reshape(x_baseline, (batch_size, dim1))

                    x_norm = x - x_baseline

                    if scale:
                        # x_norm = sigmoid(x_norm, x_scale=0.5)  # x_norm: [-12, 12] => [0, 1]
                        x_norm = sigmoid(x_norm)  # sharper normalization, x_norm: [-6, 6] => [0, 1]
                    return x_norm.reshape(-1)

                if args.use_baseline:
                    content_reward = norm_nmt_reward(nmtB_reward,
                                                     baseline=nmtB_reward_bs,
                                                     scale=True)
                else:
                    content_reward = norm_nmt_reward(nmtB_reward, scale=True)

                # Calculate reward
                style_reward += safe_divide_constant
                content_reward += safe_divide_constant
                reward = (1 + 0.25) * style_reward * content_reward / (
                    style_reward + 0.25 * content_reward)
                if args.normalize_reward:
                    reward = norm(reward)

                # == Update nmtA via policy gradient training
                feed_dict = {
                    nmts_train[A].input_ids: tile_src_ids,
                    nmts_train[A].input_length: tile_src_length,
                    nmts_train[A].target_ids_in: mid_ids_in,
                    nmts_train[A].target_ids_out: mid_ids_out,
                    nmts_train[A].target_length: mid_ids_length,
                    nmts_train[A].reward: reward
                }
                ops = [
                    nmts_train[A].lr_loss, nmts_train[A].loss,
                    nmts_train[A].loss_per_sequence, nmts_train[A].retrain_op
                ]
                nmtA_loss_final, nmtA_loss_, loss_per_sequence_, _ = sess.run(
                    ops, feed_dict=feed_dict)

                # == Update nmtA with pseudo data
                if args.MLE_anneal:
                    gap = min(
                        args.anneal_max_gap,
                        int(args.anneal_initial_gap *
                            np.power(args.anneal_rate,
                                     global_step / args.anneal_steps)))
                else:
                    gap = args.anneal_initial_gap

                if n_batch % gap == 0:
                    # Update nmtA using the original pseudo data (used as pre-training).
                    # This is not an ideal way, since the quality of the pseudo-parallel data is not
                    # acceptable for the later iterations of training.
                    # We highly recommend adopting back-translation to generate the pseudo-parallel data on the fly.
                    if "pseudo" in args.teacher_forcing:
                        data = sess.run(
                            paired_train_data_next[A])  # get real data!!
                        feed_dict = {
                            nmts_train[A].input_ids: data["ids"],
                            nmts_train[A].input_length: data["length"],
                            nmts_train[A].target_ids_in: data["trans_ids_in"],
                            nmts_train[A].target_ids_out:
                            data["trans_ids_out"],
                            nmts_train[A].target_length: data["trans_length"],
                        }
                        nmtA_pse_loss_, _ = sess.run(
                            [nmts_train[A].loss, nmts_train[A].train_op],
                            feed_dict=feed_dict)

                    # Update nmtB using pseudo data generated via back_translation (on-the-fly)
                    if "back_trans" in args.teacher_forcing:
                        feed_dict = {
                            nmts_train[B].input_ids: mid_ids_bs,
                            nmts_train[B].input_length: mid_ids_length_bs,
                            nmts_train[B].target_ids_in: src["ids_in"],
                            nmts_train[B].target_ids_out: src["ids_out"],
                            nmts_train[B].target_length: src["length"],
                        }
                        nmtB_loss_, _ = sess.run(
                            [nmts_train[B].loss, nmts_train[B].train_op],
                            feed_dict=feed_dict)

            except tf.errors.OutOfRangeError as e:  # next epoch
                print("===== DualTrain: Total N batch:{}\tCost time:{} =====".
                      format(n_batch,
                             time.time() - t0))
                n_batch = -1
                break
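For reference, the reward combination used above is a weighted harmonic mean of the style and content rewards (an F-beta-style score); a generic form, where beta = 0.5 reproduces the 0.25 constants in the snippet:

import numpy as np

def combine_rewards(style_reward, content_reward, beta=0.5):
    # (1 + beta^2) * s * c / (s + beta^2 * c); beta = 0.5 gives the 0.25 factors used above.
    b2 = beta * beta
    return (1 + b2) * style_reward * content_reward / (style_reward + b2 * content_reward)
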
Example 8
global_data = np.load(GLOBAL_DATA_FILE_NAME).astype(np.float32)

# graph loading
max_neighborhood_size = 0
rag_file = open(RAG_FILE_NAME, 'rb')
rag = pickle.load(rag_file)
rag_file.close()
nodes = rag.nodes()
for node_id in nodes:
    max_neighborhood_size = max(len(list(rag.neighbors(node_id))),
                                max_neighborhood_size)
fold = int(sys.argv[1])

print("Fold numero: ", fold)
x_train, x_validation, x_test, y_train, y_validation, y_test, id_train, id_validation, id_test = load_dataset(
    str(fold))

model = STARCANE(units=512, dropout_rate=0.4, n_classes=N_CLASSES)

train_dims = get_neighborhood_sizes(id_train, rag)
valid_dims = get_neighborhood_sizes(id_validation, rag)
test_dims = get_neighborhood_sizes(id_test, rag)

loss_object = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

ckpt = tf.train.Checkpoint(step=tf.Variable(1),
                           optimizer=optimizer,
                           model=model)
manager = tf.train.CheckpointManager(ckpt,
                                     OUTPUT_FOLDER + "_" + str(fold),
Example 9
# Enable CuDNN optimization
torch.backends.cudnn.benchmark = True
# Handling cuda
args.cuda = not args.device == 'cpu' and torch.cuda.is_available()
args.device = torch.device(args.device if torch.cuda.is_available() else 'cpu')
print('Optimization will be on ' + str(args.device) + '.')

"""
###################
Basic definitions
################### 
"""
print('[Loading dataset]')
ref_split = args.path + '/reference_split_' + args.dataset + '_' + args.data + '.npz'
if args.train_type == 'random' or not os.path.exists(ref_split):
    train_loader, valid_loader, test_loader, args = load_dataset(args)
    # Take fixed batch
    fixed_data, fixed_params, fixed_meta, fixed_audio = next(iter(test_loader))
    fixed_data, fixed_params = fixed_data.to(args.device), fixed_params.to(args.device)
    fixed_batch = (fixed_data, fixed_params, fixed_meta, fixed_audio)
    if (args.train_type == 'fixed'):
        np.savez(ref_split, [train_loader, valid_loader, test_loader])
else:
    data = np.load(ref_split)['arr_0']
    train_loader, valid_loader, test_loader = data[0], data[1], data[2]
    fixed_data, fixed_params, fixed_meta, fixed_audio = next(iter(test_loader))
    fixed_data, fixed_params = fixed_data.to(args.device), fixed_params.to(args.device)
    fixed_batch = (fixed_data, fixed_params, fixed_meta, fixed_audio)
    args.output_size = train_loader.dataset.output_size
    args.input_size = train_loader.dataset.input_size
Example 10
    return pred


if __name__ == "__main__":

    # config
    blend_list = [['20200812-125739_bert.csv', 0.35],
                  ['20200814-131828_bert.csv', 0.05],
                  ['20200813-210634_nn_word2vec.csv', 0.2],
                  ['20200826-205330_nn_glove.csv', 0.05],
                  ['20200826-202548_nn_word2vec.csv', 0.05],
                  ['20200810-203854_lgb.csv', 0.25],
                  ['20200825-211210_lgb_tfidf.csv', 0.05]]

    # prepare submit
    _, test_df, _ = load_dataset()
    submit = pd.DataFrame([])
    submit['id'] = test_df['id']
    submit[1] = 0
    submit[2] = 0
    submit[3] = 0
    submit[4] = 0

    # combine
    for filename, weight in blend_list:
        filepath = os.path.join(SUBMITS_DIR, filename)
        sub = pd.read_csv(filepath, names=('id', 'pred'))
        sub = add_onehot(sub)
        for i in range(1, 5):
            submit[i] += sub[i] * weight
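The blend weights above sum to 1.0 (0.35 + 0.05 + 0.2 + 0.05 + 0.05 + 0.25 + 0.05); a small guard (not in the original) makes that assumption explicit before combining:

import math

total_weight = sum(weight for _, weight in blend_list)
assert math.isclose(total_weight, 1.0), "blend weights sum to %s, expected 1.0" % total_weight
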
Example 11
    # Input pipeline should always be placed on the CPU.
    with tf.device("/cpu:0"):

        print("args.pseudo_data:", args.pseudo_data)

        if args.mode == "train":
            train_iterator = load_paired_dataset(args.pseudo_data,
                                                 vocab,
                                                 batch_size=args.batch_size,
                                                 min_seq_len=args.min_seq_len,
                                                 max_seq_len=args.max_seq_len)
            train_next_op = train_iterator.get_next()
        else:
            src_test_iterator = load_dataset(args.test_data,
                                             vocab,
                                             mode=constants.INFER,
                                             min_seq_len=args.min_seq_len,
                                             max_seq_len=args.max_seq_len)
            src_test_next_op = src_test_iterator.get_next()

    # Step 2: create session
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.4
    # Limit GPU memory: don't pre-allocate; allocate as needed.
    sess = tf.Session(config=tf_config)

    # Step 3: train model
    if args.mode == "train":
        # Prepare for model saver
Example 12
        build_vocab_from_file(args.train_data, args.vocab_file)
    vocab, vocab_size = load_vocab(args.vocab_file)
    print('Vocabulary size:%s' % vocab_size)

    vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
        args.vocab_file,  # target vocabulary file (each line has a word)
        vocab_size=vocab_size - constants.NUM_OOV_BUCKETS,
        default_value=constants.UNKNOWN_TOKEN)

    # Input pipeline should always be placed on the CPU.
    with tf.device("/cpu:0"):

        if args.mode == constants.TRAIN:
            train_data_iterator = load_dataset(args.train_data,
                                               vocab,
                                               constants.TRAIN,
                                               batch_size=args.batch_size,
                                               min_seq_len=args.min_seq_len,
                                               max_seq_len=args.max_seq_len)
            train_data_next_op = train_data_iterator.get_next()

            dev_data_iterator = load_dataset(args.dev_data,
                                             vocab,
                                             constants.EVAL,
                                             batch_size=100,
                                             min_seq_len=args.min_seq_len,
                                             max_seq_len=args.max_seq_len)
            dev_data_next_op = dev_data_iterator.get_next()

        test_data_iterator = load_dataset(args.test_data,
                                          vocab,
                                          constants.TEST,
Example 13
def main():
    args = load_cycle_arguments()
    dump_args_to_yaml(args, args.final_model_save_dir)
    print(args)

    reg_args = load_args_from_yaml(args.reg_model_save_dir)
    s2ss_args = load_args_from_yaml(args.s2ss_model_save_dir)
    # s2ss_args.seq2seq_model_save_dir = args.seq2seq_model_save_dir
    s2ss_args.RL_learning_rate = args.RL_learning_rate  # a smaller learning_rate for RL
    s2ss_args.MLE_learning_rate = args.MLE_learning_rate  # a smaller learning_rate for MLE
    s2ss_args.batch_size = args.batch_size  # a bigger batch_size for RL
    min_seq_len = args.min_seq_len
    max_seq_len = args.max_seq_len

    # === Load global vocab
    vocab, vocab_size = load_vocab(args.vocab_file)
    print("Vocabulary size: %s" % vocab_size)
    vocab_rev = tf.contrib.lookup.index_to_string_table_from_file(
        args.vocab_file,  # target vocabulary file (each line has a word)
        vocab_size=vocab_size - constants.NUM_OOV_BUCKETS,
        default_value=constants.UNKNOWN_TOKEN)

    bleu_evaluator = BLEUEvaluator()

    # === Create session
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.4
    # Limit GPU memory: don't pre-allocate; allocate as needed.
    sess = tf.Session(config=tf_config)

    # === Load dataset
    # Input pipeline should always be placed on the CPU.
    with tf.device("/cpu:0"):
        train_data_iterator = load_dataset(args.train_data,
                                           vocab,
                                           mode=constants.TRAIN,
                                           batch_size=args.batch_size,
                                           min_seq_len=min_seq_len,
                                           max_seq_len=max_seq_len)
        dev_data_iterator = load_dataset(args.dev_data,
                                         vocab,
                                         mode=constants.EVAL,
                                         batch_size=100,
                                         min_seq_len=min_seq_len,
                                         max_seq_len=max_seq_len)
        test_data_iterator = load_dataset(args.test_data,
                                          vocab,
                                          mode=constants.TEST,
                                          batch_size=100,
                                          min_seq_len=min_seq_len,
                                          max_seq_len=max_seq_len)
        paired_train_data_iterator = load_paired_dataset(
            args.pseudo_data,
            vocab,
            batch_size=args.batch_size,
            min_seq_len=min_seq_len,
            max_seq_len=max_seq_len)

        # To avoid a high number of `Iterator.get_next()` calls
        train_data_next = train_data_iterator.get_next()
        dev_data_next = dev_data_iterator.get_next()
        test_data_next = test_data_iterator.get_next()
        paired_train_data_next = paired_train_data_iterator.get_next()

    # === Initialize and build Seq2SentiSeq model
    load_model = not args.no_pretrain
    s2ss_train = s2ss_create_model(sess,
                                   s2ss_args,
                                   constants.TRAIN,
                                   vocab_size,
                                   load_pretrained_model=load_model)

    decode_type_before = s2ss_args.decode_type
    s2ss_args.decode_type = constants.GREEDY
    s2ss_greedy_infer = s2ss_create_model(sess,
                                          s2ss_args,
                                          constants.INFER,
                                          vocab_size,
                                          reuse=True)
    s2ss_args.decode_type = constants.RANDOM
    s2ss_random_infer = s2ss_create_model(sess,
                                          s2ss_args,
                                          constants.INFER,
                                          vocab_size,
                                          reuse=True)
    s2ss_args.decode_type = decode_type_before

    # === Load pre-trained sentiment regression model
    eval_reg = reg_create_model(sess,
                                reg_args,
                                vocab_size,
                                mode=constants.EVAL,
                                load_pretrained_model=True)

    print("Prepare for model saver")
    final_model_save_path = args.final_model_save_dir

    # === Start train
    n_batch = -1
    global_step = -1

    for i in range(args.n_epoch):
        print("Epoch:%s" % i)

        sess.run([train_data_iterator.initializer])
        sess.run([paired_train_data_iterator.initializer])

        # Reward measuring the sentiment transformation of the generated sequences
        senti_reward_all = {
            "upper": [],  # reward of the ground truth (existing sequences in the train dataset)
            "lower": [],  # reward of the baseline: randomly generated sequences
            "real": [],   # reward of the actually generated sequences
        }
        # Reward measuring the content preservation of the generated sequences
        cont_reward_all = {
            "upper": [],  # reward of the ground truth (existing sequences in the train dataset)
            "lower": [],  # reward of the baseline: randomly generated sequences
            "real": [],   # reward of the actually generated sequences
        }
        reward_all = []
        reward_expect_all = []  # reward expectation: r*p(y_k|x)

        while True:
            n_batch += 1
            global_step += 1
            if n_batch % args.eval_step == 0:
                print(
                    '\n================ N_batch / Global_step (%s / %s): Evaluate on test datasets ================\n'
                    % (n_batch, global_step))
                dst_fs = inference(
                    s2ss_greedy_infer,
                    sess=sess,
                    args=s2ss_args,
                    decoder_s=constants.SENT_LIST,
                    src_test_iterator=test_data_iterator,
                    src_test_next=test_data_next,
                    vocab_rev=vocab_rev,
                    result_dir=args.final_tsf_result_dir,
                    step=global_step if args.save_each_step else global_step)
                t0 = time.time()
                bleu_scores = bleu_evaluator.score(args.reference,
                                                   dst_fs[1],
                                                   all_bleu=True)
                print(
                    "Test(Batch:%d)\tBLEU-1:%.3f\tBLEU-2:%.3f\tBLEU:%.3f\tCost time:%.2f"
                    % (n_batch, bleu_scores[1], bleu_scores[2], bleu_scores[0],
                       time.time() - t0))

                # improve the diversity of generated sentences
                dst_fs = inference(
                    s2ss_random_infer,
                    sess=sess,
                    args=s2ss_args,
                    decoder_s=constants.SENT_LIST,
                    src_test_iterator=test_data_iterator,
                    src_test_next=test_data_next,
                    vocab_rev=vocab_rev,
                    result_dir=args.final_tsf_result_dir + '-sample',
                    step=global_step if args.save_each_step else global_step)
                t0 = time.time()
                bleu_scores = bleu_evaluator.score(args.reference,
                                                   dst_fs[1],
                                                   all_bleu=True)
                print(
                    "Test(Batch:%d)\tBLEU-1:%.3f\tBLEU-2:%.3f\tBLEU:%.3f\tCost time:%.2f ===> Sampled results"
                    % (n_batch, bleu_scores[1], bleu_scores[2], bleu_scores[0],
                       time.time() - t0))

            if n_batch % args.save_per_step == 0:
                print("Save model at dir:", final_model_save_path)
                s2ss_train.saver.save(sess,
                                      final_model_save_path,
                                      global_step=n_batch)

            try:
                t0 = time.time()
                src = sess.run(train_data_next)  # get real data!!
                batch_size = np.shape(src["ids"])[0]
                decode_width = s2ss_args.decode_width

                t0 = time.time()

                tile_src_ids = np.repeat(src["ids"], decode_width,
                                         axis=0)  # [batch_size*beam_size],
                tile_src_length = np.repeat(src['length'],
                                            decode_width,
                                            axis=0)
                tile_src_ids_in = np.repeat(src["ids_in"],
                                            decode_width,
                                            axis=0)
                tile_src_ids_out = np.repeat(src["ids_out"],
                                             decode_width,
                                             axis=0)
                tile_src_ids_in_out = np.repeat(src["ids_in_out"],
                                                decode_width,
                                                axis=0)
                tile_src_decoder_s = np.repeat(src["senti"],
                                               decode_width,
                                               axis=0)

                tile_tgt_decoder_s = get_tareget_sentiment(size=batch_size)
                tgt_decoder_s = get_tareget_sentiment(size=batch_size,
                                                      random=True)

                t0 = time.time()

                # random
                random_predictions, log_probs = sess.run(
                    [
                        s2ss_random_infer.predictions,
                        s2ss_random_infer.log_probs
                    ],
                    feed_dict={
                        s2ss_random_infer.encoder_input: tile_src_ids,
                        s2ss_random_infer.encoder_input_len: tile_src_length,
                        s2ss_random_infer.decoder_s: tile_tgt_decoder_s
                    })

                mid_ids_log_prob = log_probs
                mid_ids, mid_ids_in, mid_ids_out, mid_ids_in_out, mid_ids_length = \
                    process_mid_ids(random_predictions, min_seq_len, max_seq_len, vocab_size)
                assert tile_src_length[0] == tile_src_length[decode_width - 1]

                # baseline
                greedy_predictions = sess.run(
                    s2ss_greedy_infer.predictions,
                    feed_dict={
                        s2ss_greedy_infer.encoder_input: src['ids'],
                        s2ss_greedy_infer.encoder_input_len: src['length'],
                        s2ss_greedy_infer.decoder_s: tgt_decoder_s
                    })

                mid_ids_bs, mid_ids_in_bs, mid_ids_out_bs, mid_ids_in_out_bs, mid_ids_length_bs = \
                    process_mid_ids(greedy_predictions, min_seq_len, max_seq_len, vocab_size)

                t0 = time.time()

                # == get reward from sentiment scorer/regressor
                def get_senti_reward(pred, gold):
                    if args.scale_sentiment:
                        gold = gold * 0.2 - 0.1  # todo: move this function to one file
                    reward_ = 1 / (np.fabs(pred - gold) + 1.0)
                    return reward_

                # real sentiment reward
                pred_senti_score = sess.run(eval_reg.predict_score,
                                            feed_dict={
                                                eval_reg.x: mid_ids,
                                                eval_reg.sequence_length: mid_ids_length
                                            })
                senti_reward = get_senti_reward(pred_senti_score,
                                                tile_tgt_decoder_s)

                # upper bound of sentiment reward
                upper_pred_senti_score = sess.run(eval_reg.predict_score,
                                                  feed_dict={
                                                      eval_reg.x: src["ids"],
                                                      eval_reg.sequence_length: src["length"]
                                                  })
                upper_senti_reward = get_senti_reward(upper_pred_senti_score,
                                                      src["senti"])

                # lower bound of sentiment reward
                lower_pred_senti_score = sess.run(
                    eval_reg.predict_score,
                    feed_dict={
                        eval_reg.x: np.random.choice(vocab_size, np.shape(tile_src_ids)),
                        eval_reg.sequence_length: tile_src_length
                    })
                lower_senti_reward = get_senti_reward(lower_pred_senti_score,
                                                      tile_src_decoder_s)

                # == get reward from backward reconstruction
                feed_dict = {
                    s2ss_train.encoder_input: mid_ids,
                    s2ss_train.encoder_input_len: mid_ids_length,
                    s2ss_train.decoder_input: tile_src_ids_in,
                    s2ss_train.decoder_target: tile_src_ids_out,
                    s2ss_train.decoder_target_len: tile_src_length + 1,
                    s2ss_train.decoder_s: tile_src_decoder_s,
                }

                loss = sess.run(s2ss_train.loss_per_sequence,
                                feed_dict=feed_dict)
                cont_reward = loss * (-1)  # bigger is better

                t0 = time.time()

                # get baseline content reward
                feed_dict = {
                    s2ss_train.encoder_input: mid_ids_bs,
                    s2ss_train.encoder_input_len: mid_ids_length_bs,
                    s2ss_train.decoder_input: src["ids_in"],
                    s2ss_train.decoder_target: src["ids_out"],
                    s2ss_train.decoder_target_len: src["length"] + 1,
                    s2ss_train.decoder_s: src["senti"],
                }
                loss_bs = sess.run(s2ss_train.loss_per_sequence,
                                   feed_dict=feed_dict)
                cont_reward_bs = loss_bs * (-1)  # baseline content reward

                # get lower bound of content reward
                feed_dict = {
                    s2ss_train.encoder_input: np.random.choice(vocab_size, np.shape(mid_ids)),
                    s2ss_train.encoder_input_len: mid_ids_length,
                    s2ss_train.decoder_input: np.random.choice(vocab_size, np.shape(tile_src_ids_in)),
                    s2ss_train.decoder_target: np.random.choice(vocab_size, np.shape(tile_src_ids_out)),
                    s2ss_train.decoder_target_len: tile_src_length + 1,
                    s2ss_train.decoder_s: tile_src_decoder_s,
                }
                lower_loss = sess.run(s2ss_train.loss_per_sequence,
                                      feed_dict=feed_dict)
                lower_cont_reward = lower_loss * (-1)  # bigger is better

                def norm(x):
                    x = np.array(x)
                    x = (x - x.mean()) / (x.std() + 1e-6)  # safe divide
                    # x = x - x.min()  # to make x > 0
                    return x

                def sigmoid(x,
                            x_trans=0.0,
                            x_scale=1.0,
                            max_y=1,
                            do_norm=False):
                    value = max_y / (1 + np.exp(-(x - x_trans) * x_scale))
                    if do_norm:
                        value = norm(value)
                    return value

                def norm_s2ss_reward(x,
                                     baseline=None,
                                     scale=False,
                                     norm=False):
                    x = np.reshape(x, (batch_size, -1))  # x in [-16, 0]
                    dim1 = np.shape(x)[1]

                    if baseline is not None:
                        x_baseline = baseline  # [batch_size]
                    else:
                        x_baseline = np.mean(x, axis=1)  # [batch_size]
                    x_baseline = np.repeat(x_baseline,
                                           dim1)  # [batch_size*dim1]
                    x_baseline = np.reshape(x_baseline, (batch_size, dim1))

                    x_norm = x - x_baseline

                    if scale:
                        x_norm = sigmoid(x_norm)
                    if norm:
                        x_norm = 2 * x_norm - 1  # new x_norm in [-1, 1]
                    return x_norm.reshape(-1)

                if args.use_baseline:
                    if global_step < 1:  # only print at the first step
                        print('%%% use_baseline')
                    cont_reward = norm_s2ss_reward(cont_reward,
                                                   baseline=cont_reward_bs,
                                                   scale=True)
                    lower_cont_reward = norm_s2ss_reward(
                        lower_cont_reward, baseline=cont_reward_bs, scale=True)

                elif args.scale_cont_reward:
                    if global_step < 1:  # only print at the first step
                        print('%%% scale_cont_reward')
                    cont_reward = sigmoid(
                        cont_reward, x_trans=-3)  # [-6, -2] => [0.1, 0.78]
                    lower_cont_reward = sigmoid(lower_cont_reward, x_trans=-3)

                if args.scale_senti_reward:
                    if global_step < 1:  # only print at the first step
                        print('%%% scale_senti_reward')
                    senti_reward = sigmoid(
                        senti_reward, x_trans=-0.8,
                        x_scale=15)  # [0.6, 1.0] => [0.04, 0.95]
                    lower_senti_reward = sigmoid(lower_senti_reward,
                                                 x_trans=-0.8,
                                                 x_scale=15)
                    upper_senti_reward = sigmoid(upper_senti_reward,
                                                 x_trans=-0.8,
                                                 x_scale=15)

                cont_reward_all["lower"].extend(lower_cont_reward)
                cont_reward_all["real"].extend(cont_reward)

                senti_reward_all["upper"].extend(upper_senti_reward)
                senti_reward_all["lower"].extend(lower_senti_reward)
                senti_reward_all["real"].extend(senti_reward)

                senti_reward += safe_divide_constant
                cont_reward += safe_divide_constant

                if args.increase_beta:
                    beta = min(1, 0.1 * global_step / args.increase_step)
                else:
                    beta = 1

                reward_merge_type = 'H(sentiment, content), beta=%.2f' % beta  # enlarge the influence of senti_reward
                reward = (1 + beta * beta) * senti_reward * cont_reward / (
                    beta * beta * senti_reward + cont_reward)

                reward_all.extend(reward)
                reward_expect_all.extend(reward * np.exp(mid_ids_log_prob))

                # policy gradient training
                if not args.no_RL:
                    feed_dict = {
                        s2ss_train.encoder_input: tile_src_ids,
                        s2ss_train.encoder_input_len: tile_src_length,
                        s2ss_train.decoder_input: mid_ids_in,
                        s2ss_train.decoder_target: mid_ids_out,
                        s2ss_train.decoder_target_len: mid_ids_length + 1,
                        s2ss_train.decoder_s: tile_tgt_decoder_s,
                        s2ss_train.reward: reward
                    }
                    sess.run([s2ss_train.rl_loss, s2ss_train.retrain_op],
                             feed_dict=feed_dict)

                # Teacher forcing data types:
                #  1. back translation data (greedy decode)
                #  2. back translation data (random decode)
                #  3. back translation noise data
                #  4. pseudo data
                #  5. same data (x->x)
                #  6. same_noise (x'->x)

                if "back_trans" in args.teacher_forcing:
                    if args.MLE_decay:
                        if args.MLE_decay_type == "linear":
                            gap = min(10, 2 + global_step /
                                      args.MLE_decay_steps)  # 10 after 1 epoch
                        else:
                            gap = min(
                                5,
                                int(1 / np.power(
                                    args.MLE_decay_rate,
                                    global_step / args.MLE_decay_steps)))
                    else:
                        gap = 1
                    if n_batch % gap == 0:
                        if global_step < 1:
                            print(
                                '$$$Update B use back-translated data (Update gap:%s)'
                                % gap)
                        # Update Seq2SentiSeq with previous model generated data  # senti-, bleu+
                        feed_dict = {
                            s2ss_train.encoder_input: mid_ids_bs,
                            s2ss_train.encoder_input_len: mid_ids_length_bs,
                            s2ss_train.decoder_input: src["ids_in"],
                            s2ss_train.decoder_target: src["ids_out"],
                            s2ss_train.decoder_target_len: src["length"] + 1,
                            s2ss_train.decoder_s: src["senti"],
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "back_trans_random" in args.teacher_forcing:
                    if args.MLE_decay:
                        if args.MLE_decay_type == "linear":
                            gap = min(10, 2 + global_step //
                                      args.MLE_decay_steps)  # integer gap, reaches 10 after ~1 epoch
                        else:
                            gap = min(
                                5,
                                int(1 / np.power(
                                    args.MLE_decay_rate,
                                    global_step / args.MLE_decay_steps)))
                    else:
                        gap = 1
                    if n_batch % gap == 0:
                        if global_step < 1:
                            print(
                                '$$$Update B use back_trans_random data (Update gap:%s)'
                                % gap)
                        # Update Seq2SentiSeq with previous model generated data with noise
                        feed_dict = {
                            s2ss_train.encoder_input: mid_ids,
                            s2ss_train.encoder_input_len: mid_ids_length,
                            s2ss_train.decoder_input: tile_src_ids_in,
                            s2ss_train.decoder_target: tile_src_ids_out,
                            s2ss_train.decoder_target_len: tile_src_length + 1,
                            s2ss_train.decoder_s: tile_src_decoder_s,
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "back_trans_noise" in args.teacher_forcing:
                    if args.MLE_decay:
                        if args.MLE_decay_type == "linear":
                            gap = min(10, 2 + global_step //
                                      args.MLE_decay_steps)  # integer gap, reaches 10 after ~1 epoch
                        else:
                            gap = min(
                                5,
                                int(1 / np.power(
                                    args.MLE_decay_rate,
                                    global_step / args.MLE_decay_steps)))
                    else:
                        gap = 1
                    if n_batch % gap == 0:
                        if global_step < 1:
                            print(
                                '$$$Update B use back_trans_noise data (Update gap:%s)'
                                % gap)
                        # Update Seq2SentiSeq with previous model generated data with noise
                        noise_ids, noise_ids_length = add_noise(
                            mid_ids_bs, mid_ids_length_bs)
                        feed_dict = {
                            s2ss_train.encoder_input: noise_ids,
                            s2ss_train.encoder_input_len: noise_ids_length,
                            s2ss_train.decoder_input: src["ids_in"],
                            s2ss_train.decoder_target: src["ids_out"],
                            s2ss_train.decoder_target_len: src["length"] + 1,
                            s2ss_train.decoder_s: src["senti"],
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "pseudo_data" in args.teacher_forcing:  # balance
                    if args.MLE_decay:
                        if args.MLE_decay_type == "linear":
                            gap = min(10, 3 + global_step //
                                      args.MLE_decay_steps)  # integer gap, reaches 10 after ~1 epoch
                        else:
                            gap = min(
                                100,
                                int(3 / np.power(
                                    args.MLE_decay_rate,
                                    global_step / args.MLE_decay_steps)))
                    else:
                        gap = 3
                    if n_batch % gap == 0:
                        if global_step < 1:
                            print('$$$Update use pseudo data (Update gap:%s)' %
                                  gap)
                        data = sess.run(
                            paired_train_data_next)  # get real data!!
                        feed_dict = {
                            s2ss_train.encoder_input: data["source_ids"],
                            s2ss_train.encoder_input_len:
                            data["source_length"],
                            s2ss_train.decoder_input: data["target_ids_in"],
                            s2ss_train.decoder_target: data["target_ids_out"],
                            s2ss_train.decoder_target_len:
                            data["target_length"] + 1,
                            s2ss_train.decoder_s: data["target_senti"]
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "same" in args.teacher_forcing:
                    if args.same_decay:
                        if args.same_decay_type == "linear":
                            gap = min(
                                8, 2 + global_step //
                                args.same_decay_steps)  # integer gap, capped at 8
                        else:
                            gap = min(
                                10,
                                int(2 / np.power(
                                    args.same_decay_rate,
                                    global_step / args.same_decay_steps)))
                    else:
                        gap = 2
                    if n_batch % gap == 0:
                        print('$$$Update use same data (Update gap:%s)' % gap)
                        # Update Seq2SentiSeq with target output  # senti-, bleu+
                        feed_dict = {
                            s2ss_train.encoder_input: src["ids"],
                            s2ss_train.encoder_input_len: src["length"],
                            s2ss_train.decoder_input: src["ids_in"],
                            s2ss_train.decoder_target: src["ids_out"],
                            s2ss_train.decoder_target_len: src["length"] + 1,
                            s2ss_train.decoder_s: src["senti"]
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)

                if "same_noise" in args.teacher_forcing:
                    if args.same_decay:
                        if args.same_decay_type == "linear":
                            gap = min(
                                8, 2 + global_step //
                                args.same_decay_steps)  # integer gap, capped at 8
                        else:
                            gap = min(
                                10,
                                int(2 / np.power(
                                    args.same_decay_rate,
                                    global_step / args.same_decay_steps)))
                    else:
                        gap = 2
                    if n_batch % gap == 0:
                        print('$$$Update use same_noise data (Update gap:%s)' %
                              gap)
                        noise_ids, noise_ids_length = add_noise(
                            src["ids"], src["length"])
                        feed_dict = {
                            s2ss_train.encoder_input: noise_ids,
                            s2ss_train.encoder_input_len: noise_ids_length,
                            s2ss_train.decoder_input: src["ids_in"],
                            s2ss_train.decoder_target: src["ids_out"],
                            s2ss_train.decoder_target_len: src["length"] + 1,
                            s2ss_train.decoder_s: src["senti"]
                        }
                        sess.run([s2ss_train.loss, s2ss_train.train_op],
                                 feed_dict=feed_dict)
            except tf.errors.OutOfRangeError:  # next epoch
                print("Train---Total N batch:{}\tCost time:{}".format(
                    n_batch,
                    time.time() - t0))
                n_batch = -1
                break
Example No. 14
0
from models.classification.simple_net import simple_model
from utils.data import load_dataset

UC_MERCED_SIZE = 2100
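# train/test split sizes, presumably percentages of the 2100 UC Merced images (90% train / 10% test)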
TEST_SIZE = 10
TRAIN_SIZE = 100 - TEST_SIZE
batch_size = 128
epochs = 100
IMG_HEIGHT = 256
IMG_WIDTH = 256
N_CHANNELS = 3
NUM_CLASSES = 21

ds_test, ds_test_size, ds_train, ds_train_size = load_dataset(
    dataset='uc_merced',
    batch_size=batch_size,
    dataset_size=UC_MERCED_SIZE,
    train_size=TRAIN_SIZE,
    test_size=TEST_SIZE,
    img_shape=(IMG_WIDTH, IMG_HEIGHT))

model = simple_model((IMG_HEIGHT, IMG_WIDTH, N_CHANNELS), logits=NUM_CLASSES)

hist = model.fit(ds_train,
                 validation_data=ds_test,
                 steps_per_epoch=ds_train_size // batch_size,
                 validation_steps=ds_test_size // batch_size,
                 epochs=epochs)
Example No. 15
0
if __name__ == '__main__':
    from config import Config

    parser = setup_argparse()
    parser.add_argument('-d', '--dataset_name', default='adult')
    parser.add_argument('-e', '--enc_dec', default='le')
    parser.add_argument('-s', '--subset', action='store_true')  # type=bool would treat any non-empty string as True
    args = get_parser_args(parser)

    EncoderDecoder = LabelEncoderDecoder
    if args.enc_dec == 'ohe':
        EncoderDecoder = OneHotEncoderDecoder

    sample_config = Config(args.dataset_name, 'default')
    data_version = 'clean'
    if args.subset:
        assert args.dataset_name != 'adult'
        data_version = 'clean_subset'
    clean_data = load_dataset(args.dataset_name, data_version)

    enc_dec_object = EncoderDecoder(sample_config)
    encoded_data = enc_dec_object.encode(clean_data)
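    # the pdb breakpoints below pause execution so the encoded/decoded frames can be inspected interactively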
    pdb.set_trace()

    if args.enc_dec == 'ohe':
        decoded_data = enc_dec_object.decode(clean_data, encoded_data)
    elif args.enc_dec == 'le':
        decoded_data = enc_dec_object.decode(encoded_data)

    pdb.set_trace()
Example No. 16
0
def main():
    filename = sys.argv[1]

    X = data.load_dataset('{}_X.npy'.format(filename))
    Y = data.load_dataset('{}_Y.npy'.format(filename))

    model = network.build_model()

    # vizualize the model
    network.vizualize_model(model, filename)

    # 80:20
    # print network.train_model(model, (X, Y))
    # score = model.evaluate(X, Y, verbose=0)
    # print 'Test score:', score[0]

    # K-Fold
    val_error = []
    losses = []
    kf = KFold(n_splits=FOLDS, shuffle=True, random_state=None)
    for train_index, val_index in kf.split(X):
        # Generate the dataset for this fold
        X_train, X_val = X[train_index], X[val_index]
        Y_train, Y_val = Y[train_index], Y[val_index]
        print(X_train.shape, X_val.shape)
        print(Y_train.shape, Y_val.shape)

        # Train the model on this dataset
        train_history, loss_history = network.train_model(
            model, (X_train, Y_train), (X_val, Y_val))

        # TODO: save the losses to a file.
        losses.append(loss_history.losses)

        # Evaluate the model (keep only the loss if evaluate() also returns metrics)
        fold_error = np.atleast_1d(model.evaluate(X_val, Y_val, verbose=0))[0]
        val_error.append(fold_error)
        print('Validation error:', fold_error)

        # NOTE: hack to run only one split
        break

    # Print final K-Fold error
    val_error = np.asarray(val_error)
    print("K-Fold Error: %0.2f (+/- %0.2f)" % (val_error.mean(),
                                               val_error.std() * 2))

    # Predict some labels
    # TODO: modify this to suit our image needs.
    counter = 0
    while counter < 1:
        idx = random.choice(range(Y.shape[0]))
        prediction = network.predict_model(model,
                                           np.expand_dims(X[idx, :], axis=0))
        print('Testing: sample={}, prediction={}, actual={}'.format(
            idx, prediction, Y[idx, :]))

        # save this file
        data.generate_image(prediction)
        counter += 1

    # dump the model to the file
    network.save_model(model, filename)
Example No. 17
0

args = parse_args()

if __name__ == "__main__":

    # hyper params
    seed = 1
    fix_seed(seed)

    n_folds = 5
    epochs = 200
    batch_size = 512

    # data
    train_df, test_df, sample_submit_df = load_dataset()
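    # glove() presumably builds GloVe-embedding feature matrices from the train/test text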
    X, X_test = glove(train_df, test_df)

    X = X.values.astype('float32')
    X_test = X_test.values.astype('float32')
    y = pd.get_dummies(train_df['jobflag']).values.astype('float32')

    trainset = JobInfoDataset(X, y, jobflag=train_df['jobflag'].values)
    testset = JobInfoDataset(X_test)

    # weight
    weight = get_weight(train_df)

    # ---------- Kfold ---------- #
    preds_for_test = [[0 for _ in range(4)] for _ in range(len(X_test))]
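    # per-sample placeholder for predictions over the 4 job classes, to be filled during cross-validation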
    cv = StratifiedKFold(n_splits=n_folds, shuffle=False)  # random_state has no effect (and newer sklearn rejects it) when shuffle=False
Example No. 18
0
        scaler2 = MinMaxScaler()
        data_scaled = scaler2.fit_transform(data)
        joblib.dump(scaler2, self.minmax_scaler_path)

        return data_scaled  # returns numpy array

    def inverse_scale(self, data_scaled):
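        # undo the transforms in reverse order: min-max scaling first, then standardization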
        scaler2 = joblib.load(self.minmax_scaler_path)
        data_unscaled = scaler2.inverse_transform(data_scaled)

        scaler = joblib.load(self.std_scaler_path)
        data = scaler.inverse_transform(data_unscaled)

        return data  # returns numpy array


if __name__ == '__main__':
    from config import Config

    sample_config = Config('lacity', 'default')
    clean_data = load_dataset('lacity', 'clean')

    enc_dec_object = LabelEncoderDecoder(sample_config)
    encoded_data = enc_dec_object.encode(clean_data)

    preprocess_obj = Preprocessor(sample_config)
    pdb.set_trace()
    scaled = preprocess_obj.scale(encoded_data)
    inv_scaled = preprocess_obj.inverse_scale(scaled)