def check_val_stats(model, pred_opt, data, hparams, X_ph, Y_ph, exp, sess, epoch):
    """
    Runs through validation data to check the overall mean loss
    :param model:
    :param data:
    :param hparams:
    :param X_ph:
    :param Y_ph:
    :param exp:
    :param sess:
    :param epoch:
    :return:
    """
    print('checking val loss...')
    max_val_batches = 100
    val_gen = data.val_generator(batch_size=hparams.batch_size, max_epochs=1)

    overall_err = []
    overall_p_1 = []
    overall_p_2 = []
    progbar = Progbar(target=max_val_batches, width=50)
    for batch_nb in range(max_val_batches):
        batch_X, batch_Y = next(val_gen)
        if len(batch_X) == 0:
            continue

        # aggregate data
        feed_dict = {
            X_ph: batch_X,
            Y_ph: batch_Y
        }

        # calculate metrics
        val_err = model.eval(session=sess, feed_dict=feed_dict)
        precision_at_1 = test_precision_at_k(pred_opt, feed_dict, k=1, sess=sess)
        precision_at_2 = test_precision_at_k(pred_opt, feed_dict, k=2, sess=sess)

        # track metrics for means
        overall_err.append(val_err)
        overall_p_1.append(precision_at_1)
        overall_p_2.append(precision_at_2)

        # update exp and progbar
        exp.add_metric_row({'val loss': val_err, 'val P@1': precision_at_1, 'val P@2': precision_at_2})
        progbar.add(n=1)

    # log and save val metrics
    overall_val_mean_err = np.asarray(overall_err).mean()
    overall_p_1_mean = np.asarray(overall_p_1).mean()
    overall_p_2_mean = np.asarray(overall_p_2).mean()
    exp.add_metric_row({'epoch_mean_err': overall_val_mean_err,
                        'epoch_P@1_mean': overall_p_1_mean,
                        'epoch_P@2_mean': overall_p_2_mean,
                        'epoch': epoch + 1})

    print('\nval loss: ', overall_val_mean_err,
          'epoch_P@1_mean: ', overall_p_1_mean,
          'epoch_P@2_mean: ', overall_p_2_mean)
    print('-'*100)
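
test_precision_at_k is called throughout these examples but never shown. A minimal
sketch, assuming pred_opt evaluates to a [batch, batch] similarity matrix whose
diagonal entries score the true (context, response) pairs; the name and signature
match the calls above, the body is an assumption:

import numpy as np


def test_precision_at_k(pred_opt, feed_dict, k, sess):
    # evaluate the [batch, batch] similarity matrix: row i scores context i
    # against every candidate response in the batch
    sims = pred_opt.eval(session=sess, feed_dict=feed_dict)

    # indices of the k highest-scoring candidates per row
    top_k = np.argsort(-sims, axis=1)[:, :k]

    # P@k: fraction of rows whose true response (index i) lands in the top k
    hits = [i in top_k[i] for i in range(len(top_k))]
    return float(np.mean(hits))
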
Example #2
def check_val_stats(model, pred_opt, data, hparams, X_ph, Y_ph, exp, sess,
                    epoch):

    print('checking val loss...')
    max_val_batches = 100
    val_gen = data.val_generator(batch_size=hparams.batch_size, max_epochs=1)

    overall_err = []
    overall_p_1 = []
    overall_p_2 = []
    progbar = Progbar(target=max_val_batches, width=50)
    for batch_nb in range(max_val_batches):
        batch_X, batch_Y = next(val_gen)
        if len(batch_X) == 0:
            continue

        feed_dict = {X_ph: batch_X, Y_ph: batch_Y}

        val_err = model.eval(session=sess, feed_dict=feed_dict)
        precision_at_1 = test_precision_at_k(pred_opt,
                                             feed_dict,
                                             k=1,
                                             sess=sess)
        precision_at_2 = test_precision_at_k(pred_opt,
                                             feed_dict,
                                             k=2,
                                             sess=sess)

        overall_err.append(val_err)
        overall_p_1.append(precision_at_1)
        overall_p_2.append(precision_at_2)

        exp.add_metric_row({
            'val loss': val_err,
            'val P@1': precision_at_1,
            'val P@2': precision_at_2
        })
        progbar.add(n=1)

    overall_val_mean_err = np.asarray(overall_err).mean()
    overall_p_1_mean = np.asarray(overall_p_1).mean()
    overall_p_2_mean = np.asarray(overall_p_2).mean()
    exp.add_metric_row({
        'epoch_mean_err': overall_val_mean_err,
        'epoch_P@1_mean': overall_p_1_mean,
        'epoch_P@2_mean': overall_p_2_mean,
        'epoch': epoch + 1
    })

    print('\nval loss: ', overall_val_mean_err, 'epoch_P@1_mean: ',
          overall_p_1_mean, 'epoch_P@2_mean: ', overall_p_2_mean)
    print('-' * 100)
def train_main(hparams):
    """
    Main training routine for the dot semantic network bot
    :return:
    """

    # -----------------------
    # INIT EXPERIMENT
    # ----------------------
    exp = Experiment(name=hparams.exp_name,
                     debug=hparams.debug,
                     description=hparams.exp_desc,
                     autosave=False,
                     save_dir=hparams.test_tube_dir)

    exp.add_argparse_meta(hparams)
    exp.save()

    # -----------------------
    # LOAD DATASET
    # ----------------------
    udc_dataset = UDCDataset(vocab_path=hparams.vocab_path,
                             train_path=hparams.dataset_train_path,
                             test_path=hparams.dataset_test_path,
                             val_path=hparams.dataset_val_path,
                             max_seq_len=hparams.max_seq_len)

    # -----------------------
    # INIT TF VARS
    # ----------------------
    # input_x holds chat history
    # input_y holds our responses
    # labels holds the ground truth labels
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=[hparams.batch_size, None],
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32,
                             shape=[hparams.batch_size, None],
                             name='input_y')

    # ----------------------
    # EMBEDDING LAYER
    # ----------------------
    # you can preload your own or learn in the network
    # in this case we'll just learn it in the network
    embedding = tf.get_variable(
        'embedding', [udc_dataset.vocab_size, hparams.embedding_dim])

    # ----------------------
    # RESOLVE EMBEDDINGS
    # ----------------------
    # Lookup the embeddings.
    embedding_x = tf.nn.embedding_lookup(embedding, input_x)
    embedding_y = tf.nn.embedding_lookup(embedding, input_y)

    # Generates 1 vector per training example.
    x = tf.reduce_sum(embedding_x, axis=1)
    y = tf.reduce_sum(embedding_y, axis=1)

    # ----------------------
    # OPTIMIZATION PROBLEM
    # ----------------------
    S = dot_product_scoring(x, y, is_training=True)
    K = tf.reduce_logsumexp(S, axis=1)
    loss = -tf.reduce_mean(tf.diag_part(S) - K)
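    # In-batch softmax: S[i, j] scores context i against response j, so
    #   loss = -mean_i(S[i, i] - logsumexp_j S[i, j])
    #        = -mean_i log softmax(S[i, :])[i],
    # i.e. each diagonal entry is the positive pair and the rest of its row
    # serves as in-batch negatives.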

    # allow optimizer to be changed through hyper params
    optimizer = get_optimizer(hparams=hparams, minimize=loss)

    # ----------------------
    # TF ADMIN (VAR INIT, SESS)
    # ----------------------
    sess = tf.Session()
    init_vars = tf.global_variables_initializer()
    sess.run(init_vars)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # ----------------------
    # TRAINING ROUTINE
    # ----------------------
    # admin vars
    nb_batches_served = 0
    eval_every_n_batches = hparams.eval_every_n_batches

    train_err = 1000
    prec_at_1 = 0
    prec_at_2 = 0

    # iter for the needed epochs
    print('\n\n', '-' * 100,
          '\n  {} TRAINING\n'.format(hparams.exp_name.upper()), '-' * 100,
          '\n\n')
    for epoch in range(hparams.nb_epochs):
        print('training epoch:', epoch + 1)
        progbar = Progbar(target=udc_dataset.nb_tng, width=50)
        train_gen = udc_dataset.train_generator(batch_size=hparams.batch_size,
                                                max_epochs=1)

        # mini batches
        for batch_context, batch_utterance in train_gen:
            feed_dict = {input_x: batch_context, input_y: batch_utterance}

            # OPT: run one step of optimization
            optimizer.run(session=sess, feed_dict=feed_dict)
            # update loss metrics
            if nb_batches_served % eval_every_n_batches == 0:

                # calculate test error
                train_err = loss.eval(session=sess, feed_dict=feed_dict)
                prec_at_1 = test_precision_at_k(S, feed_dict, k=1, sess=sess)
                prec_at_2 = test_precision_at_k(S, feed_dict, k=2, sess=sess)

                # update prog bar
                exp.add_metric_row({
                    'tng loss': train_err,
                    'P@1': prec_at_1,
                    'P@2': prec_at_2
                })

            nb_batches_served += 1

            progbar.add(n=len(batch_context),
                        values=[('train_err', train_err), ('P@1', prec_at_1),
                                ('P@2', prec_at_2)])

        # ----------------------
        # END OF EPOCH PROCESSING
        # ----------------------
        # calculate the val loss
        print('\nepoch complete...\n')
        check_val_stats(loss, S, udc_dataset, hparams, input_x, input_y, exp,
                        sess, epoch)

        # save model
        save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch)

        # save exp data
        exp.save()

    tf.reset_default_graph()
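
dot_product_scoring is not shown on this page. A minimal sketch, assuming it
simply returns the pairwise dot products used as the score matrix S above
(is_training is kept only for signature compatibility):

import tensorflow as tf


def dot_product_scoring(x, y, is_training=True):
    # S[i, j] = <x_i, y_j>: the diagonal scores the true (context, response)
    # pairs; the off-diagonal entries act as in-batch negatives
    return tf.matmul(x, y, transpose_b=True)
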
def main():

    batch_size = _BATCH_SIZE
    noise_dim = _NOISE_DIM
    lamb = 10.0

    train = get_data()
    train_images, train_labels = make_batch(train)

    gen = generator()
    dis = discriminator()
    gen.summary()
    dis.summary()

    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)

    gen.trainable = True
    dis.trainable = False
    gen_inputs = Input(shape=(noise_dim, ))
    gen_outputs = gen(gen_inputs)
    dis_outputs = dis(gen_outputs)
    gen_model = Model(inputs=[gen_inputs], outputs=[dis_outputs])
    gen_model.compile(loss=wasserstein_loss, optimizer=gen_opt)
    gen_model.summary()

    gen.trainable = False
    dis.trainable = True
    real_inputs = Input(shape=train_images.shape[1:])
    dis_real_outputs = dis(real_inputs)

    fake_inputs = Input(shape=(noise_dim, ))
    gen_fake_outputs = gen(fake_inputs)
    dis_fake_outputs = dis(gen_fake_outputs)

    interpolate = RandomWeightedAverage()([real_inputs, gen_fake_outputs])
    dis_interpolate_outputs = dis(interpolate)

    gp_reg = partial(gradient_penalty, interpolate=interpolate, lamb=lamb)
    # partial objects have no __name__, which Keras uses to name outputs
    gp_reg.__name__ = 'gradient_penalty'

    dis_model = Model(
        inputs=[real_inputs, fake_inputs],
        outputs=[dis_real_outputs, dis_fake_outputs, dis_interpolate_outputs])

    dis_model.compile(loss=[wasserstein_loss, wasserstein_loss, gp_reg],
                      optimizer=dis_opt)
    dis_model.summary()

    max_epoch = 10001
    max_train_only_dis = 5
    minibatch_size = batch_size * max_train_only_dis
    max_loop = int(train_images.shape[0] / minibatch_size)

    real = np.zeros((batch_size, train_images.shape[1], train_images.shape[2],
                     train_images.shape[3]),
                    dtype=np.float32)
    minibatch_train_images = np.zeros(
        (minibatch_size, train_images.shape[1], train_images.shape[2],
         train_images.shape[3]),
        dtype=np.float32)

    progbar = Progbar(target=max_epoch)
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    dummy_label = [0] * batch_size
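    # Assuming the usual wasserstein_loss(y_true, y_pred) = mean(y_true * y_pred),
    # the -1 labels push the critic's score up on real samples and the +1 labels
    # push it down on fakes; the 0 "dummy" labels feed the gradient-penalty
    # output, whose loss term does not use y_true.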
    for epoch in range(max_epoch):

        np.random.shuffle(train_images)
        for loop in range(max_loop):

            minibatch_train_images = train_images[loop *
                                                  minibatch_size:(loop + 1) *
                                                  minibatch_size]
            for train_only_dis in range(max_train_only_dis):

                real = minibatch_train_images[train_only_dis *
                                              batch_size:(train_only_dis + 1) *
                                              batch_size]
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)
                dis_loss = dis_model.train_on_batch(
                    [real, noise], [real_label, fake_label, dummy_label])

            noise = np.random.uniform(-1, 1, (batch_size, noise_dim)).astype(
                np.float32)
            gen_loss = gen_model.train_on_batch(noise, real_label)

        progbar.add(1,
                    values=[("dis_loss", dis_loss[0]), ("gen_loss", gen_loss)])
        if epoch % 100 == 0:
            noise = np.random.uniform(
                -1, 1, (batch_size, noise_dim)).astype(np.float32)
            fake = gen.predict(noise)
            tmp = [r.reshape(-1, 32) for r in fake]
            tmp = np.concatenate(tmp, axis=1)
            img = ((tmp / 2.0 + 0.5) * 255.0).astype(np.uint8)
            Image.fromarray(img).save("generate/%d.png" % (epoch))

    backend.clear_session()
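
wasserstein_loss, RandomWeightedAverage, and gradient_penalty are used above but
not shown. A minimal sketch following the usual Keras W-GAN-GP pattern for the
image-shaped inputs of this example; the names match the calls above, the bodies
are assumptions:

import numpy as np
from keras import backend as K
from keras.layers.merge import _Merge


def wasserstein_loss(y_true, y_pred):
    # with labels of -1/+1 this is plus or minus the critic's mean score
    return K.mean(y_true * y_pred)


class RandomWeightedAverage(_Merge):
    # a random point on the segment between each real/fake pair, where the
    # gradient penalty is evaluated
    def _merge_function(self, inputs):
        alpha = K.random_uniform((K.shape(inputs[0])[0], 1, 1, 1))
        return alpha * inputs[0] + (1 - alpha) * inputs[1]


def gradient_penalty(y_true, y_pred, interpolate, lamb):
    # penalize the critic's gradient norm at the interpolates:
    # lamb * (||grad D(x_hat)|| - 1)^2; y_true (the dummy labels) is unused
    grads = K.gradients(y_pred, interpolate)[0]
    sq_norm = K.sum(K.square(grads), axis=list(range(1, K.ndim(grads))))
    return lamb * K.mean(K.square(K.sqrt(sq_norm) - 1.0))
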
Example #5
def main():

    if os.path.isfile(macro._LOCAL_SAVE_DATA) == 0:

        # download data and compute features (see "download_data.py")
        # atomic_numbers is used to compute the composition vector
        # labels are the target properties (formation energy)
        train_labels, compositions, features, atomic_numbers = dl.get_data()

        # compute bag-of-atom vector that trains GAN (see "preprocess.py")
        boa_vectors = pre.compute_bag_of_atom_vector(compositions,
                                                     atomic_numbers)
        train_data = np.concatenate([boa_vectors, features], axis=1)

        save_data = pd.DataFrame(
            np.concatenate([train_labels, train_data], axis=1))
        save_data.to_csv(macro._LOCAL_SAVE_DATA, index=False, header=False)

    else:
        data = pd.read_csv(macro._LOCAL_SAVE_DATA,
                           delimiter=',',
                           engine="python",
                           header=None)
        data = np.array(data)
        train_labels, train_data = np.split(data, [1], axis=1)

    # normalization of training data such that min is 0 and max is 1 (see "preprocess.py")
    normalized_train_data, data_max, data_min = pre.normalize_for_train(
        train_data)
    normalized_train_labels, max_train_prop, min_train_prop = pre.normalize_for_train(
        train_labels)

    # Save normalization parameter to .csv to use generation
    save_data = pd.DataFrame(
        np.concatenate([max_train_prop, min_train_prop, data_max, data_min],
                       axis=0))
    save_data.to_csv(macro._SAVE_NORMALIZATION_PARAM,
                     index=False,
                     header=False)

    ### start initialization of training GAN ###

    # set hyperparameters
    batch_size = macro._BATCH_SIZE  # batch size
    noise_dim = macro._NOISE_DIM  # dimension of noise to input generator
    property_dim = macro._PROP_DIM  # the number of properties
    lamb = macro._LAMB  # hyperparameter for W-GAN-GP
    max_epoch = macro._MAX_EPOCH  # maximum iteration of outer loop
    max_train_only_dis = macro._MAX_EPOCH_TRAIN_DISCRIMINATOR  # maximum iteration of inner loop defined by W-GAN-GP paper (https://arxiv.org/pdf/1704.00028.pdf)
    max_loop = int(train_data.shape[0] / batch_size)

    # set model (see "model.py")
    # in this code, we apply an AC-GAN based network architecture (https://arxiv.org/abs/1610.09585)
    # the difference from AC-GAN is that our model performs regression, not classification
    gen = model.generator(normalized_train_data.shape[1])
    dis = model.discriminator(normalized_train_data.shape[1])

    # rf is the output layer of discriminator that discriminates real or fake
    rf = model.real_fake()

    # pred is the output layer of discriminator that predicts target property
    pred = model.prediction()

    # set optimization method
    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)

    # first, set the discriminator's parameters for training
    gen.trainable = False  # the generator's parameters do not update
    dis.trainable = True
    rf.trainable = True
    pred.trainable = True

    # set variables when inputting real data
    real_inputs = Input(shape=normalized_train_data.shape[1:])
    dis_real_outputs = dis(real_inputs)
    real_fake_from_real = rf(dis_real_outputs)
    predictions_from_real = pred(dis_real_outputs)

    # set variables when inputting fake data
    fake_inputs = Input(shape=(noise_dim + property_dim, ))
    gen_fake_outputs = gen(fake_inputs)
    dis_fake_outputs = dis(gen_fake_outputs)
    real_fake_from_fake = rf(dis_fake_outputs)

    # set loss function for discriminator
    # in this case, we apply W-GAN-GP based loss function because of improving stability
    # W-GAN-GP (https://arxiv.org/pdf/1704.00028.pdf)
    # W-GAN-GP is unsupervised, whereas our model is supervised (conditional),
    # so we apply wasserstein_loss to the real/fake outputs and mean_squared_error to the prediction output
    interpolate = model.RandomWeightedAverage()(
        [real_inputs, gen_fake_outputs])
    dis_interpolate_outputs = dis(interpolate)
    real_fake_interpolate = rf(dis_interpolate_outputs)

    # gradient penalty of W-GAN-GP
    gp_reg = partial(model.gradient_penalty,
                     interpolate=interpolate,
                     lamb=lamb)
    gp_reg.__name__ = 'gradient_penalty'

    # connect inputs and outputs of the discriminator
    # the prediction head is trained only on the training dataset (i.e., it is not trained on generated samples)
    dis_model = Model(
        inputs=[real_inputs, fake_inputs],
        outputs=[real_fake_from_real, real_fake_from_fake, real_fake_interpolate, predictions_from_real])

    # compile
    dis_model.compile(loss=[model.wasserstein_loss, model.wasserstein_loss,
                            gp_reg, 'mean_squared_error'],
                      optimizer=dis_opt)

    # second, set the generator's parameters for training
    gen.trainable = True  # only the generator's parameters update
    dis.trainable = False
    rf.trainable = False
    pred.trainable = False

    # set variables when inputting noise and target property
    gen_inputs = Input(shape=(noise_dim + property_dim, ))
    gen_outputs = gen(gen_inputs)

    # set variables for discriminator when inputting fake data
    dis_outputs = dis(gen_outputs)
    real_fake = rf(dis_outputs)
    predictions = pred(dis_outputs)

    # connect inputs and outputs of the discriminator
    gen_model = Model(inputs=[gen_inputs], outputs=[real_fake, predictions])

    # compile
    # generator is trained by real_fake classification and prediction of target property
    gen_model.compile(loss=[model.wasserstein_loss, 'mean_squared_error'],
                      optimizer=gen_opt)

    # if you need progress bar
    progbar = Progbar(target=max_epoch)

    # set the target labels used to train each model
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    dummy_label = [0] * batch_size
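    # as in the previous example, -1/+1 drive the Wasserstein critic on real and
    # fake samples and the 0 dummy labels feed the gradient-penalty output; the
    # fourth target, properties, supervises the prediction head with MSE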

    #real = np.zeros((batch_size,train_data.shape[1]), dtype=np.float32)
    inputs = np.zeros((batch_size, noise_dim + property_dim), dtype=np.float32)

    # epoch
    for epoch in range(max_epoch):

        # iteration
        for loop in range(max_loop):

            # shuffle to change the training order and select data
            sdata, slabels, bak = pre.paired_shuffle(normalized_train_data,
                                                     normalized_train_labels)
            real = sdata[loop * batch_size:(loop + 1) * batch_size]
            properties = slabels[loop * batch_size:(loop + 1) * batch_size]

            # the generator's parameters do not update
            gen.trainable = False
            dis.trainable = True
            rf.trainable = True
            pred.trainable = True

            # train discriminator
            for train_only_dis in range(max_train_only_dis):
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)
                for i in range(len(noise)):
                    inputs[i] = np.hstack((noise[i], properties[i]))
                dis_loss = dis_model.train_on_batch(
                    [real, inputs],
                    [real_label, fake_label, dummy_label, properties])

            # second, train only the generator
            gen.trainable = True
            dis.trainable = False
            rf.trainable = False
            pred.trainable = False
            noise = np.random.uniform(-1, 1, (batch_size, noise_dim)).astype(
                np.float32)
            for i in range(len(noise)):
                inputs[i] = np.hstack((noise[i], properties[i]))
            gen_loss = gen_model.train_on_batch([inputs],
                                                [real_label, properties])

        # if you need progress bar
        progbar.add(1,
                    values=[("dis_loss", dis_loss[0]),
                            ("gen_loss", gen_loss[0])])

    # save generated samples and models
    eval.save(normalized_train_data, gen, dis, pred, rf)

    backend.clear_session()
Example #6
def main():
    train = get_data()
    train_images, train_labels = make_batch(train)

    dis = discriminator()
    dis.summary()

    dis_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    dis.compile(loss='binary_crossentropy', optimizer=dis_opt)

    gen = generator()
    gen.summary()

    gen.trainable = True
    dis.trainable = False
    comb = combine(gen, dis)
    comb.summary()

    gen_opt = Adam(lr=1.0e-4, beta_1=0.0, beta_2=0.9)
    comb.compile(loss='binary_crossentropy', optimizer=gen_opt)

    batch_size = _BATCH_SIZE
    noise_dim = _NOISE_DIM
    max_epoch = 10001
    max_train_only_dis = 5
    minibatch_size = batch_size * max_train_only_dis
    max_loop = int(train_images.shape[0] / minibatch_size)

    real = np.zeros((batch_size, train_images.shape[1], train_images.shape[2],
                     train_images.shape[3]),
                    dtype=np.float32)
    minibatch_train_images = np.zeros(
        (minibatch_size, train_images.shape[1], train_images.shape[2],
         train_images.shape[3]),
        dtype=np.float32)

    progbar = Progbar(target=max_epoch)
    real_label = [-1] * batch_size
    fake_label = [1] * batch_size
    for epoch in range(max_epoch):

        np.random.shuffle(train_images)
        for loop in range(max_loop):

            minibatch_train_images = train_images[loop *
                                                  minibatch_size:(loop + 1) *
                                                  minibatch_size]
            for train_only_dis in range(max_train_only_dis):

                real = minibatch_train_images[train_only_dis *
                                              batch_size:(train_only_dis + 1) *
                                              batch_size]
                noise = np.random.uniform(
                    -1, 1, (batch_size, noise_dim)).astype(np.float32)

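                # generator step: train gen through the frozen discriminator,
                # labeling fakes as 1 ("real"), i.e. the non-saturating
                # generator objective under binary cross-entropy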
                dis.trainable = False
                y = [1] * batch_size
                gen_loss = comb.train_on_batch(noise, y)

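                # discriminator step: a combined batch with real images
                # labeled 1 and generated images labeled 0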
                dis.trainable = True
                y = [1] * batch_size + [0] * batch_size
                fake = gen.predict(noise)
                dis_loss = dis.train_on_batch(np.concatenate((real, fake)), y)

        progbar.add(1, values=[("dis_loss", dis_loss), ("gen_loss", gen_loss)])
        if epoch % 100 == 0:
            tmp = [r.reshape(-1, 32) for r in fake]
            tmp = np.concatenate(tmp, axis=1)
            img = ((tmp / 2.0 + 0.5) * 255.0).astype(np.uint8)
            Image.fromarray(img).save("generate/%d.png" % (epoch))

    backend.clear_session()
def train_main(hparams):
    """
    Main training routine for the dot semantic network bot
    :return:
    """

    # -----------------------
    # INIT EXPERIMENT
    # ----------------------
    exp = Experiment(name=hparams.exp_name,
                     debug=hparams.debug,
                     description=hparams.exp_desc,
                     autosave=False,
                     save_dir=hparams.test_tube_dir)

    exp.add_meta_tags(vars(hparams))

    # -----------------------
    # LOAD DATASET
    # ----------------------
    udc_dataset = UDCDataset(vocab_path=hparams.vocab_path,
                             train_path=hparams.dataset_train_path,
                             test_path=hparams.dataset_test_path,
                             val_path=hparams.dataset_val_path,
                             max_seq_len=hparams.max_seq_len)

    # -----------------------
    # INIT TF VARS
    # ----------------------
    # context holds chat history
    # utterance holds our responses
    # labels holds the ground truth labels
    context_ph = tf.placeholder(dtype="string",
                                shape=[
                                    hparams.batch_size,
                                ],
                                name='context_seq_in')
    utterance_ph = tf.placeholder(dtype="string",
                                  shape=[
                                      hparams.batch_size,
                                  ],
                                  name='utterance_seq_in')

    # ----------------------
    # EMBEDDING LAYER
    # ----------------------
    # you can preload your own or learn in the network
    # in this case we'll just learn it in the network
    # embedding_layer = tf.Variable(tf.random_uniform([udc_dataset.vocab_size, hparams.embedding_dim], -1.0, 1.0), name='embedding')
    #x = prep(udc_dataset.train, hparams.batch_size)
    #print(type(x))
    #print(len(x))

    # elmo_model = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
    sess = tf.Session()

    K.set_session(sess)
    # Initialize sessions
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    # print('elmo')
    # context = list(udc_dataset['Context'])
    # elmo_text = elmo(context, signature="default", as_dict=True)
    # input_text = Input(shape=(100,), tensor= ,dtype="string")
    #custom_layer = MyLayer(output_dim=1024, trainable=True)(tf.convert_to_tensor(x, dtype='string'))
    # embedding = Lambda(ELMoEmbedding, output_shape=(1024, ))(input_text)

    # elmo_text = elmo(tf.squeeze(tf.cast(x, tf.string)), signature="default", as_dict=True)["default"]
    #embedding_layer = Dense(256, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001))(custom_layer)

    print('embedding_layer')
    # ----------------------
    # RESOLVE EMBEDDINGS
    # ----------------------
    # look up embeddings
    context_embedding_custom0 = MyLayer(output_dim=1024,
                                        trainable=True)(tf.slice(
                                            context_ph, [0], [1]))
    utterance_embedding_custom0 = MyLayer(output_dim=1024,
                                          trainable=True)(tf.slice(
                                              utterance_ph, [0], [1]))
    print('context')
    print(tf.shape(context_embedding_custom0))

    for batch_num in range(1, hparams.batch_size):

        context_embedding_custom = MyLayer(output_dim=1024,
                                           trainable=True)(tf.slice(
                                               context_ph, [batch_num], [1]))
        utterance_embedding_custom = MyLayer(output_dim=1024, trainable=True)(
            tf.slice(utterance_ph, [batch_num], [1]))
        context_embedding_custom0 = tf.concat(
            [context_embedding_custom0, context_embedding_custom], axis=0)
        utterance_embedding_custom0 = tf.concat(
            [utterance_embedding_custom0, utterance_embedding_custom], axis=0)

    print('concat')
    print(tf.shape(context_embedding_custom0))

    #context_embedding_summed = tf.reduce_mean(context_embedding_custom0, axis=1)
    #utterance_embedding_summed = tf.reduce_mean(utterance_embedding_custom0, axis=1)
    #print('summed')
    #print(tf.shape(context_embedding_summed))

    #context_embedding = Dense(hparams.embedding_dim, activation='relu',
    #kernel_regularizer=keras.regularizers.l2(0.001))(
    #context_embedding_custom0)
    #utterance_embedding = Dense(hparams.embedding_dim, activation='relu',
    #kernel_regularizer=keras.regularizers.l2(0.001))(
    #utterance_embedding_custom0)
    #print('embedding')
    #print(tf.shape(context_embedding))

    # avg all embeddings (sum works better?)
    # this generates 1 vector per training example
    #context_embedding_summed = tf.reduce_mean(context_embedding, axis=1)
    #utterance_embedding_summed = tf.reduce_mean(utterance_embedding, axis=1)

    # ----------------------
    # OPTIMIZATION PROBLEM
    # ----------------------
    model, _, _, pred_opt = dot_semantic_nn(
        context=context_embedding_custom0,
        utterance=utterance_embedding_custom0,
        tng_mode=hparams.train_mode)

    # allow optimizer to be changed through hyper params
    optimizer = get_optimizer(hparams=hparams, minimize=model)

    # ----------------------
    # TF ADMIN (VAR INIT, SESS)
    # ----------------------
    # reuse the session registered with Keras above instead of creating a new one
    init_vars = tf.global_variables_initializer()
    sess.run(init_vars)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # ----------------------
    # TRAINING ROUTINE
    # ----------------------
    # admin vars
    nb_batches_served = 0
    eval_every_n_batches = hparams.eval_every_n_batches

    train_err = 1000
    precision_at_1 = 0
    precision_at_2 = 0

    # iter for the needed epochs
    print('\n\n', '-' * 100,
          '\n  {} TRAINING\n'.format(hparams.exp_name.upper()), '-' * 100,
          '\n\n')
    for epoch in range(hparams.nb_epochs):
        print('training epoch:', epoch + 1)
        progbar = Progbar(target=udc_dataset.nb_tng, width=50)
        train_gen = udc_dataset.train_generator(batch_size=hparams.batch_size,
                                                max_epochs=1)

        # mini batches
        for batch_context, batch_utterance in train_gen:

            feed_dict = {
                context_ph: batch_context,
                utterance_ph: batch_utterance
            }
            print("optimizer!")
            # OPT: run one step of optimization
            optimizer.run(session=sess, feed_dict=feed_dict)
            # update loss metrics
            if nb_batches_served % eval_every_n_batches == 0:
                # calculate test error
                train_err = model.eval(session=sess, feed_dict=feed_dict)
                precision_at_1 = test_precision_at_k(pred_opt,
                                                     feed_dict,
                                                     k=1,
                                                     sess=sess)
                precision_at_2 = test_precision_at_k(pred_opt,
                                                     feed_dict,
                                                     k=2,
                                                     sess=sess)

                # update prog bar
                exp.add_metric_row({
                    'tng loss': train_err,
                    'P@1': precision_at_1,
                    'P@2': precision_at_2
                })

            nb_batches_served += 1

            progbar.add(n=len(batch_context),
                        values=[('train_err', train_err),
                                ('P@1', precision_at_1),
                                ('P@2', precision_at_2)])

        # ----------------------
        # END OF EPOCH PROCESSING
        # ----------------------
        # calculate the val loss
        print('\nepoch complete...\n')
        check_val_stats(model, pred_opt, udc_dataset, hparams, context_ph,
                        utterance_ph, exp, sess, epoch)

        # save model
        save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch)
def check_val_stats(model, pred_opt, data, hparams, X_ph, Y_ph, exp, sess,
                    epoch):
    """
    Runs through validation data to check the overall mean loss
    :param model:
    :param data:
    :param hparams:
    :param X_ph:
    :param Y_ph:
    :param exp:
    :param sess:
    :param epoch:
    :return:
    """
    print('checking val loss...')
    max_val_batches = 100
    val_gen = data.val_generator(batch_size=hparams.batch_size, max_epochs=100)

    overall_err = []
    overall_p_1 = []
    overall_p_2 = []
    progbar = Progbar(target=max_val_batches, width=50)
    for batch_nb in range(max_val_batches):
        batch_X, batch_Y = next(val_gen)
        if len(batch_X) == 0:
            continue

        # aggregate data
        feed_dict = {X_ph: batch_X, Y_ph: batch_Y}
        sims = pred_opt.eval(session=sess, feed_dict=feed_dict)
        file = open("result.txt", "a")
        for ban, paras in enumerate(zip(batch_X, batch_Y)):
            pred_num = [
                i[0] for i in sorted(enumerate(sims[ban]), key=lambda x: x[1])
            ][::-1][0]
            file.write("Question \n")
            file.writelines(paras[0] + "\n")
            file.writelines("\n")
            file.write("Right Answer \n")
            file.writelines(paras[1] + "\n")
            file.writelines("\n")
            file.write("Predicted Answer \n")
            file.writelines(batch_Y[pred_num] + "\n")
            file.writelines("*************************************\n")

        # calculate metrics
        val_err = model.eval(session=sess, feed_dict=feed_dict)
        precision_at_1 = test_precision_at_k(pred_opt,
                                             feed_dict,
                                             k=1,
                                             sess=sess)
        precision_at_2 = test_precision_at_k(pred_opt,
                                             feed_dict,
                                             k=2,
                                             sess=sess)

        # track metrics for means
        overall_err.append(val_err)
        overall_p_1.append(precision_at_1)
        overall_p_2.append(precision_at_2)

        # update exp and progbar
        exp.add_metric_row({
            'val loss': val_err,
            'val P@1': precision_at_1,
            'val P@2': precision_at_2
        })
        progbar.add(n=1)

    # log and save val metrics
    overall_val_mean_err = np.asarray(overall_err).mean()
    overall_p_1_mean = np.asarray(overall_p_1).mean()
    overall_p_2_mean = np.asarray(overall_p_2).mean()
    exp.add_metric_row({
        'epoch_mean_err': overall_val_mean_err,
        'epoch_P@1_mean': overall_p_1_mean,
        'epoch_P@2_mean': overall_p_2_mean,
        'epoch': epoch + 1
    })

    print('\nval loss: ', overall_val_mean_err, 'epoch_P@1_mean: ',
          overall_p_1_mean, 'epoch_P@2_mean: ', overall_p_2_mean)
    print('-' * 100)
def train_main(hparams):
    """
    Main training routine for the dot semantic network bot
    :return:
    """

    # -----------------------
    # INIT EXPERIMENT
    # ----------------------
    exp = Experiment(name=hparams.exp_name,
                     debug=hparams.debug,
                     description=hparams.exp_desc,
                     autosave=False,
                     save_dir=hparams.test_tube_dir)

    exp.add_argparse_meta(hparams)
    exp.save()

    # -----------------------
    # LOAD DATASET
    # ----------------------
    udc_dataset = UDCDataset(vocab_path=hparams.vocab_path,
                             train_path=hparams.dataset_train_path,
                             test_path=hparams.dataset_test_path,
                             val_path=hparams.dataset_val_path,
                             max_seq_len=hparams.max_seq_len)

    # -----------------------
    # INIT TF VARS
    # ----------------------
    # context holds chat history
    # utterance holds our responses
    # labels holds the ground truth labels
    context_ph = tf.placeholder(dtype=tf.int32,
                                shape=[hparams.batch_size, None],
                                name='context_seq_in')
    utterance_ph = tf.placeholder(dtype=tf.int32,
                                  shape=[hparams.batch_size, None],
                                  name='utterance_seq_in')

    # ----------------------
    # EMBEDDING LAYER
    # ----------------------
    # you can preload your own or learn in the network
    # in this case we'll just learn it in the network
    embedding_layer = tf.Variable(tf.random_uniform(
        [udc_dataset.vocab_size, hparams.embedding_dim], -1.0, 1.0),
                                  name='embedding')

    # ----------------------
    # RESOLVE EMBEDDINGS
    # ----------------------
    # look up embeddings
    context_embedding = tf.nn.embedding_lookup(embedding_layer, context_ph)
    utterance_embedding = tf.nn.embedding_lookup(embedding_layer, utterance_ph)

    # avg all embeddings (sum works better?)
    # this generates 1 vector per training example
    context_embedding_summed = tf.reduce_mean(context_embedding, axis=1)
    utterance_embedding_summed = tf.reduce_mean(utterance_embedding, axis=1)

    # ----------------------
    # OPTIMIZATION PROBLEM
    # ----------------------
    model, _, _, pred_opt = dot_semantic_nn(
        context=context_embedding_summed,
        utterance=utterance_embedding_summed,
        tng_mode=hparams.train_mode)

    # allow optimizer to be changed through hyper params
    optimizer = get_optimizer(hparams=hparams, minimize=model)

    # ----------------------
    # TF ADMIN (VAR INIT, SESS)
    # ----------------------
    sess = tf.Session()
    init_vars = tf.global_variables_initializer()
    sess.run(init_vars)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # ----------------------
    # TRAINING ROUTINE
    # ----------------------
    # admin vars
    nb_batches_served = 0
    eval_every_n_batches = hparams.eval_every_n_batches

    train_err = 1000
    precision_at_1 = 0
    precision_at_2 = 0

    # iter for the needed epochs
    print('\n\n', '-' * 100,
          '\n  {} TRAINING\n'.format(hparams.exp_name.upper()), '-' * 100,
          '\n\n')
    for epoch in range(hparams.nb_epochs):
        print('training epoch:', epoch + 1)
        progbar = Progbar(target=udc_dataset.nb_tng, width=50)
        train_gen = udc_dataset.train_generator(batch_size=hparams.batch_size,
                                                max_epochs=1)

        # mini batches
        for batch_context, batch_utterance in train_gen:

            feed_dict = {
                context_ph: batch_context,
                utterance_ph: batch_utterance
            }

            # OPT: run one step of optimization
            optimizer.run(session=sess, feed_dict=feed_dict)
            # update loss metrics
            if nb_batches_served % eval_every_n_batches == 0:

                # calculate test error
                train_err = model.eval(session=sess, feed_dict=feed_dict)
                precision_at_1 = test_precision_at_k(pred_opt,
                                                     feed_dict,
                                                     k=1,
                                                     sess=sess)
                precision_at_2 = test_precision_at_k(pred_opt,
                                                     feed_dict,
                                                     k=2,
                                                     sess=sess)

                # update prog bar
                exp.add_metric_row({
                    'tng loss': train_err,
                    'P@1': precision_at_1,
                    'P@2': precision_at_2
                })

            nb_batches_served += 1

            progbar.add(n=len(batch_context),
                        values=[('train_err', train_err),
                                ('P@1', precision_at_1),
                                ('P@2', precision_at_2)])

        # ----------------------
        # END OF EPOCH PROCESSING
        # ----------------------
        # calculate the val loss
        print('\nepoch complete...\n')
        check_val_stats(model, pred_opt, udc_dataset, hparams, context_ph,
                        utterance_ph, exp, sess, epoch)

        # save model
        save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch)

        # save exp data
        exp.save()
Example #10
def train_main(hparams):

    exp = Experiment(name=hparams.exp_name,
                     debug=hparams.debug,
                     description=hparams.exp_desc,
                     autosave=False,
                     save_dir=hparams.test_tube_dir)

    exp.add_argparse_meta(hparams)
    exp.save()

    udc_dataset = UDCDataset(vocab_path=hparams.vocab_path,
                             train_path=hparams.dataset_train_path,
                             test_path=hparams.dataset_test_path,
                             val_path=hparams.dataset_val_path,
                             max_seq_len=hparams.max_seq_len)

    context_ph = tf.placeholder(dtype=tf.int32,
                                shape=[hparams.batch_size, None],
                                name='context_seq_in')
    utterance_ph = tf.placeholder(dtype=tf.int32,
                                  shape=[hparams.batch_size, None],
                                  name='utterance_seq_in')

    embedding_layer = tf.Variable(tf.random_uniform(
        [udc_dataset.vocab_size, hparams.embedding_dim], -1.0, 1.0),
                                  name='embedding')

    context_embedding = tf.nn.embedding_lookup(embedding_layer, context_ph)
    utterance_embedding = tf.nn.embedding_lookup(embedding_layer, utterance_ph)

    context_embedding_summed = tf.reduce_mean(context_embedding, axis=1)
    utterance_embedding_summed = tf.reduce_mean(utterance_embedding, axis=1)

    model, _, _, pred_opt = dot_semantic_nn(
        context=context_embedding_summed,
        utterance=utterance_embedding_summed,
        tng_mode=hparams.train_mode)

    optimizer = get_optimizer(hparams=hparams, minimize=model)

    sess = tf.Session()
    init_vars = tf.global_variables_initializer()
    sess.run(init_vars)

    saver = tf.train.Saver()

    nb_batches_served = 0
    eval_every_n_batches = hparams.eval_every_n_batches

    train_err = 1000
    precision_at_1 = 0
    precision_at_2 = 0

    # iter for the needed epochs
    print('\n\n', '-' * 100,
          '\n  {} TRAINING\n'.format(hparams.exp_name.upper()), '-' * 100,
          '\n\n')
    for epoch in range(hparams.nb_epochs):
        print('training epoch:', epoch + 1)
        progbar = Progbar(target=udc_dataset.nb_tng, width=50)
        train_gen = udc_dataset.train_generator(batch_size=hparams.batch_size,
                                                max_epochs=1)

        for batch_context, batch_utterance in train_gen:

            feed_dict = {
                context_ph: batch_context,
                utterance_ph: batch_utterance
            }

            optimizer.run(session=sess, feed_dict=feed_dict)

            if nb_batches_served % eval_every_n_batches == 0:

                train_err = model.eval(session=sess, feed_dict=feed_dict)
                precision_at_1 = test_precision_at_k(pred_opt,
                                                     feed_dict,
                                                     k=1,
                                                     sess=sess)
                precision_at_2 = test_precision_at_k(pred_opt,
                                                     feed_dict,
                                                     k=2,
                                                     sess=sess)

                exp.add_metric_row({
                    'tng loss': train_err,
                    'P@1': precision_at_1,
                    'P@2': precision_at_2
                })

            nb_batches_served += 1

            progbar.add(n=len(batch_context),
                        values=[('train_err', train_err),
                                ('P@1', precision_at_1),
                                ('P@2', precision_at_2)])

        print('\nepoch complete...\n')
        check_val_stats(model, pred_opt, udc_dataset, hparams, context_ph,
                        utterance_ph, exp, sess, epoch)

        save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch)

        exp.save()
Example #11
def transfer_model(source_df, target_df, test_df, method_flag, fold_num):

	source_labels, source_data = np.split(np.array(source_df), [1], axis=1)
	target_labels, target_data = np.split(np.array(target_df), [1], axis=1)
	test_labels, test_data = np.split(np.array(test_df), [1], axis=1)
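	# np.split(..., [1], axis=1) cuts each array after its first column:
	# column 0 holds the labels, the remaining columns hold the features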

	# normalization
	#normalized_source_data = pre.normalize(source_data)
	#normalized_target_data = pre.normalize(target_data)
	#normalized_test_data = pre.normalize(test_data)
	normalized_source_data = source_data
	normalized_target_data = target_data
	normalized_test_data = test_data


	### construct model for the source domain task ###

	# optimization
	opt = Adam()

	# network setting
	latent = models.latent(normalized_source_data.shape[1])
	sll = models.source_last_layer()
	tll = models.target_last_layer()

	source_inputs = Input(shape=normalized_source_data.shape[1:])
	latent_features = latent(source_inputs)
	source_predictors = sll(latent_features)

	latent.trainable = mc._SORUCE_LATENT_TRAIN
	sll.trainable = True

	source_nn = Model(inputs=[source_inputs], outputs=[source_predictors])
	source_nn.compile(loss=['mean_squared_error'], optimizer=opt)
	#source_nn.summary()

	# training using source domain data
	if method_flag != mc._SCRATCH:
		source_max_loop = int(normalized_source_data.shape[0]/mc._BATCH_SIZE)
		source_progbar = Progbar(target=mc._SOURCE_EPOCH_NUM)
		for epoch in range(mc._SOURCE_EPOCH_NUM):
			shuffle_data, shuffle_labels, _ = pre.paired_shuffle(normalized_source_data,source_labels,1)

			for loop in range(source_max_loop):
				batch_train_data = shuffle_data[loop*mc._BATCH_SIZE:(loop+1)*mc._BATCH_SIZE]
				batch_train_labels = shuffle_labels[loop*mc._BATCH_SIZE:(loop+1)*mc._BATCH_SIZE]
				batch_train_labels = np.reshape(batch_train_labels, [len(batch_train_labels)])
				one_hots = np.identity(mc._SOURCE_DIM_NUM)[np.array(batch_train_labels, dtype=np.int32)]
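				# (indexing the identity matrix by label turns the integer
				# labels into one-hot target rows)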
				loss = source_nn.train_on_batch([batch_train_data],[one_hots])

			#source_progbar.add(1, values=[("source loss",loss)])

		# save
		#latent.save('../results/source_latent.h5')
		#sll.save('../results/source_last_layer.h5')

	# compute relation vectors
	if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
		target_vectors = np.identity(mc._TARGET_DIM_NUM)[np.array(target_labels, dtype=np.int32)]
		target_vectors = np.reshape(target_vectors, [target_vectors.shape[0], target_vectors.shape[2]])
	elif method_flag == mc._COUNT_ATDL:
		target_labels, relations = rv.compute_relation_labels(source_nn, normalized_target_data, target_labels, fold_num)
		target_vectors = np.identity(mc._SOURCE_DIM_NUM)[np.array(target_labels, dtype=np.int32)]
		target_vectors = np.reshape(target_vectors, [target_vectors.shape[0], target_vectors.shape[2]])
	else:
		relation_vectors = rv.compute_relation_vectors(source_nn, normalized_target_data, target_labels, fold_num, method_flag)
		target_vectors = np.zeros((len(target_labels),mc._SOURCE_DIM_NUM), dtype=np.float32)
		for i in range(len(target_labels)):
			target_vectors[i] = relation_vectors[int(target_labels[i])]
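		# each target-domain label is replaced by its relation vector, a soft
		# target expressed in the source-domain output space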

	### tune the model for the target domain task ###

	latent.trainable = mc._TARGET_LATENT_TRAIN
	target_inputs = Input(shape=normalized_target_data.shape[1:])
	latent_features = latent(target_inputs)
	if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
		predictors = tll(latent_features)
		label_num = mc._TARGET_DIM_NUM
	else:
		predictors = sll(latent_features)
		label_num = mc._SOURCE_DIM_NUM

	target_nn = Model(inputs=[target_inputs], outputs=[predictors])
	target_nn.compile(loss=['mean_squared_error'], optimizer=opt)
	#target_nn.summary()

	# training using target domain data
	target_max_loop = int(normalized_target_data.shape[0]/mc._BATCH_SIZE)
	target_progbar = Progbar(target=mc._TARGET_EPOCH_NUM)
	for epoch in range(mc._TARGET_EPOCH_NUM):

		shuffle_data, shuffle_labels, _ = \
		pre.paired_shuffle(normalized_target_data, target_vectors, label_num)
		for loop in range(target_max_loop):
			batch_train_data = shuffle_data[loop*mc._BATCH_SIZE:(loop+1)*mc._BATCH_SIZE]
			batch_train_labels = shuffle_labels[loop*mc._BATCH_SIZE:(loop+1)*mc._BATCH_SIZE]
			loss = target_nn.train_on_batch([batch_train_data],[batch_train_labels])
		#target_progbar.add(1, values=[("target loss",loss)])


	# compute outputs of test data of target domain
	x = target_nn.predict([normalized_test_data])
	if method_flag == mc._SCRATCH or method_flag == mc._CONV_TRANSFER:
		idx = np.argmax(x, axis=1)
	elif method_flag == mc._COUNT_ATDL:
		idx = np.argmax(x, axis=1)
		for j in range(len(test_labels)):
			for i in range(mc._TARGET_DIM_NUM):
				if test_labels[j] == i:
					test_labels[j] = relations[i]
					break
	else:
		distance, idx = Neighbors(x, relation_vectors, 1)
		idx = idx[:, 0]
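		# decode by nearest neighbor: the predicted class is the target label
		# whose relation vector is closest to the network output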

	backend.clear_session()
	return idx.T, test_labels.T