Example #1
def evaluate_retrieval(model, dataset, params, session):
    print('evaluating retrieval')
    print('computing vectors...')

    # Collect labels; the validation split is folded into the training
    # set so the retrieval index covers both.
    validation_labels = np.array(
        [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
    training_labels = np.array(
        [[y] for y, _ in dataset.rows('training', num_epochs=1)])
    training_labels = np.concatenate((training_labels, validation_labels), 0)
    test_labels = np.array([[y]
                            for y, _ in dataset.rows('test', num_epochs=1)])

    validation_vectors = m.vectors(
        model, dataset.batches('validation', params.batch_size, num_epochs=1),
        session)
    training_vectors = m.vectors(
        model, dataset.batches('training', params.batch_size, num_epochs=1),
        session)
    training_vectors = np.concatenate((training_vectors, validation_vectors),
                                      0)
    test_vectors = m.vectors(
        model, dataset.batches('test', params.batch_size, num_epochs=1),
        session)

    print('evaluating...')

    recall_values = [0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.2]
    results = e.evaluate(training_vectors, test_vectors, training_labels,
                         test_labels, recall_values)
    for i, r in enumerate(recall_values):
        print('precision @ {}: {}'.format(r, results[i]))
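The evaluation helper e.evaluate is not shown on this page. As a rough guide, here is a minimal sketch of a precision-at-recall retrieval metric, assuming cosine similarity and that each recall value is the fraction of the training set retrieved per query; the function name, similarity measure, and averaging scheme are assumptions, not the repository's actual implementation:

import numpy as np

def precision_at_recall(train_vecs, test_vecs, train_labels, test_labels,
                        recall_values):
    # L2-normalize so a dot product equals cosine similarity.
    train = train_vecs / np.linalg.norm(train_vecs, axis=1, keepdims=True)
    test = test_vecs / np.linalg.norm(test_vecs, axis=1, keepdims=True)
    order = np.argsort(-(test @ train.T), axis=1)  # best matches first
    n = train.shape[0]
    results = []
    for r in recall_values:
        k = max(1, int(np.ceil(r * n)))
        # Labels of the top-k retrieved training documents per query.
        top = train_labels.flatten()[order[:, :k]]
        # Precision@k, averaged over queries and ranks.
        results.append(float((top == test_labels.reshape(-1, 1)).mean()))
    return results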
Example #2
def plot_tsne(model, dataset, params):
    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        print('computing vectors...')

        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        training_labels = np.concatenate((training_labels, validation_labels),
                                         0)
        test_labels = np.array([[y]
                                for y, _ in dataset.rows('test', num_epochs=1)
                                ])

        validation_vectors = m.vectors(
            model,
            dataset.batches('validation', params.batch_size, num_epochs=1),
            session)
        training_vectors = m.vectors(
            model, dataset.batches('training', params.batch_size,
                                   num_epochs=1), session)
        training_vectors = np.concatenate(
            (training_vectors, validation_vectors), 0)
        test_vectors = m.vectors(
            model, dataset.batches('test', params.batch_size, num_epochs=1),
            session)
        print("TEST VECTORS: " + str(test_vectors.shape))
        print(test_vectors)
        print("TEST LABELS: " + str(test_labels.shape))
        print(training_labels[:100])

        #data_X = pd.DataFrame(columns=["test_vectors", "test_labels"])
        #data_X["test_vectors"] = test_vectors
        #data_X["test_labels"] = test_labels

        tsne = TSNE(perplexity=40)
        tsne_obj = tsne.fit_transform(test_vectors)
        tsne_df = pd.DataFrame({
            'X': tsne_obj[:, 0],
            'Y': tsne_obj[:, 1],
            'test_labels': test_labels.flatten()
        })
        #print(tsne_df.head(20))
        sns.scatterplot(x="X",
                        y="Y",
                        hue="test_labels",
                        palette=sns.color_palette("deep", 10),
                        legend='full',
                        data=tsne_df)
        plt.show()
Example #3
def evaluate(model, dataset, params):
    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        print('computing vectors...')

        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        training_labels = np.concatenate((training_labels, validation_labels),
                                         0)
        test_labels = np.array([[y]
                                for y, _ in dataset.rows('test', num_epochs=1)
                                ])

        validation_vectors = m.vectors(
            model,
            dataset.batches('validation', params.batch_size, num_epochs=1),
            session)
        training_vectors = m.vectors(
            model, dataset.batches('training', params.batch_size,
                                   num_epochs=1), session)
        training_vectors = np.concatenate(
            (training_vectors, validation_vectors), 0)
        test_vectors = m.vectors(
            model, dataset.batches('test', params.batch_size, num_epochs=1),
            session)

        print('evaluating...')

        print("TRAINING VECTORS")
        print(training_vectors[0])
        print("TRAINING LABELS")
        print(training_labels)

        recall_values = [0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.2]
        results = e.evaluate(training_vectors, test_vectors, training_labels,
                             test_labels, recall_values)
        for i, r in enumerate(recall_values):
            print('precision @ {}: {}'.format(r, results[i]))
Example #4
def vectors(model, dataset, params):
    with tf.Session(config=tf.ConfigProto(
        inter_op_parallelism_threads=params.num_cores,
        intra_op_parallelism_threads=params.num_cores,
        gpu_options=tf.GPUOptions(allow_growth=True)
    )) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        saver = tf.train.Saver(tf.global_variables())
        # Restore trained weights; without this the exported vectors would
        # come from a randomly initialized model.
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        for collection in dataset.collections:
            save_vectors(
                m.vectors(
                    model,
                    dataset.batches(
                        collection,
                        params.batch_size,
                        num_epochs=1
                    ),
                    session
                ),
                collection,
                params.model
            )
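save_vectors is referenced here but not defined on this page. A plausible minimal implementation, assuming it writes one .npy file per collection into the model directory; the filename scheme is hypothetical:

import os
import numpy as np

def save_vectors(vectors, collection, model_dir):
    # Hypothetical layout: <model_dir>/<collection>_vectors.npy
    path = os.path.join(model_dir, '{}_vectors.npy'.format(collection))
    np.save(path, vectors)
    print('saved {} vectors to {}'.format(len(vectors), path))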
Example #5
def train(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')
    model_dir = os.path.join(params.model, 'model')

    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        avg_loss = tf.placeholder(tf.float32, [], 'loss_ph')
        tf.summary.scalar('loss', avg_loss)

        validation = tf.placeholder(tf.float32, [], 'validation_ph')
        tf.summary.scalar('validation', validation)

        summary_writer = tf.summary.FileWriter(log_dir, session.graph)
        summaries = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        losses = []

        # This currently streams from disk. You can set num_epochs=1 and
        # wrap the call below with something like itertools.cycle to keep
        # the data in memory (see the sketch after this example).
        training_data = dataset.batches('training', params.batch_size)

        best_val = 0.0
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])

        for step in range(params.num_steps + 1):
            _, x, seq_lengths = next(training_data)

            _, loss = session.run([model.opt, model.opt_loss],
                                  feed_dict={
                                      model.x: x,
                                      model.seq_lengths: seq_lengths
                                  })
            losses.append(loss)

            if step % params.log_every == 0:
                print('{}: {:.6f}'.format(step, loss))

            if step and (step % params.save_every) == 0:
                validation_vectors = m.vectors(
                    model,
                    dataset.batches('validation',
                                    params.batch_size,
                                    num_epochs=1), session)
                training_vectors = m.vectors(
                    model,
                    dataset.batches('training',
                                    params.batch_size,
                                    num_epochs=1), session)
                val = e.evaluate(training_vectors, validation_vectors,
                                 training_labels, validation_labels)[0]
                print('validation: {:.3f} (best: {:.3f})'.format(
                    val, best_val or 0.0))

                if val > best_val:
                    best_val = val
                    print('saving: {}'.format(model_dir))
                    saver.save(session, model_dir, global_step=step)

                summary, = session.run(
                    [summaries],
                    feed_dict={
                        model.x: x,
                        model.seq_lengths: seq_lengths,
                        validation: val,
                        avg_loss: np.average(losses)
                    })
                summary_writer.add_summary(summary, step)
                summary_writer.flush()
                losses = []
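The comment in this example suggests keeping the training stream in memory. A sketch of that idea as a drop-in replacement for the training_data assignment above, assuming dataset.batches yields the (y, x, seq_lengths) tuples the loop consumes:

import itertools

# Materialize one epoch, then cycle over it so later epochs are served
# from memory instead of being re-read from disk.
cached = list(dataset.batches('training', params.batch_size, num_epochs=1))
training_data = itertools.cycle(cached)

This trades memory for I/O and only pays off when the cached epoch fits in RAM.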
Example #6
def train(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')
    model_dir = os.path.join(params.model, 'model')

    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        avg_d_loss = tf.placeholder(tf.float32, [], 'd_loss_ph')
        tf.summary.scalar('d_loss', avg_d_loss)
        avg_g_loss = tf.placeholder(tf.float32, [], 'g_loss_ph')
        tf.summary.scalar('g_loss', avg_g_loss)
        validation = tf.placeholder(tf.float32, [], 'validation_ph')
        tf.summary.scalar('validation', validation)
        avg_al_loss = tf.placeholder(tf.float32, [], 'al_loss_ph')
        tf.summary.scalar('al_loss', avg_al_loss)

        summary_writer = tf.summary.FileWriter(log_dir, session.graph)
        summaries = tf.summary.merge_all()
        saver = tf.train.Saver(tf.global_variables())

        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        d_losses = []
        g_losses = []
        al_g_losses = []
        df_disc_losses = pd.DataFrame()
        df_gen_losses = pd.DataFrame()
        df_losses = pd.DataFrame()
        df_val_scores = pd.DataFrame()
        df_al_losses = pd.DataFrame()

        training_data = dataset.batches('training', params.batch_size)

        best_val = 0.0
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])

        for step in range(params.num_steps + 1):
            _, x = next(training_data)

            ###### update discriminator
            d_loss_step = update_disc(model, x, model.D_solver, model.D_loss,
                                      params, session)
            d_losses.append(d_loss_step)
            df_disc_losses = df_disc_losses.append(
                {
                    'step': step,
                    'disc_loss': d_loss_step
                }, ignore_index=True)

            ###### update generator
            g_loss_list = []
            for i in range(0, params.num_gen):
                g_loss_i, al_loss = update_gen(model, x, model.G_solver[i],
                                               model.Gen_loss[i],
                                               model.Al_solver,
                                               model.Al_gen_loss, params,
                                               session)
                g_loss_list.append(g_loss_i)

            al_g_losses.append(al_loss)
            df_al_losses = df_al_losses.append(
                {
                    'step': step,
                    'alpha_gen_loss': al_loss
                }, ignore_index=True)

            df_gen_losses = df_gen_losses.append(
                {
                    'step': step,
                    # one column per generator instead of hard-coding five
                    **{'g_{}_loss'.format(i): l
                       for i, l in enumerate(g_loss_list)}
                },
                ignore_index=True)
            g_losses.append(g_loss_list)

            ###### print discriminator and generator losses
            if step % params.log_every == 0:
                print('{}: {:.6f} \t'.format(step, d_losses[-1]),
                      g_losses[-1], al_g_losses[-1])

            ###### evaluate on the validation split and save the best model
            if step and (step % params.save_every) == 0:
                validation_vectors = m.vectors(
                    model,
                    dataset.batches('validation',
                                    params.batch_size,
                                    num_epochs=1), session)
                training_vectors = m.vectors(
                    model,
                    dataset.batches('training',
                                    params.batch_size,
                                    num_epochs=1), session)
                val = e.evaluate(training_vectors, validation_vectors,
                                 training_labels, validation_labels)[0]
                print('validation: {:.3f} (best: {:.3f})'.format(
                    val, best_val or 0.0))
                df_val_scores = df_val_scores.append(
                    {
                        'step': step,
                        'val_score': val,
                        'best_val_score': best_val
                    },
                    ignore_index=True)

                if val > best_val:
                    best_val = val
                    print('saving: {}'.format(model_dir))
                    saver.save(session, model_dir, global_step=step)

                summary, = session.run(
                    [summaries],
                    feed_dict={
                        model.x: x,
                        model.z: np.random.normal(
                            0, 1, (params.batch_size, params.z_dim)),
                        validation: val,
                        avg_d_loss: np.average(d_losses),
                        avg_al_loss: np.average(al_g_losses),
                        avg_g_loss: np.average(g_losses)
                    })
                summary_writer.add_summary(summary, step)
                summary_writer.flush()
                d_losses = []
                g_losses = []

        ###### Store and plot discriminator and generator losses
        df_losses = df_disc_losses.join(df_gen_losses.set_index('step'),
                                        on='step',
                                        how='inner')
        df_losses = df_losses.join(df_al_losses.set_index('step'),
                                   on='step',
                                   how='inner')
        df_losses.to_csv(log_dir + '_gen_disc_losses.csv', index=False)
        df_val_scores.to_csv(log_dir + '_val_scores.csv', index=False)

        img_loss = df_losses.plot(
            x='step',
            y=['disc_loss', 'alpha_gen_loss'],
            kind='line',
            title='Training Losses: Discriminator & Generator',
            xlabel="Step",
            ylabel="Loss").get_figure()
        img_loss.savefig(log_dir + '_disc_gen_loss.png')
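The helpers update_disc and update_gen are not shown on this page. A minimal sketch of the alternating GAN-style updates they appear to perform, reusing model.x, model.z, and params.z_dim from the snippet above; the exact ops each helper runs are assumptions:

import numpy as np

def update_disc(model, x, d_solver, d_loss, params, session):
    # One discriminator step on a real batch x and fresh noise z.
    z = np.random.normal(0, 1, (params.batch_size, params.z_dim))
    _, loss = session.run([d_solver, d_loss],
                          feed_dict={model.x: x, model.z: z})
    return loss

def update_gen(model, x, g_solver, g_loss, al_solver, al_gen_loss, params,
               session):
    # One step for a single generator, then the shared alpha update.
    z = np.random.normal(0, 1, (params.batch_size, params.z_dim))
    _, loss = session.run([g_solver, g_loss],
                          feed_dict={model.x: x, model.z: z})
    _, al_loss = session.run([al_solver, al_gen_loss],
                             feed_dict={model.x: x, model.z: z})
    return loss, al_loss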
Example #7
def evaluate(model, dataset, params):
    log_dir = os.path.join(params.model, 'logs')
    with tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=params.num_cores,
            intra_op_parallelism_threads=params.num_cores,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(params.model)
        saver.restore(session, ckpt.model_checkpoint_path)

        print('computing vectors...')

        recall_values = [
            0.0001, 0.0002, 0.0005, 0.002, 0.01, 0.05, 0.1, 0.2, 0.5, 1.0
        ]

        validation_labels = np.array(
            [[y] for y, _ in dataset.rows('validation', num_epochs=1)])
        training_labels = np.array(
            [[y] for y, _ in dataset.rows('training', num_epochs=1)])
        training_labels = np.concatenate((training_labels, validation_labels),
                                         0)
        test_labels = np.array([[y]
                                for y, _ in dataset.rows('test', num_epochs=1)
                                ])

        validation_vectors = m.vectors(
            model,
            dataset.batches('validation', params.batch_size, num_epochs=1),
            session)
        training_vectors = m.vectors(
            model, dataset.batches('training', params.batch_size,
                                   num_epochs=1), session)
        training_vectors = np.concatenate(
            (training_vectors, validation_vectors), 0)
        test_vectors = m.vectors(
            model, dataset.batches('test', params.batch_size, num_epochs=1),
            session)

        print('evaluating...')

        results = e.evaluate(training_vectors, test_vectors, training_labels,
                             test_labels, recall_values)

        df_precision_recall = pd.DataFrame(list(zip(recall_values, results)),
                                           columns=['recall', 'precision'])

        for i, r in enumerate(recall_values):
            print('precision @ {}: {}'.format(r, results[i]))

        ###### Store and plot precision-recall values
        df_precision_recall.to_csv(log_dir + '_precision_recall_values.csv',
                                   index=False)

        img_precision_recall = df_precision_recall.plot(
            x='recall',
            y='precision',
            kind='line',
            title='Precision vs Recall',
            xlabel="Recall",
            ylabel="Precision").get_figure()
        img_precision_recall.savefig(log_dir + '_precision_recall.png')