Example no. 1
def run():
    if len(sys.argv) < 3:
        print("** Usage: python3 " + sys.argv[0] +
              " <<Model Directory>> <<Test Set>>")
        sys.exit(1)

    np.random.seed(42)
    model_dir = sys.argv[1]
    config = Config.load(
        ['./default.conf',
         os.path.join(model_dir, 'model.conf')])
    model = create_model(config)
    test_data = load_data(sys.argv[2], config.dictionary, config.grammar,
                          config.max_length)
    print("unknown", unknown_tokens)

    with tf.Graph().as_default():
        tf.set_random_seed(1234)
        with tf.device('/cpu:0'):
            model.build()

            test_eval = Seq2SeqEvaluator(model,
                                         config.grammar,
                                         test_data,
                                         'test',
                                         config.reverse_dictionary,
                                         beam_size=config.beam_size,
                                         batch_size=config.batch_size)
            loader = tf.train.Saver()

            with tf.Session() as sess:
                loader.restore(sess, os.path.join(model_dir, 'best'))
                test_eval.eval(sess, save_to_file=True)
Example no. 2
def predict_whole_sequences(model, X, config_step, until=40):
    n = X.shape[0]
    true_steps = X.shape[1]
    d = X.shape[2]
    final_step = until + 1 if config_step else until
    XX = np.zeros((n, final_step, d))
    XX[:, :true_steps, :] = X
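    # autoregressive rollout: each prediction is fed back in as the
    # last feature of the next time step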
    for j in range(true_steps, final_step):
        pred = model.predict(XX[:, :j, :])
        XX[:, j, -1] = pred[:, -1, 0]
        # if repeat_config:
        #     XX[:, j, :-1] = XX[:, j-1, :-1]
    return pred[:, (true_steps - 1):, 0]


configs, learning_curves = load_data(source_dir='./data')
until = 40

for n_steps in [-1, 5, 10, 20]:
    randomize_length = n_steps == -1

    config_step = randomize_length or n_steps == 10

    n_folds = 3
    k_fold = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    fold = 0

    fold_test_errors = []

    y_e40 = []
    y_hat_e40 = {5: [], 10: [], 20: [], 30: []}
Example no. 3
def run():
    if len(sys.argv) < 5:
        print("** Usage: python " + sys.argv[0] +
              " <<Input Vocab>> <<Word Embeddings>> <<Train Set>> <<Test Set>>")
        sys.exit(1)

    np.random.seed(42)

    words, reverse = load_dictionary(sys.argv[1], 'tt')
    print("%d words in dictionary" % (len(words), ))
    embeddings_matrix = load_embeddings(sys.argv[2], words, embed_size=300)
    max_length = 60

    grammar = ThingtalkGrammar()

    train_data = load_data(sys.argv[3], words, grammar.dictionary, reverse,
                           grammar.tokens, max_length)
    test_data = load_data(sys.argv[4], words, grammar.dictionary, reverse,
                          grammar.tokens, max_length)
    print("unknown", unknown_tokens)

    # Tell TensorFlow that the model will be built into the default Graph.
    # (not required but good practice)
    with tf.Graph().as_default():
        # Create a session for running Ops in the Graph
        with tf.Session() as sess:
            input_embed_matrix = tf.constant(embeddings_matrix)
            train_inputs = tf.nn.embedding_lookup([input_embed_matrix],
                                                  np.array(train_data[0]))
            train_encoded = tf.reduce_sum(train_inputs, axis=1)
            #print train_encoded.eval()
            train_norm = tf.sqrt(
                tf.reduce_sum(train_encoded * train_encoded, axis=1))

            test_inputs = tf.nn.embedding_lookup([input_embed_matrix],
                                                 np.array(test_data[0]))
            test_encoded = tf.reduce_sum(test_inputs, axis=1)
            test_norm = tf.sqrt(
                tf.reduce_sum(test_encoded * test_encoded, axis=1))
            #print test_encoded.eval()

            #print (train_encoded - test_encoded).eval()

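            # cosine similarity between each test encoding and every train
            # encoding; argmax picks the nearest training example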
            distances = tf.matmul(test_encoded, tf.transpose(train_encoded))
            distances /= tf.reshape(train_norm, (1, -1))
            distances /= tf.reshape(test_norm, (-1, 1))
            #print distances.eval()
            indices = tf.argmax(distances, axis=1)
            #print indices.eval()

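            # accuracy counters: ok_0 = first token, ok_ch = channel set,
            # ok_fn = first token + function set, ok_full = exact program match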
            ok_0 = 0
            ok_ch = 0
            ok_fn = 0
            ok_full = 0
            correct_programs = set()
            gold_programs = set()
            for gold in test_data[2]:
                try:
                    gold = gold[:list(gold).index(grammar.end)]
                except ValueError:
                    pass
                gold_programs.add(tuple(gold))

            indices = indices.eval(session=sess)
            print(indices.shape)

            for test_i, train_i in enumerate(indices):
                gold = list(test_data[2][test_i])
                decoded = list(train_data[2][train_i])
                try:
                    decoded = decoded[:decoded.index(grammar.end)]
                except ValueError:
                    pass
                decoded_tuple = tuple(decoded)

                try:
                    gold = gold[:gold.index(grammar.end)]
                except ValueError:
                    pass

                #print "GOLD:", ' '.join(grammar.tokens[l] for l in gold)
                #print "DECODED:", ' '.join(grammar.tokens[l] for l in decoded)

                if (len(decoded) > 0 and len(gold) > 0
                        and decoded[0] == gold[0]):
                    ok_0 += 1

                def get_functions(seq):
                    return set([
                        x for x in [grammar.tokens[x] for x in seq] if
                        x.startswith('tt:') and not x.startswith('tt:param.')
                    ])

                gold_functions = get_functions(gold)
                decoded_functions = get_functions(decoded)
                gold_channels = set(
                    [x[x.index('.') + 1:] for x in gold_functions])
                decoded_channels = set(
                    [x[x.index('.') + 1:] for x in decoded_functions])
                if (len(decoded) > 0 and len(gold) > 0
                        and decoded[0] == gold[0]
                        and gold_functions == decoded_functions):
                    ok_fn += 1
                if gold_channels == decoded_channels:
                    ok_ch += 1
                if grammar.compare(gold, decoded):
                    correct_programs.add(decoded_tuple)
                    ok_full += 1

        print("ok 0:", float(ok_0) / len(test_data[0]))
        print("ok channel:", float(ok_ch) / len(test_data[0]))
        print("ok function:", float(ok_fn) / len(test_data[0]))
        print("ok full:", float(ok_full) / len(test_data[0]))
        print("recall:", float(len(correct_programs)) / len(gold_programs))
Example no. 4
def run():
    if len(sys.argv) < 8:
        print(
            "** Usage: python " + sys.argv[0] +
            " <<Benchmark: tt/geo>> <<Model: bagofwords/seq2seq>> <<Input Vocab>> <<Word Embeddings>> <<Model Directory>> <<Train Set>> <<PCA Set>>"
        )
        sys.exit(1)

    np.random.seed(42)
    benchmark = sys.argv[1]
    config, words, reverse, model = initialize(benchmark=benchmark,
                                               model_type=sys.argv[2],
                                               input_words=sys.argv[3],
                                               embedding_file=sys.argv[4])
    model_dir = sys.argv[5]

    train_data = load_data(sys.argv[6], words, config.grammar.dictionary,
                           reverse, config.grammar.tokens, config.max_length)
    pca_data = load_data(sys.argv[7], words, config.grammar.dictionary,
                         reverse, config.grammar.tokens, config.max_length)
    config.apply_cmdline(sys.argv[8:])

    print("unknown", unknown_tokens)

    # Tell TensorFlow that the model will be built into the default Graph.
    # (not required but good practice)
    with tf.Graph().as_default():
        # Build the model and add the variable initializer Op
        model.capture_final_encoder_state = True
        model.build()
        loader = tf.train.Saver()

        # Create a session for running Ops in the Graph
        with tf.Session() as sess:
            loader.restore(sess, os.path.join(model_dir, 'best'))

            inputs, input_lengths, _, _ = train_data

            final_encoder_state = None
            final_encoder_size = None
            if config.rnn_cell_type == 'lstm':
                final_encoder_state = tf.concat([
                    model.final_encoder_state[-1].c,
                    model.final_encoder_state[-1].h
                ], 1)
                final_encoder_size = 2 * config.hidden_size
            else:
                final_encoder_state = model.final_encoder_state[-1]
                final_encoder_size = config.hidden_size

            final_states_arrays = []
            # capture all the final encoder states
            for input_batch, input_length_batch in get_minibatches(
                [inputs, input_lengths], config.batch_size):
                feed_dict = model.create_feed_dict(input_batch,
                                                   input_length_batch)
                state_array = sess.run(final_encoder_state,
                                       feed_dict=feed_dict)
                #print state_array.shape
                final_states_arrays.append(state_array)

            X = np.concatenate(final_states_arrays, axis=0)
            assert X.shape == (len(inputs), final_encoder_size)
            X = tf.constant(X)

            mean = tf.reduce_mean(X, axis=0)
            centered_X = X - mean
            S, U, V = tf.svd(centered_X)

            # take only the top 2 principal components; the columns of V are
            # the right singular vectors, so transpose before slicing
            V = tf.transpose(V)[:2]
            V_array, mean_array = sess.run([V, mean])

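            # project the PCA set's encoder states through the mean and basis
            # computed on the train set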
            inputs, input_lengths, _, _ = pca_data

            X = final_encoder_state
            centered_X = X - tf.constant(mean_array)
            transformed_X = tf.matmul(centered_X, tf.constant(V_array.T))

            feed_dict = model.create_feed_dict(inputs, input_lengths)
            X_pca = sess.run(transformed_X, feed_dict=feed_dict)

            sentences = reconstruct_sentences(inputs, words['<<EOS>>'],
                                              reverse)
            show_pca(X_pca, sentences)
Example no. 5
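    # forward pass: hidden layer (w1) -> ReLU -> output layer (w2) -> softmax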
    layer = tf.matmul(layer, w1)
    layer = tf.nn.relu(layer)
    layer = tf.matmul(layer, w2)
    layer = tf.nn.softmax(layer)
    return layer


image_width = 28
class_num = 10

fashion_mnist = keras.datasets.fashion_mnist
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt',
    'Sneaker', 'Bag', 'Ankle boot'
]
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
train_images = (train_images / 255.0 - 0.5) * 2
test_images = (test_images / 255.0 - 0.5) * 2

dataset_size = len(train_images)

batch_size = 10
check_interval = 1000
steps = dataset_size // batch_size
steps = steps if dataset_size % batch_size == 0 else steps + 1

# x: input images
train_x = tf.placeholder(tf.float32,
                         shape=(None, image_width, image_width),
                         name='x-input')
# y: one-hot class labels
train_y = tf.placeholder(tf.float32,
                         shape=(None, class_num),
                         name='y-input')
Example no. 6
def task3(return_dict,
          config,
          randomize_length,
          n_steps,
          epochs,
          log_dir="logs"):
    from sklearn.model_selection import KFold
    from keras.models import clone_model

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    from util.loader import load_data
    from util.time_series_data import get_time_series, reshape_X, reshape_y
    from models.lstm import lstm
    from util.common import loss
    from util.tensorboard import tensorboard_log_values
    from preprocessing.standard_scaler import StandardScaler
    from preprocessing.augmentation import add_nontraining_time_series, add_perturbed_time_series

    n_steps_valid = n_steps
    n_steps_test = n_steps

    use_configs = True
    config_step = config["config_step"]
    repeat_config = config["repeat_config"]
    scale_configs = True

    validation_split = 0.3
    evaluate_each = 1

    lr = config["lr"]
    batchsize = config["batchsize"]

    lr_decay = config["lr_decay"]
    decay = 0 if not lr_decay else config["decay"]

    regularize = config["weight_decay"]
    alpha = 0 if not regularize else config["alpha"]

    remove_nonlearning = False

    augment = config["augment"]
    add_perturbed = 0 if not augment else config["add_perturbed"]
    add_nontraining = 0 if not augment else config["add_nontraining"]

    # title of current run
    run_name = current_time_str()
    if not randomize_length:
        run_name += "_%is" % n_steps
    else:
        run_name += "_rnd"
    run_name += "_lr%f" % lr
    run_name += "_bs%i" % batchsize
    if lr_decay:
        run_name += "_dc%f" % decay
    if regularize:
        run_name += "_a%f" % alpha
    run_name += "_cstp" if config_step else ""
    run_name += "_rptcnfg" if repeat_config else ""
    if augment:
        run_name += "_augm_%i_%i" % (add_perturbed, add_nontraining)
    print(run_name)

    # functions
    def plot_predicted_curves(model, X_test, test_indices, filename=None):
        plt.figure(figsize=(20, 10))
        n_plots = 20
        pred = predict_whole_sequences(model,
                                       X_test[:n_plots, :n_steps_test, :])
        for i in range(n_plots):
            plt.subplot(4, 5, i + 1)
            plt.plot(learning_curves[test_indices[i]], "g")
            if config_step:
                plt.plot(range(40), pred[i, :, :], "r")
            else:
                plt.plot(range(1, 40), pred[i, :, :], "r")
        if filename is not None:
            plt.savefig(filename)
            plt.close()

    def predict_whole_sequences(model, X):
        n = X.shape[0]
        true_steps = X.shape[1]
        d = X.shape[2]
        final_step = 41 if config_step else 40
        XX = np.zeros((n, final_step, d))
        XX[:, :true_steps, :] = X
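        # autoregressive rollout: each prediction is fed back in as the
        # last feature of the next time step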
        for j in range(true_steps, final_step):
            pred = model.predict(XX[:, :j, :])
            XX[:, j, -1] = pred[:, -1, 0]
            if repeat_config:
                XX[:, j, :-1] = XX[:, j - 1, :-1]
        return pred

    def evaluate_step40_loss(model, X_test, test_indices, n_steps_test):
        if config_step:
            n_steps_test += 1
        final_y = [learning_curves[index][-1] for index in test_indices]
        pred = predict_whole_sequences(model, X_test[:, :n_steps_test, :])
        final_y_hat = pred[:, -1, 0]
        return loss(np.array(final_y), final_y_hat)

    # file name for plots
    tmp_file_name = "tmp/model_%s" % run_name

    if config_step:
        n_steps_train = n_steps
        n_steps_valid += 1
        n_steps_test += 1
    else:
        n_steps_train = n_steps - 1

    # read data
    configs, learning_curves = load_data(source_dir='./data')

    if remove_nonlearning:
        keep_indices = [
            i for i in range(len(learning_curves))
            if learning_curves[i][-1] < 0.8
        ]
        configs = [configs[i] for i in keep_indices]
        learning_curves = [learning_curves[i] for i in keep_indices]

    n_params = len(configs[0]) if use_configs else 0
    d = n_params + 1

    # 3 fold CV:
    n_folds = 3
    k_fold = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    fold = 0

    fold_test_errors = []

    for training_indices, test_indices in k_fold.split(learning_curves):
        fold = fold + 1

        # split into training and validation
        training_indices = np.random.permutation(training_indices)
        valid_split_index = int(validation_split * len(training_indices))
        validation_indices = training_indices[:valid_split_index]
        training_indices = training_indices[valid_split_index:]

        # prepare training data:
        configs_train = [configs[index] for index in training_indices]
        learning_curves_train = [
            learning_curves[index] for index in training_indices
        ]
        if scale_configs:
            scaler = StandardScaler()
            configs_train = scaler.fit_transform(configs_train)
        if add_perturbed > 0:
            configs_train, learning_curves_train = add_perturbed_time_series(
                configs_train, learning_curves_train, add_perturbed)
        if add_nontraining > 0:
            configs_train, learning_curves_train = add_nontraining_time_series(
                configs_train, learning_curves_train, add_nontraining)
        n_train = len(configs_train)
        X_train = get_time_series(configs_train,
                                  learning_curves_train,
                                  use_configs=use_configs,
                                  repeat_config=repeat_config,
                                  config_step=config_step)
        X_train = reshape_X(X_train)
        Y_train = learning_curves_train

        # prepare validation data:
        configs_valid = [configs[index] for index in validation_indices]
        learning_curves_valid = [
            learning_curves[index] for index in validation_indices
        ]
        if scale_configs:
            configs_valid = scaler.transform(configs_valid)
        X_valid = get_time_series(configs_valid,
                                  learning_curves_valid,
                                  use_configs=use_configs,
                                  repeat_config=repeat_config,
                                  config_step=config_step)
        X_valid = reshape_X(X_valid)

        # prepare test data:
        configs_test = [configs[index] for index in test_indices]
        learning_curves_test = [
            learning_curves[index] for index in test_indices
        ]
        if scale_configs:
            configs_test = scaler.transform(configs_test)
        X_test = get_time_series(configs_test,
                                 learning_curves_test,
                                 use_configs=use_configs,
                                 repeat_config=repeat_config,
                                 config_step=config_step)
        X_test = reshape_X(X_test)

        n_valid = len(validation_indices)
        n_test = len(test_indices)

        Y_train = reshape_y(Y_train)
        Y_valid = [
            learning_curves_valid[i][1:(n_steps_valid + 1)]
            for i in range(n_valid)
        ]
        Y_test = [
            learning_curves_test[i][1:(n_steps_test + 1)]
            for i in range(n_test)
        ]

        n_batches = int(np.ceil(n_train / batchsize))

        model = lstm(d,
                     lr,
                     decay=decay,
                     many2many=True,
                     regularize=regularize,
                     alpha=alpha,
                     batchsize=None)

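        # validation MSE of the step-40 prediction (given the first
        # 5/10/20/30 curve steps), tracked at the best epoch by mean E40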
        best_valid_e40 = {}
        for k in [5, 10, 20, 30]:
            best_valid_e40[k] = float("inf")
        best_mean_valid_e40 = float("inf")
        best_valid_e40_epoch = -1

        for epoch in range(epochs):
            print("epoch = %i" % epoch)

            # random permutation of training data
            permutation = np.random.permutation(range(n_train))
            X_train_permuted = X_train[permutation, :, :]
            Y_train_permuted = Y_train[permutation, :, :]

            training_losses = []
            for batch in range(n_batches):
                if randomize_length:
                    n_steps_train = int(np.random.uniform(5, 21))
                    if config_step:
                        n_steps_train += 1
                batch_begin = batch * batchsize
                batch_end = batch_begin + batchsize
                x = X_train_permuted[batch_begin:batch_end, :n_steps_train, :]
                y = Y_train_permuted[batch_begin:batch_end,
                                     1:(n_steps_train + 1)]
                y_hat = model.predict(x)
                model.train_on_batch(x, y)
                training_losses.append(loss(y, y_hat))
            training_loss = np.mean(training_losses)
            print("training loss =   %f" % training_loss)

            # validation
            if (epoch + 1) % 1 == 0:
                y_hat = model.predict(X_valid[:, :n_steps_valid, :])[:, :, 0]
                validation_loss = np.mean(loss(Y_valid, y_hat))
                print("validation loss = %f" % validation_loss)

            if (epoch + 1) % evaluate_each == 0:
                print(lr, decay, batchsize)
                print("best[:5]  = %f @ %i" %
                      (best_valid_e40[5], best_valid_e40_epoch))
                print("best[:10] = %f @ %i" %
                      (best_valid_e40[10], best_valid_e40_epoch))
                print("best[:20] = %f @ %i" %
                      (best_valid_e40[20], best_valid_e40_epoch))
                print("best[:30] = %f @ %i" %
                      (best_valid_e40[30], best_valid_e40_epoch))

                valid_e40_5 = evaluate_step40_loss(model, X_valid,
                                                   validation_indices, 5)
                print("validation MSE[:5]@40  = %f" % valid_e40_5)
                valid_e40_10 = evaluate_step40_loss(model, X_valid,
                                                    validation_indices, 10)
                print("validation MSE[:10]@40 = %f" % valid_e40_10)
                valid_e40_20 = evaluate_step40_loss(model, X_valid,
                                                    validation_indices, 20)
                print("validation MSE[:20]@40 = %f" % valid_e40_20)
                valid_e40_30 = evaluate_step40_loss(model, X_valid,
                                                    validation_indices, 30)
                print("validation MSE[:30]@40 = %f" % valid_e40_30)

                mean_valid_e40 = np.mean(
                    [valid_e40_5, valid_e40_10, valid_e40_20, valid_e40_30])

                prefix = "losses_f%i/" % fold
                tensorboard_log_values(
                    log_dir, run_name, epoch, {
                        prefix + "training": training_loss,
                        prefix + "validation": validation_loss,
                        prefix + "validation_E40_5": valid_e40_5,
                        prefix + "validation_E40_10": valid_e40_10,
                        prefix + "validation_E40_20": valid_e40_20,
                        prefix + "validation_E40_30": valid_e40_30,
                        prefix + "validation_E40_mean": mean_valid_e40
                    })

                if mean_valid_e40 < best_mean_valid_e40:
                    print("* new best model *")

                    best_valid_e40_epoch = epoch
                    best_valid_e40[5] = valid_e40_5
                    best_valid_e40[10] = valid_e40_10
                    best_valid_e40[20] = valid_e40_20
                    best_valid_e40[30] = valid_e40_30
                    best_mean_valid_e40 = mean_valid_e40

                    best_model = clone_model(model)
                    best_model.set_weights(model.get_weights())
            # if (epoch + 1) % 10 == 0:
            #     filename = tmp_file_name + "_f%i_e%i.png" % (fold, epoch)
            #     print(filename)
            #     plot_predicted_curves(model, X_test, test_indices,
            #                           filename=filename)

        # evaluation on test data
        test_e40 = {}
        test_e40[5] = evaluate_step40_loss(best_model, X_test, test_indices, 5)
        test_e40[10] = evaluate_step40_loss(best_model, X_test, test_indices,
                                            10)
        test_e40[20] = evaluate_step40_loss(best_model, X_test, test_indices,
                                            20)
        test_e40[30] = evaluate_step40_loss(best_model, X_test, test_indices,
                                            30)
        fold_test_errors.append(test_e40)
        print(test_e40)

        #filename = tmp_file_name + "_f%i_best.png" % fold
        #print(filename)
        #plot_predicted_curves(best_model, X_test, test_indices, filename = filename)

    means_e40 = {}
    for steps in [5, 10, 20, 30]:
        print("MSE@40 for %i input steps:" % steps)
        e40_folds = [fold_res[steps] for fold_res in fold_test_errors]
        print(e40_folds)
        mean_e40 = np.mean(e40_folds)
        print("mean = %f" % mean_e40)
        means_e40[steps] = mean_e40
    return_dict["results"] = means_e40
Example no. 7
def main(estimators, n_folds=3):
    # read data and transform it to numpy arrays
    configs, learning_curves = load_data(source_dir='../data')
    configs = np.array(list(map(lambda x: list(x.values()), configs)))
    learning_curves = np.array(learning_curves)

    # initialise CV
    k_fold = KFold(n_splits=n_folds, shuffle=True, random_state=1)

    # store predicted and true y
    y_y_hat = np.zeros((len(estimators), 2, learning_curves.shape[0]))

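    # 6 columns per model: folds 0-2 without preprocessing,
    # folds 3-5 with StandardScaler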
    performances = np.zeros((len(estimators), 6))

    for m_idx, model_desc in enumerate(estimators):
        current_fold = 0

        print(model_desc)
        for preprocessing in [False, True]:
            # CV folds
            for train_indices, test_indices in k_fold.split(configs):
                # split into training and test data
                train_configs = configs[train_indices]
                train_curves = learning_curves[train_indices]
                test_configs = configs[test_indices]
                test_curves = learning_curves[test_indices]

                # preprocessing
                if preprocessing:
                    scaler = StandardScaler()
                    train_configs = scaler.fit_transform(train_configs)
                    test_configs = scaler.transform(test_configs)

                # train model
                model = eval(model_desc)

                model.fit(train_configs, train_curves[:, -1])

                # evaluate model
                y = test_curves[:, -1]
                y_hat = model.predict(test_configs)
                test_loss = loss(y_hat, y)
                performances[m_idx, current_fold] = test_loss
                print("fold test loss = %f" % test_loss)

                # store prediction
                if preprocessing:
                    y_y_hat[m_idx, 0, test_indices] = y
                    y_y_hat[m_idx, 1, test_indices] = y_hat
                current_fold += 1

        print("mean CV loss w/o prep = {0:.5f}, w prep = {1:.5f}".format(
            np.mean(performances[m_idx, :3]), np.mean(performances[m_idx,
                                                                   3:6])))

    data = {
        'no prep': np.mean(performances[:, :3], axis=1),
        'prep': np.mean(performances[:, 3:6], axis=1)
    }
    frame = pd.DataFrame(data, index=estimators)
    print(frame)

    return performances, y_y_hat
Example no. 8
def run():
    if len(sys.argv) < 4:
        print("** Usage: python3 " + sys.argv[0] +
              " <<Model Directory>> <<Test Set>> <<Eval Results TSV>>")
        sys.exit(1)

    np.random.seed(42)
    model_dir = sys.argv[1]
    config = Config.load(
        ['./default.conf',
         os.path.join(model_dir, 'model.conf')])
    model = create_model(config)

    test_data = load_data(sys.argv[2], config.dictionary, config.grammar,
                          config.max_length)

    with tf.Graph().as_default():
        tf.set_random_seed(1234)
        model.build()
        loader = tf.train.Saver()

        inputs, input_lengths, parses, labels, label_lengths = test_data

        final_encoder_state = tf.concat(nest.flatten(
            model.final_encoder_state),
                                        axis=1)
        final_encoder_size = final_encoder_state.get_shape()[1]

        final_states = OrderedDict()
        with tf.Session() as sess:
            loader.restore(sess, os.path.join(model_dir, 'best'))

            # capture all the final encoder states
            for input_batch, input_length_batch, parse_batch, label_batch, label_length_batch in get_minibatches(
                [inputs, input_lengths, parses, labels, label_lengths],
                    config.batch_size):
                feed_dict = model.create_feed_dict(input_batch,
                                                   input_length_batch,
                                                   parse_batch)
                state_array = sess.run(final_encoder_state,
                                       feed_dict=feed_dict)
                #print state_array.shape

                for state, input, input_length, label, length in zip(
                        state_array, input_batch, input_length_batch,
                        label_batch, label_length_batch):
                    label = label[:length]
                    program = ' '.join(
                        config.grammar.tokens[x] for x in
                        label)  # if is_function(config.grammar.tokens[x]))
                    if program not in final_states:
                        final_states[program] = [(state, input[:input_length])]
                    else:
                        final_states[program].append(
                            (state, input[:input_length]))

        # optionally: [prog for prog in final_states if len(final_states[prog]) > 1]
        prog_array = list(final_states)
        prog_index = dict()
        num_programs = len(prog_array)
        print('num programs', num_programs)
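        # centroid of the final encoder states for each distinct program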
        centers = np.zeros((num_programs, final_encoder_size),
                           dtype=np.float32)
        for i, program in enumerate(prog_array):
            prog_index[program] = i
            centers[i] = np.mean([x[0] for x in final_states[program]], axis=0)

        eval_data = []
        with open(sys.argv[3]) as fp:
            for line in fp:
                sentence, gold, predicted, _ = line.strip().split('\t')
                if gold == predicted:
                    continue
                gold += ' <<EOS>>'
                predicted += ' <<EOS>>'
                if gold in prog_index and predicted in prog_index:
                    sentence_vector, sentence_length = vectorize(
                        sentence, config.dictionary, config.max_length)
                    gold_index = prog_index[gold]
                    gold_center = centers[gold_index]
                    predicted_index = prog_index[predicted]
                    predicted_center = centers[predicted_index]
                    eval_data.append(
                        (gold, predicted, gold_center, predicted_center,
                         sentence_vector, sentence_length))
                    #print(np.linalg.norm(gold_center-predicted_center), gold, predicted, sentence, sep='\t')
                elif gold not in prog_index:
                    #print('no gold', gold, file=sys.stderr)
                    pass
                elif predicted not in prog_index:
                    #print('no predicted', file=sys.stderr)
                    pass

        with tf.Session() as sess:
            loader.restore(sess, os.path.join(model_dir, 'best'))

            def flip(list_of_tuples):
                inner_length = len(list_of_tuples[0])
                tuple_of_lists = [[x[i] for x in list_of_tuples]
                                  for i in range(inner_length)]
                return tuple_of_lists

            with open('./eval.tsv', 'w') as out:
                for gold_batch, predicted_batch, gold_center_batch, predicted_center_batch, input_batch, input_length_batch in get_minibatches(
                        flip(eval_data), config.batch_size):
                    parse_batch = np.zeros(
                        (len(input_batch), 2 * config.max_length - 1),
                        dtype=np.bool)
                    feed_dict = model.create_feed_dict(input_batch,
                                                       input_length_batch,
                                                       parse_batch)
                    state_array = sess.run(final_encoder_state,
                                           feed_dict=feed_dict)

                    assert len(state_array) == len(gold_batch)
                    for state, input, input_length, gold, predicted, gold_center, predicted_center in zip(
                            state_array, input_batch, input_length_batch,
                            gold_batch, predicted_batch, gold_center_batch,
                            predicted_center_batch):
                        gold_predicted_dist = np.linalg.norm(gold_center -
                                                             predicted_center)
                        sentence_gold_dist = np.linalg.norm(state -
                                                            gold_center)
                        sentence_predicted_dist = np.linalg.norm(
                            state - predicted_center)
                        sentence = ' '.join(config.reverse_dictionary[x]
                                            for x in input[:input_length])
                        print(gold_predicted_dist,
                              sentence_gold_dist,
                              sentence_predicted_dist,
                              gold,
                              predicted,
                              sentence,
                              sep='\t',
                              file=out)
        print('written eval.tsv')

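        # distance statistics: each program's own sentences vs. its centroid
        # ("good") and the sentences of up to 10 random other programs ("bad")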
        num_good_sentences = np.zeros((num_programs, ), dtype=np.int32)
        sum_good_distance = np.zeros((num_programs, ), dtype=np.float32)
        num_bad_sentences = np.zeros((num_programs, ), dtype=np.int32)
        sum_bad_distance = np.zeros((num_programs, ), dtype=np.float32)
        for i, program in enumerate(prog_array):
            num_good_sentences[i] = len(final_states[program])

            for encoding, sentence in final_states[program]:
                dist = np.linalg.norm(encoding - centers[i])
                sum_good_distance[i] += dist

            # negative examples
            for negative in np.random.choice(prog_array,
                                             size=(10, ),
                                             replace=False):
                if negative == program:
                    continue
                num_bad_sentences[i] += len(final_states[negative])
                for negative_enc, negative_sentence in final_states[negative]:
                    dist = np.linalg.norm(negative_enc - centers[i])
                    sum_bad_distance[i] += dist

        avg_good_distance = sum_good_distance / num_good_sentences
        avg_bad_distance = sum_bad_distance / num_bad_sentences

        with open('./encoded.csv', 'w') as fp:
            writer = csv.writer(fp)
            writer.writerows(
                zip(num_good_sentences, num_bad_sentences, avg_good_distance,
                    avg_bad_distance, sum_good_distance, sum_bad_distance))
Example no. 9
def run():
    if len(sys.argv) < 4:
        print("** Usage: python3 " + sys.argv[0] +
              " <<Model Directory>> <<Train Set>> <<Test Set>>")
        sys.exit(1)

    np.random.seed(42)
    model_dir = sys.argv[1]
    config = Config.load(
        ['./default.conf',
         os.path.join(model_dir, 'model.conf')])
    model = create_model(config)
    train_data = load_data(sys.argv[2], config.dictionary, config.grammar,
                           config.max_length)
    pca_data = load_data(sys.argv[3], config.dictionary, config.grammar,
                         config.max_length)
    print("unknown", unknown_tokens)

    with tf.Graph().as_default():
        model.build()
        loader = tf.train.Saver()

        with tf.Session() as sess:
            loader.restore(sess, os.path.join(model_dir, 'best'))

            inputs, input_lengths, parses, _, _ = train_data

            final_encoder_state = tf.concat(nest.flatten(
                model.final_encoder_state),
                                            axis=1)
            final_encoder_size = final_encoder_state.get_shape()[1]

            final_states_arrays = []
            # capture all the final encoder states
            for input_batch, input_length_batch, parse_batch in get_minibatches(
                [inputs, input_lengths, parses], config.batch_size):
                feed_dict = model.create_feed_dict(input_batch,
                                                   input_length_batch,
                                                   parse_batch)
                state_array = sess.run(final_encoder_state,
                                       feed_dict=feed_dict)
                #print state_array.shape
                final_states_arrays.append(state_array)

            X = np.concatenate(final_states_arrays, axis=0)
            assert X.shape == (len(inputs), final_encoder_size)
            X = tf.constant(X)

            mean = tf.reduce_mean(X, axis=0)
            centered_X = X - mean
            S, U, V = tf.svd(centered_X)

            # take only the top 2 principal components; the columns of V are
            # the right singular vectors, so transpose before slicing
            V = tf.transpose(V)[:2]
            V_array, mean_array = sess.run([V, mean])

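            # project the PCA set's encoder states through the mean and basis
            # computed on the train set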
            inputs, input_lengths, parses, labels, label_lengths = pca_data

            X = final_encoder_state
            centered_X = X - tf.constant(mean_array)
            transformed_X = tf.matmul(centered_X, tf.constant(V_array.T))

            feed_dict = model.create_feed_dict(inputs, input_lengths, parses)
            X_pca = sess.run(transformed_X, feed_dict=feed_dict)

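            # toggle: label the PCA points with input sentences (True) or
            # with the label programs (False)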
            if False:
                sentences = reconstruct_sentences(inputs, input_lengths,
                                                  config.reverse_dictionary)
            else:
                sentences = reconstruct_sentences(labels, label_lengths,
                                                  config.grammar.tokens)
            show_pca(X_pca, sentences)
Example no. 10
def run():
    if len(sys.argv) < 3:
        print("** Usage: python3 " + sys.argv[0] +
              " <<Model Directory>> <<Train Set>> [<<Dev Set>>]")
        sys.exit(1)

    np.random.seed(42)

    model_dir = sys.argv[1]
    model_conf = os.path.join(model_dir, 'model.conf')
    config = Config.load(['./default.conf', model_conf])
    model = create_model(config)
    train_data = load_data(sys.argv[2], config.dictionary, config.grammar,
                           config.max_length)
    if len(sys.argv) > 3:
        dev_data = load_data(sys.argv[3], config.dictionary, config.grammar,
                             config.max_length)
    else:
        dev_data = None
    print("unknown", unknown_tokens)
    try:
        os.mkdir(model_dir)
    except OSError:
        pass
    if not os.path.exists(model_conf):
        config.save(model_conf)

    with tf.Graph().as_default():
        tf.set_random_seed(1234)
        model.build()
        init = tf.global_variables_initializer()

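        # retain up to one checkpoint per training epoch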
        saver = tf.train.Saver(max_to_keep=config.n_epochs)

        train_eval = Seq2SeqEvaluator(model,
                                      config.grammar,
                                      train_data,
                                      'train',
                                      config.reverse_dictionary,
                                      beam_size=config.beam_size,
                                      batch_size=config.batch_size)
        dev_eval = Seq2SeqEvaluator(model,
                                    config.grammar,
                                    dev_data,
                                    'dev',
                                    config.reverse_dictionary,
                                    beam_size=config.beam_size,
                                    batch_size=config.batch_size)
        trainer = Trainer(model,
                          train_data,
                          train_eval,
                          dev_eval,
                          saver,
                          model_dir=model_dir,
                          max_length=config.max_length,
                          batch_size=config.batch_size,
                          n_epochs=config.n_epochs,
                          dropout=config.dropout)

        with tf.Session() as sess:
            # Run the Op to initialize the variables.
            sess.run(init)
            #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

            # Fit the model
            best_dev, best_train = trainer.fit(sess)

            print("best train", best_train)
            print("best dev", best_dev)
Example no. 11
def main(n_steps=10, epochs=200):
    """
    Trains and evaluates the MSE of an LSTM model in a 3-fold CV.

    n_steps: how many steps of the learning curve are used for training and
        predicting
    epochs: number of training epochs

    repeat_config, scale_configs, lr, regularize and alpha are read from
    module scope; repeat_config controls whether the configuration is fed
    into the network at each time step or only at the first time step.
    """
    configs, learning_curves = load_data(source_dir='./data')
    Y = [curve[-1] for curve in learning_curves]

    n_params = len(configs[0])
    d = n_params + 1

    # 3 fold CV:
    n_folds = 3
    k_fold = KFold(n_splits=n_folds)
    fold_mses = []
    fold = 0
    for training_indices, test_indices in k_fold.split(Y):
        fold = fold + 1
        print("***** FOLD %i *****" % fold)

        # prepare training data:
        configs_train = [configs[index] for index in training_indices]
        learning_curves_train = [
            learning_curves[index][:n_steps] for index in training_indices
        ]
        if scale_configs:
            scaler = StandardScaler()
            configs_train = scaler.fit_transform(configs_train)
        X_train = get_time_series(configs_train,
                                  learning_curves_train,
                                  repeat_config=repeat_config)
        Y_train = [Y[index] for index in training_indices]

        # prepare test data:
        configs_test = [configs[index] for index in test_indices]
        learning_curves_test = [
            learning_curves[index][:n_steps] for index in test_indices
        ]
        if scale_configs:
            configs_test = scaler.transform(configs_test)
        X_test = get_time_series(configs_test,
                                 learning_curves_test,
                                 repeat_config=repeat_config)
        Y_test = [Y[index] for index in test_indices]

        n_train = len(training_indices)

        model = lstm(d, lr, regularize=regularize, alpha=alpha)

        # training:
        for epoch in range(epochs):
            print("epoch = %i" % epoch)
            for i in range(n_train):
                x = X_train[i].reshape(1, -1, d)
                y = Y_train[i]
                model.train_on_batch(x, np.array([[y]]))
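                # clear the recurrent state between curves; each curve is an
                # independent sequence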
                model.reset_states()

            # validation output:
            mse_train = evaluate(model, X_train, Y_train)
            mse_test = evaluate(model, X_test, Y_test)
            print("training mse = %f" % mse_train)
            print("test mse = %f" % mse_test)
            #if (epoch + 1) % 10 == 0:
            #    predictions = predict(model, X_test)
            #    print("y_hat", "y")
            #    for y_hat, y in zip(predictions, Y_test):
            #        print(y_hat, y)

        # evaluation:
        fold_mses.append(evaluate(model, X_test, Y_test))

    print("\nmse per fold:")
    print(fold_mses)
    print("mean mse:")
    print(np.mean(fold_mses))