def main():
    """Smoke-test the phase embedding layer on data from ``SSTDataReader``.

    Loads word vectors from a hard-coded path, builds a model whose only
    layer is the phase embedding, and prints the model's predictions (and
    their shape) for the training inputs. No training is performed.
    """
    glove_path = '../glove/glove.6B.100d.txt'
    data_root = '../'
    seq_len = 60

    reader = SSTDataReader(data_root, nclasses=2)
    embedding_params = reader.get_word_embedding(glove_path,
                                                 orthonormalized=False)
    lookup_table = get_lookup_table(embedding_params)

    sequence_input = Input(shape=(seq_len, ), dtype='int32')
    phase_embedding = phase_embedding_layer(seq_len, lookup_table.shape[0])

    # The amplitude layer is constructed but not connected to the model yet;
    # only the phase embedding is applied to the input below.
    amplitude_embedding = amplitude_embedding_layer(
        np.transpose(lookup_table), seq_len)

    model = Model(sequence_input, phase_embedding(sequence_input))
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    model.summary()

    # batch_size=-1 is forwarded unchanged to the reader.
    splits = reader.create_batch(embedding_params=embedding_params,
                                 batch_size=-1)
    training_data = splits['train']
    test_data = splits['test']
    validation_data = splits['dev']

    # All three splits are converted, although only the training inputs are
    # fed to the model afterwards.
    train_x, train_y = data_gen(training_data, seq_len)
    test_x, test_y = data_gen(test_data, seq_len)
    val_x, val_y = data_gen(validation_data, seq_len)

    predictions = model.predict(train_x)
    print(predictions)
    print(predictions.shape)
Beispiel #2
0
    df1.ix[l[params.network_type],params.dataset_name] = max(grid_result.best_score_)
    df1.to_excel(experiment_results_path)


if __name__ == '__main__':
    # Script entry point: read the experiment configuration, prepare the
    # word-embedding lookup table, then launch the grid search.
    params = Params()
    params.parse_config('config/waby.ini')

    reader = data_reader_initialize(params.dataset_name, params.datasets_dir)

    # 'orthogonalize' requests orthonormalized vectors; 'random' and
    # 'word2vec' request the plain ones. Any other value is rejected.
    if params.wordvec_initialization == 'orthogonalize':
        embedding_params = reader.get_word_embedding(params.wordvec_path,
                                                     orthonormalized=True)
    elif params.wordvec_initialization in ('random', 'word2vec'):
        embedding_params = reader.get_word_embedding(params.wordvec_path,
                                                     orthonormalized=False)
    else:
        raise ValueError('The input word initialization approach is invalid!')

    # gridsearch() receives only `params`; whether it also reads the
    # module-level `reader` / `embedding_params` / `lookup_table` cannot be
    # seen from here, so these names are kept as they are.
    lookup_table = get_lookup_table(embedding_params)

    gridsearch(params)







Beispiel #3
0
def complex_embedding(params):
    """Train, evaluate and persist one embedding network described by ``params``.

    The function
      1. loads the dataset reader and the word-embedding lookup table,
      2. builds the network selected by ``params.network_type``,
      3. fits it on the training split (validating on the test split),
      4. writes a learning curve, ``eval.txt``, the first two weight arrays,
         the ``word2id`` mapping and the model itself under
         ``params.eval_dir``,
      5. records the best validation accuracy in
         ``eval/experiment_result.xlsx``.

    Args:
        params: configuration object. Attributes read here: ``dataset_name``,
            ``datasets_dir``, ``wordvec_initialization``, ``wordvec_path``,
            ``network_type``, ``loss``, ``optimizer``, ``batch_size``,
            ``epochs`` and ``eval_dir``.

    Raises:
        ValueError: if ``params.wordvec_initialization`` is not one of
            ``'orthogonalize'``, ``'random'`` or ``'word2vec'``.
        KeyError: if ``params.network_type`` is not ``'complex_mixture'``,
            ``'complex_superposition'`` or ``'real'``. This is only raised at
            the final spreadsheet step, i.e. after training has finished.
    """
    reader = data_reader_initialize(params.dataset_name, params.datasets_dir)

    init_mode = params.wordvec_initialization
    if init_mode == 'orthogonalize':
        orthonormalized = True
    elif init_mode in ('random', 'word2vec'):
        orthonormalized = False
    else:
        raise ValueError('The input word initialization approach is invalid!')
    embedding_params = reader.get_word_embedding(
        params.wordvec_path, orthonormalized=orthonormalized)

    lookup_table = get_lookup_table(embedding_params)
    max_sequence_length = reader.max_sentence_length
    random_init = init_mode == 'random'

    # Any network type other than the two complex variants falls back to the
    # real-valued network.
    if params.network_type == 'complex_superposition':
        build_network = run_complex_embedding_network_superposition
    elif params.network_type == 'complex_mixture':
        build_network = run_complex_embedding_network_mixture
    else:
        build_network = run_real_embedding_network
    model = build_network(lookup_table,
                          max_sequence_length,
                          reader.nb_classes,
                          random_init=random_init)

    model.compile(loss=params.loss,
                  optimizer=params.optimizer,
                  metrics=['accuracy'])
    model.summary()

    train_test_val = reader.create_batch(embedding_params=embedding_params,
                                         batch_size=-1)
    training_data = train_test_val['train']
    test_data = train_test_val['test']
    validation_data = train_test_val['dev']

    train_x, train_y = data_gen(training_data, max_sequence_length)
    test_x, test_y = data_gen(test_data, max_sequence_length)
    val_x, val_y = data_gen(validation_data, max_sequence_length)
    print(len(train_x))
    print(len(test_x))
    print(len(val_x))

    train_y = to_categorical(train_y)
    test_y = to_categorical(test_y)
    val_y = to_categorical(val_y)

    # NOTE: the *test* split is passed as validation data, so the "val"
    # accuracies below are test accuracies; the dev split is not used for
    # training or evaluation in this function.
    history = model.fit(x=train_x,
                        y=train_y,
                        batch_size=params.batch_size,
                        epochs=params.epochs,
                        validation_data=(test_x, test_y))

    # Older Keras reports the metric as 'acc'/'val_acc'; newer releases use
    # the name given to compile(), i.e. 'accuracy'/'val_accuracy'.
    logs = history.history
    val_acc = logs['val_acc'] if 'val_acc' in logs else logs['val_accuracy']
    train_acc = logs['acc'] if 'acc' in logs else logs['accuracy']

    # makedirs also creates missing parents and tolerates an existing dir.
    os.makedirs(params.eval_dir, exist_ok=True)

    learning_curve_path = os.path.join(params.eval_dir, 'learning_curve')
    epoch_indexes = [x + 1 for x in range(len(val_acc))]
    # Draw on a fresh figure and close it afterwards so that repeated calls
    # do not pile their curves onto one shared figure.
    fig = plt.figure()
    line_1, = plt.plot(epoch_indexes, val_acc)
    line_2, = plt.plot(epoch_indexes, train_acc)
    plt.legend([line_1, line_2], ['test_acc', 'train_acc'])
    fig.savefig(learning_curve_path, dpi=fig.dpi)
    plt.close(fig)

    # evaluate() returns [loss, accuracy] for the compiled loss and metric.
    evaluation = model.evaluate(x=test_x, y=test_y)

    eval_file_path = os.path.join(params.eval_dir, 'eval.txt')
    with open(eval_file_path, 'w') as eval_file:
        eval_file.write('acc: {}, loss: {}'.format(evaluation[1],
                                                   evaluation[0]))

    embedding_dir = os.path.join(params.eval_dir, 'embedding')
    os.makedirs(embedding_dir, exist_ok=True)
    # NOTE(review): the first two weight arrays are saved as the phase and
    # amplitude embeddings; confirm this ordering holds for every network
    # type (in particular the real-valued one).
    trained_weights = model.get_weights()
    np.save(os.path.join(embedding_dir, 'phase_embedding'),
            trained_weights[0])
    np.save(os.path.join(embedding_dir, 'amplitude_embedding'),
            trained_weights[1])
    np.save(os.path.join(embedding_dir, 'word2id'),
            embedding_params['word2id'])
    save_model(model, os.path.join(params.eval_dir, 'model'))

    experiment_results_path = 'eval/experiment_result.xlsx'
    # Close the workbook before the same path is overwritten below.
    with pd.ExcelFile(experiment_results_path) as xls_file:
        df1 = xls_file.parse('Sheet1')

    # Row label of each network type in the results sheet. parse() yields a
    # default integer index, so label-based .loc addresses the same cell the
    # removed .ix indexer did.
    row_of_network = {'complex_mixture': 0, 'complex_superposition': 1, 'real': 2}
    df1.loc[row_of_network[params.network_type],
            params.dataset_name] = max(val_acc)
    # NOTE(review): to_excel() also writes the index as a leading column,
    # which parse() will read back as data on the next run; confirm the
    # intended sheet layout before changing this.
    df1.to_excel(experiment_results_path)