Example #1
def load_dataset(path, config):
    print('Loading data: ' + path)
    train, valid, test = read_data.load_data(path, n_words=config.vocab_size, \
        valid_portion=0.15, maxlen=config.maxlen)
    train = read_data.prepare_data(train[0], train[1], maxlen=config.maxlen)
    valid = read_data.prepare_data(valid[0], valid[1], maxlen=config.maxlen)
    test = read_data.prepare_data(test[0], test[1], maxlen=config.maxlen)
    return (train, valid, test)
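# Usage sketch: a minimal call to load_dataset, assuming a hypothetical config
# object that exposes only the vocab_size and maxlen fields read above; the
# real Config class and dataset path are not shown in this example.
from types import SimpleNamespace

config = SimpleNamespace(vocab_size=10000, maxlen=100)        # assumed placeholder values
train, valid, test = load_dataset('path/to/dataset', config)  # placeholder path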
Example #2
import numpy as np
from read_data import read_messages, read_classes, prepare_data

#messages = read_messages()
#classes = read_classes()
data = prepare_data()

a = np.array(data)
np.savetxt('data_mapbook.csv', a, fmt='%s', delimiter=',')
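# Sketch: reading the saved rows back with NumPy, mirroring the fmt='%s'
# string format and comma delimiter used above.
loaded = np.genfromtxt('data_mapbook.csv', dtype=str, delimiter=',')
print(loaded.shape)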
Example #3
    config.layer = int(sys.argv[1])
    config.step = int(sys.argv[2])
    print("dataset: " + sys.argv[3])
    print("iteration: " + str(config.layer))
    print("step: " + str(config.step))
    print("model: " + str(sys.argv[4]))
    # load the pre-trained word2vec embedding matrix
    with open(vector_path, 'rb') as f:
        matrix = np.array(pickle.load(f))
    config.vocab_size = matrix.shape[0]
    #load datasets
    train_dataset, valid_dataset, test_dataset = read_data.load_data(\
        path=path,n_words=config.vocab_size)
    config.num_label = len(set(train_dataset[1]))
    print("number label: " + str(config.num_label))
    train_dataset = read_data.prepare_data(train_dataset[0], train_dataset[1])
    valid_dataset = read_data.prepare_data(valid_dataset[0], valid_dataset[1])
    test_dataset = read_data.prepare_data(test_dataset[0], test_dataset[1])

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_normal_initializer(0, 0.05)

        classifier = Classifer(config=config, session=session)

        total = 0
        # print each trainable variable and accumulate its parameter count in total
        for v in tf.trainable_variables():
            print(v.name)
            shape = v.get_shape()
            try:
                size = shape[0].value * shape[1].value
Example #4
    domain_size, domain_list = get_domains()

    #load dataset
    train_datasets, valid_datasets, test_datasets = [], [], []
    for domain in domain_list:
        train, valid, test = read_data.load_data(path='dataset'+config.dataset+'/'+domain+'/dataset',n_words=config.vocab_size, \
            valid_portion=config.valid_portion, maxlen=config.maxlen)
        train_datasets.append(train)
        valid_datasets.append(valid)
        test_datasets.append(test)

    #transform dataset to matrix
    for index in range(domain_size):
        train = read_data.prepare_data(train_datasets[index][0],
                                       train_datasets[index][1],
                                       maxlen=config.maxlen,
                                       traindata=True,
                                       index=index)
        valid = read_data.prepare_data(valid_datasets[index][0],
                                       valid_datasets[index][1],
                                       maxlen=config.maxlen,
                                       traindata=False,
                                       index=index)
        test = read_data.prepare_data(test_datasets[index][0],
                                      test_datasets[index][1],
                                      maxlen=config.maxlen,
                                      traindata=False,
                                      index=index)
        train_datasets[index] = train
        valid_datasets[index] = valid
        test_datasets[index] = test
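# Sketch of what the get_domains() helper used above might look like; it is
# not shown in this example, so this is only an assumption based on the
# 'dataset' + config.dataset + '/<domain>/dataset' paths built in the loop.
import os

def get_domains(root='dataset'):
    """Hypothetical helper: treat each subdirectory of the dataset root as a
    domain and return (number of domains, list of domain names)."""
    domains = sorted(d for d in os.listdir(root)
                     if os.path.isdir(os.path.join(root, d)))
    return len(domains), domains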
Example #5
hyperparameters = Hyperparameters()

with tf.Graph().as_default():
    # =========================================================================================================
    # BUILD MODEL
    # =========================================================================================================
    train_operation = model.model_architecture(hyperparameters)

    # =========================================================================================================
    # LOAD DATA
    # =========================================================================================================
    input_train, train_label, input_test, test_label = read_data.load_data(
        hyperparameters.num_points)
    scaled_laplacian_train, scaled_laplacian_test = read_data.prepare_data(
        input_train, input_test, hyperparameters.num_neighhbors,
        hyperparameters.num_points)

    # =========================================================================================================
    # TRAIN MODEL
    # =========================================================================================================
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    saver = tf.train.Saver()

    learning_rate = hyperparameters.learning_rate

    save_model_path = '../model/'
    weight_dict = utils.weight_dict_fc(train_label, hyperparameters)
    test_label_whole = []
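# utils.weight_dict_fc is not shown here; an illustrative sketch of one common
# way to build per-class weights from the training labels (inverse-frequency
# weighting; not necessarily what the repository's helper actually does):
import numpy as np

def inverse_frequency_weights(labels):
    """Weight each class by the inverse of its frequency so that rare classes
    contribute more to a weighted loss."""
    labels = np.asarray(labels).ravel()
    classes, counts = np.unique(labels, return_counts=True)
    weights = counts.sum() / (len(classes) * counts.astype(np.float64))
    return dict(zip(classes.tolist(), weights.tolist()))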
Example #6
def main(_):

    # Neural net definition
    x = tf.placeholder(tf.float32, shape=[None, 2352])
    y_ = tf.placeholder(tf.float32, shape=[None, 2])

    initializer = tf.contrib.layers.xavier_initializer()
    x_reshaped = tf.reshape(x, [-1, 28, 28, 3])

    conv_1 = tf.layers.conv2d(
          inputs=x_reshaped,
          filters=32,
          kernel_size=5,
          padding='same',
          activation=tf.nn.relu,
          kernel_initializer=initializer
          )

    pool_1 = tf.layers.max_pooling2d(inputs=conv_1, pool_size=[2,2], strides=2)

    conv_2 = tf.layers.conv2d(
          inputs=pool_1,
          filters=64,
          kernel_size=5,
          padding='same',
          activation=tf.nn.relu,
          kernel_initializer=initializer
          )

    pool_2 = tf.layers.max_pooling2d(inputs=conv_2, pool_size=[2,2], strides=2)
    pool_2_flat = tf.reshape(pool_2, [-1, 7*7*64])

    dense_1 = tf.layers.dense(inputs=pool_2_flat, units=1024, activation=tf.nn.relu, kernel_initializer=initializer)
    dropout = tf.layers.dropout(inputs=dense_1, rate=0.4)
    y_conv  = tf.layers.dense(inputs=dropout, units=2, kernel_initializer=initializer) 

    # Train step data
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv))

    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


    saver = tf.train.Saver()
    path = "./hotdog-model/"
    if not os.path.exists(path):
      os.makedirs(path)

    with tf.Session() as sess:
      # sess.run(tf.global_variables_initializer())
      saver.restore(sess, path + 'test-model')
      file_list, y_image_label = prepare_data(FLAGS.image_dir)
      le = preprocessing.LabelEncoder()
      labels = ['hotdog', 'not_a_hotdog']
      y_one_hot = tf.one_hot(le.fit_transform(labels),depth=2)

      if FLAGS.train:

        x_feed = sess.run(read_image_array(file_list))
        y_feed = sess.run(y_one_hot)

        for i in range(80):
          if i % 10 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x:x_feed , y_: y_feed})
            print('step %d, training accuracy %g' % (i, train_accuracy))
          train_step.run(feed_dict={x:x_feed , y_: y_feed})
          save_path = saver.save(sess, path + 'test-model')

      elif FLAGS.predict_image != "":
        predicted = tf.argmax(y_conv, 1)
        x_single_img = sess.run(read_single_image(FLAGS.predict_image))
       
        pixels = mpimg.imread(FLAGS.predict_image)
        plt.imshow(pixels)

        result = le.inverse_transform(sess.run(predicted, feed_dict={x: x_single_img}))[0]
        if result == 'hotdog':
          text = 'This is a hotdog, right?'
        else:
          text = 'This isn\'t a hotdog, right?'
        plt.title(text)
        plt.show()
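# read_single_image / read_image_array above are helpers that are not shown;
# since they are wrapped in sess.run they presumably build TensorFlow ops.
# A plain NumPy/PIL sketch matching the [None, 2352] placeholder
# (28 x 28 x 3 pixels, flattened) could look like this:
import numpy as np
from PIL import Image

def read_single_image_np(image_path):
    """Hypothetical NumPy version of read_single_image: load an image, resize
    it to 28x28 RGB, scale to [0, 1], and flatten to shape (1, 2352)."""
    img = Image.open(image_path).convert('RGB').resize((28, 28))
    pixels = np.asarray(img, dtype=np.float32) / 255.0
    return pixels.reshape(1, 28 * 28 * 3)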
Example #7
if __name__ == '__main__':

    # fix random seed for reproducibility
    np.random.seed(7)

    # Load Data
    data_2_05 = read_proteins("../Data/Protein Data/astral-scope-95-2.05.fa")
    data_2_06 = read_proteins("../Data/Protein Data/astral-scope-95-2.06.fa")
    data_2_06 = {
        'A': data_2_06['A'].difference(data_2_05['A']),
        'B': data_2_06['B'].difference(data_2_05['B']),
        'C': data_2_06['C'].difference(data_2_05['C']),
        'D': data_2_06['D'].difference(data_2_05['D']),
    }
    X_train, y_train, vocab = data_formatting(prepare_data(data_2_05))
    X_test, y_test, vocab = data_formatting(prepare_data(data_2_06), vocab)

    # truncate and pad input sequences
    max_seq_length = 200
    X_train = sequence.pad_sequences(X_train, maxlen=max_seq_length)
    X_test = sequence.pad_sequences(X_test, maxlen=max_seq_length)

    # Find number of protein lexemes
    print("Protein Lexemes Count = %d" % (len(vocab) + 1))

    # create the model
    embedding_vecor_length = 6
    model = Sequential()
    model.add(
        Embedding(len(vocab) + 1,
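# The example is cut off inside the Embedding call. A typical way to finish
# such an Embedding-plus-recurrent classifier for the four classes (A-D) is
# sketched below; the LSTM choice and layer sizes are assumptions, not the
# author's confirmed architecture.
from keras.layers import Dense, Embedding, LSTM
from keras.models import Sequential

model = Sequential()
model.add(Embedding(len(vocab) + 1, embedding_vecor_length,
                    input_length=max_seq_length))
model.add(LSTM(100))                        # assumed recurrent layer size
model.add(Dense(4, activation='softmax'))   # one output per class A-D
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.summary()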
Example #8
        test_acc, _ = run_epoch(session, config, model, test_dataset,
                                tf.no_op(), 1, False)
        print("Eval Accuracy = %.2f  time: %.3f\n" %
              (100 * test_acc, time.time() - start_time))


def word_to_vec(matrix, session, config, *args):
    for model in args:
        session.run(tf.assign(model.embedding, matrix))


if __name__ == "__main__":

    config = Config()
    train_dataset, test_dataset = read_data.load_data(path=config.data_path +
                                                      "parsed_data/")
    # convert the datasets into matrices
    train_dataset = read_data.prepare_data(train_dataset[0], train_dataset[1],
                                           train_dataset[2])
    test_dataset = read_data.prepare_data(test_dataset[0], test_dataset[1],
                                          test_dataset[2])

    with tf.Graph().as_default(), tf.Session(
            config=tf.ConfigProto()) as session:
        classifier = Classifer(config=config, session=session)
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=config.max_to_keep)
        for epoch_id in range(config.max_max_epoch):
            train_test_model(config, epoch_id, session, classifier,
                             train_dataset, test_dataset, saver)
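# word_to_vec expects a pre-built embedding matrix; a minimal sketch of loading
# one from a pickle file (mirroring the pickle-based loading in Example #3) and
# assigning it to the classifier, assuming vector_path points at the pickled
# matrix and this runs inside the session block above, before the epoch loop:
import pickle
import numpy as np

with open(vector_path, 'rb') as f:
    embedding_matrix = np.array(pickle.load(f))
word_to_vec(embedding_matrix, session, config, classifier)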
Example #9
def main(unused_args):
    #configs
    config = Config()
    #domains to be processed
    domain_list = sys.argv[1:]
    domain_size = len(domain_list)
    if domain_size <= 0:
        print("No dataset")
        exit(1)
    #load dataset
    train_datasets, valid_datasets, test_datasets = [], [], []
    for domain in domain_list:
        train, valid, test = read_data.load_data(path='dataset'+config.dataset+'/'+domain+'/dataset',n_words=config.vocab_size, \
            valid_portion=config.valid_portion, maxlen=config.maxlen)
        train_datasets.append(train)
        valid_datasets.append(valid)
        test_datasets.append(test)
    #transform dataset to matrix
    for index in range(domain_size):
        train = read_data.prepare_data(train_datasets[index][0],
                                       train_datasets[index][1],
                                       maxlen=config.maxlen,
                                       traindata=True)
        valid = read_data.prepare_data(valid_datasets[index][0],
                                       valid_datasets[index][1],
                                       maxlen=config.maxlen,
                                       traindata=False)
        test = read_data.prepare_data(test_datasets[index][0],
                                      test_datasets[index][1],
                                      maxlen=config.maxlen,
                                      traindata=False)
        train_datasets[index] = train
        valid_datasets[index] = valid
        test_datasets[index] = test

    config.num_classes = count_labels(train_datasets[0][2])
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options)) as session:
        initializer = tf.random_normal_initializer(0, 0.05)

        #training model for shared weights
        with tf.variable_scope("shared_model",
                               reuse=None,
                               initializer=initializer):
            share_model_train = EmbeddingModel(is_training=True,
                                               config=config,
                                               session=session,
                                               trainable=True)
        #testing model for shared weights
        with tf.variable_scope("shared_model",
                               reuse=True,
                               initializer=initializer):
            share_model_test = EmbeddingModel(is_training=False,
                                              config=config,
                                              session=session,
                                              trainable=True)

        #build models
        train_models = []
        test_models = []
        for index in range(domain_size):
            with tf.variable_scope("m" + str(index),
                                   reuse=None,
                                   initializer=initializer):
                train_model = Combine_two_model(share_model_train, config)
            with tf.variable_scope("m" + str(index),
                                   reuse=True,
                                   initializer=initializer):
                test_model = Combine_two_model(share_model_test, config)
            train_models.append(train_model)
            test_models.append(test_model)

        init = tf.global_variables_initializer()
        session.run(init)

        # initialize the shared model's embedding with word2vec
        word_to_vec(session, config, share_model_train)
        # train and evaluate the models
        train_test_model(config, session,
                         train_models, test_models, test_models,
                         train_datasets, valid_datasets, test_datasets)
Example #10
def process_queue_batch(args):
    batch, k = args
    sim = {}
    for i, j in tqdm(batch, position=k):
        sim[(i, j)] = similarity(train_data[i][0], train_data[j][0])
        sim[(j, i)] = sim[(i, j)]
    return sim
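# Each worker returns a dict keyed by (i, j) index pairs. A small sketch of
# folding the per-batch dicts back into a dense matrix, to be applied to the
# res list produced by p.map below, e.g.
# sim = merge_similarity_batches(res, len(train_data)):
import numpy as np

def merge_similarity_batches(results, n):
    """Fold the per-batch {(i, j): score} dicts returned by the worker pool
    into a dense n x n similarity matrix."""
    merged = np.zeros((n, n))
    for batch_result in results:
        for (i, j), value in batch_result.items():
            merged[i, j] = value
    return merged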


if __name__ == '__main__':

    num_threads = int(sys.argv[1])

    # Read and prep data
    data_2_05 = read_proteins("../Data/Data/astral-scope-95-2.05.fa")
    train_data = prepare_data(data_2_05)

    # Keep only a small random sample of the data
    np.random.shuffle(train_data)
    train_data = train_data[:250]

    # Build similarity score matrix
    sim = np.zeros((len(train_data), len(train_data)))
    work_queue = [(i, j) for i in range(len(train_data))
                  for j in range(i, len(train_data))]
    work_per_thread = int(np.ceil(float(len(work_queue)) / num_threads))

    # Execute the work queue in parallel across the pool
    p = Pool(num_threads)
    res = p.map(process_queue_batch,
                [(work_queue[i * work_per_thread:(i + 1) * work_per_thread], i)