def load_dataset(path, config):
    print('Loading data: ' + path)
    train, valid, test = read_data.load_data(path, n_words=config.vocab_size,
                                             valid_portion=0.15, maxlen=config.maxlen)
    train = read_data.prepare_data(train[0], train[1], maxlen=config.maxlen)
    valid = read_data.prepare_data(valid[0], valid[1], maxlen=config.maxlen)
    test = read_data.prepare_data(test[0], test[1], maxlen=config.maxlen)
    return (train, valid, test)
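# --- Usage sketch (not part of the original script) ---
# A minimal example of calling load_dataset above, assuming read_data is importable
# and that config exposes vocab_size and maxlen, as the call sites suggest. The
# SimpleNamespace stand-in for the project's Config class, the attribute values,
# and the dataset path are illustrative assumptions only.
from types import SimpleNamespace

import read_data  # project module assumed to provide load_data / prepare_data

config = SimpleNamespace(vocab_size=10000, maxlen=100)  # hypothetical settings
train, valid, test = load_dataset('dataset/example/dataset', config)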
import numpy as np

from read_data import read_messages, read_classes, prepare_data

# messages = read_messages()
# classes = read_classes()
data = prepare_data()
a = np.array(data)
np.savetxt('data_mapbook.csv', a, fmt='%s', delimiter=',')
config.layer = int(sys.argv[1])
config.step = int(sys.argv[2])
print("dataset: " + sys.argv[3])
print("iteration: " + str(config.layer))
print("step: " + str(config.step))
print("model: " + str(sys.argv[4]))

# word2vec
f = open(vector_path, 'rb')
matrix = np.array(pickle.load(f))
config.vocab_size = matrix.shape[0]

# load datasets
train_dataset, valid_dataset, test_dataset = read_data.load_data(
    path=path, n_words=config.vocab_size)
config.num_label = len(set(train_dataset[1]))
print("number label: " + str(config.num_label))
train_dataset = read_data.prepare_data(train_dataset[0], train_dataset[1])
valid_dataset = read_data.prepare_data(valid_dataset[0], valid_dataset[1])
test_dataset = read_data.prepare_data(test_dataset[0], test_dataset[1])

with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_normal_initializer(0, 0.05)
    classifier = Classifer(config=config, session=session)
    total = 0
    # print trainable variables
    for v in tf.trainable_variables():
        print(v.name)
        shape = v.get_shape()
        try:
            size = shape[0].value * shape[1].value
domain_size, domain_list = get_domains()

# load dataset
train_datasets, valid_datasets, test_datasets = [], [], []
for domain in domain_list:
    train, valid, test = read_data.load_data(
        path='dataset' + config.dataset + '/' + domain + '/dataset',
        n_words=config.vocab_size,
        valid_portion=config.valid_portion, maxlen=config.maxlen)
    train_datasets.append(train)
    valid_datasets.append(valid)
    test_datasets.append(test)

# transform dataset to matrix
for index in range(domain_size):
    train = read_data.prepare_data(train_datasets[index][0], train_datasets[index][1],
                                   maxlen=config.maxlen, traindata=True, index=index)
    valid = read_data.prepare_data(valid_datasets[index][0], valid_datasets[index][1],
                                   maxlen=config.maxlen, traindata=False, index=index)
    test = read_data.prepare_data(test_datasets[index][0], test_datasets[index][1],
                                  maxlen=config.maxlen, traindata=False, index=index)
    train_datasets[index] = train
    valid_datasets[index] = valid
    test_datasets[index] = test
hyperparameters = Hyperparameters()

with tf.Graph().as_default():
    # =====================================================================================
    # BUILD MODEL
    # =====================================================================================
    train_operation = model.model_architecture(hyperparameters)

    # =====================================================================================
    # LOAD DATA
    # =====================================================================================
    input_train, train_label, input_test, test_label = read_data.load_data(
        hyperparameters.num_points)
    scaled_laplacian_train, scaled_laplacian_test = read_data.prepare_data(
        input_train, input_test, hyperparameters.num_neighhbors,
        hyperparameters.num_points)

    # =====================================================================================
    # TRAIN MODEL
    # =====================================================================================
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    saver = tf.train.Saver()
    learning_rate = hyperparameters.learning_rate
    save_model_path = '../model/'
    weight_dict = utils.weight_dict_fc(train_label, hyperparameters)
    test_label_whole = []
def main(_):
    # Neural net definition
    x = tf.placeholder(tf.float32, shape=[None, 2352])
    y_ = tf.placeholder(tf.float32, shape=[None, 2])
    initializer = tf.contrib.layers.xavier_initializer()

    x_reshaped = tf.reshape(x, [-1, 28, 28, 3])
    conv_1 = tf.layers.conv2d(
        inputs=x_reshaped,
        filters=32,
        kernel_size=5,
        padding='same',
        activation=tf.nn.relu,
        kernel_initializer=initializer
    )
    pool_1 = tf.layers.max_pooling2d(inputs=conv_1, pool_size=[2, 2], strides=2)
    conv_2 = tf.layers.conv2d(
        inputs=pool_1,
        filters=64,
        kernel_size=5,
        padding='same',
        activation=tf.nn.relu,
        kernel_initializer=initializer
    )
    pool_2 = tf.layers.max_pooling2d(inputs=conv_2, pool_size=[2, 2], strides=2)
    pool_2_flat = tf.reshape(pool_2, [-1, 7 * 7 * 64])
    dense_1 = tf.layers.dense(inputs=pool_2_flat, units=1024,
                              activation=tf.nn.relu, kernel_initializer=initializer)
    dropout = tf.layers.dropout(inputs=dense_1, rate=0.4)
    y_conv = tf.layers.dense(inputs=dropout, units=2, kernel_initializer=initializer)

    # Train step data
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()
    path = "./hotdog-model/"
    if not os.path.exists(path):
        os.makedirs(path)

    with tf.Session() as sess:
        # sess.run(tf.global_variables_initializer())
        saver.restore(sess, path + 'test-model')
        file_list, y_image_label = prepare_data(FLAGS.image_dir)
        le = preprocessing.LabelEncoder()
        labels = ['hotdog', 'not_a_hotdog']
        y_one_hot = tf.one_hot(le.fit_transform(labels), depth=2)

        if FLAGS.train:
            x_feed = sess.run(read_image_array(file_list))
            y_feed = sess.run(y_one_hot)
            for i in range(80):
                if i % 10 == 0:
                    train_accuracy = accuracy.eval(feed_dict={x: x_feed, y_: y_feed})
                    print('step %d, training accuracy %g' % (i, train_accuracy))
                train_step.run(feed_dict={x: x_feed, y_: y_feed})
            save_path = saver.save(sess, path + 'test-model')
        elif FLAGS.predict_image != "":
            predicted = tf.argmax(y_conv, 1)
            x_single_img = sess.run(read_single_image(FLAGS.predict_image))
            pixels = mpimg.imread(FLAGS.predict_image)
            plt.imshow(pixels)
            result = le.inverse_transform(
                sess.run(predicted, feed_dict={x: x_single_img}))[0]
            if result == 'hotdog':
                text = 'This is a hotdog, right?'
            else:
                text = 'This isn\'t a hotdog, right?'
            plt.title(text)
            plt.show()
if __name__ == '__main__':
    # fix random seed for reproducibility
    np.random.seed(7)

    # Load Data
    data_2_05 = read_proteins("../Data/Protein Data/astral-scope-95-2.05.fa")
    data_2_06 = read_proteins("../Data/Protein Data/astral-scope-95-2.06.fa")
    data_2_06 = {
        'A': data_2_06['A'].difference(data_2_05['A']),
        'B': data_2_06['B'].difference(data_2_05['B']),
        'C': data_2_06['C'].difference(data_2_05['C']),
        'D': data_2_06['D'].difference(data_2_05['D']),
    }

    X_train, y_train, vocab = data_formatting(prepare_data(data_2_05))
    X_test, y_test, vocab = data_formatting(prepare_data(data_2_06), vocab)

    # truncate and pad input sequences
    max_seq_length = 200
    X_train = sequence.pad_sequences(X_train, maxlen=max_seq_length)
    X_test = sequence.pad_sequences(X_test, maxlen=max_seq_length)

    # Find number of protein lexemes
    print "Protein Lexemes Count =", len(vocab.keys()) + 1

    # create the model
    embedding_vecor_length = 6
    model = Sequential()
    model.add(
        Embedding(len(vocab) + 1,
    test_acc, _ = run_epoch(session, config, model, test_dataset,
                            tf.no_op(), 1, False)
    print("Eval Accuracy = %.2f time: %.3f\n" %
          (100 * test_acc, time.time() - start_time))


def word_to_vec(matrix, session, config, *args):
    for model in args:
        session.run(tf.assign(model.embedding, matrix))


if __name__ == "__main__":
    config = Config()
    train_dataset, test_dataset = read_data.load_data(path=config.data_path + "parsed_data/")

    # convert the datasets into matrices
    train_dataset = read_data.prepare_data(train_dataset[0], train_dataset[1], train_dataset[2])
    test_dataset = read_data.prepare_data(test_dataset[0], test_dataset[1], test_dataset[2])

    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto()) as session:
        classifier = Classifer(config=config, session=session)
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=config.max_to_keep)
        for epoch_id in range(config.max_max_epoch):
            train_test_model(config, epoch_id, session, classifier,
                             train_dataset, test_dataset, saver)
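# --- Sketch (not part of the original script) ---
# How a pretrained word2vec matrix might be handed to word_to_vec above,
# mirroring the pickle-loading pattern used elsewhere in this section.
# vector_path is a hypothetical file path; pickle and np are assumed imported.
with open(vector_path, 'rb') as f:
    embedding_matrix = np.array(pickle.load(f))
# copy the matrix into each model's embedding variable
word_to_vec(embedding_matrix, session, config, classifier)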
def main(unused_args):
    # configs
    config = Config()

    # domains to be processed
    domain_list = sys.argv[1:]
    domain_size = len(domain_list)
    if domain_size <= 0:
        print("No dataset")
        exit(1)

    # load dataset
    train_datasets, valid_datasets, test_datasets = [], [], []
    for domain in domain_list:
        train, valid, test = read_data.load_data(
            path='dataset' + config.dataset + '/' + domain + '/dataset',
            n_words=config.vocab_size,
            valid_portion=config.valid_portion, maxlen=config.maxlen)
        train_datasets.append(train)
        valid_datasets.append(valid)
        test_datasets.append(test)

    # transform dataset to matrix
    for index in range(domain_size):
        train = read_data.prepare_data(train_datasets[index][0], train_datasets[index][1],
                                       maxlen=config.maxlen, traindata=True)
        valid = read_data.prepare_data(valid_datasets[index][0], valid_datasets[index][1],
                                       maxlen=config.maxlen, traindata=False)
        test = read_data.prepare_data(test_datasets[index][0], test_datasets[index][1],
                                      maxlen=config.maxlen, traindata=False)
        train_datasets[index] = train
        valid_datasets[index] = valid
        test_datasets[index] = test

    config.num_classes = count_labels(train_datasets[0][2])

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options)) as session:
        initializer = tf.random_normal_initializer(0, 0.05)

        # training model for shared weights
        with tf.variable_scope("shared_model", reuse=None, initializer=initializer):
            share_model_train = EmbeddingModel(is_training=True, config=config,
                                               session=session, trainable=True)
        # testing model for shared weights
        with tf.variable_scope("shared_model", reuse=True, initializer=initializer):
            share_model_test = EmbeddingModel(is_training=False, config=config,
                                              session=session, trainable=True)

        # build models
        train_models = []
        test_models = []
        for index in range(domain_size):
            with tf.variable_scope("m" + str(index), reuse=None, initializer=initializer):
                train_model = Combine_two_model(share_model_train, config)
            with tf.variable_scope("m" + str(index), reuse=True, initializer=initializer):
                test_model = Combine_two_model(share_model_test, config)
            train_models.append(train_model)
            test_models.append(test_model)

        init = tf.global_variables_initializer()
        session.run(init)

        # initialize share model's embedding with word2vec
        word_to_vec(session, config, share_model_train)

        # train test model
        train_test_model(config, session,
                         train_models, test_models, test_models,
                         train_datasets, valid_datasets, test_datasets)
def process_queue_batch(args):
    batch, k = args
    sim = {}
    for i, j in tqdm(batch, position=k):
        sim[(i, j)] = similarity(train_data[i][0], train_data[j][0])
        sim[(j, i)] = sim[(i, j)]
    return sim


if __name__ == '__main__':
    num_threads = int(sys.argv[1])

    # Read and prep data
    data_2_05 = read_proteins("../Data/Data/astral-scope-95-2.05.fa")
    train_data = prepare_data(data_2_05)

    # Select out a few samples
    np.random.shuffle(train_data)
    train_data = train_data[:250]

    # Build similarity score matrix
    sim = np.zeros((len(train_data), len(train_data)))
    work_queue = [(i, j) for i in range(len(train_data))
                  for j in range(i, len(train_data))]
    work_per_thread = int(np.ceil(float(len(work_queue)) / num_threads))

    # Execute the work batches in parallel
    p = Pool(num_threads)
    res = p.map(process_queue_batch,
                [(work_queue[i * work_per_thread:(i + 1) * work_per_thread], i)