# Hyper-parameters of the three-output example graph.
x_dim = 50
component_dim = 100
batchsize = 32
learning_rate = 0.01

# Network input.
x_ph = tf.placeholder('float32', [None, x_dim])

# Targets for the three components, created in y1, y2, y3 order.
y1_ph, y2_ph, y3_ph = (
    tf.placeholder('float32', [None, component_dim]) for _ in range(3)
)

# Graph structure: a chain start -> h1 -> h2 -> h3 of Linear + Softmax nodes.
start = StartNode(input_vars=[x_ph])
h1 = HiddenNode(
    prev=[start],
    layers=[Linear(x_dim, component_dim), Softmax()],
)
h2 = HiddenNode(
    prev=[h1],
    layers=[Linear(component_dim, component_dim), Softmax()],
)
h3 = HiddenNode(
    prev=[h2],
    layers=[Linear(component_dim, component_dim), Softmax()],
)

# End node k merges (with Sum) the outputs of the first k hidden nodes.
e1, e2, e3 = (
    EndNode(prev=merged_inputs, input_merge_mode=Sum())
    for merged_inputs in ([h1], [h1, h2], [h1, h2, h3])
)

graph = Graph(start=[start], end=[e1, e2, e3])
o1, o2, o3 = graph.train_fprop()
def train():
    """Train the character-CNN classifier on `infodocs` and print its metrics.

    Builds a TensorGraph model (character embedding -> Conv2D -> Linear ->
    sum over the ``sent_len`` axis -> batch norm -> Linear + Softmax),
    minimises the mean squared error between the softmax output and the
    targets with Adam, and prints the train/validation error and accuracy
    after every epoch, followed by the validation accuracy averaged over all
    epochs.

    Takes no arguments and returns None; every setting is a local constant.
    """
    # --- params ---------------------------------------------------------
    sent_len = 50
    word_len = 20
    ch_embed_dim = 100
    unicode_size = 128
    batchsize = 32
    learning_rate = 0.001
    max_epoch = 50
    num_filters = 10      # output channels of the character convolution
    conv_out_dim = 1000   # width of the fully connected layer after the conv

    # Other loaders (BASF_char, twenty_newsgroup) were previously tried here;
    # only the infodocs configuration is active.
    num_train = 10000
    components = [65]
    train_X, valid_X, train_ys, valid_ys = infodocs(
        num_train, word_len, sent_len, components)

    # Single-argument print calls emit the same text under Python 2 and 3.
    print('num train %s' % (len(train_X),))
    print('num valid %s' % (len(valid_X),))

    trainset_X = SequentialIterator(train_X, batchsize=batchsize)
    trainset_y = SequentialIterator(*train_ys, batchsize=batchsize)
    validset_X = SequentialIterator(valid_X, batchsize=batchsize)
    validset_y = SequentialIterator(*valid_ys, batchsize=batchsize)

    # --- placeholders ---------------------------------------------------
    X_ph = tf.placeholder('int32', [None, sent_len, word_len])
    y_phs = [tf.placeholder('float32', [None, comp]) for comp in components]

    # --- graph model structure ------------------------------------------
    start = StartNode(input_vars=[X_ph])

    # character CNN
    embed_n = HiddenNode(prev=[start], layers=[
        Reshape(shape=(-1, word_len)),
        Embedding(cat_dim=unicode_size, encode_dim=ch_embed_dim,
                  zero_pad=True),
        Reshape(shape=(-1, ch_embed_dim, word_len, 1)),
    ])

    # Spatial size of the VALID convolution output, needed to size the
    # Linear layer that follows Flatten().
    h1, w1 = valid(ch_embed_dim, word_len, strides=(1, 1),
                   filters=(ch_embed_dim, 4))
    conv1_n = HiddenNode(prev=[embed_n], layers=[
        Conv2D(input_channels=1, num_filters=num_filters, padding='VALID',
               kernel_size=(ch_embed_dim, 4), stride=(1, 1)),
        RELU(),
        Flatten(),
        Linear(int(h1 * w1 * num_filters), conv_out_dim),
        RELU(),
        # Regroup the rows per sentence and sum over the sent_len axis.
        Reshape((-1, sent_len, conv_out_dim)),
        ReduceSum(1),
        BatchNormalization(layer_type='fc', dim=conv_out_dim,
                           short_memory=0.01),
    ])

    out_n = HiddenNode(prev=[conv1_n], layers=[
        Linear(prev_dim=conv_out_dim, this_dim=components[0]),
        Softmax(),
    ])

    end_nodes = [EndNode(prev=[out_n])]
    graph = Graph(start=[start], end=end_nodes)

    train_outs_sb = graph.train_fprop()
    test_outs = graph.test_fprop()

    # One MSE term per (target placeholder, output) pair, summed into the
    # training objective.
    ttl_mse = [tf.reduce_mean((y_ph - out) ** 2)
               for y_ph, out in zip(y_phs, train_outs_sb)]
    mse = sum(ttl_mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

    with tf.Session() as sess:
        # tf.initialize_all_variables() is deprecated; use the same
        # initializer as the other training functions in this file.
        sess.run(tf.global_variables_initializer())

        temp_acc = []
        for epoch in range(max_epoch):
            print('epoch: %s' % (epoch,))

            # ---- training pass ----
            train_error = 0
            train_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in zip(trainset_X, trainset_y):
                # The iterator yields a tuple; element 0 is the input batch.
                feed_dict = {X_ph: X_batch[0]}
                feed_dict.update(zip(y_phs, ys))
                sess.run(optimizer, feed_dict=feed_dict)
                train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                # NOTE(review): total_mse / total_accuracy are defined
                # elsewhere; they presumably return per-batch totals, since
                # the sums are divided by the example count below - confirm.
                train_error += total_mse(train_outs, ys)[0]
                train_accuracy += total_accuracy(train_outs, ys)[0]
                ttl_examples += len(X_batch[0])
            print('train mse %s' % (train_error / float(ttl_examples),))
            print('train accuracy %s'
                  % (train_accuracy / float(ttl_examples),))

            # ---- validation pass (test-mode outputs, no optimizer step) ----
            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in zip(validset_X, validset_y):
                feed_dict = {X_ph: X_batch[0]}
                feed_dict.update(zip(y_phs, ys))
                valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                valid_error += total_mse(valid_outs, ys)[0]
                valid_accuracy += total_accuracy(valid_outs, ys)[0]
                ttl_examples += len(X_batch[0])
            print('valid mse %s' % (valid_error / float(ttl_examples),))
            print('valid accuracy %s'
                  % (valid_accuracy / float(ttl_examples),))
            temp_acc.append(valid_accuracy / float(ttl_examples))

        # NOTE(review): assumed to run once after the last epoch; the
        # original indentation was lost - confirm.
        # The Python 2 print statement writes no separator after a tab, so
        # the value follows '\t' directly.
        print('average accuracy is:\t%s' % (sum(temp_acc) / len(temp_acc),))
def Vanilla_Classifier(X_train, y_train, X_valid, y_valid, restore):
    """Train a single-Linear softmax classifier and checkpoint it.

    The model is built in its own ``tf.Graph``: the input passes through
    Sigmoid / batch-norm / RELU / Flatten / Sigmoid / batch-norm layers and
    then one Linear + Softmax layer. It is trained for 100 epochs with Adam
    on the mean squared error against the targets.

    Args:
        X_train: training inputs; ``X_train.shape`` is unpacked as
            ``(_, h, w, c)``.
        y_train: training targets; ``y_train.shape`` is unpacked as
            ``(_, nclass)``.
        X_valid: validation inputs, fed to the same placeholder as X_train.
        y_valid: validation targets.
        restore: when equal to 1, variables are restored from
            ``./var/2/model.ckpt`` before training starts.

    Returns:
        None. The highest per-epoch validation accuracy is printed.
    """
    batchsize = 100
    learning_rate = 0.001
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    g = tf.Graph()
    with g.as_default():
        data_train = tg.SequentialIterator(X_train, y_train,
                                           batchsize=batchsize)
        data_valid = tg.SequentialIterator(X_valid, y_valid,
                                           batchsize=batchsize)

        X_ph = tf.placeholder('float32', [None, h, w, c])
        y_phs = [tf.placeholder('float32', [None, comp])
                 for comp in [nclass]]

        def build_feed(inputs, targets):
            # Pair the single target batch with the single target placeholder.
            feed = {X_ph: inputs}
            feed.update(zip(y_phs, [targets]))
            return feed

        dim = int(h*w*c)
        scope = 'encoder'
        start = tg.StartNode(input_vars=[X_ph])
        h1_Node = tg.HiddenNode(
            prev=[start],
            layers=[
                Sigmoid(),
                TFBatchNormalization(name=scope + '/vanilla1'),
                RELU(),
                Flatten(),
                Sigmoid(),
                TFBatchNormalization(name=scope + '/vanilla2'),
            ])
        h2_Node = tg.HiddenNode(
            prev=[h1_Node],
            layers=[Linear(prev_dim=dim, this_dim=nclass), Softmax()])
        end_nodes = [tg.EndNode(prev=[h2_Node])]
        graph = Graph(start=[start], end=end_nodes)

        train_outs_sb = graph.train_fprop()
        test_outs = graph.test_fprop()

        # Squared-error objective, one term per output.
        ttl_mse = [tf.reduce_mean((y_ph-out)**2)
                   for y_ph, out in zip(y_phs, train_outs_sb)]
        mse = sum(ttl_mse)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

        saver = tf.train.Saver()
        vardir = './var/2'
        if not os.path.exists(vardir):
            os.makedirs(vardir)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        tf.set_random_seed(1)
        init = tf.global_variables_initializer()

        session_config = tf.ConfigProto(gpu_options=gpu_options)
        with tf.Session(config=session_config) as sess:
            sess.run(init)
            if restore == 1:
                re_saver = tf.train.Saver()
                re_saver.restore(sess, vardir + "/model.ckpt")
                print("Model restored.")

            max_epoch = 100
            temp_acc = []
            for epoch in range(max_epoch):
                # ---- training pass ----
                train_error = 0
                train_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_train:
                    feed_dict = build_feed(X_batch, ys)
                    sess.run(optimizer, feed_dict=feed_dict)
                    train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                    train_error += total_mse(train_outs, [ys])[0]
                    train_accuracy += total_accuracy(train_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                # ---- validation pass (test-mode outputs) ----
                valid_error = 0
                valid_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_valid:
                    feed_dict = build_feed(X_batch, ys)
                    valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                    valid_error += total_mse(valid_outs, [ys])[0]
                    valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                # NOTE(review): the checkpoint is assumed to be written every
                # epoch; the original indentation was lost - confirm.
                save_path = saver.save(sess, vardir + "/model.ckpt")
                temp_acc.append(valid_accuracy/float(ttl_examples))

            # The Python 2 print statement writes no separator after a tab;
            # concatenation keeps that exact output.
            print('max accuracy is:\t' + str(max(temp_acc)))
def Encoder_Classifier(X_train, y_train, X_valid, y_valid, restore):
    """Train a convolutional-encoder classifier and checkpoint it.

    The model is built in its own ``tf.Graph``: three VALID 5x5 convolutions
    (32 filters each, strides 1, 2, 2) with batch norm and RELU, a
    300-unit Linear layer, a Linear + Tanh bottleneck, and a final
    Linear + Softmax layer. It is trained for 2 epochs with Adam on the mean
    squared error against the targets.

    Args:
        X_train: training inputs; ``X_train.shape`` is unpacked as
            ``(_, h, w, c)``.
        y_train: training targets; ``y_train.shape`` is unpacked as
            ``(_, nclass)``.
        X_valid: validation inputs, fed to the same placeholder as X_train.
        y_valid: validation targets.
        restore: when equal to 1, variables are restored from
            ``./var/1/model.ckpt`` before training starts.

    Returns:
        None. The checkpoint path and the highest per-epoch validation
        accuracy are printed.
    """
    batchsize = 100
    learning_rate = 0.001
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    g = tf.Graph()
    with g.as_default():
        data_train = tg.SequentialIterator(X_train, y_train,
                                           batchsize=batchsize)
        data_valid = tg.SequentialIterator(X_valid, y_valid,
                                           batchsize=batchsize)

        X_ph = tf.placeholder('float32', [None, h, w, c])
        y_phs = [tf.placeholder('float32', [None, comp])
                 for comp in [nclass]]

        def build_feed(inputs, targets):
            # Pair the single target batch with the single target placeholder.
            feed = {X_ph: inputs}
            feed.update(zip(y_phs, [targets]))
            return feed

        start = tg.StartNode(input_vars=[X_ph])

        # Track the spatial size through the three VALID convolutions so the
        # first Linear layer can be sized.
        h1, w1 = valid(h, w, filters=(5,5), strides=(1,1))
        h2, w2 = valid(h1, w1, filters=(5,5), strides=(2,2))
        h3, w3 = valid(h2, w2, filters=(5,5), strides=(2,2))
        flat_dim = int(h3*w3*32)

        scope = 'encoder'
        bottleneck_dim = 300
        encoder_layers = [
            Conv2D(input_channels=c, num_filters=32, kernel_size=(5,5),
                   stride=(1,1), padding='VALID'),
            TFBatchNormalization(name=scope + '/genc1'),
            RELU(),
            Conv2D(input_channels=32, num_filters=32, kernel_size=(5,5),
                   stride=(2,2), padding='VALID'),
            TFBatchNormalization(name=scope + '/genc2'),
            RELU(),
            Conv2D(input_channels=32, num_filters=32, kernel_size=(5,5),
                   stride=(2,2), padding='VALID'),
            TFBatchNormalization(name=scope + '/genc3'),
            RELU(),
            Flatten(),
            Linear(flat_dim, 300),
            TFBatchNormalization(name=scope + '/genc4'),
            RELU(),
            Linear(300, bottleneck_dim),
            Tanh(),
        ]
        enc_hn = tg.HiddenNode(prev=[start], layers=encoder_layers)

        h2_Node = tg.HiddenNode(
            prev=[enc_hn],
            layers=[Linear(prev_dim=bottleneck_dim, this_dim=nclass),
                    Softmax()])
        end_nodes = [tg.EndNode(prev=[h2_Node])]
        graph = Graph(start=[start], end=end_nodes)

        train_outs_sb = graph.train_fprop()
        test_outs = graph.test_fprop()

        # Squared-error objective, one term per output.
        ttl_mse = [tf.reduce_mean((y_ph-out)**2)
                   for y_ph, out in zip(y_phs, train_outs_sb)]
        mse = sum(ttl_mse)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        saver = tf.train.Saver()
        vardir = './var/1'
        if not os.path.exists(vardir):
            os.makedirs(vardir)

        tf.set_random_seed(1)
        init = tf.global_variables_initializer()

        session_config = tf.ConfigProto(gpu_options=gpu_options)
        with tf.Session(config=session_config) as sess:
            sess.run(init)
            if restore == 1:
                re_saver = tf.train.Saver()
                re_saver.restore(sess, vardir + "/model.ckpt")
                print("Model restored.")

            max_epoch = 2
            temp_acc = []
            for epoch in range(max_epoch):
                # ---- training pass ----
                train_error = 0
                train_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_train:
                    feed_dict = build_feed(X_batch, ys)
                    sess.run(optimizer, feed_dict=feed_dict)
                    train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                    train_error += total_mse(train_outs, [ys])[0]
                    train_accuracy += total_accuracy(train_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                # ---- validation pass (test-mode outputs) ----
                valid_error = 0
                valid_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_valid:
                    feed_dict = build_feed(X_batch, ys)
                    valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                    valid_error += total_mse(valid_outs, [ys])[0]
                    valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                temp_acc.append(valid_accuracy/float(ttl_examples))

            # NOTE(review): the checkpoint is assumed to be written once,
            # after the last epoch; the original indentation was lost -
            # confirm.
            save_path = saver.save(sess, vardir + "/model.ckpt")
            print("Model saved in file: %s" % save_path)
            # The Python 2 print statement writes no separator after a tab;
            # concatenation keeps that exact output.
            print('max accuracy is:\t' + str(max(temp_acc)))
def CNN_Classifier(X_train, y_train, X_valid, y_valid):
    """Train a one-convolution softmax classifier and checkpoint it.

    The model is a SAME 2x2 convolution with 10 filters, a 2x2 / stride-2
    SAME max-pool, a flattening Reshape, and one Linear + Softmax layer. It
    is trained for 100 epochs with Adam on the mean squared error against
    the targets, in the default TensorFlow graph.

    Args:
        X_train: training inputs; ``X_train.shape`` is unpacked as
            ``(_, h, w, c)``.
        y_train: training targets; ``y_train.shape`` is unpacked as
            ``(_, nclass)``.
        X_valid: validation inputs, fed to the same placeholder as X_train.
        y_valid: validation targets.

    Returns:
        None. The checkpoint path (``./var/1/model.ckpt``) and the highest
        per-epoch validation accuracy are printed.
    """
    batchsize = 64
    learning_rate = 0.001
    max_epoch = 100
    num_filters = 10
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, h, w, c])
    y_phs = [tf.placeholder('float32', [None, comp]) for comp in [nclass]]

    start = tg.StartNode(input_vars=[X_ph])

    # Spatial size after the stride-1 convolution and the stride-2 pooling.
    out_h, out_w = same(in_height=h, in_width=w, strides=(1,1), filters=(2,2))
    out_h, out_w = same(in_height=out_h, in_width=out_w, strides=(2,2),
                        filters=(2,2))
    # The convolution emits `num_filters` channels whatever the input channel
    # count is, so the flattened width must not be multiplied by `c`. The
    # previous h * w * c * 10 was only correct for single-channel input.
    dim = int(out_h * out_w * num_filters)

    h1_Node = tg.HiddenNode(
        prev=[start],
        layers=[
            Conv2D(input_channels=c, num_filters=num_filters, padding='SAME',
                   kernel_size=(2,2), stride=(1,1)),
            MaxPooling(poolsize=(2,2), stride=(2,2), padding='SAME'),
            Reshape(shape=(-1, dim)),
        ])
    h2_Node = tg.HiddenNode(
        prev=[h1_Node],
        layers=[Linear(prev_dim=dim, this_dim=nclass), Softmax()])

    end_nodes = [tg.EndNode(prev=[h2_Node])]
    graph = Graph(start=[start], end=end_nodes)

    train_outs_sb = graph.train_fprop()
    test_outs = graph.test_fprop()

    # Squared-error objective, one term per output.
    ttl_mse = [tf.reduce_mean((y_ph - out) ** 2)
               for y_ph, out in zip(y_phs, train_outs_sb)]
    mse = sum(ttl_mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    saver = tf.train.Saver()
    vardir = './var/1'
    if not os.path.exists(vardir):
        os.makedirs(vardir)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())

        temp_acc = []
        for epoch in range(max_epoch):
            # ---- training pass ----
            # NOTE(review): train_error / train_accuracy are accumulated but
            # never reported (the prints were commented out); the helper
            # calls are kept in case they are wanted again.
            train_error = 0
            train_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in data_train:
                feed_dict = {X_ph: X_batch}
                feed_dict.update(zip(y_phs, [ys]))
                sess.run(optimizer, feed_dict=feed_dict)
                train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                train_error += total_mse(train_outs, [ys])[0]
                train_accuracy += total_accuracy(train_outs, [ys])[0]
                ttl_examples += len(X_batch)

            # ---- validation pass (test-mode outputs) ----
            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in data_valid:
                feed_dict = {X_ph: X_batch}
                feed_dict.update(zip(y_phs, [ys]))
                valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                valid_error += total_mse(valid_outs, [ys])[0]
                valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                ttl_examples += len(X_batch)

            temp_acc.append(valid_accuracy / float(ttl_examples))

        # NOTE(review): the checkpoint is assumed to be written once, after
        # the last epoch; the original indentation was lost - confirm.
        save_path = saver.save(sess, vardir + "/model.ckpt")
        print("Model saved in file: %s" % save_path)
        # The Python 2 print statement writes no separator after a tab, so
        # the value follows '\t' directly.
        print('max accuracy is:\t%s' % (max(temp_acc),))
# Hyper-parameters of the three-output example graph.
x_dim = 50
component_dim = 100
batchsize = 32
learning_rate = 0.01

# Network input.
x_ph = tf.placeholder('float32', [None, x_dim])

# Targets for the three components, created in y1, y2, y3 order.
y1_ph, y2_ph, y3_ph = (
    tf.placeholder('float32', [None, component_dim]) for _ in range(3)
)

# Graph structure: a chain start -> h1 -> h2 -> h3 of Linear + Softmax nodes.
start = StartNode(input_vars=[x_ph])
h1 = HiddenNode(prev=[start], layers=[Linear(component_dim), Softmax()])
h2 = HiddenNode(prev=[h1], layers=[Linear(component_dim), Softmax()])
h3 = HiddenNode(prev=[h2], layers=[Linear(component_dim), Softmax()])

# End node k merges (with Sum) the outputs of the first k hidden nodes.
e1, e2, e3 = (
    EndNode(prev=merged_inputs, input_merge_mode=Sum())
    for merged_inputs in ([h1], [h1, h2], [h1, h2, h3])
)

graph = Graph(start=[start], end=[e1, e2, e3])
o1, o2, o3 = graph.train_fprop()

# Mean squared error of each output against its own target placeholder.
o1_mse, o2_mse, o3_mse = (
    tf.reduce_mean((target - output)**2)
    for target, output in ((y1_ph, o1), (y2_ph, o2), (y3_ph, o3))
)
import numpy as np
from tensorgraph.data_iterator import SequentialIterator

# Hyper-parameters of the two-input / two-output example graph.
y1_dim = 50
y2_dim = 100
batchsize = 32
learning_rate = 0.01

# y1 and y2 serve both as the graph inputs and as the targets of the
# squared-error losses below.
y1, y2 = (
    tf.placeholder('float32', [None, width]) for width in (y1_dim, y2_dim)
)
start1, start2 = (
    StartNode(input_vars=[placeholder]) for placeholder in (y1, y2)
)

# h1 sees both inputs concatenated; h2 sees only the second input; h3 sums
# h1 and h2 and maps the result to y1_dim units.
h1 = HiddenNode(
    prev=[start1, start2],
    input_merge_mode=Concat(),
    layers=[Linear(y1_dim + y2_dim, y2_dim), RELU()],
)
h2 = HiddenNode(
    prev=[start2],
    layers=[Linear(y2_dim, y2_dim), RELU()],
)
h3 = HiddenNode(
    prev=[h1, h2],
    input_merge_mode=Sum(),
    layers=[Linear(y2_dim, y1_dim), RELU()],
)

e1 = EndNode(prev=[h3])
e2 = EndNode(prev=[h2])

graph = Graph(start=[start1, start2], end=[e1, e2])
o1, o2 = graph.train_fprop()

# One mean-squared-error term per output; their sum is minimised with Adam.
o1_mse, o2_mse = (
    tf.reduce_mean((target - output)**2)
    for target, output in ((y1, o1), (y2, o2))
)
mse = o1_mse + o2_mse
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)