def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    pcanet = utils.load_model(args.pretrained_path + "/pcanet.pkl")
    classifier = utils.load_model(args.pretrained_path + "/classifier.pkl")
    logging.info("loaded PCANet and SVM classifier")

    # load dataset
    test_queue, num_test = load_test_mnist(args)
    logging.info("loaded testing dataset")

    with torch.no_grad():
        num_of_correct_samples = 0
        for global_step, (test_images, test_labels) in enumerate(test_queue):
            batch_size = test_images.shape[0]
            batch_features = test_images.cuda()

            # execute convolution in the different PCA stages
            for stage in range(args.stages):
                batch_features = pcanet.pca_conv(batch_features, pcanet.kernel[stage])

            # build binary quantization mapping and generate histogram features
            decimal_features = pcanet.binary_mapping(batch_features, stage)
            final_features = pcanet.generate_histogram(decimal_features)

            # calculate the rate of correct classification on this batch
            final_features = final_features.cpu()
            predict_class = classifier.predict(final_features)
            batch_accuracy = accuracy_score(predict_class, test_labels)
            if global_step % args.log_freq == 0:
                logging.info("global_step %d, stage %d, batch accuracy %f"
                             % (global_step, stage, batch_accuracy))

            batch_num_of_correct_samples = utils.total_accuracy(predict_class, test_labels)
            num_of_correct_samples += batch_num_of_correct_samples

        logging.info("total accuracy %f" % (num_of_correct_samples / num_test))
    logging.info("testing finished")
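
# NOTE: utils.total_accuracy is not shown in this snippet. The loop above sums its
# return value and divides by num_test at the end, so it is assumed to return the
# number of correctly classified samples in a batch. A minimal sketch under that
# assumption (hypothetical helper name):
import numpy as np

def count_correct_sketch(predict_class, test_labels):
    """Return how many predictions in the batch match the ground-truth labels."""
    predict_class = np.asarray(predict_class)
    test_labels = np.asarray(test_labels)
    return int((predict_class == test_labels).sum())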
def valid():
    # ===== parameters =====
    col, tokenizer = db_init()
    use_loaded_model = True
    batch_size = 64
    batch_idx = 0
    test_batch_idx = 0
    # ===== current epoch =====
    epoch = 14

    # ===== net =====
    net = Net()
    if use_loaded_model:
        try:
            net = load_model(model_dir + model_name + '_' + str(epoch) + '_' + model_ext)
            logger.info('init net from local, current epoch is %d', epoch)
        except Exception:
            pass

    logger.info('validation start')
    valid_data, valid_labels = get_valid_batch_data_labels(
        csv_path=train_data_path, col=col, tokenizer=tokenizer, use_batch=False)

    if len(valid_data) > 1:
        logger.info('valid data length is %d', len(valid_data))
        # binarize the sigmoid outputs with a 0.5 threshold
        bi = lambda x: 1 if x > 0.5 else 0
        bi_list = lambda label: [bi(item) for item in label]
        predict_labels = net.predict(valid_data, batch_size=150)
        predict_labels = np.array([bi_list(item) for item in predict_labels])
        logger.info("predict labels shape: %d,%d",
                    predict_labels.shape[0], predict_labels.shape[1])
        accu = total_accuracy(valid_labels, predict_labels)
        logger.info("accuracy: %f", accu)
    else:
        logger.debug('error! data in batch_index %d has len(data) <= 1', batch_idx)
tf.add_to_collection("infrence", _pred)

# logits are the result of the forward pass, labels are the ground-truth labels
_pred_flatten = _pred   # utils.label_before_cal_loss(_pred)
Y_flatten = Y           # utils.label_before_cal_loss(Y)
# _pred_flatten = tf.layers.Flatten()(_pred)
# Y_flatten = tf.layers.Flatten()(Y)
# softmax = tf.nn.softmax_cross_entropy_with_logits(logits=_pred_flatten, labels=Y_flatten)

loss = utils.weighted_loss_v1(logits=_pred_flatten, labels=Y_flatten)  # tf.reduce_mean(softmax)
acc = utils.total_accuracy(logits=_pred_flatten, labels=Y_flatten)

global_step = tf.Variable(0, trainable=False)
opt = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
    loss, global_step=global_step)

# global_acc = utils.cal_global_accuracy(_pred, Y)
# history = {}
# history["train_loss"] = []
# history["test_loss"] = []
# history["train_acc"] = []
# history["test_acc"] = []
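
# NOTE: utils.weighted_loss_v1 is not defined in this snippet. A common choice for this
# kind of one-hot target is a class-weighted softmax cross-entropy; the sketch below
# (hypothetical weighted_loss_sketch with an explicit class_weights argument) only
# illustrates that idea and is not necessarily what weighted_loss_v1 actually computes.
def weighted_loss_sketch(logits, labels, class_weights):
    # cross-entropy per example (labels are one-hot over the last axis)
    xent = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    # weight each example by the weight assigned to its true class
    per_example_weight = tf.reduce_sum(labels * class_weights, axis=-1)
    return tf.reduce_mean(xent * per_example_weight)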
def train():
    ### params
    sent_len = 50
    word_len = 20
    ch_embed_dim = 100
    unicode_size = 128
    tfidf_dim = 1000
    tfidf_embed_dim = 1000
    fc_dim = 1000
    batchsize = 32
    train_valid_ratio = [5, 1]
    learning_rate = 0.001

    # components = [len(np.unique(val)) for val in comps]
    # components = [65, 454, 983, 892, 242, 6]
    # components = [42]
    num_train = 10000
    components = [65]
    train_X, valid_X, train_ys, valid_ys = infodocs(num_train, word_len, sent_len, components)

    # alternative datasets:
    # num_train = 560000
    # train_X, valid_X, train_ys, valid_ys, components = BASF_char(num_train, word_len, sent_len)
    # num_train = 16000
    # components = [20]
    # train_X, valid_X, train_ys, valid_ys = twenty_newsgroup(num_train, word_len, sent_len, components, use_sean=True)
    # num_train = 20000
    # train_X, valid_X, train_ys, valid_ys, components = BASF_char(num_train, word_len, sent_len)

    print 'num train', len(train_X)
    print 'num valid', len(valid_X)

    trainset_X = SequentialIterator(train_X, batchsize=batchsize)
    trainset_y = SequentialIterator(*train_ys, batchsize=batchsize)
    validset_X = SequentialIterator(valid_X, batchsize=batchsize)
    validset_y = SequentialIterator(*valid_ys, batchsize=batchsize)

    ### define placeholders
    X_ph = tf.placeholder('int32', [None, sent_len, word_len])
    y_phs = []
    for comp in components:
        y_phs.append(tf.placeholder('float32', [None, comp]))

    ### define the graph model structure
    start = StartNode(input_vars=[X_ph])

    # character CNN
    embed_n = HiddenNode(prev=[start],
                         layers=[Reshape(shape=(-1, word_len)),
                                 Embedding(cat_dim=unicode_size,
                                           encode_dim=ch_embed_dim,
                                           zero_pad=True),
                                 Reshape(shape=(-1, ch_embed_dim, word_len, 1))])

    h1, w1 = valid(ch_embed_dim, word_len, strides=(1, 1), filters=(ch_embed_dim, 4))
    conv1_n = HiddenNode(prev=[embed_n],
                         layers=[Conv2D(input_channels=1, num_filters=10, padding='VALID',
                                        kernel_size=(ch_embed_dim, 4), stride=(1, 1)),
                                 RELU(),
                                 Flatten(),
                                 Linear(int(h1 * w1 * 10), 1000),
                                 RELU(),
                                 Reshape((-1, sent_len, 1000)),
                                 ReduceSum(1),
                                 BatchNormalization(layer_type='fc', dim=1000, short_memory=0.01)])

    # conv2_n = HiddenNode(prev=[embed_n],
    #                      layers=[Conv2D(input_channels=1, num_filters=1, padding='VALID',
    #                                     kernel_size=(ch_embed_dim, 2), stride=(1, 1)),
    #                              Squeeze()])
    # h2, w2 = valid(ch_embed_dim, word_len, strides=(1, 1), filters=(ch_embed_dim, 2))
    #
    # conv3_n = HiddenNode(prev=[embed_n],
    #                      layers=[Conv2D(input_channels=1, num_filters=1, padding='VALID',
    #                                     kernel_size=(ch_embed_dim, 3), stride=(1, 1)),
    #                              Squeeze()])
    # h3, w3 = valid(ch_embed_dim, word_len, strides=(1, 1), filters=(ch_embed_dim, 3))
    #
    # conv4_n = HiddenNode(prev=[embed_n],
    #                      layers=[Conv2D(input_channels=1, num_filters=1, padding='VALID',
    #                                     kernel_size=(ch_embed_dim, 4), stride=(1, 1)),
    #                              Squeeze()])
    # h4, w4 = valid(ch_embed_dim, word_len, strides=(1, 1), filters=(ch_embed_dim, 4))
    #
    # concat_n = HiddenNode(prev=[conv1_n, conv2_n, conv3_n, conv4_n],
    #                       input_merge_mode=Concat(), layers=[RELU()])
    # concat_n = HiddenNode(prev=[conv1_n, conv2_n],
    #                       input_merge_mode=Concat(), layers=[RELU()])
    # fc_n = HiddenNode(prev=[concat_n], layers=[Linear(int(w1 + w2), fc_dim), Sigmoid()])
    #
    # # TF-IDF Embedding
    # words_combined_layer = WordsCombined(this_dim=tfidf_dim, mode='sum')
    # words_combined_n = HiddenNode(prev=[fc_n],
    #                               layers=[Linear(prev_dim=fc_dim, this_dim=tfidf_dim), Sigmoid(),
    #                                       Reshape(shape=(-1, sent_len, tfidf_dim)),
    #                                       words_combined_layer,
    #                                       BatchNormalization(dim=tfidf_dim, layer_type='fc', short_memory=0.01)])

    out_n = HiddenNode(prev=[conv1_n],
                       layers=[Linear(prev_dim=1000, this_dim=components[0]), Softmax()])

    # hierarchical softmax
    # prev_dim = components[0]
    # prev_node = HiddenNode(prev=[out_n], layers=[Linear(tfidf_embed_dim, prev_dim), Softmax()])
    # end_nodes = []
    # end_nodes.append(EndNode(prev=[prev_node]))
    # for this_dim in components[1:]:
    #     top_connect = HiddenNode(prev=[out_n], layers=[Linear(tfidf_embed_dim, prev_dim), Sigmoid()])
    #     prev_node = HiddenNode(prev=[prev_node, top_connect], layers=[Linear(prev_dim, this_dim), Softmax()])
    #     end_nodes.append(EndNode(prev=[prev_node]))
    #     prev_dim = this_dim

    end_nodes = [EndNode(prev=[out_n])]
    graph = Graph(start=[start], end=end_nodes)

    train_outs_sb = graph.train_fprop()
    test_outs = graph.test_fprop()

    ttl_mse = []
    accus = []
    for y_ph, out in zip(y_phs, train_outs_sb):
        ttl_mse.append(tf.reduce_mean((y_ph - out) ** 2))
        pos = tf.reduce_sum(y_ph * out)
        accus.append(pos)

    mse = sum(ttl_mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

    with tf.Session() as sess:
        init = tf.initialize_all_variables()
        sess.run(init)

        max_epoch = 50
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=3,
                             percent_decrease=0.1)
        temp_acc = []
        for epoch in range(max_epoch):
            print 'epoch:', epoch
            train_error = 0
            train_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in zip(trainset_X, trainset_y):
                feed_dict = {X_ph: X_batch[0]}
                for y_ph, y_batch in zip(y_phs, ys):
                    feed_dict[y_ph] = y_batch
                sess.run(optimizer, feed_dict=feed_dict)
                train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                train_error += total_mse(train_outs, ys)[0]
                train_accuracy += total_accuracy(train_outs, ys)[0]
                ttl_examples += len(X_batch[0])

            print 'train mse', train_error / float(ttl_examples)
            print 'train accuracy', train_accuracy / float(ttl_examples)

            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in zip(validset_X, validset_y):
                feed_dict = {X_ph: X_batch[0]}
                for y_ph, y_batch in zip(y_phs, ys):
                    feed_dict[y_ph] = y_batch
                valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                valid_error += total_mse(valid_outs, ys)[0]
                valid_accuracy += total_accuracy(valid_outs, ys)[0]
                ttl_examples += len(X_batch[0])

            print 'valid mse', valid_error / float(ttl_examples)
            print 'valid accuracy', valid_accuracy / float(ttl_examples)
            temp_acc.append(valid_accuracy / float(ttl_examples))

    print 'average accuracy is:\t', sum(temp_acc) / len(temp_acc)
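
# NOTE: total_mse and total_accuracy are not defined in this snippet. The loops above
# accumulate element [0] of their return values and divide by the number of examples,
# so they are assumed to return per-output-head batch sums. A minimal numpy sketch under
# that assumption (hypothetical *_sketch names):
import numpy as np

def total_mse_sketch(outs, labels):
    """Per-output-head sum of squared errors over one batch."""
    return [float(((np.asarray(out) - np.asarray(y)) ** 2).sum())
            for out, y in zip(outs, labels)]

def total_accuracy_sketch(outs, labels):
    """Per-output-head count of correct argmax predictions over one batch."""
    return [int((np.asarray(out).argmax(axis=1) == np.asarray(y).argmax(axis=1)).sum())
            for out, y in zip(outs, labels)]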
def Vanilla_Classifier(X_train, y_train, X_valid, y_valid, restore):
    batchsize = 100
    learning_rate = 0.001

    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    g = tf.Graph()
    with g.as_default():
        data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
        data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

        X_ph = tf.placeholder('float32', [None, h, w, c])
        # y_ph = tf.placeholder('float32', [None, nclass])
        y_phs = []
        for comp in [nclass]:
            y_phs.append(tf.placeholder('float32', [None, comp]))

        dim = int(h * w * c)
        scope = 'encoder'

        start = tg.StartNode(input_vars=[X_ph])
        h1_Node = tg.HiddenNode(prev=[start],
                                layers=[Sigmoid(),
                                        TFBatchNormalization(name=scope + '/vanilla1'),
                                        RELU(),
                                        Flatten(),
                                        Sigmoid(),
                                        TFBatchNormalization(name=scope + '/vanilla2')])
        h2_Node = tg.HiddenNode(prev=[h1_Node],
                                layers=[Linear(prev_dim=dim, this_dim=nclass), Softmax()])
        end_nodes = [tg.EndNode(prev=[h2_Node])]

        graph = Graph(start=[start], end=end_nodes)
        train_outs_sb = graph.train_fprop()
        test_outs = graph.test_fprop()

        ttl_mse = []
        for y_ph, out in zip(y_phs, train_outs_sb):
            # ttl_mse.append(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_ph, out)))
            ttl_mse.append(tf.reduce_mean((y_ph - out) ** 2))
        mse = sum(ttl_mse)

        # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

        saver = tf.train.Saver()
        vardir = './var/2'
        if not os.path.exists(vardir):
            os.makedirs(vardir)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        tf.set_random_seed(1)
        init = tf.global_variables_initializer()

        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(init)
            if restore == 1:
                re_saver = tf.train.Saver()
                re_saver.restore(sess, vardir + "/model.ckpt")
                print("Model restored.")

            max_epoch = 100
            temp_acc = []
            for epoch in range(max_epoch):
                train_error = 0
                train_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_train:
                    feed_dict = {X_ph: X_batch}
                    for y_ph, y_batch in zip(y_phs, [ys]):
                        feed_dict[y_ph] = y_batch
                    sess.run(optimizer, feed_dict=feed_dict)
                    train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                    train_error += total_mse(train_outs, [ys])[0]
                    train_accuracy += total_accuracy(train_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                valid_error = 0
                valid_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_valid:
                    feed_dict = {X_ph: X_batch}
                    for y_ph, y_batch in zip(y_phs, [ys]):
                        feed_dict[y_ph] = y_batch
                    valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                    valid_error += total_mse(valid_outs, [ys])[0]
                    valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                save_path = saver.save(sess, vardir + "/model.ckpt")
                # print("Model saved in file: %s" % save_path)
                temp_acc.append(valid_accuracy / float(ttl_examples))

    print 'max accuracy is:\t', max(temp_acc)
def Encoder_Classifier(X_train, y_train, X_valid, y_valid, restore):
    batchsize = 100
    learning_rate = 0.001

    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    g = tf.Graph()
    with g.as_default():
        data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
        data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

        X_ph = tf.placeholder('float32', [None, h, w, c])
        y_phs = []
        for comp in [nclass]:
            y_phs.append(tf.placeholder('float32', [None, comp]))

        start = tg.StartNode(input_vars=[X_ph])

        h1, w1 = valid(h, w, filters=(5, 5), strides=(1, 1))
        h2, w2 = valid(h1, w1, filters=(5, 5), strides=(2, 2))
        h3, w3 = valid(h2, w2, filters=(5, 5), strides=(2, 2))
        flat_dim = int(h3 * w3 * 32)

        scope = 'encoder'
        bottleneck_dim = 300
        enc_hn = tg.HiddenNode(prev=[start],
                               layers=[Conv2D(input_channels=c, num_filters=32, kernel_size=(5, 5),
                                              stride=(1, 1), padding='VALID'),
                                       TFBatchNormalization(name=scope + '/genc1'),
                                       RELU(),
                                       Conv2D(input_channels=32, num_filters=32, kernel_size=(5, 5),
                                              stride=(2, 2), padding='VALID'),
                                       TFBatchNormalization(name=scope + '/genc2'),
                                       RELU(),
                                       Conv2D(input_channels=32, num_filters=32, kernel_size=(5, 5),
                                              stride=(2, 2), padding='VALID'),
                                       TFBatchNormalization(name=scope + '/genc3'),
                                       RELU(),
                                       Flatten(),
                                       Linear(flat_dim, 300),
                                       TFBatchNormalization(name=scope + '/genc4'),
                                       RELU(),
                                       Linear(300, bottleneck_dim),
                                       Tanh()])

        h2_Node = tg.HiddenNode(prev=[enc_hn],
                                layers=[Linear(prev_dim=bottleneck_dim, this_dim=nclass), Softmax()])
        end_nodes = [tg.EndNode(prev=[h2_Node])]

        graph = Graph(start=[start], end=end_nodes)
        train_outs_sb = graph.train_fprop()
        test_outs = graph.test_fprop()

        ttl_mse = []
        for y_ph, out in zip(y_phs, train_outs_sb):
            # ttl_mse.append(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_ph, out)))
            ttl_mse.append(tf.reduce_mean((y_ph - out) ** 2))
        mse = sum(ttl_mse)

        # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)

        # saver_init = tf.train.Saver()
        saver = tf.train.Saver()
        vardir = './var/1'
        if not os.path.exists(vardir):
            os.makedirs(vardir)

        tf.set_random_seed(1)
        init = tf.global_variables_initializer()
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(init)
            if restore == 1:
                re_saver = tf.train.Saver()
                re_saver.restore(sess, vardir + "/model.ckpt")
                print("Model restored.")
            # save_path = saver_init.save(sess, vardir + "/init.ckpt")
            # print("Model saved in file: %s" % save_path)

            max_epoch = 2
            temp_acc = []
            for epoch in range(max_epoch):
                train_error = 0
                train_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_train:
                    feed_dict = {X_ph: X_batch}
                    for y_ph, y_batch in zip(y_phs, [ys]):
                        feed_dict[y_ph] = y_batch
                    sess.run(optimizer, feed_dict=feed_dict)
                    train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                    train_error += total_mse(train_outs, [ys])[0]
                    train_accuracy += total_accuracy(train_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                valid_error = 0
                valid_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_valid:
                    feed_dict = {X_ph: X_batch}
                    for y_ph, y_batch in zip(y_phs, [ys]):
                        feed_dict[y_ph] = y_batch
                    valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                    valid_error += total_mse(valid_outs, [ys])[0]
                    valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                temp_acc.append(valid_accuracy / float(ttl_examples))
                save_path = saver.save(sess, vardir + "/model.ckpt")
                print("Model saved in file: %s" % save_path)

    print 'max accuracy is:\t', max(temp_acc)
def CNN_Classifier(X_train, y_train, X_valid, y_valid):
    batchsize = 64
    learning_rate = 0.001

    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, h, w, c])
    y_phs = []
    for comp in [nclass]:
        y_phs.append(tf.placeholder('float32', [None, comp]))

    start = tg.StartNode(input_vars=[X_ph])

    h, w = same(in_height=h, in_width=w, strides=(1, 1), filters=(2, 2))
    h, w = same(in_height=h, in_width=w, strides=(2, 2), filters=(2, 2))
    # h1, w1 = valid(ch_embed_dim, word_len, strides=(1, 1), filters=(ch_embed_dim, 4))
    dim = int(h * w * c * 10)

    h1_Node = tg.HiddenNode(prev=[start],
                            layers=[Conv2D(input_channels=c, num_filters=10, padding='SAME',
                                           kernel_size=(2, 2), stride=(1, 1)),
                                    MaxPooling(poolsize=(2, 2), stride=(2, 2), padding='SAME'),
                                    Reshape(shape=(-1, dim))])
    h2_Node = tg.HiddenNode(prev=[h1_Node],
                            layers=[Linear(prev_dim=dim, this_dim=nclass), Softmax()])
    end_nodes = [tg.EndNode(prev=[h2_Node])]

    graph = Graph(start=[start], end=end_nodes)
    train_outs_sb = graph.train_fprop()
    test_outs = graph.test_fprop()

    ttl_mse = []
    for y_ph, out in zip(y_phs, train_outs_sb):
        # ttl_mse.append(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_ph, out)))
        ttl_mse.append(tf.reduce_mean((y_ph - out) ** 2))
    mse = sum(ttl_mse)

    # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    saver = tf.train.Saver()
    vardir = './var/1'
    if not os.path.exists(vardir):
        os.makedirs(vardir)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        max_epoch = 100
        temp_acc = []
        for epoch in range(max_epoch):
            train_error = 0
            train_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in data_train:
                feed_dict = {X_ph: X_batch}
                for y_ph, y_batch in zip(y_phs, [ys]):
                    feed_dict[y_ph] = y_batch
                sess.run(optimizer, feed_dict=feed_dict)
                train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                train_error += total_mse(train_outs, [ys])[0]
                train_accuracy += total_accuracy(train_outs, [ys])[0]
                ttl_examples += len(X_batch)
            # print 'train mse', train_error / float(ttl_examples)
            # print 'train accuracy', train_accuracy / float(ttl_examples)

            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in data_valid:
                feed_dict = {X_ph: X_batch}
                for y_ph, y_batch in zip(y_phs, [ys]):
                    feed_dict[y_ph] = y_batch
                valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                valid_error += total_mse(valid_outs, [ys])[0]
                valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                ttl_examples += len(X_batch)
            # print 'valid mse', valid_error / float(ttl_examples)
            # print 'valid accuracy', valid_accuracy / float(ttl_examples)
            temp_acc.append(valid_accuracy / float(ttl_examples))

        save_path = saver.save(sess, vardir + "/model.ckpt")
        print("Model saved in file: %s" % save_path)

    print 'max accuracy is:\t', max(temp_acc)
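
# NOTE: same() and valid() (used above and in the earlier snippets to pre-compute
# feature-map sizes) are not shown here. They are assumed to follow the standard
# TensorFlow output-size formulas for 'SAME' and 'VALID' padding; a minimal sketch
# (hypothetical *_sketch names):
import math

def valid_sketch(in_height, in_width, filters, strides):
    """Output (height, width) of a conv/pool layer with 'VALID' padding."""
    out_h = (in_height - filters[0]) // strides[0] + 1
    out_w = (in_width - filters[1]) // strides[1] + 1
    return out_h, out_w

def same_sketch(in_height, in_width, filters, strides):
    """Output (height, width) of a conv/pool layer with 'SAME' padding."""
    out_h = int(math.ceil(in_height / float(strides[0])))
    out_w = int(math.ceil(in_width / float(strides[1])))
    return out_h, out_w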
def train(modelclass, dt=None):
    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    enc_learning_rate = 0.001
    bottleneck_dim = 300
    max_epoch = 100
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.1  # 20170616_1459: 0.05  20170616_1951: 0.01
    max_outputs = 10
    noise_type = 'normal'

    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    X_train, y_train, X_valid, y_valid = Mnist()
    # X_train, y_train, X_valid, y_valid = X_train[0:10000], y_train[0:10000], X_valid[0:10000], y_valid[0:10000]
    # 0617_1346: 0.05  0619_1033: 0.01  0619_1528: 0.1  0619_1944: 0.3
    # X_train, y_train, X_valid, y_valid = Cifar100()
    # X_train, y_train, X_valid, y_valid = Cifar10(contrast_normalize=False, whiten=False)

    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)

    (y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list,
     G_train_enc, G_test_enc, G_train_embed, G_test_embed) = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    # real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator_allconv()

    print('..using model:', gan.__class__.__name__)
    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)
    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train
        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)
        enc_train_cost_sb = encoder_cost(y_ph, G_train_enc)

        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb, max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])
        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        enc_train_cost_sm = tf.summary.scalar('enc_cost', enc_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm, enc_train_cost_sm])

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(gen_train_cost_sb,
                                                                           var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(dis_train_cost_sb,
                                                                           var_list=dis_var_list)
        enc_optimizer = tf.train.AdamOptimizer(enc_learning_rate).minimize(enc_train_cost_sb)

        # WGAN-style weight clipping on the discriminator variables
        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        # embedding_var = tf.Variable(tf.zeros([60000, 300]), trainable=False, name="embedding")
        # prepare projector config
        # summary_writer = tf.summary.FileWriter(logdir)
        # saver = tf.train.Saver([embedding_var])

        init = tf.global_variables_initializer()
        gan.sess.run(init)

        # es = tg.EarlyStopper(max_epoch=max_epoch,
        #                      epoch_look_back=epoch_look_back,
        #                      percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess,
                                            feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        # embed = gan.sess.graph.get_tensor_by_name('Generator/genc4')
        # Create metadata
        # embeddir = logdir
        # if not os.path.exists(embeddir):
        #     os.makedirs(embeddir)
        # metadata_path = os.path.join(embeddir, 'metadata.tsv')

        temp_acc = []
        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)

            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_mse = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            ttl_enc_cost = 0
            error_writer.reopen()

            if epoch == max_epoch - 1:
                output = np.empty([0, 300], 'float32')
                labels = np.empty([0, 10], 'int32')
                # metadata = open(metadata_path, 'w')
                # metadata.write("Name\tLabels\n")

            for X_batch, y_batch in data_train:
                # several discriminator updates per generator/encoder update
                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1, size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)

                for i in range(1):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1, size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([enc_optimizer, gen_optimizer], feed_dict=feed_dict)

                fake_judge_v, cost_train, enc_cost, gen_cost, dis_cost = gan.sess.run(
                    [fake_judge, cost_train_mg, enc_train_cost_sb, gen_train_cost_sb, dis_train_cost_sb],
                    feed_dict=feed_dict)

                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                ttl_enc_cost += enc_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)

                error_writer.add_summary(cost_train, n_exp + ttl_iter)
                error_writer.flush()

                if epoch == max_epoch - 1:
                    results = gan.sess.run(G_train_embed, feed_dict={real_ph: X_batch, y_ph: y_batch})
                    output = np.concatenate((output, results), axis=0)
                    labels = np.concatenate((labels, y_batch), axis=0)
                    # for x_row, y_row in zip(X_batch, y_batch):
                    #     metadata.write('{}\t{}\n'.format(x_row, y_row))
            # metadata.close()
            error_writer.close()

            # embedding projector export (disabled):
            # saver.save(gan.sess, os.path.join(embeddir, 'model.ckpt'))
            # config = projector.ProjectorConfig()
            # embedding = config.embeddings.add()
            # embedding.tensor_name = embedding_var.name
            # embedding.metadata_path = metadata_path
            # projector.visualize_embeddings(summary_writer, config)

            ttl_iter += n_exp
            mean_gen_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            mean_enc_cost = ttl_enc_cost / n_exp
            print('\nmean train gen cost:', mean_gen_cost)
            print('mean train dis cost:', mean_dis_cost)
            print('mean train enc cost:', mean_enc_cost)

            lab = []
            if epoch == max_epoch - 1:
                embeddir = './genData/3'
                if not os.path.exists(embeddir):
                    os.makedirs(embeddir)
                lab = np.nonzero(labels)[1]
                np.save(os.path.join(embeddir, 'embed.npy'), output)
                np.save(os.path.join(embeddir, 'label.npy'), lab)

            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in data_valid:
                feed_dict = {real_ph: X_batch, y_ph: ys}
                valid_outs = gan.sess.run(G_test_enc, feed_dict=feed_dict)
                valid_error += total_mse([valid_outs], [ys])[0]
                valid_accuracy += total_accuracy([valid_outs], [ys])[0]
                ttl_examples += len(X_batch)
            temp_acc.append(valid_accuracy / float(ttl_examples))

    print('max accuracy is:\t', max(temp_acc))
    return save_path