def test_network():
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size, n_points, test=True)
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=False, reuse=True)
  [test_loss_1, _, _, _, _, _, _, _,
   test_loss_2, _, _, _, _, _, _,
   test_loss_3, _, _, _, _, _, _
  ] = initial_loss_function(cube_params_1, cube_params_2, cube_params_3, node_position)
  test_loss = test_loss_1 + test_loss_2 + test_loss_3
  with tf.name_scope('test_summary'):
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('average_test_loss', average_test_loss)
    test_merged = tf.summary.merge([summary_test_loss])
  return_list = [test_merged, average_test_loss, test_loss, node_position, latent_code,
                 cube_params_1, cube_params_2, cube_params_3]
  return return_list
def test_data_loader(self):
    # data init
    data_size = 16
    batch_size = 8
    xs = tf.range(data_size)
    ys = tf.range(data_size, 0, -1)
    sample_transforms = [lambda x, y: (x, y)]
    batch_transforms = [lambda x, y: (x, y)]
    dataset = data_loader((xs, ys), sample_transforms, batch_transforms, batch_size=batch_size)
    # test
    for x, y in dataset:
        self.assertEqual(x.shape, [batch_size])
        self.assertEqual(y.shape, [batch_size])
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='DAN USPS MNIST')
    parser.add_argument('--task', default='USPS2MNIST', help='task to perform')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test_batch_size', type=int, default=1000,
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=500, metavar='N',
                        help='number of epochs to train (default: 500)')
    parser.add_argument('--lr', type=float, default=0.005, metavar='LR',
                        help='learning rate (default: 0.005)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--gpu_id', type=str, default='0', help='cuda device id')
    parser.add_argument('--log_interval', type=int, default=10,
                        help='how many batches to wait before logging training status')
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    if args.task == 'USPS2MNIST':
        source_list, target_list, test_list = data_loader(task='U2M')
    elif args.task == 'MNIST2USPS':
        source_list, target_list, test_list = data_loader(task='M2U')
    else:
        raise Exception('task cannot be recognized!')

    train_loader = torch.utils.data.DataLoader(dataset=source_list, batch_size=args.batch_size,
                                               shuffle=True, drop_last=True)
    train_loader1 = torch.utils.data.DataLoader(dataset=target_list, batch_size=args.batch_size,
                                                shuffle=True, drop_last=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_list, batch_size=args.test_batch_size,
                                              shuffle=True)

    model = models.NUMNet()
    model = model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=5e-4, momentum=0.9)

    save_table = np.zeros(shape=(args.epochs, 2))
    for epoch in range(1, args.epochs + 1):
        train(args, model, train_loader, train_loader1, optimizer, epoch)
        acc = test(args, model, test_loader)
        save_table[epoch - 1, :] = epoch, acc
        np.savetxt(args.task + '_50m_128_0.005.txt', save_table, delimiter=',', fmt='%1.3f')
    np.savetxt(args.task + '_50m_128_0.005.txt', save_table, delimiter=',', fmt='%1.3f')
import tensorflow as tf
from graph import *
from data_loader import *

path = 'Embarcadero, San Francisco, CA->Fisherman\'s Wharf, San Francisco, CA'
save_directory = './models/'
steps = 200
save_interval = 50

global_step = tf.Variable(0, name="step_count")
graph = Graph()
loader = data_loader('../data/barts_hotspots_sorted.csv', path)
optimizer = tf.train.AdamOptimizer().minimize(graph.mse_loss, global_step=global_step)
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_0, y_0 = loader.next_batch()
    for i in range(steps):
        x_input, y_input = loader.next_batch()
        sess.run(optimizer, feed_dict={graph.x_input: x_input, graph.y_gt: y_input})
        if (i % save_interval == 0):
            saver.save(sess, save_directory, global_step=global_step)
    gradients = tf.gradients(graph.mse_loss, graph.W1)
    gradients = sess.run(gradients, feed_dict={
def test_network():
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size, n_points, test=True)
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=False, reuse=True)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=False, reuse=True)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=False, reuse=True)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=False, reuse=True)
  predict_1 = tf.cast(logit_1 > 0.5, tf.int32)
  predict_2 = tf.cast(logit_2 > 0.5, tf.int32)
  predict_3 = tf.cast(logit_3 > 0.5, tf.int32)
  mask_predict_loss, sparseness_loss, similarity_loss, completeness_loss = \
      mask_predict_loss_function(logit_1, logit_2, logit_3,
                                 cube_params_1, cube_params_2, cube_params_3,
                                 node_position)
  original_tree_loss = initial_loss_function(cube_params_1, cube_params_2, cube_params_3, node_position)
  [selected_tree_loss_1, selected_coverage_distance_1, selected_consistency_distance_1, selected_mutex_distance_1,
   selected_tree_loss_2, selected_coverage_distance_2, selected_consistency_distance_2, selected_mutex_distance_2,
   selected_tree_loss_3, selected_coverage_distance_3, selected_consistency_distance_3, selected_mutex_distance_3,
   mask_1, mask_2, mask_3
  ] = cube_update_loss_function(logit_1, logit_2, logit_3,
                                cube_params_1, cube_params_2, cube_params_3,
                                node_position)
  selected_tree_loss = selected_tree_loss_1 + selected_tree_loss_2 + selected_tree_loss_3
  fitting_loss = selected_tree_loss * FLAGS.selected_tree_weight + original_tree_loss
  if FLAGS.stage == 'mask_predict':
    test_loss = mask_predict_loss
  elif FLAGS.stage == 'cube_update':
    test_loss = fitting_loss
  elif FLAGS.stage == 'finetune':
    test_loss = fitting_loss + mask_predict_loss * FLAGS.mask_weight
  else:
    raise ValueError('[{}] is an invalid training stage'.format(FLAGS.stage))
  with tf.name_scope('test_summary'):
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('test_loss', average_test_loss)
    average_test_sparseness_loss = tf.placeholder(tf.float32)
    summary_test_sparseness_loss = tf.summary.scalar('sparseness_loss', average_test_sparseness_loss)
    average_test_similarity_loss = tf.placeholder(tf.float32)
    summary_test_similarity_loss = tf.summary.scalar('similarity_loss', average_test_similarity_loss)
    average_test_completeness_loss = tf.placeholder(tf.float32)
    summary_test_completeness_loss = tf.summary.scalar('completeness_loss', average_test_completeness_loss)
    average_test_selected_tree_loss = tf.placeholder(tf.float32)
    summary_test_selected_tree_loss = tf.summary.scalar('selected_tree_loss', average_test_selected_tree_loss)
    average_test_original_tree_loss = tf.placeholder(tf.float32)
    summary_test_original_tree_loss = tf.summary.scalar('original_tree_loss', average_test_original_tree_loss)
    test_merged = tf.summary.merge([summary_test_loss,
                                    summary_test_sparseness_loss,
                                    summary_test_similarity_loss,
                                    summary_test_completeness_loss,
                                    summary_test_selected_tree_loss,
                                    summary_test_original_tree_loss])
  return_list = [test_merged,
                 logit_1, logit_2, logit_3,
                 predict_1, predict_2, predict_3,
                 sparseness_loss, similarity_loss, completeness_loss,
                 selected_tree_loss, original_tree_loss, test_loss,
                 average_test_sparseness_loss, average_test_similarity_loss,
                 average_test_completeness_loss, average_test_selected_tree_loss,
                 average_test_original_tree_loss, average_test_loss,
                 node_position, latent_code,
                 cube_params_1, cube_params_2, cube_params_3,
                 mask_1, mask_2, mask_3]
  return return_list
num_workers = args.num_workers
'''
batch_size = 24
num_epochs = 3
num_workers = 1
'''
max_len = 64
warmup_ratio = 0.1
max_grad_norm = 1
log_interval = 200
learning_rate = 5e-5

model = BERTClassifier(num_classes=args.num_classes).build_model()
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

dtls, _ = preprocessing()
train_dataloader, test_dataloader = data_loader(dtls, max_len, batch_size, num_workers)

no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()

t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=warmup_step, t_total=t_total)
from data_loader import *
from keras.models import model_from_json
import cPickle as pickle
from sys import argv
from train import *

if __name__ == '__main__':
    assert(len(argv) >= 3)
    train_samples = argv[1]
    model_weights = argv[2]
    train_loader = data_loader(train_samples, from_chunk=False, transform=False)
    print "Loading model configs..."
    #model_json = pickle.load(open(argv[2],'rb'))
    #model = model_from_json(model_json)
    model = CNN(batch_size=8)
    print "Loading model weights..."
    #model.load_weights(argv[3])
    model.load_model_params_dumb(model_weights)
    model = train_model_with_parallel_loading(model, train_loader, num_epoch=16)
    #test_using_chunks(model,test_data_loader)
D_inpseq = 64
D_future = 8
D_in = 4096
D_hid = 4096
D_out = 21
BatchSize = 5
alpha = 1.

#x = torch.randn(D_inpseq, BatchSize, D_in, device=device, requires_grad=True)
train_ann, test_ann = annotations()
train_keys_ann = list(train_ann.keys())
test_keys_ann = list(test_ann.keys())
steps = 10

for i in range(0, len(train_ann), BatchSize):
    x = data_loader(i, BatchSize)
    traindata = x[0]
    testdata = x[1]
    zerrow = torch.zeros(1, D_in)
    m = 0
    for j in range(BatchSize):
        if traindata[j].size(0) > m:
            m = traindata[j].size(0)
    for j in range(BatchSize):
        r = traindata[j].size(0)
        result = np.zeros((m, D_in))
        result[:r, :] = traindata[j]
print('Reading Annotation File')
train_ann, test_ann = annotations(D_out)
train_keys_ann = list(train_ann.keys())
test_keys_ann = list(test_ann.keys())
steps = 10
skip = False
hx, cx = Model.initialise_hidden_parameters()

for i in range(0, len(train_ann) - BatchSize, BatchSize):
    print('Loading Data of given Batchsize: ', BatchSize)
    x, vidtag = data_loader(i, BatchSize)
    traindata, traintag = x[0], vidtag[0]
    testdata, testtag = x[1], vidtag[1]
    print('Preprocessing data')
    m = 0
    for j in range(BatchSize):
        if traindata[j].size(0) > m:
            m = traindata[j].size(0)
    ptraindata = traindata
    for j in range(BatchSize):
        r = ptraindata[j].size(0)
        result = np.zeros((m, D_in))
        result[:r, :] = ptraindata[j]
import sys
sys.path.append('model')

from math import floor

import torch.nn as nn

import arguments
from cartoon_gan import CartoonGAN
import data_loader

# main training
model = CartoonGAN(arguments)
model.initialize()
data_loader = data_loader()
num_batch = floor(min(data_loader.size(), args['ntrain']) / args['batch_size'])

print('-------------start training------------------')
for epoch in range(args['niter'] + args['niter_decay']):
    print('epoch: %d / %d' % (epoch, args['niter'] + args['niter_decay']))
    ###
    # sth
    for batch in range(num_batch):
        print('batch: %d / %d' % (batch, num_batch))
        ... = data_loader.get_next_batch()
        model.forward(...)
        model.optimize_param(...)
        # print loss
        # save latest model
from data_load_utils import *
from data_loader import *
from NN.CNN import *

loader = data_loader('train_samples.pkl', from_chunk=False)
X, Y = loader.load_samples(num_elements=8)
model = CNN(batch_size=8)
for i in range(1000):
    loss, acc = model.train_on_batch(X, Y, 0.01)
    if (i + 1) % 10 == 0:  # log every 10 iterations
        print "epoch %d, accuracy %.4f, loss %.4f" % (i + 1, acc, loss)
import os

import matplotlib.pyplot as plt
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint
import pandas as pd

from data_loader import *
from model import *

## Parameter settings
model_name = 'fusionnet'  # fusionnet / segnet / fcn
backbone = 'resnet50'     # basic_encoder / vgg16 / resnet50
image_hw = (768, 576)     # must be a multiple of 32
model_path = 'savefiles/{0}_{1}.hdf5'.format(model_name, backbone)
datadir = 'dataset'
batch_size = 1

## Load data
data_loader = data_loader(datadir, batch_size, image_hw)

## Build model
model = build_model(model_name, backbone, image_hw)
if os.path.exists(model_path):
    print('loading model:', model_path)
    model.load_weights(model_path)

## Train model
checkpoint = ModelCheckpoint(model_path, save_best_only=True, verbose=1)
model.compile(SGD(lr=1e-2), loss=binary_crossentropy, metrics=[iou_score])
history = model.fit_generator(
    data_loader.data_generator('train'),
    steps_per_epoch=len(data_loader.train_files) // batch_size,
    validation_data=data_loader.data_generator('test'),
    validation_steps=len(data_loader.test_files) // batch_size,
            loss, accuracy = model.train_on_batch(X[k*batch_size:(k+1)*batch_size],
                                                  Y[k*batch_size:(k+1)*batch_size])
            curr_avg_loss = loss_avg.upsert(loss)
            curr_avg_acc = acc_avg.upsert(accuracy)
            if (k+1) % print_every == 0:
                print "Epoch: %d, chunk: %d, batch: %d, loss=%.4f, accuracy=%.4f" % (
                    epoch_done+i+1, j+1, k+1, curr_avg_loss, curr_avg_acc)
        # if (i+1) % 5 == 0:
        #     model.save_weights('model_weights_cnn_'+str(epoch_done+i+1)+'_'+str(curr_avg_acc)+'.h5', overwrite=True)
    return model


if __name__ == '__main__':
    assert(len(argv) >= 3)
    print "reading arguments"
    train_samples_file = argv[1]
    test_samples_file = argv[2]
    print "Initializing data loading"
    loader = data_loader(train_samples_file, from_chunk=False)
    test_loader = data_loader(test_samples_file, from_chunk=False)
    print "Creating CNN architecture....."
    #model = CNN(batch_size=8)
    model = create_model()
    # model_arch_json = model.to_json()
    # pickle.dump(model_arch_json, open('model_cnn_more_droput.json.pkl','wb'))
    print "CNN architecture created"
    print "Starting Training..."
    num_evaluate = 10
    #for i in range(num_evaluate):
    #    model = train_model_with_parallel_loading(model, loader, num_epoch=2)
    #    write_to_file("Evaluating model performance\n")
    #    model = evaluate_model_with_parallel_loading(model, test_loader, num_epoch=1)
    #model = train_model(model, loader)
def Meddit(arg_tuple):
    exp_index = arg_tuple[0]
    data_loader = arg_tuple[1]
    dataset_name = arg_tuple[2]
    dist_func = arg_tuple[3]
    sigma = arg_tuple[4]
    verbose = arg_tuple[5]
    np.random.seed(exp_index)  # Random seed for reproducibility

    print "loading dataset",
    # Variable initialization
    data = data_loader()
    n = data.shape[0]
    Delta = 1.0 / n   # Accuracy parameter. Increase this if you want to increase the speed
    num_arms = 32     # Number of arms to be pulled in every round parallelly
    step_size = 32    # Number of distance evaluations to be performed on every arm
    lcb = np.zeros(n, dtype='float')  # At any point, stores the mu - lower_confidence_interval
    ucb = np.zeros(n, dtype='float')  # At any point, stores the mu + lower_confidence_interval
    T = step_size * np.ones(n, dtype='int')  # At any point, stores number of times each arm is pulled

    # Calculating the approximate std deviation
    sample_distance = dist_func(data[np.random.randint(n, size=2000)],
                                data[np.random.randint(n, size=2000)]).flatten()

    # Bookkeeping variables
    start_time = time.time()
    summary = np.zeros(n)
    summary_ind = 0
    if exp_index == 0:
        print "Calculating full summary for exp 0"
        full_summary = []
    left_over_array = []
    old_tmean = 0

    def choose_arm():
        """
        Chooses the "num_arms" arms with lowest lcb and removes the ones which
        have been pulled n times. Returns None at stopping time.
        """
        low_lcb_arms = np.argpartition(lcb, num_arms)[:num_arms]
        # Arms which are pulled >= n times and ucb != lcb
        arms_pulled_morethan_n = low_lcb_arms[np.where(
            (T[low_lcb_arms] >= n) & (ucb[low_lcb_arms] != lcb[low_lcb_arms]))]
        if arms_pulled_morethan_n.shape[0] > 0:
            # Compute the distance of these arms accurately
            estimate[arms_pulled_morethan_n] = np.mean(
                dist_func(data[arms_pulled_morethan_n], data), axis=1)
            T[arms_pulled_morethan_n] += n
            ucb[arms_pulled_morethan_n] = estimate[arms_pulled_morethan_n]
            lcb[arms_pulled_morethan_n] = estimate[arms_pulled_morethan_n]
        if ucb.min() < lcb[np.argpartition(lcb, 1)[1]]:  # Exit condition
            return None
        arms_to_pull = low_lcb_arms[np.where(T[low_lcb_arms] < n)]
        return arms_to_pull

    def pull_arm(arms):
        """
        Pulls the "num_arms" arms "step_size" times. Updates the estimate, ucb, lcb.
        """
        tmp_pos = np.array(np.random.choice(n, size=step_size, replace=False), dtype='int')
        X_arm = data[arms]
        X_other_arms = data[tmp_pos]
        Tmean = np.mean(dist_func(X_arm, X_other_arms), axis=1)
        estimate[arms] = (estimate[arms] * T[arms] + Tmean * step_size) / (T[arms] + step_size + 0.0)
        T[arms] = T[arms] + step_size
        lcb[arms] = estimate[arms] - np.sqrt(sigma**2 * np.log(1 / Delta) / (T[arms] + 0.0))
        ucb[arms] = estimate[arms] + np.sqrt(sigma**2 * np.log(1 / Delta) / (T[arms] + 0.0))

    # Step 1: Initialize
    estimate = initialise(data, step_size, dist_func)
    lcb = estimate - np.sqrt(sigma**2 * np.log(1 / Delta) / step_size)
    ucb = estimate + np.sqrt(sigma**2 * np.log(1 / Delta) / step_size)
    print "running experiment ", exp_index, "with sigma", sigma

    # Step 2: Iterate
    for ind in range(n * 10):
        # Choose the arms
        arms_to_pull = choose_arm()
        # Stop if we have found the best arm
        if arms_to_pull is None:
            # Collecting final stats
            summary[summary_ind] = estimate.argmin()
            summary_ind += 1
            if exp_index == 0:
                left_over = np.where(lcb <= np.min(ucb))
                full_summary += [[np.random.choice(estimate[left_over], size=250)]]
                left_over_array += [left_over[0].shape[0]]
            logging.info("Done. Best arm = " + str(np.argmin(lcb)))
            print "Summary: Avg pulls=", T.mean(), time.time() - start_time
            break
        # Pull the arms
        pull_arm(arms_to_pull)
        left_over = np.where(lcb <= np.min(ucb))
        left_over_array += [left_over[0].shape[0]]
        # Stats
        if ind % 50 == 0:
            summary[summary_ind] = estimate.argmin()
            summary_ind += 1
            # Storing the whole experiment for the first experiment
            if exp_index == 0:
                full_summary += [[np.random.choice(estimate[left_over], size=250)]]
            if T.mean() > old_tmean:
                old_tmean = T.mean() + 10
                thrown_away = (100.0 * np.where(lcb > np.min(ucb))[0].shape[0]) / n
                if verbose:
                    logging.info(str(exp_index) + " Thrown away " + " " + str(thrown_away) +
                                 " " + str(T.mean()) + " " + str(T.std()))

    if exp_index == 0:
        print "Saving full summary for exp 0"
        filename = '../experiments/figure_data/' + dataset_name + '_sample.pkl'
        with open(filename, 'wb') as f:
            pickle.dump([full_summary, left_over_array, T], f)
    filename = '../experiments/' + dataset_name + '/meddit/' + str(exp_index) + '.pkl'
    with open(filename, 'wb') as f:
        pickle.dump([summary[:summary_ind + 1], T.mean()], f)
# Check the output_dir is given
if flags.output_dir is None:
    raise ValueError('The output directory is needed')

# Check the output directory to save the checkpoint
if not os.path.exists(flags.output_dir):
    os.mkdir(flags.output_dir)

# Check the summary directory to save the event
if not os.path.exists(flags.summary_dir):
    os.mkdir(flags.summary_dir)

if flags.mode == 'train':
    data = data_loader(flags)
    net = nodule_seg(data.data, data.label, data.weight, flags)

    tf.summary.scalar('loss', net.loss)
    tf.summary.scalar('unweighted_loss', net.unweighted_loss)
    tf.summary.scalar('acc_f', net.accuracy_front)
    tf.summary.scalar('acc_b', net.accuracy_back)
    tf.summary.scalar('learning_rate', net.learning_rate)

    saver = tf.train.Saver(max_to_keep=10)
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    var_list2 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='forward')
def meta_test(network_name, dataset, num_epoch, batch_size, lr, momentum, wd):
    if not os.path.isdir('result'):
        os.mkdir('result')
    save_path = './result/meta-test_' + str(args.network) + '_' + str(dataset)

    tr_loss = []
    t_loss = []
    tr_acc = []
    t_acc = []

    # We are using cuda for training - no point trying out on CPU for ResNet
    device = torch.device("cuda")
    net = build_network(network_name, dataset)
    net.to(device)
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in net.parameters()])))

    # assign argparse parameters
    criterion = nn.CrossEntropyLoss().to(device)
    best_val_accuracy = 0.0
    lr_data = []

    train_data, test_data = data_loader(dataset, batch_size)
    iter_len = len(train_data.dataset)
    num_classes = 100 if args.dataset == 'cifar100' else 10

    train_loss, train_acc = compute_loss_accuracy(net, train_data, criterion, device)
    print('Initial training loss is %.3f' % train_loss)
    gamma = abs((train_loss**0.5 * np.log(train_loss * num_classes) / num_classes**0.25) / 4)
    print('First gamma is %.3f' % gamma)

    mlr_snet = MLRSNet(1, 50).to(device)
    print(mlr_snet)

    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=wd)

    net_path = torch.load('./mlr_snet 1.pth')
    mlr_snet.load_state_dict(net_path)

    for epoch in range(num_epoch):
        train_correct = 0
        train_loss = 0

        if epoch == args.num_epoch // 3:
            net_path = './mlr_snet 100.pth'
            model_dict = torch.load(net_path)
            mlr_snet.load_state_dict(model_dict)
            mlr_snet = mlr_snet.to(device)
            train_loss, train_acc = compute_loss_accuracy(net, train_data, criterion, device)
            gamma = abs((train_loss**0.5 * np.log(train_loss * num_classes) / num_classes**0.25) / 4)
            print('Second gamma is %.3f' % gamma)

        if epoch == args.num_epoch // 3 * 2:
            net_path = './mlr_snet 200.pth'
            model_dict = torch.load(net_path)
            mlr_snet.load_state_dict(model_dict)
            mlr_snet = mlr_snet.to(device)
            train_loss, train_acc = compute_loss_accuracy(net, train_data, criterion, device)
            gamma = abs((train_loss**0.5 * np.log(train_loss * num_classes) / num_classes**0.25) / 4)
            print('Third gamma is %.3f' % gamma)

        for i, (inputs, labels) in enumerate(train_data):
            mlr_snet.reset_lstm(keep_states=(epoch + i) > 0, device=device)
            net.train()
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            train_loss += loss.item() * labels.size(0)
            train_pred = outputs.argmax(1)
            train_correct += train_pred.eq(labels).sum().item()

            loss_net = loss.unsqueeze(0)
            with torch.no_grad():
                lr_model = mlr_snet(loss_net)
            lr_model = float(lr_model.data) * gamma
            lr_data.append(lr_model)
            for group in optimizer.param_groups:
                group['lr'] = lr_model

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        train_acc = 100.0 * (train_correct / iter_len)
        val_loss, val_acc = compute_loss_accuracy(net, test_data, criterion, device)

        tr_loss.append(train_loss / iter_len)
        t_loss.append(val_loss)
        tr_acc.append(train_acc)
        t_acc.append(val_acc)
        torch.save({'train_acc': tr_acc, 'test_acc': t_acc,
                    'train_loss': tr_loss, 'test_loss': t_loss,
                    'lr': lr_data}, save_path)

        print('train loss is : %.4f' % (train_loss / iter_len))
        print('test loss is: %.4f' % val_loss)
        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            # torch.save(net.state_dict(), model_loc)
        print('train_accuracy at epoch :{} is : {}'.format(epoch, train_acc))
        print('val_accuracy at epoch :{} is : {}'.format(epoch, val_acc))
        print('best val_accuracy is : {}'.format(best_val_accuracy))
        print('learning_rate after epoch :{} is : {}'.format(epoch, lr_data[-1]))
def train_network():
  data, octree, node_position = data_loader(FLAGS.train_data, FLAGS.train_batch_size, n_points)
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=True, reuse=False)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=True, reuse=False)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=True, reuse=False)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=True, reuse=False)
  train_loss, sparseness_loss, similarity_loss, completeness_loss, _, _ = \
      mask_prediction_loss_function(logit_1, logit_2, logit_3,
                                    cube_params_1, cube_params_2, cube_params_3,
                                    node_position)
  with tf.name_scope('train_summary'):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      tvars = tf.trainable_variables()
      encoder_vars = [var for var in tvars if 'encoder' in var.name]
      decoder_vars = [var for var in tvars if 'decoder' in var.name]
      mask_predict_vars = [var for var in tvars if 'mask_predict' in var.name]
      var_list = mask_predict_vars
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
      lr = optimizer._lr
      summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
      summary_train_loss = tf.summary.scalar('train_loss', train_loss)
      summary_sparseness_loss = tf.summary.scalar('sparseness_loss', sparseness_loss)
      summary_similarity_loss = tf.summary.scalar('similarity_loss', similarity_loss)
      summary_completeness_loss = tf.summary.scalar('completeness_loss', completeness_loss)
      summary_logit_1_histogram = tf.summary.histogram('logit_1', logit_1)
      summary_logit_2_histogram = tf.summary.histogram('logit_2', logit_2)
      summary_logit_3_histogram = tf.summary.histogram('logit_3', logit_3)
      total_summary_list = [
          summary_train_loss, summary_lr_scheme,
          summary_sparseness_loss, summary_similarity_loss, summary_completeness_loss,
          summary_logit_1_histogram, summary_logit_2_histogram, summary_logit_3_histogram
      ]
      train_merged = tf.summary.merge(total_summary_list)
  return train_merged, solver
import data_loader
import Config
from model import e2e_user
import argparse
import tensorflow as tf

config = Config.config

input_graph = tf.Graph()
with input_graph.as_default():
    data = data_loader.DataLoader()
    proto = tf.ConfigProto()
    input_sess = tf.Session(config=proto)

seq_goals, seq_usr_dass, seq_sys_dass = data.data_loader()
train_goals, train_usrdas, train_sysdas, test_goals, test_usrdas, test_sysdas, val_goals, val_usrdas, val_sysdas = \
    train_test_val_split(seq_goals, seq_usr_dass, seq_sys_dass)
generator = batch_iter(train_goals, train_usrdas, train_sysdas)


def train():
    best_ppl = 1e20


def infer():
    pass


def get_args():
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda x: x.lower() == 'true')
    parser.add_argument("--train", type="bool", default=True)
def test_network():
  data, octree, node_position = data_loader(FLAGS.test_data, FLAGS.test_batch_size, n_points, test=True)
  latent_code = encoder(data, octree, is_training=False, reuse=True)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=False, reuse=True)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=False, reuse=True)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=False, reuse=True)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=False, reuse=True)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=False, reuse=True)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=False, reuse=True)
  predict_1 = tf.cast(logit_1 > 0.5, tf.int32)
  predict_2 = tf.cast(logit_2 > 0.5, tf.int32)
  predict_3 = tf.cast(logit_3 > 0.5, tf.int32)
  test_loss, sparseness_loss, similarity_loss, completeness_loss, relation_12, relation_23 = \
      mask_prediction_loss_function(logit_1, logit_2, logit_3,
                                    cube_params_1, cube_params_2, cube_params_3,
                                    node_position)
  logit = tf.concat([logit_1, logit_2, logit_3], axis=1)
  mask = tf.cast(logit > 0.5, tf.int32)
  mask_1, mask_2, mask_3 = primitive_tree_generation(mask, relation_12, relation_23,
                                                     n_part_1, n_part_2, n_part_3)
  with tf.name_scope('test_summary'):
    average_test_loss = tf.placeholder(tf.float32)
    summary_test_loss = tf.summary.scalar('test_loss', average_test_loss)
    average_test_sparseness_loss = tf.placeholder(tf.float32)
    summary_test_sparseness_loss = tf.summary.scalar('sparseness_loss', average_test_sparseness_loss)
    average_test_similarity_loss = tf.placeholder(tf.float32)
    summary_test_similarity_loss = tf.summary.scalar('similarity_loss', average_test_similarity_loss)
    average_test_completeness_loss = tf.placeholder(tf.float32)
    summary_test_completeness_loss = tf.summary.scalar('completeness_loss', average_test_completeness_loss)
    test_merged = tf.summary.merge([summary_test_loss,
                                    summary_test_sparseness_loss,
                                    summary_test_similarity_loss,
                                    summary_test_completeness_loss])
  return_list = [test_merged,
                 logit_1, logit_2, logit_3,
                 predict_1, predict_2, predict_3,
                 sparseness_loss, similarity_loss, completeness_loss, test_loss,
                 average_test_sparseness_loss, average_test_similarity_loss,
                 average_test_completeness_loss, average_test_loss,
                 node_position, latent_code,
                 cube_params_1, cube_params_2, cube_params_3,
                 mask_1, mask_2, mask_3]
  return return_list
def train_network():
  data, octree, node_position = data_loader(FLAGS.train_data, FLAGS.train_batch_size, n_points)
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=True, reuse=False)
  logit_1 = mask_predict_net(latent_code, n_part_1, name='phase_1', is_training=True, reuse=False)
  logit_2 = mask_predict_net(latent_code, n_part_2, name='phase_2', is_training=True, reuse=False)
  logit_3 = mask_predict_net(latent_code, n_part_3, name='phase_3', is_training=True, reuse=False)
  mask_predict_loss, sparseness_loss, similarity_loss, completeness_loss = \
      mask_predict_loss_function(logit_1, logit_2, logit_3,
                                 cube_params_1, cube_params_2, cube_params_3,
                                 node_position)
  original_tree_loss = initial_loss_function(cube_params_1, cube_params_2, cube_params_3, node_position)
  [selected_tree_loss_1, selected_coverage_distance_1, selected_consistency_distance_1, selected_mutex_distance_1,
   selected_tree_loss_2, selected_coverage_distance_2, selected_consistency_distance_2, selected_mutex_distance_2,
   selected_tree_loss_3, selected_coverage_distance_3, selected_consistency_distance_3, selected_mutex_distance_3,
   _, _, _
  ] = cube_update_loss_function(logit_1, logit_2, logit_3,
                                cube_params_1, cube_params_2, cube_params_3,
                                node_position)
  selected_tree_loss = selected_tree_loss_1 + selected_tree_loss_2 + selected_tree_loss_3
  fitting_loss = selected_tree_loss * FLAGS.selected_tree_weight + original_tree_loss

  tvars = tf.trainable_variables()
  encoder_vars = [var for var in tvars if 'encoder' in var.name]
  decoder_vars = [var for var in tvars if 'decoder' in var.name]
  mask_predict_vars = [var for var in tvars if 'mask_predict' in var.name]

  if FLAGS.stage == 'mask_predict':
    train_loss = mask_predict_loss
    var_list = mask_predict_vars
  elif FLAGS.stage == 'cube_update':
    train_loss = fitting_loss
    var_list = decoder_vars
  elif FLAGS.stage == 'finetune':
    train_loss = fitting_loss + mask_predict_loss * FLAGS.mask_weight
    var_list = encoder_vars  # + decoder_vars
  else:
    raise ValueError('[{}] is an invalid training stage'.format(FLAGS.stage))

  with tf.name_scope('train_summary'):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
      lr = optimizer._lr
      summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
      summary_train_loss = tf.summary.scalar('train_loss', train_loss)
      summary_sparseness_loss = tf.summary.scalar('sparseness_loss', sparseness_loss)
      summary_similarity_loss = tf.summary.scalar('similarity_loss', similarity_loss)
      summary_completeness_loss = tf.summary.scalar('completeness_loss', completeness_loss)
      summary_selected_tree_loss = tf.summary.scalar('selected_tree_loss', selected_tree_loss)
      summary_original_tree_loss = tf.summary.scalar('original_tree_loss', original_tree_loss)
      summary_logit_1_histogram = tf.summary.histogram('logit_1', logit_1)
      summary_logit_2_histogram = tf.summary.histogram('logit_2', logit_2)
      summary_logit_3_histogram = tf.summary.histogram('logit_3', logit_3)
      summary_selected_coverage_distance_1 = tf.summary.scalar('selected_coverage_distance_1', selected_coverage_distance_1)
      summary_selected_consistency_distance_1 = tf.summary.scalar('selected_consistency_distance_1', selected_consistency_distance_1)
      summary_selected_mutex_distance_1 = tf.summary.scalar('selected_mutex_distance_1', selected_mutex_distance_1)
      summary_list_phase_one = [summary_selected_coverage_distance_1,
                                summary_selected_consistency_distance_1,
                                summary_selected_mutex_distance_1]
      summary_selected_coverage_distance_2 = tf.summary.scalar('selected_coverage_distance_2', selected_coverage_distance_2)
      summary_selected_consistency_distance_2 = tf.summary.scalar('selected_consistency_distance_2', selected_consistency_distance_2)
      summary_selected_mutex_distance_2 = tf.summary.scalar('selected_mutex_distance_2', selected_mutex_distance_2)
      summary_list_phase_two = [summary_selected_coverage_distance_2,
                                summary_selected_consistency_distance_2,
                                summary_selected_mutex_distance_2]
      summary_selected_coverage_distance_3 = tf.summary.scalar('selected_coverage_distance_3', selected_coverage_distance_3)
      summary_selected_consistency_distance_3 = tf.summary.scalar('selected_consistency_distance_3', selected_consistency_distance_3)
      summary_selected_mutex_distance_3 = tf.summary.scalar('selected_mutex_distance_3', selected_mutex_distance_3)
      summary_list_phase_three = [summary_selected_coverage_distance_3,
                                  summary_selected_consistency_distance_3,
                                  summary_selected_mutex_distance_3]
      total_summary_list = [
          summary_train_loss, summary_lr_scheme,
          summary_sparseness_loss, summary_similarity_loss, summary_completeness_loss,
          summary_selected_tree_loss, summary_original_tree_loss,
          summary_logit_1_histogram, summary_logit_2_histogram, summary_logit_3_histogram
      ] + summary_list_phase_one + summary_list_phase_two + summary_list_phase_three
      train_merged = tf.summary.merge(total_summary_list)
  return train_merged, solver
parser.add_argument('--in_memory', type=str2bool, default=False)
parser.add_argument('--pipe_lining', type=str2bool, default=False)
parser.add_argument('--aug_num', type=int, default=1000)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--test_step', type=int, default=50)

args = parser.parse_args()

# In[ ]:

vgg19_net = Vgg19(args.vgg_path)
result_time = []

if args.pipe_lining:
    loader = data_loader(args)
    loader.build_loader()
    if args.in_memory:
        img_arr = image_loader(args.data_path, args.aug_num)
    vgg19_net.build(loader.next_batch)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    _ = sess.run(loader.init_op, feed_dict={loader.img_data: img_arr})
    for i in range(args.test_step):
        s_time = time.time()
        sess.run(vgg19_net.prob)
        e_time = time.time()
        result_time.append(e_time - s_time)
def train_network():
  data, octree, node_position = data_loader(FLAGS.train_data, FLAGS.train_batch_size, n_points)
  latent_code = encoder(data, octree, is_training=True, reuse=False)
  cube_params_1 = decoder(latent_code, n_part_1, shape_bias_1, name='decoder_phase_one', is_training=True, reuse=False)
  cube_params_2 = decoder(latent_code, n_part_2, shape_bias_2, name='decoder_phase_two', is_training=True, reuse=False)
  cube_params_3 = decoder(latent_code, n_part_3, shape_bias_3, name='decoder_phase_three', is_training=True, reuse=False)
  [train_loss_1, coverage_distance_1, cube_volume_1, consistency_distance_1,
   mutex_distance_1, aligning_distance_1, symmetry_distance_1, cube_area_average_distance_1,
   train_loss_2, coverage_distance_2, cube_volume_2, consistency_distance_2,
   mutex_distance_2, aligning_distance_2, symmetry_distance_2,
   train_loss_3, coverage_distance_3, cube_volume_3, consistency_distance_3,
   mutex_distance_3, aligning_distance_3, symmetry_distance_3
  ] = initial_loss_function(cube_params_1, cube_params_2, cube_params_3, node_position)
  train_loss = train_loss_1 + train_loss_2 + train_loss_3
  with tf.name_scope('train_summary'):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      tvars = tf.trainable_variables()
      encoder_vars = [var for var in tvars if 'encoder' in var.name]
      decoder_1_vars = [var for var in tvars if 'phase_one' in var.name]
      decoder_2_vars = [var for var in tvars if 'phase_two' in var.name]
      decoder_3_vars = [var for var in tvars if 'phase_three' in var.name]
      var_list = encoder_vars + decoder_1_vars + decoder_2_vars + decoder_3_vars
      optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      solver = optimizer.minimize(train_loss, var_list=var_list)
      lr = optimizer._lr
      summary_lr_scheme = tf.summary.scalar('learning_rate', lr)
      summary_train_loss = tf.summary.scalar('train_loss', train_loss)
      summary_coverage_distance_1 = tf.summary.scalar('coverage_distance_1', coverage_distance_1)
      summary_cube_volume_1 = tf.summary.scalar('cube_volume_1', cube_volume_1)
      summary_consistency_distance_1 = tf.summary.scalar('consistency_distance_1', consistency_distance_1)
      summary_mutex_distance_1 = tf.summary.scalar('mutex_distance_1', mutex_distance_1)
      summary_aligning_distance_1 = tf.summary.scalar('aligning_distance_1', aligning_distance_1)
      summary_symmetry_distance_1 = tf.summary.scalar('symmetry_distance_1', symmetry_distance_1)
      summary_cube_area_average_distance_1 = tf.summary.scalar('cube_area_average_distance_1', cube_area_average_distance_1)
      summary_list_phase_one = [summary_coverage_distance_1, summary_cube_volume_1,
                                summary_consistency_distance_1, summary_mutex_distance_1,
                                summary_aligning_distance_1, summary_symmetry_distance_1,
                                summary_cube_area_average_distance_1]
      summary_coverage_distance_2 = tf.summary.scalar('coverage_distance_2', coverage_distance_2)
      summary_cube_volume_2 = tf.summary.scalar('cube_volume_2', cube_volume_2)
      summary_consistency_distance_2 = tf.summary.scalar('consistency_distance_2', consistency_distance_2)
      summary_mutex_distance_2 = tf.summary.scalar('mutex_distance_2', mutex_distance_2)
      summary_aligning_distance_2 = tf.summary.scalar('aligning_distance_2', aligning_distance_2)
      summary_symmetry_distance_2 = tf.summary.scalar('symmetry_distance_2', symmetry_distance_2)
      summary_list_phase_two = [summary_coverage_distance_2, summary_cube_volume_2,
                                summary_consistency_distance_2, summary_mutex_distance_2,
                                summary_aligning_distance_2, summary_symmetry_distance_2]
      summary_coverage_distance_3 = tf.summary.scalar('coverage_distance_3', coverage_distance_3)
      summary_cube_volume_3 = tf.summary.scalar('cube_volume_3', cube_volume_3)
      summary_consistency_distance_3 = tf.summary.scalar('consistency_distance_3', consistency_distance_3)
      summary_mutex_distance_3 = tf.summary.scalar('mutex_distance_3', mutex_distance_3)
      summary_aligning_distance_3 = tf.summary.scalar('aligning_distance_3', aligning_distance_3)
      summary_symmetry_distance_3 = tf.summary.scalar('symmetry_distance_3', symmetry_distance_3)
      summary_list_phase_three = [summary_coverage_distance_3, summary_cube_volume_3,
                                  summary_consistency_distance_3, summary_mutex_distance_3,
                                  summary_aligning_distance_3, summary_symmetry_distance_3]
      total_summary_list = [summary_train_loss, summary_lr_scheme] + \
          summary_list_phase_one + summary_list_phase_two + summary_list_phase_three
      train_merged = tf.summary.merge(total_summary_list)
  return train_merged, solver
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='CDAN USPS MNIST')
    parser.add_argument('--method', type=str, default='CDAN-E',
                        choices=['CDAN', 'CDAN-E', 'DANN'])
    parser.add_argument('--task', default='USPS2MNIST', help='task to perform')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test_batch_size', type=int, default=1000,
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=550, metavar='N',
                        help='number of epochs to train (default: 550)')
    parser.add_argument('--lr', type=float, default=5e-5, metavar='LR',
                        help='learning rate (default: 5e-5)')
    parser.add_argument('--lr2', type=float, default=0.005, metavar='LR2',
                        help='learning rate2 (default: 0.005)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--gpu_id', type=str, default='0', help='cuda device id')
    parser.add_argument('--log_interval', type=int, default=10,
                        help='how many batches to wait before logging training status')
    parser.add_argument('--random', type=bool, default=False, help='whether to use random')
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    if args.task == 'USPS2MNIST':
        source_list, ordinary_train_dataset, target_list, test_list, ccp = data_loader(task='U2M')
        start_epoch = 50
        decay_epoch = 600
    elif args.task == 'MNIST2USPS':
        source_list, ordinary_train_dataset, target_list, test_list, ccp = data_loader(task='M2U')
        start_epoch = 50
        decay_epoch = 600
    else:
        raise Exception('task cannot be recognized!')

    train_loader = torch.utils.data.DataLoader(dataset=source_list, batch_size=args.batch_size,
                                               shuffle=True, num_workers=8, drop_last=True)
    train_loader1 = torch.utils.data.DataLoader(dataset=target_list, batch_size=args.batch_size,
                                                shuffle=True, num_workers=8, drop_last=True)
    o_train_loader = torch.utils.data.DataLoader(dataset=ordinary_train_dataset,
                                                 batch_size=args.test_batch_size,
                                                 shuffle=True, num_workers=8)
    test_loader = torch.utils.data.DataLoader(dataset=test_list, batch_size=args.test_batch_size,
                                              shuffle=True, num_workers=8)

    model = network.LeNet()
    model = model.cuda()
    class_num = 10

    if args.random:
        random_layer = network.RandomLayer([model.output_num(), class_num], 500)
        ad_net = network.AdversarialNetwork(500, 500)
        random_layer.cuda()
    else:
        random_layer = None
        ad_net = network.AdversarialNetwork(model.output_num() * class_num, 500)
    ad_net = ad_net.cuda()

    optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=0.0005, momentum=0.9)
    optimizer_ad = optim.SGD(ad_net.parameters(), lr=args.lr2, weight_decay=0.0005, momentum=0.9)

    save_table = np.zeros(shape=(args.epochs, 3))
    for epoch in range(1, args.epochs + 1):
        if epoch % decay_epoch == 0:
            for param_group in optimizer.param_groups:
                param_group["lr"] = param_group["lr"] * 0.5
        train(args, model, ad_net, random_layer, train_loader, train_loader1,
              optimizer, optimizer_ad, epoch, start_epoch, args.method, ccp)
        acc1 = test(args, model, o_train_loader)
        acc2 = test(args, model, test_loader)
        save_table[epoch - 1, :] = epoch - 50, acc1, acc2
        np.savetxt(args.task + '_.txt', save_table, delimiter=',', fmt='%1.3f')
    np.savetxt(args.task + '_.txt', save_table, delimiter=',', fmt='%1.3f')