def load_all_data(window_size):
    """Fetch both dataset splits for the given window size.

    Returns a 4-tuple: (train inputs, train targets, val inputs, val targets),
    as produced by the module-level `load_data` helper.
    """
    print("Loading training data...")
    train_x, train_y = load_data('generate', window_size)
    print("Loading validate data...")
    val_x, val_y = load_data('validate', window_size)
    print("Data loaded.")
    return train_x, train_y, val_x, val_y
def train(image, text, encoder, decoder, criterion, train_loader, teach_forcing_prob=1):
    """Train the encoder/decoder pair for cfg.num_epochs epochs.

    Args:
        image: pre-allocated image tensor, refilled in place each batch via
            utils.load_data.
        text: unused here; kept for interface compatibility with callers.
        encoder: CNN + BiLSTM feature extractor (called as encoder(image)).
        decoder: attention decoder exposing initHidden(batch_size).
        criterion: per-step loss applied to each decoder output.
        train_loader: iterable yielding (images, texts) batches.
        teach_forcing_prob: threshold compared against random.random(); with
            the default of 1, teacher forcing is never used.

    Side effects: writes scalar summaries via Logger('log/') and saves one
    encoder/decoder checkpoint pair per epoch under cfg.model.
    """
    logger = Logger('log/')
    # One Adam optimizer per sub-network.
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.learning_rate,
                                         betas=(0.5, 0.999))
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.learning_rate,
                                         betas=(0.5, 0.999))
    # loss averager
    loss_avg = utils.Averager()
    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)
        for i in range(len(train_loader)):
            # FIX: Python 3 iterators have no .next() method; use next().
            cpu_images, cpu_texts = next(train_iter)
            batch_size = cpu_images.size(0)
            # FIX: the original zipped encoder and decoder parameter lists,
            # which silently stops at the shorter one; unfreeze each
            # network's parameters independently.
            for encoder_param in encoder.parameters():
                encoder_param.requires_grad = True
            for decoder_param in decoder.parameters():
                decoder_param.requires_grad = True
            encoder.train()
            decoder.train()
            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)
            # CNN + BiLSTM
            encoder_outputs = encoder(image)
            target_variable = target_variable.cuda()
            # start decoder for SOS_TOKEN
            decoder_input = target_variable[utils.SOS_TOKEN].cuda()
            decoder_hidden = decoder.initHidden(batch_size).cuda()
            loss = 0.0
            teach_forcing = random.random() > teach_forcing_prob
            if teach_forcing:
                # Feed the ground-truth symbol at each step.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    decoder_input = target_variable[di]
            else:
                # Feed back the decoder's own top-1 prediction.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    topv, topi = decoder_output.data.topk(1)
                    decoder_input = topi.squeeze()
            encoder.zero_grad()
            decoder.zero_grad()
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            loss_avg.add(loss)
            if i % 10 == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch, cfg.num_epochs, i, len(train_loader),
                    loss_avg.val()))
                # FIX: rejoined the summary tag that was broken across lines
                # in the original source.
                logger.scalar_summary(
                    'Loss of Epoch{0}/miniBatch(100)'.format(epoch),
                    loss_avg.val(), i)
                logger.scalar_summary('Loss of Epoch/miniBatch(100)',
                                      loss_avg.val(),
                                      epoch * len(train_loader) + i)
                loss_avg.reset()
        # save checkpoint (filenames embed the epoch index)
        torch.save(encoder.state_dict(),
                   '{0}/encoder_{1}.pth'.format(cfg.model, epoch))
        torch.save(decoder.state_dict(),
                   '{0}/decoder_{1}.pth'.format(cfg.model, epoch))
# NOTE(review): fragment — this chunk begins mid-loop (`line` is bound
# outside it) and ends on a dangling `if`; np, vggcam, nb_classes,
# VGGCAM_weight_path, get_output_layer, layer, K, load_data, load_pp_data,
# num_images, pca_path, pickle, re, os, flag_d and test_path are all
# defined elsewhere in the original file. Python 2 syntax (print statement).
image_names.append(line)
image_names = np.array(image_names)
# Paths to the pre-aggregated painting descriptors.
path_aggregated_descriptors = 'output/aggregated_descriptors/'
aggregated_descriptors_path_wp = path_aggregated_descriptors + 'painting_aggregated_descriptors_wp.h5'
# Build the VGG-CAM model and load its pretrained weights.
model = vggcam(nb_classes)
model.load_weights(VGGCAM_weight_path)
final_conv_layer = get_output_layer(model, "CAM_relu")
conv_layer_features = get_output_layer(model, layer)
# Keras backend function: (input image, learning phase) ->
# (CAM conv output, final predictions, features of the chosen conv layer).
get_output = K.function(
    [model.layers[0].input, K.learning_phase()], [
        final_conv_layer.output, model.layers[-1].output,
        conv_layer_features.output
    ])
data = load_data(aggregated_descriptors_path_wp)
pp_descriptor_path = 'output/pp_descriptor/painting_pp_descriptors_wp.h5'
# Per-painting descriptors, one entry per image index.
pp_data = [
    load_pp_data(pp_descriptor_path, index) for index in range(num_images)
]
with open(pca_path + 'pca_matrix_from', 'rb') as file:
    pca_matrix = pickle.load(file)
print "Load completed"
# Regex matching a trailing "/<name>.jpg" path component.
compiler = re.compile('\/[^\/]+\.jpg')
if flag_d:
    for subdir, dirs, files in os.walk(test_path):
        for file in files:
            image_name = subdir + os.sep + file
            if image_name.endswith("TransformedImage.jpg"):
                # NOTE(review): body of this `if` continues beyond this chunk.
# Default hyperparameters; sweep/CLI overrides are applied through wandb.
hyperparameter_defaults = {
    'n_layers': 1,
    'n_neurons': 128,
    'eta': 0.008,
    'lmbda': 0.2,
    'alpha': 0.1,
    'epochs': 20,
    'batch_size': 40,
}
# Register defaults with wandb; read the effective values back from config.
wandb.init(config=hyperparameter_defaults)
config = wandb.config
training, validation, test = load_data()
# 784 inputs (28x28), n_layers hidden layers of n_neurons each, 10 outputs.
architecture = [784] + [config['n_neurons']] * config['n_layers'] + [10]
nn = NeuralNetwork(architecture,
                   eta=config['eta'],
                   lmbda=config['lmbda'],
                   alpha=config['alpha'])
# Train on a shuffled 5000-sample subset, validate on a shuffled 500-sample
# subset.
nn.train(np.random.permutation(training)[:5000],
         np.random.permutation(validation)[:500],
         epochs=config['epochs'],
         batch_size=config['batch_size'])
# NOTE(review): fragment — the opening `if args.loss == 'mse':` of this
# elif-chain sits before this chunk, and the trailing train_sgd(...) call is
# truncated; args, load_data, load_model, lr_scheduler and Trainer come from
# elsewhere in the original file.
    ct = torch.nn.MSELoss().cuda()
    one_hot = True
elif args.loss == 'hinge':
    ct = torch.nn.MultiMarginLoss(p=2)
    one_hot = False
elif args.loss == 'cross_entropy':
    print('Loss')
    ct = torch.nn.CrossEntropyLoss()
    one_hot = False
# Load data and model
# One experiment per (learning rate, iteration budget) pair, repeated
# args.n_tries times.
for lr, n_iters in zip(args.lr, args.n_iters):
    for i in range(args.n_tries):
        print('==== Start of %d-th Experiment ===' % (i + 1))
        trDL, teDL = load_data(args, one_hot=one_hot)
        net = load_model(args.dataset,
                         args.arch,
                         width=args.width,
                         depth=args.depth)
        print(net)
        #net.apply(lambda t: weights_init(t,args.gain,args.init))
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=lr,
                                    momentum=args.momentum)
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=args.decay)
        trainer = Trainer(iter_display=args.iter_display)
        trainer.set_model(net, ct, optimizer, scheduler)
        res = trainer.train_sgd(trDL, batch_size=args.batch_size,
        # NOTE(review): remaining arguments of train_sgd lie beyond this chunk.
def train(image, text, model, criterion, train_loader, teach_forcing_prob=0.5):
    """Train a single end-to-end seq2seq model.

    Args:
        image: pre-allocated image tensor, refilled in place each batch via
            utils.load_data.
        text: unused here; kept for interface compatibility with callers.
        model: seq2seq network called as model(image, target_variable).
        criterion: loss over the flattened decoder outputs.
        train_loader: iterable yielding (images, texts) batches.
        teach_forcing_prob: kept for interface compatibility; this variant
            delegates any teacher forcing to the model itself.

    Relies on module-level cfg, utils, converter and clip (the gradient-norm
    ceiling) — TODO confirm `clip` is defined at module scope.
    """
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.learning_rate,
                                 betas=(0.5, 0.999))
    # loss averager
    loss_avg = utils.Averager()
    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)
        for i in range(len(train_loader)):
            # FIX: Python 3 iterators have no .next() method; use next().
            cpu_images, cpu_texts = next(train_iter)
            optimizer.zero_grad()
            model.train()
            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)
            if torch.cuda.is_available():
                target_variable = target_variable.cuda()
            output = model(image, target_variable)
            # Drop the SOS step and flatten both tensors for the criterion:
            # output -> [(trg len - 1) * batch size, output dim]
            # target -> [(trg len - 1) * batch size]
            output_dim = output.shape[-1]
            output = output[1:].view(-1, output_dim)
            target_variable = target_variable[1:].view(-1)
            loss = criterion(output, target_variable)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            loss_avg.add(loss)
            # Log every batch (modulus of 1 kept from the original).
            if i % 1 == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch + 1, cfg.num_epochs, i + 1, len(train_loader),
                    loss_avg.val()))
                loss_avg.reset()
            # CLEANUP: removed the large blocks of commented-out
            # encoder/decoder code and the dead locals (batch_size, the
            # unused random draw `p`, and the `loss = 0.0` that was
            # immediately overwritten).
def train(cfg_path):
    """Build the network selected in the config file and train it (TF1 graph mode).

    Reads a config via utils.get_cfg(cfg_path); expects keys such as 'net',
    'width', 'height', 'trainpath', 'valpath', 'batchsize', 'learningrate',
    'optimizer', plus network-specific ones. Relies on module-level
    classes_id, tensorboard, epoch_iter, minibatch and the network modules.
    """
    cfg = utils.get_cfg(cfg_path)
    network = cfg['net']
    print('prepare network...')
    w = cfg['width']
    h = cfg['height']
    # MobileNet scales the input resolution by its resolution multiplier.
    if network == 'mobilenetv1':
        w = int(w * cfg['resolution_multiplier'])
        h = int(h * cfg['resolution_multiplier'])
    x = tf.placeholder(dtype=tf.float32, shape=[None, h, w, 3])
    y = tf.placeholder(dtype=tf.float32, shape=[None, len(classes_id)])
    # Dispatch on the configured architecture; each branch produces
    # (predictions, logits) plus loss/accuracy ops.
    if network == 'vgg':
        if cfg['isvgg19'] == 'true':
            vgg_network = vgg.Vgg(x, len(classes_id), True, cfg['modelpath'])
        else:
            vgg_network = vgg.Vgg(x, len(classes_id), False, cfg['modelpath'])
        predictions, logits = vgg_network.build()
        loss = vgg_network.losses(y, logits)
        accurracy = vgg_network.accurracy(y, logits)
    elif network == 'inceptionv4':
        predictions, logits = inceptionV4.inception_v4(x, len(classes_id))
        loss = inceptionV4.losses(y, logits)
        accurracy = inceptionV4.accurracy(y, logits)
    elif network == 'inceptionResnetV2':
        predictions, logits = inceptionResnetV2.inception_resnet_v2(
            x, len(classes_id))
        loss = inceptionResnetV2.losses(y, logits)
        accurracy = inceptionResnetV2.accurracy(y, logits)
    elif network == 'resnetv2':
        predictions, logits = resnetv2.resnet_v2_50(x, len(classes_id))
        loss = resnetv2.losses(y, logits)
        accurracy = resnetv2.accurracy(y, logits)
    elif network == 'mobilenetv1':
        predictions, logits = mobilenetv1.mobilenet_v1(
            x, len(classes_id), depth_multiplier=cfg['depth_multiplier'])
        loss = mobilenetv1.losses(y, logits)
        accurracy = mobilenetv1.accurracy(y, logits)
    else:
        # NOTE(review): with an unknown network name, loss/accurracy become
        # plain ints and minimize() below would fail — confirm config values.
        loss = 0
        accurracy = 0
    # NOTE(review): `merge` and `writer` are only bound when the module-level
    # `tensorboard` flag is truthy, yet both are used unconditionally in the
    # training loop below — looks like tensorboard is assumed on; verify.
    if tensorboard:
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('acc', accurracy)
        merge = tf.summary.merge_all()
        writer = tf.summary.FileWriter(logdir='./summary/')
    # Fine-tuning VGG only trains the fc8 variables; otherwise train all.
    if network == 'vgg' and cfg['finetuning'] == 'true':
        T_list = tf.trainable_variables()
        V_list = [var for var in T_list if var.name.startswith('fc8')]
    else:
        V_list = tf.trainable_variables()
    if cfg['optimizer'] == 'RMSProp':
        print('Optimizer is RMSProp')
        optim = tf.train.RMSPropOptimizer(learning_rate=cfg['learningrate'],
                                          epsilon=1.0).minimize(
                                              loss, var_list=V_list)
    else:
        print('Optimizer is GradientDescent')
        optim = tf.train.GradientDescentOptimizer(
            learning_rate=cfg['learningrate']).minimize(loss, var_list=V_list)
    print('prepare data...')
    file_list = utils.load_data(cfg['trainpath'])
    batch = minibatch(file_list, cfg['batchsize'], cfg['width'],
                      cfg['height'])
    val_list = utils.load_data(cfg['valpath'])
    val_batch = minibatch(val_list, cfg['batchsize'], cfg['width'],
                          cfg['height'])
    print('start training...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        if network == 'vgg' and cfg['finetuning'] == 'true':
            vgg_network.loadModel(sess, True)
        epoch = 0
        iteration = 0
        loss_mean = 0
        # `batch` yields (epoch, images, labels); training stops once the
        # generator reports epoch >= the module-level epoch_iter.
        while epoch < epoch_iter:
            iteration += 1
            epoch, images, labels = next(batch)
            # VGG expects mean-subtracted inputs.
            if network == 'vgg':
                images = images - np.array(cfg['mean']).reshape(1, 1, 1, 3)
            loss_curr, summary, _ = sess.run([loss, merge, optim],
                                             feed_dict={
                                                 x: images,
                                                 y: labels
                                             })
            loss_mean += loss_curr
            writer.add_summary(summary, iteration)
            # Every 500 iterations: report mean loss and run one
            # validation batch.
            if (iteration % 500 == 0):
                print('epoch/iter: [{}/{}], loss_mean: {}'.format(
                    epoch, iteration, loss_mean / 500))
                loss_mean = 0
                _, val_images, val_labels = next(val_batch)
                if network == 'vgg':
                    val_images = val_images - np.array(cfg['mean']).reshape(
                        1, 1, 1, 3)
                cc = sess.run(accurracy,
                              feed_dict={
                                  x: val_images,
                                  y: val_labels
                              })
                print('accurracy: {}'.format(cc))
    writer.close()
# NOTE(review): script fragment — `args` is parsed elsewhere in the original
# file; load_data, load_model, eval_accuracy, weight_norm and eigen_hessian
# are project helpers defined outside this chunk.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpuid
# Pick the criterion; one_hot tells the loader how to encode labels.
if args.loss == 'mse':
    ct = torch.nn.MSELoss().cuda()
    one_hot = True
elif args.loss == 'hinge':
    ct = torch.nn.MultiMarginLoss(p=2)
    one_hot = False
elif args.loss == 'cross_entropy':
    ct = torch.nn.CrossEntropyLoss()
    one_hot = False
#####################################
# Process
####################################
trDL, teDL = load_data(args, stop=True, one_hot=one_hot)
net = load_model(args.dataset, args.arch, width=args.width, depth=args.depth)
# Restore the pre-trained solution whose sharpness is to be measured.
net.load_state_dict(torch.load(args.model))
# Evaluation
trL, trA, trC = eval_accuracy(net, ct, trDL)
teL, teA, teC = eval_accuracy(net, ct, teDL)
print('===> SOLUTION INFO: ')
print('\t train loss: %.2e, acc: %.2f' % (trL, trA))
print('\t test loss: %.2e, acc: %.2f' % (teL, teA))
print('l2 norm %.2e\n' % (weight_norm(net)))
print('===> COMPUTE SHARPNESS:')
# Sharpness = leading Hessian eigenvalue of the training loss; timed.
time_start = time.time()
mu = eigen_hessian(net, ct, trDL, verbose=True, tol=1e-4, niters=10)
time_end = time.time()
def input(self):
    """Load the demand and weather frames and return them keyed by name."""
    demand_df, weather_df = load_data(self.demand_filepath,
                                      self.weather_filepath)
    return {'demand': demand_df, 'weather': weather_df}
def train_model(model_name='resnet18',
                num_epochs=1,
                hidden_sizes=[256],
                learning_rate=0.003,
                model_path=None,
                data_dir='flowers',
                use_gpu=False,
                save_dir='checkpoints'):
    """Transfer-learn a flower classifier (102 classes) and checkpoint it.

    Either builds a fresh pretrained vgg13/resnet18 with a custom head, or
    resumes from the checkpoint at model_path. Saves a checkpoint dict at the
    end and returns the trained model (with the best-accuracy weights loaded).

    NOTE(review): hidden_sizes=[256] is a mutable default argument — it is
    never mutated here, but consider `None` + fill-in as a safer pattern.
    """
    # `train` here is the training Dataset (shadows this module's other
    # train functions, if any); loaders come from the project's load_data.
    train, trainloader, validloader = load_data(data_dir)
    output_size = 102
    device = torch.device('cuda' if use_gpu else 'cpu')
    if model_path is None:
        # Fresh run: model is built below from model_name.
        start = 0
        iterations = num_epochs
        train_losses, valid_losses = [], []
        model = None
    else:
        # Resume: everything (model, optimizer, history) comes from the
        # checkpoint; num_epochs counts additional epochs on top of it.
        # model, optimizer, iterations, train_losses, valid_losses =load_checkpoint(model_path)
        model_dict = load_checkpoint(model_path)
        model = model_dict["model"]
        model = model.to(device)
        optimizer = model_dict["optimizer"]
        model_name = model_dict["model_name"]
        start = model_dict["iterations"]
        iterations = num_epochs + start
        train_losses, valid_losses = model_dict["train_losses"], model_dict[
            "valid_losses"]
        print('starting from {} epoch and training {} epoch(s) now'.format(
            start, num_epochs))
    #CHECK: also in load_checkpoint, maybe refactor
    if model is None and model_name == 'vgg13':
        model = models.vgg13(pretrained=True)
        #turn off gradients for the model (only the new head is trained)
        for param in model.parameters():
            param.requires_grad = False
        input_size = 25088
        model.classifier = Network(input_size, output_size, hidden_sizes)
        optimizer = optim.Adam(model.classifier.parameters(),
                               lr=learning_rate)
    elif model is None and model_name == 'resnet18':
        model = models.resnet18(pretrained=True)
        #turn off gradients for the model (only the new head is trained)
        for param in model.parameters():
            param.requires_grad = False
        input_size = 512
        model.fc = Network(input_size, output_size, hidden_sizes)
        optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)
    print('-' * 20)
    print(f"Model name: {model_name}")
    print(f"Learning_rate: {learning_rate}")
    print(f"Hidden_units: {hidden_sizes}\n")
    model.class_to_idx = train.class_to_idx
    criterion = nn.NLLLoss()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    since = time.time()
    steps = 0
    model.to(device)
    for epoch in range(start, iterations):
        print('Epoch {}/{}'.format(epoch + 1, iterations))
        print('-' * 10)
        print("Train losses: {}".format(train_losses))
        print("Valid losses: {}".format(valid_losses))
        running_loss = 0
        model.train()
        for images, labels in trainloader:
            since_train_step = time.time()
            steps += 1
            # Move input and label tensors to the GPU
            images, labels = images.to(device), labels.to(device)
            model.train()
            optimizer.zero_grad()
            with torch.set_grad_enabled(True):
                log_ps = model(images)
                loss = criterion(log_ps, labels)
                loss.backward()
                optimizer.step()
            running_loss += loss.item()
            print("Time per train step {}/{}: {}".format(
                steps, len(trainloader),
                time.time() - since_train_step))
        else:
            # for/else: this branch always runs after the loop completes
            # (no break above) — it is the per-epoch validation pass.
            # Model in inference mode, dropout is off
            model.eval()
            # Turn off gradients for validation, will speed up inference
            with torch.no_grad():
                valid_loss, accuracy = validate_model(model, validloader,
                                                      criterion, device)
            train_losses.append(round(running_loss / len(trainloader), 3))
            valid_losses.append(round(valid_loss / len(validloader), 3))
            # Track the best weights seen so far by validation accuracy.
            if accuracy > best_acc:
                best_acc = accuracy
                best_model_wts = copy.deepcopy(model.state_dict())
            print(
                "Epoch: {}/{}.. ".format(epoch + 1, iterations),
                "Training Loss: {:.3f}.. ".format(running_loss /
                                                  len(trainloader)),
                "Test Loss: {:.3f}.. ".format(valid_loss / len(validloader)),
                "Test Accuracy: {:.3f}..".format(accuracy /
                                                 len(validloader)))
            running_loss = 0
            steps = 0
            # Make sure dropout and grads are on for training
            model.train()
    # load best model weights
    model.load_state_dict(best_model_wts)
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # Save the model to checkpoint
    checkpoint = {
        'hidden_sizes': hidden_sizes,
        'model': model,
        'state_dict': model.state_dict(),
        'optimizer': optimizer,
        'optimizer_dict': optimizer.state_dict(),
        'class_to_idx': model.class_to_idx,
        'iterations': iterations,
        'learning_rate': learning_rate,
        'train_losses': train_losses,
        'valid_losses': valid_losses,
        'model_name': model_name
    }
    checkpoint_filename = "".join(
        ["checkpoint_", model_name, "_",
         str(iterations), "epochs.pth"])
    if save_dir is not None:
        torch.save(checkpoint, '{}/{}'.format(save_dir,
                                              checkpoint_filename))
    else:
        torch.save(checkpoint, checkpoint_filename)
    return model
def analysis(STATE,
             method,
             method_kwargs,
             hyperparams_to_test,
             fig,
             spec,
             row,
             precomputed=False,
             separate=False,
             two_cols=False,
             NUM_STATES=1,
             configurations=None,
             default_cluster_num=5):
    """Run the per-state pipeline: dimensionality reduction, GMM clustering
    (via R's mclust), and visualization added into `fig` at `row`.

    Args:
        STATE: state identifier; prefixes every saved artifact.
        method: sklearn-style embedding class (Isomap / SpectralEmbedding /
            LocallyLinearEmbedding); dispatched on method.__name__.
        method_kwargs: extra kwargs forwarded to choose_dimension.
        hyperparams_to_test: dict; 'n_components' drives the knee search.
        fig, spec, row: matplotlib figure/gridspec slot to draw into.
        precomputed: when True, load cached embeddings/clusters from 'obj/'
            instead of recomputing (and saving) them.
        separate, two_cols, NUM_STATES, configurations: plotting options
            forwarded to add_state_to_fig; configurations must contain
            'DATA_PATH'.
        default_cluster_num: cluster count passed to GMM_clustering_R.

    Side effects: writes .npy/.csv artifacts under 'obj/' and figures under
    the state's FIGURE_PATH.
    """
    #First, define appropriate paths
    SHAPE_PATH, FIGURE_PATH, RAW_DATA_PATH, INCOME_POPULATION_PATH = define_paths(
        STATE)
    #Load the data
    covid_, X, index_X, columns_X = load_data(RAW_DATA_PATH)
    #Do dim red
    print('##################D-RED#################')
    emb_method = method
    # Either sweep hyperparameters now (and cache), or reload the cache.
    if not precomputed:
        errors_results, embeddings_results, trustws_results = choose_dimension(
            X, emb_method, hyperparams_to_test, **method_kwargs)
        save_obj(embeddings_results,
                 STATE + '_embeddings_results' + method.__name__)
        save_obj(errors_results, STATE + '_errors_results' + method.__name__)
        save_obj(trustws_results, STATE + '_trustws_result' + method.__name__)
    if precomputed:
        embeddings_results = load_obj(STATE + '_embeddings_results' +
                                      method.__name__)
        errors_results = load_obj(STATE + '_errors_results' +
                                  method.__name__)
        trustws_results = load_obj(STATE + '_trustws_result' +
                                   method.__name__)
    # Plot reconstruction errors when more than one dimensionality was tried.
    if (len(hyperparams_to_test['n_components']) >
            1) and (errors_results['n_components'][0] is not None):
        plt.plot(hyperparams_to_test['n_components'],
                 errors_results['n_components'])
    # Knee of the trustworthiness curve picks the embedding dimensionality.
    if (len(hyperparams_to_test['n_components']) > 1):
        kneedle = KneeLocator(hyperparams_to_test['n_components'],
                              np.array(trustws_results['n_components']),
                              S=1,
                              curve='concave',
                              direction='increasing',
                              interp_method='polynomial',
                              online=False)
        kneedle.plot_knee()
        plt.title(emb_method.__name__ + ' trustworthiness')
        plt.xlabel('n_components')
        plt.ylabel('trustworhiness')
        kneedle.knee, kneedle.knee_y
    #Save the dataframe with optimal dim
    if (len(hyperparams_to_test['n_components']) > 1):
        good_dim = int(
            np.squeeze(
                np.where(hyperparams_to_test['n_components'] ==
                         kneedle.knee)))
    else:
        good_dim = 0
    X_method = embeddings_results['n_components'][
        good_dim]  #pick the best (knee point) n_components
    X_method_df = pd.DataFrame(
        X_method,
        columns=['Mode {}'.format(i)
                 for i in range(X_method.shape[1])])  #, index = index_X)
    X_method_df.to_csv(
        os.path.join(
            configurations['DATA_PATH'], 'interim',
            method.__name__ + str(X_method.shape[1]) + 'D_' + STATE + '.csv'))
    print('Saving optimal embedding. Method: ', method.__name__, 'shape: ',
          X_method_df.shape)
    print('##################INITIAL VIZ#################')
    #Find the 2D and 3D embeddings and continuous colors based on that
    filename_initial = os.path.join(FIGURE_PATH, 'initial_' + method.__name__)
    # Dispatch to the matching visualization helper by method name.
    if method.__name__ == 'Isomap':
        viz = viz_Isomap
    if method.__name__ == 'SpectralEmbedding':
        viz = viz_SE
    if method.__name__ == 'LocallyLinearEmbedding':
        viz = viz_LLE
    # Cache direction: precomputed runs read from obj/, fresh runs write.
    if precomputed:
        load_path = os.path.join('obj', STATE)
        save_path = None
    else:
        load_path = None
        save_path = os.path.join('obj', STATE)
    X_2D_emb, X_3D_emb = viz(X,
                             colors=None,
                             filename=filename_initial,
                             alpha=0.5,
                             load_path=load_path,
                             save_path=save_path)
    cos_colors = find_cos_similarity(X_2D_emb)
    #Color the manifold continuously
    filename_initial_colored = os.path.join(
        FIGURE_PATH, 'initial_' + method.__name__ + '_colored')
    X_2D_emb, X_3D_emb = viz(X,
                             colors=cos_colors,
                             filename=filename_initial_colored,
                             cbar=None,
                             alpha=0.5,
                             load_path=load_path,
                             save_path=save_path)
    print('##################GMM CLUSTERING#################')
    #Import R for clustering (mclust via rpy2)
    base = importr('base')
    mclust = importr('mclust')
    ro.r('set.seed(1)')
    dontprecomputeclusters = not precomputed
    # if not precomputed:
    if dontprecomputeclusters:
        clusters, means, z, uncertainty = GMM_clustering_R(
            X_method_df, method, default_cluster_num=default_cluster_num
        )  #could change this to 5 to be consistent across states to auto-id clust
        # clusters_block_indexed = pd.Series(clusters, index=index_X)
        avg_per_clust = create_avg_df(clusters, index_X, covid_)
        # Relabel clusters into a canonical order before saving.
        reordered_clusters, reordered_means, reordered_z, reordered_uncertainty = relabel_clusters(
            clusters.astype('int'), avg_per_clust, means, z, uncertainty)
        reordered_avg_per_clust = create_avg_df(reordered_clusters, index_X,
                                                covid_)
        #Save
        np.save(
            os.path.join('obj', STATE + '_reordered_clusters.npy'),
            reordered_clusters,
        )
        reordered_means.to_csv(
            os.path.join('obj', STATE + '_reordered_means.csv'))
        reordered_z.to_csv(os.path.join('obj', STATE + '_reordered_z.csv'))
        np.save(os.path.join('obj', STATE + '_reordered_uncertainty.npy'),
                reordered_uncertainty)
        reordered_avg_per_clust.to_csv(
            os.path.join('obj', STATE + '_reordered_avg_per_clust.csv'))
    # if precomputed:
    if not dontprecomputeclusters:
        reordered_clusters = np.load(
            os.path.join('obj', STATE + '_reordered_clusters.npy'))
        reordered_means = pd.read_csv(os.path.join(
            'obj', STATE + '_reordered_means.csv'),
                                      index_col=0)
        reordered_z = pd.read_csv(os.path.join('obj',
                                               STATE + '_reordered_z.csv'),
                                  index_col=0)
        reordered_uncertainty = np.load(
            os.path.join('obj', STATE + '_reordered_uncertainty.npy'))
        reordered_avg_per_clust = pd.read_csv(os.path.join(
            'obj', STATE + '_reordered_avg_per_clust.csv'),
                                              index_col=0)
    #Save the data for Dennis (for only this method)
    index_with_blocks_and_save(STATE, X_method_df, X_2D_emb, X_3D_emb,
                               reordered_clusters, reordered_z,
                               reordered_uncertainty, index_X, emb_method)
    # Representative time series: the N closest to each cluster mean.
    N_TIMESERIES = 5
    closest_to_mean_samples, closest_to_mean_block_ids = find_closest_time_series(
        X_method_df, reordered_means, covid_, index_X, n=N_TIMESERIES)
    print('##################FINAL VIZ#################')
    sns.set(style="whitegrid")
    if two_cols:
        reordered_clusters = cos_colors  #Change colors
    add_state_to_fig(STATE,
                     fig,
                     spec,
                     row,
                     NUM_STATES,
                     X,
                     reordered_clusters,
                     index_X,
                     reordered_avg_per_clust,
                     load_path=load_path,
                     save_path=save_path,
                     separate=separate,
                     two_cols=two_cols,
                     configurations=configurations)
def train(image, text, encoder, decoder, criterion, train_loader, teach_forcing_prob=0.5):
    """Train the encoder/decoder pair with optional teacher forcing.

    Args:
        image: pre-allocated image tensor, refilled in place each batch via
            utils.load_data.
        text: unused here; kept for interface compatibility with callers.
        encoder: CNN + BiLSTM feature extractor (called as encoder(image)).
        decoder: attention decoder exposing initHidden(batch_size).
        criterion: per-step loss applied to each decoder output.
        train_loader: iterable yielding (images, texts) batches.
        teach_forcing_prob: threshold compared against random.random();
            higher values make teacher forcing rarer.

    Side effects: saves one encoder/decoder checkpoint pair per epoch under
    cfg.model.
    """
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.learning_rate,
                                         betas=(0.5, 0.999))
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.learning_rate,
                                         betas=(0.5, 0.999))
    # loss averager
    loss_avg = utils.Averager()
    for epoch in range(cfg.num_epochs):
        train_iter = iter(train_loader)
        for i in range(len(train_loader)):
            # Zero gradients once per iteration, before the forward pass.
            # (CLEANUP: the redundant encoder.zero_grad()/decoder.zero_grad()
            # calls right before backward() were removed.)
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            # FIX: Python 3 iterators have no .next() method; use next().
            cpu_images, cpu_texts = next(train_iter)
            batch_size = cpu_images.size(0)
            # FIX: the original zipped encoder and decoder parameter lists,
            # which silently stops at the shorter one; unfreeze each
            # network's parameters independently.
            for encoder_param in encoder.parameters():
                encoder_param.requires_grad = True
            for decoder_param in decoder.parameters():
                decoder_param.requires_grad = True
            encoder.train()
            decoder.train()
            target_variable = converter.encode(cpu_texts)
            utils.load_data(image, cpu_images)
            # CNN + BiLSTM
            encoder_outputs = encoder(image)
            if torch.cuda.is_available():
                target_variable = target_variable.cuda()
                # start decoder for SOS_TOKEN
                decoder_input = target_variable[utils.SOS_TOKEN].cuda()
                decoder_hidden = decoder.initHidden(batch_size).cuda()
            else:
                decoder_input = target_variable[utils.SOS_TOKEN]
                decoder_hidden = decoder.initHidden(batch_size)
            loss = 0.0
            teach_forcing = random.random() > teach_forcing_prob
            # Short (last, partial) batches always use teacher forcing.
            if teach_forcing or decoder_input.shape[0] < cfg.batch_size:
                # Feed the ground-truth symbol at each step.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    decoder_input = target_variable[di]
            else:
                # Feed back the decoder's own top-1 prediction.
                for di in range(1, target_variable.shape[0]):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, target_variable[di])
                    topv, topi = decoder_output.data.topk(1)
                    decoder_input = topi.squeeze()
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            loss_avg.add(loss)
            # Log every batch (modulus of 1 kept from the original).
            if i % 1 == 0:
                print('[Epoch {0}/{1}] [Batch {2}/{3}] Loss: {4}'.format(
                    epoch + 1, cfg.num_epochs, i + 1, len(train_loader),
                    loss_avg.val()))
                loss_avg.reset()
        # save checkpoint (filenames embed the epoch index)
        torch.save(encoder.state_dict(),
                   '{0}/encoder_{1}.pth'.format(cfg.model, epoch))
        torch.save(decoder.state_dict(),
                   '{0}/decoder_{1}.pth'.format(cfg.model, epoch))
def main(arguments=None):
    """The main function. Entry point.

    Parses command-line arguments, builds a Capsule Network, optionally
    restores the latest checkpoint, then runs the train/test loop while
    logging losses, accuracies and learning rates to text files under
    ``args.directory``.

    NOTE(review): ``utils.make_dataset_obj`` and
    ``utils.make_partial_checkpoint_obj`` introspect ``locals()``, so the
    local variable names in this function are part of its de-facto
    interface — renames were deliberately avoided.
    """
    global loss_func
    global best_acc
    best_acc = 0
    global args

    # Setting the hyper parameters
    parser = argparse.ArgumentParser(description='Example of Capsule Network')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of training epochs. default=10')
    # Help text fixed: the actual default is 0.001, not 0.01.
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate. default=0.001')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='training batch size. default=128')
    parser.add_argument('--test-batch-size', type=int, default=128,
                        help='testing batch size. default=128')
    parser.add_argument(
        '--log-interval', type=int, default=10,
        help='how many batches to wait before logging training status. '
             'default=10')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training. default=false')
    parser.add_argument('--device', type=str, default='cuda:0',
                        help='select the gpu. default=cuda:0')
    parser.add_argument(
        '--threads', type=int, default=4,
        help='number of threads for data loader to use. default=4')
    parser.add_argument('--seed', type=int, default=42,
                        help='random seed for training. default=42')
    parser.add_argument(
        '--num_conv_in_channels', type=int, default=1,
        help='number of channels in input to first Conv Layer. default=1')
    parser.add_argument(
        '--num_conv_out_channels', type=int, default=256,
        help='number of channels in output from first Conv Layer. default=256')
    parser.add_argument('--conv-kernel', type=int, default=9,
                        help='kernel size of Conv Layer. default=9')
    parser.add_argument('--conv-stride', type=int, default=1,
                        help='stride of first Conv Layer. default=1')
    parser.add_argument(
        '--num-primary-channels', type=int, default=32,
        help='channels produced by PrimaryCaps layer. default=32')
    parser.add_argument(
        '--primary-caps-dim', type=int, default=8,
        help='dimension of capsules in PrimaryCaps layer. default=8')
    parser.add_argument(
        '--primary-kernel', type=int, default=9,
        help='kernel dimension for PrimaryCaps layer. default=9')
    parser.add_argument('--primary-stride', type=int, default=2,
                        help='stride for PrimaryCaps layer. default=2')
    parser.add_argument('--num-classes', type=int, default=10,
                        help='number of output classes. default=10 for MNIST')
    parser.add_argument(
        '--digit-caps-dim', type=int, default=16,
        help='dimension of capsules in DigitCaps layer. default=16')
    parser.add_argument(
        '--dec1-dim', type=int, default=512,
        help='output dimension of first layer in decoder. default=512')
    parser.add_argument(
        '--dec2-dim', type=int, default=1024,
        help='output dimension of second layer in decoder. default=1024')
    parser.add_argument('--num-routing', type=int, default=3,
                        help='number of routing iteration. default=3')
    parser.add_argument(
        '--use-reconstruction-loss', type=utils.str2bool, nargs='?',
        default=True,
        help='use an additional reconstruction loss. default=True')
    parser.add_argument(
        '--regularization-scale', type=float, default=0.0005,
        help='regularization coefficient for reconstruction loss. '
             'default=0.0005')
    parser.add_argument('--dataset',
                        help='the name of dataset (mnist, cifar10)',
                        default='mnist')
    parser.add_argument(
        '--input-width', type=int, default=28,
        help='input image width to the convolution. default=28 for MNIST')
    parser.add_argument(
        '--input-height', type=int, default=28,
        help='input image height to the convolution. default=28 for MNIST')
    parser.add_argument('--directory', type=str,
                        default=PROJECT_DIR / 'results',
                        help='directory to store results')
    parser.add_argument('--data-directory', type=str,
                        default=PROJECT_DIR / 'data',
                        help='directory to store data')
    parser.add_argument('--description', type=str, default='no description',
                        help='description to store together with results')
    parser.add_argument('--exp-decay-lr', action='store_true', default=False,
                        help='use exponential decay of learning rate')
    # Help text fixed: the actual default is 4000.
    parser.add_argument(
        '--decay-steps', type=int, default=4000,
        help='decay steps for exponential learning rate adjustment. '
             'default=4000')
    # Help text fixed: the actual default is 0.96.
    parser.add_argument(
        '--decay-rate', type=float, default=0.96,
        help='decay rate for exponential learning rate adjustment. '
             'default=0.96')
    parser.add_argument('--staircase', action='store_true', default=False,
                        help='activate staircase for learning rate adjustment')
    # one cycle policy
    parser.add_argument('--one-cycle-policy', action='store_true',
                        default=False,
                        help='use one cycle policy for learning rate')
    # warm restarts
    parser.add_argument('--warm-restarts', action='store_true', default=False,
                        help='use warm restarts of the learning rate')
    parser.add_argument(
        '--Ti', type=float, default=10.0,
        help='number of epochs of a cycle of the warm restarts')
    parser.add_argument('--Tmult', type=float, default=1.0,
                        help='multiplier factor for the warm restarts')
    # adaptive batch size
    parser.add_argument('--adabatch', action='store_true', default=False,
                        help='activate adabatch. default False')
    parser.add_argument('--adapow', type=int, default=2,
                        help='power of two for adabatch size')
    # weight sharing
    parser.add_argument('--conv-shared-weights', type=int, default=0)
    parser.add_argument('--primary-shared-weights', type=int, default=0)
    parser.add_argument('--digit-shared-weights', type=int, default=0)
    parser.add_argument('--conv-shared-bias', type=int, default=0)
    # small decoder
    parser.add_argument(
        '--small-decoder', action='store_true', default=False,
        help='enables the small decoder instead of the standard one')
    # restart option
    parser.add_argument('--restart-training', action='store_true',
                        default=False)
    # squash approx
    parser.add_argument('--squash-approx', action='store_true', default=False)
    # find best learning rate interval
    parser.add_argument('--find-lr', action='store_true', default=False,
                        help='train to find the best learning rate')
    # normalize or not the inputs to the net (not normalized is better)
    parser.add_argument('--normalize-input', action='store_true',
                        default=False,
                        help='enables normalization and disables random '
                             'cropping the inputs with padding 2')
    # use new / old version of the model
    parser.add_argument('--old-model', action='store_true', default=False,
                        help='uses old model')

    args = parser.parse_args(args=arguments)
    args.directory = pathlib.Path(args.directory)
    print(args)

    # Select model implementation and bind a uniform loss_func signature
    # over whichever internal loss the chosen implementation provides.
    if args.old_model:
        from src.model.model import Net
        import src.model.functions as func
        ModelToUse = Net

        def loss_func(output, target, regularization_scale, reconstruction,
                      data, device, batch_size):
            return func.loss(output, reconstruction, target, data,
                             regularization_scale, device)
    else:
        from src.model.layers import CapsNet
        from src.model.layers import loss_func as loss_func_internal
        ModelToUse = CapsNet

        def loss_func(output, target, regularization_scale, reconstruction,
                      data, device, batch_size):
            return loss_func_internal(output, target, regularization_scale,
                                      reconstruction,
                                      data.view(batch_size, -1), device)

    # Check GPU or CUDA is available
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if not args.cuda:
        args.device = 'cpu'

    # Get reproducible results by manually seeding the random number generator
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Load data
    train_loader, test_loader = utils.load_data(args)
    if args.adabatch:
        # BUG FIX: the original built all three loaders with 2**adapow,
        # but the epoch schedule below consumes batch sizes 2**adapow,
        # 2**(adapow+1) and 2**(adapow+2); build the loaders to match.
        temp_bs = args.batch_size
        args.batch_size = 2**(args.adapow)
        train_loader1, _ = utils.load_data(args)
        args.batch_size = 2**(args.adapow + 1)
        train_loader2, _ = utils.load_data(args)
        args.batch_size = 2**(args.adapow + 2)
        train_loader3, _ = utils.load_data(args)
        args.batch_size = temp_bs

    # Build Capsule Network
    print('===> Building model')
    model = ModelToUse(input_wh=args.input_width,
                       num_conv_in_channels=args.num_conv_in_channels,
                       num_conv_out_channels=args.num_conv_out_channels,
                       conv_kernel=args.conv_kernel,
                       conv_stride=args.conv_stride,
                       num_primary_channels=args.num_primary_channels,
                       primary_caps_dim=args.primary_caps_dim,
                       primary_kernel=args.primary_kernel,
                       primary_stride=args.primary_stride,
                       num_classes=args.num_classes,
                       digit_caps_dim=args.digit_caps_dim,
                       iter=args.num_routing,
                       dec1_dim=args.dec1_dim,
                       dec2_dim=args.dec2_dim,
                       cuda_enabled=args.cuda,
                       device=args.device,
                       regularization_scale=args.regularization_scale,
                       conv_shared_weights=args.conv_shared_weights,
                       primary_shared_weights=args.primary_shared_weights,
                       digit_shared_weights=args.digit_shared_weights,
                       conv_shared_bias=args.conv_shared_bias,
                       small_decoder=args.small_decoder,
                       squash_approx=args.squash_approx)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr)
    lr_wr = utils.custom_warm_restarts(args.lr, args.lr * 10)
    starting_epoch = 1

    if args.cuda:
        print('Utilize GPUs for computation')
        print('Number of GPU available', torch.cuda.device_count())
        model.to(args.device)
        cudnn.benchmark = True
        model = torch.nn.DataParallel(model)

    args.file_flag = 'w'
    if args.restart_training:
        # BUG FIX: the original assigned args.file_flag to itself (a no-op),
        # so restarting truncated the metric files; append instead.
        args.file_flag = 'a'
        # BUG FIX: was `pathlib.Path(args.directory) / + 'trained_model'`,
        # which applies unary `+` to a str and raises TypeError.
        p = pathlib.Path(args.directory) / 'trained_model'
        if p.exists():
            l = sorted(list(p.iterdir()))
            if l:
                # Restore from the lexicographically-last checkpoint file.
                f = l[-1]
                pckl = utils.load(str(f))
                model.load_state_dict(pckl['model_state_dict'])
                optimizer.load_state_dict(pckl['optimizer_state_dict'])
                lr_wr.__dict__ = pckl['lr_wr']
                starting_epoch = pckl['epoch']

    # Print the model architecture and parameters
    print('Model architectures:\n{}\n'.format(model))
    print('Parameters and size:')
    for name, param in model.named_parameters():
        print('{}: {}'.format(name, list(param.size())))

    # CapsNet has:
    # - 8.2M parameters and 6.8M parameters without the reconstruction
    #   subnet on MNIST.
    # - 11.8M parameters and 8.0M parameters without the reconstruction
    #   subnet on CIFAR10.
    num_params = sum([param.nelement() for param in model.parameters()])

    # The coupling coefficients c_ij are not included in the parameter list,
    # we need to add them manually, which is 1152 * 10 = 11520 (on MNIST) or
    # 2048 * 10 (on CIFAR10)
    print('\nTotal number of parameters: {}\n'.format(num_params + (
        11520 if args.dataset in ('mnist', 'fashionmnist') else 20480)))

    # Make model checkpoint directory
    if not (args.directory / 'trained_model').is_dir():
        (args.directory / 'trained_model').mkdir(parents=True, exist_ok=True)

    # files to store accuracies and losses
    train_mloss = args.directory / 'train_margin_loss.txt'
    train_rloss = args.directory / 'train_reconstruction_loss.txt'
    train_acc = args.directory / 'train_accuracy.txt'
    test_mloss = args.directory / 'test_margin_loss.txt'
    test_rloss = args.directory / 'test_reconstruction_loss.txt'
    test_acc = args.directory / 'test_accuracy.txt'
    learning_rate = args.directory / 'learning_rate.txt'
    output_tensor = args.directory / 'output_tensor.txt'

    n_parameters = args.directory / 'n_parameters.txt'
    with open(n_parameters, args.file_flag) as f:
        # CONSISTENCY FIX: use the same dataset test as the printed total
        # above (the original checked only == 'mnist' here).
        f.write('{}\n'.format(num_params + (
            11520 if args.dataset in ('mnist', 'fashionmnist') else 20480)))

    arguments_file = args.directory / 'arguments.txt'
    with open(arguments_file, args.file_flag) as f:
        pprint.pprint(args.__dict__, stream=f)

    description = args.directory / 'details.txt'
    description = open(description, args.file_flag)
    description.write(args.description)
    description.close()

    # Reuse the path variables as open file handles for the training loop.
    train_mloss = open(train_mloss, args.file_flag)
    train_rloss = open(train_rloss, args.file_flag)
    train_acc = open(train_acc, args.file_flag)
    test_mloss = open(test_mloss, args.file_flag)
    test_rloss = open(test_rloss, args.file_flag)
    test_acc = open(test_acc, args.file_flag)
    learning_rate = open(learning_rate, args.file_flag)
    output_tensor = open(output_tensor, args.file_flag)

    utils.dump(utils.make_dataset_obj(locals(), globals()),
               args.directory / 'trained_model' / 'dataset')

    # Train and test
    try:
        for epoch in range(starting_epoch, args.epochs + 1):
            if not args.adabatch:
                train(model, train_loader, optimizer, epoch, train_mloss,
                      train_rloss, train_acc, learning_rate, lr_wr,
                      output_tensor)
                test(model, test_loader, len(train_loader), epoch, test_mloss,
                     test_rloss, test_acc, args.directory)
            else:
                # Adabatch schedule: double the batch size at epochs 4, 34
                # and 64. Note test() always reports against the base
                # train_loader length.
                if (1 <= epoch <= 3):
                    train(model, train_loader, optimizer, epoch, train_mloss,
                          train_rloss, train_acc, learning_rate, lr_wr,
                          output_tensor)
                    test(model, test_loader, len(train_loader), epoch,
                         test_mloss, test_rloss, test_acc, args.directory)
                elif (4 <= epoch <= 33):
                    args.batch_size = 2**(args.adapow)
                    train(model, train_loader1, optimizer, epoch, train_mloss,
                          train_rloss, train_acc, learning_rate, lr_wr,
                          output_tensor)
                    test(model, test_loader, len(train_loader), epoch,
                         test_mloss, test_rloss, test_acc, args.directory)
                elif (34 <= epoch <= 63):
                    args.batch_size = 2**(args.adapow + 1)
                    train(model, train_loader2, optimizer, epoch, train_mloss,
                          train_rloss, train_acc, learning_rate, lr_wr,
                          output_tensor)
                    test(model, test_loader, len(train_loader), epoch,
                         test_mloss, test_rloss, test_acc, args.directory)
                else:
                    args.batch_size = 2**(args.adapow + 2)
                    train(model, train_loader3, optimizer, epoch, train_mloss,
                          train_rloss, train_acc, learning_rate, lr_wr,
                          output_tensor)
                    test(model, test_loader, len(train_loader), epoch,
                         test_mloss, test_rloss, test_acc, args.directory)

            train_mloss.flush()
            train_rloss.flush()
            train_acc.flush()
            test_mloss.flush()
            test_rloss.flush()
            test_acc.flush()
            learning_rate.flush()
            output_tensor.flush()

            # Save model checkpoint
            utils.checkpoint(utils.make_partial_checkpoint_obj(
                locals(), globals()), epoch, directory=args.directory)
    except KeyboardInterrupt:
        print("\n\n\nKeyboardInterrupt, Interrupting...")

    train_mloss.close()
    train_rloss.close()
    train_acc.close()
    test_mloss.close()
    test_rloss.close()
    test_acc.close()
    learning_rate.close()
    output_tensor.close()

    # NOTE(review): best_acc_epoch and the avg_*_time globals are assumed to
    # be set by train()/test() elsewhere in this module — TODO confirm.
    with open(args.directory / 'best_accuracy.txt', args.file_flag) as f:
        f.write("%.10f,%d\n" % (best_acc, best_acc_epoch))
    print('\n\nBest Accuracy: ' + str(best_acc) +
          '%%\nReached at epoch: %d\n\n' % best_acc_epoch)

    global avg_training_time_per_epoch
    global avg_testing_time_per_epoch
    with open(args.directory / 'average_training_time_per_epoch.txt',
              args.file_flag) as f:
        f.write("%.10f\n" % avg_training_time_per_epoch)
    print('Average time per training epoch: %.10f\n\n' %
          avg_training_time_per_epoch)
    with open(args.directory / 'average_testing_time_per_epoch.txt',
              args.file_flag) as f:
        f.write("%.10f\n" % avg_testing_time_per_epoch)
    print('Average time per testing epoch: %.10f\n\n' %
          avg_testing_time_per_epoch)
def run_experiment(params):
    """Run the crowdsourcing/active-learning experiment grid.

    Iterates over every (budget_per_item, policy_switch_point) pair in
    ``params`` and, for each pair, repeats the experiment
    ``params['experiment_nums']`` times: load the dataset, run the active
    learning (AL) phase with simulated crowd votes until the policy's AL
    budget is exhausted, then use the learner's probability outputs as
    priors for the items.

    NOTE(review): the visible portion covers only the AL phase; the
    classification phase that presumably consumes ``prior_prob``,
    ``item_labels`` and ``unclassified_item_ids`` is not shown here.
    """
    # parameters for crowd simulation
    crowd_acc = params['crowd_acc']
    max_votes_per_item = params['max_votes_per_item']
    # df_to_print = pd.DataFrame()
    for budget_per_item in params['budget_per_item']:
        for switch_point in params['policy_switch_point']:
            print('Policy switch point: {}'.format(switch_point))
            print('Budget per item: {}'.format(budget_per_item))
            print('************************************')
            results_list = []
            for experiment_id in range(params['experiment_nums']):
                ## load and transform input data
                X, y = load_data(params['dataset_file_name'])
                vectorizer = Vectorizer()
                X = vectorizer.fit_transform(X)

                ## initialize policy
                # Total budget scales with the number of items; switch_point
                # splits it between the AL and crowd phases.
                items_num = y.shape[0]
                B = items_num * budget_per_item
                policy = PointSwitchPolicy(B, switch_point)

                ## initialize crowd votes counter, prior probs
                # Per-item 'in'/'out' vote tallies and a uniform 0.5 prior.
                crowd_votes_counts, prior_prob = {}, {}
                for item_id in range(items_num):
                    crowd_votes_counts[item_id] = {'in': 0, 'out': 0}
                    prior_prob[item_id] = {'in': 0.5, 'out': 0.5}

                ## assign default positive label for all items to classify
                item_labels = {item_id: 1 for item_id in range(items_num)}  # classify all items as in by default
                unclassified_item_ids = np.arange(items_num)  # ids of unclassified so far items
                item_ids_helper = np.arange(items_num)  # to track item ids in AL pool and map them to real item ids

                ## ** START ACTIVE LEARNING ** ##
                # Run Active Learning Box if Budget is available for Active
                # Learning part (switch_point == 0 means crowd-only).
                if switch_point != 0:
                    L, item_ids_helper = configure_al_box(
                        params, crowd_votes_counts, item_labels,
                        item_ids_helper, X, y)
                    # Charge the cost of labeling the seed training set.
                    policy.update_budget_al(
                        params['size_init_train_data'] * max_votes_per_item / 2)
                    i = 0
                    while policy.is_continue_al:
                        ## query items to annotate
                        query_idx = L.query()
                        ## crowdsource queried items
                        gt_items_queried = L.y_pool[query_idx]
                        ## TODO: use max_votes_per_item to compute cost_round (like SM-run)
                        y_crowdsourced, cost_round = CrowdSimulator.crowdsource_items(
                            item_ids_helper[query_idx], gt_items_queried,
                            crowd_acc, max_votes_per_item, crowd_votes_counts)
                        ## Retrain AL with new data
                        L.teach(query_idx, y_crowdsourced)
                        # Queried items leave the AL pool; keep the id map in sync.
                        item_ids_helper = np.delete(item_ids_helper, query_idx)
                        ## update budget spent
                        policy.update_budget_al(cost_round)
                        i += 1

                    ## measure performance
                    pre_, rec_, f_, _ = precision_recall_fscore_support(
                        y, L.learner.predict(X), beta=params['beta'],
                        average='binary')
                    print(pre_, rec_, f_)

                    ## at this stage we do not allow to classify items by machines
                    ## we use the machine's probability output as prior
                    for item_id in range(items_num):
                        prediction = L.learner.predict_proba(
                            [X[item_id]]).flatten()
                        prior_prob[item_id] = {
                            'in': prediction[1],
                            'out': prediction[0]
                        }
                    print('experiment_id {}, AL-Box finished'.format(
                        experiment_id), end=', ')
def local_load_data(dataset):
    """Ensure *dataset* exists under the package's sibling ``data`` folder.

    Delegates the actual check/download to ``load_data``; only prints
    progress messages around it.
    """
    print("checking if {} is present...".format(dataset))
    data_dir = os.path.join(file_dir, "../data")
    load_data(dataset, path=data_dir)
    print("Confirmed: {} is present.".format(dataset))
def main(argv=None):
    """Entry point: load the training set named by FLAGS.train_prefix and train on it."""
    print("Loading training data..")
    dataset = load_data(FLAGS.train_prefix)
    print("Done loading training data..")
    train(dataset)