def load_encoder(images, MP, nbatch, param_name="encoder_params_best"):
    lat_mean_layer, lat_var_layer, out = enc.make_encoder(
        [nbatch, MP["datsize"]], MP["n_lat"], MP["MVG"], MP["n_gauss_dim"],
        MP["CNN"])

    # Load the trained encoder parameters.
    enc_parameters = utils.load_obj(MP["dirname"] + param_name)
    L.layers.set_all_param_values(out, enc_parameters)

    if MP["MVG"]:
        lat_var = [
            L.layers.get_output(k, inputs=images) for k in lat_var_layer
        ]
    else:
        lat_var = L.layers.get_output(lat_var_layer, inputs=images)

    var, trans = variance.get_var_mat(lat_var, MP["MVG"])
    mean = L.layers.get_output(lat_mean_layer, inputs=images)

    # Compile a function mapping images to the posterior mean, variance, and transform.
    lfunc = theano.function([images], [mean, var, trans],
                            allow_input_downcast=True)

    return lfunc
def encoder_response_function(images, MP, nbatch, param_name="encoder_params_best"):
    lat_mean_layer, lat_var_layer, out = enc.make_encoder(
        [nbatch, MP["datsize"]], MP["n_lat"], MP["MVG"], MP["n_gauss_dim"],
        MP["CNN"])

    # Load parameters
    enc_parameters = utils.load_obj(MP["dirname"] + param_name)
    L.layers.set_all_param_values(out, enc_parameters)

    intermediate = L.layers.get_all_layers(lat_mean_layer)
    int_resp = [L.layers.get_output(l, inputs=images) for l in intermediate]

    lfunc = theano.function([images], int_resp,
                            allow_input_downcast=True,
                            on_unused_input='ignore')

    return lfunc, len(int_resp)
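def example_encoder_usage(MP, test_batch, nbatch):
    # Illustrative sketch only (not part of the original code): shows how the two
    # encoder helpers above are typically driven. Assumes MP already carries the
    # derived keys filled in elsewhere (n_lat, datsize, dirname, ...) and that
    # test_batch is a [nbatch, datsize] numpy array; the function name and the
    # choice of a matrix-typed input variable are assumptions.
    import theano.tensor as T
    images = T.matrix("images")

    lfunc = load_encoder(images, MP, nbatch)
    mean, var, trans = lfunc(test_batch)

    rfunc, n_layers = encoder_response_function(images, MP, nbatch)
    responses = rfunc(test_batch)  # one activation array per encoder layer

    return mean, var, trans, responses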
def run_fcnn_model(param_dict, generators, opts, opt_key, models, eta,
                   pandas_save=True, save_weights=True, epoch_batch_size=10):
    assert (epoch_batch_size >= 1)
    model_key = "fcnn"
    if (param_dict["run_verbosity"] > 0):
        print("Using hidden size {} and optimizer {}...".format(
            param_dict["hidden_size"], opt_key))

    # Set up the input to accept FMA images:
    inp = keras.layers.Input(shape=(256, 256, 3))
    # Add a flatten layer to make the input play nicely with these non-convolutional
    # layers:
    x = keras.layers.Flatten()(inp)
    # Add an Affine/BatchNorm/ReLU/Dropout/Affine-softmax-categorization block:
    predict = stack_two_layer_block(param_dict, x)

    # Construct the model:
    models[model_key] = keras.models.Model(inp, predict)

    # Compile the model:
    models[model_key].compile(optimizer=opts[opt_key],
                              **param_dict["compile_args"])

    # Because all the other networks train in multiple passes:
    fcnn_pass_epochs = param_dict["pass_epochs"] * 6

    # Train the model:
    timer.tic()
    run_started = timer.datetimestamp()
    if (param_dict["run_verbosity"] > 0):
        print(("Fully connected network run begun at {}."
               "\n\t[{} epochs on {} FMA on {} takes"
               "\n\t{}.]\n").format(run_started, fcnn_pass_epochs,
                                    param_dict["which_size"],
                                    param_dict["spu"].upper(), eta))

    initial_epoch = 0
    results = None
    # This loop of multiple checkpoints helps with memory management, which is
    # probably not necessary for FCNN but is included just in case - see also
    # http://bit.ly/2hDHJay for more information.
    while initial_epoch < fcnn_pass_epochs:
        # Split into "epoch_batch_size"-epoch training batches
        final_epoch = min(initial_epoch + epoch_batch_size, fcnn_pass_epochs)
        if (param_dict["run_verbosity"] > 0):
            print("\nTraining for epochs {} to {}...".format(
                initial_epoch + 1, final_epoch))
        results_new = models[model_key].fit_generator(
            generators["train"],
            validation_data=generators["val"],
            verbose=param_dict["run_verbosity"],
            epochs=final_epoch,
            steps_per_epoch=param_dict["steps_per_epoch"],
            validation_steps=param_dict["validation_steps"],
            use_multiprocessing=True,
            initial_epoch=initial_epoch)

        # Merge these new results with existing results for previous batches:
        if results is not None:
            # Merge the two results lists:
            for key in results.history:
                results.history[key].extend(results_new.history[key])
        else:
            results = results_new

        # Now start from where we stopped on this round
        initial_epoch = final_epoch

    runsec = timer.toc()

    # Create a new row for these results:
    if (pandas_save):
        new_res = pd.Series()
        assign_run_key(new_res, param_dict, run_started)
        assign_opt_key(new_res, opt_key)
        train_acc, val_acc = assign_results_history(new_res, model_key,
                                                    runsec, results)

        # Add this to the results dataframe:
        try:
            fma_results = ut.load_obj(param_dict["fma_results_name"])
        except:
            fma_results = pd.DataFrame(dtype=float, columns=RESULTS_COLS)
        fma_results = fma_results.append(new_res, ignore_index=True)
        # And save:
        ut.save_obj(fma_results, param_dict["fma_results_name"])
    else:
        train_acc = results.history["categorical_accuracy"][-1]
        val_acc = results.history["val_categorical_accuracy"][-1]

    if (param_dict["run_verbosity"] > 0):
        print(("\n{} for {} to yield {:0.1%} training accuracy "
               "and {:0.1%} validation accuracy in {:d} \nepochs "
               "(x3 training phases).").format(
                   timer.time_from_sec(runsec),
                   param_dict["model_names"][model_key], train_acc, val_acc,
                   param_dict["pass_epochs"]))

    # Save trained weights:
    if save_weights:
        weights_save_name = os.path.join(
            "saved_weights",
            "{}_{}_{}_{}.h5".format(  # 4 elements
                model_key,
                formatted(opt_key),
                formatted(run_key(param_dict, run_started)),
                timer.datetimepath()))
        ut.ensure_dir(weights_save_name)
        models[model_key].save_weights(weights_save_name)

    if (param_dict["run_verbosity"] > 0):
        print("\nFully connected run complete at {}.".format(
            timer.datetimestamp()))

    # Tell keras to clear the tensorflow backend session (helps with memory leaks;
    # see: http://bit.ly/2xJZbAt )
    if param_dict["run_verbosity"] > 0:
        # i.e. this is not a short-running/crossval run -- we can't reset during
        # crossval because tensorflow will get cross about the optimizer being
        # created on a different graph...
        print("Clearing keras's backend Tensorflow session...\n")
        K.clear_session()

    if (pandas_save):
        return new_res
def run_pretrained_model(param_dict, generators, models, opts, model_class,
                         model_key, opt_key, print_layers, freeze_to, eta,
                         save_weights=True, epoch_batch_size=5):
    assert (epoch_batch_size >= 1)
    try:
        len(freeze_to)
    except:
        freeze_to = [freeze_to]  # turn single elements into a one-item list

    print("Using optimizer {}...".format(opt_key))
    timer.tic()
    run_started = timer.datetimestamp()
    print(("{} run begun at {}."
           "\n\t[{} epochs (x{} passes) on {} FMA on {} takes"
           "\n\t{}.]\n").format(param_dict["model_names"][model_key],
                                run_started, param_dict["pass_epochs"],
                                len(freeze_to) + 1, param_dict["which_size"],
                                param_dict["spu"].upper(), eta))

    # Adapted from https://keras.io/applications/
    # Get the pre-trained base model, without the top layer (because our input is a
    # different shape), using the trained weights for ImageNet, to use as a starting
    # point:
    basemodel = model_class(include_top=False,
                            input_shape=param_dict["mean_img"].shape,
                            weights='imagenet')
    x = basemodel.output
    # Add a global spatial average pooling layer at the output for regularization and
    # to reduce overfitting:
    x = keras.layers.GlobalAveragePooling2D()(x)
    # Add an Affine/BatchNorm/ReLU/Dropout/Affine-softmax-categorization block:
    predict = stack_two_layer_block(param_dict, x)

    # Now make the model:
    models[model_key] = keras.models.Model(basemodel.input, predict)

    # Train only the top layers (which were randomly initialized) while freezing
    # all convolutional layers (which were pretrained on ImageNet):
    for layer in basemodel.layers:
        layer.trainable = False

    # Compile the model (must be done after setting layer trainability):
    models[model_key].compile(optimizer=opts[opt_key],
                              **param_dict["compile_args"])

    # Train just the classifier for the requested number of epochs:
    print("First-round training (training the classifier)...")
    initial_epoch = 0
    results = None
    # This loop of multiple checkpoints helps with memory management, esp. for VGG16/
    # VGG19, which have a huge number of parameters - see also
    # http://bit.ly/2hDHJay for more information.
    while initial_epoch < param_dict["pass_epochs"]:
        # Split into "epoch_batch_size"-epoch training batches
        final_epoch = min(initial_epoch + epoch_batch_size,
                          param_dict["pass_epochs"])
        print("\nTraining for epochs {} to {}...".format(
            initial_epoch + 1, final_epoch))
        results_new = models[model_key].fit_generator(
            generators["train"],
            validation_data=generators["val"],
            verbose=param_dict["run_verbosity"],
            epochs=final_epoch,
            steps_per_epoch=param_dict["steps_per_epoch"],
            validation_steps=param_dict["validation_steps"],
            use_multiprocessing=True,
            initial_epoch=initial_epoch)

        # Merge these new results with existing results for previous batches:
        if results is not None:
            # Merge the two results lists:
            for key in results.history:
                results.history[key].extend(results_new.history[key])
        else:
            results = results_new

        # Now start from where we stopped on this round
        initial_epoch = final_epoch

    # At this point, the top layers are well trained and we can start fine-tuning
    # convolutional layers of the pretrained base model. We will freeze the bottom
    # N layers and train the remaining top layers.
    # Visualize layer names and layer indices to see how many layers we should freeze:
    if print_layers:
        for i, layer in enumerate(models[model_key].layers):
            print(i, layer.name)

    pass_num = 1
    for freeze in freeze_to:
        pass_num += 1
        # Freeze all layers up to the specified value; unfreeze everything
        # after (and including):
        for layer in models[model_key].layers[:freeze]:
            layer.trainable = False
        for layer in models[model_key].layers[freeze:]:
            layer.trainable = True

        # We need to recompile the model for these modifications to take effect.
        # We use SGD with a low learning rate because SGD trains more slowly than
        # RMSprop (a good thing, in this case):
        models[model_key].compile(optimizer=keras.optimizers.SGD(lr=0.0001,
                                                                 momentum=0.9),
                                  **param_dict["compile_args"])

        # Train again for the requested number of epochs:
        print(("\n\nFurther training (refining convolutional blocks, starting with"
               "\n\tlayer {})...").format(freeze))
        while initial_epoch < pass_num * param_dict["pass_epochs"]:
            # Split into "epoch_batch_size"-epoch training batches
            final_epoch = min(initial_epoch + epoch_batch_size,
                              pass_num * param_dict["pass_epochs"])
            print("\nTraining for epochs {} to {}...".format(
                initial_epoch + 1, final_epoch))
            results_new = models[model_key].fit_generator(
                generators["train"],
                validation_data=generators["val"],
                verbose=param_dict["run_verbosity"],
                epochs=final_epoch,
                steps_per_epoch=param_dict["steps_per_epoch"],
                validation_steps=param_dict["validation_steps"],
                use_multiprocessing=True,
                initial_epoch=initial_epoch)

            # Merge these new results with existing results for previous batches:
            if results is not None:
                # Merge the two results lists:
                for key in results.history:
                    results.history[key].extend(results_new.history[key])
            else:
                results = results_new

            initial_epoch = final_epoch

    runsec = timer.toc()

    # Create a new row for these results:
    new_res = pd.Series()
    assign_run_key(new_res, param_dict, run_started)
    assign_opt_key(new_res, opt_key)
    train_acc, val_acc = assign_results_history(new_res, model_key, runsec,
                                                results)

    # Add this to the results dataframe:
    try:
        fma_results = ut.load_obj(param_dict["fma_results_name"])
    except:
        fma_results = pd.DataFrame(dtype=float, columns=RESULTS_COLS)
    fma_results = fma_results.append(new_res, ignore_index=True)
    # And save:
    ut.save_obj(fma_results, param_dict["fma_results_name"])

    print(("\n{} for {} to yield {:0.1%} training accuracy "
           "and {:0.1%} validation accuracy in {:d} \nepochs "
           "(x{} training phases).").format(timer.time_from_sec(runsec),
                                            param_dict["model_names"][model_key],
                                            train_acc, val_acc,
                                            param_dict["pass_epochs"],
                                            len(freeze_to) + 1))

    # Save trained weights:
    if save_weights:
        weights_save_name = os.path.join(
            "saved_weights",
            "{}_{}_{}_{}.h5".format(  # 4 elements
                model_key,
                formatted(opt_key),
                formatted(run_key(param_dict, run_started)),
                timer.datetimepath()))
        ut.ensure_dir(weights_save_name)
        models[model_key].save_weights(weights_save_name)

    print("\n{} run complete at {}.".format(param_dict["model_names"][model_key],
                                            timer.datetimestamp()))

    # Tell keras to clear the tensorflow backend session (helps with memory leaks;
    # see: http://bit.ly/2xJZbAt )
    print("Clearing keras's backend Tensorflow session...\n")
    K.clear_session()
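def example_pretrained_run(param_dict, generators, models, opts, eta):
    # Illustrative call sketch (not from the original code): fine-tunes a Keras
    # application model with the routine above, first training the new classifier
    # head, then unfreezing from layer 116 onward and finally the whole network.
    # The freeze indices, model_key "xception", and opt_key "adam" are placeholder
    # assumptions; print_layers=True lists the real layer indices to choose from.
    run_pretrained_model(param_dict, generators, models, opts,
                         model_class=keras.applications.Xception,
                         model_key="xception", opt_key="adam",
                         print_layers=True, freeze_to=[116, 0], eta=eta)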
def get_encoder_parameters(MP, param_name="encoder_params_best"):
    enc_parameters = utils.load_obj(MP["dirname"] + param_name)
    return [param.get_value() for param in enc_parameters]
def run(dirname, save_weights, test_gratings, RF_comp, test_loglik, train_loglik,
        plot_loglik, plot_train_loglik, save_test_latents, n_data_samp,
        n_ais_step, n_prior_samp, n_hast_step, eps, n_ham_step, use_prior,
        full, fast, seed, AIS_test):
    np.random.seed(seed)

    LOG = log.log(dirname + "/analysis_log.log")

    MP = utils.load_obj(dirname + "model_params")

    n_pca = int((MP["patch_size"]**2) * MP["pca_frac"])

    # This is to handle legacy data files that didn't have the CNN keyword.
    if "CNN" not in MP.keys():
        MP["CNN"] = False

    if MP["CNN"]:
        datsize = MP["patch_size"]**2
    else:
        datsize = n_pca

    n_lat = int(n_pca * MP["overcomplete"])

    MP["n_lat"] = n_lat
    MP["n_pca"] = n_pca
    MP["datsize"] = datsize
    MP["dirname"] = dirname

    for x in MP.keys():
        print("{}\t{}".format(x, MP[x]))

    train, test, var, PCA = dat.get_data(MP["patch_size"], n_pca, MP["dataset"],
                                         MP["whiten"], MP["CNN"])

    LOG.log("Train Shape:\t{}".format(train.shape))
    LOG.log("Test Shape:\t{}".format(test.shape))
    LOG.log("Var Shape:\t{}".format(var.shape))

    W = get_weights(MP)
    try:
        Wf = get_weights(MP, "decoder_params_final")
        FINAL = True
    except:
        LOG.log("Final params not available")
        FINAL = False

    if save_weights or full or fast:
        # W[0] is [144, n_lat]. The PCA variance is size n_pca: take the PCA
        # variance, inverse-transform it, and then normalize by it.
        if MP["CNN"]:
            w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
            Wout = PCA.inverse_transform(
                PCA.transform(np.transpose(W[0])) * w_norm)
        else:
            Wout = PCA.inverse_transform(np.transpose(W[0]))

        LOG.log("Saving Weights")
        np.savetxt(MP["dirname"] + "weights.csv", Wout)

        if FINAL:
            if MP["CNN"]:
                w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
                Wout = PCA.inverse_transform(
                    PCA.transform(np.transpose(Wf[0])) * w_norm)
            else:
                Wout = PCA.inverse_transform(np.transpose(Wf[0]))
            np.savetxt(MP["dirname"] + "weights_final.csv", Wout)

    if save_test_latents or full or fast:
        LOG.log("Saving Latents")
        mean, var, trans = get_latents(
            test[:np.min([10 * n_data_samp, len(test)])], MP, W, PCA, SAVE=True)
        trans1 = np.array([np.diag(x) for x in trans])
        trans2 = trans[:, 0, :]

        np.savetxt(MP['dirname'] + "test_means_best.csv", mean)
        np.savetxt(
            MP['dirname'] + "test_sample_best.csv",
            np.array([
                np.random.multivariate_normal(mean[v], var[v])
                for v in range(len(var))
            ]))
        np.savetxt(MP['dirname'] + "test_trans_diag_best.csv", trans1)
        np.savetxt(MP['dirname'] + "test_trans_trans_best.csv", trans2)

        if FINAL:
            mean, var, trans = get_latents(
                test[:np.min([10 * n_data_samp, len(test)])], MP, Wf, PCA,
                SAVE=True)
            trans1 = np.array([np.diag(x) for x in trans])
            trans2 = trans[:, 0, :]

            np.savetxt(MP['dirname'] + "test_means_final.csv", mean)
            np.savetxt(MP['dirname'] + "test_trans_diag_final.csv", trans1)
            np.savetxt(MP['dirname'] + "test_trans_trans_final.csv", trans2)

        if MP["CNN"]:
            w_norm = np.sqrt(np.reshape(PCA.explained_variance_, [1, -1]))
            out = PCA.inverse_transform(
                PCA.transform(test[:np.min([10 * n_data_samp, len(test)])]) *
                w_norm)
        else:
            out = PCA.inverse_transform(
                test[:np.min([10 * n_data_samp, len(test)])])

        np.savetxt(MP["dirname"] + "test_images.csv", out)

    if test_gratings or full or fast:
        LOG.log("Processing Gratings")
        mean, lab, grats = grating_test(MP, PCA)
        np.savetxt(MP["dirname"] + "test_grating.csv", mean)
        np.savetxt(MP["dirname"] + "test_grating_labels.csv", lab)
        np.savetxt(MP["dirname"] + "test_grating_images.csv", grats)

    if RF_comp or full or fast:
        LOG.log("Calculating RFs")
        for scale in [.4, .5, .6]:
            RFs = RF_test(MP, PCA, scale)
            for k in range(len(RFs)):
                np.savetxt(
                    MP["dirname"] +
                    "receptive_fields_{}_{}.csv".format(k, scale), RFs[k])

    if test_loglik or full:
        LOG.log("Calculating Likelihoods")
        plot_loglikelihood(test[:np.min([n_data_samp, len(test)])],
                           MP,
                           "test_final_loglik.csv",
                           indices=["best"],
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if train_loglik or full:
        LOG.log("Calculating Likelihoods")
        plot_loglikelihood(train[:np.min([n_data_samp, len(train)])],
                           MP,
                           "train_final_loglik.csv",
                           indices=["best"],
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if plot_loglik or full:
        LOG.log("Plotting Likelihoods")
        plot_loglikelihood(test[:np.min([n_data_samp, len(test)])],
                           MP,
                           "test_plot_loglik.csv",
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if plot_train_loglik or full:
        LOG.log("Plotting Likelihoods")
        plot_loglikelihood(train[:np.min([n_data_samp, len(train)])],
                           MP,
                           "train_plot_loglik.csv",
                           n_ais_step=n_ais_step,
                           n_prior_samp=n_prior_samp,
                           n_hast_step=n_hast_step,
                           eps=eps,
                           n_ham_step=n_ham_step,
                           use_prior=use_prior,
                           LOG=LOG)

    if AIS_test:
        test_loglikelihood(test[:2], MP, "best", n_ais_step, n_prior_samp,
                           n_hast_step, eps, n_ham_step, use_prior, LOG)
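def example_analysis_run(dirname):
    # Illustrative sketch (not part of the original CLI): invokes the analysis
    # driver above on a trained-model directory, running only the "fast" outputs
    # (weights, latents, gratings, RFs). All numeric settings are placeholder
    # values for the AIS/likelihood machinery, which is skipped here.
    run(dirname, save_weights=True, test_gratings=False, RF_comp=False,
        test_loglik=False, train_loglik=False, plot_loglik=False,
        plot_train_loglik=False, save_test_latents=True, n_data_samp=100,
        n_ais_step=100, n_prior_samp=10, n_hast_step=10, eps=0.01,
        n_ham_step=10, use_prior=True, full=False, fast=True, seed=0,
        AIS_test=False)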
def get_weights(MP, param_name="decoder_params_best"):
    W = utils.load_obj(MP["dirname"] + param_name)
    return W
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # ----------------------------------------------------------------------------------
    # Create model(s) and send to device(s)
    # ----------------------------------------------------------------------------------
    net = model.ResUNet(3, False).float()
    net.load_state_dict(torch.load('ResUNet.pt'))

    if args.distributed:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            net.cuda(args.gpu)
            net = torch.nn.parallel.DistributedDataParallel(net,
                                                            device_ids=[args.gpu])
        else:
            net.cuda(args.gpu)
            net = torch.nn.parallel.DistributedDataParallel(net)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        net.cuda(args.gpu)
    else:
        net.cuda(args.gpu)
        net = torch.nn.parallel.DistributedDataParallel(net)

    # ----------------------------------------------------------------------------------
    # Define dataset path and data splits
    # ----------------------------------------------------------------------------------
    # Input_Data = scipy.io.loadmat("\Path\To\Inputs.mat")
    # Output_Data = scipy.io.loadmat("\Path\To\Outputs.mat")
    # Input = Input_Data['data']
    # Output = Output_Data['data']
    Input = utilities.load_obj(f'{args.path_to_data}/inputs')    # Input_Data['Inputs']
    Output = utilities.load_obj(f'{args.path_to_data}/outputs')  # Output_Data['Outputs']

    # ----------------------------------------------------------------------------------
    # Create datasets (with augmentation) and dataloaders
    # ----------------------------------------------------------------------------------
    Raman_Dataset_Test = dataset.RamanDataset(Input, Output,
                                              batch_size=args.batch_size,
                                              spectrum_len=args.spectrum_len)

    test_loader = DataLoader(Raman_Dataset_Test,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=0,
                             pin_memory=True)

    # ----------------------------------------------------------------------------------
    # Evaluate
    # ----------------------------------------------------------------------------------
    MSE_NN, MSE_SG = evaluate(test_loader, net, args)
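def example_launch(args):
    # Illustrative launcher sketch (not from the original script): mirrors the
    # standard PyTorch pattern for dispatching main_worker, either one process per
    # GPU via mp.spawn or as a single process. Assumes args carries the same fields
    # main_worker reads (distributed, gpu, world_size, multiprocessing_distributed, ...).
    import torch.multiprocessing as mp

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Total world size counts one process per GPU on every node.
        args.world_size = ngpus_per_node * args.world_size
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        main_worker(args.gpu, ngpus_per_node, args)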
args.PATH_IMAGES = PATH_IMAGES
args.DATA_NAME = DATA_NAME

TENSOR_FROZEN_MODEL_PATH = './models/frozen_model.pb'

graph = load_graph(TENSOR_FROZEN_MODEL_PATH)
sess = tf.Session(graph=graph,
                  config=tf.ConfigProto(gpu_options=tf.GPUOptions(
                      per_process_gpu_memory_fraction=0.60)))

output = sess.graph.get_tensor_by_name('model/embeddings:0')
input = sess.graph.get_tensor_by_name('model/input:0')
phase_train_placeholder = sess.graph.get_tensor_by_name('model/phase_train:0')

data_pkl = load_obj(DATA_NAME)


@app.route('/', methods=['POST'])
def post():
    data = request.get_json()
    parent_path = os.path.join(args.PATH_IMAGES, data['parent_path'])
    # print("parent path: ", parent_path)
    for child in data['childrent']:
        path_child, x, y, w, h = child
        embedding = get_embedding(path_child, input, phase_train_placeholder,
                                  output, sess)
        person_id = find_min(embedding, data_pkl)
        post_to_main_server(person_id, parent_path, (x, y, w, h))
    return 'OK!'
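def example_post_request(server_url="http://localhost:5000/"):
    # Illustrative client sketch (not part of the original service): shows the JSON
    # payload shape the POST handler above expects. The URL, image paths, and
    # bounding boxes are placeholders; the key name "childrent" is kept exactly as
    # the handler spells it.
    import requests

    payload = {
        "parent_path": "frames/frame_0001.jpg",
        "childrent": [
            # each child is [path_to_crop, x, y, w, h]
            ["crops/person_a.jpg", 10, 20, 64, 128],
            ["crops/person_b.jpg", 200, 40, 60, 120],
        ],
    }
    return requests.post(server_url, json=payload)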
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # ----------------------------------------------------------------------------------
    # Create model(s) and send to device(s)
    # ----------------------------------------------------------------------------------
    net = model.ResUNet(3, args.batch_norm).float()

    if args.distributed:
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            net.cuda(args.gpu)
            net = torch.nn.parallel.DistributedDataParallel(net,
                                                            device_ids=[args.gpu])
        else:
            net.cuda(args.gpu)
            net = torch.nn.parallel.DistributedDataParallel(net)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        net.cuda(args.gpu)
    else:
        net.cuda(args.gpu)
        net = torch.nn.parallel.DistributedDataParallel(net)

    # ----------------------------------------------------------------------------------
    # Define dataset path and data splits
    # ----------------------------------------------------------------------------------
    # Input_Data = scipy.io.loadmat("\Path\To\Inputs.mat")
    # Output_Data = scipy.io.loadmat("\Path\To\Outputs.mat")
    Input = utilities.load_obj(f'{args.path_to_data}/inputs')    # Input_Data['Inputs']
    Output = utilities.load_obj(f'{args.path_to_data}/outputs')  # Output_Data['Outputs']

    spectra_num = len(Input)

    train_split = round(0.9 * spectra_num)
    val_split = round(0.1 * spectra_num)

    input_train = Input[:train_split]
    input_val = Input[train_split:train_split + val_split]

    output_train = Output[:train_split]
    output_val = Output[train_split:train_split + val_split]

    # ----------------------------------------------------------------------------------
    # Create datasets (with augmentation) and dataloaders
    # ----------------------------------------------------------------------------------
    Raman_Dataset_Train = dataset.RamanDataset(input_train, output_train,
                                               batch_size=args.batch_size,
                                               spectrum_len=args.spectrum_len,
                                               spectrum_shift=0.1,
                                               spectrum_window=False,
                                               horizontal_flip=False,
                                               mixup=True)

    Raman_Dataset_Val = dataset.RamanDataset(input_val, output_val,
                                             batch_size=args.batch_size,
                                             spectrum_len=args.spectrum_len)

    train_loader = DataLoader(Raman_Dataset_Train,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=0,
                              pin_memory=True)
    val_loader = DataLoader(Raman_Dataset_Val,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)

    # ----------------------------------------------------------------------------------
    # Define criterion(s), optimizer(s), and scheduler(s)
    # ----------------------------------------------------------------------------------
    criterion = nn.L1Loss().cuda(args.gpu)
    criterion_MSE = nn.MSELoss().cuda(args.gpu)

    if args.optimizer == "sgd":
        optimizer = optim.SGD(net.parameters(), lr=args.lr)
    elif args.optimizer == "adamW":
        optimizer = optim.AdamW(net.parameters(), lr=args.lr)
    else:  # Adam
        optimizer = optim.Adam(net.parameters(), lr=args.lr)

    if args.scheduler == "decay-lr":
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.2)
    elif args.scheduler == "multiplicative-lr":
        lmbda = lambda epoch: 0.985
        scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda)
    elif args.scheduler == "cyclic-lr":
        scheduler = optim.lr_scheduler.CyclicLR(optimizer,
                                                base_lr=args.base_lr,
                                                max_lr=args.lr,
                                                mode='triangular2',
                                                cycle_momentum=False)
    elif args.scheduler == "one-cycle-lr":
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer,
                                                  max_lr=args.lr,
                                                  steps_per_epoch=len(train_loader),
                                                  epochs=args.epochs,
                                                  cycle_momentum=False)
    else:  # constant-lr
        scheduler = None

    print('Started Training')
    print('Training Details:')
    # print('Network: {}'.format(args.network))
    print('Epochs: {}'.format(args.epochs))
    print('Batch Size: {}'.format(args.batch_size))
    print('Optimizer: {}'.format(args.optimizer))
    print('Scheduler: {}'.format(args.scheduler))
    print('Learning Rate: {}'.format(args.lr))
    print('Spectrum Length: {}'.format(args.spectrum_len))

    DATE = datetime.datetime.now().strftime("%Y_%m_%d")

    log_dir = "runs/{}_{}_{}".format(DATE, args.optimizer, args.scheduler)   # , args.network)
    models_dir = "{}_{}_{}.pt".format(DATE, args.optimizer, args.scheduler)  # , args.network)

    writer = SummaryWriter(log_dir=log_dir)

    for epoch in range(args.epochs):
        train_loss = train(train_loader, net, optimizer, scheduler, criterion,
                           criterion_MSE, epoch, args)
        val_loss = validate(val_loader, net, criterion_MSE, args)
        if args.scheduler == "decay-lr" or args.scheduler == "multiplicative-lr":
            scheduler.step()
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch)

    torch.save(net.state_dict(), models_dir)
    print('Finished Training')
def run(dirname):
    LOG = log.log(dirname + "/weight_log.log")

    MP = utils.load_obj(dirname + "model_params")

    n_pca = int((MP["patch_size"]**2) * MP["pca_frac"])

    # This is to handle legacy data files that didn't have the CNN keyword.
    if "CNN" not in MP.keys():
        MP["CNN"] = False

    if MP["CNN"]:
        datsize = MP["patch_size"]**2
    else:
        datsize = n_pca

    n_lat = int(n_pca * MP["overcomplete"])

    MP["n_lat"] = n_lat
    MP["n_pca"] = n_pca
    MP["datsize"] = datsize
    MP["dirname"] = dirname

    for x in MP.keys():
        print("{}\t{}".format(x, MP[x]))

    train, test, var, PCA = dat.get_data(MP["patch_size"], n_pca, MP["dataset"],
                                         MP["whiten"], MP["CNN"])

    LOG.log("Train Shape:\t{}".format(train.shape))
    LOG.log("Test Shape:\t{}".format(test.shape))
    LOG.log("Var Shape:\t{}".format(var.shape))

    W = get_weights(MP)
    try:
        Wf = get_weights(MP, "decoder_params_final")
        FINAL = True
    except:
        LOG.log("Final params not available")
        FINAL = False

    LOG.log(np.std(test))

    sp1 = np.random.randn(test.shape[0], n_lat) * MP["s1"]
    sp2 = np.random.randn(test.shape[0], n_lat) * MP["s2"]
    S = MP["S"]

    LOG.log("sp1 {}".format(np.std(sp1)))
    LOG.log("sp2 {}".format(np.std(sp2)))
    LOG.log("Wsp1 {}".format(np.std(np.tensordot(sp1, W[0], axes=[1, 1]))))
    LOG.log("Wsp2 {}".format(np.std(np.tensordot(sp2, W[0], axes=[1, 1]))))
    LOG.log("SW {}".format(S * np.std(np.tensordot(sp2, W[0], axes=[1, 1])) +
                           (1. - S) * np.std(np.tensordot(sp1, W[0], axes=[1, 1]))))

    A = get_file(MP["dirname"] + "/test_means_best.csv")

    LOG.log("RV {}".format(np.std(np.tensordot(W[0], A, axes=[1, 1]))))
    LOG.log("DV {}".format(np.std(var)))