def main():
    """Entry point for the test/inference pipeline.

    Parses the test arguments, pins the GPU, resolves pretrained-model
    paths, builds the networks and dispatches to the test function that was
    selected on the command line.
    """
    parsed = arglib.TestArgs()
    args, str_args = parsed.args, parsed.str_args

    # Pin the process to the requested GPU before any framework touches CUDA.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    Writer.set_writer(args.results_dir)

    # Resolve all pretrained-model locations relative to the shared models dir.
    models_dir = args.pretrained_models_path
    id_model_path = models_dir.joinpath('vggface2.h5')
    synthesis_weights_path = str(
        models_dir.joinpath(
            f'stylegan_G_{args.resolution}x{args.resolution}_synthesis'))
    utils.landmarks_model_path = str(
        models_dir.joinpath('shape_predictor_68_face_landmarks.dat'))

    # Build the StyleGAN synthesis generator and restore its weights.
    generator = StyleGAN_G_synthesis(resolution=args.resolution,
                                     is_const_noise=args.const_noise)
    generator.load_weights(synthesis_weights_path)

    network = Network(args, id_model_path, generator)
    network.test()

    # Look up the requested test routine by name and run it.
    inference = Inference(args, network)
    getattr(inference, args.test_func)()
def __init__(self, num_samples, burn_in, population_size, topology,
             train_data, test_data, directory, temperature, swap_sample,
             parameter_queue, problem_type, main_process, event,
             active_chains, num_accepted, swap_interval, max_limit=(-5),
             min_limit=5):
    """Initialise one parallel-tempering replica process.

    Sets up the multiprocessing plumbing, the tempering/MCMC bookkeeping,
    the wrapped neural network and finally the PSO population. The
    statement order matters: events are cleared before the master can
    observe them, and PSO.__init__ runs last because it needs w_size and
    the limit arrays.
    """
    # Multiprocessing attributes
    multiprocessing.Process.__init__(self)
    self.process_id = temperature          # the temperature doubles as the chain id
    self.parameter_queue = parameter_queue # channel for swapped parameters
    self.signal_main = main_process        # event this replica sets to signal the master
    self.event = event                     # event the master sets to signal this replica
    self.active_chains = active_chains     # shared counter of live replicas
    self.num_accepted = num_accepted       # shared counter of accepted proposals
    self.event.clear()
    self.signal_main.clear()
    # Parallel Tempering attributes
    self.temperature = temperature
    self.swap_sample = swap_sample         # shared value: sample index of the next swap
    self.swap_interval = swap_interval
    self.burn_in = burn_in
    # MCMC attributes
    self.num_samples = num_samples
    self.topology = topology               # [n_inputs, n_hidden, n_outputs]
    self.train_data = train_data
    self.test_data = test_data
    self.problem_type = problem_type       # 'regression' or 'classification'
    self.directory = directory
    # Total weight+bias count of a 3-layer feed-forward network.
    self.w_size = (topology[0] * topology[1]) + (
        topology[1] * topology[2]) + topology[1] + topology[2]
    self.neural_network = Network(topology, train_data, test_data)
    # NOTE(review): with the defaults max_limit=-5 and min_limit=5 the
    # min_limits array is *greater* than max_limits — the two defaults look
    # swapped; confirm what callers actually pass before changing.
    self.min_limits = np.repeat(min_limit, self.w_size)
    self.max_limits = np.repeat(max_limit, self.w_size)
    self.initialize_sampling_parameters()
    self.create_directory(directory)
    PSO.__init__(self,
                 pop_size=population_size,
                 num_params=self.w_size,
                 max_limits=self.max_limits,
                 min_limits=self.min_limits)
def multinomial_likelihood(neural_network, data, weights, temperature):
    """Tempered categorical (multinomial) log-likelihood for classification.

    The target block is the one-hot columns of `data` that follow the
    topology[0] input columns.

    Args:
        neural_network: model exposing topology, generate_output and softmax.
        data: 2-D array of [inputs | one-hot targets] rows.
        weights: flat weight vector to evaluate.
        temperature: tempering divisor applied to the log-likelihood.

    Returns:
        [log_likelihood / temperature, rmse, accuracy].
    """
    y = data[:, neural_network.topology[0]:neural_network.topology[0] +
             neural_network.topology[2]]
    fx = neural_network.generate_output(data, weights)
    rmse = Network.calculate_rmse(
        fx, y)  # Can be replaced by calculate_nmse function for reporting NMSE
    probability = neural_network.softmax(fx)
    # Vectorised replacement for the original O(rows*cols) Python double
    # loop: sum the log-probabilities at the positions where the one-hot
    # target equals 1 (identical result, including 0.0 for empty input).
    loss = np.sum(np.log(probability[y == 1]))
    accuracy = Network.calculate_accuracy(fx, y)
    return [loss / temperature, rmse, accuracy]
def gaussian_likelihood(neural_network, data, weights, tausq, temperature):
    """Tempered Gaussian log-likelihood for regression.

    Args:
        neural_network: model exposing topology and generate_output.
        data: 2-D array of [inputs | targets] rows.
        weights: flat weight vector to evaluate.
        tausq: Gaussian noise variance.
        temperature: tempering divisor applied to the log-likelihood.

    Returns:
        [log_likelihood / temperature, rmse].
    """
    in_dim = neural_network.topology[0]
    out_dim = neural_network.topology[2]
    target = data[:, in_dim:in_dim + out_dim]
    predicted = neural_network.generate_output(data, weights)
    rmse = Network.calculate_rmse(predicted, target)
    # Elementwise Gaussian log-density with variance tausq.
    squared_error = np.square(target - predicted)
    log_density = -0.5 * np.log(2 * np.pi * tausq) - 0.5 * squared_error / tausq
    return [np.sum(log_density) / temperature, rmse]
def deploy(args, data_loader):
    """Run the trained model over `data_loader` and dump per-sample outputs.

    For every batch it saves the localisation map (`pred_i.npy`), a
    gradient-based saliency map (`sal_i.npy`) and appends (index, label,
    prediction) to `pred.csv` in `args.outpath`.

    Fixes over the original: the checkpoint is loaded with
    `map_location=device` (a GPU-saved checkpoint is otherwise unloadable on
    a CPU-only host) and the model is switched to eval mode so dropout /
    batch-norm behave deterministically at inference.

    Args:
        args: namespace with the network hyper-parameters, `logdir` (where
            `best_checkpoint.pth` lives) and `outpath` (output directory).
        data_loader: iterable yielding (images, labels) batches.

    Raises:
        Exception: if no checkpoint file is found in `args.logdir`.
    """
    model = Network(k=args.network_k,
                    att_type=args.network_att_type,
                    kernel3=args.kernel3,
                    width=args.network_width,
                    dropout=args.network_dropout,
                    compensate=True,
                    norm=args.norm,
                    inp_channels=args.input_channels)
    print(model)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    checkpoint_path = os.path.join(args.logdir, 'best_checkpoint.pth')
    if not os.path.isfile(checkpoint_path):
        raise Exception('Couldnt load checkpoint.')
    # map_location remaps GPU-saved tensors onto whatever device we run on.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    # Inference: disable dropout and use batch-norm running statistics.
    model.eval()

    df = pd.DataFrame(columns=['img', 'label', 'pred'])
    with tqdm(enumerate(data_loader)) as pbar:
        for i, (images, labels) in pbar:
            raw_label = labels
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()
            # Input gradients are required for the saliency map, so no
            # torch.no_grad() here.
            images.requires_grad = True

            # Forward pass
            outputs, att, localised = model(images, True)
            localised = F.softmax(localised.data, 3)[..., 1]
            predicted = torch.argmax(outputs.data, 1)
            saliency = torch.autograd.grad(outputs[:, 1].sum(), images)[0].data

            localised = localised[0].cpu().numpy()
            # RMS over channels of the input-gradient magnitude.
            saliency = torch.sqrt((saliency[0]**2).mean(0)).cpu().numpy()
            np.save(os.path.join(args.outpath, 'pred_{}.npy'.format(i)),
                    localised)
            np.save(os.path.join(args.outpath, 'sal_{}.npy'.format(i)),
                    saliency)
            df.loc[len(df)] = [
                i,
                raw_label.numpy().squeeze(),
                predicted.cpu().numpy().squeeze()
            ]
    df.to_csv(os.path.join(args.outpath, 'pred.csv'), index=False)
    print('done - stopping now')
def main():
    """Entry point for training.

    Parses the training arguments, configures logging and run bookkeeping,
    resolves every pretrained-model path, builds the networks and hands off
    to the Trainer.
    """
    parsed = arglib.TrainArgs()
    args, str_args = parsed.args, parsed.str_args
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    init_logger(args)
    logger = logging.getLogger('main')
    cmd_line = ' '.join(sys.argv)
    logger.info(f'cmd line is: \n {cmd_line}')
    logger.info(str_args)
    logger.debug('Copying src to results dir')
    Writer.set_writer(args.results_dir)

    # For non-debug runs, keep a human-written description with the results.
    if not args.debug:
        description = input('Please write a short description of this run\n')
        desc_file = args.results_dir.joinpath('description.txt')
        with desc_file.open('w') as f:
            f.write(description)

    # Resolve all pretrained-model locations relative to the models dir.
    models_dir = args.pretrained_models_path
    id_model_path = models_dir.joinpath('vggface2.h5')
    stylegan_G_synthesis_path = str(
        models_dir.joinpath(
            f'stylegan_G_{args.resolution}x{args.resolution}_synthesis'))
    landmarks_model_path = str(models_dir.joinpath('face_utils/keypoints'))
    face_detection_model_path = str(models_dir.joinpath('face_utils/detector'))
    arcface_model_path = str(models_dir.joinpath('arcface_weights/weights-b'))
    utils.landmarks_model_path = str(
        models_dir.joinpath('shape_predictor_68_face_landmarks.dat'))

    # Build the StyleGAN synthesis generator and restore its weights.
    generator = StyleGAN_G_synthesis(resolution=args.resolution,
                                     is_const_noise=args.const_noise)
    generator.load_weights(stylegan_G_synthesis_path)

    network = Network(args, id_model_path, generator, landmarks_model_path,
                      face_detection_model_path, arcface_model_path)
    data_loader = DataLoader(args)
    Trainer(args, network, data_loader).train()
def main():
    """Train a small network on MNIST and evaluate after every epoch.

    Fixes over the original version:
      * the training loss applied ``F.log_softmax`` and then
        ``F.cross_entropy`` — but cross_entropy applies log_softmax
        internally, so softmax was effectively applied twice and the loss
        (and its gradients) were wrong; the loss is now computed on the raw
        logits,
      * the same double-softmax bug is fixed in the test loop
        (``F.softmax`` fed into ``F.cross_entropy``),
      * ``net.train()`` / ``net.eval()`` modes are toggled per phase,
      * the checkpoint directory is created if missing so ``torch.save``
        cannot fail on the first epoch,
      * deprecated ``Variable`` wrappers are removed (plain tensors carry
        autograd in modern PyTorch), and ``torch.no_grad()`` wraps the whole
        evaluation loop.
    """
    import os  # local import: keeps this fix self-contained in the script

    train_dataset = MNIST(root='./data', train=True, download=True,
                          transform=transforms.ToTensor())
    test_dataset = MNIST(root='./data', train=False, download=True,
                         transform=transforms.ToTensor())
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, num_workers=2)

    net = Network(1, 128, 10, 10)
    if USE_CUDA:
        net = net.cuda()
    opt = optim.SGD(net.parameters(), lr=LEARNING_RATE,
                    weight_decay=WEIGHT_DECAY, momentum=.9, nesterov=True)

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs('./checkpoint', exist_ok=True)

    for epoch in range(1, EPOCHS + 1):
        print('[Epoch %d]' % epoch)

        net.train()
        train_loss = 0
        train_correct, train_total = 0, 0
        start_point = time.time()

        for inputs, labels in train_loader:
            if USE_CUDA:
                inputs, labels = inputs.cuda(), labels.cuda()

            opt.zero_grad()
            logits = net(inputs)
            # cross_entropy expects raw (unnormalised) logits.
            loss = F.cross_entropy(logits, labels)
            loss.backward()
            opt.step()

            train_loss += loss.item()
            # argmax over logits equals argmax over (log_)softmax.
            train_correct += (logits.argmax(dim=1) == labels).sum().item()
            train_total += len(logits)

        print('train-acc : %.4f%% train-loss : %.5f' %
              (100 * train_correct / train_total,
               train_loss / len(train_loader)))
        print('elapsed time: %ds' % (time.time() - start_point))

        net.eval()
        test_loss = 0
        test_correct, test_total = 0, 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                if USE_CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()
                logits = net(inputs)
                test_loss += F.cross_entropy(logits, labels).item()
                test_correct += (logits.argmax(dim=1) == labels).sum().item()
                test_total += len(logits)

        print('test-acc : %.4f%% test-loss : %.5f' %
              (100 * test_correct / test_total, test_loss / len(test_loader)))
        torch.save(net.state_dict(), './checkpoint/checkpoint-%04d.bin' % epoch)
class Replica(PSO, Process):
    """One parallel-tempering replica: a PSO-evolved population sampled in
    its own process at a fixed temperature.

    Synchronisation with the master process goes through `parameter_queue`,
    the `event`/`signal_main` event pair and the shared `active_chains` /
    `num_accepted` counters.
    """

    def __init__(self, num_samples, burn_in, population_size, topology,
                 train_data, test_data, directory, temperature, swap_sample,
                 parameter_queue, problem_type, main_process, event,
                 active_chains, num_accepted, swap_interval, max_limit=(-5),
                 min_limit=5):
        """Initialise multiprocessing plumbing, tempering/MCMC state, the
        wrapped network and finally the PSO population (which needs w_size
        and the limit arrays, hence last)."""
        # Multiprocessing attributes
        multiprocessing.Process.__init__(self)
        self.process_id = temperature          # temperature doubles as chain id
        self.parameter_queue = parameter_queue
        self.signal_main = main_process        # event set to signal the master
        self.event = event                     # event the master sets for us
        self.active_chains = active_chains     # shared live-replica counter
        self.num_accepted = num_accepted       # shared accepted-proposal counter
        self.event.clear()
        self.signal_main.clear()
        # Parallel Tempering attributes
        self.temperature = temperature
        self.swap_sample = swap_sample         # shared value: next swap's sample index
        self.swap_interval = swap_interval
        self.burn_in = burn_in
        # MCMC attributes
        self.num_samples = num_samples
        self.topology = topology               # [n_inputs, n_hidden, n_outputs]
        self.train_data = train_data
        self.test_data = test_data
        self.problem_type = problem_type       # 'regression' or 'classification'
        self.directory = directory
        # Total weight+bias count of a 3-layer feed-forward network.
        self.w_size = (topology[0] * topology[1]) + (
            topology[1] * topology[2]) + topology[1] + topology[2]
        self.neural_network = Network(topology, train_data, test_data)
        # NOTE(review): defaults give min_limits (=5) > max_limits (=-5);
        # the two defaults look swapped — confirm what callers pass.
        self.min_limits = np.repeat(min_limit, self.w_size)
        self.max_limits = np.repeat(max_limit, self.w_size)
        self.initialize_sampling_parameters()
        self.create_directory(directory)
        PSO.__init__(self,
                     pop_size=population_size,
                     num_params=self.w_size,
                     max_limits=self.max_limits,
                     min_limits=self.min_limits)

    def fitness_function(self, x):
        """PSO fitness of weight vector `x`, delegated to the network."""
        fitness = self.neural_network.evaluate_fitness(x)
        return fitness

    def initialize_sampling_parameters(self):
        """Set MCMC step sizes, prior hyper-parameters and the start time."""
        self.weights_stepsize = 0.01
        self.eta_stepsize = 0.02
        self.sigma_squared = 36   # prior variance of the weights
        self.nu_1 = 0             # inverse-gamma shape hyper-parameter for tau^2
        self.nu_2 = 0             # inverse-gamma scale hyper-parameter for tau^2
        self.start_time = time.time()

    @staticmethod
    def convert_time(secs):
        """Format a duration in seconds as a ['MM', 'SS'] string pair."""
        if secs >= 60:
            mins = str(int(secs / 60))
            secs = str(int(secs % 60))
        else:
            secs = str(int(secs))
            mins = str(00)  # str(00) == '0'; zero-padded to '00' below
        if len(mins) == 1:
            mins = '0' + mins
        if len(secs) == 1:
            secs = '0' + secs
        return [mins, secs]

    @staticmethod
    def create_directory(directory):
        """Create `directory` if absent (single level, no parents)."""
        if not os.path.isdir(directory):
            os.mkdir(directory)

    @staticmethod
    def multinomial_likelihood(neural_network, data, weights, temperature):
        """Tempered categorical log-likelihood for classification.

        Targets are the one-hot columns following the topology[0] input
        columns of `data`. Returns [loglik / temperature, rmse, accuracy].
        """
        y = data[:, neural_network.topology[0]:neural_network.topology[0] +
                 neural_network.topology[2]]
        fx = neural_network.generate_output(data, weights)
        rmse = Network.calculate_rmse(
            fx, y)  # Can be replaced by calculate_nmse function for reporting NMSE
        probability = neural_network.softmax(fx)
        # Sum the log-probability of the "hot" entry in each one-hot row.
        loss = 0
        for index_1 in range(y.shape[0]):
            for index_2 in range(y.shape[1]):
                if y[index_1, index_2] == 1:
                    loss += np.log(probability[index_1, index_2])
        accuracy = Network.calculate_accuracy(fx, y)
        return [loss / temperature, rmse, accuracy]

    @staticmethod
    def classification_prior(sigma_squared, weights):
        """Gaussian log-prior over the weights (classification case)."""
        part_1 = -1 * ((weights.shape[0]) / 2) * np.log(sigma_squared)
        part_2 = 1 / (2 * sigma_squared) * (sum(np.square(weights)))
        log_loss = part_1 - part_2
        return log_loss

    @staticmethod
    def gaussian_likelihood(neural_network, data, weights, tausq, temperature):
        """Tempered Gaussian log-likelihood for regression with noise
        variance `tausq`. Returns [loglik / temperature, rmse]."""
        desired = data[:, neural_network.topology[0]:neural_network.topology[0] +
                       neural_network.topology[2]]
        prediction = neural_network.generate_output(data, weights)
        rmse = Network.calculate_rmse(prediction, desired)
        loss = -0.5 * np.log(
            2 * np.pi * tausq) - 0.5 * np.square(desired - prediction) / tausq
        return [np.sum(loss) / temperature, rmse]

    @staticmethod
    def gaussian_prior(sigma_squared, nu_1, nu_2, weights, tausq):
        """Gaussian log-prior on weights plus inverse-gamma log-prior on tausq."""
        part1 = -1 * (weights.shape[0] / 2) * np.log(sigma_squared)
        part2 = 1 / (2 * sigma_squared) * (sum(np.square(weights)))
        log_loss = part1 - part2 - (1 + nu_1) * np.log(tausq) - (nu_2 / tausq)
        return log_loss

    def likelihood_function(self, neural_network, data, weights, tau,
                            temperature):
        """Dispatch to the likelihood matching `self.problem_type`.

        Returns (likelihood, rmse, accuracy); accuracy is None for
        regression. Returns None implicitly for any other problem_type.
        """
        if self.problem_type == 'regression':
            likelihood, rmse = self.gaussian_likelihood(
                neural_network, data, weights, tau, temperature)
            return likelihood, rmse, None
        elif self.problem_type == 'classification':
            likelihood, rmse, accuracy = self.multinomial_likelihood(
                neural_network, data, weights, temperature)
            return likelihood, rmse, accuracy

    def prior_function(self, weights, tau):
        """Dispatch to the prior matching `self.problem_type`."""
        if self.problem_type == 'regression':
            loss = self.gaussian_prior(self.sigma_squared, self.nu_1,
                                       self.nu_2, weights, tau)
        elif self.problem_type == 'classification':
            loss = self.classification_prior(self.sigma_squared, weights)
        # quit()
        return loss

    def evaluate_proposal(self, neural_network, train_data, test_data,
                          weights_proposal, tau_proposal, likelihood_current,
                          prior_current):
        """Metropolis-Hastings accept/reject for one proposed weight vector.

        NOTE(review): the acceptance ratio uses only difference_likelihood;
        difference_prior is computed but never enters mh_ratio. On accept,
        `prior_proposal = prior_current` looks reversed (the new prior is
        discarded because prior_current is what gets returned) — confirm
        against the intended sampler before relying on the prior values.
        """
        accept = False
        likelihood_ignore, rmse_test_proposal, acc_test = self.likelihood_function(
            neural_network, test_data, weights_proposal, tau_proposal,
            self.temperature)
        likelihood_proposal, rmse_train_proposal, acc_train = self.likelihood_function(
            neural_network, train_data, weights_proposal, tau_proposal,
            self.temperature)
        prior_proposal = self.prior_function(weights_proposal, tau_proposal)
        difference_likelihood = likelihood_proposal - likelihood_current
        difference_prior = prior_proposal - prior_current
        # min(709, ...) guards np.exp against float64 overflow.
        mh_ratio = min(1, np.exp(min(709, difference_likelihood)))
        u = np.random.uniform(0, 1)
        if u < mh_ratio:
            accept = True
            likelihood_current = likelihood_proposal
            prior_proposal = prior_current
        if acc_train == None:  # NOTE(review): prefer `is None`
            return accept, rmse_train_proposal, rmse_test_proposal, likelihood_current, prior_current
        else:
            return accept, rmse_train_proposal, rmse_test_proposal, acc_train, acc_test, likelihood_current, prior_current

    def run(self):
        """Process entry point: evolve the swarm for num_samples steps,
        logging RMSE/accuracy traces and meeting the master at each
        scheduled swap point.

        NOTE(review): the per-particle MH accept/reject and the actual
        parameter exchange are commented out below, so this currently runs
        plain PSO with logging; the acc files are never closed, files are
        not closed on exceptions, and event.wait() has no timeout (possible
        deadlock if the master dies) — confirm intent.
        """
        print(f'Entered Run, chain: {self.temperature:.2f}')
        # Deterministic per-chain seed derived from the temperature.
        np.random.seed(int(self.temperature * 1000))
        save_knowledge = True
        train_rmse_file = open(
            os.path.join(self.directory,
                         'train_rmse_{:.4f}.csv'.format(self.temperature)),
            'w')
        test_rmse_file = open(
            os.path.join(self.directory,
                         'test_rmse_{:.4f}.csv'.format(self.temperature)),
            'w')
        if self.problem_type == 'classification':
            train_acc_file = open(
                os.path.join(self.directory,
                             'train_acc_{:.4f}.csv'.format(self.temperature)),
                'w')
            test_acc_file = open(
                os.path.join(self.directory,
                             'test_acc_{:.4f}.csv'.format(self.temperature)),
                'w')
        swarm_initial, best_swarm_pos, best_swarm_err = self.swarm, self.best_swarm_pos, self.best_swarm_err
        # Initialize MCMC
        print(f'Initialize MCMC, chain: {self.temperature:.2f}')
        self.start_time = time.time()
        train_size = self.train_data.shape[0]
        test_size = self.test_data.shape[0]
        # Target columns follow the topology[0] input columns.
        y_test = self.test_data[:, self.topology[0]:self.topology[0] +
                                self.topology[2]]
        y_train = self.train_data[:, self.topology[0]:self.topology[0] +
                                  self.topology[2]]
        swarm_current = copy.copy(swarm_initial)
        swarm_proposal = copy.copy(swarm_initial)
        # Per-particle sampler state (one slot per swarm member).
        eta = [None for n in range(self.pop_size)]
        eta_proposal = [None for n in range(self.pop_size)]
        tau_proposal = [None for n in range(self.pop_size)]
        prior = [None for n in range(self.pop_size)]
        likelihood = [None for n in range(self.pop_size)]
        rmse_train = [None for n in range(self.pop_size)]
        rmse_test = [None for n in range(self.pop_size)]
        acc_test = [None for n in range(self.pop_size)]
        acc_train = [None for n in range(self.pop_size)]
        for i in range(self.pop_size):
            prediction_train = self.neural_network.generate_output(
                self.train_data, swarm_current[i].position)
            prediction_test = self.neural_network.generate_output(
                self.test_data, swarm_current[i].position)
            # eta = log residual variance; tau = exp(eta) is the noise variance.
            eta[i] = np.log(np.var(prediction_train - y_train))
            eta_proposal[i] = copy.copy(eta[i])
            tau_proposal[i] = np.exp(eta[i])
            prior[i] = self.prior_function(swarm_current[i].position,
                                           tau_proposal[i])
            likelihood[i], rmse_train[i], acc_train[
                i] = self.likelihood_function(self.neural_network,
                                              self.train_data,
                                              swarm_current[i].position,
                                              tau_proposal[i],
                                              self.temperature)
            rmse_test[i] = Network.calculate_rmse(prediction_test, y_test)
            if self.problem_type == 'classification':
                acc_test[i] = Network.calculate_accuracy(
                    prediction_test, y_test)
        # Save values into previous variables
        rmse_train_current = rmse_train
        rmse_test_current = rmse_test
        num_accept = 0
        if self.problem_type == 'classification':
            acc_test_current = acc_test
            acc_train_current = acc_train
        writ = 0  # counts how many trace rows have been written
        if save_knowledge:
            train_rmse_file.write(str(rmse_train_current) + "\n")
            test_rmse_file.write(str(rmse_test_current) + "\n")
            if self.problem_type == 'classification':
                train_acc_file.write(str(acc_train_current) + "\n")
                test_acc_file.write(str(acc_test_current) + "\n")
            writ += 1
        print(f'Starting sampling, chain:{self.temperature:.2f}')
        # Start sampling
        for sample in range(1, self.num_samples):
            # swarm_evolved, best_swarm_evolved_pos, best_swarm_evolved_err= self.evolve(copy.copy(swarm_current), best_swarm_pos, best_swarm_err)
            swarm_evolved, best_swarm_pos, best_swarm_err = self.evolve(
                copy.copy(swarm_current), best_swarm_pos, best_swarm_err)
            # Dump the evolved positions for offline inspection.
            arr = []
            for i in range(self.pop_size):
                arr.append(swarm_evolved[i].position)
            arr = np.asarray(arr)
            np.savetxt(f'test/{sample}.txt', arr, delimiter=',')
            # for i in range(self.pop_size):
            #     swarm_proposal[i].position = swarm_evolved[i].position #+ np.random.normal(0, self.weights_stepsize, size=self.num_params)
            #     eta_proposal[i] = eta[i] + np.random.normal(0, self.eta_stepsize, 1)
            #     tau_proposal[i] = np.exp(eta_proposal[i])
            #     # print(f'Temperature: {self.temperature:.2f} Sample: {sample} P3')
            #     if self.problem_type == 'classification':
            #         accept, rmse_train[i], rmse_test[i], acc_train[i], acc_test[i], likelihood[i], prior[i] = self.evaluate_proposal(self.neural_network, self.train_data, self.test_data, swarm_proposal[i].position, tau_proposal[i], likelihood[i], prior[i])
            #     else:
            #         accept, rmse_train[i], rmse_test[i], likelihood[i], prior[i] = self.evaluate_proposal(self.neural_network, self.train_data, self.test_data, swarm_proposal[i].position, tau_proposal[i], likelihood[i], prior[i])
            #     if accept:
            #         num_accept += 1
            #         swarm_current[i] = swarm_proposal[i]
            #         eta[i] = eta_proposal[i]
            #         # save values into previous variables
            #         rmse_train_current[i] = rmse_train[i]
            #         rmse_test_current[i] = rmse_test[i]
            #         if self.problem_type == 'classification':
            #             acc_train_current[i] = acc_train[i]
            #             acc_test_current[i] = acc_test[i]
            #     if swarm_current[i].error < best_swarm_err:
            #         best_swarm_err = swarm_current[i].error
            #         best_swarm_pos = copy.copy(swarm_current[i].position)
            # if save_knowledge:
            #     train_rmse_file.write(str(rmse_train_current)+"\n")
            #     test_rmse_file.write(str(rmse_test_current)+"\n")
            #     if self.problem_type == 'classification':
            #         train_acc_file.write(str(acc_train_current)+"\n")
            #         test_acc_file.write(str(acc_test_current)+"\n")
            #     writ += 1
            print(
                f'Temperature: {self.temperature:.2f} Sample: {sample} Next swap at: {self.swap_sample.value} Best rmse: {best_swarm_err} P4'
            )
            # SWAPPING PREP
            if (sample == self.swap_sample.value):
                # print('\nTemperature: {} Swapping weights: {}'.format(self.temperature, weights_current[:2]))
                # param = np.concatenate([weights_current, np.asarray([eta]).reshape(1), np.asarray([likelihood*self.temperature]),np.asarray([self.temperature])])
                # self.parameter_queue.put(param)
                self.event.clear()
                self.signal_main.set()
                # print(f'Temperature: {self.temperature:.2f} Current sample: {sample} out of {self.num_samples} is num with {self.swap_sample.value} as next swap')
                # # Wait for signal from Master
                result = self.event.wait()
                # # retrieve parameters fom queues if it has been swapped
                # print(f'Temperature: {self.temperature:.2f} Call get')
                # result = self.parameter_queue.get(timeout=2)
                # while not result.all():
                #     time.sleep(0.01)
                #     result = self.parameter_queue.get(timeout=2)
                # weights_current = result[0:self.w_size]
                # self.population[self.best_index] = weights_current
                # self.fitness[self.best_index] = self.fitness_function(weights_current)
                # eta = result[self.w_size]
                # likelihood = result[self.w_size+1]/self.temperature
                # print(f'Temperature: {self.temperature:.2f} Swapped weights: {weights_current[:2]}')
            # elif (sample >= self.swap_sample.value):
            #     with self.swap_sample.get_lock():
            #         self.swap_sample.value += self.swap_interval
            # print(f'Temperature: {self.temperature:.2f} Sample: {sample} P5')
            elapsed_time = ":".join(
                Replica.convert_time(time.time() - self.start_time))
            # print("Temperature: {} Sample: {:d}, Best Fitness: {:.4f}, Proposal: {:.4f}, Time Elapsed: {:s}".format(self.temperature, sample, rmse_train_current, rmse_train, elapsed_time))
        elapsed_time = time.time() - self.start_time
        accept_ratio = num_accept / self.num_samples
        print("Written {} values for Accuracies".format(writ))
        # Close the files
        train_rmse_file.close()
        test_rmse_file.close()
        # Tell the master this replica is finished and report acceptances.
        with self.active_chains.get_lock():
            self.active_chains.value -= 1
        with self.num_accepted.get_lock():
            self.num_accepted.value += num_accept
        print(
            f"Temperature: {self.temperature} done, {sample+1} samples sampled out of {self.num_samples}. Number of active chains: {self.active_chains.value}"
        )
def run(self):
    """Process entry point: evolve the swarm for num_samples steps, logging
    RMSE/accuracy traces and meeting the master at each scheduled swap point.

    NOTE(review): this appears to be a duplicate copy of Replica.run — confirm
    which copy is live. The per-particle MH accept/reject and the actual
    parameter exchange are commented out below, so this currently runs plain
    PSO with logging; the acc files are never closed, files are not closed on
    exceptions, and event.wait() has no timeout (possible deadlock).
    """
    print(f'Entered Run, chain: {self.temperature:.2f}')
    # Deterministic per-chain seed derived from the temperature.
    np.random.seed(int(self.temperature * 1000))
    save_knowledge = True
    train_rmse_file = open(
        os.path.join(self.directory,
                     'train_rmse_{:.4f}.csv'.format(self.temperature)), 'w')
    test_rmse_file = open(
        os.path.join(self.directory,
                     'test_rmse_{:.4f}.csv'.format(self.temperature)), 'w')
    if self.problem_type == 'classification':
        train_acc_file = open(
            os.path.join(self.directory,
                         'train_acc_{:.4f}.csv'.format(self.temperature)), 'w')
        test_acc_file = open(
            os.path.join(self.directory,
                         'test_acc_{:.4f}.csv'.format(self.temperature)), 'w')
    swarm_initial, best_swarm_pos, best_swarm_err = self.swarm, self.best_swarm_pos, self.best_swarm_err
    # Initialize MCMC
    print(f'Initialize MCMC, chain: {self.temperature:.2f}')
    self.start_time = time.time()
    train_size = self.train_data.shape[0]
    test_size = self.test_data.shape[0]
    # Target columns follow the topology[0] input columns.
    y_test = self.test_data[:, self.topology[0]:self.topology[0] +
                            self.topology[2]]
    y_train = self.train_data[:, self.topology[0]:self.topology[0] +
                              self.topology[2]]
    swarm_current = copy.copy(swarm_initial)
    swarm_proposal = copy.copy(swarm_initial)
    # Per-particle sampler state (one slot per swarm member).
    eta = [None for n in range(self.pop_size)]
    eta_proposal = [None for n in range(self.pop_size)]
    tau_proposal = [None for n in range(self.pop_size)]
    prior = [None for n in range(self.pop_size)]
    likelihood = [None for n in range(self.pop_size)]
    rmse_train = [None for n in range(self.pop_size)]
    rmse_test = [None for n in range(self.pop_size)]
    acc_test = [None for n in range(self.pop_size)]
    acc_train = [None for n in range(self.pop_size)]
    for i in range(self.pop_size):
        prediction_train = self.neural_network.generate_output(
            self.train_data, swarm_current[i].position)
        prediction_test = self.neural_network.generate_output(
            self.test_data, swarm_current[i].position)
        # eta = log residual variance; tau = exp(eta) is the noise variance.
        eta[i] = np.log(np.var(prediction_train - y_train))
        eta_proposal[i] = copy.copy(eta[i])
        tau_proposal[i] = np.exp(eta[i])
        prior[i] = self.prior_function(swarm_current[i].position,
                                       tau_proposal[i])
        likelihood[i], rmse_train[i], acc_train[
            i] = self.likelihood_function(self.neural_network,
                                          self.train_data,
                                          swarm_current[i].position,
                                          tau_proposal[i], self.temperature)
        rmse_test[i] = Network.calculate_rmse(prediction_test, y_test)
        if self.problem_type == 'classification':
            acc_test[i] = Network.calculate_accuracy(prediction_test, y_test)
    # Save values into previous variables
    rmse_train_current = rmse_train
    rmse_test_current = rmse_test
    num_accept = 0
    if self.problem_type == 'classification':
        acc_test_current = acc_test
        acc_train_current = acc_train
    writ = 0  # counts how many trace rows have been written
    if save_knowledge:
        train_rmse_file.write(str(rmse_train_current) + "\n")
        test_rmse_file.write(str(rmse_test_current) + "\n")
        if self.problem_type == 'classification':
            train_acc_file.write(str(acc_train_current) + "\n")
            test_acc_file.write(str(acc_test_current) + "\n")
        writ += 1
    print(f'Starting sampling, chain:{self.temperature:.2f}')
    # Start sampling
    for sample in range(1, self.num_samples):
        # swarm_evolved, best_swarm_evolved_pos, best_swarm_evolved_err= self.evolve(copy.copy(swarm_current), best_swarm_pos, best_swarm_err)
        swarm_evolved, best_swarm_pos, best_swarm_err = self.evolve(
            copy.copy(swarm_current), best_swarm_pos, best_swarm_err)
        # Dump the evolved positions for offline inspection.
        arr = []
        for i in range(self.pop_size):
            arr.append(swarm_evolved[i].position)
        arr = np.asarray(arr)
        np.savetxt(f'test/{sample}.txt', arr, delimiter=',')
        # for i in range(self.pop_size):
        #     swarm_proposal[i].position = swarm_evolved[i].position #+ np.random.normal(0, self.weights_stepsize, size=self.num_params)
        #     eta_proposal[i] = eta[i] + np.random.normal(0, self.eta_stepsize, 1)
        #     tau_proposal[i] = np.exp(eta_proposal[i])
        #     # print(f'Temperature: {self.temperature:.2f} Sample: {sample} P3')
        #     if self.problem_type == 'classification':
        #         accept, rmse_train[i], rmse_test[i], acc_train[i], acc_test[i], likelihood[i], prior[i] = self.evaluate_proposal(self.neural_network, self.train_data, self.test_data, swarm_proposal[i].position, tau_proposal[i], likelihood[i], prior[i])
        #     else:
        #         accept, rmse_train[i], rmse_test[i], likelihood[i], prior[i] = self.evaluate_proposal(self.neural_network, self.train_data, self.test_data, swarm_proposal[i].position, tau_proposal[i], likelihood[i], prior[i])
        #     if accept:
        #         num_accept += 1
        #         swarm_current[i] = swarm_proposal[i]
        #         eta[i] = eta_proposal[i]
        #         # save values into previous variables
        #         rmse_train_current[i] = rmse_train[i]
        #         rmse_test_current[i] = rmse_test[i]
        #         if self.problem_type == 'classification':
        #             acc_train_current[i] = acc_train[i]
        #             acc_test_current[i] = acc_test[i]
        #     if swarm_current[i].error < best_swarm_err:
        #         best_swarm_err = swarm_current[i].error
        #         best_swarm_pos = copy.copy(swarm_current[i].position)
        # if save_knowledge:
        #     train_rmse_file.write(str(rmse_train_current)+"\n")
        #     test_rmse_file.write(str(rmse_test_current)+"\n")
        #     if self.problem_type == 'classification':
        #         train_acc_file.write(str(acc_train_current)+"\n")
        #         test_acc_file.write(str(acc_test_current)+"\n")
        #     writ += 1
        print(
            f'Temperature: {self.temperature:.2f} Sample: {sample} Next swap at: {self.swap_sample.value} Best rmse: {best_swarm_err} P4'
        )
        # SWAPPING PREP
        if (sample == self.swap_sample.value):
            # print('\nTemperature: {} Swapping weights: {}'.format(self.temperature, weights_current[:2]))
            # param = np.concatenate([weights_current, np.asarray([eta]).reshape(1), np.asarray([likelihood*self.temperature]),np.asarray([self.temperature])])
            # self.parameter_queue.put(param)
            self.event.clear()
            self.signal_main.set()
            # print(f'Temperature: {self.temperature:.2f} Current sample: {sample} out of {self.num_samples} is num with {self.swap_sample.value} as next swap')
            # # Wait for signal from Master
            result = self.event.wait()
            # # retrieve parameters fom queues if it has been swapped
            # print(f'Temperature: {self.temperature:.2f} Call get')
            # result = self.parameter_queue.get(timeout=2)
            # while not result.all():
            #     time.sleep(0.01)
            #     result = self.parameter_queue.get(timeout=2)
            # weights_current = result[0:self.w_size]
            # self.population[self.best_index] = weights_current
            # self.fitness[self.best_index] = self.fitness_function(weights_current)
            # eta = result[self.w_size]
            # likelihood = result[self.w_size+1]/self.temperature
            # print(f'Temperature: {self.temperature:.2f} Swapped weights: {weights_current[:2]}')
        # elif (sample >= self.swap_sample.value):
        #     with self.swap_sample.get_lock():
        #         self.swap_sample.value += self.swap_interval
        # print(f'Temperature: {self.temperature:.2f} Sample: {sample} P5')
        elapsed_time = ":".join(
            Replica.convert_time(time.time() - self.start_time))
        # print("Temperature: {} Sample: {:d}, Best Fitness: {:.4f}, Proposal: {:.4f}, Time Elapsed: {:s}".format(self.temperature, sample, rmse_train_current, rmse_train, elapsed_time))
    elapsed_time = time.time() - self.start_time
    accept_ratio = num_accept / self.num_samples
    print("Written {} values for Accuracies".format(writ))
    # Close the files
    train_rmse_file.close()
    test_rmse_file.close()
    # Tell the master this replica is finished and report acceptances.
    with self.active_chains.get_lock():
        self.active_chains.value -= 1
    with self.num_accepted.get_lock():
        self.num_accepted.value += num_accept
    print(
        f"Temperature: {self.temperature} done, {sample+1} samples sampled out of {self.num_samples}. Number of active chains: {self.active_chains.value}"
    )
def main(params):
    """Train the DenseNet-style pixel-labelling network described by `params`.

    params keys used: train_data_pkl, train_anno_pkl, batch_size, model_from,
    model_save, lr, l2_reg, num_answers, max_epoch.

    Improvements over the original:
      * the per-epoch learning-rate decay no longer rebuilds the RMSprop
        optimizer (which silently discarded its running gradient statistics
        every epoch); an ExponentialLR scheduler with the same 0.995 factor
        decays the rate in place,
      * `loss.item()` replaces the deprecated `loss.data` in the progress
        print.
    """
    print("Loading dataset ... ")
    with open(params['train_data_pkl'], 'rb') as f:
        train_data = pkl.load(f)
    with open(params['train_anno_pkl'], 'rb') as f:
        train_anno = pkl.load(f)
    """
    with open(params['val_data_pkl'], 'rb') as f:
        val_data = pkl.load(f)
    with open(params['val_anno_pkl'], 'rb') as f:
        val_anno = pkl.load(f)
    """
    # Train dataset and Train dataloader (NHWC -> NCHW for conv layers)
    train_data = np.transpose(train_data, (0, 3, 1, 2))
    train_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(train_data), torch.LongTensor(train_anno))
    train_loader = dataloader.DataLoader(train_dataset,
                                         params['batch_size'],
                                         shuffle=True,
                                         collate_fn=collate_fn)
    """
    # Validation dataset and Validation dataloader
    val_data = np.transpose(val_data, (0, 3, 1, 2))
    val_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(val_data), torch.LongTensor(val_anno))
    val_loader = dataloader.DataLoader(
        val_dataset, params['batch_size'], collate_fn=collate_fn)
    """
    # the number of layers in each dense block
    n_layers_list = [4, 5, 7, 10, 12, 15, 12, 10, 7, 5, 4]

    print("Constructing the network ... ")
    # Define the network
    densenet = Network(n_layers_list, 5).to(device)
    if os.path.isfile(params['model_from']):
        print("Starting from the saved model")
        densenet.load_state_dict(torch.load(params['model_from']))
    else:
        print("Couldn't find the saved model")
        print("Starting from the bottom")

    print("Training the model ...")
    # optimizer, LR schedule and criterion
    optimizer = torch.optim.RMSprop(densenet.parameters(),
                                    params['lr'],
                                    weight_decay=params['l2_reg'])
    # Multiplicative 0.995 decay per epoch; keeps RMSprop state intact.
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(params['max_epoch']):
        for i, (img, label) in enumerate(train_loader):
            img = img.to(device)
            label = label.to(device)

            # forward-propagation
            pred = densenet(img)

            # flatten for all pixel
            pred = pred.view((-1, params['num_answers']))
            label = label.view((-1))

            # get loss
            loss = criterion(pred, label)

            # back-propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print("Epoch: %d, Steps:[%d/%d], Loss: %.4f" %
                  (epoch, i, len(train_loader), loss.item()))

        # decay the learning rate once per epoch
        scheduler.step()

        if (epoch + 1) % 10 == 0:
            print("Saved the model")
            torch.save(densenet.state_dict(), params['model_save'])
def train(args, train_loader, train_val_loader, val_loader, test_loader):
    """Full training loop with checkpointing, TensorBoard logging and
    gradient accumulation.

    Builds a ``Network`` from ``args``, trains it on ``train_loader``,
    evaluates on ``val_loader`` every epoch (and additionally on
    ``train_val_loader`` / ``test_loader`` for new-best epochs, the final
    epoch, or when ``args.test_all`` is set), and persists a resumable
    checkpoint plus a per-epoch stats DataFrame via ``save_checkpoint``.

    args fields read here: seed, logdir, network_k, network_att_type,
    kernel3, network_width, network_dropout, norm, input_channels,
    num_epochs, shrinkage, opt, lr, l2, delayed_step, test_all.
    """
    seed(args.seed)
    # SLURM job id (or 'local') is logged so a run can be traced back.
    job_id = os.environ.get('SLURM_JOB_ID', 'local')
    print('Starting run {} with:\n{}'.format(job_id, args))
    writer = SummaryWriter(args.logdir)
    # One stats row per epoch; non-best epochs get NaN train/test columns.
    columns = ['epoch', 'eval_loss', 'eval_acc', 'eval_prec', 'eval_recall',
               'train_loss', 'train_acc', 'train_prec', 'train_recall',
               'test_loss', 'test_acc', 'test_prec', 'test_recall']
    stats_csv = pd.DataFrame(columns=columns)
    model = Network(
        k=args.network_k, att_type=args.network_att_type,
        kernel3=args.kernel3, width=args.network_width,
        dropout=args.network_dropout, compensate=True, norm=args.norm,
        inp_channels=args.input_channels)
    print(model)
    # 'shrinkage' scales both the epoch budget and the LR milestones.
    epochs = args.num_epochs * args.shrinkage
    milestones = np.array([80, 120, 160])
    milestones *= args.shrinkage
    milestones = list(milestones)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Keep a handle to the unwrapped model: the optimizer and checkpoints
    # must use the raw module even when it is wrapped in DataParallel below.
    raw_model = model
    if torch.cuda.device_count() > 1:
        print('using multiple gpus')
        model = torch.nn.DataParallel(model)
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    print(criterion)
    # NOTE(review): clip_grad_value_ clips existing .grad tensors in place;
    # called once here, before any backward(), it has nothing to clip and is
    # effectively a no-op. Presumably it was meant to run after each
    # loss.backward() — confirm intent.
    nn.utils.clip_grad_value_(raw_model.parameters(), 5.)
    if args.opt == 'rmsprop':
        optimizer = torch.optim.RMSprop(raw_model.parameters(), lr=args.lr,
                                        eps=1e-5, weight_decay=args.l2)
    elif args.opt == 'momentum':
        optimizer = torch.optim.SGD(raw_model.parameters(), lr=args.lr,
                                    momentum=0.9, weight_decay=args.l2)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(raw_model.parameters(), lr=args.lr,
                                     eps=1e-5, weight_decay=args.l2)
    lr_schedule = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                       milestones=milestones)
    # Resumable training state; load_checkpoint fills it in place when a
    # checkpoint already exists under args.logdir.
    state = {
        'epoch': 0,
        'step': 0,
        'state_dict': copy.deepcopy(raw_model.state_dict()),
        'optimizer': copy.deepcopy(optimizer.state_dict()),
        'lr_schedule': copy.deepcopy(lr_schedule.state_dict()),
        'best_acc': None,
        'best_epoch': 0,
        'is_best': False,
        'stats_csv': stats_csv,
        'config': vars(args)
    }
    if load_checkpoint(args.logdir, state):
        raw_model.load_state_dict(state['state_dict'])
        optimizer.load_state_dict(state['optimizer'])
        lr_schedule.load_state_dict(state['lr_schedule'])
        stats_csv = state['stats_csv']
    save_checkpoint(args.logdir, state)
    writer.add_text('args/str', str(args), state['epoch'])
    writer.add_text('job_id/str', job_id, state['epoch'])
    writer.add_text('model/str', str(model), state['epoch'])
    # Train the model
    for epoch in range(state['epoch'], epochs):
        # NOTE(review): stepping the schedule at the top of the epoch
        # (before any optimizer.step()) is the pre-1.1 PyTorch ordering;
        # newer versions warn and expect scheduler.step() after the
        # optimizer steps — verify against the pinned torch version.
        lr_schedule.step()
        model.train()
        # Per-logging-window accumulators, reset after each optimizer step.
        losses = []
        tps = []
        tns = []
        fps = []
        fns = []
        batch_labels = []
        # Number of batches accumulated since the last optimizer step.
        delayed = 0
        writer.add_scalar('stats/lr', optimizer.param_groups[0]['lr'],
                          epoch + 1)
        with tqdm(train_loader,
                  desc="Epoch [{}/{}]".format(epoch+1, epochs)) as pbar:
            for images, labels in pbar:
                batch_labels += list(labels)
                if torch.cuda.is_available():
                    # With multiple GPUs DataParallel scatters the inputs
                    # itself, so only labels are moved; single-GPU moves both.
                    if torch.cuda.device_count() == 1:
                        images = images.cuda()
                    labels = labels.cuda()
                # Forward pass (the model also returns attention maps).
                outputs, att = model(images)
                loss = criterion(outputs, labels)
                predicted = torch.argmax(outputs.data, 1)
                TP, TN, FP, FN = pred_stats(predicted, labels)
                cpu_loss = loss.mean().cpu().item()
                losses += [cpu_loss]
                tps += [TP]
                tns += [TN]
                fps += [FP]
                fns += [FN]
                # Backward and optimize. With delayed_step > 0, gradients are
                # accumulated over several batches; the loss is scaled so the
                # summed gradient matches one large batch.
                delayed += 1
                if args.delayed_step > 0:
                    (loss / args.delayed_step).backward()
                else:
                    loss.backward()
                # NOTE(review): with (delayed + 1) % args.delayed_step the
                # first optimizer step fires after delayed_step - 1 batches,
                # not delayed_step — looks off by one; confirm.
                if args.delayed_step == 0 or (delayed + 1) % args.delayed_step == 0:
                    optimizer.step()
                    optimizer.zero_grad()
                    # Log window-aggregated training metrics at this step.
                    precision, recall, accuracy = precision_recall_accuracy(
                        np.sum(tps), np.sum(tns), np.sum(fps), np.sum(fns))
                    writer.add_scalar('train/loss', np.mean(losses),
                                      state['step'])
                    writer.add_scalar('train/precision', precision,
                                      state['step'])
                    writer.add_scalar('train/recall', recall, state['step'])
                    writer.add_scalar('train/accuracy', accuracy,
                                      state['step'])
                    writer.add_scalar('train/labels', np.mean(batch_labels),
                                      state['step'])
                    state['step'] += 1
                    # Start a fresh accumulation / logging window.
                    delayed = 0
                    losses = []
                    tps = []
                    tns = []
                    fps = []
                    fns = []
                    batch_labels = []
                pbar.set_postfix(loss=cpu_loss)
        # step last backward if the step isn't done yet because of an
        # 'incomplete' delayed / accumulated batch
        if delayed > 0:
            optimizer.step()
            optimizer.zero_grad()
            precision, recall, accuracy = precision_recall_accuracy(
                np.sum(tps), np.sum(tns), np.sum(fps), np.sum(fns))
            writer.add_scalar('train/loss', np.mean(losses), state['step'])
            writer.add_scalar('train/precision', precision, state['step'])
            writer.add_scalar('train/recall', recall, state['step'])
            writer.add_scalar('train/accuracy', accuracy, state['step'])
            writer.add_scalar('train/labels', np.mean(batch_labels),
                              state['step'])
            state['step'] += 1
        # Snapshot everything needed to resume from the next epoch.
        state['epoch'] = epoch + 1
        state['state_dict'] = copy.deepcopy(raw_model.state_dict())
        state['optimizer'] = copy.deepcopy(optimizer.state_dict())
        state['lr_schedule'] = copy.deepcopy(lr_schedule.state_dict())
        if args.opt == 'rmsprop':
            # Diagnostic logging of RMSprop's second-moment estimates.
            rms_m2 = get_rmsprop_m2(model, optimizer)
            writer.add_scalar('train/rmsprop_m2_min', rms_m2.min(),
                              state['epoch'])
            writer.add_scalar('train/rmsprop_m2_mean', rms_m2.mean(),
                              state['epoch'])
            writer.add_scalar('train/rmsprop_m2_max', rms_m2.max(),
                              state['epoch'])
            writer.add_histogram('train/rmsprop_m2', rms_m2, state['epoch'])
        val_stats = evaluate(model, criterion, val_loader)
        log_evaluation(state['epoch'], val_stats, writer, 'eval')
        # Track the best validation accuracy seen so far.
        if state['best_acc'] is None or state['best_acc'] < val_stats['accuracy']:
            state['is_best'] = True
            state['best_acc'] = val_stats['accuracy']
            state['best_epoch'] = state['epoch']
        else:
            state['is_best'] = False
        # Run the expensive train/test evaluations only for new-best epochs,
        # the final epoch, or when explicitly requested via --test_all.
        if (state['is_best'] or state['epoch'] >= epochs or args.test_all):
            train_stats = evaluate(model, criterion, train_val_loader)
            log_evaluation(state['epoch'], train_stats, writer, 'train_eval')
            test_stats = evaluate(model, criterion, test_loader)
            log_evaluation(state['epoch'], test_stats, writer, 'test')
            stats_csv.loc[len(stats_csv)] = [
                state['epoch'], val_stats['loss'], val_stats['accuracy'],
                val_stats['precision'], val_stats['recall'],
                train_stats['loss'], train_stats['accuracy'],
                train_stats['precision'], train_stats['recall'],
                test_stats['loss'], test_stats['accuracy'],
                test_stats['precision'], test_stats['recall']]
        else:
            # Keep the row count aligned with epochs even when the extra
            # evaluations are skipped.
            stats_csv.loc[len(stats_csv)] = [
                state['epoch'], val_stats['loss'], val_stats['accuracy'],
                val_stats['precision'], val_stats['recall'],
                np.nan, np.nan, np.nan, np.nan,
                np.nan, np.nan, np.nan, np.nan]
        save_checkpoint(args.logdir, state)
    writer.add_text('done/str', 'true', state['epoch'])
    print('done - stopping now')
    writer.close()