def main():
    print('Starting...')
    data = data_reader(args.target)
    # solution = LocalSearch(op_idx=2)
    # solution = VariableLocalSearch(op_idx1=0, op_idx2=2, keep_invariant=1000, keep_invariant_max=2000)
    # solution = SA(op_idx=0, init_coeff=0.9, init_inner_time=200, stop_temp=1e-2, alpha=0.98)
    solution = GA(population_size=200, cross_rate=[0.3, 0.5],
                  mutation_rate=[0.1, 0.5], keep_invariant=50)
    tsp = TSP(solution, data, euclidean_dist)
    tsp.run(threshhold=args.thresh, savepath=args.savepath, save_freq=args.save_freq,
            print_freq=args.print_freq, max_iteration=args.max_itr)
    if args.savepath is not None:
        generate_gif(args.savepath)
        plot(args.savepath)
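# Note: `euclidean_dist` is passed to TSP above but not defined in this snippet.
# A minimal sketch of what such a metric presumably looks like (the exact
# signature TSP expects is an assumption, not the project's actual code):
import numpy as np

def euclidean_dist(a, b):
    """Straight-line distance between two 2-D city coordinates (x, y)."""
    return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)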
def main():
    # import data
    kwargs = {'num_workers': 2} if FLAGS.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))])),
        batch_size=FLAGS.batchsize, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))])),
        batch_size=FLAGS.batchsize, shuffle=False, **kwargs)

    # for later analysis we take some sample digits
    mask = 255. * np.ones((1, 28, 28))
    examples = train_loader.sampler.data_source.data[5:10].numpy()
    images = np.vstack([mask, examples])

    if not FLAGS.load_pretrained:
        print('Starting from scratch')
        fc1_w_init, fc1_b_init = None, None
        fc2_w_init, fc2_b_init = None, None
        fc3_w_init, fc3_b_init = None, None
    else:
        print('Starting from a pretrained point')
        ckpt_pret = torch.load('mnist_nn.pt')
        fc1_w_init = ckpt_pret['fc1.weight'].numpy()
        fc1_b_init = ckpt_pret['fc1.bias'].numpy()
        fc2_w_init = ckpt_pret['fc2.weight'].numpy()
        fc2_b_init = ckpt_pret['fc2.bias'].numpy()
        fc3_w_init = ckpt_pret['fc3.weight'].numpy()
        fc3_b_init = ckpt_pret['fc3.bias'].numpy()

    # build a simple MLP
    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            # activation
            self.relu = nn.ReLU()
            # layers
            self.fc1 = BayesianLayers.LinearGroupNJ(28 * 28, 300, clip_var=0.04,
                                                    init_weight=fc1_w_init, init_bias=fc1_b_init,
                                                    cuda=FLAGS.cuda)
            self.fc2 = BayesianLayers.LinearGroupNJ(300, 100,
                                                    init_weight=fc2_w_init, init_bias=fc2_b_init,
                                                    cuda=FLAGS.cuda)
            self.fc3 = BayesianLayers.LinearGroupNJ(100, 10,
                                                    init_weight=fc3_w_init, init_bias=fc3_b_init,
                                                    cuda=FLAGS.cuda)
            # layers including kl_divergence
            self.kl_list = [self.fc1, self.fc2, self.fc3]

        def forward(self, x):
            x = x.view(-1, 28 * 28)
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            return self.fc3(x)

        def get_masks(self, thresholds):
            weight_masks = []
            mask = None
            for i, (layer, threshold) in enumerate(zip(self.kl_list, thresholds)):
                # compute dropout mask
                if mask is None:
                    log_alpha = layer.get_log_dropout_rates().cpu().data.numpy()
                    mask = log_alpha < threshold
                else:
                    mask = np.copy(next_mask)
                try:
                    # was `layers[i + 1]`, which leaked a variable from main();
                    # the layer list lives on the model itself
                    log_alpha = self.kl_list[i + 1].get_log_dropout_rates().cpu().data.numpy()
                    next_mask = log_alpha < thresholds[i + 1]
                except IndexError:
                    # must be the last mask
                    next_mask = np.ones(10)
                weight_mask = np.expand_dims(mask, axis=0) * np.expand_dims(next_mask, axis=1)
                weight_masks.append(weight_mask.astype(float))  # np.float is removed in NumPy >= 1.20
            return weight_masks

        def kl_divergence(self):
            KLD = 0
            for layer in self.kl_list:
                KLD += layer.kl_divergence()
            return KLD

    # init model
    model = Net()
    if FLAGS.cuda:
        model.cuda()

    # init optimizer
    optimizer = optim.Adam(model.parameters())

    # we optimize the variational lower bound scaled by the number of data
    # points (so we can keep our intuitions about hyper-params such as the learning rate)
    discrimination_loss = nn.functional.cross_entropy

    def objective(output, target, kl_divergence):
        discrimination_error = discrimination_loss(output, target)
        variational_bound = discrimination_error + kl_divergence / N
        if FLAGS.cuda:
            variational_bound = variational_bound.cuda()
        return variational_bound

    def train(epoch):
        model.train()
        for data, target in train_loader:
            if FLAGS.cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = objective(output, target, model.kl_divergence())
            loss.backward()
            optimizer.step()
            # clip the variances after each step
            for layer in model.kl_list:
                layer.clip_variances()
        print('Epoch: {} \tTrain loss: {:.6f}'.format(epoch, loss.item()))

    def test():
        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                if FLAGS.cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                # sum up the per-sample loss; `size_average=False` is deprecated
                test_loss += discrimination_loss(output, target, reduction='sum').item()
                pred = output.data.max(1, keepdim=True)[1]
                correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        test_loss /= len(test_loader.dataset)
        print('Test loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))

    # train the model and save some visualisations on the way
    for epoch in range(1, FLAGS.epochs + 1):
        print('Now training epoch: ' + str(epoch))
        train(epoch)
        test()
        # visualizations
        weight_mus = [model.fc1.weight_mu, model.fc2.weight_mu]
        log_alphas = [model.fc1.get_log_dropout_rates(),
                      model.fc2.get_log_dropout_rates(),
                      model.fc3.get_log_dropout_rates()]
        visualise_weights(weight_mus, log_alphas, epoch=epoch, FLAGS=FLAGS)
        log_alpha = model.fc1.get_log_dropout_rates().cpu().data.numpy()
        visualize_pixel_importance(images, log_alpha=log_alpha, FLAGS=FLAGS, epoch=str(epoch))
        if epoch % 3 == 0:
            if not FLAGS.load_pretrained:
                torch.save(model.state_dict(), "epoch" + str(epoch) + "bcdl_no_pretrained.pt")
            else:
                torch.save(model.state_dict(), "epoch" + str(epoch) + "bcdl_pretrained.pt")

    if FLAGS.load_pretrained:
        generate_gif(save='pretrained_pixel', epochs=FLAGS.epochs)
        generate_gif(save='pretrained_weight0_e', epochs=FLAGS.epochs)
        generate_gif(save='pretrained_weight1_e', epochs=FLAGS.epochs)
    else:
        generate_gif(save='pixel', epochs=FLAGS.epochs)
        generate_gif(save='weight0_e', epochs=FLAGS.epochs)
        generate_gif(save='weight1_e', epochs=FLAGS.epochs)

    # compute compression rate and new model accuracy
    layers = [model.fc1, model.fc2, model.fc3]
    thresholds = FLAGS.thresholds
    compute_compression_rate(layers, model.get_masks(thresholds))

    print("Test error with reduced bit precision:")
    weights = compute_reduced_weights(layers, model.get_masks(thresholds))
    for layer, weight in zip(layers, weights):
        if FLAGS.cuda:
            layer.post_weight_mu.data = torch.Tensor(weight).cuda()
        else:
            layer.post_weight_mu.data = torch.Tensor(weight)
    for layer in layers:
        layer.deterministic = True
    test()
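# The pruning in get_masks() above thresholds each layer's log dropout rate
# log(alpha) = log(sigma^2 / mu^2): the noisier a unit's posterior is relative
# to its mean, the more prunable the unit is. A minimal sketch of that statistic
# for hypothetical mu/log-variance tensors (the real computation lives inside
# BayesianLayers.LinearGroupNJ.get_log_dropout_rates):
import torch

def log_dropout_rate(z_mu, z_logvar, eps=1e-8):
    # log(sigma^2 / mu^2) = logvar - log(mu^2)
    return z_logvar - torch.log(z_mu.pow(2) + eps)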
lines = f.read().split('\n')
tasks = []
for line in lines:
    if line == "":
        continue
    print(f"Parsing: {line}")
    lexer = compiler.Lexer().get()
    pg = compiler.Parser()
    pg.parse()
    parser = pg.get()
    tasks.append(parser.parse(lexer.lex(line)))

# Default values
FRAMES = []
FPS = 15
LOOP = 1
output_path = "flipbook.pdf"

for task in tasks:
    if 'fps' in task:
        FPS = task['fps']
    if 'loop' in task:
        LOOP = task['loop']
    if 'frames' in task:
        FRAMES += task['frames']

# the original mixed `output_path` and an undefined `outfile`; one name is used here
if 'gif' in output_path:
    utils.generate_gif(FPS, LOOP, FRAMES, output_path)
elif 'avi' in output_path:
    utils.generate_video(FPS, LOOP, FRAMES, output_path)
print("DONE")
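# utils.generate_gif is not shown in this snippet. A rough sketch of the
# behaviour it presumably has, assuming FRAMES holds image arrays and using
# imageio v2 (the name and signature mirror the call above, but this is an
# assumption, not the project's actual implementation):
import imageio

def generate_gif(fps, loop, frames, path):
    # loop=0 means repeat forever in the GIF format
    imageio.mimsave(path, frames, fps=fps, loop=loop)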
def train(args):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    dataset = pickle.load(open("data/" + args.expert_file + "_" + str(args.num_sampled), "rb"))
    dataset.min_reward = 0
    dataset.max_reward = 1

    action_getter = utils.ActionGetter(
        atari.env.action_space.n,
        replay_memory_start_size=REPLAY_MEMORY_START_SIZE,
        max_frames=MAX_FRAMES,
        eps_initial=args.initial_exploration)
    utils.generate_weights(dataset)

    saver = tf.train.Saver(max_to_keep=10)
    sess = tf.Session(config=config)
    sess.run(init)
    fixed_state = np.expand_dims(atari.fixed_state(sess), axis=0)

    checkpoint_path = args.checkpoint_dir + args.env_id + "/" + "seed_" + str(args.seed) + "/"
    gif_path = args.gif_dir + args.env_id + "/" + "seed_" + str(args.seed) + "/"

    if args.checkpoint_index >= 0:
        saver.restore(sess, checkpoint_path + "model--" + str(args.checkpoint_index))
        print("Loaded Model ... ", checkpoint_path + "model--" + str(args.checkpoint_index))
    logger.configure(args.log_dir + args.env_id + "/" + "seed_" + str(args.seed) + "/")

    if not os.path.exists(gif_path):
        os.makedirs(gif_path)
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)

    frame_number = 0
    loss_list = []
    epoch = 0
    while frame_number < MAX_FRAMES:
        print("Training Model ...")
        epoch_frame = 0
        start_time = time.time()
        for j in tqdm(range(EVAL_FREQUENCY // BS)):
            loss = learn(sess, dataset, MAIN_DQN, TARGET_DQN, BS, gamma=DISCOUNT_FACTOR)  # (8★)
            loss_list.append(loss)
            # the flattened snippet never advanced these counters, so the outer
            # while-loop could not terminate; batch-sized increments are assumed
            frame_number += BS
            epoch_frame += BS

        # Output the progress:
        logger.record_tabular("frame_number", frame_number)
        logger.record_tabular("td loss", np.mean(loss_list[-100:]))
        q_vals = sess.run(MAIN_DQN.action_prob, feed_dict={MAIN_DQN.input: fixed_state})
        for i in range(atari.env.action_space.n):
            logger.record_tabular("q_val action {0}".format(i), q_vals[0, i])
        utils.test_q_values(sess, dataset, atari, action_getter, MAIN_DQN,
                            MAIN_DQN.input, MAIN_DQN.action_prob_expert, BS)
        print("Current Frame: ", frame_number)
        print("TD Loss: ", np.mean(loss_list[-100:]))

        # Evaluation ...
        gif = True
        frames_for_gif = []
        eval_rewards = []
        evaluate_frame_number = 0
        print("Evaluating Model.... ")
        while evaluate_frame_number < EVAL_STEPS:
            terminal_life_lost = atari.reset(sess, evaluation=True)
            episode_reward_sum = 0
            for _ in range(MAX_EPISODE_LENGTH):
                # Fire (action 1) when a life was lost or the game just started,
                # so that the agent does not stand around doing nothing. When playing
                # with other environments, you might want to change this...
                action = 1 if terminal_life_lost and args.env_id == "BreakoutDeterministic-v4" \
                    else action_getter.get_action(sess, frame_number, atari.state,
                                                  MAIN_DQN, evaluation=True)
                processed_new_frame, reward, terminal, terminal_life_lost, new_frame = atari.step(sess, action)
                evaluate_frame_number += 1
                episode_reward_sum += reward
                if gif:
                    frames_for_gif.append(new_frame)
                if terminal:
                    eval_rewards.append(episode_reward_sum)
                    gif = False  # Save only the first game of the evaluation as a gif
                    break
            if len(eval_rewards) % 10 == 0:
                print("Evaluation Completion: ", str(evaluate_frame_number) + "/" + str(EVAL_STEPS))

        print("Evaluation score:\n", np.mean(eval_rewards))
        try:
            utils.generate_gif(frame_number, frames_for_gif, eval_rewards[0], gif_path)
        except IndexError:
            print("No evaluation game finished")

        logger.log("Average Evaluation Reward", np.mean(eval_rewards))
        # guard against division by zero when no episode finished
        logger.log("Average Sequence Length", evaluate_frame_number / max(1, len(eval_rewards)))

        # Save the network parameters
        saver.save(sess, checkpoint_path + 'model-', global_step=frame_number)
        print("Runtime: ", time.time() - start_time)
        print("Epoch: ", epoch, "Total Frames: ", frame_number)
        epoch += 1
        logger.dumpkvs()
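# `learn` is defined elsewhere; given the separate MAIN_DQN/TARGET_DQN networks,
# the TD target it minimizes is presumably the double-DQN form. A numpy sketch
# of that target under assumed array shapes (a batch of transitions), not the
# project's actual code:
import numpy as np

def double_dqn_targets(rewards, terminals, q_main_next, q_target_next, gamma):
    """rewards, terminals: shape (B,); q_*_next: shape (B, n_actions) for s'."""
    best_actions = np.argmax(q_main_next, axis=1)  # action choice by the main net
    q_next = q_target_next[np.arange(len(rewards)), best_actions]  # value by the target net
    return rewards + gamma * q_next * (1.0 - terminals.astype(np.float32))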
actions = {
    0: 'up',  # the snippet began mid-dict; this opening entry is reconstructed
    1: 'down',
    2: 'right',
    3: 'left',
}
env = FourRoomGridWorld()
task = LearnEightPoliciesTileCodingFeat()
state = env.reset()
frames = []  # reconstructed; the original snippet appended to an unshown list
frame = env.render(mode=render_mode)
frames.append(frame)
is_terminal = False
s_a = [0, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2]
for step in range(len(s_a)):
    # a = task.select_target_action(state, policy_id=0)
    # a = np.random.randint(0, 4)
    a = s_a[step]
    next_state, r, is_terminal, info = env.step(a)
    x, y, x_p, y_p, is_rand, selected_action = info.values()
    print(f'step:{step}, '
          f's({state}):({x},{y}), '
          f'a:{actions[a]}, '
          f'environment_action: {actions[selected_action]}, '
          f's_p({next_state}):({x_p},{y_p}), '
          f'stochasticity:{is_rand}, '
          f'terminal:{is_terminal}')
    state = next_state
    frame = env.render(mode=render_mode)
    frames.append(frame)
    if is_terminal:
        break
utils.generate_gif(frames, 'fourRoomGridWorld.gif')
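# The printout above distinguishes the agent's action from the environment's
# `selected_action`: with some probability the gridworld overrides the chosen
# action. A minimal sketch of such a slip mechanism, with an assumed slip
# probability p (the real value is internal to FourRoomGridWorld):
import numpy as np

def apply_stochasticity(a, n_actions=4, p=0.1, rng=np.random):
    is_rand = rng.random() < p
    selected_action = rng.randint(n_actions) if is_rand else a
    return selected_action, is_rand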
def main():
    # import data
    kwargs = {'num_workers': 1, 'pin_memory': True} if FLAGS.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           lambda x: 2 * (x - 0.5),
                       ])),
        batch_size=FLAGS.batchsize, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           lambda x: 2 * (x - 0.5),
                       ])),
        batch_size=FLAGS.batchsize, shuffle=True, **kwargs)

    # for later analysis we take some sample digits
    mask = 255. * np.ones((1, 28, 28))
    # `train_data` is deprecated in newer torchvision; `data` is the current attribute
    examples = train_loader.sampler.data_source.data[0:5].numpy()
    images = np.vstack([mask, examples])

    # build a simple MLP
    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            # activation
            self.relu = nn.ReLU()
            # layers
            self.fc1 = BayesianLayers.LinearGroupNJ(28 * 28, 300, clip_var=0.04, cuda=FLAGS.cuda)
            self.fc2 = BayesianLayers.LinearGroupNJ(300, 100, cuda=FLAGS.cuda)
            self.fc3 = BayesianLayers.LinearGroupNJ(100, 10, cuda=FLAGS.cuda)
            # layers including kl_divergence
            self.kl_list = [self.fc1, self.fc2, self.fc3]

        def forward(self, x):
            x = x.view(-1, 28 * 28)
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            return self.fc3(x)

        def get_masks(self, thresholds):
            weight_masks = []
            mask = None
            for i, (layer, threshold) in enumerate(zip(self.kl_list, thresholds)):
                # compute dropout mask
                if mask is None:
                    log_alpha = layer.get_log_dropout_rates().cpu().data.numpy()
                    mask = log_alpha < threshold
                else:
                    mask = np.copy(next_mask)
                try:
                    # was `layers[i + 1]`, a leaked closure variable; use the model's own list
                    log_alpha = self.kl_list[i + 1].get_log_dropout_rates().cpu().data.numpy()
                    next_mask = log_alpha < thresholds[i + 1]
                except IndexError:
                    # must be the last mask
                    next_mask = np.ones(10)
                weight_mask = np.expand_dims(mask, axis=0) * np.expand_dims(next_mask, axis=1)
                weight_masks.append(weight_mask.astype(float))  # np.float is removed in NumPy >= 1.20
            return weight_masks

        def kl_divergence(self):
            KLD = 0
            for layer in self.kl_list:
                KLD += layer.kl_divergence()
            return KLD

    # init model
    model = Net()
    if FLAGS.cuda:
        model.cuda()

    # init optimizer
    optimizer = optim.Adam(model.parameters())

    # we optimize the variational lower bound scaled by the number of data
    # points (so we can keep our intuitions about hyper-params such as the learning rate)
    discrimination_loss = nn.functional.cross_entropy

    def objective(output, target, kl_divergence):
        discrimination_error = discrimination_loss(output, target)
        variational_bound = discrimination_error + kl_divergence / N
        if FLAGS.cuda:
            variational_bound = variational_bound.cuda()
        return variational_bound

    def train(epoch):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            if FLAGS.cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = objective(output, target, model.kl_divergence())
            loss.backward()
            optimizer.step()
            # clip the variances after each step
            for layer in model.kl_list:
                layer.clip_variances()
        # `loss.data[0]` was the PyTorch 0.3 idiom; `.item()` is current
        print('Epoch: {} \tTrain loss: {:.6f} \t'.format(epoch, loss.item()))

    def test():
        model.eval()
        test_loss = 0
        correct = 0
        # `Variable(..., volatile=True)` was the old inference idiom; use no_grad()
        with torch.no_grad():
            for data, target in test_loader:
                if FLAGS.cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                # `size_average=False` is deprecated; reduction='sum' is equivalent
                test_loss += discrimination_loss(output, target, reduction='sum').item()
                pred = output.data.max(1, keepdim=True)[1]
                correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        test_loss /= len(test_loader.dataset)
        print('Test loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))

    # train the model and save some visualisations on the way
    for epoch in range(1, FLAGS.epochs + 1):
        train(epoch)
        test()
        # visualizations
        weight_mus = [model.fc1.weight_mu, model.fc2.weight_mu]
        log_alphas = [
            model.fc1.get_log_dropout_rates(),
            model.fc2.get_log_dropout_rates(),
            model.fc3.get_log_dropout_rates()
        ]
        visualise_weights(weight_mus, log_alphas, epoch=epoch)
        log_alpha = model.fc1.get_log_dropout_rates().cpu().data.numpy()
        visualize_pixel_importance(images, log_alpha=log_alpha, epoch=str(epoch))

    generate_gif(save='pixel', epochs=FLAGS.epochs)
    generate_gif(save='weight0_e', epochs=FLAGS.epochs)
    generate_gif(save='weight1_e', epochs=FLAGS.epochs)

    # compute compression rate and new model accuracy
    layers = [model.fc1, model.fc2, model.fc3]
    thresholds = FLAGS.thresholds
    compute_compression_rate(layers, model.get_masks(thresholds))

    print("Test error with reduced bit precision:")
    weights = compute_reduced_weights(layers, model.get_masks(thresholds))
    for layer, weight in zip(layers, weights):
        if FLAGS.cuda:
            layer.post_weight_mu.data = torch.Tensor(weight).cuda()
        else:
            layer.post_weight_mu.data = torch.Tensor(weight)
    for layer in layers:
        layer.deterministic = True
    test()
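# The masks returned by get_masks() are per-weight keep/drop indicators, so a
# rough pruning ratio can be read off them directly. This is a simplified
# sketch, not the actual compute_compression_rate (which also accounts for the
# reduced bit precision of the remaining weights):
import numpy as np

def mask_sparsity(weight_masks):
    kept = sum(int(m.sum()) for m in weight_masks)
    total = sum(m.size for m in weight_masks)
    return 1.0 - kept / total  # fraction of weights pruned away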