def main():
    # Train a Q-network on Atari Breakout: a replay memory D capped at 256
    # transitions, a rate parameter that decays from 1.0 toward a floor of
    # 0.05, and reward plots logged per game and per 100 steps.
    rand.seed()
    # env = gym.make('Asteroids-v0')
    env = gym.make('Breakout-v0')
    num_actions = env.action_space.n
    plotter = LossAccPlotter(title="mem256_perGame", show_acc_plot=True,
                             save_to_filepath="/mem256_perGame.png", show_loss_plot=False)
    plotter2 = LossAccPlotter(title="mem256_100", show_acc_plot=True,
                              save_to_filepath="/mem256_100.png", show_loss_plot=False)
    observation = env.reset()
    action = 0
    total_reward = 0   # reward accumulated over the current game
    total_reward2 = 0  # reward accumulated over the current 100-step window
    env.render()
    prev_obs = []
    curr_obs = []
    D = []  # replay memory of [reward, action, prev_obs, curr_obs] entries
    step = 0
    rate = 1
    sess, output_net, x, cost, trainer, mask, reward, nextQ = initialize()
    # load(sess)
    startPrinting = False

    # Warm up: take a few steps so curr_obs holds a full stack of frames.
    for i in range(5):
        observation, rw, done, info = env.step(action)  # pass in 0 for action
        observation = convert_to_small_and_grayscale(observation)
        prev_obs = deepcopy(curr_obs)
        curr_obs = obsUpdate(curr_obs, observation)
        action = 1  # action 1 is FIRE in Breakout

    print("Entering mini-loop")
    # Short loop to seed the replay memory before the main training loop.
    for _ in range(10):
        step += 1
        if done:
            observation = env.reset()
        if len(D) > 256:
            D.pop()
        if step % 1000 == 0:
            rate = rate / 2
            if rate < 0.05:
                rate = 0.05
        # if step % 1000 == 0:
        #     save(sess)
        action = magic(curr_obs, sess, output_net, x, step, rate, False)  # TODO: change this to just take in curr_obs, sess, and False
        env.render()
        observation, rw, done, info = env.step(action)
        observation = convert_to_small_and_grayscale(observation)
        e = [rw, action, deepcopy(prev_obs), deepcopy(curr_obs)]
        D.append(e)
        prev_obs = deepcopy(curr_obs)
        curr_obs = obsUpdate(curr_obs, observation)

    print("Entering full loop")
    while step < 10001:
        step += 1
        if done:
            # A game ended: log the per-game reward and reset the environment.
            print("saving to plot....")
            plot_reward = total_reward
            plotter.add_values(step, acc_train=plot_reward)
            total_reward = 0
            observation = env.reset()
        if len(D) > 256:
            D.pop()
        if step % 100 == 0:
            print(step, "steps have passed")
            save(sess, step)
            rate = rate / 2
            startPrinting = True
            if rate < 0.05:
                rate = 0.05
            print("saving to plot2....")
            plot_reward = total_reward2 / 100
            plotter2.add_values(step, acc_train=plot_reward)
            total_reward2 = 0
        if step % 500 == 0:
            plotter.save_plot("./mem256_perGame.png")
            plotter2.save_plot("./mem256_100.png")
        action = magic(curr_obs, sess, output_net, x, step, rate, startPrinting)  # TODO: change this to just take in curr_obs, sess, and False
        env.render()
        observation, rw, done, info = env.step(action)
        observation = convert_to_small_and_grayscale(observation)
        e = [rw, action, deepcopy(prev_obs), deepcopy(curr_obs)]
        D.insert(0, e)  # newest transition at the front; D.pop() above drops the oldest
        prev_obs = deepcopy(curr_obs)
        curr_obs = obsUpdate(curr_obs, observation)
        update_q_function(D, sess, output_net, x, cost, trainer, mask, reward, nextQ)
        total_reward = total_reward + rw
        total_reward2 = total_reward2 + rw

    plotter.block()
    plotter2.block()
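
# A minimal sketch of what `magic` presumably does: epsilon-greedy action
# selection over the Q-network's output. The decaying `rate` in main() is
# assumed to be the exploration probability; the argument list and the
# network's output shape below are assumptions, not confirmed by this file.
import random
import numpy as np

def epsilon_greedy_action(curr_obs, sess, output_net, x, rate, num_actions):
    # with probability `rate`, explore with a uniformly random action
    if random.random() < rate:
        return random.randrange(num_actions)
    # otherwise act greedily with respect to the current Q-network
    q_values = sess.run(output_net, feed_dict={x: [curr_obs]})
    return int(np.argmax(q_values[0]))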
def train_model(num_tasks, models, dataloaders, dataset_sizes, criterion, optimizers, schedulers, epochs=15):
    since = time.time()
    use_gpu = torch.cuda.is_available()
    final_outputs = []   # accumulates the outputs of all previous columns for the current task
    middle_outputs = []  # holds the outputs of the current column for the current task

    # iterate over the tasks
    for task_id in range(num_tasks):
        # every time we start a new task, empty the accumulated outputs
        final_outputs[:] = []
        # iterate over every column up to and including the one for this task
        for i in range(0, task_id + 1):
            # keep a copy of the weights with the best validation accuracy
            best_model_wts = copy.deepcopy(models[task_id][i].state_dict())
            model = models[task_id][i]
            optimizer = optimizers[i]
            scheduler = schedulers[i]
            # if it is not the column corresponding to the current task, do not train it
            if task_id != i:
                # "previous" columns only need a forward pass to produce their outputs
                num_epochs = 1
                middle_outputs[:] = []
            else:
                num_epochs = epochs
            dataloader = dataloaders[i]
            dataset_size = dataset_sizes[i]
            best_acc = 0.0
            # plotter for loss and accuracy of this column
            plotter = LossAccPlotter()

            for epoch in range(num_epochs):
                print('Epoch {}/{}'.format(epoch, num_epochs - 1))
                print('-' * 10)
                # each epoch has a training and a validation phase
                for phase in ['train', 'val']:
                    if phase == 'train':
                        scheduler.step()
                        model.train(True)   # set model to training mode
                    else:
                        model.train(False)  # set model to evaluation mode

                    running_loss = 0.0
                    running_corrects = 0
                    # iterate over the data; tqdm shows a progress bar
                    for data in tqdm(dataloader[phase]):
                        # get the inputs
                        inputs, labels = data
                        if inputs.type() != int and labels.type() != int:
                            # wrap them in Variable
                            if use_gpu:
                                inputs = Variable(inputs.cuda())
                                labels = Variable(labels.cuda())
                            else:
                                inputs, labels = Variable(inputs), Variable(labels)

                            # zero the parameter gradients
                            optimizer.zero_grad()

                            # forward pass: this column's outputs go to middle_outputs,
                            # while final_outputs carries the outputs of previous columns
                            outputs, middle_outputs = model(inputs, final_outputs)
                            _, preds = torch.max(outputs.data, 1)
                            loss = criterion(outputs, labels)

                            # backward + optimize only in the training phase
                            if phase == 'train':
                                loss.backward()
                                optimizer.step()

                            # statistics
                            running_loss += loss.data[0] * inputs.size(0)
                            running_corrects += torch.sum(preds == labels.data)

                    epoch_loss = running_loss / dataset_size[phase]
                    epoch_acc = running_corrects / dataset_size[phase]
                    print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

                    if phase == 'train':
                        plotter.add_values(epoch, loss_train=epoch_loss, acc_train=epoch_acc, redraw=False)
                    else:
                        plotter.add_values(epoch, loss_val=epoch_loss, acc_val=epoch_acc, redraw=False)

                    # deep copy the model if it is the best seen on validation so far
                    if phase == 'val' and epoch_acc > best_acc:
                        best_acc = epoch_acc
                        best_model_wts = copy.deepcopy(model.state_dict())

                print()
                plotter.redraw()

            if task_id != i:
                final_outputs.append(middle_outputs)  # add this column's output to final_outputs
            plotter.save_plot('plots%d.%d.png' % (task_id, i))
            print('Best val Acc: {:4f}'.format(best_acc))
            # load the best weights back into this column
            models[task_id][i].load_state_dict(best_model_wts)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return models
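
# A hedged sketch of the column interface that train_model assumes: each
# models[task_id][i] is called as model(inputs, final_outputs) and must return
# (outputs, middle_outputs), where final_outputs holds activations from
# previously trained columns. The class name ToyColumn, the layer sizes, and
# the single shared lateral adapter below are illustrative assumptions, not
# the repository's actual column implementation.
import torch
import torch.nn as nn

class ToyColumn(nn.Module):
    def __init__(self, in_features=784, hidden=64, num_classes=10):
        super(ToyColumn, self).__init__()
        self.fc1 = nn.Linear(in_features, hidden)
        # lateral adapter applied to each previous column's hidden activations
        # (hypothetical simplification: one adapter shared across columns)
        self.lateral = nn.Linear(hidden, hidden, bias=False)
        self.fc2 = nn.Linear(hidden, num_classes)

    def forward(self, inputs, prev_outputs):
        h = torch.relu(self.fc1(inputs.view(inputs.size(0), -1)))
        # add lateral contributions from the earlier columns, if any
        for prev_h in prev_outputs:
            h = h + self.lateral(prev_h)
        outputs = self.fc2(h)
        middle_outputs = h  # exposed so later columns can consume it
        return outputs, middle_outputs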