Example #1
# Imports assumed by this example. `rand` is taken to be the standard-library
# random module and LossAccPlotter to come from the laplotter module
# (github.com/aleju/LossAccPlotter). Project-specific helpers used below
# (initialize, magic, obsUpdate, convert_to_small_and_grayscale, save, load,
# update_q_function) are assumed to be defined elsewhere in the project.
import random as rand
from copy import deepcopy

import gym
from laplotter import LossAccPlotter


def main():
    rand.seed()
    #env = gym.make('Asteroids-v0')
    env = gym.make('Breakout-v0')
    num_actions = env.action_space.n
    #print(num_actions)
    
    plotter = LossAccPlotter(title="mem256_perGame", show_acc_plot=True,
                             save_to_filepath="./mem256_perGame.png", show_loss_plot=False)
    plotter2 = LossAccPlotter(title="mem256_100", show_acc_plot=True,
                              save_to_filepath="./mem256_100.png", show_loss_plot=False)
    #plotter.save_plot("./mem256.png")
    
    observation = env.reset()
    #observation = downsample(observation)
    #reward = 0
    action = 0
    total_reward = 0
    total_reward2 = 0
    env.render()
    prev_obs = []
    curr_obs = []
    D = []
    step = 0
    rate = 1
    sess, output_net, x, cost, trainer, mask, reward, nextQ = initialize()
    #load(sess)
    startPrinting = False
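    # warm-up: step the environment a few times so the stacked observation
    # buffers (prev_obs / curr_obs) are filled before the agent starts acting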
    for i in range(5):
        observation, rw, done, info = env.step(action)  # action is 0 on the first pass, then 1
        observation = convert_to_small_and_grayscale(observation)
        prev_obs = deepcopy(curr_obs)
        curr_obs = obsUpdate(curr_obs,observation)
        #e = [rw, action, deepcopy(prev_obs), deepcopy(curr_obs)]
        #D.append(e)
        action = 1
        #print(i)
    print("Entering mini-loop")
    for _ in range(10):
        step +=1
        #print(step)
        if done:
            observation = env.reset()
        if (len(D) > 256):
            D.pop()
        if step % 1000 == 0:
            rate = rate / 2
            if (rate < 0.05):
                rate=0.05
        #if step % 1000 == 0:
            #save(sess)
        action = magic(curr_obs, sess, output_net, x, step, rate, False) #change this to just take in curr_obs, sess, and False
        #action = env.action_space.sample()
        env.render()
        observation, rw, done, info = env.step(action)  # take the chosen action
        #print(action, rw, step)
        observation = convert_to_small_and_grayscale(observation)
        e = [rw, action, deepcopy(prev_obs), deepcopy(curr_obs)]
        D.append(e)
        prev_obs = deepcopy(curr_obs)
        curr_obs = obsUpdate(curr_obs,observation)
    print("Entering full loop")
    while step < 10001:
        step +=1
        
        #print(step)
        if done:
            print("saving to plot....")
            plot_reward = total_reward
            plotter.add_values(step, acc_train=plot_reward)
            total_reward = 0
            observation = env.reset()
        if (len(D) > 256):
            D.pop()
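        # (a collections.deque(maxlen=256) would enforce this cap automatically)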
        if step % 100 == 0:
            print(step,"steps have passed")
            save(sess, step)
            rate = rate / 2
            startPrinting = True
            if (rate < 0.05):
                rate=0.05
            print("saving to plot2....")
            plot_reward = total_reward2/100
            plotter2.add_values(step, acc_train=plot_reward)
            total_reward2 = 0
            #print(step,"steps have passed")
        if step % 500 == 0:
            plotter.save_plot("./mem256_perGame.png")
            plotter2.save_plot("./mem256_100.png")
        action = magic(curr_obs, sess, output_net, x, step, rate, startPrinting) #change this to just take in curr_obs, sess, and False
        #action = env.action_space.sample()
        env.render()
        observation, rw, done, info = env.step(action)  # take the chosen action
        #print(action, rw, step)
        observation = convert_to_small_and_grayscale(observation)
        e = [rw, action, deepcopy(prev_obs), deepcopy(curr_obs)]
        D.insert(0,e)
        prev_obs = deepcopy(curr_obs)
        curr_obs = obsUpdate(curr_obs,observation)
        update_q_function(D, sess, output_net, x, cost, trainer, mask, reward, nextQ)
        total_reward = total_reward + rw
        total_reward2 = total_reward2 + rw
    plotter.block()
    plotter2.block()
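
Example #2
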
# Imports assumed by this example (LossAccPlotter is again taken to come from
# the laplotter module). The code below uses the pre-0.4 PyTorch API
# (Variable, loss.data[0]).
import copy
import time

import torch
from torch.autograd import Variable
from tqdm import tqdm

from laplotter import LossAccPlotter


def train_model(num_tasks,
                models,
                dataloaders,
                dataset_sizes,
                criterion,
                optimizers,
                schedulers,
                epochs=15):
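    """Train a set of columns, one per task, progressive-network style.

    models[task_id][i] is column i as used while training task task_id.
    The column matching the current task trains for the full number of
    epochs; earlier columns run a single epoch so their outputs can be
    collected in final_outputs and passed on to later columns.
    """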
    since = time.time()
    use_gpu = torch.cuda.is_available()
    # final_outputs accumulates the outputs of all columns for each task
    final_outputs = []
    # middle_outputs keeps the outputs of the current task in each column
    middle_outputs = []

    # we iterate over each task
    for task_id in range(num_tasks):
        # every time we start a new task, we empty final_outputs
        final_outputs[:] = []

        # we now iterate over each column up to and including the one for this task
        for i in range(0, task_id + 1):
            # keep a copy of the weights that give the best results
            best_model_wts = copy.deepcopy(models[task_id][i].state_dict())
            model = models[task_id][i]
            optimizer = optimizers[i]
            scheduler = schedulers[i]
            # if it's not the column corresponding to the task, do not train
            if task_id != i:
                # "previous" columns only need to forward data, not to be trained
                num_epochs = 1
                middle_outputs[:] = []
            else:
                num_epochs = epochs
            dataloader = dataloaders[i]
            dataset_size = dataset_sizes[i]
            best_acc = 0.0
            # let's add a plotter for loss and accuracy
            plotter = LossAccPlotter()
            for epoch in range(num_epochs):
                print('Epoch {}/{}'.format(epoch, num_epochs - 1))
                print('-' * 10)
                # Each epoch has a training and validation phase
                for phase in ['train', 'val']:
                    if phase == 'train':
                        scheduler.step()
                        model.train(True)  # Set model to training mode
                    else:
                        model.train(False)  # Set model to evaluate mode

                    running_loss = 0.0
                    running_corrects = 0

                    # Iterate over data.
                    # tqdm shows a progress bar
                    for data in tqdm(dataloader[phase]):
                        # get the inputs
                        inputs, labels = data
                        # Tensor.type() returns a string, so this comparison with int always passes
                        if inputs.type() != int and labels.type() != int:
                            # wrap them in Variable
                            if use_gpu:
                                inputs = Variable(inputs.cuda())
                                labels = Variable(labels.cuda())

                            else:
                                inputs, labels = Variable(inputs), Variable(
                                    labels)

                            # zero the parameter gradients
                            optimizer.zero_grad()

                            # forward
                            outputs, middle_outputs = model(
                                inputs, final_outputs)
                            # this column's outputs go into middle_outputs; final_outputs holds the previous columns' outputs
                            _, preds = torch.max(outputs.data, 1)
                            loss = criterion(outputs, labels)

                            # backward + optimize only if in training phase
                            if phase == 'train':
                                loss.backward()
                                optimizer.step()

                            # statistics
                            running_loss += loss.data[0] * inputs.size(0)  # pre-0.4 PyTorch API; newer versions use loss.item()
                            running_corrects += torch.sum(preds == labels.data)

                    epoch_loss = running_loss / dataset_size[phase]
                    epoch_acc = running_corrects / dataset_size[phase]

                    print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                        phase, epoch_loss, epoch_acc))

                    if phase == 'train':
                        plotter.add_values(epoch,
                                           loss_train=epoch_loss,
                                           acc_train=epoch_acc,
                                           redraw=False)
                    else:
                        plotter.add_values(epoch,
                                           loss_val=epoch_loss,
                                           acc_val=epoch_acc,
                                           redraw=False)
                    # deep copy the model
                    if phase == 'val' and epoch_acc > best_acc:
                        best_acc = epoch_acc
                        best_model_wts = copy.deepcopy(model.state_dict())

                print()
                plotter.redraw()
            if task_id != i:
                final_outputs.append(middle_outputs)
                # we add the output of this column to final_outputs

        plotter.save_plot('plots%d.%d.png' % (task_id, i))
        print('Best val Acc: {:4f}'.format(best_acc))

        # load best model weights
        models[task_id][i].load_state_dict(best_model_wts)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return models
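
A minimal usage sketch for train_model, assuming hypothetical column modules whose forward takes (inputs, previous_column_outputs) and returns (logits, column_activations), as the loop above expects. ToyColumn, make_loader, and every size below are illustrative stand-ins rather than parts of the original project, and since train_model itself relies on the pre-0.4 PyTorch API (Variable, loss.data[0]), this only illustrates the expected argument shapes.

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


class ToyColumn(nn.Module):
    # hypothetical column obeying the (inputs, prev_outputs) -> (logits, activations) contract
    def __init__(self, in_features=8, num_classes=2):
        super(ToyColumn, self).__init__()
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, inputs, prev_outputs):
        logits = self.fc(inputs)
        return logits, [logits]  # logits plus this column's activations for later columns


def make_loader(n=32):
    data = TensorDataset(torch.randn(n, 8), torch.randint(0, 2, (n,)))
    return DataLoader(data, batch_size=8)


num_tasks = 2
columns = [ToyColumn() for _ in range(num_tasks)]  # one column per task
# models[task_id][i] is column i as seen while training task task_id
models = [[columns[i] for i in range(t + 1)] for t in range(num_tasks)]
optimizers = [torch.optim.SGD(c.parameters(), lr=0.01) for c in columns]
schedulers = [torch.optim.lr_scheduler.StepLR(opt, step_size=5) for opt in optimizers]
dataloaders = [{'train': make_loader(), 'val': make_loader()} for _ in range(num_tasks)]
dataset_sizes = [{'train': 32, 'val': 32} for _ in range(num_tasks)]

models = train_model(num_tasks, models, dataloaders, dataset_sizes,
                     nn.CrossEntropyLoss(), optimizers, schedulers, epochs=2)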