Example #1
    def trainer(self):

        steps = 0

        loss_deque = deque(maxlen=100)
        train_loss = []

        last_epoch = 0
        if self.opts.resume:
            last_epoch, loss = self.load_progress()

        for e in range(last_epoch, self.opts.epoch):
            '''Adaptive LR Change'''
            for param_group in self.RNN_optim.param_groups:
                param_group['lr'] = util.linear_LR(e, self.opts)
                print('epoch: {}, RNN_LR: {:.4}'.format(e, param_group['lr']))

            if self.opts.save_progress:
                '''Save the progress before starting to adjust the LR'''
                if e == self.opts.const_epoch:
                    self.save_progress(self.opts.const_epoch,
                                       np.mean(loss_deque))

                if e % self.opts.save_every == 0:
                    self.save_progress(e, np.mean(loss_deque))

            for data, labels, lengths in self.data_loader:
                steps += 1

                data, labels, lengths = util.sort_batch(data, labels, lengths)

                #data = data[:lengths]
                #labels = labels[:lengths]

                self.RNN_optim.zero_grad()

                loss = self.RNN(data.to(device), labels.to(device),
                                lengths.to(device))

                loss.backward()
                self.RNN_optim.step()

                loss_deque.append(loss.cpu().item())
                train_loss.append(np.mean(loss_deque))

                if steps % self.opts.print_every == 0:
                    print('Epoch: {}, Steps: {}, Loss: {:.4}'.format(
                        e, steps, loss.item()))
                    util.raw_score_plotter(train_loss)

        if self.opts.save_progress:
            '''Save the final state after training'''
            self.save_progress(-1, np.mean(loss_deque))

        util.raw_score_plotter(train_loss)
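Example #1 (and several trainers below) sets the learning rate each epoch with util.linear_LR(e, opts) and checkpoints at opts.const_epoch "before starting to adjust the LR". The helper itself is not shown anywhere on this page; a minimal sketch of such a schedule, assuming opts carries an initial lr plus const_epoch and epoch (the real util.linear_LR may differ):

def linear_LR(epoch, opts):
    """Hypothetical linear schedule: keep opts.lr constant for the first
    opts.const_epoch epochs, then decay linearly to zero at opts.epoch."""
    if epoch < opts.const_epoch:
        return opts.lr
    decay_epochs = max(opts.epoch - opts.const_epoch, 1)
    remaining = max(opts.epoch - epoch, 0)
    return opts.lr * remaining / decay_epochs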
Example #2
    def train(self):
        steps = 0

        loss_deque = deque(maxlen=100)
        train_loss = []

        for e in range(self.opts.epoch):
            running_loss = 0

            correct = 0
            total = 0

            for images, labels in iter(self.trainloader):
                steps += 1

                output = self.model(images.to(device))

                self.optimizer.zero_grad()
                loss = self.criterion(output,
                                      labels.to(device).long().squeeze(1))
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()

                preds = torch.max(output, 1)[1]

                for pred, label in zip(preds, labels):
                    if pred.cpu().item() == label.item():
                        correct += 1
                    total += 1

                loss_deque.append(loss.cpu().item())
                train_loss.append(np.mean(loss_deque))

                if steps % self.opts.print_every == 0:
                    print(
                        "Epoch: {}/{}...".format(e + 1, self.opts.epoch),
                        "LossL {:.4f}".format(running_loss /
                                              self.opts.print_every),
                        "Running Accuracy {:4f}".format(correct /
                                                        np.float(total)))

                    running_loss = 0

        util.raw_score_plotter(train_loss)
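The per-sample Python loop that tallies correct predictions works, but it iterates over the batch in Python. A vectorized equivalent under the same shapes (a sketch, not the original code; it assumes labels has shape (batch, 1), as the squeeze(1) above suggests):

# vectorized running-accuracy update, equivalent to the per-sample loop
preds = output.argmax(dim=1).cpu()
correct += (preds == labels.squeeze(1).long()).sum().item()
total += labels.size(0)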
Example #3
    def train(self, D, D_solver, criterion, dataloader):

        steps = 0
        loss_deque = deque(maxlen=100)
        train_loss = []

        for epoch in range(self.opts.epoch):
            running_loss = 0
            correct = 0
            total = 0

            for x, y in dataloader:
                if len(x) != self.opts.batch:
                    continue
                steps += 1

                D_solver.zero_grad()

                '''Real Images'''
                output = D(x.to(device))  # returns logit of real data.
                #print(output.shape)

                loss = criterion(output, y.to(device))
                loss.backward()
                D_solver.step()  # one gradient descent step on the loss

                running_loss += loss.item()

                correct_, total_ = util.prediction_accuracy(output, y.to(device))

                correct += correct_
                total += total_

                loss_deque.append(loss.cpu().item())
                train_loss.append(np.mean(loss_deque))

                if steps % self.opts.print_every == 0:
                    print("Epoch: {}/{}...".format(epoch + 1, self.opts.epoch),
                          "LossL {:.4f}".format(running_loss / self.opts.print_every),
                          "Running Accuracy {:4f}".format(correct / np.float(total)))

                    running_loss = 0

        util.raw_score_plotter(train_loss)
        if self.opts.save_progress:
            print('\nSaving the model\n')
            torch.save(D.state_dict(), self.opts.model_path)
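util.prediction_accuracy is called with the classifier logits and the targets and returns a pair of counts, but its source is not part of this snippet. A plausible minimal implementation, given only how it is used here (an assumption about the helper, not its actual code):

import torch

def prediction_accuracy(output, labels):
    """Return (number_correct, batch_size) for a batch of logits and integer labels."""
    preds = torch.max(output, 1)[1]
    correct = (preds == labels).sum().item()
    total = labels.size(0)
    return correct, total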
Example #4
def trainer(opts, RNN, RNN_optim, criterion, data_loader):

    steps = 0
    loss_deque = deque(maxlen=100)
    train_loss = []
    for e in range(opts.epoch):

        for data, labels, lengths in data_loader:
            steps += 1

            data, labels, lengths = util.sort_batch(data, labels, lengths)

            RNN_optim.zero_grad()
            pred = RNN(data, lengths)
            loss = criterion(pred, labels.to(device))
            loss.backward()
            RNN_optim.step()

            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))

            if steps % opts.print_every == 0:
                print('Epoch: {}, Steps: {}, Loss: {:.4}'.format(
                    e, steps, loss.item()))

    util.raw_score_plotter(train_loss)
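util.sort_batch is applied to every batch before the forward pass but is not shown. For variable-length RNN input it typically sorts the batch by descending sequence length so the data can later be packed with pack_padded_sequence; a minimal sketch under that assumption (the real helper may differ):

import torch

def sort_batch(data, labels, lengths):
    """Sort a padded batch by descending sequence length."""
    lengths, sort_idx = torch.sort(lengths, descending=True)
    return data[sort_idx], labels[sort_idx], lengths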
Example #5
def trainer(opts, RNN, RNN_optim, criterion, loader):

    last_100_loss = deque(maxlen=100)
    last_100_g_loss = []

    iter_count = 0

    for epoch in range(opts.epoch):

        for param_group in RNN_optim.param_groups:
            param_group['lr'] = util.linear_LR(epoch, opts)
            print('Epoch: {}, D_LR: {:.4}'.format(epoch, param_group['lr']))

        for image, label in loader:
            '''Images'''
            image = image.view(-1, 28, 28)
            image = image.to(device)

            label = label.to(device)
            '''run the data through RNN'''
            output = RNN(image)
            loss = criterion(output, label)
            '''take a gradient step'''
            RNN_optim.zero_grad()
            loss.backward()
            RNN_optim.step()  # one gradient descent step
            '''track the running loss'''
            last_100_loss.append(loss.item())
            last_100_g_loss.append(np.mean(last_100_loss))

            iter_count += 1

            if iter_count % opts.print_every == 0:
                print('Epoch: {}, Iter: {}, Loss: {:.4}'.format(
                    epoch, iter_count, loss.item()))
                util.raw_score_plotter(last_100_g_loss)
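This trainer reshapes each image to a (28, 28) sequence, so it is presumably driven by an MNIST-style loader. A hedged usage sketch showing how the pieces could be wired together; the RowRNN model, the opts namespace (with epoch, print_every, and whatever util.linear_LR reads), and the global device are illustrative assumptions, not code from the original repo:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

class RowRNN(nn.Module):
    """Treats each 28x28 image as a length-28 sequence of 28-dim rows."""
    def __init__(self, hidden=128, classes=10):
        super().__init__()
        self.gru = nn.GRU(28, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, classes)

    def forward(self, x):              # x: (batch, 28, 28)
        _, h = self.gru(x)             # h: (1, batch, hidden)
        return self.fc(h.squeeze(0))   # logits: (batch, classes)

model = RowRNN().to(device)
optim = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
loader = DataLoader(datasets.MNIST('.', train=True, download=True,
                                   transform=transforms.ToTensor()),
                    batch_size=64, shuffle=True)

trainer(opts, model, optim, criterion, loader)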
Example #6
def train_CycleGan(train_step,
                   loss,
                   reconstruction,
                   show_every=opts.show_every,
                   print_every=opts.print_every,
                   batch_size=128,
                   num_epoch=10):
    """
    Function that trains the CycleGAN image-to-image translation model.
    :param train_step: an op that defines what to do with the loss function (minimize or maximize)
    :param loss: an op that defines the loss function to be minimized
    :param reconstruction: an op that defines how to reconstruct a target image
    :param show_every: how often to show an image to gauge training progress
    :param print_every: how often to print loss
    :param batch_size: batch size of training samples
    :param num_epoch: how many times to iterate over the training samples
    :return:
    """

    image_dir = '/home/youngwook/Downloads/edges2shoes'
    folder_names = get_folders(image_dir)

    train_folder = folder_names[2]
    val_folder = folder_names[1]

    train_data = AB_Combined_ImageLoader(train_folder,
                                         size=opts.resize,
                                         num_images=opts.num_images,
                                         randomcrop=opts.image_shape)
    train_loader = DataLoader(train_data,
                              batch_size=opts.batch,
                              shuffle=True,
                              num_workers=12)

    step = 0
    target_pred_list = []
    input_pred_list = []
    input_true_list = []
    target_true_list = []
    last_100_loss_dq = deque(maxlen=100)
    last_100_loss = []

    # pools of previously generated images used in the replay feeds below
    # (not defined in the original snippet; plain lists are assumed here)
    input_memory = []
    target_memory = []

    checkpoint_dir = './model'
    saver = tf.train.Saver()

    if opts.resume:
        #print('Loading Saved Checkpoint')
        tf_util.load_session(checkpoint_dir,
                             saver,
                             session,
                             model_name=opts.model_name)

    for epoch in range(num_epoch):
        # every so often, show a sample result

        lr = util.linear_LR(epoch, opts)

        for (minibatch, minbatch_y) in train_loader:
            # run a batch of data through the network
            # logits= sess.run(logits_real, feed_dict={x:minibatch})

            target_pred, input_pred = session.run(
                [target_image_prediction, input_image_prediction],
                feed_dict={
                    input_image: minibatch,
                    target_image: minbatch_y,
                    adaptive_lr: lr
                })

            input_memory.append(input_pred)
            target_memory.append(target_pred)

            target_replay_images = np.vstack(target_memory)
            input_replay_images = np.vstack(input_memory)

            #train the Generator
            _, G_loss_curr = session.run(
                [train_step[2], loss[1]],
                feed_dict={
                    input_image: minibatch,
                    target_image: minbatch_y,
                    input_replay: input_replay_images,
                    target_replay: target_replay_images,
                    input_image_pred: input_pred,
                    target_image_pred: target_pred,
                    adaptive_lr: lr
                })

            #train the discriminator
            _, D_loss_curr = session.run(
                [train_step[0], loss[0][0]],
                feed_dict={
                    input_image: minibatch,
                    input_replay: input_replay_images,
                    adaptive_lr: lr
                })
            _, D_loss_curr = session.run(
                [train_step[1], loss[0][1]],
                feed_dict={
                    target_image: minbatch_y,
                    target_replay: target_replay_images,
                    adaptive_lr: lr
                })

            last_100_loss_dq.append(G_loss_curr)
            last_100_loss.append(np.mean(last_100_loss_dq))

            step += 1
            if step % show_every == 0:
                '''for every show_every step, show reconstructed images from the training iteration'''

                target_name = './img/target_pred_%s.png' % step
                input_name = './img/input_pred_%s.png' % step
                input_true_name = './img/true_input_%s.png' % step
                target_true_name = './img/true_target_%s.png' % step

                #translate the image
                target_pred, input_pred = session.run(
                    [target_image_prediction, input_image_prediction],
                    feed_dict={
                        input_image: minibatch,
                        target_image: minbatch_y
                    })

                target_pred_list.append(target_name)
                input_pred_list.append(input_name)
                input_true_list.append(input_true_name)
                target_true_list.append(target_true_name)

                util.show_images(target_pred[:opts.batch], opts, target_name)
                util.plt.show()
                util.show_images(minbatch_y[:opts.batch], opts,
                                 target_true_name)
                util.plt.show()

                util.show_images(input_pred[:opts.batch], opts, input_name)
                util.plt.show()
                util.show_images(minibatch[:opts.batch], opts, input_true_name)
                util.plt.show()

            if step % print_every == 0:
                print('Epoch: {}, G loss: {:.4}'.format(epoch, G_loss_curr))
                util.raw_score_plotter(last_100_loss)

        #save the model after every epoch
        if opts.save_progress:
            tf_util.save_session(saver,
                                 session,
                                 checkpoint_dir,
                                 epoch,
                                 model_name=opts.model_name)

    util.raw_score_plotter(last_100_loss)

    image_to_gif('', target_pred_list, duration=0.5, gifname='target_pred')
    image_to_gif('', input_pred_list, duration=0.5, gifname='input_pred')
    image_to_gif('', input_true_list, duration=0.5, gifname='input_true')
    image_to_gif('', target_true_list, duration=0.5, gifname='target_true')
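One detail worth noting: input_memory and target_memory are re-stacked with np.vstack on every iteration, so as plain lists the replay pool (and the per-step stacking cost) grows without bound over training. CycleGAN implementations commonly cap this with a bounded image pool; a minimal sketch of that alternative (POOL_SIZE and the pooled helper are hypothetical, not from the original code):

from collections import deque
import numpy as np

POOL_SIZE = 50  # assumed pool size
input_memory = deque(maxlen=POOL_SIZE)
target_memory = deque(maxlen=POOL_SIZE)

def pooled(memory, batch):
    """Add the newest generated batch to the pool and return the pool stacked."""
    memory.append(batch)
    return np.vstack(memory)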
Example #7
        #a deque object used to hold values for the last 100 scores
        total_reward_window.append(rewards)

        #a list that holds the average score of the last 100 episodes
        avg_score_last_100.append(np.mean(total_reward_window))

        #total_reward holds all the rewards
        total_reward.append(rewards)

        #epsilon is decayed after every episode
        epsilon = max(min_epsilon, epsilon * decay)

        print('\rEpisode {}\tAverage Score: {:.3f}\tScore: {:.3f}'.format(episodes, avg_score_last_100[-1], rewards), end="")
        if episodes % PRINT_EVERY == 0:
            print('\rEpisode {}\tAverage Score: {:.3f}\tScore: {:.3f}'.format(episodes, avg_score_last_100[-1], rewards))

        if avg_score_last_100[-1] >= threshold:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.3f}\tScore: {:.3f}'.format(episodes - 100, avg_score_last_100[-1], rewards))
            torch.save(agent.local_model.state_dict(), 'successful_model.pth')
            break

    return total_reward, avg_score_last_100

num_episodes = 10000
threshold = 13

scores, avg_last_100 = DQN(num_episodes, threshold)

raw_score_plotter(scores)
plotter(env_name, len(scores), avg_last_100, threshold)
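The epsilon update epsilon = max(min_epsilon, epsilon * decay) is an exponential decay with a floor. A small illustration of how long the decay takes; the concrete numbers below are assumed for illustration, not taken from the snippet:

import math

epsilon, min_epsilon, decay = 1.0, 0.01, 0.995   # assumed values
# after n episodes epsilon is max(min_epsilon, decay ** n), so it reaches the
# floor after roughly log(min_epsilon) / log(decay) episodes
episodes_to_floor = math.ceil(math.log(min_epsilon) / math.log(decay))
print(episodes_to_floor)   # 919 episodes for these values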
Example #8
    def train(self):
        """
        Vanilla GAN Trainer

        :param D: Discriminator
        :param G: Generator
        :param D_solver: Optimizer for D
        :param G_solver: Optimizer for G
        :param discriminator_loss:  Loss for D
        :param generator_loss:  Loss for G
        :param loader: Torch dataloader
        :param show_every: Show samples after every show_every iterations
        :param batch_size: Batch Size used for training
        :param noise_size: Dimension of the noise to use as input for G
        :param num_epochs: Number of epochs over the training dataset to use for training
        :return:
        """
        last_100_loss = deque(maxlen=100)
        last_100_g_loss = []

        iter_count = 0

        last_epoch = 0
        if self.opts.resume:
            last_epoch, loss = self.load_progress()

        for epoch in range(last_epoch, self.opts.epoch):
            '''Adaptive LR Change'''
            for param_group in self.D_solver.param_groups:
                param_group['lr'] = util.linear_LR(epoch, self.opts)
                print('epoch: {}, D_LR: {:.4}'.format(epoch,
                                                      param_group['lr']))

            if self.opts.save_progress:
                '''Save the progress before starting to adjust the LR'''
                if epoch == self.opts.const_epoch:
                    self.save_progress(self.opts.const_epoch,
                                       np.mean(last_100_loss))

            for image, label in self.loader:
                '''Real images'''
                image = image.to(device)
                '''cast the label to float for the BCE-with-logits loss'''
                label = label.float().to(device)

                '''Train the classifier: get the classification logits'''
                real_logits_cls = self.D(image)

                loss = self.opts.cls_lambda * F.binary_cross_entropy_with_logits(
                    real_logits_cls, label,
                    reduction='sum') / real_logits_cls.size(0)

                self.D_solver.zero_grad()
                loss.backward()
                self.D_solver.step()  # one gradient descent step

                iter_count += 1

                last_100_loss.append(loss.cpu().item())
                last_100_g_loss.append(np.mean(last_100_loss))

                if iter_count % self.opts.print_every == 0:
                    print('Epoch: {}, Iter: {}, D: {:.4} '.format(
                        epoch, iter_count, loss.item()))
                    util.raw_score_plotter(last_100_g_loss)

                if self.opts.save_progress:
                    if iter_count % self.opts.save_every == 0:
                        self.save_progress(epoch, np.mean(last_100_loss))

        if self.opts.save_progress:
            '''Save the final state after training'''
            self.save_progress(-1, np.mean(last_100_loss))
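The classification loss above sums the element-wise binary cross-entropy and divides by the batch size, i.e. it is the per-sample summed BCE averaged over the batch (then scaled by cls_lambda). A small standalone check of that equivalence with made-up shapes, just to make the normalization explicit:

import torch
import torch.nn.functional as F

logits = torch.randn(8, 5)                       # 8 samples, 5 binary attributes
targets = torch.randint(0, 2, (8, 5)).float()

loss_a = F.binary_cross_entropy_with_logits(logits, targets,
                                            reduction='sum') / logits.size(0)
loss_b = F.binary_cross_entropy_with_logits(logits, targets,
                                            reduction='none').sum(dim=1).mean()
assert torch.allclose(loss_a, loss_b)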
Example #9
def main():
    seeding()
    # number of parallel agents

    env = UnityEnvironment(file_name="Tennis.x86_64")
    env_name = 'Tennis'

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    # number of agents
    num_agents = len(env_info.agents)

    # size of each action
    action_size = brain.vector_action_space_size

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[-1]

    # number of training episodes.
    # change this to higher number to experiment. say 30000.
    number_of_episodes = 10000
    episode_length = 10000
    batchsize = 128

    # amplitude of OU noise
    # this slowly decreases to 0
    noise = 1
    noise_reduction = 0.9999

    log_path = os.getcwd() + "/log"
    model_dir = os.getcwd() + "/model_dir"

    os.makedirs(model_dir, exist_ok=True)

    # initialize memory buffer
    buffer = ReplayBuffer(int(500000), batchsize, 0)

    # initialize policy and critic
    maddpg = MADDPG(state_size,
                    action_size,
                    num_agents,
                    seed=12345,
                    discount_factor=0.95,
                    tau=0.02)

    #how often to update the MADDPG model
    episode_per_update = 2
    # training loop

    PRINT_EVERY = 5
    scores_deque = deque(maxlen=100)

    # holds raw scores
    scores = []
    # holds avg scores of last 100 epsiodes
    avg_last_100 = []

    threshold = 0.5

    # use keep_awake to keep workspace from disconnecting
    for episode in range(number_of_episodes):

        env_info = env.reset(
            train_mode=True)[brain_name]  # reset the environment
        state = env_info.vector_observations  # get the current state (for each agent)
        episode_reward_agent0 = 0
        episode_reward_agent1 = 0

        for agent in maddpg.maddpg_agent:
            agent.noise.reset()

        for episode_t in range(episode_length):

            actions = maddpg.act(torch.tensor(state, dtype=torch.float),
                                 noise=noise)
            noise *= noise_reduction

            actions_array = torch.stack(actions).detach().numpy()

            env_info = env.step(actions_array)[brain_name]
            next_state = env_info.vector_observations

            reward = env_info.rewards
            done = env_info.local_done

            episode_reward_agent0 += reward[0]
            episode_reward_agent1 += reward[1]
            # add data to buffer
            '''
            The two agents' states could be concatenated (hstack) here or
            inside the MADDPG update function. Doing it here is simpler,
            since the update function already has the batch dimension to
            deal with; the cost is that the replay buffer also stores
            full_state / full_next_state, which hold the same information
            as state and next_state, just concatenated.
            '''
            full_state = np.concatenate((state[0], state[1]))
            full_next_state = np.concatenate((next_state[0], next_state[1]))

            buffer.add(state, full_state, actions_array, reward, next_state,
                       full_next_state, done)

            state = next_state

            # update the agents every episode_per_update episodes, once the
            # buffer holds enough samples
            if len(buffer) > batchsize and episode % episode_per_update == 0:
                for i in range(num_agents):
                    samples = buffer.sample()
                    maddpg.update(samples, i)
                # soft update the target networks towards the local networks
                maddpg.update_targets()

            if np.any(done):
                #if any of the agents are done break
                break

        episode_reward = max(episode_reward_agent0, episode_reward_agent1)
        scores.append(episode_reward)
        scores_deque.append(episode_reward)
        avg_last_100.append(np.mean(scores_deque))
        # scores.append(episode_reward)
        print('\rEpisode {}\tAverage Score: {:.4f}\tScore: {:.4f}'.format(
            episode, avg_last_100[-1], episode_reward),
              end="")

        if episode % PRINT_EVERY == 0:
            print('\rEpisode {}\tAverage Score: {:.4f}'.format(
                episode, avg_last_100[-1]))

        # saving successful model
        #training ends when the threshold value is reached.
        if avg_last_100[-1] >= threshold:
            save_dict_list = []

            for i in range(num_agents):
                save_dict = {
                    'actor_params':
                    maddpg.maddpg_agent[i].actor.state_dict(),
                    'actor_optim_params':
                    maddpg.maddpg_agent[i].actor_optimizer.state_dict(),
                    'critic_params':
                    maddpg.maddpg_agent[i].critic.state_dict(),
                    'critic_optim_params':
                    maddpg.maddpg_agent[i].critic_optimizer.state_dict()
                }
                save_dict_list.append(save_dict)

            # save both agents' weights once, after collecting them all
            torch.save(
                save_dict_list,
                os.path.join(model_dir, 'episode-{}.pt'.format(episode)))
            # plots graphs
            raw_score_plotter(scores)
            plotter(env_name, len(scores), avg_last_100, threshold)
            break
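The loop above only shows how ReplayBuffer is called: ReplayBuffer(size, batch_size, seed), buffer.add(state, full_state, actions, reward, next_state, full_next_state, done), buffer.sample(), and len(buffer). A rough sketch of a buffer with that interface, written as an assumption about the helper rather than its actual source (maddpg.update may expect the samples in a different layout):

import random
from collections import deque

class ReplayBuffer:
    """Fixed-size experience buffer exposing add / sample / __len__."""

    def __init__(self, buffer_size, batch_size, seed):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        random.seed(seed)

    def add(self, state, full_state, actions, reward, next_state,
            full_next_state, done):
        self.memory.append((state, full_state, actions, reward,
                            next_state, full_next_state, done))

    def sample(self):
        return random.sample(self.memory, self.batch_size)

    def __len__(self):
        return len(self.memory)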
Example #10
    def trainer(self):

        steps = 0
        correct = 0
        total = 0

        loss_deque = deque(maxlen=100)
        train_loss = []

        last_epoch = 0
        if self.opts.resume:
            last_epoch, loss = self.load_progress()

        for e in range(last_epoch, self.opts.epoch):
            '''Adaptive LR Change'''
            for param_group in self.RNN_optim.param_groups:
                param_group['lr'] = util.linear_LR(e, self.opts)
                print('epoch: {}, RNN_LR: {:.4}'.format(e, param_group['lr']))

            if self.opts.save_progress:
                '''Save the progress before starting to adjust the LR'''
                if e == self.opts.const_epoch:
                    self.save_progress(self.opts.const_epoch,
                                       np.mean(loss_deque))

                if e % self.opts.save_every == 0:
                    self.save_progress(e, np.mean(loss_deque))

            for data, labels, lengths in self.data_loader:
                steps += 1

                data, labels, lengths = util.sort_batch(data, labels, lengths)

                self.RNN_optim.zero_grad()
                pred = self.RNN(data, lengths)
                loss = self.criterion(pred, labels.to(device))
                loss.backward()
                self.RNN_optim.step()

                # pick the argmax as the predicted class
                preds = torch.max(pred, 1)[1]

                for p, label in zip(preds, labels):
                    if p.cpu().item() == label.item():
                        correct += 1
                    total += 1

                loss_deque.append(loss.cpu().item())
                train_loss.append(np.mean(loss_deque))

                if steps % self.opts.print_every == 0:
                    print(
                        'Epoch: {}, Steps: {}, Loss: {:.4}, Train Accuracy: {:.4f}'
                        .format(e, steps, loss.item(), correct / float(total)))
                    correct = 0
                    total = 0
                    util.raw_score_plotter(train_loss)

        if self.opts.save_progress:
            '''Save the final state after training'''
            self.save_progress(-1, np.mean(loss_deque))

        util.raw_score_plotter(train_loss)