Example #1
def main():
    from tools import plot_data
    print()
    print()
    print("\t \t \033[1m Creating Plot for Antidunes \033[0m")
    print()
    print()
    # TODO: Implement choice as to whether density should be calculated or read from file
    plot_data(save_fig=0, plot_density=1)
    print("\t\t\t \033[1m D O N E !\033[0m")
Example #2
    def visualisation(self, data):
        self.predict(data)

        lc1 = mc.LineCollection(self.lines1, color="r")
        lc2 = mc.LineCollection(self.lines2, color="b")
        fig, ax = plt.subplots()
        ax.add_collection(lc1)
        ax.add_collection(lc2)
        ax.autoscale()
        ax.margins(0.1)
        tools.plot_data(self.data, self.label)
        tools.plot_data(data, self.predict(data), dec=2)
        plt.show()
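Imports assumed by the method above, inferred from the names it uses (a sketch; the containing class with its predict(), lines1/lines2, data and label attributes is not shown):

import matplotlib.pyplot as plt
from matplotlib import collections as mc
import tools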
Example #3
    def begin_DIC(self):
        '''
        Begins DIC analysis, using the selected parameters.
        '''
        # Test initial guess:
        reply = self.test_init()
        if reply == QtGui.QMessageBox.No:
            return
        # If initial guess test is positive, begin analysis:
        self.progressBar.show()
        # Begin timing:
        beginning_time = time.time()

        if self.mode == 'integer':
            result = tools.get_integer_translation(
                self.mraw_path,
                self.roi_reference,
                self.roi_size, (self.N_images, self.h, self.w),
                progressBar=self.progressBar,
                n_im=10)
            inc = result[-1]  # Image sequence selection increment.
            n_iters = [1]
            errors = dict()
            # Unit calibration:
            if self.mm_px != 1:
                result[0][:, 0] = result[0][:, 0] * self.mm_px
                result[0][:, 1] = result[0][:, 1] * self.mm_px

        elif self.mode == 'translation':
            if self.debug:
                print('Model: SIMPLE TRANSLATION')
            #try:
            result = tools.get_simple_translation(
                self.mraw_path,
                self.roi_reference,
                self.roi_size, (self.N_images, self.h, self.w),
                progressBar=self.progressBar,
                increment=self.sequence_increment)
            #except:
            #print('An error occurred. Try using a different method.')

            inc = result[-1]
            n_iters = [1]
            errors = dict()
            # Unit calibration:
            if self.mm_px != 1:
                result[0][:, 0] = result[0][:, 0] * self.mm_px
                result[0][:, 1] = result[0][:, 1] * self.mm_px

        elif self.mode == 'rigid':
            if self.debug:
                print('Model: RIGID')
                print('Interpolating (cropped?) ROI ({:d} px border).'.format(
                    self.crop_px))
            try:
                result = tools.get_rigid_movement(
                    self.mraw_path,
                    self.roi_reference,
                    self.roi_size, (self.N_images, self.h, self.w),
                    progressBar=self.progressBar,
                    tol=self.conv_tol,
                    maxiter=self.max_iter,
                    int_order=self.int_order,
                    increment=self.sequence_increment,
                    crop=self.crop_px)
            except ValueError:
                if self.debug:
                    print(
                        'An error occurred attempting to use cropped ROI, continuing without cropping.'
                    )
                result = tools.get_rigid_movement(
                    self.mraw_path,
                    self.roi_reference,
                    self.roi_size, (self.N_images, self.h, self.w),
                    progressBar=self.progressBar,
                    tol=self.conv_tol,
                    maxiter=self.max_iter,
                    int_order=self.int_order,
                    increment=self.sequence_increment,
                    crop=False)

            n_iters = result[-2]
            inc = result[-1]
            errors = result[1]

            # Unit calibration:
            if self.mm_px != 1:
                result[0][:, 0] = result[0][:, 0] * self.mm_px
                result[0][:, 1] = result[0][:, 1] * self.mm_px

        elif self.mode == 'deformations':
            if self.debug:
                print('Model: DEFORMABLE')
                print('Interpolating (cropped?) ROI ({:d} px border).'.format(
                    self.crop_px))
            try:
                result = tools.get_affine_deformations(
                    self.mraw_path,
                    self.roi_reference,
                    self.roi_size, (self.N_images, self.h, self.w),
                    progressBar=self.progressBar,
                    tol=self.conv_tol,
                    maxiter=self.max_iter,
                    int_order=self.int_order,
                    increment=self.sequence_increment,
                    crop=self.crop_px)
            except ValueError:
                if self.debug:
                    print(
                        'An error occurred attempting to use cropped ROI, continuing without cropping.'
                    )
                result = tools.get_affine_deformations(
                    self.mraw_path,
                    self.roi_reference,
                    self.roi_size, (self.N_images, self.h, self.w),
                    progressBar=self.progressBar,
                    tol=self.conv_tol,
                    maxiter=self.max_iter,
                    int_order=self.int_order,
                    increment=self.sequence_increment,
                    crop=False)
            n_iters = result[-2]
            inc = result[-1]
            errors = result[1]

            # Unit calibration:
            if self.mm_px != 1:
                result[0][:, 2] = result[0][:, 2] * self.mm_px
                result[0][:, 5] = result[0][:, 5] * self.mm_px

        # Hide ROI center marker
        self.imw.CHroi.hide()
        self.ax.hide()
        self.xlabel.hide()
        self.ay.hide()
        self.ylabel.hide()

        # If maximum number of iterations was reached:
        if n_iters[-1] == 0:
            if self.debug:
                print(
                    '\nMaximum iterations reached. Iteration numbers by image:\n{:}\n'
                    .format(
                        n_iters))  # Print optimization loop iteration numbers.

            niter_warning = QtGui.QMessageBox.warning(
                self, 'Warning!',
                'Maximum iterations reached in the optimization process ' +
                '(image {:}).\n(Iterations: mean: {:0.3f}, std: {:0.3f})\n'.
                format(n_iters[-2] + 1, np.mean(n_iters[:-2]),
                       np.std(n_iters[:-2])) +
                'If this occurred early in the analysis process, the selected '
                + 'region of interest might be inappropriate.\n' +
                'Try moving the ROI or increasing its size.\n\n' +
                'Do you wish to proceed to the analysis results anyway?',
                QtGui.QMessageBox.Yes | QtGui.QMessageBox.No,
                QtGui.QMessageBox.No)
            if niter_warning == QtGui.QMessageBox.No:
                self.to_beginning()
                return

        # If warnings or errors were raised:
        if len(errors.keys()) != 0:
            if self.debug:
                print(
                    '\nErrors ({:d}) occurred during analysis. See log for more info.'
                    .format(len(errors.keys())))
                matrices = [{
                    key: item['warp_matrix']
                } for key, item in errors.items()]
                pickle.dump(matrices,
                            open(self.save_path + '/warp_matrices.pkl', 'wb'))
            error_warning = QtGui.QMessageBox.warning(
                self, 'Warning!', 'Errors occurred during the analysis ' +
                '(first at image {:d}).\n(Total: {:d} errors\n'.format(
                    min(errors.keys()), len(errors.keys())) +
                'Iterations: mean: {:0.3f}, std: {:0.3f})\n'.format(
                    n_iters[-2] + 1, np.mean(n_iters[:-2]), np.std(
                        n_iters[:-2])) +
                'If this occurred early in the analysis process, the selected '
                + 'region of interest might be inappropriate.\n' +
                'Try moving the ROI or increasing its size.\n\n' +
                'Do you wish to proceed to the analysis results anyway?',
                QtGui.QMessageBox.Yes | QtGui.QMessageBox.No,
                QtGui.QMessageBox.No)
            if error_warning == QtGui.QMessageBox.No:
                self.to_beginning()
                return

        time_taken = time.time() - beginning_time
        self.kin = result[0]
        self.t = np.reshape(
            np.arange(len(self.kin)) * (inc / self.fps), (len(self.kin), 1))

        # Save the results:
        tkin_data = np.hstack((self.t, self.kin))
        timestamp = datetime.datetime.now().strftime('%d-%m-%H-%M-%S')
        print(self.timestampCheckbox.checkState())
        if self.timestampCheckbox.checkState():
            stamp = timestamp
        else:
            stamp = ''
        self.save_csv(data=tkin_data, stamp=stamp)
        self.pickledump(data=tkin_data, stamp=stamp)

        # Show black image - to close loaded memmap:
        self.imw.setImage(np.zeros((100, 100)))

        # End-of-analysis pop-up message:
        end_reply = QtGui.QMessageBox.question(
            self, 'Analysis ended!',
            '{:} images processed (in {:0.1f} s).\n'.format(
                len(self.kin), time_taken) +
            'Results saved to:\n{} ({}).\n\n'.format(
                self.save_path.replace('\\', '/'), timestamp) +
            'Do you wish to proceed to the analysis results?',
            QtGui.QMessageBox.Yes | QtGui.QMessageBox.No, QtGui.QMessageBox.No)

        # Result visualization:
        if end_reply == QtGui.QMessageBox.Yes:
            tools.plot_data(tkin_data, self.unit)

        self.to_beginning()

        # Delete temporary file:
        head, tail = os.path.split(self.mraw_path)
        if self.image_type in ['tif', 'tiff'] and tail == '_images.npy':
            delete_temp_reply = QtGui.QMessageBox.question(
                self, 'Delete temporary files',
                'A temporary file has been created from .tif images ' +
                '({:s}). Do you wish to remove it? '.format(self.mraw_path) +
                '(Select "No", if you plan to analyse the same image sequence again.)',
                QtGui.QMessageBox.Yes | QtGui.QMessageBox.No,
                QtGui.QMessageBox.Yes)

            if delete_temp_reply == QtGui.QMessageBox.Yes:
                if self.debug:
                    print('Deleting temporary .npy file.')
                os.remove(self.mraw_path)
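For reference, the indexing above implies the following layout of the result value returned by the tools.get_* routines (inferred from this method only, not from the library's documentation):

# Assumed layout of `result` (a sketch inferred from begin_DIC):
#   result[0]  -> ndarray of per-image kinematics (columns scaled by self.mm_px)
#   result[1]  -> dict of per-image warnings/errors ('rigid' and 'deformations' modes)
#   result[-2] -> list of optimization iteration counts per image ('rigid'/'deformations')
#   result[-1] -> image sequence increment actually used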
Example #4
def train(net,
          data,
          optimizer,
          model_path,
          plot_dir,
          batch_size,
          epochs,
          cuda=False,
          grad_clip=None,
          target_net=None,
          env=None,
          low=0,
          high=0.05,
          target_test_episodes=1):
    """
    Train the QBN

    :param net: given network
    :param data: given data to train the network on
    :param optimizer: optimizer method (Adam is preferred)
    :param model_path: path where the model is saved
    :param plot_dir: path where the plots are saved
    :param batch_size: batch size
    :param epochs: number of training epochs
    :param cuda: whether to run on CUDA
    :param grad_clip: max norm of the gradients
    :param target_net: callable that wraps the QBN into a network used for environment testing
    :param env: environment
    :param low: lower bound of the noise data
    :param high: upper bound of the noise data
    :param target_test_episodes: number of episodes to test on
    :return: returns the trained model
    """
    mse_loss = nn.MSELoss().cuda() if cuda else nn.MSELoss()
    train_data, test_data = data

    min_loss_i, best_perf_i = None, None
    batch_loss_data, epoch_losses, test_losses, test_perf_data = [], [], [], []
    total_batches = math.ceil(len(train_data) / batch_size)

    for epoch in range(epochs):
        net.train()
        batch_losses = []
        random.shuffle(train_data)
        for b_i in range(total_batches):
            batch_input = train_data[(b_i * batch_size):(b_i * batch_size) +
                                     batch_size]
            batch_target = Variable(torch.FloatTensor(batch_input))
            batch_input = torch.FloatTensor(batch_input)
            batch_input = Variable(batch_input, requires_grad=True)

            if cuda:
                batch_input, batch_target = batch_input.cuda(
                ), batch_target.cuda()
            batch_output, _ = net(batch_input)

            optimizer.zero_grad()
            loss = mse_loss(batch_output, batch_target)
            loss.backward()
            batch_losses.append(loss.item())
            if grad_clip is not None:
                torch.nn.utils.clip_grad_norm_(net.parameters(), grad_clip)
            optimizer.step()

            logger.info('epoch: %d batch: %d loss: %f' %
                        (epoch, b_i, loss.item()))

        batch_loss_data += batch_losses
        epoch_losses.append(round(np.average(batch_losses), 5))
        test_losses.append(
            round(test(net, test_data, len(test_data), cuda=cuda), 5))
        test_perf = test_with_env(target_net(net),
                                  env,
                                  target_test_episodes,
                                  cuda=cuda)
        test_perf_data.append(test_perf)

        if (best_perf_i is
                None) or (test_perf_data[best_perf_i] <= test_perf_data[-1]
                          ) or test_perf_data[-1] == env.spec.reward_threshold:
            torch.save(net.state_dict(), model_path)
            logger.info('Bottle Net Model Saved!')
        if (best_perf_i is None) or (test_perf_data[best_perf_i] <
                                     test_perf_data[-1]):
            best_perf_i = len(test_perf_data) - 1
            logger.info('Best Perf i updated')
        if (min_loss_i is None) or (test_losses[min_loss_i] > test_losses[-1]):
            min_loss_i = len(test_losses) - 1
            logger.info('min_loss_i updated')

        plot_data(
            verbose_data_dict(test_losses, epoch_losses, batch_loss_data,
                              test_perf_data), plot_dir)
        logger.info('epoch: %d test loss: %f best perf i: %d min loss i: %d' %
                    (epoch, test_losses[-1], best_perf_i, min_loss_i))

        if np.isnan(batch_losses[-1]):
            logger.info('Batch Loss: Nan')
            break
        if ((len(test_losses) - 1 - min_loss_i) > 50) or (test_losses[-1]
                                                          == 0):
            logger.info('Test Loss hasn\'t improved in last 50 epochs'
                        if test_losses[-1] != 0 else 'Zero Test Loss!!')
            logger.info('Stopping!')
            break

    net.load_state_dict(torch.load(model_path))
    return net
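A hypothetical call sketch for the train() helper above; QBN, gru_wrapper, train_set, test_set and env are placeholders, not names taken from the example:

# Hypothetical usage (all names below are assumptions):
net = QBN(input_size=32, x_features=8)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
trained_net = train(net, (train_set, test_set), optimizer,
                    model_path='qbn.pth', plot_dir='plots/',
                    batch_size=32, epochs=100,
                    cuda=torch.cuda.is_available(),
                    target_net=gru_wrapper, env=env)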
Example #5
    tmp = list(zip(*[get_usps(i, datax, datay) for i in l]))
    tmpx, tmpy = np.vstack(tmp[0]), np.hstack(tmp[1])
    idx = np.random.permutation(range(len(tmpy)))
    return tmpx[idx, :], tmpy[idx]


def show_usps(data):
    plt.imshow(data.reshape((16, 16)), interpolation="nearest", cmap="gray")


### Artificial data
plt.ion()
xgentrain, ygentrain = gen_arti(data_type=0, sigma=0.5, nbex=1000, epsilon=0.1)
xgentest, ygentest = gen_arti(data_type=0, sigma=0.5, nbex=1000, epsilon=0.1)
plt.figure()
plot_data(xgentrain, ygentrain)

### Real data
plt.figure()
xuspstrain, yuspstrain = load_usps("USPS/USPS_train.txt")
xuspstest, yuspstest = load_usps("USPS/USPS_test.txt")
x06train, y06train = get_usps([0, 6], xuspstrain, yuspstrain)
x06test, y06test = get_usps([0, 6], xuspstest, yuspstest)
show_usps(x06train[0])


def f(X):
    return np.linalg.norm(X, axis=1)


#### For visualizing the costs
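A possible continuation for the cost visualization (a sketch; the grid bounds and resolution below are assumptions, and np/plt are the imports already used above):

# Evaluate f on a 2D grid and draw its contours (sketch).
grid_x, grid_y = np.meshgrid(np.linspace(-2, 2, 100), np.linspace(-2, 2, 100))
grid = np.c_[grid_x.ravel(), grid_y.ravel()]
plt.figure()
plt.contourf(grid_x, grid_y, f(grid).reshape(grid_x.shape), levels=20)
plt.colorbar()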
Example #6
File: ac.py Project: koulanurag/pfa
    def train(self, net, env_fn, net_path, plots_dir, args):
        optimizer = Adam(net.parameters(), lr=args.lr)
        mse_loss = nn.MSELoss().cuda() if args.cuda else nn.MSELoss()

        test_perf_data = []
        train_perf_data = []
        best = None
        # n_trajectory_loss = []
        n_trajectory_info = []
        for episode in range(1, args.train_episodes + 1):
            net.train()
            env = env_fn()

            # Gather data for a single episode
            done = False
            total_reward = 0
            log_probs = []
            ep_rewards = []
            critic_info = []
            ep_obs = []
            obs = env.reset()
            while not done:
                ep_obs.append(obs)
                obs = Variable(torch.FloatTensor(obs.tolist())).unsqueeze(0)
                action_probs, critic = net(obs)
                m = Categorical(action_probs)
                action = m.sample()
                log_probs.append(m.log_prob(Variable(action.data)))

                action = int(action.data[0])
                obs, reward, done, info = env.step(action)
                ep_rewards.append(reward)
                critic_info.append(critic)
                total_reward += reward
            train_perf_data.append(total_reward)
            n_trajectory_info.append(
                (ep_obs, ep_rewards, critic_info, log_probs))

            # Update the network after collecting n trajectories
            if episode % args.batch_size == 0:

                optimizer.zero_grad()
                critic_loss = 0
                for trajectory_info in n_trajectory_info:
                    obs, _rewards, _critic_info, _log_probs = trajectory_info
                    for i, r in enumerate(_rewards):
                        critic = _critic_info[i]

                        if i != len(_rewards) - 1:
                            target_critic = r + Variable(
                                _critic_info[i + 1].data)
                        else:
                            target_critic = Variable(torch.Tensor([[r]]))
                        critic_loss += mse_loss(critic, target_critic)
                critic_loss = critic_loss / args.batch_size
                critic_loss.backward(retain_graph=True)
                optimizer.step()

                optimizer.zero_grad()
                actor_loss = 0
                for trajectory_info in n_trajectory_info:
                    obs, _rewards, _critic_info, _log_probs = trajectory_info
                    for i, r in enumerate(_rewards):
                        _, v_state = net(
                            Variable(torch.FloatTensor(
                                obs[i].tolist())).unsqueeze(0))
                        v_state = Variable(v_state.data)
                        if i != len(_rewards) - 1:
                            _, v_next_state = net(
                                Variable(torch.FloatTensor(
                                    obs[i + 1].tolist())).unsqueeze(0))
                            v_next_state = Variable(v_next_state.data)
                        else:
                            v_next_state = 0

                        advantage = r + v_next_state - v_state
                        actor_loss -= _log_probs[i] * advantage

                actor_loss = actor_loss / args.batch_size
                actor_loss.backward()
                optimizer.step()

                n_trajectory_info = []
            print('Train=> Episode:{} Reward:{} Length:{}'.format(
                episode, total_reward, len(ep_rewards)))

            # test and log
            if episode % 20 == 0:
                test_reward = self.test(net, env_fn, 10, log=True)
                test_perf_data.append(test_reward)
                print('Test Performance:', test_reward)
                if best is None or best <= test_reward:
                    torch.save(net.state_dict(), net_path)
                    best = test_reward
                    print('Model Saved!')
                if best == env.reward_threshold:
                    print('Optimal Performance achieved!!')
                    break
            if episode % 10 == 0:
                plot_data(
                    self.__get_plot_data_dict(train_perf_data, test_perf_data),
                    plots_dir)

        return net
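Imports assumed by this training method, inferred from the names it uses (a sketch; the surrounding class with its test() and __get_plot_data_dict() helpers, and the plot_data utility, are not shown):

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.autograd import Variable
from torch.distributions import Categorical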
Example #7
import tools


def prixMaison(taille):
    return taille * (10**4)


data = ([20, 2], [40, 4], [80, 8], [30, 2.5], [70, 5], [80, 6])
#data += ([150, 6.5], [200, 11], [90, 7.5])
a = (4 - 2) / (40 - 20)
# y=ax+b
b = 2 - a * 20
print('A 30 m² house costs ' + str(prixMaison(30)))
print('An 80 m² house costs ' + str(prixMaison(80)))
print('A 90 m² house costs ' + str(prixMaison(90)))
print(tools.LSE(data, [a, b]))
meilleur = tools.LSE(data, [a, b])
for i in range(-10000, 10000, 1):
    test = a + (i / 10000)
    if tools.LSE(data, [test, b]) < meilleur:
        meilleur = tools.LSE(data, [test, b])
        aOpti = test

print(aOpti)
print(tools.LSE(data, [aOpti, b]))
print(tools.reg_lin(data))
tools.plot_data(data, aOpti, b)
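For comparison, a closed-form least-squares fit (a sketch using numpy, not part of the original script) gives the slope and intercept that the brute-force loop above approximates for the slope alone:

import numpy as np

x = np.array([d[0] for d in data])
y = np.array([d[1] for d in data])
a_ls, b_ls = np.polyfit(x, y, 1)  # slope and intercept minimizing the squared error
print(a_ls, b_ls)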
Example #8
    def train(self, net, env_fn, net_path, plots_dir, args):
        optimizer = Adam(net.parameters(), lr=args.lr)
        mse_loss = nn.MSELoss().cuda() if args.cuda else nn.MSELoss()

        test_perf_data = []
        train_perf_data = []
        best = None
        # n_trajectory_loss = []
        n_trajectory_info = []
        for episode in range(1, args.train_episodes + 1):
            net.train()
            env = env_fn()

            # Gather data for a single episode
            done = False
            total_reward = 0
            log_probs = []
            ep_rewards = []
            critic_info = []
            ep_obs = []
            obs = env.reset()
            entropies = []
            while not done:
                ep_obs.append(obs)
                obs = Variable(torch.FloatTensor(obs.tolist())).unsqueeze(0)
                if args.cuda:
                    obs = obs.cuda()
                logit, critic = net(obs)

                action_probs = F.softmax(logit, dim=1)
                action_log_prob = F.log_softmax(logit, dim=1)
                entropy = -(action_log_prob * action_probs).sum(1)
                entropies.append(entropy)

                m = Categorical(action_probs)
                action = m.sample()
                log_probs.append(m.log_prob(Variable(action.data)))
                action = int(action.data[0])

                obs, reward, done, info = env.step(action)
                ep_rewards.append(reward)
                critic_info.append(critic)
                total_reward += sum(reward)
            train_perf_data.append(total_reward)
            n_trajectory_info.append(
                (ep_obs, ep_rewards, critic_info, log_probs, entropies))

            # Update the network after collecting n trajectories
            if episode % args.batch_size == 0:
                # critic update
                # TODO: Optimize critic update by calculating MSE once for everything
                optimizer.zero_grad()
                critic_loss = 0
                for trajectory_info in n_trajectory_info:
                    obs, _rewards, _critic_info, _log_probs, _ = trajectory_info
                    for i in range(len(obs)):
                        critic = _critic_info[i]
                        target_critic = []
                        for r_i, r in enumerate(_rewards[i]):
                            if i != len(obs) - 1:
                                target_critic.append(
                                    r + args.gamma *
                                    _critic_info[i +
                                                 1].data.cpu().numpy()[0][r_i])
                            else:
                                target_critic.append(r)
                        target_critic = Variable(
                            torch.FloatTensor(target_critic)).unsqueeze(0)
                        if args.cuda:
                            target_critic = target_critic.cuda()
                        critic_loss += mse_loss(critic, target_critic)
                critic_loss = critic_loss / args.batch_size
                critic_loss.backward(retain_graph=True)
                optimizer.step()

                optimizer.zero_grad()
                actor_loss = 0
                for trajectory_info in n_trajectory_info:
                    obs, _rewards, _critic_info, _log_probs, _entropies = trajectory_info
                    gae = [0 for _ in range(self.reward_types)]
                    for i in range(len(obs)):
                        obs_i = Variable(torch.FloatTensor(
                            obs[i].tolist())).unsqueeze(0)
                        if args.cuda:
                            obs_i = obs_i.cuda()
                        _, v_state = net(obs_i)
                        v_state = v_state.data.cpu().numpy()[0]
                        if i != len(_rewards) - 1:
                            obs_next = Variable(
                                torch.FloatTensor(
                                    obs[i + 1].tolist())).unsqueeze(0)
                            if args.cuda:
                                obs_next = obs_next.cuda()
                            _, v_next_state = net(obs_next)
                            v_next_state = v_next_state.data.cpu().numpy()[0]
                        else:
                            v_next_state = [0 for _ in range(len(_rewards[i]))]

                        advantage = 0
                        for r_i, r in enumerate(_rewards[i]):
                            advantage += r + args.gamma * v_next_state[
                                r_i] - v_state[r_i]
                        actor_loss += -_log_probs[
                            i] * advantage - args.beta * _entropies[i]

                # for trajectory_info in n_trajectory_info:
                #     obs, _rewards, _critic_info, _log_probs, _entropies = trajectory_info
                #     gae = [0 for _ in range(self.reward_types)]
                #     for i in range(len(obs)-1,-1,-1):
                #         obs_i = Variable(torch.FloatTensor(obs[i].tolist())).unsqueeze(0)
                #         if args.cuda:
                #             obs_i = obs_i.cuda()
                #         _, v_state = net(obs_i)
                #         v_state = v_state.data.cpu().numpy()[0]
                #         if i != len(obs) - 1:
                #             obs_next = Variable(torch.FloatTensor(obs[i + 1].tolist())).unsqueeze(0)
                #             if args.cuda:
                #                 obs_next = obs_next.cuda()
                #             _, v_next_state = net(obs_next)
                #             v_next_state = v_next_state.data.cpu().numpy()[0]
                #         else:
                #             v_next_state = [0 for _ in range(len(_rewards))]
                #
                #         # advantage = 0
                #         for r_i, r in enumerate(_rewards[i]):
                #             delta_t = r + args.gamma * v_next_state[r_i] - v_state[r_i]
                #             gae[r_i] = gae[r_i] * args.gamma * args.tau + delta_t
                #             # advantage += r + args.gamma * v_next_state[r_i] - v_state[r_i]
                #         actor_loss -= _log_probs[i] * sum(gae) - args.beta * _entropies[i]
                # for r_i, r in enumerate(_rewards[i]):
                #     delta_t = r + args.gamma * v_next_state[r_i] - v_state[r_i]
                #     gae[r_i] += (args.gamma *  args.tau)
                # actor_loss -= _log_probs[i] * sum(gae) - args.beta * _entropies[i]

                actor_loss = actor_loss / args.batch_size
                actor_loss.backward()
                optimizer.step()

                n_trajectory_info = []
            print('Train=> Episode:{} Reward:{} Length:{}'.format(
                episode, total_reward, len(ep_rewards)))

            # test and log
            if episode % (args.batch_size * 5) == 0:
                test_reward = self.test(net, env_fn, 10, log=True, args=args)
                test_perf_data.append(test_reward)
                print('Test Performance:', test_reward)
                if best is None or best <= test_reward:
                    torch.save(net.state_dict(), net_path)
                    best = test_reward
                    print('Model Saved!')
                if best == env.reward_threshold:
                    print('Optimal Performance achieved!!')
                    break
            if episode % (args.batch_size * 10) == 0:
                plot_data(
                    self.__get_plot_data_dict(train_perf_data, test_perf_data),
                    plots_dir)

        return net
Example #9
import tools

data = ([20, 2], [40, 4], [80, 8])
tools.plot_data(data)

data1 = ([2, 2], [3, 4], [5, 8])
tools.plot_data(data1)

data2 = ([0, 2], [0, 4], [1, 8])
tools.plot_data(data2)

data3 = ([4, 2], [10, 4], [15, 8])
tools.plot_data(data3)
Example #10
    def train(self, net, env_fn, net_path, plots_dir, args):
        optimizer = Adam(net.parameters(), lr=args.lr)

        test_perf_data = []
        test_steps_data = []
        train_perf_data = []
        best = None
        n_trajectory_loss = []
        loss_data = []

        for episode in range(args.train_episodes):
            net.train()
            env = env_fn()

            # Gather data for a single episode
            done = False
            total_reward = 0
            log_probs = []
            ep_rewards = []
            entropies = []
            obs = env.reset()
            while not done:
                obs = Variable(torch.FloatTensor(obs.tolist())).unsqueeze(0)
                action_probs = net(obs)
                m = Categorical(action_probs)
                action = m.sample()
                action_log_prob = m.log_prob(Variable(action.data))
                log_probs.append(action_log_prob)
                entropy = -(action_log_prob * action_probs).sum(1)
                entropies.append(entropy)

                action = int(action.data[0])
                obs, reward, done, info = env.step(action)
                ep_rewards.append(reward)
                total_reward += reward

            train_perf_data.append(total_reward)

            # Estimate the Gradients
            R = 0
            discounted_returns = []
            for r in ep_rewards[::-1]:
                R = r + args.gamma * R
                discounted_returns.insert(0, R)
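            # Worked example of this recursion (illustrative numbers, not from the env):
            # rewards [1, 0, 2] with gamma = 0.9 give R3 = 2.0, R2 = 0 + 0.9 * 2.0 = 1.8,
            # R1 = 1 + 0.9 * 1.8 = 2.62, so discounted_returns == [2.62, 1.8, 2.0]
            # before normalization.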

            discounted_returns = torch.FloatTensor(discounted_returns)
            discounted_returns = (
                discounted_returns - discounted_returns.mean()) / (
                    discounted_returns.std() + np.finfo(np.float32).eps)

            policy_loss = []
            for log_prob, score, entropy in zip(log_probs, discounted_returns,
                                                entropies):
                policy_loss.append(-(log_prob * score - args.beta * entropy))
            n_trajectory_loss.append(policy_loss)  # collect n-trajectories

            # Update the network after collecting n trajectories
            if episode % args.batch_size == 0:
                optimizer.zero_grad()
                sample_loss = 0
                for _loss in n_trajectory_loss:
                    sample_loss += torch.cat(_loss).sum()
                sample_loss = sample_loss / args.batch_size
                loss_data.append(sample_loss.data[0])
                sample_loss.backward()
                optimizer.step()
                n_trajectory_loss = []
            print('Train=> Episode:{} Reward:{} Length:{}'.format(
                episode, total_reward, len(ep_rewards)))

            # test and log
            if episode % args.batch_size == 0:
                test_reward, test_steps = self.test(net, env_fn, 10, log=True)
                test_perf_data.append(test_reward)
                test_steps_data.append(test_steps)
                print('Performance (Reward):', test_reward)
                print('Performance (Steps):', test_steps)
                if best is None or best <= test_reward:
                    torch.save(net.state_dict(), net_path)
                    best = test_reward
                    print('Model Saved!')
                if best == env.reward_threshold:
                    print('Optimal Performance achieved!!')
                    break
            if episode % 10 == 0:
                plot_data(
                    self.__get_plot_data_dict(
                        train_perf_data, (test_perf_data, test_steps_data),
                        loss_data), plots_dir)
        return net
Example #11
def train(net,
          env,
          optimizer,
          model_path,
          plot_dir,
          train_data,
          batch_size,
          epochs,
          cuda=False,
          test_episodes=300,
          trunc_k=10):
    """
    Supervised Learning to train the policy. Saves model in the given path.

    :param net: Bottleneck GRU network
    :param env: environment
    :param optimizer: optimizer method (Adam is preferred)
    :param model_path: path where the model is saved
    :param plot_dir: path where the plots are saved
    :param train_data: given training data
    :param batch_size: batch size
    :param epochs: number of training epochs
    :param cuda: whether to run on CUDA
    :param test_episodes: number of test episodes
    :return: returns the trained model
    """
    batch_seeds = list(train_data.keys())
    test_seeds = [
        random.randint(1000000, 10000000) for _ in range(test_episodes)
    ]

    best_i = None
    batch_loss_data = {'actor_mse': [], 'actor_ce': []}
    epoch_losses = {'actor_mse': [], 'actor_ce': []}
    perf_data = []

    logger.info('Padding Sequences ...')
    for batch_i, batch_seed in enumerate(batch_seeds):
        data_obs, data_actions, data_action_probs, data_len = train_data[
            batch_seed]
        _max, _min = max(data_len), min(data_len)
        obs_shape = data_obs[0][0].shape
        act_shape = np.array(data_actions[0][0]).shape
        act_prob_shape = np.array(data_action_probs[0][0]).shape
        if _max != _min:
            for i in range(len(data_obs)):
                data_obs[i] += [np.zeros(obs_shape)] * (_max - data_len[i])
                data_actions[i] += [np.zeros(act_shape)] * (_max - data_len[i])
                data_action_probs[i] += [np.zeros(act_prob_shape)
                                         ] * (_max - data_len[i])
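        # e.g. with data_len == [5, 3], the shorter sequence gets two zero observations,
        # actions and action-prob vectors appended so the batch stacks into equal-length tensors.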

    for epoch in range(epochs):
        # Test before training, as sometimes the combined model doesn't need to be trained
        test_perf = test(net,
                         env,
                         test_episodes,
                         test_seeds=test_seeds,
                         cuda=cuda,
                         log=False,
                         render=True)
        perf_data.append(test_perf)
        logger.info('epoch %d Test Performance: %f' % (epoch, test_perf))
        if best_i is None or perf_data[best_i] <= perf_data[-1]:
            torch.save(net.state_dict(), model_path)
            logger.info('Binary GRU Model Saved!')
            best_i = len(perf_data) - 1 if best_i is None or perf_data[
                best_i] < perf_data[-1] else best_i

        _reward_threshold_check = perf_data[-1] >= env.spec.reward_threshold
        _epoch_loss_check = (len(epoch_losses['actor_mse']) >
                             0) and (epoch_losses['actor_mse'][-1] == 0)

        if _reward_threshold_check or _epoch_loss_check:
            logger.info('Optimal Performance achieved!!!')
            logger.info('Exiting!')
            break

        net.train()
        batch_losses = {'actor_mse': [], 'actor_ce': []}
        random.shuffle(batch_seeds)
        for batch_i, batch_seed in enumerate(batch_seeds):
            net, actor_mse_loss, actor_ce_loss = _train(net,
                                                        optimizer,
                                                        train_data[batch_seed],
                                                        batch_size,
                                                        cuda=cuda,
                                                        trunc_k=trunc_k)
            batch_losses['actor_mse'].append(actor_mse_loss)
            batch_losses['actor_ce'].append(actor_ce_loss)
            logger.info(
                'epoch: {} batch: {} actor mse loss: {} actor ce loss: {}'.
                format(epoch, batch_i, actor_mse_loss, actor_ce_loss))
        batch_loss_data['actor_mse'] += batch_losses['actor_mse']
        batch_loss_data['actor_ce'] += batch_losses['actor_ce']
        epoch_losses['actor_mse'].append(np.average(batch_losses['actor_mse']))
        epoch_losses['actor_ce'].append(np.average(batch_losses['actor_ce']))
        plot_data(verbose_data_dict(perf_data, epoch_losses, batch_loss_data),
                  plot_dir)

        if np.isnan(batch_loss_data['actor_mse'][-1]) or np.isnan(
                batch_loss_data['actor_ce'][-1]):
            logger.info('Actor Loss: Nan')
            break
        if (len(perf_data) - 1 - best_i) > 50:
            logger.info('Early Stopping!')
            break

    plot_data(verbose_data_dict(perf_data, epoch_losses, batch_loss_data),
              plot_dir)
    net.load_state_dict(torch.load(model_path))
    return net
Example #12
def train(net,
          env,
          optimizer,
          model_path,
          plot_dir,
          train_data,
          batch_size,
          epochs,
          cuda=False,
          grad_clip=5,
          trunc_k=10,
          ep_check=True,
          rw_check=True):
    """
    Supervised Learning to train the policy. Saves model in the given path.

    :param net: Bottleneck GRU network
    :param env: environment
    :param optimizer: optimizer method (Adam is preferred)
    :param model_path: path where the model is saved
    :param plot_dir: path where the plots are saved
    :param train_data: given training data
    :param batch_size: batch size
    :param epochs: number of training epochs
    :param cuda: whether to run on CUDA
    :param grad_clip: max norm of the gradients
    :param ep_check: stop when the epoch loss reaches zero (complete imitation)
    :param rw_check: stop when the reward threshold is reached consistently
    :return: returns the trained model
    """
    batch_seeds = list(train_data.keys())
    test_env = copy.deepcopy(env)
    test_episodes = 300
    test_seeds = [
        random.randint(1000000, 10000000) for _ in range(test_episodes)
    ]

    best_i = None
    batch_loss_data = {'actor': []}
    epoch_losses = {'actor': []}
    perf_data = []

    logger.info('Padding Sequences ...')
    for batch_i, batch_seed in enumerate(batch_seeds):
        data_obs, data_actions, _, data_len = train_data[batch_seed]
        _max, _min = max(data_len), min(data_len)
        _shape = data_obs[0][0].shape
        for i in range(len(data_obs)):
            data_obs[i] += [np.zeros(_shape)] * (_max - data_len[i])
            data_actions[i] += [-1] * (_max - data_len[i])

    for epoch in range(epochs):
        net.train()
        batch_losses = {'actor': []}
        random.shuffle(batch_seeds)
        for batch_i, batch_seed in enumerate(batch_seeds):
            net, actor_loss = _train(net, optimizer, train_data[batch_seed],
                                     batch_size, cuda, grad_clip, trunc_k)
            batch_losses['actor'].append(actor_loss)
            logger.info('epoch: {} batch: {} actor loss: {}'.format(
                epoch, batch_i, actor_loss))

        test_perf = test(net,
                         test_env,
                         test_episodes,
                         test_seeds=test_seeds,
                         cuda=cuda)
        batch_loss_data['actor'] += batch_losses['actor']
        epoch_losses['actor'].append(np.average(batch_losses['actor']))

        perf_data.append(test_perf)
        logger.info('epoch %d Test Performance: %f' % (epoch, test_perf))
        plot_data(verbose_data_dict(perf_data, epoch_losses, batch_loss_data),
                  plot_dir)

        if best_i is None or perf_data[best_i] <= perf_data[-1]:
            torch.save(net.state_dict(), model_path)
            logger.info('GRU Model Saved!')
            best_i = len(perf_data) - 1 if best_i is None or perf_data[
                best_i] < perf_data[-1] else best_i

        if np.isnan(batch_loss_data['actor'][-1]):
            logger.info('Batch Loss : Nan')
            break
        if (len(perf_data) - 1 - best_i) > 100:
            logger.info('Early Stopping!')
            break

        _reward_threshold_check = ((env.spec.reward_threshold is not None) and len(perf_data) > 1) \
                                  and (np.average(perf_data[-10:]) == env.spec.reward_threshold)
        _epoch_loss_check = (len(epoch_losses['actor']) >
                             0) and (epoch_losses['actor'][-1] == 0)

        # We need to ensure complete imitation rather than just performance. Optimal
        # performance can often be achieved without completely imitating the actor.
        if _epoch_loss_check and ep_check:
            logger.info('Complete Imitation of the Agent!!!')
            break
        if _reward_threshold_check and rw_check:
            logger.info('Consistent optimal performance achieved!!!')
            break

    net.load_state_dict(torch.load(model_path))
    return net
Example #13
    def train(self, net, env_fn, net_path, plots_dir, args):
        optimizer = Adam(net.parameters(), lr=args.lr)

        test_perf_data = []
        test_steps_data = []
        train_perf_data = []
        loss_data = []
        best = None
        n_trajectory_loss = []
        n_trajectory_type_loss = []
        for episode in range(args.train_episodes):
            episode_start_time = time.time()
            net.train()
            env = env_fn()

            # Gather data for a single episode
            done = False
            total_reward = 0
            log_probs = []
            entropies = []
            reward_type_log_probs = {i: [] for i in range(self.reward_types)}

            ep_decomposed_rewards = []
            obs = env.reset()
            while not done:
                obs = Variable(torch.Tensor(obs.tolist())).unsqueeze(0)
                action_logits, reward_type_action_probs = net(obs)

                action_probs = F.softmax(action_logits, dim=-1)
                action_log_prob = F.log_softmax(action_logits, dim=-1)
                entropy = -(action_log_prob * action_probs).sum()
                entropies.append(entropy)

                m = Categorical(action_probs)
                action = m.sample()
                log_probs.append(m.log_prob(Variable(action.data)))

                for reward_type_i in range(self.reward_types):
                    m = Categorical(
                        F.softmax(reward_type_action_probs[reward_type_i],
                                  dim=-1))
                    log_prob = m.log_prob(Variable(action.data))
                    if math.isnan(log_prob.data[0]):
                        print(reward_type_action_probs[reward_type_i])
                        import pdb
                        pdb.set_trace()

                    reward_type_log_probs[reward_type_i].append(log_prob)

                action = int(action.data[0])
                obs, reward, done, info = env.step(action)

                ep_decomposed_rewards.append(reward)
                total_reward += sum(reward)

            train_perf_data.append(total_reward)

            # Estimate the Gradients and update the network
            R_total = 0
            R_decomposed = {i: 0 for i in range(self.reward_types)}
            discounted_total_returns = []
            discounted_decomposed_returns = {
                i: []
                for i in range(self.reward_types)
            }
            for r in ep_decomposed_rewards[::-1]:
                R_total = sum(r) + args.gamma * R_total
                discounted_total_returns.insert(0, R_total)
                for i, r_d in enumerate(r):
                    R_decomposed[i] = r_d + args.gamma * R_decomposed[i]
                    discounted_decomposed_returns[i].insert(0, R_decomposed[i])

            discounted_total_returns = torch.FloatTensor(
                discounted_total_returns)
            discounted_total_returns = (
                discounted_total_returns - discounted_total_returns.mean()) / (
                    discounted_total_returns.std() + np.finfo(np.float32).eps)

            for i in discounted_decomposed_returns:
                discounted_decomposed_returns[i] = torch.FloatTensor(
                    discounted_decomposed_returns[i])
                discounted_decomposed_returns[i] = (
                    discounted_decomposed_returns[i] -
                    discounted_decomposed_returns[i].mean()) / (
                        discounted_decomposed_returns[i].std() +
                        np.finfo(np.float32).eps)

            policy_loss = []
            policy_type_losses = {i: [] for i in range(self.reward_types)}
            for log_prob, score, entropy in zip(log_probs,
                                                discounted_total_returns,
                                                entropies):
                loss = -log_prob * score - args.beta * entropy
                policy_loss.append(loss)

            for type_i in range(self.reward_types):
                for log_prob, score in zip(
                        reward_type_log_probs[type_i],
                        discounted_decomposed_returns[type_i]):
                    policy_type_losses[type_i].append(-log_prob * score)

            n_trajectory_loss.append(policy_loss)
            n_trajectory_type_loss.append(policy_type_losses)

            if episode % args.batch_size == 0:
                start_time = time.time()
                optimizer.zero_grad()
                sample_loss = 0

                for _loss in n_trajectory_loss:
                    sample_loss += torch.cat(_loss).sum()

                for _loss in n_trajectory_type_loss:
                    for type_i in range(self.reward_types):
                        sample_loss += torch.cat(_loss[type_i]).sum()

                end_time = time.time()
                print("Loss Time", end_time - start_time)

                sample_loss = sample_loss / args.batch_size
                loss_data.append(sample_loss.data[0])
                start_time = time.time()
                sample_loss.backward()
                optimizer.step()
                end_time = time.time()
                n_trajectory_loss = []
                n_trajectory_type_loss = []

                print("Update Network Time", end_time - start_time)

            episode_end_time = time.time()
            print('Episode:{} Reward:{} Length:{} Time:{}'.format(
                episode, total_reward, len(ep_decomposed_rewards),
                episode_end_time - episode_start_time))

            # test and log
            if episode % 10 == 0:
                test_reward, test_steps = self.test(net,
                                                    env_fn,
                                                    10,
                                                    log=True,
                                                    render=False)
                test_perf_data.append(test_reward)
                test_steps_data.append(test_steps)
                print('Performance (Reward):', test_reward)
                print('Performance (Steps):', test_steps)
                if best is None or best <= test_reward:
                    torch.save(net.state_dict(), net_path)
                    best = test_reward
                    print('Model Saved!')
            if episode % 10 == 0:
                plot_data(
                    self.__get_plot_data_dict(
                        train_perf_data, (test_perf_data, test_steps_data),
                        loss_data), plots_dir)

        return net