Example #1
def fun_with_precalculated_sift_reduce_rot(x0, numPlane, img1, xys1, normals1,
                                           img2, xys2, normals2, init_R,
                                           weight):
    assert numPlane == len(xys1)
    assert numPlane == len(xys2)
    assert numPlane == len(normals1)
    assert numPlane == len(normals2)
    R = vec6dToSo3(x0[:6])
    T = np.array(x0[6:9])
    offsets1 = x0[9:9 + numPlane].reshape(-1, 1)
    offsets2 = x0[9 + numPlane:9 + 2 * numPlane].reshape(-1, 1)
    planes1_suncg = offsets1 * normals1
    planes2_suncg = offsets2 * normals2
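    # Convert the plane parameters from the SUNCG frame to the Habitat frame by
    # flipping the y and z axes (and transpose to 3 x numPlane).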
    planes1_habitat = (planes1_suncg * np.array([1.0, -1.0, -1.0])).T
    planes2_habitat = (planes2_suncg * np.array([1.0, -1.0, -1.0])).T

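    # Plane residual: Huber penalty on the distance between each transformed
    # source plane and the corresponding target plane.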
    err_plane = huber(
        weight["huber_delta"],
        np.linalg.norm(project(R, T, planes1_habitat) - planes2_habitat,
                       axis=0),
    ).sum()
    err_pixel = get_pixel_error_precalculated_sift(img1, xys1, planes1_suncg,
                                                   img2, xys2, planes2_suncg,
                                                   R, T)
    change_R = angle_error_mat(R, init_R)
    err = err_plane + err_pixel + change_R * weight["lambda_R"]
    # err = huber(0.01, np.linalg.norm(project(R, T, x1) - x2, axis=0)).sum() + lambda_R * huber(1, change_R) #+ huber(1, change_T)
    return err
Example #2
    def score(self, X_val, y_val, sample_weights=None):
        '''
        Return the weighted Huber loss on the validation data.
        '''
        if sample_weights is None:
            # Default to uniform weights that sum to one.
            sample_weights = np.full(y_val.shape[0], 1.0 / y_val.shape[0])

        res = self.predict(X_val) - y_val
        return np.dot(sample_weights, huber(self.eps, res) * 2.0 * self.eps)
Example #3
    def _print_metric(self, progress, loss_trace, targ_trace):

        sigdig = 3
        Y_flat = [y for batch in self._Y for y in batch]
        if self._continuous:
            resid_mean = np.mean(loss_trace)

            if self._regression_type == "linear":
                targ_var = np.mean(
                    np.square(np.array(targ_trace) - np.mean(Y_flat)))
                r2 = 1. - (resid_mean / targ_var)
                print(progress + "%" + '\t\t residual variance:\t',
                      np.round(resid_mean, sigdig),
                      '\n', ' \t\t total variance:\t',
                      np.round(targ_var, sigdig), '\n', ' \t\t r-squared:\t\t',
                      np.round(r2, sigdig), '\n')

            elif self._regression_type == "robust":
                ae = np.abs(targ_trace - np.median(Y_flat))
                mae = np.mean(ae)
                pmae = 1. - (resid_mean / mae)

                print(progress + "%" + '\t\t residual absolute error:\t',
                      np.round(resid_mean, sigdig),
                      '\n', ' \t\t total absolute error:\t\t',
                      np.round(mae, sigdig), '\n',
                      ' \t\t proportion absolute error:\t',
                      np.round(pmae, sigdig), '\n')

            elif self._regression_type == "robust_smooth":
                ae = huber(1., targ_trace - np.median(Y_flat))
                mae = np.mean(ae)
                pmae = 1. - (resid_mean / mae)

                print(progress + "%" + '\t\t residual absolute error:\t',
                      np.round(resid_mean, sigdig),
                      '\n', ' \t\t total absolute error:\t\t',
                      np.round(mae, sigdig), '\n',
                      ' \t\t proportion absolute error:\t',
                      np.round(pmae, sigdig), '\n')

        else:
            model_mean_neglogprob = np.mean(loss_trace)
            targ_mean_neglogprob = -np.mean(
                [self._Y_logprob[x] for x in targ_trace])
            pnlp = 1. - (model_mean_neglogprob / targ_mean_neglogprob)

            print(progress + "%" + '\t\t residual mean cross entropy:\t',
                  np.round(model_mean_neglogprob,
                           sigdig), '\n', ' \t\t total mean cross entropy:\t',
                  np.round(targ_mean_neglogprob, sigdig), '\n',
                  ' \t\t proportion entropy explained:\t',
                  np.round(pnlp, sigdig), '\n')
Example #4
def tri_distance(pnts, triangles, all_vols, all_angles, omega=0.2):
    """calculates the area of all the triangles in the Delaunay triangulation."""

    vols = []
    for triangle in triangles:
        points = [pnts[triangle[i]] for i in range(3)]

        vec1 = points[1] - points[0]
        vec2 = points[2] - points[0]
        vec3 = points[2] - points[1]
        angles = np.array(
            [angle(vec1, vec2),
             angle(vec3, -vec1),
             angle(-vec2, -vec3)])
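        # Huber penalty on the interior angles, centered at ~1.047 rad (pi/3, the
        # equilateral-triangle angle); the 0.864 divisor looks like an empirical scale.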
        angle_loss = np.mean(huber(1, (angles - 1.047) / 0.864))
        all_angles.append(angle_loss)

        # Use log because the distribution is right skewed
        normalized_vol = (np.log(volume(vec1, vec2)) + 3.40) / 0.62
        vols.append(omega * normalized_vol**2 + (1.0 - omega) * angle_loss)
        all_vols.append(normalized_vol)

    return np.mean(vols)
Example #5
def huber_norm(delta, x):
    return (huber(delta, x) / delta).sum()
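
A quick numeric check of the piecewise rule that huber_norm relies on: scipy.special.huber(delta, r) returns r**2 / 2 where |r| <= delta and delta * (|r| - delta / 2) elsewhere, so huber_norm simply rescales by delta and sums. Using the huber_norm defined above:

import numpy as np
from scipy.special import huber

r = np.array([0.5, 3.0])
print(huber(1.0, r))       # 0.125 (quadratic branch) and 2.5 (linear branch)
print(huber_norm(1.0, r))  # 2.625 = (0.125 + 2.5) / 1.0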
Example #6
    def _huber_loss_function(self, param, y, weights=None, huber_delta=None):
        if weights is None:
            weights = 1.0
        if huber_delta is None:
            huber_delta = 1.0
        return huber(huber_delta, weights * self._errfunc(param, y)).sum()
Example #7
def huberLoss(outputs, labels):
    delta = 1
    r = np.abs(outputs - labels)
    loss = huber(delta, r)
    return loss
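
A minimal usage sketch with hypothetical outputs/labels arrays, using the huberLoss defined above; note that it returns the elementwise Huber values, so a scalar loss still needs an explicit reduction such as .mean() or .sum():

import numpy as np
from scipy.special import huber

outputs = np.array([1.0, 2.0, 10.0])
labels = np.array([1.2, 2.0, 4.0])
elementwise = huberLoss(outputs, labels)  # 0.02, 0.0 and 5.5
print(elementwise.mean())                 # 1.84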
Example #8
    def learn(self,
              total_timesteps,
              callback=None,
              seed=None,
              log_interval=100,
              tb_log_name="DQN"):
        with SetVerbosity(self.verbose), TensorboardWriter(
                self.graph, self.tensorboard_log, tb_log_name) as writer:
            self._setup_learn(seed)

            # Create the replay buffer
            if self.prioritized_replay:
                self.replay_buffer = SimplePrioritizedReplayBuffer(
                    self.buffer_size, alpha=self.prioritized_replay_alpha)
                if self.prioritized_replay_beta_iters is None:
                    prioritized_replay_beta_iters = total_timesteps * self.beta_fraction
                    self.beta_schedule = LinearSchedule(
                        prioritized_replay_beta_iters,
                        initial_p=self.prioritized_replay_beta0,
                        final_p=1.0)
            else:
                # self.replay_buffer = ReplayBuffer(self.buffer_size, gamma=self.gamma, hindsight=self.hindsight, multistep=self.multistep)
                self.replay_buffer = EpisodeReplayBuffer(
                    self.buffer_size, hindsight=self.hindsight)
                self.solved_replay_buffer = EpisodeReplayBuffer(
                    self.buffer_size, hindsight=self.hindsight)
                # self.replay_buffer = SimpleReplayBuffer(self.buffer_size)
                self.beta_schedule = None
            # Create the schedule for exploration starting from 1.
            self.exploration = LinearSchedule(
                schedule_timesteps=int(self.exploration_fraction *
                                       total_timesteps),
                initial_p=1.0,
                final_p=self.exploration_final_eps)

            episode_rewards = [0.0]
            episode_trans = []
            episode_replays = []
            episode_success = [0] * log_interval
            episode_finals = [0] * log_interval
            episode_losses = []
            is_in_loop = False
            loss_accumulator = [0.] * 50

            episode_places = set()
            episode_div = [0] * log_interval

            full_obs = self.env.reset()
            part_obs = np.concatenate(
                (full_obs['observation'], full_obs['desired_goal']), axis=-1)
            begin_obs = [full_obs] * log_interval

            reset = True
            self.episode_reward = np.zeros((1, ))

            for step in range(total_timesteps):
                # self.steps_made += 1
                # if step >= 7 * 100 * 150:
                #     raise Exception("trigger")
                # curriculum
                # curriculum_scrambles = 1 + int(self.steps_made ** (0.50)) // 500
                # curriculum_step_limit = min((curriculum_scrambles + 2) * 2, 100)
                # self.replay_buffer.set_sampling_cut(curriculum_step_limit)
                # self.env.scrambleSize = curriculum_scrambles
                # self.env.step_limit = curriculum_step_limit

                # Take action and update exploration to the newest value
                kwargs = {}
                if not self.param_noise:
                    update_eps = self.exploration.value(step)
                    update_param_noise_threshold = 0.
                else:
                    update_eps = 0.
                    # Compute the threshold such that the KL divergence between perturbed and non-perturbed
                    # policy is comparable to eps-greedy exploration with eps = exploration.value(t).
                    # See Appendix C.1 in Parameter Space Noise for Exploration, Plappert et al., 2017
                    # for detailed explanation.
                    update_param_noise_threshold = \
                        -np.log(1. - self.exploration.value(step) +
                                self.exploration.value(step) / float(self.env.action_space.n))
                    kwargs['reset'] = reset
                    kwargs[
                        'update_param_noise_threshold'] = update_param_noise_threshold
                    kwargs['update_param_noise_scale'] = True
                with self.sess.as_default():
                    # Loop breaking
                    if self.loop_breaking and is_in_loop:
                        # update_eps_value = (update_eps + 1.) / 2.
                        update_eps_value = 1.
                    else:
                        update_eps_value = update_eps
                    if self.boltzmann:
                        values = self.predict_q_values(np.array(part_obs))[0]
                        exp = 1. / update_eps_value
                        action = np.random.choice(
                            np.arange(0, values.shape[0]),
                            p=(exp**values) / sum(exp**values))
                    else:
                        action = self.act(np.array(part_obs)[None],
                                          update_eps=update_eps_value,
                                          **kwargs)[0]
                # action = self.env.action_space.sample()
                env_action = action
                reset = False
                new_obs, rew, done, _ = self.env.step(env_action)

                current_place = None
                is_in_loop = False
                try:
                    current_place = tuple(self.env.room_state.flatten())
                except AttributeError:
                    current_place = tuple(new_obs['observation'].flatten())
                if current_place in episode_places:
                    is_in_loop = True
                episode_places.add(current_place)

                # Store transition in the replay buffer.
                # self.replay_buffer.add(part_obs, action, rew, np.concatenate((new_obs['observation'], new_obs['desired_goal'])), float(done))
                episode_replays.append(
                    (full_obs, action, rew, new_obs, float(done)))
                episode_trans.append((full_obs, action, rew, new_obs))
                full_obs = new_obs
                part_obs = np.concatenate(
                    (full_obs['observation'], full_obs['desired_goal']),
                    axis=-1)

                if writer is not None:
                    ep_rew = np.array([rew]).reshape((1, -1))
                    ep_done = np.array([done]).reshape((1, -1))
                    self.episode_reward = total_episode_reward_logger(
                        self.episode_reward, ep_rew, ep_done, writer, step)

                episode_rewards[-1] += rew
                if done:
                    if np.array_equal(full_obs['achieved_goal'],
                                      full_obs['desired_goal']):
                        episode_success.append(1.)
                        self.solved_episodes.append(episode_replays)
                    else:
                        episode_success.append(0.)
                    episode_success = episode_success[1:]
                    episode_div.append(len(episode_places))
                    episode_div = episode_div[1:]
                    self.episodes_completed += 1
                    if self.model_save_freq > 0 and self.episodes_completed % self.model_save_freq == 0:
                        self.save_model_checkpoint()
                    if self.episodes_completed % (200 * 100) == 0:
                        self.dump_solved_episodes()

                    if not isinstance(self.env, VecEnv):
                        full_obs = self.env.reset()
                        # print(full_obs)
                        part_obs = np.concatenate((full_obs['observation'],
                                                   full_obs['desired_goal']),
                                                  axis=-1)

                    def postprocess_replays(raw_replays, buffer,
                                            prioritized_replay):
                        if not prioritized_replay:
                            buffer.add(raw_replays)
                            return

                        for _ in range(10):
                            for id, (full_obs, action, rew, new_obs,
                                     done) in enumerate(raw_replays):
                                offset = np.random.randint(
                                    id, len(raw_replays))
                                target = raw_replays[offset][3][
                                    'achieved_goal']
                                obs = np.concatenate(
                                    [full_obs['observation'], target], axis=-1)
                                step = np.concatenate(
                                    [new_obs['observation'], target], axis=-1)
                                if np.array_equal(new_obs['achieved_goal'],
                                                  target):
                                    rew = 0.
                                    done = 1.
                                else:
                                    rew = -1.
                                    done = 0.

                                buffer.add(obs, action, rew, step, done)

                    postprocess_replays(episode_replays, self.replay_buffer,
                                        self.prioritized_replay)

                    begin_obs.append(full_obs)
                    begin_obs = begin_obs[1:]

                    if callback is not None:
                        callback(locals(), globals())

                    episode_rewards.append(0.0)
                    episode_trans = []
                    episode_replays = []
                    episode_places = set()
                    episode_losses = []
                    reset = True
                    is_in_loop = False

                if step > self.learning_starts and step % self.train_freq == 0:
                    # Minimize the error in Bellman's equation on a batch sampled from replay buffer.
                    if self.prioritized_replay:
                        experience = self.replay_buffer.sample(
                            self.batch_size,
                            beta=self.beta_schedule.value(step))
                        (obses_t, actions, rewards, obses_tp1, dones, weights,
                         batch_idxes) = experience
                        weights /= np.mean(weights)
                    else:
                        if np.random.randint(0, 100) < 100:  # always
                            obses_t, actions, rewards, obses_tp1, dones, info = self.replay_buffer.sample(
                                self.batch_size)
                        else:
                            obses_t, actions, rewards, obses_tp1, dones, info = self.solved_replay_buffer.sample(
                                self.batch_size)
                        weights, batch_idxes = np.ones_like(rewards), None

                    if writer is not None:
                        # run loss backprop with summary, but once every 100 steps save the metadata
                        # (memory, compute time, ...)
                        if (1 + step) % 100 == 0:
                            run_options = tf.RunOptions(
                                trace_level=tf.RunOptions.FULL_TRACE)
                            run_metadata = tf.RunMetadata()
                            summary, td_errors = self._train_step(
                                obses_t,
                                actions,
                                rewards,
                                obses_tp1,
                                obses_tp1,
                                dones,
                                weights,
                                sess=self.sess,
                                options=run_options,
                                run_metadata=run_metadata)
                            writer.add_run_metadata(run_metadata,
                                                    'step%d' % step)
                        else:
                            summary, td_errors = self._train_step(
                                obses_t,
                                actions,
                                rewards,
                                obses_tp1,
                                obses_tp1,
                                dones,
                                weights,
                                sess=self.sess)
                        writer.add_summary(summary, step)
                    else:
                        _, td_errors = self._train_step(obses_t,
                                                        actions,
                                                        rewards,
                                                        obses_tp1,
                                                        obses_tp1,
                                                        dones,
                                                        weights,
                                                        sess=self.sess)

                    if not self.prioritized_replay:
                        for (dist, error) in zip(info, td_errors):
                            if len(loss_accumulator) < dist + 1:
                                loss_accumulator += [0.] * (
                                    dist + 1 - len(loss_accumulator))
                            loss_accumulator[
                                dist] = loss_accumulator[dist] * 0.99 + huber(
                                    1., error)

                        # if step % 1000 == 0:
                        #     print('accumulator', [int(x) for x in loss_accumulator])
                        #     weights_sum = sum(loss_accumulator)
                        #     print('normalized ', ['%.2f' % (x / weights_sum) for x in loss_accumulator])
                        #     print('distance   ', info)

                    loss = np.mean(
                        np.dot(weights,
                               [huber(1., error) for error in td_errors]))
                    episode_losses.append(loss)

                    if self.prioritized_replay:
                        new_priorities = np.abs(
                            td_errors) + self.prioritized_replay_eps
                        self.replay_buffer.update_priorities(
                            batch_idxes, new_priorities)

                if step > self.learning_starts and step % self.target_network_update_freq == 0:
                    # Update target network periodically.
                    self.update_target(sess=self.sess)

                if len(episode_rewards[-(log_interval + 1):-1]) == 0:
                    mean_100ep_reward = -np.inf
                else:
                    mean_100ep_reward = round(
                        float(np.mean(
                            episode_rewards[-(log_interval + 1):-1])), 1)

                num_episodes = len(episode_rewards)
                if self.verbose >= 1 and done and log_interval is not None and len(
                        episode_rewards) % log_interval == 0:
                    logger.record_tabular("steps", step)
                    logger.record_tabular("episodes", num_episodes)
                    logger.record_tabular(
                        "mean {0} episode reward".format(log_interval),
                        mean_100ep_reward)
                    logger.record_tabular(
                        "{0} episode success".format(log_interval),
                        np.mean(episode_success))
                    logger.record_tabular(
                        "% time spent exploring",
                        int(100 * self.exploration.value(step)))
                    logger.dump_tabular()

        return self
Example #9
    def eval_model(self, dataset, vid, pairWise):
        self.nn.model = self.nn.model.eval()

        num_img_pair = dataset.get_num_images(vid)
        quads = []
        iou_list = []
        sobel_x = []
        sobel_y = []
        imgs = []
        in_video = dataset.get_in_video_path(vid)
        out_video = dataset.get_out_video_path(vid)
        info = self.nn.model.params.info
        imgs_out_dir = join(out_video, "img_tcr")        
        model_out_dir = join(out_video, info)
        make_dir(imgs_out_dir)
        make_dir(model_out_dir)
        print("Evaluating dataset for video ", vid)
        data_x, quad_gt = dataset.get_data_point(vid, 0)
        quads.append(data_x[2])
        # data_x[0] = data_x[0][np.newaxis, :, :, :]
        # bbox = data_x[2][np.newaxis, :]
        # self.nn.init(data_x[0], bbox)
        self.nn.cnt = 0
        start_t = time.time()
        loss = 0
        sz_loss = 0
        with torch.no_grad():
            for img_pair in range(num_img_pair):
                # print(img_pair)
                data_x, quad_gt = dataset.get_data_point(vid, img_pair)
                _, quad_pip_gt = dataset.get_train_data_point(vid, img_pair)
                data_x[0] = data_x[0][np.newaxis, :, :, :]
                bbox = data_x[2][np.newaxis, :]
                data_x[1] = data_x[1][np.newaxis, :, :, :]

                if(img_pair == 0 and not pairWise):
                    quad = bbox
                    self.nn.init(data_x[0], quad)
                elif(pairWise):
                    quad = bbox
                    self.nn.init(data_x[0], quad)
                # else:

                # try:
                outputs = self.nn.track(data_x[1])
                # except:
                #     print("Error!!!!!!")
                #     break
                if(len(outputs) == 9):
                    quad_new, sx, sy, img_pip_tcr, sx_ker, \
                        sy_ker, img_pip_i, quad_pip, scale_z = outputs
                    
                    sx_ker = tensor_to_numpy(sx_ker[0])
                    sy_ker = tensor_to_numpy(sy_ker[0])  
                    np.save(join(model_out_dir, str(img_pair) + "-sx.npy"),\
                            sx_ker)
                    np.save(join(model_out_dir, str(img_pair) + "-sy.npy"),\
                            sy_ker)
 
                elif(len(outputs) == 7):
                    quad_new, sx, sy, img_pip_tcr, img_pip_i,\
                        quad_pip, scale_z = outputs
                # print(quad_pip, quad_pip_gt)
                # Huber penalties (delta=100) on the quad coordinates: the resized (pip)
                # loss and, weighted by scale_z squared, the actual loss.
                sz_loss += huber(100, quad_pip - quad_pip_gt).mean()
                loss += (scale_z[0]) * (scale_z[0]) * huber(100, quad_new - quad_gt).mean()
                # from IPython import embed;embed()
                # print(img_pair, quad.shape, quad_new.shape)

                img_pip_tcr = img_to_numpy(img_pip_tcr[0])
                # img_i = img_to_numpy(img_i[0])

                # cv2.imwrite(join(imgs_out_dir,\
                #     str(img_pair) +"_i.jpeg"), data_x[1][0, :, :, :])
                # np.save(join(imgs_out_dir, str(img_pair) + "-quad-gt.npy"),\
                #         quad_gt)
                # print(img_pip_tcr.shape)
                # cv2.imwrite(join(model_out_dir,\
                #     str(img_pair) +"_pip_tcr.jpeg"), img_pip_tcr)
                # cv2.imwrite(join(model_out_dir,\
                #     str(img_pair) +"_pip_i.jpeg"), img_pip_i)
                # np.save(join(model_out_dir, str(img_pair) + "_quad_pip.npy"),\
                #     quad_pip[-1][0, :])
                # np.save(join(model_out_dir, str(img_pair) + "_quad_pip_id.npy"),\
                #     quad_pip[0][0, :])
                # np.save(join(model_out_dir, str(img_pair) + "_quad_pip_gt.npy"),\
                #     quad_pip_gt)
                
                # np.save(join(model_out_dir, str(img_pair) + "_quad.npy"),\
                #     quad_new[-1][0, :])
                # np.save(join(model_out_dir, str(img_pair) + "_quad_id.npy"),\
                #     quad_new[0][0, :])
                # np.save(join(model_out_dir, str(img_pair) + "_quad_gt.npy"),\
                #     quad_gt)

                # for j in range(len(quad_new)):
                #     resize_path = join(model_out_dir, str(img_pair) + "-resized")
                #     dir_path = join(model_out_dir, str(img_pair))
                #     make_dir(dir_path) 
                #     make_dir(resize_path) 
                #     np.save(join(resize_path, str(j) + "-quad-resized.npy"), quad_uns[j][0, :])
                #     np.save(join(dir_path, str(j) + "-quad.npy"), quad_new[j][0, :])
            
                # sx = img_to_numpy(sx[0])
                # sy = img_to_numpy(sy[0])

                # for i in range(3):
                #     cv2.imwrite(join(model_out_dir,\
                #         str(img_pair) + "-sx-" + str(i) +".jpeg"), sx[:, :, i])
                #     cv2.imwrite(join(model_out_dir,\
                #         str(img_pair) + "-sy-" + str(i) +".jpeg"), sy[:, :, i])


                try:
                    iou = calc_iou(quad_new[0], quad_gt)
                    iou_list.append(iou)
                except Exception as e: 
                    print(e)
                    break
                quads.append(quad_new[0])

                quad = quad_new

        end_t = time.time()
        loss /= num_img_pair
        sz_loss /= num_img_pair
        mean_iou = np.sum(iou_list) / num_img_pair
        write_to_output_file(quads, out_video + "/results.txt")
        
        outputBboxes(in_video +"/", out_video + "/images/", out_video + "/results.txt")
        print("Resized loss = ", sz_loss)
        print("Actual loss = ", loss)
        print("Total time taken = ", end_t - start_t)
        print("Mean IOU = ", mean_iou)

        # plt.plot(iou_list)
        # plt.savefig(out_video + "/iou_plot.png")
        # plt.close()
        return mean_iou
Example #10
def huber_loss(a, b):
    r = a - b
    delta = 1.0
    return np.sum(huber(delta, r), axis=-1)
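
A quick sanity check with a hypothetical 2 x 2 batch, using the huber_loss defined above; the axis=-1 sum yields one loss value per row:

import numpy as np
from scipy.special import huber

a = np.array([[0.5, 2.0], [1.0, 4.0]])
b = np.array([[0.0, 0.0], [1.0, 1.0]])
print(huber_loss(a, b))  # 1.625 and 2.5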
Example #11
    def learning_step(self, step, replay_buffer, writer, episode_losses):
        if step > self.learning_starts and step % self.train_freq == 0:
            # Minimize the error in Bellman's equation on a batch sampled from replay buffer.
            if self.prioritized_replay:
                experience = replay_buffer.sample(
                    self.batch_size, beta=self.beta_schedule.value(step))
                (obses_t, actions, rewards, obses_tp1, dones, weights,
                 batch_idxes) = experience
            else:
                obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(
                    self.batch_size)
                weights, batch_idxes = np.ones_like(rewards), None

            if writer is not None:
                # run loss backprop with summary, but once every 100 steps save the metadata
                # (memory, compute time, ...)
                if (1 + step) % 100 == 0:
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    summary, td_errors = self._train_step(
                        obses_t,
                        actions,
                        rewards,
                        obses_tp1,
                        obses_tp1,
                        dones,
                        weights,
                        sess=self.sess,
                        options=run_options,
                        run_metadata=run_metadata)
                    writer.add_run_metadata(run_metadata, 'step%d' % step)
                else:
                    summary, td_errors = self._train_step(obses_t,
                                                          actions,
                                                          rewards,
                                                          obses_tp1,
                                                          obses_tp1,
                                                          dones,
                                                          weights,
                                                          sess=self.sess)
                writer.add_summary(summary, step)
            else:
                _, td_errors = self._train_step(obses_t,
                                                actions,
                                                rewards,
                                                obses_tp1,
                                                obses_tp1,
                                                dones,
                                                weights,
                                                sess=self.sess)

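            # Importance-weighted Huber loss (delta=1) of the TD errors for the sampled batch.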
            loss = np.mean(
                np.dot(weights, [huber(1., error) for error in td_errors]))
            episode_losses.append(loss)

            if self.prioritized_replay:
                new_priorities = np.abs(
                    td_errors) + self.prioritized_replay_eps
                replay_buffer.update_priorities(batch_idxes, new_priorities)

        if step > self.learning_starts and step % self.target_network_update_freq == 0:
            # Update target network periodically.
            self.update_target(sess=self.sess)