def draw_state(self, return_list=False):
        if not constants.DRAWING:
            return
        from utils import drawing
        curr_image = self.game_state.detection_image.copy()
        curr_depth = self.game_state.s_t_depth
        if curr_depth is not None:
            curr_depth = self.game_state.s_t_depth.copy()
            # Pin two pixels to the depth extremes so the rendered colormap
            # scale stays fixed across frames.
            curr_depth[0, 0] = 0
            curr_depth[0, 1] = constants.MAX_DEPTH

        label = np.flipud(self.get_label())
        patch = np.flipud(self.game_state.graph.get_graph_patch(self.pose)[0])
        state_image = self.game_state.draw_state().copy()
        memory_map = np.flipud(self.game_state.graph.memory.copy())
        memory_map = np.concatenate(
            (memory_map[:, :, [0]], np.zeros(
                memory_map[:, :, [0]].shape), memory_map[:, :, 1:]),
            axis=2)

        images = [
            curr_image,
            state_image,
            np.minimum(memory_map[:, :, 0], 200),
            np.argmax(memory_map[:, :, 1:], axis=2),
            label[:, :],
            np.minimum(patch[:, :, 0], 10),
        ]
        if return_list:
            return images
        action_str = 'action: %s possible %.3f' % (
            self.action_util.actions[np.where(
                self.action == 1)[0].squeeze()]['action'], self.is_possible)
        titles = [
            '%07d' % self.num_steps, action_str, 'Occupancy Map',
            'Objects Map', 'Label Patch', 'Learned Patch'
        ]
        image = drawing.subplot(images,
                                4,
                                3,
                                curr_image.shape[1],
                                curr_image.shape[0],
                                titles=titles,
                                border=3)

        return image
Example 2
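# Imports and module-level state assumed by this example (names inferred from
# usage in the snippet; the exact project-local module paths are assumptions):
import os
import pdb
import random
import threading
import time

import numpy as np
import tensorflow as tf

import constants
from networks.free_space_network import FreeSpaceNetwork  # assumed path
from sequence_generator import SequenceGenerator  # assumed path
from utils import tf_util

# Shared replay-buffer globals referenced by the loader threads below.
data_buffer = []
# Assumed initialization: unfilled slots start with a large count so that
# np.where(data_counts < 100) only ever picks slots that have been filled.
data_counts = np.full(constants.REPLAY_BUFFER_SIZE, 9999, dtype=np.int32)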
def run():
    try:
        with tf.variable_scope('nav_global_network'):
            network = FreeSpaceNetwork(constants.GRU_SIZE,
                                       constants.BATCH_SIZE,
                                       constants.NUM_UNROLLS)
            network.create_net()
            training_step = network.training_op

        with tf.variable_scope('loss'):
            loss_summary_op = tf.summary.merge([
                tf.summary.scalar('loss', network.loss),
            ])
        summary_full = tf.summary.merge_all()
        conv_var_list = [
            v for v in tf.trainable_variables()
            if 'conv' in v.name and 'weight' in v.name and
            (v.get_shape().as_list()[0] != 1 or v.get_shape().as_list()[1] != 1)
        ]
        for var in conv_var_list:
            tf_util.conv_variable_summaries(var,
                                            scope=var.name.replace('/',
                                                                   '_')[:-2])
        summary_with_images = tf.summary.merge_all()

        # prepare session
        sess = tf_util.Session()

        seq_inds = np.zeros((constants.BATCH_SIZE, 2), dtype=np.int32)

        sequence_generators = []
        for thread_index in range(constants.PARALLEL_SIZE):
            gpus = str(constants.GPU_ID).split(',')
            sequence_generator = SequenceGenerator(sess)
            sequence_generators.append(sequence_generator)

        sess.run(tf.global_variables_initializer())

        if not (constants.DEBUG or constants.DRAWING):
            from utils import py_util
            time_str = py_util.get_time_str()
            summary_writer = tf.summary.FileWriter(
                os.path.join(constants.LOG_FILE, time_str), sess.graph)
        else:
            summary_writer = None

        saver = tf.train.Saver(max_to_keep=3)

        # init or load checkpoint
        start_it = tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR)

        sess.graph.finalize()

        data_lock = threading.Lock()

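        # Producer threads: each one generates whole episodes and pushes them
        # into the shared replay buffer. Once the buffer is full, the episode
        # that has been sampled most often (tracked in data_counts) is replaced.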
        def load_new_data(thread_index):
            global data_buffer
            global data_counts

            sequence_generator = sequence_generators[thread_index]
            counter = 0
            while True:
                while not (len(data_buffer) < constants.REPLAY_BUFFER_SIZE
                           or np.max(data_counts) > 0):
                    time.sleep(1)
                counter += 1
                if constants.DEBUG:
                    print('\nThread %d' % thread_index)
                new_data, bounds, goal_pose = sequence_generator.generate_episode()
                new_data = {
                    key: ([new_data[ii][key] for ii in range(len(new_data))])
                    for key in new_data[0]
                }
                new_data['goal_pose'] = goal_pose
                new_data['memory'] = np.zeros(
                    (constants.SPATIAL_MAP_HEIGHT, constants.SPATIAL_MAP_WIDTH,
                     constants.MEMORY_SIZE))
                new_data['gru_state'] = np.zeros(constants.GRU_SIZE)
                if constants.DRAWING:
                    new_data['debug_images'] = sequence_generator.debug_images
                data_lock.acquire()
                if len(data_buffer) < constants.REPLAY_BUFFER_SIZE:
                    data_counts[len(data_buffer)] = 0
                    data_buffer.append(new_data)
                    counts = data_counts[:len(data_buffer)]
                    if counter % 10 == 0:
                        print(
                            'Buffer size %d  Num used %d  Max used amount %d' %
                            (len(data_buffer), len(
                                counts[counts > 0]), np.max(counts)))
                else:
                    max_count_ind = np.argmax(data_counts)
                    data_buffer[max_count_ind] = new_data
                    data_counts[max_count_ind] = 0
                    if counter % 10 == 0:
                        print('Num used %d  Max used amount %d' %
                              (len(data_counts[data_counts > 0]),
                               np.max(data_counts)))
                data_lock.release()

        threads = []
        for i in range(constants.PARALLEL_SIZE):
            load_data_thread = threading.Thread(target=load_new_data,
                                                args=(i, ))
            load_data_thread.daemon = True
            load_data_thread.start()
            threads.append(load_data_thread)
            time.sleep(1)

        sequences = [None] * constants.BATCH_SIZE

        curr_it = 0
        dataTimeTotal = 0.00001
        solverTimeTotal = 0.00001
        summaryTimeTotal = 0.00001
        totalTimeTotal = 0.00001

        chosen_inds = set()
        loc_to_chosen_ind = {}
        for iteration in range(start_it, constants.MAX_TIME_STEP):
            if iteration == start_it or iteration % 10 == 1:
                currentTimeStart = time.time()
            tStart = time.time()
            batch_data = []
            batch_action = []
            batch_memory = []
            batch_gru_state = []
            batch_labels = []
            batch_pose = []
            batch_mask = []
            batch_goal_pose = []
            batch_pose_indicator = []
            batch_possible_label = []
            batch_debug_images = []
            for bb in range(constants.BATCH_SIZE):
                if seq_inds[bb, 0] == seq_inds[bb, 1]:
                    # Pick a new random sequence
                    pickable_inds = set(
                        np.where(data_counts < 100)[0]) - chosen_inds
                    count_size = len(pickable_inds)
                    while count_size == 0:
                        pickable_inds = set(
                            np.where(data_counts < 100)[0]) - chosen_inds
                        count_size = len(pickable_inds)
                        time.sleep(1)
                    random_ind = random.sample(pickable_inds, 1)[0]
                    data_lock.acquire()
                    sequences[bb] = data_buffer[random_ind]
                    goal_pose = sequences[bb]['goal_pose']
                    sequences[bb]['memory'] = np.zeros(
                        (constants.SPATIAL_MAP_HEIGHT,
                         constants.SPATIAL_MAP_WIDTH, constants.MEMORY_SIZE))
                    sequences[bb]['gru_state'] = np.zeros(constants.GRU_SIZE)
                    data_counts[random_ind] += 1
                    if bb in loc_to_chosen_ind:
                        chosen_inds.remove(loc_to_chosen_ind[bb])
                    loc_to_chosen_ind[bb] = random_ind
                    chosen_inds.add(random_ind)
                    data_lock.release()
                    seq_inds[bb, 0] = 0
                    seq_inds[bb, 1] = len(sequences[bb]['color'])
                data_len = min(constants.NUM_UNROLLS,
                               seq_inds[bb, 1] - seq_inds[bb, 0])
                ind0 = seq_inds[bb, 0]
                ind1 = seq_inds[bb, 0] + data_len
                data = sequences[bb]['color'][ind0:ind1]
                action = sequences[bb]['action'][ind0:ind1]
                labels = sequences[bb]['label'][ind0:ind1]
                memory = sequences[bb]['memory'].copy()
                gru_state = sequences[bb]['gru_state'].copy()
                pose = sequences[bb]['pose'][ind0:ind1]
                goal_pose = sequences[bb]['goal_pose']
                mask = sequences[bb]['weight'][ind0:ind1]
                pose_indicator = sequences[bb]['pose_indicator'][ind0:ind1]
                possible_label = sequences[bb]['possible_label'][ind0:ind1]
                if constants.DRAWING:
                    batch_debug_images.append(
                        sequences[bb]['debug_images'][ind0:ind1])
                if data_len < constants.NUM_UNROLLS:
                    seq_inds[bb, :] = 0
                    data.extend([
                        np.zeros_like(data[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    action.extend([
                        np.zeros_like(action[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    labels.extend([
                        np.zeros_like(labels[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    pose.extend([
                        pose[-1]
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    mask.extend([
                        np.zeros_like(mask[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    pose_indicator.extend([
                        np.zeros_like(pose_indicator[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    possible_label.extend([
                        np.zeros_like(possible_label[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                else:
                    seq_inds[bb, 0] += constants.NUM_UNROLLS
                batch_data.append(data)
                batch_action.append(action)
                batch_memory.append(memory)
                batch_gru_state.append(gru_state)
                batch_pose.append(pose)
                batch_goal_pose.append(goal_pose)
                batch_labels.append(labels)
                batch_mask.append(mask)
                batch_pose_indicator.append(pose_indicator)
                batch_possible_label.append(possible_label)

            feed_dict = {
                network.image_placeholder:
                np.ascontiguousarray(batch_data),
                network.action_placeholder:
                np.ascontiguousarray(batch_action),
                network.gru_placeholder:
                np.ascontiguousarray(batch_gru_state),
                network.pose_placeholder:
                np.ascontiguousarray(batch_pose),
                network.goal_pose_placeholder:
                np.ascontiguousarray(batch_goal_pose),
                network.labels_placeholder:
                np.ascontiguousarray(batch_labels)[..., np.newaxis],
                network.mask_placeholder:
                np.ascontiguousarray(batch_mask),
                network.pose_indicator_placeholder:
                np.ascontiguousarray(batch_pose_indicator),
                network.possible_label_placeholder:
                np.ascontiguousarray(batch_possible_label),
                network.memory_placeholders:
                np.ascontiguousarray(batch_memory),
            }
            dataTEnd = time.time()
            summaryTime = 0
            if constants.DEBUG or constants.DRAWING:
                outputs = sess.run([
                    training_step, network.loss, network.gru_state,
                    network.patch_weights_sigm, network.gru_outputs_full,
                    network.is_possible_sigm,
                    network.pose_indicator_placeholder,
                    network.terminal_patches, network.gru_outputs
                ],
                                   feed_dict=feed_dict)
            else:
                if iteration == start_it + 10:
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    outputs = sess.run([
                        training_step, network.loss, network.gru_state,
                        summary_with_images, network.gru_outputs
                    ],
                                       feed_dict=feed_dict,
                                       options=run_options,
                                       run_metadata=run_metadata)
                    loss_summary = outputs[3]
                    summary_writer.add_run_metadata(run_metadata,
                                                    'step_%07d' % iteration)
                    summary_writer.add_summary(loss_summary, iteration)
                    summary_writer.flush()
                elif iteration % 10 == 0:
                    if iteration % 100 == 0:
                        outputs = sess.run([
                            training_step, network.loss, network.gru_state,
                            summary_with_images, network.gru_outputs
                        ],
                                           feed_dict=feed_dict)
                    else:
                        outputs = sess.run([
                            training_step, network.loss, network.gru_state,
                            loss_summary_op, network.gru_outputs
                        ],
                                           feed_dict=feed_dict)
                    loss_summary = outputs[3]
                    summaryTStart = time.time()
                    summary_writer.add_summary(loss_summary, iteration)
                    summary_writer.flush()
                    summaryTime = time.time() - summaryTStart
                else:
                    outputs = sess.run([
                        training_step, network.loss, network.gru_state,
                        network.gru_outputs
                    ],
                                       feed_dict=feed_dict)

            gru_state_out = outputs[2]
            memory_out = outputs[-1]
            for mm in range(constants.BATCH_SIZE):
                sequences[mm]['memory'] = memory_out[mm, ...]
                sequences[mm]['gru_state'] = gru_state_out[mm, ...]

            loss = outputs[1]
            solverTEnd = time.time()

            if constants.DEBUG or constants.DRAWING:
                # Look at outputs
                patch_weights = outputs[3]
                is_possible = outputs[5]
                pose_indicator = outputs[6]
                terminal_patches = outputs[7]
                data_lock.acquire()
                for bb in range(constants.BATCH_SIZE):
                    for tt in range(constants.NUM_UNROLLS):
                        if batch_mask[bb][tt] == 0:
                            break
                        if constants.DRAWING:
                            import cv2
                            import scipy.misc
                            from utils import drawing
                            curr_image = batch_data[bb][tt]
                            label = np.flipud(batch_labels[bb][tt])
                            debug_images = batch_debug_images[bb][tt]
                            color_image = debug_images['color']
                            state_image = debug_images['state_image']
                            label_memory_image = debug_images['label_memory'][:, :, 0]
                            label_memory_image_class = np.argmax(
                                debug_images['label_memory'][:, :, 1:], axis=2)
                            label_memory_image_class[0, 0] = constants.NUM_CLASSES

                            label_patch = debug_images['label']

                            print('Possible pred %.3f' % is_possible[bb, tt])
                            print('Possible label %.3f' %
                                  batch_possible_label[bb][tt])
                            patch = np.flipud(patch_weights[bb, tt, ...])
                            patch_occupancy = patch[:, :, 0]
                            print('occ', patch_occupancy)
                            print('label', label)
                            terminal_patch = np.flipud(
                                np.sum(terminal_patches[bb, tt, ...], axis=2))
                            image_list = [
                                debug_images['color'],
                                state_image,
                                debug_images['label_memory'][:, :, 0],
                                debug_images['memory_map'][:, :, 0],
                                label[:, :],
                                patch_occupancy,
                                np.flipud(pose_indicator[bb, tt]),
                                terminal_patch,
                            ]

                            image = drawing.subplot(image_list, 4, 2,
                                                    constants.SCREEN_WIDTH,
                                                    constants.SCREEN_HEIGHT)
                            cv2.imshow('image', image[:, :, ::-1])
                            cv2.waitKey(0)
                        else:
                            pdb.set_trace()
                data_lock.release()

            if not (constants.DEBUG or constants.DRAWING) and (
                    iteration % 500 == 0
                    or iteration == constants.MAX_TIME_STEP - 1):
                saverTStart = time.time()
                tf_util.save(saver, sess, constants.CHECKPOINT_DIR, iteration)
                saverTEnd = time.time()
                print('Saver:     %.3f' % (saverTEnd - saverTStart))

            curr_it += 1

            dataTimeTotal += dataTEnd - tStart
            summaryTimeTotal += summaryTime
            solverTimeTotal += solverTEnd - dataTEnd - summaryTime
            totalTimeTotal += time.time() - tStart

            if iteration == start_it or iteration % 10 == 0:
                print('Iteration: %d' % (iteration))
                print('Loss:      %.3f' % loss)
                print('Data:      %.3f' % (dataTimeTotal / curr_it))
                print('Solver:    %.3f' % (solverTimeTotal / curr_it))
                print('Summary:   %.3f' % (summaryTimeTotal / curr_it))
                print('Total:     %.3f' % (totalTimeTotal / curr_it))
                print('Current:   %.3f\n' %
                      ((time.time() - currentTimeStart) / min(10, curr_it)))

    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        # Save final model
        if not (constants.DEBUG or constants.DRAWING):
            tf_util.save(saver, sess, constants.CHECKPOINT_DIR, iteration)
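Example 3

    # This method assumes the codebase's usual imports (numpy as np, cv2, os,
    # constants, and utils.game_util as game_util), plus utils.drawing
    # imported inline below.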
    def draw_state(self, return_list=False, action=None):
        if not constants.DRAWING:
            return
        from utils import drawing
        curr_image = self.game_state.detection_image.copy()
        state_image = self.game_state.draw_state()
        pi = self.pi.copy().squeeze()

        action_size = max(len(pi), 100)
        action_hist = np.zeros((action_size, action_size))
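        # Render the policy as a bar chart: one vertical bar per action, bar
        # height proportional to its probability, bar value encoding its index.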
        for ii, pi_i in enumerate(pi):
            bar_height = max(int(np.round(pi_i * action_size)), 1)
            col_start = int(ii * action_size / len(pi))
            col_end = int((ii + 1) * action_size / len(pi))
            action_hist[:bar_height, col_start:col_end] = ii + 1
        action_hist = np.flipud(action_hist)
        images = [
            curr_image,
            np.argmax(self.game_state.detection_mask_image, 2),
            action_hist,
            state_image,
        ]
        if isinstance(action, int):
            action = self.game_state.get_action(action)[0]
        action_str = game_util.get_action_str(action)
        if action_str == 'Answer':
            if self.game_state.question_type_ind != 1:
                action_str += ' ' + str(self.answer > 0.5)
            else:
                action_str += ' ' + str(np.argmax(self.answer))

        if self.game_state.question_type_ind == 0:
            question_str = '%03d Ex Q: %s A: %s' % (
                self.num_steps,
                constants.OBJECTS[self.game_state.question_target],
                bool(self.game_state.answer))
        elif self.game_state.question_type_ind == 1:
            question_str = '%03d # Q: %s A: %d' % (
                self.num_steps,
                constants.OBJECTS[self.game_state.question_target],
                self.game_state.answer)
        elif self.game_state.question_type_ind == 2:
            question_str = '%03d Q: %s in %s A: %d' % (
                self.num_steps,
                constants.OBJECTS[self.game_state.question_target[0]],
                constants.OBJECTS[self.game_state.question_target[1]],
                self.game_state.answer)
        else:
            raise Exception('No matching question number')

        titles = [
            question_str, action_str,
            'reward %.3f, value %.3f' % (self.reward, self.v)
        ]
        if return_list:
            return action_hist
        image = drawing.subplot(images,
                                2,
                                2,
                                curr_image.shape[1],
                                curr_image.shape[0],
                                titles=titles)
        if not os.path.exists('visualizations/images'):
            os.makedirs('visualizations/images')
        cv2.imwrite(
            'visualizations/images/state_%05d.jpg' % self.global_step_id,
            image[:, :, ::-1])
        return image
Example 4
            num_total += 1
            action_key = ''
            state.reset(*question)
            while action_key != 'answer':
                if constants.DEBUG:
                    images = [
                        state.s_t, state.detection_image, state.s_t_depth,
                        state.event.class_segmentation_frame,
                        state.event.instance_segmentation_frame
                    ]
                    titles = [
                        'state', 'detections', 'depth', 'class segmentation',
                        'instance segmentation'
                    ]
                    image = drawing.subplot(images, 2, 3,
                                            constants.SCREEN_WIDTH,
                                            constants.SCREEN_HEIGHT, 5, titles)

                    cv2.imshow('image', image[:, :, ::-1])
                    cv2.waitKey(10)
                print(
                    'w: MoveAhead\na: RotateLeft\ns: RotateRight\no: OpenObject\nc: CloseObject\n+: LookUp\n-: LookDown\nanswer: Open answer dialog. type {true, false, yes, no}\nq: quit\ndd: enter debug'
                )
                new_action_key = input(">> ")
                if new_action_key != '':
                    action_key = new_action_key
                state.step(action_key)
            answer = None
            while answer is None:
                answer = input("answer: ").lower()
                if answer in {'true', 'false', 'yes', 'no'}:
                    # Assumed completion; the source listing is truncated here.
                    answer = answer in {'true', 'yes'}
                else:
                    answer = None
Example 5
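# Imports assumed by this example (inferred from usage; helper names such as
# channel_last, normalize, combine_image_table, put_text_on_images,
# put_epic_class_text_on_images, visualize_gaze, depth_normalize, and identity
# are project-local utilities, not standard-library functions):
import torch

from utils import drawing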
    def visualize_results(self, input, output, target, step, add_to_keys):

        if 'reconstructed_rgb' in output and 'reconstructed_rgb' in target:
            batch_to_visualize = 0
            # pdb.set_trace()
            output_reconstruct = output['reconstructed_rgb'][
                batch_to_visualize]
            target_reconstruct = target['reconstructed_rgb'][
                batch_to_visualize]

            all_images = [
                channel_last(normalize(img))
                for img in [target_reconstruct, output_reconstruct]
            ]

            combined_images_before_append = torch.stack(all_images, dim=1)
            combined_images = combine_image_table(
                combined_images_before_append)

            combined_images = channel_first(combined_images)
            self.log_writer.add_image(tag=('reconstruct_rgb_viz' + '/' +
                                           add_to_keys),
                                      img_tensor=combined_images,
                                      global_step=step)

        if 'verb_class' in output and 'verb_class' in target:
            batch_to_visualize = 10
            output_verb_class = output[
                'verb_actual_class'][:batch_to_visualize]
            target_verb_class = target['verb_class'][:batch_to_visualize]

            rgb_images = (input['rgb'][:batch_to_visualize])
            batch_size, seq_len, c, w, h = rgb_images.shape
            # half_way = int(seq_len / 2)
            # rgb_images = rgb_images[:,[0,half_way, -1]]
            rgb_images = channel_last(normalize(rgb_images))

            combined_images_before_append = put_epic_class_text_on_images(
                rgb_images, target_verb_class, output_verb_class,
                self.dataset.VERB_CLASS_TO_NAME)
            combined_images = combine_image_table(
                combined_images_before_append)

            combined_images = channel_first(combined_images)
            self.log_writer.add_image(tag=('verb_class_rgb_viz' + '/' +
                                           add_to_keys),
                                      img_tensor=combined_images,
                                      global_step=step)

        if 'class_probs' in output and 'class_probs' in target:
            batch_to_visualize = 10
            output_verb_class = output['class_probs'][:batch_to_visualize]
            target_verb_class = target['class_probs'][:batch_to_visualize]

            rgb_images = (input['rgb'][:batch_to_visualize])
            batch_size, seq_len, c, w, h = rgb_images.shape
            # half_way = int(seq_len / 2)
            # rgb_images = rgb_images[:,[0,half_way, -1]]
            rgb_images = channel_last(normalize(rgb_images))

            class_names = self.dataset.VERB_CLASS_TO_NAME
            _, output_top_k = torch.topk(output_verb_class, k=5, dim=-1)
            _, target_top_k = torch.topk(target_verb_class, k=5, dim=-1)
            output_top_k = output_top_k.squeeze(1)
            target_top_k = target_top_k.squeeze(1)
            output_text_list = []
            target_text_list = []

            for b_ind in range(len(output_top_k)):
                output_verbs = '/'.join([
                    class_names[cls.item()].split(' ')[0].split('/')[0]
                    for cls in output_top_k[b_ind]
                ])
                target_verbs = '/'.join([
                    class_names[cls.item()].split(' ')[0].split('/')[0]
                    for cls in target_top_k[b_ind]
                ])
                output_text_list.append(output_verbs)
                target_text_list.append(target_verbs)
            combined_images_before_append = put_text_on_images(
                rgb_images,
                output_text_list,
                target_text_list,
                color_list=None,
                font_scale=0.3,
                line_type=1)

            combined_images = combine_image_table(
                combined_images_before_append)

            combined_images = channel_first(combined_images)
            self.log_writer.add_image(tag=('verb_class_rgb_viz' + '/' +
                                           add_to_keys),
                                      img_tensor=combined_images,
                                      global_step=step)

        if 'vind_class' in output and 'vind_class' in target:
            batch_to_visualize = 10

            output_pose_class = output[
                'vind_actual_class'][:batch_to_visualize]
            target_pose_class = target['vind_class'][:batch_to_visualize]

            rgb_images = (input['rgb'][:batch_to_visualize])
            mask_images = (target['combined_mask'][:batch_to_visualize])

            batch_size, seq_len, c, w, h = rgb_images.shape

            rgb_images = channel_last(normalize(rgb_images))
            mask_images = channel_last(normalize(mask_images))
            combined_images = torch.cat([rgb_images, mask_images], dim=1)

            combined_images_before_append = put_epic_class_text_on_images(
                combined_images, target_pose_class, output_pose_class,
                {i: str(i)
                 for i in range(100)})
            combined_images = combine_image_table(
                combined_images_before_append)

            combined_images = channel_first(combined_images)
            self.log_writer.add_image(tag=('vind_class' + '/' + add_to_keys),
                                      img_tensor=combined_images,
                                      global_step=step)

        if 'scene_class' in output and 'scene_class' in target:
            batch_to_visualize = 10

            output_scene_class = output[
                'scene_actual_class'][:batch_to_visualize]
            target_scene_class = target['scene_class'][:batch_to_visualize]

            rgb_images = (input['rgb'][:batch_to_visualize])
            batch_size, seq_len, c, w, h = rgb_images.shape
            rgb_images = channel_last(normalize(rgb_images))

            combined_images_before_append = put_epic_class_text_on_images(
                rgb_images, target_scene_class, output_scene_class,
                self.dataset.SUN_SCENE_INDEX_TO_NAME)
            combined_images = combine_image_table(
                combined_images_before_append)

            combined_images = channel_first(combined_images)
            self.log_writer.add_image(tag=('scene_class_rgb_viz' + '/' +
                                           add_to_keys),
                                      img_tensor=combined_images,
                                      global_step=step)

        if 'move_label' in output and 'move_label' in target:

            from utils.constants import IMU_INDEX_TO_NAME

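            # Builds a compact per-frame string such as 'head0-left1-',
            # pairing each IMU's name with its binary move label.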
            def get_one_set_str(imus, imu_indices):
                result = ''
                for imu_ind in range(len(imu_indices)):
                    imu_name = IMU_INDEX_TO_NAME[imu_indices[imu_ind]]
                    move_label = imus[imu_ind]
                    if move_label == 0:
                        result += imu_name + '0-'
                    elif move_label == 1:
                        result += imu_name + '1-'
                return result

            def translate_move_label(move_labels, list_of_imus):
                seq_len, num_imus = move_labels.shape
                result = []
                for seq_ind in range(seq_len):
                    this_item = get_one_set_str(move_labels[seq_ind],
                                                list_of_imus)
                    result.append(this_item)
                return result

            batch_to_visualize = 0

            # output_move_label = output['move_label'][batch_to_visualize]
            output_move_label = output['cleaned_move_label'][
                batch_to_visualize]

            target_move_label = target['move_label'][batch_to_visualize]

            rgb_images = (target['rgb'][batch_to_visualize])
            seq_len = rgb_images.shape[0]
            output_images = channel_last(normalize(rgb_images)).cpu().numpy()
            target_images = channel_last(normalize(rgb_images)).cpu().numpy()

            list_of_images = [target_images[i] for i in range(seq_len)]
            list_of_images += [output_images[i] for i in range(seq_len)]

            target_titles = translate_move_label(target_move_label, self.imus)
            output_titles = translate_move_label(output_move_label, self.imus)
            target_titles = ['gt-' + x for x in target_titles]

            titles = target_titles + output_titles

            combined_images = drawing.subplot(list_of_images, 2, seq_len, 224,
                                              224, 5, titles)

            combined_images = channel_first(combined_images)
            self.log_writer.add_image(tag=('move_label_viz' + '/' +
                                           add_to_keys),
                                      img_tensor=combined_images,
                                      global_step=step)

        if 'gaze_points' in output and 'gaze_points' in target:
            batch_to_visualize = 0
            rgb_images = channel_last(
                normalize(target['rgb'][batch_to_visualize]))
            target_gaze_images = visualize_gaze(
                rgb_images, target['gaze_points'][batch_to_visualize])
            output_gaze_images = visualize_gaze(
                rgb_images, output['gaze_points'][batch_to_visualize])
            combined_images_before_append = torch.stack(
                [target_gaze_images, output_gaze_images], dim=1)
            combined_images = combine_image_table(
                combined_images_before_append)

            combined_images = channel_first(combined_images)
            self.log_writer.add_image(tag=('gaze_viz' + '/' + add_to_keys),
                                      img_tensor=combined_images,
                                      global_step=step)

        if 'depth' in output:
            self.visualize_feature(input, output, target, step, add_to_keys,
                                   'depth', depth_normalize)
        if 'walk' in output:
            output['real_walk'] = torch.argmax(output['walk'],
                                               dim=2).unsqueeze(2)
            target['real_walk'] = target['walk']
            self.visualize_feature(input, output, target, step, add_to_keys,
                                   'real_walk', identity)
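Example 6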
    def draw_state(self, return_list=False, action=None):
        if not constants.DRAWING:
            return
        # Rows are:
        # 0 - Map weights (not fed to decision network)
        # 1 and 2 - meshgrid
        # 3 - coverage
        # 4 - teleport locations
        # 5 - free space map
        # 6 - visited locations
        # 7+ - object location
        from utils import drawing
        curr_image = self.game_state.detection_image.copy()
        state_image = self.game_state.draw_state()

        action_hist = np.zeros((3, 3, 3))
        pi = self.pi.copy()
        if constants.STEPS_AHEAD == 5:
            action_hist = np.concatenate((pi, np.zeros(3)))
            action_hist = action_hist.reshape(7, 5)
        elif constants.STEPS_AHEAD == 1:
            action_hist = np.concatenate((pi, np.zeros(1)))
            action_hist = action_hist.reshape(3, 3)

        flat_action_size = max(len(pi), 100)
        flat_action_hist = np.zeros((flat_action_size, flat_action_size))
        for ii, flat_action_i in enumerate(pi):
            bar_height = max(int(np.round(flat_action_i * flat_action_size)), 1)
            col_start = int(ii * flat_action_size / len(pi))
            col_end = int((ii + 1) * flat_action_size / len(pi))
            flat_action_hist[:bar_height, col_start:col_end] = ii + 1

        # Answer histogram
        ans = self.answer
        if len(ans) == 1:
            ans = [1 - ans[0], ans[0]]
        ans_size = max(len(ans), 100)
        ans_hist = np.zeros((ans_size, ans_size))
        for ii, ans_i in enumerate(ans):
            bar_height = max(int(np.round(ans_i * ans_size)), 1)
            col_start = int(ii * ans_size / len(ans))
            col_end = int((ii + 1) * ans_size / len(ans))
            ans_hist[:bar_height, col_start:col_end] = ii + 1
        ans_hist = np.flipud(ans_hist)

        dil = np.flipud(self.dilation)
        dil[0, 0] = 4
        coverage = int(self.coverage * 100 / self.max_coverage)

        possible = np.zeros((3, 3, 3))
        possible_pred = np.zeros((3, 3, 3))
        if constants.STEPS_AHEAD == 5:
            possible = self.possible_moves.copy()
            possible = np.concatenate((possible, np.zeros(4)))
            possible = possible.reshape(constants.STEPS_AHEAD + 2,
                                        constants.STEPS_AHEAD)

            possible_pred = self.possible_moves_pred.copy()
            possible_pred = np.concatenate((possible_pred, np.zeros(4)))
            possible_pred = possible_pred.reshape(constants.STEPS_AHEAD + 2,
                                                  constants.STEPS_AHEAD)

        elif constants.STEPS_AHEAD == 1:
            possible = self.possible_moves.copy()
            possible = np.concatenate((possible, np.zeros(2)))
            possible = possible.reshape(3, 3)

            possible_pred = self.possible_moves_pred.copy()
            possible_pred = np.concatenate((possible_pred, np.zeros(2)))
            possible_pred = possible_pred.reshape(3, 3)

        if self.game_state.question_type_ind in {2, 3}:
            obj_mem = self.spatial_map.memory[
                :, :, 7 + self.game_state.question_target[1]].copy()
            obj_mem += self.spatial_map.memory[
                :, :, 7 + self.game_state.object_target] * 2
        else:
            obj_mem = self.spatial_map.memory[
                :, :, 7 + self.game_state.object_target].copy()
        obj_mem[0, 0] = 2

        memory_map = np.flipud(self.spatial_map.memory[:, :, 7:].copy())
        curr_objs = np.argmax(memory_map, axis=2)

        gt_objs = np.flipud(
            np.argmax(self.game_state.xray_graph.memory[:, :, 1:], 2))
        curr_objs[0, 0] = np.max(gt_objs)
        memory_crop = self.memory_crops[0, ...].copy()
        memory_crop_cov = np.argmax(np.flipud(memory_crop), axis=2)

        gt_semantic_crop = np.flipud(
            np.argmax(self.next_memory_crops_rot, axis=2))

        images = [
            curr_image,
            state_image,
            dil + np.max(np.flipud(self.spatial_map.memory[:, :, 3:5]) *
                         np.array([1, 3]),
                         axis=2),
            memory_crop_cov,
            ans_hist,
            flat_action_hist,
            np.flipud(action_hist),
            np.flipud(possible),
            np.flipud(possible_pred),
            gt_objs,
            curr_objs,
            np.flipud(obj_mem),
        ]
        if isinstance(action, int):
            action = self.game_state.get_action(action)[0]
        action_str = game_util.get_action_str(action)
        if action_str == 'Answer':
            if self.game_state.question_type_ind != 1:
                action_str += ' ' + str(self.answer > 0.5)
            else:
                action_str += ' ' + str(np.argmax(self.answer))
        if self.game_state.question_type_ind == 0:
            question_str = '%03d S %s Ex Q: %s A: %s' % (
                self.num_steps, self.game_state.scene_name[9:],
                constants.OBJECTS[self.game_state.question_target],
                bool(self.game_state.answer))
        elif self.game_state.question_type_ind == 1:
            question_str = '%03d S %s # Q: %s A: %d' % (
                self.num_steps, self.game_state.scene_name[9:],
                constants.OBJECTS[self.game_state.question_target],
                self.game_state.answer)
        elif self.game_state.question_type_ind == 2:
            question_str = '%03d S %s Q: %s in %s A: %s' % (
                self.num_steps, self.game_state.scene_name[9:],
                constants.OBJECTS[self.game_state.question_target[0]],
                constants.OBJECTS[self.game_state.question_target[1]],
                bool(self.game_state.answer))
        else:
            raise Exception('No matching question number')
        titles = [
            question_str,
            str(self.answer),
            action_str,
            'coverage %d%% can end %s' %
            (coverage, bool(self.game_state.can_end)),
            'reward %.3f, value %.3f' % (self.reward, self.v),
        ]
        if return_list:
            return action_hist
        image = drawing.subplot(images,
                                4,
                                3,
                                curr_image.shape[1],
                                curr_image.shape[0],
                                titles=titles,
                                border=3)
        if not os.path.exists('visualizations/images'):
            os.makedirs('visualizations/images')
        cv2.imwrite(
            'visualizations/images/state_%05d.jpg' % self.global_step_id,
            image[:, :, ::-1])

        return image
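
Example 7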
            gt_map = (2 - im_dict['label_memory'][:, :, 0])

            image_list = [
                im_dict['detections']
                if constants.OBJECT_DETECTION else im_dict['color'],
                im_dict['state_image'],
                im_dict['memory_map'][:, :, 0],
                gt_map + np.argmax(
                    im_dict['memory_map'][:, :,
                                          1:constants.NUM_RECEPTACLES + 2],
                    axis=2),
                gt_map + np.argmax(
                    im_dict['memory_map'][:, :,
                                          constants.NUM_RECEPTACLES + 2:],
                    axis=2),
            ]
            titles = [
                'color', 'state', 'occupied', 'label receptacles',
                'label objects'
            ]
            print('possible pred', im_dict['possible_pred'])
            image = drawing.subplot(image_list,
                                    2,
                                    2,
                                    constants.SCREEN_WIDTH,
                                    constants.SCREEN_HEIGHT,
                                    titles=titles)
            cv2.imshow('image', image[:, :, ::-1])
            cv2.waitKey(0)
Example 8
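# Imports assumed by this example (inferred from usage; project-local module
# paths are assumptions):
import glob
import os
import pdb
import random
import time

import numpy as np
import tensorflow as tf

import constants
from networks.qa_planner_network import QAPlannerNetwork  # assumed path
from utils import tf_util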
def run():
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(constants.GPU_ID)
        with tf.variable_scope('global_network'):
            network = QAPlannerNetwork(constants.RL_GRU_SIZE, int(constants.BATCH_SIZE), 1)
            network.create_net()
            training_step = network.training(network.rl_total_loss)

        conv_var_list = [v for v in tf.trainable_variables() if 'conv' in v.name and 'weight' in v.name and
                        (v.get_shape().as_list()[0] != 1 or v.get_shape().as_list()[1] != 1)]
        for var in conv_var_list:
            tf_util.conv_variable_summaries(var, scope=var.name.replace('/', '_')[:-2])
        summary_with_images = tf.summary.merge_all()

        with tf.variable_scope('supervised_loss'):
            loss_ph = tf.placeholder(tf.float32)
            accs_ph = [tf.placeholder(tf.float32) for _ in range(4)]
            # Note: only the first three accuracies are merged into the summary;
            # accs_ph[3] is fed below but never written out.
            loss_summary_op = tf.summary.merge([
                tf.summary.scalar('supervised_loss', loss_ph),
                tf.summary.scalar('acc_1_exist', accs_ph[0]),
                tf.summary.scalar('acc_2_count', accs_ph[1]),
                tf.summary.scalar('acc_3_contains', accs_ph[2]),
                ])

        # prepare session
        sess = tf_util.Session()
        sess.run(tf.global_variables_initializer())

        if not (constants.DEBUG or constants.DRAWING):
            from utils import py_util
            time_str = py_util.get_time_str()
            summary_writer = tf.summary.FileWriter(os.path.join(constants.LOG_FILE, time_str), sess.graph)
        else:
            summary_writer = None

        # init or load checkpoint with saver
        saver = tf.train.Saver(max_to_keep=3)
        start_it = tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR)

        sess.graph.finalize()

        import h5py
        h5_file = sorted(glob.glob('question_data_dump/*.h5'), key=os.path.getmtime)[-1]
        dataset = h5py.File(h5_file, 'r')  # open read-only
        num_entries = np.sum(np.sum(dataset['question_data/pose_placeholder'][...], axis=1) > 0)
        print('num_entries', num_entries)
        start_inds = dataset['question_data/new_episode'][:num_entries]
        start_inds = np.where(start_inds[1:] != 1)[0]

        curr_it = 0
        data_time_total = 0
        solver_time_total = 0
        total_time_total = 0

        for iteration in range(start_it, constants.MAX_TIME_STEP):
            if iteration == start_it or iteration % 10 == 1:
                current_time_start = time.time()
            t_start = time.time()

            rand_inds = np.sort(np.random.choice(start_inds, int(constants.BATCH_SIZE), replace=False))
            rand_inds = rand_inds.tolist()

            existence_answer_placeholder = dataset['question_data/existence_answer_placeholder'][rand_inds]
            counting_answer_placeholder = dataset['question_data/counting_answer_placeholder'][rand_inds]

            question_type_placeholder = dataset['question_data/question_type_placeholder'][rand_inds]
            question_object_placeholder = dataset['question_data/question_object_placeholder'][rand_inds]
            question_container_placeholder = dataset['question_data/question_container_placeholder'][rand_inds]

            pose_placeholder = dataset['question_data/pose_placeholder'][rand_inds]
            image_placeholder = dataset['question_data/image_placeholder'][rand_inds]
            map_mask_placeholder = np.ascontiguousarray(dataset['question_data/map_mask_placeholder'][rand_inds])

            meta_action_placeholder = dataset['question_data/meta_action_placeholder'][rand_inds]
            possible_move_placeholder = dataset['question_data/possible_move_placeholder'][rand_inds]

            taken_action = dataset['question_data/taken_action'][rand_inds]
            answer_weight = np.ones((constants.BATCH_SIZE, 1))

            map_mask_placeholder = np.ascontiguousarray(map_mask_placeholder, dtype=np.float32)
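            # The first two mask channels appear to hold map coordinates;
            # shift them and rescale into step units before feeding the network.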
            map_mask_placeholder[:, :, :, :2] -= 2
            map_mask_placeholder[:, :, :, :2] /= constants.STEPS_AHEAD

            map_mask_starts = map_mask_placeholder.copy()

            for bb in range(0, constants.BATCH_SIZE):
                object_ind = int(question_object_placeholder[bb])
                question_type_ind = int(question_type_placeholder[bb])
                if question_type_ind in {2, 3}:
                    container_ind = np.argmax(question_container_placeholder[bb])

                max_map_inds = np.argmax(map_mask_placeholder[bb, ...], axis=2)
                map_range = np.where(max_map_inds > 0)
                map_range_x = (np.min(map_range[1]), np.max(map_range[1]))
                map_range_y = (np.min(map_range[0]), np.max(map_range[0]))
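                # Random-erasing augmentation: pick random rectangles inside the
                # explored map and clear their object channels, but only when the
                # rectangle contains no question-relevant objects or seen markers.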
                for jj in range(random.randint(0, 100)):
                    tmp_patch_start = (random.randint(map_range_x[0], map_range_x[1]),
                            random.randint(map_range_y[0], map_range_y[1]))
                    tmp_patch_end = (random.randint(map_range_x[0], map_range_x[1]),
                            random.randint(map_range_y[0], map_range_y[1]))
                    patch_start = (min(tmp_patch_start[0], tmp_patch_end[0]),
                            min(tmp_patch_start[1], tmp_patch_end[1]))
                    patch_end = (max(tmp_patch_start[0], tmp_patch_end[0]),
                            max(tmp_patch_start[1], tmp_patch_end[1]))

                    patch = map_mask_placeholder[bb, patch_start[1]:patch_end[1], patch_start[0]:patch_end[0], :]
                    if question_type_ind in {2, 3}:
                        obj_mem = patch[:, :, 6 + container_ind] + patch[:, :, 6 + object_ind]
                    else:
                        obj_mem = patch[:, :, 6 + object_ind].copy()
                    obj_mem += patch[:, :, 2]  # make sure seen locations stay marked.
                    if patch.size > 0 and np.max(obj_mem) == 0:
                        map_mask_placeholder[bb, patch_start[1]:patch_end[1], patch_start[0]:patch_end[0], 6:] = 0
            feed_dict = {
                    network.existence_answer_placeholder: np.ascontiguousarray(existence_answer_placeholder),
                    network.counting_answer_placeholder: np.ascontiguousarray(counting_answer_placeholder),

                    network.question_type_placeholder: np.ascontiguousarray(question_type_placeholder),
                    network.question_object_placeholder: np.ascontiguousarray(question_object_placeholder),
                    network.question_container_placeholder: np.ascontiguousarray(question_container_placeholder),
                    network.question_direction_placeholder: np.zeros((constants.BATCH_SIZE, 4), dtype=np.float32),

                    network.pose_placeholder: np.ascontiguousarray(pose_placeholder),
                    network.image_placeholder: np.ascontiguousarray(image_placeholder),
                    network.map_mask_placeholder: map_mask_placeholder,
                    network.meta_action_placeholder: np.ascontiguousarray(meta_action_placeholder),
                    network.possible_move_placeholder: np.ascontiguousarray(possible_move_placeholder),

                    network.taken_action: np.ascontiguousarray(taken_action),
                    network.answer_weight: np.ascontiguousarray(answer_weight),
                    network.episode_length_placeholder: np.ones((constants.BATCH_SIZE)),
                    network.question_count_placeholder: np.zeros((constants.BATCH_SIZE)),
                    }
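            # Reshape every feed tensor to [BATCH_SIZE, 1, ...]: the planner
            # network expects a time/unroll dimension, which is 1 here.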
            new_feed_dict = {}
            for key,value in feed_dict.items():
                if len(value.squeeze().shape) > 1:
                    new_feed_dict[key] = np.reshape(value, [int(constants.BATCH_SIZE), 1] + list(value.squeeze().shape[1:]))
                else:
                    new_feed_dict[key] = np.reshape(value, [int(constants.BATCH_SIZE), 1])
            feed_dict = new_feed_dict
            feed_dict[network.taken_action] = np.reshape(feed_dict[network.taken_action], (constants.BATCH_SIZE, -1))
            feed_dict[network.gru_placeholder] = np.zeros((int(constants.BATCH_SIZE), constants.RL_GRU_SIZE))

            data_t_end = time.time()
            if constants.DEBUG or constants.DRAWING:
                outputs = sess.run(
                        [training_step, network.rl_total_loss, network.existence_answer, network.counting_answer,
                            network.possible_moves, network.memory_crops_rot, network.taken_action],
                        feed_dict=feed_dict)
            else:
                if iteration == start_it + 10:
                    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    outputs = sess.run([training_step, network.rl_total_loss, summary_with_images],
                            feed_dict=feed_dict,
                            options=run_options,
                            run_metadata=run_metadata)
                    loss_summary = outputs[2]
                    summary_writer.add_run_metadata(run_metadata, 'step_%07d' % iteration)
                    summary_writer.add_summary(loss_summary, iteration)
                    summary_writer.flush()
                elif iteration % 10 == 0:
                    if iteration % 100 == 0:
                        outputs = sess.run(
                                [training_step, network.rl_total_loss, summary_with_images],
                                feed_dict=feed_dict)
                        loss_summary = outputs[2]
                    else:
                        outputs = sess.run(
                                [training_step, network.rl_total_loss,
                                 network.existence_answer,
                                 network.counting_answer,
                                 ],
                                feed_dict=feed_dict)
                        outputs[2] = outputs[2].reshape(-1, 1)
                        acc_q0 = np.sum((existence_answer_placeholder == (outputs[2] > 0.5)) *
                            (question_type_placeholder == 0)) / np.maximum(1, np.sum(question_type_placeholder == 0))
                        acc_q1 = np.sum((counting_answer_placeholder == np.argmax(outputs[3], axis=1)[..., np.newaxis]) *
                            (question_type_placeholder == 1)) / np.maximum(1, np.sum(question_type_placeholder == 1))
                        acc_q2 = np.sum((existence_answer_placeholder == (outputs[2] > 0.5)) *
                            (question_type_placeholder == 2)) / np.maximum(1, np.sum(question_type_placeholder == 2))
                        acc_q3 = np.sum((existence_answer_placeholder == (outputs[2] > 0.5)) *
                            (question_type_placeholder == 3)) / np.maximum(1, np.sum(question_type_placeholder == 3))

                        curr_loss = outputs[1]
                        outputs = sess.run([loss_summary_op],
                                feed_dict={
                                    accs_ph[0]: acc_q0,
                                    accs_ph[1]: acc_q1,
                                    accs_ph[2]: acc_q2,
                                    accs_ph[3]: acc_q3,
                                    loss_ph: curr_loss,
                                    })

                        loss_summary = outputs[0]
                        # Keep the loss at index 1 so the shared `loss = outputs[1]`
                        # read below still works for this branch.
                        outputs.append(curr_loss)
                    summary_writer.add_summary(loss_summary, iteration)
                    summary_writer.flush()
                else:
                    outputs = sess.run([training_step, network.rl_total_loss],
                            feed_dict=feed_dict)

            loss = outputs[1]
            solver_t_end = time.time()

            if constants.DEBUG or constants.DRAWING:
                # Look at outputs
                guess_bool = outputs[2].flatten()
                guess_count = outputs[3]
                possible_moves_pred = outputs[4]
                memory_crop = outputs[5]
                print('loss', loss)
                for bb in range(constants.BATCH_SIZE):
                    if constants.DRAWING:
                        import cv2
                        import scipy.misc
                        from utils import drawing
                        object_ind = int(question_object_placeholder[bb])
                        question_type_ind = question_type_placeholder[bb]
                        if question_type_ind == 1:
                            answer = counting_answer_placeholder[bb]
                            guess = guess_count[bb]
                        else:
                            answer = existence_answer_placeholder[bb]
                            guess = np.concatenate(([1 - guess_bool[bb]], [guess_bool[bb]]))
                        if question_type_ind[0] in {2, 3}:
                            container_ind = np.argmax(question_container_placeholder[bb])
                            obj_mem = np.flipud(map_mask_placeholder[bb, :, :, 6 + container_ind]).copy()
                            obj_mem += 2 * np.flipud(map_mask_placeholder[bb, :, :, 6 + object_ind])
                        else:
                            obj_mem = np.flipud(map_mask_placeholder[bb, :, :, 6 + object_ind])

                        possible = possible_move_placeholder[bb,...].flatten()
                        possible = np.concatenate((possible, np.zeros(4)))
                        possible = possible.reshape(constants.STEPS_AHEAD + 2, constants.STEPS_AHEAD)

                        possible_pred = possible_moves_pred[bb,...].flatten()
                        possible_pred = np.concatenate((possible_pred, np.zeros(4)))
                        possible_pred = possible_pred.reshape(constants.STEPS_AHEAD + 2, constants.STEPS_AHEAD)

                        mem2 = np.flipud(np.argmax(memory_crop[bb,...], axis=2))
                        mem2[0, 0] = memory_crop.shape[-1] - 2

                        # Answer histogram
                        ans = guess
                        if len(ans) == 1:
                            ans = [ans[0], 1 - ans[0]]
                        ans_size = max(len(ans), 100)
                        ans_hist = np.zeros((ans_size, ans_size))
                        for ii, ans_i in enumerate(ans):
                            bar_height = max(int(np.round(ans_i * ans_size)), 1)
                            col_start = int(ii * ans_size / len(ans))
                            col_end = int((ii + 1) * ans_size / len(ans))
                            ans_hist[:bar_height, col_start:col_end] = ii + 1
                        ans_hist = np.flipud(ans_hist)

                        image_list = [
                                image_placeholder[bb,...],
                                ans_hist,
                                np.flipud(possible),
                                np.flipud(possible_pred),
                                mem2,
                                np.flipud(np.argmax(map_mask_starts[bb, :, :, 2:], axis=2)),
                                obj_mem,
                                ]
                        if question_type_ind == 0:
                            question_str = 'Ex Q: %s A: %s' % (constants.OBJECTS[object_ind], bool(answer))
                        elif question_type_ind == 1:
                            question_str = '# Q: %s A: %d' % (constants.OBJECTS[object_ind], answer)
                        elif question_type_ind == 2:
                            question_str = 'Q: %s in %s A: %s' % (
                                    constants.OBJECTS[object_ind],
                                    constants.OBJECTS[container_ind],
                                    bool(answer))
                        else:
                            # Added guard (assumption): without it, an unmatched
                            # question type would leave question_str undefined.
                            raise Exception('No matching question number')
                        image = drawing.subplot(image_list, 4, 2, constants.SCREEN_WIDTH, constants.SCREEN_HEIGHT,
                                titles=[question_str, 'A: %s' % str(np.argmax(guess))], border=2)
                        cv2.imshow('image', image[:, :, ::-1])
                        cv2.waitKey(0)
                    else:
                        pdb.set_trace()

            if not (constants.DEBUG or constants.DRAWING) and (iteration % 1000 == 0 or iteration == constants.MAX_TIME_STEP - 1):
                saver_t_start = time.time()
                tf_util.save(saver, sess, constants.CHECKPOINT_DIR, iteration)
                saver_t_end = time.time()
                print('Saver:     %.3f' % (saver_t_end - saver_t_start))

            curr_it += 1

            data_time_total += data_t_end - t_start
            solver_time_total += solver_t_end - data_t_end
            total_time_total += time.time() - t_start

            if iteration == start_it or iteration % 10 == 0:
                print('Iteration: %d' % (iteration))
                print('Loss:      %.3f' % loss)
                print('Data:      %.3f' % (data_time_total / curr_it))
                print('Solver:    %.3f' % (solver_time_total / curr_it))
                print('Total:     %.3f' % (total_time_total / curr_it))
                print('Current:   %.3f\n' % ((time.time() - current_time_start) / min(10, curr_it)))

    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        # Save final model
        if not (constants.DEBUG or constants.DRAWING):
            tf_util.save(saver, sess, constants.CHECKPOINT_DIR, iteration)