def _build_summaries( self ):
    ''' Registers the TensorBoard summaries for the GAN metrics and losses.

    Needs both 'get_losses' and '_build_metrics' to have run first: every
    value summarized here is read from an attribute on self. Sets
    self.summaries_built to True once all summaries are registered.

    Raises:
        RuntimeError: if self.losses_built or self.metrics_built is falsy.
    '''
    # The accuracy summaries below read self.real_accuracy / self.fake_accuracy
    # (presumably set by _build_metrics), so the metrics flag is checked along
    # with the losses flag, matching what the error message reports.
    if not ( self.losses_built and self.metrics_built ):
        raise RuntimeError( "Cannot _build_summaries until 'get_losses' ({0}) and _build_metrics ({1}) is run".format(
            self.losses_built, self.metrics_built ) )

    # Optional, more verbose summaries
    if self.extended_summaries:
        slim.summarize_variables()
        slim.summarize_weights()
        slim.summarize_biases()
        slim.summarize_activations()

    # metrics
    tf.summary.scalar( 'metrics/d_accuracy_on_real', self.real_accuracy )
    tf.summary.scalar( 'metrics/d_accuracy_on_fake', self.fake_accuracy )

    # generator-side losses
    slim.summarize_tensor( self.l1_loss, tag='losses/generator_l1_loss' )
    slim.summarize_tensor( self.encoder_regularization_loss, tag='losses/encoder_regularization_loss' )
    slim.summarize_tensor( self.decoder_regularization_loss, tag='losses/decoder_regularization_loss' )
    slim.summarize_tensor( self.loss_g, tag='losses/generator_gan_loss' )
    slim.summarize_tensor( self.loss_g_total, tag='losses/generator_total_loss' )

    # discriminator-side losses
    slim.summarize_tensor( self.discriminator_regularization_loss, tag='losses/discriminator_regularization_loss' )
    slim.summarize_tensor( self.loss_d_total, tag='losses/discriminator_total_loss' )

    self.summaries_built = True
Ejemplo n.º 2
0
    def build_model(self,
                    input_imgs,
                    is_training,
                    targets,
                    masks=None,
                    privileged_input=None):
        '''Assembles the encoder, the siamese post-processing head, losses and summaries.

        Args:
            input_imgs: passed to self.build_encoder, or used directly as the
                encoder output when self.decoder_only is set.
            is_training: flag forwarded to the encoder and the post-processing head.
            targets: targets handed to self.get_losses.
            masks: only stored on self.masks; not used for the loss here.
            privileged_input: unused by this method.
        '''
        print('building model')
        cfg = self.cfg
        self.is_training = is_training

        # In decoder-only mode the input is assumed to already be the representation.
        encoder_output = (input_imgs if self.decoder_only
                          else self.build_encoder(input_imgs, is_training))

        final_output = self.build_siamese_output_postprocess(encoder_output,
                                                             is_training)

        # Softmax loss unless the config contains an 'l2_loss' entry.
        use_softmax = 'l2_loss' not in cfg
        losses = self.get_losses(final_output, targets, is_softmax=use_softmax)

        # Weight regularization is included unless cfg['omit_weight_reg'] is truthy.
        add_reg = not ('omit_weight_reg' in cfg and cfg['omit_weight_reg'])

        reg_terms = slim.losses.get_regularization_losses()
        regularization_loss = tf.add_n(reg_terms,
                                       name='losses/regularization_loss')
        total_loss = slim.losses.get_total_loss(add_regularization_losses=add_reg,
                                                name='losses/total_loss')

        # Expose the pieces of the graph on the model object.
        self.input_images = input_imgs
        self.targets = targets
        self.masks = masks
        self.encoder_output = encoder_output
        self.losses = losses
        self.task_loss = losses[0]
        self.total_loss = total_loss
        self.decoder_output = final_output

        # Summaries (always the full set for this model).
        slim.summarize_variables()
        slim.summarize_weights()
        slim.summarize_biases()
        slim.summarize_activations()
        slim.summarize_collection(tf.GraphKeys.LOSSES)
        # self.accuracy is not assigned in this method; presumably get_losses sets it.
        tf.summary.scalar('accuracy', self.accuracy)
        slim.summarize_tensor(regularization_loss)
        slim.summarize_tensor(total_loss)
        self.model_built = True
    def build_model(self,
                    input_imgs,
                    is_training,
                    targets,
                    masks=None,
                    privileged_input=None):
        '''Builds a constant-prediction model: the output is a single learned variable.

        Only the shape of `input_imgs` is used; its values never reach the output.

        Args:
            input_imgs: batch whose dimensions 1 and 2 size the prediction.
            is_training: stored on self.is_training.
            targets: targets handed to self.get_losses.
            masks: forwarded to self.get_losses.
            privileged_input: unused by this method.
        '''
        print('building model')
        cfg = self.cfg
        self.is_training = is_training

        # Dimensions 1 and 2 of the input plus a fixed 64-channel last axis.
        spatial_dims = list(input_imgs.shape[1:3])
        prediction_shape = spatial_dims + [64]

        constant_input = tf.zeros([self.batch_size] + prediction_shape,
                                  name='const_input',
                                  dtype=tf.float32)
        predictions = tf.get_variable("constant_prediction",
                                      prediction_shape,
                                      dtype=tf.float32)

        # Adding the variable to the zero tensor tiles it across the batch axis.
        final_output = constant_input + predictions

        print("Outputs: ", final_output.shape)
        print("Targets: ", targets.shape)

        losses = self.get_losses(final_output, targets, masks)
        total_loss = slim.losses.get_total_loss(add_regularization_losses=False,
                                                name='losses/total_loss')

        self.input_images = input_imgs
        self.targets = targets
        self.decoder_output = final_output
        self.losses = losses
        self.total_loss = total_loss

        # Summaries
        slim.summarize_variables()
        slim.summarize_tensor(total_loss)
        self.model_built = True
Ejemplo n.º 4
0
    def build_model(self, input_imgs, is_training, targets=None, masks=None, privileged_input=None):
        '''Builds encoder and decoder, then computes the loss on flattened tensors.

            Args:
            input_imgs: passed to self.build_encoder, or used directly as the
                       encoder output when self.decoder_only is set.
            is_training: flag forwarded to the encoder and decoder builders.
            targets: reshaped to 1-D before being handed to self.get_losses.
            masks: reshaped to 1-D for the loss; a scalar float32 constant 1 is
                  used when None.
            privileged_input: unused by this method.
        '''
        print('building model')
        cfg = self.cfg
        self.is_training = is_training
        self.input_images = input_imgs
        self.target_images = targets
        # NOTE: self.masks keeps the caller's value (possibly None); the default
        # constant below only replaces the local variable.
        self.masks = masks
        self.targets = targets

        if masks is None:
            masks = tf.constant( 1, dtype=tf.float32, shape=[], name='constant_mask' )

        # In decoder-only mode the input is assumed to already be the representation.
        self.encoder_output = ( input_imgs if self.decoder_only
                                else self.build_encoder( input_imgs, is_training ) )
        self.decoder_output = self.build_decoder( self.encoder_output, is_training )

        # Flatten: one row per element for the output, flat vectors for targets and masks.
        num_channels = cfg[ 'target_num_channels' ]
        flat_output = tf.reshape( self.decoder_output, [-1, num_channels] )
        flat_target = tf.reshape( targets, [-1] )
        flat_mask = tf.reshape( masks, [-1] )
        losses = self.get_losses( flat_output, flat_target, flat_mask )

        # Weight regularization is included unless cfg['omit_weight_reg'] is truthy.
        add_reg = not ( 'omit_weight_reg' in cfg and cfg['omit_weight_reg'] )

        reg_terms = slim.losses.get_regularization_losses()
        regularization_loss = tf.add_n( reg_terms, name='losses/regularization_loss' )
        total_loss = slim.losses.get_total_loss( add_regularization_losses=add_reg,
                                                 name='losses/total_loss' )

        self.losses = losses
        self.total_loss = total_loss

        # Summaries
        if self.extended_summaries:
            slim.summarize_variables()
            slim.summarize_weights()
            slim.summarize_biases()
            slim.summarize_activations()
        slim.summarize_collection( tf.GraphKeys.LOSSES )
        slim.summarize_tensor( regularization_loss )
        slim.summarize_tensor( total_loss )
        self.model_built = True
Ejemplo n.º 5
0
    def build_summary_ops(self, graph):
        """Build summary ops. Add summaries for variables, weights, biases, activations, and losses.

        The merged op and the writer are also stored on the instance as
        `self.summary_op` and `self.summary_writer`.

        Args:
            graph: The graph handed to the `tf.summary.FileWriter`.

        Returns:
            summary_op: The (merged) summary op.
            summary_writer: A summary writer logging to `cfg.DIR.LOG_PATH`.
        """
        # add summaries
        slim.summarize_variables()
        slim.summarize_weights()
        slim.summarize_biases()
        slim.summarize_activations()
        slim.summarize_collection(tf.GraphKeys.LOSSES)

        with tf.name_scope('summary_ops'):
            summary_op = tf.summary.merge_all()
            # `cfg` here is a name from the enclosing module, not an attribute of self.
            summary_writer = tf.summary.FileWriter(cfg.DIR.LOG_PATH, graph=graph)
        self.summary_op = summary_op
        self.summary_writer = summary_writer
        # Return the pair as the docstring promises; callers that ignore the
        # return value are unaffected.
        return summary_op, summary_writer
Ejemplo n.º 6
0
    def build_model(self,
                    input_imgs,
                    is_training,
                    targets=None,
                    masks=None,
                    privileged_input=None):
        '''Builds a ResNet50UpProj network, log-scales its output and sets up losses.

            Args:
            input_imgs: fed to ResNet50UpProj under the 'data' key; also used as
                       the loss target when `targets` is None.
            is_training: stored on self.is_training (not passed to the network here).
            targets: loss targets; falls back to `input_imgs` when None.
            masks: passed to self.get_losses; a scalar float32 constant 1 is used
                  when None.
            privileged_input: unused by this method.
        '''
        print('building model')
        cfg = self.cfg
        self.is_training = is_training

        if masks is None:
            masks = tf.constant(1, dtype=tf.float32, shape=[], name='constant_mask')

        net = ResNet50UpProj({'data': input_imgs}, cfg['batch_size'], 1, False)
        raw_output = net.get_output()
        # 11.090354888959125 equals ln(2 ** 16); presumably this normalizes a
        # 16-bit value range in log space -- TODO confirm.
        scaled_output = raw_output * 128.
        decoder_output = tf.log(scaled_output + 1.) / 11.090354888959125

        # Without explicit targets the network is trained against its own input.
        loss_target = input_imgs if targets is None else targets
        losses = self.get_losses(decoder_output, loss_target, masks)

        # NOTE: add_reg is computed but not used below; no regularization loss
        # is built in this method.
        add_reg = not ('omit_weight_reg' in cfg and cfg['omit_weight_reg'])

        self.input_images = input_imgs
        self.target_images = targets
        self.targets = targets
        self.masks = masks
        self.decoder_output = decoder_output
        self.losses = losses
        # The first entry returned by get_losses serves as the total loss.
        self.total_loss = losses[0]

        # Summaries
        if self.extended_summaries:
            slim.summarize_variables()
            slim.summarize_weights()
            slim.summarize_biases()
            slim.summarize_activations()
        slim.summarize_collection(tf.GraphKeys.LOSSES)
        self.model_built = True
Ejemplo n.º 7
0
    def build_model(self,
                    input_imgs,
                    is_training,
                    targets=None,
                    masks=None,
                    privileged_input=None):
        '''Builds the transfer model: (optional) source encoder, transfer encoder, decoder, losses.

            Args:
            input_imgs: a pair that is unpacked as (img_in, representations).
            is_training: flag for whether the model is in training mode or not;
                       which parts actually train is refined by the
                       retrain/finetune/unlock flags below.
            targets: forwarded to self.decoder.build_model.
            masks: forwarded to self.decoder.build_model.
            privileged_input: not read here; the decoder receives img_in as its
                       privileged input instead.

            There are multiple types of transfers that we might care about:
                'funnel': Train the transfer net on several types of representations
                'finetune_decoder': Train the transfer net along with the decoder. The decoder
                    should already be pretrained for the target task
                'retrain decoder': Take a trained transfer net model, clear the decoder,
                    and train it from scratch after using the transferred representations.

            Besides the graph tensors, this sets self.decoder_saver (and, when
            'finetune_encoder_imagenet' is in the cfg, self.encoder_saver_imagenet).
        '''
        print('building model')
        cfg = self.cfg

        # Get image and representation parts
        input_placeholder = input_imgs
        img_in, representations = input_placeholder
        # The mere presence of the key enables this mode; its value is not inspected.
        self.finetune_src_encoder_imagenet = 'finetune_encoder_imagenet' in cfg
        if self.finetune_src_encoder_imagenet:

            # Build the source encoder in-graph and use its output in place of
            # the representations that were passed in.
            self.src_encoder = cfg['input_cfg']['model_type'](self.global_step,
                                                              cfg['input_cfg'])
            representations = self.src_encoder.build_encoder(
                img_in, is_training=is_training)
            # Every global variable created so far, minus anything named like
            # global_step, is handed to the encoder saver.
            encoder_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            for v in tuple(encoder_vars):
                if 'global_step' in v.name:
                    encoder_vars.remove(v)
            self.encoder_vars = encoder_vars
            self.encoder_saver_imagenet = tf.train.Saver(encoder_vars)
            #self.finetune_encoder_imagenet_saver = tf.
        input_placeholder = (img_in, representations)
        input_imgs = input_placeholder
        print("Represntation input shape")
        print(representations.shape)

        # Determine what part of the model we are training/freezing
        self.is_training = is_training
        self.training_encoder = is_training
        self.training_decoder = is_training
        self.restoring_encoder = True
        self.restoring_decoder = True
        if self.retrain_decoder:
            self.training_encoder = False  # Retraining decoder means that we have trained a transfer
            self.restoring_encoder = True  # Retraining decoder means that we have trained a transfer
            self.restoring_decoder = self.finetune_decoder
        else:
            self.restoring_encoder = False
            self.restoring_decoder = True
            if not self.finetune_decoder:
                self.training_decoder = False

        # unlock_decoder overrides the choices above: nothing is restored and
        # the decoder trains whenever is_training is set.
        # NOTE(review): retrain_decoder is switched on only here, after the
        # branch above has already run, so training_encoder keeps the value
        # chosen there -- confirm this is intended.
        if self.unlock_decoder:
            self.restoring_encoder = False
            self.restoring_decoder = False
            self.training_decoder = is_training
            self.retrain_decoder = True

        # Build encoder
        if not 'metric_net_only' in cfg:
            encoder_output = self.build_encoder(input_placeholder,
                                                self.training_encoder)
        else:
            # No transfer encoder: the representations go to the decoder as-is.
            encoder_output = representations
        # Snapshot of the variables that exist before the decoder is built, so
        # the decoder's own variables can be isolated by set difference below.
        current_vars = set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
        self.encoder_output = encoder_output

        # funnel networks are going to other versions of perceptual_transfer nets
        if self.encoder_scope == 'funnel':
            encoder_output = (img_in, encoder_output)

        # What to do with the decoder
        print("Building decoder")
        self.decoder.build_model(encoder_output,
                                 is_training=self.training_decoder,
                                 targets=targets,
                                 masks=masks,
                                 privileged_input=img_in)

        # Make the saver which we will restore from
        # finetune_decoder: the saver covers every global variable (a list);
        # otherwise only those created since the snapshot above (a set).
        if self.finetune_decoder:  # self.retrain_decoder:
            decoder_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        else:
            decoder_vars = set(tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES)) - current_vars
        # Iterate over a tuple copy because the collection is mutated in the loop.
        for v in tuple(decoder_vars):
            if 'global_step' in v.name:
                decoder_vars.remove(v)

        self.decoder_saver = tf.train.Saver(decoder_vars)
        print("Finished building decoder")

        # use weight regularization
        # NOTE(review): add_reg is computed but never read afterwards -- the
        # regularization loss below is always added to the total loss.
        if 'omit_weight_reg' in cfg and cfg['omit_weight_reg']:
            add_reg = False
        else:
            add_reg = True

        # Regularize only the encoder scope, unless the decoder is being
        # trained too, in which case no scope filter is applied (scope=None).
        regularization_scope = self.encoder_scope
        if self.retrain_decoder or self.finetune_decoder:
            regularization_scope = None
        # get losses
        # With regularization_scope None the op name is formatted with the
        # literal text 'None'.
        regularization_loss = tf.add_n(
            slim.losses.get_regularization_losses(scope=regularization_scope),
            name='losses/{}_regularization_loss'.format(regularization_scope))

        total_loss = self.decoder.total_loss + regularization_loss
        self.input_images = img_in
        self.input_representations = representations
        self.target_images = targets
        self.losses = self.decoder.losses
        self.total_loss = total_loss
        # self.init_op = tf.global_variables_initializer()

        # add summaries
        if self.extended_summaries:
            slim.summarize_variables()
            slim.summarize_weights()
            slim.summarize_biases()
            slim.summarize_activations()
        # slim.summarize_collection(tf.GraphKeys.LOSSES)
        # NOTE(review): the tag below is spelled 'regularizaton'; it is a
        # runtime string, so renaming it would change the summary tag.
        slim.summarize_tensor(regularization_loss,
                              tag='losses/{}_regularizaton_loss'.format(
                                  self.encoder_scope))
        slim.summarize_tensor(total_loss,
                              tag='losses/{}_total_loss'.format(
                                  self.encoder_scope))
        self.model_built = True
Ejemplo n.º 8
0
    def build_model(self, input_imgs, is_training, targets=None, masks=None, privileged_input=None):
        '''Chains two transfer encoders, attaching a frozen decoder after each one.

            Args:
            input_imgs: a pair that is unpacked as (img_in, representations).
            is_training: flag forwarded to self.build_encoder; the decoders are
                       always built with is_training=False.
            targets: indexed per stage (targets[0], targets[1]).
            masks: indexed per stage (masks[0], masks[1]).
            privileged_input: not read here; the decoders receive img_in instead.
        '''
        print('building model')
        cfg = self.cfg
        self.is_training = is_training
        img_in, representations = input_imgs
        print("Encoder input shape")
        print(representations.shape)

        self.decoder_savers = []
        self.decoder_ckpt_paths = cfg['model_paths']
        print("Building decoder")

        # Each stage feeds the previous stage's encoder output into the next encoder.
        stage_input = representations
        for stage in range(2):
            transfer_scope = 'transfer_{}_{}'.format(stage, stage + 1)
            stage_input = self.build_encoder(stage_input, is_training, transfer_scope)
            # Snapshot taken before the decoder is built, to isolate its variables.
            current_vars = set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
            self.encoder_output = stage_input

            decoder_scope = 'decoder_{}'.format(stage)
            decoder = self.decoders[stage]
            with tf.variable_scope(decoder_scope):
                decoder.secret_scope = decoder_scope
                decoder.build_model(stage_input,
                                    is_training=False,
                                    targets=targets[stage],
                                    masks=masks[stage],
                                    privileged_input=img_in)

                new_vars = set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)) - current_vars
                new_vars.difference_update(
                    [v for v in new_vars if 'global_step' in v.name])

                # Saver keys drop the 'decoder_<stage>/' scope text from each
                # variable name; presumably the checkpoints were written
                # without it -- TODO confirm.
                scope_prefix = "decoder_{}/".format(stage)
                variables_to_restore = {
                    var.op.name.replace(scope_prefix, ""): var for var in new_vars}
                self.decoder_savers.append(tf.train.Saver(variables_to_restore))
                current_vars |= new_vars
        print("Finished building decoder")

        self.decoder_saver = self.build_saver()

        # NOTE: add_reg is computed but not used below; the regularization
        # loss is always added to the total loss.
        add_reg = not ('omit_weight_reg' in cfg and cfg['omit_weight_reg'])

        # Regularization restricted to the encoder scope.
        encoder_reg_terms = tf.losses.get_regularization_losses(scope=self.encoder_scope)
        self.regularization_loss = tf.add_n(
            encoder_reg_terms,
            name='losses/{}_regularization_loss'.format(self.encoder_scope))
        self.input_images = img_in
        self.input_representations = representations
        self.target_images = targets
        self.losses = [d.total_loss for d in self.decoders]
        self.total_loss = sum(self.losses) + self.regularization_loss

        # Summaries
        if self.extended_summaries:
            slim.summarize_variables()
            slim.summarize_weights()
            slim.summarize_biases()
            slim.summarize_activations()
        slim.summarize_tensor(self.regularization_loss,
                              tag='losses/{}_regularizaton_loss'.format(self.encoder_scope))
        slim.summarize_tensor(self.total_loss,
                              tag='losses/{}_total_loss'.format(self.encoder_scope))
        self.model_built = True
Ejemplo n.º 9
0
    def build_model(self,
                    input_imgs,
                    is_training,
                    targets,
                    masks=None,
                    privileged_input=None):
        '''Builds the encoder and two pose heads, and derives a third pose from them.

        Heads "12" and "23" are predicted from the encoder output; "13" is
        computed by combining the two denormalized predictions and normalizing
        the result again.

        Args:
            input_imgs: passed to self.build_encoder, or used directly as the
                encoder output when self.decoder_only is set.
            is_training: flag forwarded to the encoder and both heads.
            targets: 2-D tensor sliced into three 6-wide column blocks starting
                at columns 0, 6 and 12, for the pairs 12, 13 and 23 respectively.
            masks: unused by this method.
            privileged_input: unused by this method.
        '''
        print('building model')
        cfg = self.cfg
        self.is_training = is_training

        # In decoder-only mode the input is assumed to already be the representation.
        encoder_output = (input_imgs if self.decoder_only
                          else self.build_encoder(input_imgs, is_training))

        final_output_12 = self.build_siamese_output_postprocess(
            encoder_output, is_training, scope="three_layer_fc_network12")
        final_output_23 = self.build_siamese_output_postprocess(
            encoder_output, is_training, scope="three_layer_fc_network23")

        # Compose 1->2 and 2->3 in denormalized space, then normalize again.
        pose_12 = self.denormalize_fixated_camera_pose(final_output_12)
        pose_23 = self.denormalize_fixated_camera_pose(final_output_23)
        combined_pose = self.calculate_combined_relative_camera_pose(pose_12, pose_23)
        final_output_13 = self.normalize_fixated_camera_pose(combined_pose)

        # Targets, in the same (12, 13, 23) order as the outputs below.
        batch_size = cfg['batch_size']
        target_total = [tf.slice(targets, [0, col], [batch_size, 6])
                        for col in (0, 6, 12)]
        final_output = [final_output_12, final_output_13, final_output_23]

        use_softmax = 'l2_loss' not in cfg
        losses = self.get_losses(final_output, target_total, is_softmax=use_softmax)

        # Weight regularization is included unless cfg['omit_weight_reg'] is truthy.
        add_reg = not ('omit_weight_reg' in cfg and cfg['omit_weight_reg'])

        reg_terms = slim.losses.get_regularization_losses()
        regularization_loss = tf.add_n(reg_terms,
                                       name='losses/regularization_loss')
        total_loss = slim.losses.get_total_loss(add_regularization_losses=add_reg,
                                                name='losses/total_loss')

        self.input_images = input_imgs
        self.targets = targets
        self.encoder_output = encoder_output
        self.losses = losses
        self.total_loss = total_loss
        self.decoder_output = final_output

        # Summaries
        if self.extended_summaries:
            slim.summarize_variables()
            slim.summarize_weights()
            slim.summarize_biases()
            slim.summarize_activations()
        slim.summarize_collection(tf.GraphKeys.LOSSES)
        # self.accuracy is not assigned in this method; presumably get_losses sets it.
        tf.summary.scalar('accuracy', self.accuracy)
        slim.summarize_tensor(regularization_loss)
        slim.summarize_tensor(total_loss)
        self.model_built = True