Code example #1
File: training.py  Project: paulfun92/simplelearn
    def __init__(self,
                 shared_value,
                 final_value,
                 epochs_to_saturation):
        assert_is_instance(shared_value,
                           theano.tensor.sharedvar.SharedVariable)
        assert_is_subdtype(shared_value.dtype, numpy.floating)

        assert_equal(shared_value.ndim == 0, numpy.isscalar(final_value))

        if numpy.isscalar(final_value):
            assert_floating(final_value)
        else:
            assert_is_subdtype(final_value.dtype, numpy.floating)
            assert_equal(final_value.shape,
                         shared_value.get_value().shape)

        assert_integer(epochs_to_saturation)
        assert_greater(epochs_to_saturation, 0)

        self.shared_value = shared_value

        cast = numpy.cast[shared_value.dtype]
        self._final_value = cast(final_value)

        self._epochs_to_saturation = epochs_to_saturation

        self._num_epochs_seen = None
        self._initial_value = None
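
This constructor is the setup half of an epoch callback that linearly anneals a shared value toward final_value over a fixed number of epochs. A minimal usage sketch under that assumption follows; the enclosing class name LinearlyInterpolatesOverEpochs is a placeholder, since the excerpt shows only its __init__:

import numpy
import theano

# A 0-dimensional, floating-point shared variable, as the asserts above require.
learning_rate = theano.shared(numpy.asarray(0.01, dtype=theano.config.floatX),
                              name='learning rate')

# Hypothetical construction: anneal the learning rate to 0.001 over 50 epochs.
annealer = LinearlyInterpolatesOverEpochs(shared_value=learning_rate,
                                          final_value=0.001,
                                          epochs_to_saturation=50)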
Code example #2
def main():

    args = parse_args()

    images = load_npy_file(args.images)
    labels = load_npy_file(args.labels)

    blank_image = numpy.zeros(images.shape[1:])

    num_examples = labels.shape[0]

    images_fmt = DenseFormat(axes=('b', '0', '1', 'c'),
                             shape=((-1, ) + images.shape[1:]),
                             dtype=images.dtype)

    labels_fmt = DenseFormat(axes=('b', 'f'),
                             shape=(-1, labels.shape[1]),
                             dtype=labels.dtype)

    print("Allocating output file.")

    # Allocate one extra row; row 0 will hold a blank (all-zero) example.
    output_memmap = make_memmap_file(args.output,
                                     num_examples + 1,
                                     ['images', 'labels'],
                                     [images_fmt, labels_fmt])

    print("Copying {} images and labels.".format(num_examples))

    assert_equal(images_fmt.axes.index('b'), 0)
    assert_equal(labels_fmt.axes, ('b', 'f'))
    assert_is_subdtype(labels_fmt.dtype, numpy.signedinteger)
    max_category = labels[:, 0].max()

    blank_label = numpy.empty(labels.shape[1], dtype=labels.dtype)
    blank_label[0] = max_category + 1
    blank_label[1] = 0
    blank_label[2:] = -1

    # Row 0 holds the blank example.
    output_memmap['images'][0, ...] = blank_image
    output_memmap['labels'][0, ...] = blank_label

    output_memmap['images'][1:, ...] = images
    output_memmap['labels'][1:, ...] = labels

    print("Wrote output to {}".format(args.output))
Code example #3
    def __init__(self,
                 parameter,
                 gradient,  # see (*) below
                 learning_rate):

        # (*): We pass in the gradient, rather than the cost, since there are
        # different ways to generate the gradient expression, and we want to
        # allow the user to choose different ones, rather than generating the
        # gradient here ourselves. In particular, the 'consider_constant'
        # argument to theano.gradient.grad() could be of interest to the user.
        # (It's a list of symbols to consider constant, and thus not
        # backpropagate through to their inputs.)
        '''
        Parameters
        ----------
        parameter: A theano symbol
          A parameter being optimized by an Sgd trainer.

        gradient: A theano symbol
          The gradient of the loss function w.r.t. the above parameter.

        learning_rate: float
          The initial value of the learning rate.
        '''

        #
        # sanity-check args
        #

        assert_is_instance(parameter, theano.tensor.sharedvar.SharedVariable)
        assert_is_instance(gradient, theano.gof.Variable)
        assert_equal(parameter.broadcastable, gradient.broadcastable,
                     "If an Op's .grad() method is buggy, it can return "
                     "broadcast masks.")
        assert_is_subdtype(gradient.dtype, numpy.floating)
        assert_greater_equal(learning_rate, 0)

        floatX = theano.config.floatX

        if str(gradient.dtype) != str(floatX):
            gradient = theano.tensor.cast(gradient, floatX)

        #
        # define updates, set members
        #

        def concat(str0, str1):
            '''
            Like str0 + str1, except returns None if either is None.
            '''
            if str0 is None or str1 is None:
                return None
            else:
                return str0 + str1

        def make_shared_floatX(numeric_var, name, **kwargs):
            return theano.shared(numpy.asarray(numeric_var, dtype=floatX),
                                 name=name,
                                 **kwargs)

        self.learning_rate = make_shared_floatX(learning_rate,
                                                concat(parameter.name,
                                                       ' learning rate'))

        step = - self.learning_rate * gradient

        self.averaged_param = make_shared_floatX(
            parameter.get_value(),
            concat(parameter.name, ' average'),
            broadcastable=parameter.broadcastable)

        self.parameter = parameter

        self.iteration_number = make_shared_floatX(1.0, 'iteration counter')

        self.parameter_temp = make_shared_floatX(
            0 * parameter.get_value(),
            concat(parameter.name, ' temp'),
            broadcastable=parameter.broadcastable)

        assert_equal(parameter.broadcastable,
                     step.broadcastable)

        new_parameter = parameter + step
        new_parameter.name = concat('new ', parameter.name)

        # Running (Polyak-style) average over iterations:
        #   avg_n = ((n - 1) / n) * avg_{n-1} + (1 / n) * param_n
        new_averaged_param = (
            ((self.iteration_number - 1.0) / self.iteration_number) *
            self.averaged_param +
            (1.0 / self.iteration_number) * new_parameter)
        # Increment the counter symbolically, as part of the update graph.
        new_iteration_number = self.iteration_number + 1.0

        updates = OrderedDict([(self.parameter, new_parameter),
                               (self.averaged_param, new_averaged_param),
                               (self.iteration_number, new_iteration_number)])

        super(SgdParameterUpdater, self).__init__(updates)
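
As the comment at the top of this constructor explains, the caller builds the gradient expression and hands it in, which keeps options such as theano.gradient.grad's consider_constant argument available. A hedged sketch of that pattern follows; the model below (inputs, weights, the norm-based scale) is purely illustrative and not part of simplelearn:

import numpy
import theano
import theano.tensor as T

inputs = T.matrix('inputs')
weights = theano.shared(
    numpy.random.RandomState(0).uniform(-0.1, 0.1, (10, 3))
         .astype(theano.config.floatX),
    name='weights')

# A scale factor derived from the weights that we deliberately do NOT
# backpropagate through, by listing it in consider_constant.
scale = weights.norm(2)
cost = (T.dot(inputs, weights) ** 2).mean() * scale

gradient = theano.gradient.grad(cost, weights, consider_constant=[scale])

# The gradient expression, not the cost, is what the updater receives.
updater = SgdParameterUpdater(parameter=weights,
                              gradient=gradient,
                              learning_rate=0.01)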
Code example #4
    def __init__(self,
                 parameter,
                 gradient,
                 gradient_at_old_params,
                 learning_rate,
                 momentum,
                 method,
                 input_iterator,
                 input_iterator_full,
                 use_nesterov):

        #
        # sanity-check args
        #

        assert_is_instance(parameter, theano.tensor.sharedvar.SharedVariable)
        assert_is_instance(gradient, theano.gof.Variable)
        assert_is_instance(gradient_at_old_params, theano.gof.Variable)
        assert_equal(parameter.broadcastable, gradient.broadcastable,
                     "If an Op's .grad() method is buggy, it can return "
                     "broadcast masks.")
        assert_is_subdtype(gradient.dtype, numpy.floating)
        assert_is_subdtype(gradient_at_old_params.dtype, numpy.floating)
        assert_greater_equal(learning_rate, 0)
        assert_greater_equal(momentum, 0)
        assert_is_instance(use_nesterov, bool)

        floatX = theano.config.floatX

        if str(gradient.dtype) != str(floatX):
            gradient = theano.tensor.cast(gradient, floatX)

        #
        # define updates, set members
        #

        def concat(str0, str1):
            '''
            Like str0 + str1, except returns None if either is None.
            '''
            if str0 is None or str1 is None:
                return None
            else:
                return str0 + str1

        def make_shared_floatX(numeric_var, name, **kwargs):
            return theano.shared(numpy.asarray(numeric_var, dtype=floatX),
                                 name=name,
                                 **kwargs)

        self.learning_rate = make_shared_floatX(learning_rate,
                                                concat(parameter.name,
                                                       ' learning rate'))

        self.momentum = make_shared_floatX(momentum,
                                           concat(parameter.name, ' momentum'))

        self._velocity = make_shared_floatX(
            0.0 * parameter.get_value(),
            concat(parameter.name, ' velocity'),
            broadcastable=parameter.broadcastable)

        self.full_gradient = make_shared_floatX(
            0.0 * parameter.get_value(),
            concat(parameter.name, ' full gradient'),
            broadcastable=parameter.broadcastable)

        self.method = method

        # S2GD_on is a shared scalar equal to 1.0 when an S2GD-style correction
        # term is used, and 0.0 otherwise.
        if self.method in ('SGD', 'S2GD_plus'):
            multiplier = 0.0
        elif self.method in ('S2GD', 'S2GD_rolling'):
            multiplier = 1.0
        else:
            raise ValueError('method must be one of "SGD", "S2GD", "S2GD_plus", '
                             'or "S2GD_rolling", not {}'.format(self.method))

        self.S2GD_on = make_shared_floatX(numeric_var=multiplier,
                                          name='use_S2GD')

        if self.method == 'S2GD_rolling':
            # Rolling estimate of the full gradient: replace this minibatch's
            # old-parameter contribution with its current-parameter one.
            total_size_dataset = float(input_iterator.dataset.tensors[0].shape[0])
            batch_size = float(input_iterator.batch_size)
            updated_full_gradient = (
                (gradient * batch_size +
                 self.full_gradient * total_size_dataset -
                 gradient_at_old_params * batch_size) / total_size_dataset)
            new_velocity = (self.momentum * self._velocity -
                            self.learning_rate * updated_full_gradient)
        else:
            new_velocity = (self.momentum * self._velocity -
                            self.learning_rate *
                            (gradient +
                             self.S2GD_on * (self.full_gradient -
                                             gradient_at_old_params)))

        new_velocity.name = concat('new ', self._velocity.name)


        assert_equal(str(new_velocity.dtype), str(floatX))
        assert_equal(self._velocity.broadcastable, new_velocity.broadcastable)

        step = (self.momentum * new_velocity - self.learning_rate * gradient
                if use_nesterov
                else new_velocity)

        assert_equal(parameter.broadcastable,
                     step.broadcastable)

        new_parameter = parameter + step
        new_parameter.name = concat('new ', parameter.name)

        if self.method == 'S2GD_rolling':
            updates = OrderedDict([(parameter, new_parameter),
                                   (self._velocity, new_velocity),
                                   (self.full_gradient, updated_full_gradient)])
        else:
            updates = OrderedDict([(parameter, new_parameter),
                                   (self._velocity, new_velocity)])

        # Accumulate the full-dataset gradient over one epoch's minibatches.
        total_size_dataset = float(input_iterator_full.dataset.tensors[0].shape[0])
        batch_size = float(input_iterator_full.batch_size)
        steps = total_size_dataset / batch_size

        self.full_gradient_updates = OrderedDict(
            [(self.full_gradient,
              self.full_gradient + (gradient / steps))])

        super(SemiSgdParameterUpdater, self).__init__(updates)
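
Reading the two branches above together, the velocity update this constructor builds can be summarized compactly. With v the velocity, μ the momentum, η the learning rate, g the minibatch gradient at the current parameters, g_old the minibatch gradient at the stored snapshot parameters, ḡ the stored full gradient, N the dataset size, and b the batch size, the update in LaTeX:

% SGD and S2GD_plus (S2GD_on = 0), S2GD (S2GD_on = 1):
v_{t+1} = \mu v_t - \eta \bigl( g + \mathrm{S2GD\_on} \, (\bar{g} - g_{\mathrm{old}}) \bigr)

% S2GD_rolling keeps a rolling full-gradient estimate instead:
\bar{g}_{\mathrm{new}} = \frac{b\,g + N\,\bar{g} - b\,g_{\mathrm{old}}}{N},
\qquad
v_{t+1} = \mu v_t - \eta \, \bar{g}_{\mathrm{new}}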
Code example #5
    def __init__(self, image_node, yaml_dict, numpy_rng, theano_rng):
        '''
        Parameters
        ----------
        image_node: InputNode
        yaml_dict: dict
        numpy_rng: numpy.random.RandomState
        theano_rng: a Theano random number generator
        '''

        super(IdAndCameraDirModel, self).__init__()

        #
        # Build the model nodes, and initialize their weights.
        #

        # preprocessing layers
        shared_layers = []
        shared_layers.append(RgbToGray(image_node))
        shared_layers.append(Lcn(shared_layers[-1]))

        assert_is_subdtype(image_node.output_format.dtype, numpy.floating)

        use_dropout = yaml_dict['hyperparams']['use_dropout']

        def get_num_classes(yaml_dict):
            fg_path = yaml_dict['datasets']['training']['fg_path']
            dataset = MemmapDataset(os.path.join(data_path, fg_path))
            label_to_id = NorbLabelToObjectIdConverter(dataset.tensors[1])
            return label_to_id.num_unique_ids

        add_conv_layers(shared_layers[-1],
                        yaml_dict['model']['shared_layers']['conv'],
                        use_dropout,
                        numpy_rng,
                        theano_rng,
                        shared_layers)

        add_affine_layers(shared_layers[-1],
                          yaml_dict['model']['shared_layers']['affine'],
                          use_dropout,
                          numpy_rng,
                          theano_rng,
                          shared_layers)

        id_layers = []
        add_classifier_mlp(shared_layers[-1],
                           yaml_dict['model']['id_layers'],
                           get_num_classes(yaml_dict),
                           use_dropout,
                           numpy_rng,
                           theano_rng,
                           id_layers)

        cam_dir_layers = []
        add_regressor_mlp(shared_layers[-1],
                          yaml_dict['model']['cam_dir_layers'],
                          3,
                          use_dropout,
                          numpy_rng,
                          theano_rng,
                          cam_dir_layers)

        self.input_node = image_node
        self.shared_layers = shared_layers
        self.id_layers = id_layers
        self.cam_dir_layers = cam_dir_layers
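
The constructor reads only a few keys from yaml_dict. A hedged sketch of the minimal structure it assumes is below; the leaf values, in particular the per-layer specs and the fg_path, are placeholders, and the real values come from the project's YAML config files:

yaml_dict = {
    'hyperparams': {'use_dropout': True},
    'datasets': {'training': {'fg_path': 'norb_train_fg.npy'}},  # placeholder path
    'model': {
        'shared_layers': {
            'conv': [...],    # per-conv-layer specs (placeholder)
            'affine': [...],  # per-affine-layer specs (placeholder)
        },
        'id_layers': [...],       # classifier MLP spec (placeholder)
        'cam_dir_layers': [...],  # 3-output regressor MLP spec (placeholder)
    },
}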
Code example #6
    def __init__(self,
                 small_conv_model,
                 image_node,
                 yaml_dict,
                 numpy_rng,
                 theano_rng):
        assert_is_instance(small_conv_model, IdAndCameraDirModel)
        assert_equal(image_node.output_format.axes, ('b', '0', '1', 'c'))

        self.shared_layers = []

        self.shared_layers.append(RgbToGray(image_node))
        self.shared_layers.append(Lcn(self.shared_layers[-1]))

        assert_is_subdtype(image_node.output_format.dtype, numpy.floating)

        use_dropout = yaml_dict['hyperparams']['use_dropout']

        def get_num_classes(yaml_dict):
            fg_path = yaml_dict['datasets']['training']['fg_path']
            dataset = MemmapDataset(os.path.join(data_path, fg_path))
            label_to_id = NorbLabelToObjectIdConverter(dataset.tensors[1])
            return label_to_id.num_unique_ids

        add_conv_layers(self.shared_layers[-1],
                        yaml_dict['model']['shared_layers']['conv'],
                        use_dropout,
                        numpy_rng,
                        theano_rng,
                        self.shared_layers)

        def get_first_affine_layer_filter_shape(small_conv_model):
            first_affine_layer = \
                small_conv_model.shared_layers[len(self.shared_layers)]

            assert_is_instance(first_affine_layer, AffineLayer)
            assert_is_instance(first_affine_layer.inputs[0], Conv2dLayer)
            assert_equal(first_affine_layer.inputs[0].output_format.axes,
                         ('b', 'c', '0', '1'))
            return first_affine_layer.inputs[0].output_format.shape[2:]

        first_filter_shape = \
            get_first_affine_layer_filter_shape(small_conv_model)

        assert_equal(first_filter_shape, (2, 2))

        add_affine_layers_conv(self.shared_layers[-1],
                               yaml_dict['model']['shared_layers']['affine'],
                               use_dropout,
                               numpy_rng,
                               theano_rng,
                               first_filter_shape=first_filter_shape,
                               output_list=self.shared_layers)

        self.id_layers = []
        add_classifier_mlp_conv(self.shared_layers[-1],
                                yaml_dict['model']['id_layers'],
                                get_num_classes(yaml_dict),
                                use_dropout,
                                numpy_rng,
                                theano_rng,
                                self.id_layers)

        self.cam_dir_layers = []
        add_regressor_mlp_conv(self.shared_layers[-1],
                               yaml_dict['model']['cam_dir_layers'],
                               3,
                               use_dropout,
                               numpy_rng,
                               theano_rng,
                               self.cam_dir_layers)

        self.input_node = image_node
Code example #7
    def __init__(self, image_node, yaml_dict, numpy_rng, theano_rng):
        '''
        Parameters
        ----------
        image_node: InputNode
        yaml_dict: dict
        numpy_rng: numpy.random.RandomState
        theano_rng: a Theano random number generator
        '''

        super(IdPoseLightingModel, self).__init__()

        self.input_node = image_node

        # preprocessing layers
        self.shared_layers = []
        self.shared_layers.append(RgbToGray(image_node))
        self.shared_layers.append(Lcn(self.shared_layers[-1]))

        assert_is_subdtype(image_node.output_format.dtype, numpy.floating)

        use_dropout = yaml_dict['hyperparams']['use_dropout']

        add_conv_layers(self.shared_layers[-1],
                        yaml_dict['model']['shared_layers']['conv'],
                        use_dropout,
                        numpy_rng,
                        theano_rng,
                        self.shared_layers)

        add_affine_layers_conv(self.shared_layers[-1],
                               yaml_dict['model']['shared_layers']['affine'],
                               use_dropout,
                               numpy_rng,
                               theano_rng,
                               self.shared_layers)

        def get_num_classes(yaml_dict):
            fg_path = yaml_dict['datasets']['training']['fg_path']
            dataset = MemmapDataset(os.path.join(data_path, fg_path))
            label_to_id = NorbLabelToObjectIdConverter(dataset.tensors[1])
            return label_to_id.num_unique_ids

        self.id_layers = []
        add_classifier_mlp_conv(self.shared_layers[-1],
                                yaml_dict['model']['id_layers'],
                                get_num_classes(yaml_dict),
                                use_dropout,
                                numpy_rng,
                                theano_rng,
                                self.id_layers)

        self.cam_dir_layers = []
        add_regressor_mlp_conv(self.shared_layers[-1],
                               yaml_dict['model']['cam_dir_layers'],
                               3,
                               use_dropout,
                               numpy_rng,
                               theano_rng,
                               self.cam_dir_layers)


        def get_num_lightings(yaml_dict):
            '''
            Returns the number of non-blank lighting values.
            '''
            fg_path = yaml_dict['datasets']['training']['fg_path']
            dataset = MemmapDataset(os.path.join(data_path, fg_path))
            lighting_labels = dataset.tensors[1][:, 4]
            assert_equal(lighting_labels[0], -1)
            assert_array_compare(numpy.greater_equal, lighting_labels[1:], 0)
            assert_array_compare(numpy.less, lighting_labels[1:], 4)
            num_valid_lighting_values = len(frozenset(lighting_labels[1:]))
            assert_equal(num_valid_lighting_values, 4)
            return num_valid_lighting_values

        self.lighting_layers = []
        add_classifier_mlp_conv(self.shared_layers[-1],
                                yaml_dict['model']['lighting_layers'],
                                get_num_lightings(yaml_dict),
                                use_dropout,
                                numpy_rng,
                                theano_rng,
                                self.lighting_layers)

        self.rc_shift_layers = []
        add_regressor_mlp_conv(self.shared_layers[-1],
                               yaml_dict['model']['rc_shift_layers'],
                               2,
                               use_dropout,
                               numpy_rng,
                               theano_rng,
                               self.rc_shift_layers)

        self.scale_layers = []
        add_regressor_mlp_conv(self.shared_layers[-1],
                               yaml_dict['model']['scale_layers'],
                               1,
                               use_dropout,
                               numpy_rng,
                               theano_rng,
                               self.scale_layers)

        self.roll_layers = []
        add_regressor_mlp_conv(self.shared_layers[-1],
                               yaml_dict['model']['roll_layers'],
                               1,
                               use_dropout,
                               numpy_rng,
                               theano_rng,
                               self.roll_layers)
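
Beyond the keys used by the earlier models, this constructor also reads per-head layer specs from yaml_dict, and the output sizes are fixed by the calls above: a 4-way lighting classifier, a 2-output row/column-shift regressor, and 1-output scale and roll regressors. Continuing the yaml_dict sketch shown after code example #5 (leaf values remain placeholders):

yaml_dict['model'].update({
    'lighting_layers': [...],  # classifier MLP spec, 4 lighting classes (placeholder)
    'rc_shift_layers': [...],  # regressor MLP spec, 2 outputs (placeholder)
    'scale_layers': [...],     # regressor MLP spec, 1 output (placeholder)
    'roll_layers': [...],      # regressor MLP spec, 1 output (placeholder)
})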
Code example #8
    def __init__(self,
                 parameter,
                 gradient,  # see (*) below
                 learning_rate,
                 momentum,
                 use_nesterov):

        # (*): We pass in the gradient, rather than the cost, since there are
        # different ways to generate the gradient expression, and we want to
        # allow the user to choose different ones, rather than generating the
        # gradient here ourselves. In particular, the 'consider_constant'
        # argument to theano.gradient.grad() could be of interest to the user.
        # (It's a list of symbols to consider constant, and thus not
        # backpropagate through.)
        '''
        Parameters
        ----------
        parameter: A theano symbol
          A parameter being optimized by an Sgd trainer.

        gradient: A theano symbol
          The gradient of the loss function w.r.t. the above parameter.

        learning_rate: float
          The initial value of the learning rate.

        momentum: float
          A parameter affecting how smeared the update direction is over
          multiple batches. Use 0.0 for momentum-less SGD.

        use_nesterov: bool
          If true, use Nesterov momentum. (See "Advances in Optimizing
          Recurrent Networks", Yoshua Bengio, et al.)
        '''

        #
        # sanity-check args
        #

        assert_is_instance(parameter, theano.tensor.sharedvar.SharedVariable)
        assert_is_instance(gradient, theano.gof.Variable)
        assert_equal(parameter.broadcastable, gradient.broadcastable,
                     "If an Op's .grad() method is buggy, it can return "
                     "broadcast masks.")
        assert_is_subdtype(gradient.dtype, numpy.floating)
        assert_greater_equal(learning_rate, 0)
        assert_greater_equal(momentum, 0)
        assert_is_instance(use_nesterov, bool)

        floatX = theano.config.floatX

        if str(gradient.dtype) != str(floatX):
            gradient = theano.tensor.cast(gradient, floatX)

        #
        # define updates, set members
        #

        def concat(str0, str1):
            '''
            Like str0 + str1, except returns None if either is None.
            '''
            if str0 is None or str1 is None:
                return None
            else:
                return str0 + str1

        def make_shared_floatX(numeric_var, name, **kwargs):
            return theano.shared(numpy.asarray(numeric_var, dtype=floatX),
                                 name=name,
                                 **kwargs)

        self.learning_rate = make_shared_floatX(learning_rate,
                                                concat(parameter.name,
                                                       ' learning rate'))

        self.momentum = make_shared_floatX(momentum,
                                           concat(parameter.name, ' momentum'))

        decay_rate = 0.1
        self.decay_rate = make_shared_floatX(decay_rate,
                                             concat(parameter.name,
                                                    ' decay rate'))

        self._velocity = make_shared_floatX(
            0.0 * parameter.get_value(),
            concat(parameter.name, ' velocity'),
            broadcastable=parameter.broadcastable)

        self.mean_square = make_shared_floatX(
            0.0 * parameter.get_value(),
            concat(parameter.name, ' mean square'),
            broadcastable=parameter.broadcastable)

        # Exponential moving average of the squared gradient (RMSProp-style).
        new_mean_square = (self.decay_rate * self.mean_square +
                           (1.0 - self.decay_rate) * (gradient ** 2))
        new_mean_square.name = concat('new ', self.mean_square.name)

        new_velocity = (self.momentum * self._velocity -
                        self.learning_rate *
                        (gradient / (theano.tensor.sqrt(new_mean_square) + 0.6)))
        new_velocity.name = concat('new ', self._velocity.name)

        assert_equal(str(new_velocity.dtype), str(floatX))
        assert_equal(self._velocity.broadcastable, new_velocity.broadcastable)

        step = (self.momentum * new_velocity - self.learning_rate * gradient
                if use_nesterov
                else new_velocity)

        assert_equal(parameter.broadcastable,
                     step.broadcastable)

        new_parameter = parameter + step
        new_parameter.name = concat('new ', parameter.name)

        self.updates = OrderedDict([(parameter, new_parameter),
                                    (self._velocity, new_velocity),
                                    (self.mean_square, new_mean_square)])
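
Taken together, the updates above amount to an RMSProp-style rule with momentum: a running mean of squared gradients rescales the gradient before it enters the velocity. In the code's notation, with r = mean_square, ρ = decay_rate (0.1 here), μ = momentum, and η = learning_rate, the update in LaTeX:

r_{t+1} = \rho \, r_t + (1 - \rho) \, g^2
v_{t+1} = \mu \, v_t - \eta \, \frac{g}{\sqrt{r_{t+1}} + 0.6}
\theta_{t+1} = \theta_t +
  \begin{cases}
    \mu \, v_{t+1} - \eta \, g & \text{if Nesterov momentum is used} \\
    v_{t+1} & \text{otherwise}
  \end{cases}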