Example #1
    def __init__(self, layer_sizes, active_funcs, through=False):
        """Initialize the auto-encoder

        Args:
            layer_sizes: A list of integers. The first one is the input size.
                The last one is the middlest layer's size.
            active_funcs: A list of layer-wise active functions
            through: True if the output should be passed
        """

        n_layers = len(active_funcs)
        assert n_layers == len(layer_sizes) - 1

        self._blocks, self._params = [], []
        self._through = through

        # Build feature extraction layers
        for i in xrange(n_layers):
            layer = FullConnLayer(input_size=layer_sizes[i],
                                  output_size=layer_sizes[i + 1],
                                  active_func=active_funcs[i])
            self._blocks.append(layer)
            self._params.extend(layer.parameters)

        # Build reconstruction layers with tied weights: each decoder layer
        # reuses the transposed weight matrix of its mirror encoder layer
        for i in xrange(n_layers - 1, -1, -1):
            layer = FullConnLayer(input_size=layer_sizes[i + 1],
                                  output_size=layer_sizes[i],
                                  active_func=active_funcs[i],
                                  W=self._blocks[i].parameters[0].T)
            self._blocks.append(layer)
            self._params.append(layer.parameters[1])  # bias only; W is tied
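
A minimal usage sketch of this constructor; the module path and layer sizes below are illustrative assumptions, while FullConnLayer and the activation functions follow the other examples:

import reid.models.active_functions as actfuncs
from reid.models.autoencoder import AutoEncoder  # module path assumed

# A 784 -> 256 -> 64 encoder with a mirrored, weight-tied decoder
ae = AutoEncoder(layer_sizes=[784, 256, 64],
                 active_funcs=[actfuncs.sigmoid, actfuncs.sigmoid])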
Example #2
def _train_model(nndata, load_from_cache=False, save_to_cache=False):
    print "Training Model ..."
    print "=================="

    if load_from_cache:
        with open(_cached_model, 'rb') as f:
            model = cPickle.load(f)
    else:
        layers = [
            ConvPoolLayer((20, 3, 5, 5), (2, 2), (3, 80, 40), actfuncs.tanh,
                          False),
            ConvPoolLayer((50, 20, 5, 5), (2, 2), None, actfuncs.tanh, True),
            FullConnLayer(5950, 500, actfuncs.tanh),
            FullConnLayer(500, 3, actfuncs.softmax)
        ]

        model = NeuralNet(layers)

        sgd.train(model,
                  nndata,
                  costfuncs.mean_negative_loglikelihood,
                  costfuncs.mean_number_misclassified,
                  regularize=1e-3,
                  batch_size=500,
                  n_epoch=200,
                  learning_rate=1e-1,
                  momentum=0.9,
                  learning_rate_decr=0.95,
                  never_stop=True)

    if save_to_cache:
        with open(_cached_model, 'wb') as f:
            cPickle.dump(model, f, protocol=cPickle.HIGHEST_PROTOCOL)

    return model
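
A possible caching round-trip with this helper; nndata and _cached_model are assumed to be prepared by the surrounding script:

# First call trains and caches; the second loads the pickled model
model = _train_model(nndata, save_to_cache=True)
model = _train_model(nndata, load_from_cache=True)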
Example #3
def attribute_classification():
    fcl_1 = FullConnLayer(4352, 1024, af.tanh)
    fcl_2 = FullConnLayer(1024, 104)
    decomp = DecompLayer(
        [(sz,) for sz in output_sizes],
        [af.softmax] * len(attrconf.unival) + \
        [af.sigmoid] * len(attrconf.multival)
    )
    return NeuralNet([fcl_1, fcl_2, decomp], through=True)
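
DecompLayer splits a flat layer output into per-group slices, each with its own activation. A pure-Python sketch of the presumed splitting behavior (activations omitted):

def decompose(flat, sizes):
    # Split a flat vector into consecutive per-group slices
    out, offset = [], 0
    for sz in sizes:
        out.append(flat[offset:offset + sz])
        offset += sz
    return out

# e.g. a 10-d vector split into groups of 3, 3 and 4
groups = decompose(list(range(10)), [3, 3, 4])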
Example #4
def _train_model(datasets, load_from_cache=False, save_to_cache=False):
    if load_from_cache:
        with open(_cached_model, 'rb') as f:
            model = cPickle.load(f)
    else:
        # Build model
        print "Building model ..."

        # model = AutoEncoder(layer_sizes=[9600, 2400, 2400, 2400, 2400],
        #                     active_funcs=[actfuncs.sigmoid, actfuncs.sigmoid, actfuncs.sigmoid, actfuncs.sigmoid],
        #                     cost_func=costfuncs.mean_square_error,
        #                     error_func=costfuncs.mean_square_error)

        layers = [
            ConvPoolLayer((128, 3, 5, 5), (2, 2), (3, 80, 40),
                          actfuncs.sigmoid, False),
            ConvPoolLayer((64, 128, 5, 5), (1, 1), None, actfuncs.sigmoid,
                          False),
            ConvPoolLayer((32, 64, 5, 5), (1, 1), None, actfuncs.sigmoid,
                          True),
            FullConnLayer(9600, 9600, actfuncs.sigmoid),
            FullConnLayer(9600, 9600, actfuncs.sigmoid)
        ]

        model = NeuralNet(layers, costfuncs.mean_square_error,
                          costfuncs.mean_square_error)

        # Pretrain as an auto-encoder: reuse the dataset but make every
        # split predict its own input
        import copy
        pretrain_datasets = copy.copy(datasets)
        pretrain_datasets.train_y = pretrain_datasets.train_x
        pretrain_datasets.valid_y = pretrain_datasets.valid_x
        pretrain_datasets.test_y = pretrain_datasets.test_x

        sgd.train(model,
                  pretrain_datasets,
                  n_epoch=10,
                  learning_rate=1e-3,
                  learning_rate_decr=1.0)

        # Fine-tune on the original supervised targets
        sgd.train(model,
                  datasets,
                  n_epoch=100,
                  learning_rate=1e-3,
                  learning_rate_decr=1.0)

    if save_to_cache:
        with open(_cached_model, 'wb') as f:
            cPickle.dump(model, f, protocol=cPickle.HIGHEST_PROTOCOL)

    return model
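
The pretraining trick above, pointing each split's targets at its own inputs, can be factored into a small helper; the Datasets attributes follow the code above:

import copy

def as_reconstruction_task(datasets):
    # Shallow-copy the dataset and make every split predict its own
    # input, turning training into a reconstruction task
    pretrain = copy.copy(datasets)
    pretrain.train_y = pretrain.train_x
    pretrain.valid_y = pretrain.valid_x
    pretrain.test_y = pretrain.test_x
    return pretrain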
Example #5
def _train_model(datasets, load_from_cache=False, save_to_cache=False):
    if load_from_cache:
        with open(_cached_model, 'rb') as f:
            model, threshold = cPickle.load(f)
    else:
        # Build model
        print "Building model ..."

        layers = [
            FullConnLayer(input_size=38400,
                          output_size=1024,
                          active_func=actfuncs.sigmoid),
            FullConnLayer(input_size=1024,
                          output_size=12800,
                          active_func=actfuncs.sigmoid)
        ]

        model = NeuralNet(layers)

        sgd.train(model,
                  datasets,
                  cost_func=costfuncs.mean_binary_cross_entropy,
                  error_func=costfuncs.mean_binary_cross_entropy,
                  n_epoch=100,
                  learning_rate=1e-3,
                  learning_rate_decr=1.0)

        # Choose an output threshold on the given datasets (helper
        # defined elsewhere)
        threshold = _choose_threshold(model, datasets, verbose=False)

    if save_to_cache:
        with open(_cached_model, 'wb') as f:
            cPickle.dump((model, threshold),
                         f,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    return (model, threshold)
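
Callers receive the model together with its chosen threshold; a usage sketch:

# Train once and cache both the model and its threshold
model, threshold = _train_model(datasets, save_to_cache=True)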
Example #6
def person_reidentification():
    fp = FilterParingLayer((64, 17, 4), 4, (2, 2), True)
    fcl_1 = FullConnLayer(1088, 256, af.tanh)
    return NeuralNet([fp, fcl_1])
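
A quick shape check (hedged): the pairing layer consumes 64x17x4 feature maps, matching the 4352-d input of the attribute branch in Example #7; the 1088 figure is assumed to be the pairing layer's flattened output size:

# 64 * 17 * 4 feature maps flatten to the attribute branch's input size
assert 64 * 17 * 4 == 4352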
Example #7
def train_model(batch_dir):
    """Train deep model

    Args:
        dataset: Dataset X = X1_X2, Y = A1_A2_(0/1)

    Returns:
        The trained deep model
    """

    print "Training ..."

    model = cachem.load('model')

    if model is not None: return model

    import reid.models.active_functions as af
    import reid.models.cost_functions as cf
    from reid.models.layers import ConvPoolLayer, FullConnLayer, IdentityLayer, FilterParingLayer
    from reid.models.layers import CompLayer, DecompLayer, CloneLayer
    from reid.models.neural_net import NeuralNet, MultiwayNeuralNet
    from reid.models.evaluate import Evaluator
    from reid.optimization import sgd

    # One output group per attribute: softmax class scores for univalued
    # groups, sigmoid activations for multivalued groups
    output_sizes = [len(grp) for grp in attrconf.unival + attrconf.multival]
    # Targets: a class index per univalued group, a binary vector per
    # multivalued group
    target_sizes = [1] * len(attrconf.unival) + [
        len(grp) for grp in attrconf.multival
    ]

    # Feature extraction module
    def feature_extraction():
        decomp = DecompLayer([(2, 80, 30)] * len(bodyconf.groups))
        column = MultiwayNeuralNet([
            NeuralNet([
                ConvPoolLayer((64, 2, 5, 5), (2, 2), (2, 80, 30), af.tanh,
                              False),
                ConvPoolLayer((64, 64, 5, 5), (2, 2), None, af.tanh, True)
            ]) for __ in xrange(len(bodyconf.groups))
        ])
        comp = CompLayer(strategy='Maxout')
        return NeuralNet([decomp, column, comp])

    fe = feature_extraction()
    feat_module = NeuralNet([
        DecompLayer([(2 * 80 * 30 * len(bodyconf.groups), )] * 2),
        MultiwayNeuralNet([fe, fe]),
        CompLayer()
    ])

    # Attribute classification module
    def attribute_classification():
        fcl_1 = FullConnLayer(4352, 1024, af.tanh)
        fcl_2 = FullConnLayer(1024, 104)
        decomp = DecompLayer(
            [(sz,) for sz in output_sizes],
            [af.softmax] * len(attrconf.unival) + \
            [af.sigmoid] * len(attrconf.multival)
        )
        return NeuralNet([fcl_1, fcl_2, decomp], through=True)

    ac = NeuralNet([attribute_classification(), CompLayer()])
    attr_module = NeuralNet([
        DecompLayer([(4352, )] * 2),
        MultiwayNeuralNet([ac, ac]),
        CompLayer()
    ])

    # Person re-identification module
    def person_reidentification():
        fp = FilterParingLayer((64, 17, 4), 4, (2, 2), True)
        fcl_1 = FullConnLayer(1088, 256, af.tanh)
        return NeuralNet([fp, fcl_1])

    reid_module = person_reidentification()

    # Combine them together
    model = NeuralNet([
        feat_module,
        CloneLayer(2),
        MultiwayNeuralNet([attr_module, reid_module]),
        CompLayer(),
        FullConnLayer(104 + 104 + 256, 256, af.tanh),
        FullConnLayer(256, 2, af.softmax)
    ])

    # Fine-tuning: the re-identification loss is up-weighted by 6.0
    # relative to the per-attribute losses
    def reid_cost(output, target):
        return 6.0 * cf.mean_negative_loglikelihood(output, target)

    def reid_error(output, target):
        return 6.0 * cf.mean_negative_loglikelihood(output, target)

    def target_adapter():
        d1 = DecompLayer([(sum(target_sizes), ), (sum(target_sizes), ), (1, )])
        d2 = DecompLayer([(sz, ) for sz in target_sizes])
        return NeuralNet([d1, MultiwayNeuralNet([d2, d2, IdentityLayer()])])

    cost_func = [
        [cf.mean_negative_loglikelihood] * len(attrconf.unival) + \
            [cf.mean_binary_cross_entropy] * len(attrconf.multival),
        [cf.mean_negative_loglikelihood] * len(attrconf.unival) + \
            [cf.mean_binary_cross_entropy] * len(attrconf.multival),
        reid_cost
    ]
    error_func = [
        [cf.mean_number_misclassified] * len(attrconf.unival) + \
            [cf.mean_zeroone_error_rate] * len(attrconf.multival),
        [cf.mean_number_misclassified] * len(attrconf.unival) + \
            [cf.mean_zeroone_error_rate] * len(attrconf.multival),
        reid_error
    ]

    evaluator = Evaluator(model,
                          cost_func,
                          error_func,
                          target_adapter(),
                          regularize=1e-3)

    sgd.train_batch(evaluator,
                    batch_dir,
                    learning_rate=1e-4,
                    momentum=0.9,
                    batch_size=300,
                    n_epoch=100,
                    learning_rate_decr=1.0,
                    patience_incr=1.5)

    return model
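
A bookkeeping check for the combined model: the attribute branch emits one 104-d vector per person and the re-identification branch a 256-d feature, and their concatenation feeds the first fully connected fusion layer:

# Input size of the first fusion FullConnLayer above
assert 104 + 104 + 256 == 464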
Example #8
def train_model(dataset):
    """Train deep model

    This function will build up a deep neural network and train it using given
    dataset.

    Args:
        dataset: A Dataset object returned by ``create_dataset``

    Returns:
        The trained deep model.
    """

    model = cachem.load('model')

    if model is None:
        import reid.models.active_functions as actfuncs
        import reid.models.cost_functions as costfuncs
        from reid.models.layers import ConvPoolLayer, FullConnLayer
        from reid.models.layers import CompLayer, DecompLayer
        from reid.models.neural_net import NeuralNet, MultiwayNeuralNet
        from reid.models.evaluate import Evaluator
        from reid.optimization import sgd

        output_sizes = [len(grp) for grp in attrconf.unival + attrconf.multival]
        target_sizes = [1] * len(attrconf.unival) + [len(grp) for grp in attrconf.multival]

        # Build up model
        input_decomp = DecompLayer([(3, 80, 30)] * len(bodyconf.groups))

        columns = MultiwayNeuralNet([NeuralNet([
            ConvPoolLayer((64, 3, 3, 3), (2, 2), (3, 80, 30), actfuncs.tanh,
                          False),
            ConvPoolLayer((64, 64, 3, 3), (2, 2), None, actfuncs.tanh, True)
        ]) for __ in xrange(len(bodyconf.groups))])

        feature_comp = CompLayer(strategy='Maxout')

        # The final 99 units are decomposed into per-attribute groups
        # below, so 99 must equal sum(output_sizes)
        classify_1 = FullConnLayer(6912, 99)
        classify_2 = FullConnLayer(99, 99)

        attr_decomp = DecompLayer(
            [(sz,) for sz in output_sizes],
            [actfuncs.softmax] * len(attrconf.unival) + \
            [actfuncs.sigmoid] * len(attrconf.multival)
        )

        model = NeuralNet([input_decomp, columns, feature_comp,
                           classify_1, classify_2, attr_decomp])

        # Build up adapter
        adapter = DecompLayer([(sz,) for sz in target_sizes])

        # Build up evaluator
        cost_functions = [costfuncs.mean_negative_loglikelihood] * len(attrconf.unival) + \
                         [costfuncs.mean_binary_cross_entropy] * len(attrconf.multival)

        error_functions = [costfuncs.mean_number_misclassified] * len(attrconf.unival) + \
                          [costfuncs.mean_zeroone_error_rate] * len(attrconf.multival)

        evaluator = Evaluator(model, cost_functions, error_functions, adapter,
                              regularize=1e-3)

        # Train the feature extraction model
        sgd.train(evaluator, dataset,
                  learning_rate=1e-3, momentum=0.9,
                  batch_size=300, n_epoch=200,
                  learning_rate_decr=1.0, patience_incr=1.5)

    return model
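
The adapter decomposes the flat target vector so that the i-th cost function sees the i-th output/target pair. A hedged sketch of that presumed pairing:

def paired_cost(outputs, targets, cost_functions):
    # Apply one cost function per decomposed output/target pair and
    # sum the results (presumed Evaluator behavior)
    return sum(f(o, t)
               for f, o, t in zip(cost_functions, outputs, targets))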
Example #9
import os
import cPickle

import reid.models.active_functions as actfuncs
import reid.models.cost_functions as costfuncs
from reid.models.layers import ConvPoolLayer, FullConnLayer
from reid.models.neural_net import NeuralNet
from reid.models.evaluate import Evaluator
from reid.optimization import sgd

# Note: ``Datasets`` is the library's train/valid/test container; its
# exact import path is not shown in these examples.

with open(os.path.join('..', 'data', 'mnist', 'mnist.pkl'), 'rb') as f:
    train_set, valid_set, test_set = cPickle.load(f)

train_set = (train_set[0], train_set[1].reshape(train_set[1].shape[0], 1))
valid_set = (valid_set[0], valid_set[1].reshape(valid_set[1].shape[0], 1))
test_set = (test_set[0], test_set[1].reshape(test_set[1].shape[0], 1))

datasets = Datasets(train_set=train_set,
                    valid_set=valid_set,
                    test_set=test_set)

# Build up the model and evaluator
layers = [
    ConvPoolLayer((20, 1, 5, 5), (2, 2), (1, 28, 28), actfuncs.tanh, False),
    ConvPoolLayer((50, 20, 5, 5), (2, 2), None, actfuncs.tanh, True),
    FullConnLayer(800, 500, actfuncs.tanh),
    FullConnLayer(500, 10, actfuncs.softmax)
]

model = NeuralNet(layers)

evaluator = Evaluator(model, costfuncs.mean_negative_loglikelihood,
                      costfuncs.mean_number_misclassified)

# Train the model
sgd.train(evaluator,
          datasets,
          learning_rate=0.1,
          batch_size=500,
          n_epoch=200,
          learning_rate_decr=1.0)
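
Mirroring the caching pattern of the earlier examples, the trained model could be pickled for later reuse; the path here is illustrative:

with open('mnist_model.pkl', 'wb') as f:  # illustrative cache path
    cPickle.dump(model, f, protocol=cPickle.HIGHEST_PROTOCOL)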
Example #10
import numpy
import theano

import reid.models.cost_functions as costfuncs
from reid.models.layers import FullConnLayer, CompLayer, DecompLayer
from reid.models.neural_net import NeuralNet, MultiwayNeuralNet
from reid.models.evaluate import Evaluator

# ``mse`` below is assumed to alias the library's mean-square-error cost
mse = costfuncs.mean_square_error

# Set up test samples. Assuming plain linear layers with zero bias and
# no activation, X propagated through W1 and W2 yields exactly Z:
# [2, 1] . W1 = [5, 8] and [1, 3] . W2 = [10, 6].
X = numpy.asarray([[2, 1, 1, 3]], dtype=numpy.float32)
Y = numpy.asarray([[6, 9, 10, 4]], dtype=numpy.float32)
Z = numpy.asarray([[5, 8, 10, 6]], dtype=numpy.float32)
W1 = theano.shared(numpy.asarray([[1, 2], [3, 4]], dtype=numpy.float32),
                   borrow=True)
W2 = theano.shared(numpy.asarray([[4, 3], [2, 1]], dtype=numpy.float32),
                   borrow=True)

# Build up model
decomp = DecompLayer([(2, ), (2, )])

columns = MultiwayNeuralNet(
    [FullConnLayer(2, 2, W=W1),
     FullConnLayer(2, 2, W=W2)])

comp = CompLayer()

# Split the 4-d composed output into a 1-d task and a 3-d task
multitask = DecompLayer([(1, ), (3, )])

model = NeuralNet([decomp, columns, comp, multitask])

# Build up the target value adapter
adapter = DecompLayer([(1, ), (3, )])

# Build up evaluator
evaluator = Evaluator(model, [mse, mse], [mse, mse], adapter)

# Compute the expression by using the model
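
The model's symbolic API is not shown here, but the expected forward result can be checked numerically, assuming plain linear layers with zero bias:

# X is split into two 2-d halves, multiplied by W1 and W2, and
# concatenated; the result should reproduce Z
x1, x2 = X[:, :2], X[:, 2:]
out = numpy.hstack([x1.dot(W1.get_value()), x2.dot(W2.get_value())])
assert numpy.allclose(out, Z)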