Example #1
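The create_models snippets below come from Blocks/Theano training scripts and are shown without their module headers. A plausible set of imports they assume is listed here; project-specific helpers (create_model_brick, NLAT, circle_gaussian_mixture, the config dictionaries) are defined elsewhere in their source modules, and exact paths may vary across Blocks versions:

from theano import tensor

from blocks.filter import VariableFilter
from blocks.graph import (ComputationGraph, apply_dropout,
                          batch_normalization,
                          get_batch_normalization_updates)
from blocks.model import Model
from blocks.roles import INPUT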
def create_models():
    ali = create_model_brick()
    x = tensor.tensor4('features')
    z = ali.theano_rng.normal(size=(x.shape[0], NLAT, 1, 1))

    def _create_model(with_dropout):
        cg = ComputationGraph(ali.compute_losses(x, z))
        if with_dropout:
            inputs = VariableFilter(
                bricks=([ali.discriminator.x_discriminator.layers[0],
                         ali.discriminator.z_discriminator.layers[0]]),
                roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.2)
            inputs = VariableFilter(
                bricks=(ali.discriminator.x_discriminator.layers[2::3] +
                        ali.discriminator.z_discriminator.layers[2::2] +
                        ali.discriminator.joint_discriminator.layers[::2]),
                roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.5)
        return Model(cg.outputs)

    model = _create_model(with_dropout=False)
    with batch_normalization(ali):
        bn_model = _create_model(with_dropout=True)

    pop_updates = list(
        set(get_batch_normalization_updates(bn_model, allow_duplicates=True)))
    bn_updates = [(p, m * 0.05 + p * 0.95) for p, m in pop_updates]

    return model, bn_model, bn_updates
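A usage sketch, not part of the original snippet: the bn_updates returned by create_models() are meant to be attached to the training algorithm so the population statistics are refreshed on every minibatch (the training script in Example #4 does the same with its extra_updates). Which graph output to optimize and the choice of Adam are assumptions made for illustration only:

from blocks.algorithms import Adam, GradientDescent

model, bn_model, bn_updates = create_models()
algorithm = GradientDescent(cost=bn_model.outputs[0],  # assumed to be a loss
                            parameters=bn_model.parameters,
                            step_rule=Adam())
# Apply the moving-average updates alongside the gradient steps.
algorithm.add_updates(bn_updates)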
Example #2
def create_models():
    gan = create_model_brick()
    x = tensor.matrix('features')
    z = gan.theano_rng.normal(size=(x.shape[0], NLAT))

    def _create_model(with_dropout):
        cg = ComputationGraph(gan.compute_losses(x, z))
        if with_dropout:
            inputs = VariableFilter(
                bricks=gan.discriminator.children[1:],
                roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.5)
            inputs = VariableFilter(
                bricks=[gan.discriminator],
                roles=[INPUT])(cg.variables)
            cg = apply_dropout(cg, inputs, 0.2)
        return Model(cg.outputs)

    model = _create_model(with_dropout=False)
    with batch_normalization(gan):
        bn_model = _create_model(with_dropout=False)

    pop_updates = list(
        set(get_batch_normalization_updates(bn_model, allow_duplicates=True)))
    bn_updates = [(p, m * 0.05 + p * 0.95) for p, m in pop_updates]

    return model, bn_model, bn_updates
Example #3
    def create_models(self):
        gan = self.create_model_brick()
        x = tensor.matrix('features')
        zs = []
        for i in range(self._config["num_packing"]):
            z = circle_gaussian_mixture(num_modes=self._config["num_zmode"],
                                        num_samples=x.shape[0],
                                        dimension=self._config["num_zdim"],
                                        r=self._config["z_mode_r"],
                                        std=self._config["z_mode_std"])
            zs.append(z)

        def _create_model(with_dropout):
            cg = ComputationGraph(gan.compute_losses(x, zs))
            if with_dropout:
                inputs = VariableFilter(bricks=gan.discriminator.children[1:],
                                        roles=[INPUT])(cg.variables)
                cg = apply_dropout(cg, inputs, 0.5)
                inputs = VariableFilter(bricks=[gan.discriminator],
                                        roles=[INPUT])(cg.variables)
                cg = apply_dropout(cg, inputs, 0.2)
            return Model(cg.outputs)

        model = _create_model(with_dropout=False)
        with batch_normalization(gan):
            bn_model = _create_model(with_dropout=False)

        pop_updates = list(
            set(
                get_batch_normalization_updates(bn_model,
                                                allow_duplicates=True)))

        # Merge updates that target the same population variable: duplicates
        # occur because allow_duplicates=True, so average their estimates.
        names = []
        counts = []
        pop_update_merges = []
        pop_update_merges_finals = []
        for pop_update in pop_updates:
            matched = False
            for i in range(len(names)):
                if pop_update[0].auto_name == names[i]:
                    counts[i] += 1
                    pop_update_merges[i][1] += pop_update[1]
                    matched = True
                    break
            if not matched:
                names.append(pop_update[0].auto_name)
                counts.append(1)
                pop_update_merges.append([pop_update[0], pop_update[1]])
        for i in range(len(pop_update_merges)):
            pop_update_merges_finals.append(
                (pop_update_merges[i][0], pop_update_merges[i][1] / counts[i]))

        bn_updates = [(p, m * 0.05 + p * 0.95)
                      for p, m in pop_update_merges_finals]

        return model, bn_model, bn_updates
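For reference, a more compact rendering of the same merge (a sketch, not from the original source; it assumes the same list of (population_variable, minibatch_estimate) pairs and Theano variables' auto_name attribute):

from collections import OrderedDict

def merge_duplicate_updates(pop_updates):
    # Maps auto_name -> [population_variable, summed_estimate, count].
    merged = OrderedDict()
    for var, estimate in pop_updates:
        key = var.auto_name
        if key in merged:
            merged[key][1] += estimate
            merged[key][2] += 1
        else:
            merged[key] = [var, estimate, 1]
    # Average the minibatch estimates collected for each variable.
    return [(var, total / count) for var, total, count in merged.values()]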
Example #4
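This training script is likewise shown without its module header. A plausible reconstruction of the imports it relies on follows; LeNet, Glorot, and add_transformers are project-specific helpers defined elsewhere, and some module paths may differ between Blocks/Fuel versions:

from __future__ import print_function
import logging
import os
import subprocess
import sys

import numpy
import theano
from theano import tensor

from blocks.algorithms import (Adam, CompositeRule, GradientDescent, Restrict,
                               RMSProp, VariableClipping)
from blocks.bricks import Activation, Linear, Rectifier, Softmax
from blocks.bricks.conv import Convolutional
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
from blocks.extensions import FinishAfter, Printing, Timing
from blocks.extensions.monitoring import (DataStreamMonitoring,
                                          TrainingDataMonitoring)
from blocks.extensions.predicates import OnLogRecord
from blocks.extensions.saveload import Checkpoint
from blocks.extensions.training import TrackTheBest
from blocks.filter import VariableFilter
from blocks.graph import (ComputationGraph, apply_dropout,
                          batch_normalization,
                          get_batch_normalization_updates)
from blocks.initialization import Constant
from blocks.main_loop import MainLoop
from blocks.model import Model
from blocks.monitoring import aggregation
from blocks.roles import OUTPUT, WEIGHT
from blocks.serialization import load_parameters
from blocks.theano_expressions import l2_norm
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.schemes import SequentialExampleScheme, ShuffledScheme
from fuel.streams import DataStream, ServerDataStream

logger = logging.getLogger(__name__)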
def main(mode, save_to, num_epochs, load_params=None,
         feature_maps=None, mlp_hiddens=None,
         conv_sizes=None, pool_sizes=None, stride=None, repeat_times=None,
         batch_size=None, num_batches=None, algo=None,
         test_set=None, valid_examples=None,
         dropout=None, max_norm=None, weight_decay=None,
         batch_norm=None):
    if feature_maps is None:
        feature_maps = [20, 50, 50]
    if mlp_hiddens is None:
        mlp_hiddens = [500]
    if conv_sizes is None:
        conv_sizes = [5, 5, 5]
    if pool_sizes is None:
        pool_sizes = [2, 2, 2]
    if repeat_times is None:
        repeat_times = [1, 1, 1]
    if batch_size is None:
        batch_size = 500
    if valid_examples is None:
        valid_examples = 2500
    if stride is None:
        stride = 1
    if test_set is None:
        test_set = 'test'
    if algo is None:
        algo = 'rmsprop'
    if batch_norm is None:
        batch_norm = False

    image_size = (128, 128)
    output_size = 2

    if (len(feature_maps) != len(conv_sizes) or
        len(feature_maps) != len(pool_sizes) or
        len(feature_maps) != len(repeat_times)):
        raise ValueError("OMG, inconsistent arguments")

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 3, image_size,
                    stride=stride,
                    filter_sizes=list(zip(conv_sizes, conv_sizes)),
                    feature_maps=feature_maps,
                    pooling_sizes=list(zip(pool_sizes, pool_sizes)),
                    repeat_times=repeat_times,
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='full',
                    batch_norm=batch_norm,
                    weights_init=Glorot(),
                    biases_init=Constant(0))
    # Initialize all parameters: Glorot-initialized weights, zero biases.
    convnet.initialize()
    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(
                i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))


    single_x = tensor.tensor3('image_features')
    x = tensor.tensor4('image_features')
    single_y = tensor.lvector('targets')
    y = tensor.lmatrix('targets')

    # Training
    with batch_normalization(convnet):
        probs = convnet.apply(x)
    cost = (CategoricalCrossEntropy().apply(y.flatten(), probs)
            .copy(name='cost'))
    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))

    cg = ComputationGraph([cost, error_rate])
    extra_updates = []

    if batch_norm:
        logger.debug("Applying batch normalization")
        pop_updates = get_batch_normalization_updates(cg)
        # p stands for population mean
        # m stands for minibatch
        alpha = 0.005
        extra_updates = [(p, m * alpha + p * (1 - alpha))
                         for p, m in pop_updates]
        population_statistics = [p for p, m in extra_updates]
    if dropout:
        relu_outputs = VariableFilter(bricks=[Rectifier], roles=[OUTPUT])(cg)
        cg = apply_dropout(cg, relu_outputs, dropout)
    cost, error_rate = cg.outputs
    if weight_decay:
        logger.debug("Apply weight decay {}".format(weight_decay))
        cost += weight_decay * l2_norm(cg.parameters)
        cost.name = 'cost'

    # Validation
    valid_probs = convnet.apply_5windows(single_x)
    valid_cost = (CategoricalCrossEntropy().apply(single_y, valid_probs)
                  .copy(name='cost'))
    valid_error_rate = (MisclassificationRate().apply(
        single_y, valid_probs).copy(name='error_rate'))

    model = Model([cost, error_rate])
    if load_params:
        logger.info("Loaded params from {}".format(load_params))
        with open(load_params, 'r') as src:
            model.set_parameter_values(load_parameters(src))

    # Training stream with random cropping
    train = DogsVsCats(("train",),
                       subset=slice(None, 25000 - valid_examples, None))
    train_str = DataStream(
        train, iteration_scheme=ShuffledScheme(train.num_examples, batch_size))
    train_str = add_transformers(train_str, random_crop=True)

    # Validation stream without cropping
    valid = DogsVsCats(("train",),
                       subset=slice(25000 - valid_examples, None, None))
    valid_str = DataStream(
        valid, iteration_scheme=SequentialExampleScheme(valid.num_examples))
    valid_str = add_transformers(valid_str)

    if mode == 'train':
        directory, _ = os.path.split(sys.argv[0])
        env = dict(os.environ)
        env['THEANO_FLAGS'] = 'floatX=float32'
        port = numpy.random.randint(1025, 10000)
        server = subprocess.Popen(
            [directory + '/server.py',
             str(25000 - valid_examples), str(batch_size), str(port)],
            env=env, stderr=subprocess.STDOUT)
        train_str = ServerDataStream(
            ('image_features', 'targets'), produces_examples=False,
            port=port)

        save_to_base, save_to_extension = os.path.splitext(save_to)

        # Pick the optimization step rule
        if algo == 'rmsprop':
            step_rule = RMSProp(decay_rate=0.999, learning_rate=0.0003)
        elif algo == 'adam':
            step_rule = Adam()
        else:
            raise ValueError("unknown algorithm: {}".format(algo))
        if max_norm:
            conv_params = VariableFilter(bricks=[Convolutional], roles=[WEIGHT])(cg)
            linear_params = VariableFilter(bricks=[Linear], roles=[WEIGHT])(cg)
            step_rule = CompositeRule(
                [step_rule,
                 Restrict(VariableClipping(max_norm, axis=0), linear_params),
                 Restrict(VariableClipping(max_norm, axis=(1, 2, 3)), conv_params)])

        algorithm = GradientDescent(
            cost=cost, parameters=model.parameters,
            step_rule=step_rule)
        algorithm.add_updates(extra_updates)
        # `Timing` reports the time spent reading data, aggregating a batch
        # and monitoring.
        extensions = [Timing(every_n_batches=100),
                      FinishAfter(after_n_epochs=num_epochs,
                                  after_n_batches=num_batches),
                      DataStreamMonitoring(
                          [valid_cost, valid_error_rate],
                          valid_str,
                          prefix="valid"),
                      TrainingDataMonitoring(
                          [cost, error_rate,
                           aggregation.mean(algorithm.total_gradient_norm)],
                          prefix="train",
                          after_epoch=True),
                      TrackTheBest("valid_error_rate"),
                      Checkpoint(save_to, save_separately=['log'],
                                 parameters=cg.parameters +
                                 (population_statistics if batch_norm else []),
                                 before_training=True, after_epoch=True)
                          .add_condition(
                              ['after_epoch'],
                              OnLogRecord("valid_error_rate_best_so_far"),
                              (save_to_base + '_best' + save_to_extension,)),
                      Printing(every_n_batches=100)]

        model = Model(cost)

        main_loop = MainLoop(
            algorithm,
            train_str,
            model=model,
            extensions=extensions)
        try:
            main_loop.run()
        finally:
            server.terminate()
    elif mode == 'test':
        classify = theano.function([single_x], valid_probs.argmax())
        test = DogsVsCats((test_set,))
        test_str = DataStream(
            test, iteration_scheme=SequentialExampleScheme(test.num_examples))
        test_str = add_transformers(test_str)
        correct = 0
        with open(save_to, 'w') as dst:
            print("id", "label", sep=',', file=dst)
            for index, example in enumerate(test_str.get_epoch_iterator()):
                image = example[0]
                prediction = classify(image)
                print(index + 1, prediction, sep=',', file=dst)
                if len(example) > 1 and prediction == example[1]:
                    correct += 1
        print(correct / float(test.num_examples))
    else:
        raise ValueError("unknown mode: {}".format(mode))
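All four examples track batch-normalization population statistics with the same exponential moving average, p <- (1 - alpha) * p + alpha * m, written inline as m * alpha + p * (1 - alpha). A standalone numeric illustration (hypothetical helper, not from any of the sources):

def ema_update(p, m, alpha):
    # Blend the minibatch estimate m into the running population statistic p.
    return (1.0 - alpha) * p + alpha * m

print(ema_update(0.0, 1.0, 0.05))   # 0.05  -- alpha used in Examples #1-#3
print(ema_update(0.0, 1.0, 0.005))  # 0.005 -- alpha used in Example #4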