Beispiel #1
0
def test_misclassification_rate():
    y = tensor.vector(dtype="int32")
    yhat = tensor.matrix(theano.config.floatX)
    top1_brick = MisclassificationRate()
    top2_brick = MisclassificationRate(top_k=2)
    top3_brick = MisclassificationRate(top_k=3)
    f = theano.function([y, yhat], [top1_brick.apply(y, yhat), top2_brick.apply(y, yhat), top3_brick.apply(y, yhat)])
    y_ = numpy.array([2, 1, 0, 1, 2], dtype="int32")
    yhat_ = numpy.array([[3, 2, 1, 0], [1, 8, 2, 1], [3, 8, 1, 2], [1, 6, 4, 2], [9, 7, 5, 5]], dtype="float32")
    top1_error = 0.6
    top2_error = 0.4
    top3_error = 0.2
    assert_allclose([top1_error, top2_error, top3_error], f(y_, yhat_))
Beispiel #2
0
    def apply(self, input_lb, input_un, target):
        batch_size = input_lb.shape[0]
        get_labeled = lambda x: x[:batch_size] if x is not None else x
        input = T.concatenate([input_lb, input_un], axis=0)
        self.layer_dims = {0: self.input_dim}
        self.lr = self.shared(self.default_lr, "learning_rate", role=None)
        top = len(self.layers) - 1

        clean = self.encoder(input, noise_std=[0])
        corr = self.encoder(input, noise_std=self.noise_std)

        ests, costs = self.decoder(clean, corr, batch_size)

        # Costs
        y = target.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(y, get_labeled(clean.h[top]))
        costs.class_clean.name = "CE_clean"

        costs.class_corr = CategoricalCrossEntropy().apply(y, get_labeled(corr.h[top]))
        costs.class_corr.name = "CE_corr"

        costs.total = costs.class_corr * 1.0
        for i in range(len(self.layers)):
            costs.total += costs.denois[i] * self.denoising_cost_x[i]
        costs.total.name = "Total_cost"

        self.costs = costs

        # Classification error
        mr = MisclassificationRate()
        self.error = mr.apply(y, get_labeled(clean.h[top])) * np.float32(100.0)
        self.error.name = "Error_rate"
Beispiel #3
0
    def apply(self, input_lb, input_un, target):
        batch_size = input_lb.shape[0]
        get_labeled = lambda x: x[:batch_size] if x is not None else x
        input = T.concatenate([input_lb, input_un], axis=0)
        self.layer_dims = {0: self.input_dim}
        self.lr = self.shared(self.default_lr, 'learning_rate', role=None)
        top = len(self.layers) - 1

        clean = self.encoder(input, noise_std=[0])
        corr = self.encoder(input, noise_std=self.noise_std)

        ests, costs = self.decoder(clean, corr, batch_size)

        # Costs
        y = target.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(
            y, get_labeled(clean.h[top]))
        costs.class_clean.name = 'CE_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(
            y, get_labeled(corr.h[top]))
        costs.class_corr.name = 'CE_corr'

        costs.total = costs.class_corr * 1.0
        for i in range(len(self.layers)):
            costs.total += costs.denois[i] * self.denoising_cost_x[i]
        costs.total.name = 'Total_cost'

        self.costs = costs

        # Classification error
        mr = MisclassificationRate()
        self.error = mr.apply(y, get_labeled(clean.h[top])) * np.float32(100.)
        self.error.name = 'Error_rate'
def maxout_vae_mnist_test(path_vae_mnist):

    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    temp = cg.parameters
    for t, i in zip(temp, range(len(temp))):
        t.name = t.name+str(i)+"maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y, y_hat) 

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_test =Flatten(DataStream.default_stream(
            test_set, iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")


    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=50),
                    Printing(every_n_epochs=1)
                  ]

    main_loop = MainLoop(data_stream=data_stream_train,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'w')) as f:
	    dump(maxout, f)
Beispiel #5
0
def test_misclassification_rate():
    y = tensor.vector(dtype='int32')
    yhat = tensor.matrix(theano.config.floatX)
    top1_brick = MisclassificationRate()
    top2_brick = MisclassificationRate(top_k=2)
    top3_brick = MisclassificationRate(top_k=3)
    f = theano.function([y, yhat], [
        top1_brick.apply(y, yhat),
        top2_brick.apply(y, yhat),
        top3_brick.apply(y, yhat)
    ])
    y_ = numpy.array([2, 1, 0, 1, 2], dtype='int32')
    yhat_ = numpy.array(
        [[3, 2, 1, 0], [1, 8, 2, 1], [3, 8, 1, 2], [1, 6, 4, 2], [9, 7, 5, 5]],
        dtype='float32')
    top1_error = 0.6
    top2_error = 0.4
    top3_error = 0.2
    assert_allclose([top1_error, top2_error, top3_error], f(y_, yhat_))
Beispiel #6
0
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.layer_counter = 0
        self.layer_dims = {0: self.input_dim}
        self.lr = self.shared(self.default_lr, 'learning_rate', role=None)
        top = len(self.layers) - 1

        num_labeled = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:num_labeled] if x is not None else x
        self.unlabeled = lambda x: x[num_labeled:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        clean = self.encoder(input_concat, 'clean',
                             input_noise_std=0.0,
                             noise_std=[])
        corr = self.encoder(input_concat, 'corr',
                            input_noise_std=self.super_noise_std,
                            noise_std=self.f_local_noise_std)

        est, costs = self.decoder(clean, corr)

        # Costs
        y = target_labeled.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(
            y, clean.labeled.h[top])
        costs.class_clean.name = 'CE_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(
            y, corr.labeled.h[top])
        costs.class_corr.name = 'CE_corr'

        costs.total = costs.class_corr * 1.0
        for i in range(len(self.layers)):
            costs.total += costs.denois[i] * self.denoising_cost_x[i]
        costs.total.name = 'Total_cost'

        self.costs = costs

        # Classification error
        mr = MisclassificationRate()
        self.error = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.name = 'Error_rate'
def training_model_mnist(learning_rate, momentum, iteration, batch_size, epoch_end, iter_batch):

    x = T.tensor4('features')
    y = T.imatrix('targets')

    classifier = build_model_mnist()

    predict = classifier.apply(x)
    y_hat = Softmax().apply(predict)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    cost.name = "cost"
    cg = ComputationGraph(cost)
    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error"


    train_set = MNIST(('train', ))
    test_set = MNIST(("test",))

    if iteration =="slice":
        data_stream = DataStream.default_stream(
                train_set, iteration_scheme=SequentialScheme_slice(train_set.num_examples,
                                                            batch_size))
        data_stream_test = DataStream.default_stream(
                test_set, iteration_scheme=SequentialScheme_slice(test_set.num_examples,
                                                            batch_size))
    else:
        data_stream = DataStream.default_stream(
                train_set, iteration_scheme=SequentialScheme(train_set.num_examples,
                                                            batch_size))

        data_stream_test = DataStream.default_stream(
                test_set, iteration_scheme=SequentialScheme(test_set.num_examples,
                                                            batch_size))

    step_rule = Momentum(learning_rate=learning_rate,
                         momentum=momentum)

    start = time.clock()
    time_spent = shared_floatx(np.float32(0.), name="time_spent")
    time_extension = Time_reference(start, time_spent, every_n_batches=1)

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_epochs=iter_batch)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate, time_spent], data_stream=data_stream_test, prefix="valid",
        every_n_epochs=iter_batch)

    # add a monitor variable about the time
    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=epoch_end),
                    Printing(every_n_epochs=iter_batch),
                    time_extension
                  ]

    main_loop = MainLoop(data_stream=data_stream,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()
Beispiel #8
0
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.target_labeled = target_labeled
        self.layer_counter = 0
        input_dim = self.p.encoder_layers[0]

        # Store the dimension tuples in the same order as layers.
        layers = self.layers
        self.layer_dims = {0: input_dim}

        self.lr = self.default_lr

        self.costs = costs = AttributeDict()
        self.costs.denois = AttributeDict()

        self.act = AttributeDict()
        self.error = AttributeDict()

        top = len(layers) - 1

        N = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:N] if x is not None else x
        self.unlabeled = lambda x: x[N:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
            h = input_

            logger.info('  0: noise %g' % input_noise_std)
            if input_noise_std > 0.:
                h = h + self.noise_like(h) * input_noise_std

            d = AttributeDict()
            d.unlabeled = self.new_activation_dict()
            d.labeled = self.new_activation_dict()
            d.labeled.z[0] = self.labeled(h)
            d.unlabeled.z[0] = self.unlabeled(h)
            prev_dim = input_dim
            for i, (spec, _, act_f) in layers[1:]:
                d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
                noise = noise_std[i] if i < len(noise_std) else 0.
                curr_dim, z, m, s, h = self.f(h, prev_dim, spec, i, act_f,
                                              path_name=path_name,
                                              noise_std=noise)
                assert self.layer_dims.get(i) in (None, curr_dim)
                self.layer_dims[i] = curr_dim
                d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
                d.unlabeled.s[i] = s
                d.unlabeled.m[i] = m
                prev_dim = curr_dim
            d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
            return d

        # Clean, supervised
        logger.info('Encoder: clean, labeled')
        clean = self.act.clean = encoder(input_concat, 'clean')

        # Corrupted, supervised
        logger.info('Encoder: corr, labeled')
        corr = self.act.corr = encoder(input_concat, 'corr',
                                       input_noise_std=self.p.super_noise_std,
                                       noise_std=self.p.f_local_noise_std)
        est = self.act.est = self.new_activation_dict()

        # Decoder path in opposite order
        logger.info('Decoder: z_corr -> z_est')
        for i, ((_, spec), l_type, act_f) in layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)
            fspec = layers[i+1][1][0] if len(layers) > i+1 else (None, None)

            if i == top:
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           l_type=l_type,
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            if z_est is not None:
                # Denoising cost

                if z_clean_s and self.p.zestbn == 'bugfix':
                    z_est_norm = (z_est - z_clean_m) / T.sqrt(z_clean_s + np.float32(1e-10))
                elif z_clean_s is None or self.p.zestbn == 'no':
                    z_est_norm = z_est
                else:
                    assert False, 'Not supported path'

                se = SquaredError('denois' + str(i))
                costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                           z_clean.flatten(2)) \
                    / np.prod(self.layer_dims[i], dtype=floatX)
                costs.denois[i].name = 'denois' + str(i)
                denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
            else:
                denois_print = ''

            # Store references for later use
            est.h[i] = self.apply_act(z_est, act_f)
            est.z[i] = z_est
            est.s[i] = None
            est.m[i] = None
            logger.info('  g%d: %10s, %s, dim %s -> %s' % (
                i, l_type,
                denois_print,
                self.layer_dims.get(i+1),
                self.layer_dims.get(i)
                ))

        # Costs
        y = target_labeled.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(y, clean.labeled.h[top])
        costs.class_clean.name = 'cost_class_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(y, corr.labeled.h[top])
        costs.class_corr.name = 'cost_class_corr'

        # This will be used for training
        costs.total = costs.class_corr * 1.0
        for i in range(top + 1):
            if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
                costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
        costs.total.name = 'cost_total'

        # Classification error
        mr = MisclassificationRate()
        self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.clean.name = 'error_rate_clean'
Beispiel #9
0
    def apply(self, input_labeled, target_labeled, input_unlabeled):
        self.layer_counter = 0
        input_dim = self.p.encoder_layers[0]

        # Store the dimension tuples in the same order as layers.
        layers = self.layers
        self.layer_dims = {0: input_dim}

        self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

        self.costs = costs = AttributeDict()
        self.costs.denois = AttributeDict()

        self.act = AttributeDict()
        self.error = AttributeDict()

        top = len(layers) - 1

        N = input_labeled.shape[0]
        self.join = lambda l, u: T.concatenate([l, u], axis=0)
        self.labeled = lambda x: x[:N] if x is not None else x
        self.unlabeled = lambda x: x[N:] if x is not None else x
        self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

        input_concat = self.join(input_labeled, input_unlabeled)

        def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
            h = input_

            logger.info('  0: noise %g' % input_noise_std)
            if input_noise_std > 0.:
                h = h + self.noise_like(h) * input_noise_std

            d = AttributeDict()
            d.unlabeled = self.new_activation_dict()
            d.labeled = self.new_activation_dict()
            d.labeled.z[0] = self.labeled(h)
            d.unlabeled.z[0] = self.unlabeled(h)
            prev_dim = input_dim
            for i, (spec, _, act_f) in layers[1:]:
                d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
                noise = noise_std[i] if i < len(noise_std) else 0.
                curr_dim, z, m, s, h = self.f(h,
                                              prev_dim,
                                              spec,
                                              i,
                                              act_f,
                                              path_name=path_name,
                                              noise_std=noise)
                assert self.layer_dims.get(i) in (None, curr_dim)
                self.layer_dims[i] = curr_dim
                d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
                d.unlabeled.s[i] = s
                d.unlabeled.m[i] = m
                prev_dim = curr_dim
            d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
            return d

        # Clean, supervised
        logger.info('Encoder: clean, labeled')
        clean = self.act.clean = encoder(input_concat, 'clean')

        # Corrupted, supervised
        logger.info('Encoder: corr, labeled')
        corr = self.act.corr = encoder(input_concat,
                                       'corr',
                                       input_noise_std=self.p.super_noise_std,
                                       noise_std=self.p.f_local_noise_std)
        est = self.act.est = self.new_activation_dict()

        # Decoder path in opposite order
        logger.info('Decoder: z_corr -> z_est')
        for i, ((_, spec), l_type, act_f) in layers[::-1]:
            z_corr = corr.unlabeled.z[i]
            z_clean = clean.unlabeled.z[i]
            z_clean_s = clean.unlabeled.s.get(i)
            z_clean_m = clean.unlabeled.m.get(i)
            fspec = layers[i + 1][1][0] if len(layers) > i + 1 else (None,
                                                                     None)

            if i == top:
                ver = corr.unlabeled.h[i]
                ver_dim = self.layer_dims[i]
                top_g = True
            else:
                ver = est.z.get(i + 1)
                ver_dim = self.layer_dims.get(i + 1)
                top_g = False

            z_est = self.g(z_lat=z_corr,
                           z_ver=ver,
                           in_dims=ver_dim,
                           out_dims=self.layer_dims[i],
                           l_type=l_type,
                           num=i,
                           fspec=fspec,
                           top_g=top_g)

            if z_est is not None:
                # Denoising cost
                if z_clean_s:
                    z_est_norm = (z_est - z_clean_m) / z_clean_s
                else:
                    z_est_norm = z_est

                se = SquaredError('denois' + str(i))
                costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                           z_clean.flatten(2)) \
                    / np.prod(self.layer_dims[i], dtype=floatX)
                costs.denois[i].name = 'denois' + str(i)
                denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
            else:
                denois_print = ''

            # Store references for later use
            est.h[i] = self.apply_act(z_est, act_f)
            est.z[i] = z_est
            est.s[i] = None
            est.m[i] = None
            logger.info('  g%d: %10s, %s, dim %s -> %s' %
                        (i, l_type, denois_print, self.layer_dims.get(i + 1),
                         self.layer_dims.get(i)))

        # Costs
        y = target_labeled.flatten()

        costs.class_clean = CategoricalCrossEntropy().apply(
            y, clean.labeled.h[top])
        costs.class_clean.name = 'cost_class_clean'

        costs.class_corr = CategoricalCrossEntropy().apply(
            y, corr.labeled.h[top])
        costs.class_corr.name = 'cost_class_corr'

        # This will be used for training
        costs.total = costs.class_corr * 1.0
        for i in range(top + 1):
            if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
                costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
        costs.total.name = 'cost_total'

        # Classification error
        mr = MisclassificationRate()
        self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
        self.error.clean.name = 'error_rate_clean'
Beispiel #10
0
def build_submodel(input_shape,
                   output_dim,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   L_exo_dropout_conv_layers,
                   L_exo_dropout_full_layers,
                   L_endo_dropout_conv_layers,
                   L_endo_dropout_full_layers,
                   L_border_mode=None,
                   L_filter_step=None,
                   L_pool_step=None):


    # TO DO : target size and name of the features

    x = T.tensor4('features')
    y = T.imatrix('targets')

    assert len(input_shape) == 3, "input_shape must be a 3d tensor"

    num_channels = input_shape[0]
    image_size = tuple(input_shape[1:])
    print image_size
    print num_channels
    prediction = output_dim

    # CONVOLUTION
    output_conv = x
    output_dim = num_channels*np.prod(image_size)
    conv_layers = []
    assert len(L_dim_conv_layers) == len(L_filter_size)
    if L_filter_step is None:
        L_filter_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_size)
    if L_pool_step is None:
        L_pool_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_step)
    assert len(L_dim_conv_layers) == len(L_activation_conv)
    if L_border_mode is None:
        L_border_mode = ["valid"] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_border_mode)
    assert len(L_dim_conv_layers) == len(L_endo_dropout_conv_layers)
    assert len(L_dim_conv_layers) == len(L_exo_dropout_conv_layers)

    # regarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we need to have the first dropout value of L_exo_dropout_full_layers
    
    # the first value has to be 0.0 in this context, and we'll
    # assume that it is, but let's have an assert
    assert L_exo_dropout_conv_layers[0] == 0.0, "L_exo_dropout_conv_layers[0] has to be 0.0 in this context. There are ways to make it work, of course, but we don't support this with this scripts."

    # here modifitication of L_exo_dropout_conv_layers
    L_exo_dropout_conv_layers = L_exo_dropout_conv_layers[1:] + [L_exo_dropout_full_layers[0]]

    if len(L_dim_conv_layers):
        for (num_filters, filter_size, filter_step,
            pool_size, pool_step, activation_str, border_mode,
            dropout, index) in zip(L_dim_conv_layers,
                                  L_filter_size,
                                  L_filter_step,
                                  L_pool_size,
                                  L_pool_step,
                                  L_activation_conv,
                                  L_border_mode,
                                  L_exo_dropout_conv_layers,
                                  xrange(len(L_dim_conv_layers))
                                  ):

            # convert filter_size and pool_size in tuple
            filter_size = tuple(filter_size)

            if filter_step is None:
                filter_step = (1, 1)
            else:
                filter_step = tuple(filter_step)

            if pool_size is None:
                pool_size = (0,0)
            else:
                pool_size = tuple(pool_size)

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise Exception("unknown activation function : %s", activation_str)

            assert 0.0 <= dropout and dropout < 1.0
            num_filters = num_filters - int(num_filters*dropout)

            print "border_mode : %s" % border_mode

            # filter_step
            # http://blocks.readthedocs.org/en/latest/api/bricks.html#module-blocks.bricks.conv

            kwargs = {}
            if filter_step is None or filter_step == (1,1):
                pass
            else:
                # there's a bit of a mix of names because `Convolutional` takes
                # a "step" argument, but `ConvolutionActivation` takes "conv_step" argument
                kwargs['conv_step'] = filter_step

            if (pool_size[0] == 0 and pool_size[1] == 0):
                layer_conv = ConvolutionalActivation(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                name="layer_%d" % index,
                                                **kwargs)
            else:
                if pool_step is None:
                    pass
                else:
                    kwargs['pooling_step'] = tuple(pool_step)

                layer_conv = ConvolutionalLayer(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                pooling_size=pool_size,
                                                name="layer_%d" % index,
                                                **kwargs)

            conv_layers.append(layer_conv)

        convnet = ConvolutionalSequence(conv_layers, num_channels=num_channels,
                                    image_size=image_size,
                                    weights_init=Uniform(width=0.1),
                                    biases_init=Constant(0.0),
                                    name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))
        output_conv = convnet.apply(output_conv)
        


    output_conv = Flattener().apply(output_conv)

    # FULLY CONNECTED
    output_mlp = output_conv
    full_layers = []
    assert len(L_dim_full_layers) == len(L_activation_full)
    assert len(L_dim_full_layers) + 1 == len(L_endo_dropout_full_layers)
    assert len(L_dim_full_layers) + 1 == len(L_exo_dropout_full_layers)

    # reguarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we throw away the first value of L_exo_dropout_full_layers
    L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]
    pre_dim = output_dim
    print "When constructing the model, the output_dim of the conv section is %d." % output_dim
    if len(L_dim_full_layers):
        for (dim, activation_str,
            dropout, index) in zip(L_dim_full_layers,
                                  L_activation_full,
                                  L_exo_dropout_full_layers,
                                  range(len(L_dim_conv_layers),
                                        len(L_dim_conv_layers)+ 
                                        len(L_dim_full_layers))
                                   ):
                                          
                # TO DO : leaky relu
                if activation_str.lower() == 'rectifier':
                    activation = Rectifier().apply
                elif activation_str.lower() == 'tanh':
                    activation = Tanh().apply
                elif activation_str.lower() in ['sigmoid', 'logistic']:
                    activation = Logistic().apply
                elif activation_str.lower() in ['id', 'identity']:
                    activation = Identity().apply
                else:
                    raise Exception("unknown activation function : %s", activation_str)

                assert 0.0 <= dropout and dropout < 1.0
                dim = dim - int(dim*dropout)
                print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim)

                layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
                                 weights_init=Uniform(width=0.1),
                                 biases_init=Constant(0.0),
                                name="layer_%d" % index)
                layer_full.initialize()
                full_layers.append(layer_full)
                pre_dim = dim

        for layer in full_layers:
            output_mlp = layer.apply(output_mlp)

        output_dim = L_dim_full_layers[-1] - int(L_dim_full_layers[-1]*L_exo_dropout_full_layers[-1])

    # COST FUNCTION
    output_layer = Linear(output_dim, prediction,
                          weights_init=Uniform(width=0.1),
                          biases_init=Constant(0.0),
                          name="layer_"+str(len(L_dim_conv_layers)+ 
                                            len(L_dim_full_layers))
                          )
    output_layer.initialize()
    full_layers.append(output_layer)
    y_pred = output_layer.apply(output_mlp)
    y_hat = Softmax().apply(y_pred)
    # SOFTMAX and log likelihood
    y_pred = Softmax().apply(y_pred)
    # be careful. one version expects the output of a softmax; the other expects just the
    # output of the network
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_pred)
    #cost = Softmax().categorical_cross_entropy(y.flatten(), y_pred)
    cost.name = "cost"

    # Misclassification
    error_rate_brick = MisclassificationRate()
    error_rate = error_rate_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # put names

    D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)
    # test computation graph
    

    cg = ComputationGraph(cost)

    # DROPOUT
    L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers

    cg_dropout = cg
    inputs = VariableFilter(roles=[INPUT])(cg.variables)

    for (index, drop_rate) in enumerate(L_endo_dropout):
        for input_ in inputs:
            m = re.match(r"layer_(\d+)_apply.*", input_.name)
            if m and index == int(m.group(1)):
                if drop_rate < 0.0001:
                    print "Skipped applying dropout on %s because the dropout rate was under 0.0001." % input_.name
                    break
                else:
                    cg_dropout = apply_dropout(cg, [input_], drop_rate)
                    print "Applied dropout %f on %s." % (drop_rate, input_.name)
                    break


    cg = cg_dropout

    return (cg, error_rate, cost, D_params, D_kind)
def test_communication(path_vae_mnist,
                       path_maxout_mnist):
                       
    # load models
    vae_mnist = load(path_vae_mnist)
    # get params : to be remove from the computation graph

    # write an object maxout
    classifier = Maxout()
    # get params : to be removed from the computation graph

    # vae whose prior is a zero mean unit variance normal distribution
    activation = Rectifier()
    full_weights_init = Orthogonal()
    weights_init = full_weights_init

    # SVHN en niveau de gris
    layers = [32*32, 200, 200, 200, 50]
    encoder_layers = layers[:-1]
    encoder_mlp = MLP([activation] * (len(encoder_layers)-1),
              encoder_layers,
              name="MLP_SVHN_encode", biases_init=Constant(0.), weights_init=weights_init)

    enc_dim = encoder_layers[-1]
    z_dim = layers[-1]
    sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)
    decoder_layers = layers[:]  ## includes z_dim as first layer
    decoder_layers.reverse()
    decoder_mlp = MLP([activation] * (len(decoder_layers)-2) + [Rectifier()],
              decoder_layers,
              name="MLP_SVHN_decode", biases_init=Constant(0.), weights_init=weights_init)

    
    vae_svhn = VAEModel(encoder_mlp, sampler, decoder_mlp)
    vae_svhn.initialize()

    # do the connection
    
    x = T.tensor4('x') # SVHN samples preprocessed with local contrast normalization
    x_ = (T.sum(x, axis=1)).flatten(ndim=2)
    y = T.imatrix('y')
    batch_size = 512

    svhn_z, _ = vae_svhn.sampler.sample(vae_svhn.encoder_mlp.apply(x_))
    mnist_decode = vae_mnist.decoder_mlp.apply(svhn_z)
    # reshape
    shape = mnist_decode.shape
    mnist_decode = mnist_decode.reshape((shape[0], 1, 28, 28))
    prediction = classifier.apply(mnist_decode)
    y_hat = Softmax().apply(prediction)

    x_recons, kl_terms = vae_svhn.reconstruct(x_)
    recons_term = BinaryCrossEntropy().apply(x_, T.clip(x_recons, 1e-4, 1 - 1e-4))
    recons_term.name = "recons_term"

    cost_A = recons_term + kl_terms.mean()
    cost_A.name = "cost_A"

    cost_B = Softmax().categorical_cross_entropy(y.flatten(), prediction)
    cost_B.name = 'cost_B'

    cost = cost_B
    cost.name = "cost"
    cg = ComputationGraph(cost) # probably discard some of the parameters
    parameters = cg.parameters
    params = []
    for t in parameters:
        if not re.match(".*mnist", t.name):
            params.append(t)

    """
    f = theano.function([x], cost_A)
    value_x = np.random.ranf((1, 3, 32, 32)).astype("float32")
    print f(value_x)
    
    return
    """
    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"
    
    # training here
    step_rule = RMSProp(0.001,0.99)

    dataset_hdf5_file="/Tmp/ducoffem/SVHN/"
    train_set = H5PYDataset(os.path.join(dataset_hdf5_file, "all.h5"), which_set='train')
    test_set = H5PYDataset(os.path.join(dataset_hdf5_file, "all.h5"), which_set='valid')
    
    data_stream = DataStream.default_stream(
        train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size))
        
    data_stream_test = DataStream.default_stream(
        test_set, iteration_scheme=SequentialScheme(2000, batch_size))


    algorithm = GradientDescent(cost=cost, params=params,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_batches=10)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="valid", every_n_batches=10)

    # drawing_samples = ImagesSamplesSave("../data_svhn", vae, (3, 32, 32), every_n_epochs=1)
    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_batches=10000),
                    Printing(every_n_batches=10)
                  ]

    main_loop = MainLoop(data_stream=data_stream,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()
Beispiel #12
0
class LookUpTrain(Initializable, Feedforward):
    @lazy(allocation=['dwin', 'n_mot', 'vect_size', 'n_hidden'])
    def __init__(self, dwin, n_mot, vect_size, n_hidden, n_out=2, **kwargs):
        self.dwin = dwin
        self.n_mot = n_mot
        self.vect_size = vect_size
        if isinstance(n_hidden, int):
            self.n_hidden = [n_hidden]
        else:
            self.n_hidden = n_hidden
        self.n_out = n_out
        self.window = Window(self.dwin,
                             self.n_mot,
                             self.vect_size,
                             self.n_hidden,
                             self.n_out,
                             weights_init=IsotropicGaussian(0.001))
        super(LookUpTrain, self).__init__(**kwargs)
        self.softmax = Softmax()
        self.error = MisclassificationRate()
        self.children = [self.window, self.softmax, self.error]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return self.window.apply(input_)

    @application(inputs=['x', 'y'], outputs=['output'])
    def cost(self, x, y):
        return self.softmax.categorical_cross_entropy(y, self.apply(x))

    @application(inputs=['x', 'y'], outputs=['output'])
    def errors(self, x, y):
        return self.error.apply(y, self.apply(x))

    @application(inputs=['x'], outputs=['output'])
    def predict(self, x):
        return T.argmax(self.apply(x), axis=1)

    @application(inputs=['x'], outputs=['output'])
    def predict_confidency(self, x):
        return T.max(self.apply(x), axis=1)

    def update_lookup_weights(self):
        self.window.update_lookup_weights()

    @application(inputs=['input_', 'input_corrupt'], outputs=['output'])
    def score(self, input_, input_corrupt):
        # modify the input_ with an incorrect central word ?
        return (1 - -self.apply(input_)).norm(2) + (
            self.apply(input_corrupt)).norm(2)
        #return T.maximum(0,1 - self.apply(input_)+self.apply(input_corrupt) )[0]
        return T.maximum(0, 1 - self.apply(input_))[0] + 0.1 * T.maximum(
            0, 1 + self.apply(input_corrupt))[0] + 0.1 * T.maximum(
                0, 1 - self.apply(input_) + self.apply(input_corrupt))[0]
        # change that !!!!

    def _initialize(self):
        self.window.initialize()

    @application(inputs=['input_'], outputs=['output'])
    def embedding(self, input_):
        return self.window.embedding(input_)

    def _allocate(self):
        self.window.allocate()

    def load(self, repo, filename):
        params = getParams(self, T.itensor3())
        with closing(open(os.path.join(repo, filename), 'rb')) as f:
            params_value = pickle.load(f)
        for p, p_value in zip(params, params_value):
            p.set_value(p_value.get_value())

    def get_Params(self):
        return self.window.get_Params()

    def save(self, repo, filename):
        params = getParams(self, T.itensor3())
        index = 0
        while os.path.isfile(os.path.join(repo, filename + "_" + str(index))):
            index += 1
        filename = filename + "_" + str(index)
        with closing(open(os.path.join(repo, filename), 'wb')) as f:
            pickle.dump(params, f, protocol=pickle.HIGHEST_PROTOCOL)