def test_misclassification_rate():
    """Check ``MisclassificationRate`` for ``top_k`` of 1 (default), 2 and 3.

    Five samples with four class scores each are compared against integer
    targets; the expected error rates are 0.6, 0.4 and 0.2.
    """
    y = tensor.vector(dtype="int32")
    # The first positional argument of ``tensor.matrix`` is the variable
    # name, so the dtype has to be passed by keyword.
    yhat = tensor.matrix(dtype=theano.config.floatX)

    top1_brick = MisclassificationRate()
    top2_brick = MisclassificationRate(top_k=2)
    top3_brick = MisclassificationRate(top_k=3)
    f = theano.function(
        [y, yhat],
        [brick.apply(y, yhat)
         for brick in (top1_brick, top2_brick, top3_brick)])

    y_ = numpy.array([2, 1, 0, 1, 2], dtype="int32")
    yhat_ = numpy.array(
        [[3, 2, 1, 0],
         [1, 8, 2, 1],
         [3, 8, 1, 2],
         [1, 6, 4, 2],
         [9, 7, 5, 5]],
        dtype="float32")

    top1_error = 0.6
    top2_error = 0.4
    top3_error = 0.2
    assert_allclose([top1_error, top2_error, top3_error], f(y_, yhat_))
def apply(self, input_lb, input_un, target):
    """Build the clean and corrupted passes and derive costs and error.

    The labeled and unlabeled inputs are concatenated along axis 0 and fed
    to ``self.encoder`` twice: once with ``noise_std=[0]`` and once with
    ``self.noise_std``.  The cost container returned by ``self.decoder`` is
    completed with the two classification costs and the total cost, then
    stored in ``self.costs``.  The misclassification rate of the clean
    pass, multiplied by 100, is stored in ``self.error``.  Nothing is
    returned.
    """
    n_labeled = input_lb.shape[0]

    def labeled_rows(x):
        # Labeled samples come first in the concatenated batch.
        return x if x is None else x[:n_labeled]

    stacked = T.concatenate([input_lb, input_un], axis=0)

    self.layer_dims = {0: self.input_dim}
    self.lr = self.shared(self.default_lr, "learning_rate", role=None)

    top = len(self.layers) - 1

    clean = self.encoder(stacked, noise_std=[0])
    corr = self.encoder(stacked, noise_std=self.noise_std)
    _, costs = self.decoder(clean, corr, n_labeled)

    # Supervised costs on the labeled part of the top layer.
    y = target.flatten()
    costs.class_clean = CategoricalCrossEntropy().apply(
        y, labeled_rows(clean.h[top]))
    costs.class_clean.name = "CE_clean"
    costs.class_corr = CategoricalCrossEntropy().apply(
        y, labeled_rows(corr.h[top]))
    costs.class_corr.name = "CE_corr"

    # Total cost: corrupted classification cost plus weighted denoising
    # costs of every layer.
    costs.total = costs.class_corr * 1.0
    for layer_idx in range(top + 1):
        costs.total += (costs.denois[layer_idx]
                        * self.denoising_cost_x[layer_idx])
    costs.total.name = "Total_cost"

    self.costs = costs

    # Classification error, expressed in percent.
    error_brick = MisclassificationRate()
    self.error = (error_brick.apply(y, labeled_rows(clean.h[top]))
                  * np.float32(100.0))
    self.error.name = "Error_rate"
def apply(self, input_lb, input_un, target):
    """Construct the encoder/decoder graph and the training costs.

    ``input_lb`` and ``input_un`` are joined along axis 0.  The joined batch
    goes through ``self.encoder`` without noise (``noise_std=[0]``) and with
    ``self.noise_std``; ``self.decoder`` then provides the cost container.
    Side effects: sets ``self.layer_dims``, ``self.lr``, ``self.costs`` and
    ``self.error`` (misclassification rate of the clean pass times 100).
    """
    batch_size = input_lb.shape[0]

    def keep_labeled(tensor_or_none):
        # ``None`` is passed through; otherwise keep the first
        # ``batch_size`` rows, which are the labeled ones.
        if tensor_or_none is None:
            return tensor_or_none
        return tensor_or_none[:batch_size]

    joined = T.concatenate([input_lb, input_un], axis=0)

    self.layer_dims = {0: self.input_dim}
    self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

    n_layers = len(self.layers)
    top = n_layers - 1

    clean = self.encoder(joined, noise_std=[0])
    corr = self.encoder(joined, noise_std=self.noise_std)
    _unused_ests, costs = self.decoder(clean, corr, batch_size)

    # Costs
    y = target.flatten()

    clean_ce = CategoricalCrossEntropy().apply(y, keep_labeled(clean.h[top]))
    costs.class_clean = clean_ce
    costs.class_clean.name = 'CE_clean'

    corr_ce = CategoricalCrossEntropy().apply(y, keep_labeled(corr.h[top]))
    costs.class_corr = corr_ce
    costs.class_corr.name = 'CE_corr'

    costs.total = costs.class_corr * 1.0
    for i in range(n_layers):
        weighted_denoising = costs.denois[i] * self.denoising_cost_x[i]
        costs.total += weighted_denoising
    costs.total.name = 'Total_cost'

    self.costs = costs

    # Classification error
    mr = MisclassificationRate()
    percent = np.float32(100.)
    self.error = mr.apply(y, keep_labeled(clean.h[top])) * percent
    self.error.name = 'Error_rate'
def maxout_vae_mnist_test(path_vae_mnist):
    """Train a ``Maxout`` classifier on latent codes of a saved MNIST VAE.

    The VAE is loaded from ``path_vae_mnist``; its encoder and sampler turn
    the flattened images into codes on which the classifier is trained with
    RMSProp for 50 epochs.  The cost is monitored on the training stream and
    the cost plus error rate on the test stream.  The trained classifier is
    finally dumped to ``'../data_mnist/maxout'``.

    Parameters
    ----------
    path_vae_mnist
        Location handed to ``load`` to restore the VAE model.
    """
    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    # Tag every parameter of the cost graph with its index and a suffix.
    for i, param in enumerate(cg.parameters):
        param.name = param.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    # The targets are declared as a matrix; flatten them as done for the
    # cost so they are compared element-wise with the predicted classes.
    error_rate = error_brick.apply(y.flatten(), y_hat)

    # training
    step_rule = RMSProp(0.01, 0.9)

    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
        train_set,
        iteration_scheme=SequentialScheme(train_set.num_examples,
                                          batch_size)))
    data_stream_test = Flatten(DataStream.default_stream(
        test_set,
        iteration_scheme=SequentialScheme(test_set.num_examples,
                                          batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test,
        prefix="test")

    extensions = [
        monitor_train,
        monitor_valid,
        FinishAfter(after_n_epochs=50),
        Printing(every_n_epochs=1),
    ]

    main_loop = MainLoop(data_stream=data_stream_train,
                         algorithm=algorithm, model=Model(cost),
                         extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    # The serialized model is binary data, so open the file in binary mode.
    with open('../data_mnist/maxout', 'wb') as f:
        dump(maxout, f)
def test_misclassification_rate():
    """Verify the error rates reported by ``MisclassificationRate``.

    Bricks with the default ``top_k``, ``top_k=2`` and ``top_k=3`` are
    compiled into one function and evaluated on five samples; the results
    must be close to 0.6, 0.4 and 0.2.
    """
    y = tensor.vector(dtype='int32')
    # Pass the dtype by keyword: given positionally it would be taken as
    # the variable name instead.
    yhat = tensor.matrix(dtype=theano.config.floatX)

    top1_brick = MisclassificationRate()
    top2_brick = MisclassificationRate(top_k=2)
    top3_brick = MisclassificationRate(top_k=3)
    outputs = [
        top1_brick.apply(y, yhat),
        top2_brick.apply(y, yhat),
        top3_brick.apply(y, yhat),
    ]
    f = theano.function([y, yhat], outputs)

    y_ = numpy.array([2, 1, 0, 1, 2], dtype='int32')
    yhat_ = numpy.array(
        [[3, 2, 1, 0],
         [1, 8, 2, 1],
         [3, 8, 1, 2],
         [1, 6, 4, 2],
         [9, 7, 5, 5]],
        dtype='float32')

    expected = [0.6, 0.4, 0.2]  # top-1, top-2, top-3 error
    assert_allclose(expected, f(y_, yhat_))
def apply(self, input_labeled, target_labeled, input_unlabeled):
    """Build the clean/corrupted encoder passes, the decoder and the costs.

    Helper callables for joining and splitting labeled/unlabeled rows are
    stored on the instance (``join``, ``labeled``, ``unlabeled``,
    ``split_lu``).  The concatenated input is passed through
    ``self.encoder`` for the ``'clean'`` and ``'corr'`` paths, then through
    ``self.decoder``.  The resulting costs are stored in ``self.costs`` and
    the clean misclassification rate times 100 in ``self.error``.
    """
    self.layer_counter = 0
    self.layer_dims = {0: self.input_dim}
    self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

    top = len(self.layers) - 1
    num_labeled = input_labeled.shape[0]

    def _join(l, u):
        return T.concatenate([l, u], axis=0)

    def _labeled(x):
        # Rows before ``num_labeled`` belong to the labeled inputs.
        return x if x is None else x[:num_labeled]

    def _unlabeled(x):
        return x if x is None else x[num_labeled:]

    def _split_lu(x):
        return (self.labeled(x), self.unlabeled(x))

    self.join = _join
    self.labeled = _labeled
    self.unlabeled = _unlabeled
    self.split_lu = _split_lu

    input_concat = self.join(input_labeled, input_unlabeled)

    clean = self.encoder(input_concat, 'clean',
                         input_noise_std=0.0, noise_std=[])
    corr = self.encoder(input_concat, 'corr',
                        input_noise_std=self.super_noise_std,
                        noise_std=self.f_local_noise_std)
    _, costs = self.decoder(clean, corr)

    # Classification costs on the labeled top-layer activations.
    y = target_labeled.flatten()

    costs.class_clean = CategoricalCrossEntropy().apply(
        y, clean.labeled.h[top])
    costs.class_clean.name = 'CE_clean'

    costs.class_corr = CategoricalCrossEntropy().apply(
        y, corr.labeled.h[top])
    costs.class_corr.name = 'CE_corr'

    # Total cost: corrupted classification cost plus the weighted
    # denoising cost of every layer.
    costs.total = costs.class_corr * 1.0
    for layer_idx in range(top + 1):
        costs.total += (costs.denois[layer_idx]
                        * self.denoising_cost_x[layer_idx])
    costs.total.name = 'Total_cost'

    self.costs = costs

    # Classification error in percent.
    error_brick = MisclassificationRate()
    self.error = (error_brick.apply(y, clean.labeled.h[top])
                  * np.float32(100.))
    self.error.name = 'Error_rate'
def training_model_mnist(learning_rate, momentum, iteration, batch_size,
                         epoch_end, iter_batch):
    """Train the model from ``build_model_mnist`` on MNIST with momentum SGD.

    Parameters
    ----------
    learning_rate, momentum
        Forwarded to the ``Momentum`` step rule.
    iteration
        ``"slice"`` selects ``SequentialScheme_slice``; any other value
        selects ``SequentialScheme``.
    batch_size
        Batch size given to the iteration scheme of both streams.
    epoch_end
        Number of epochs after which training stops.
    iter_batch
        Period, in epochs, of monitoring and printing.
    """
    x = T.tensor4('features')
    y = T.imatrix('targets')

    classifier = build_model_mnist()
    predict = classifier.apply(x)
    y_hat = Softmax().apply(predict)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    cost.name = "cost"
    cg = ComputationGraph(cost)

    error_rate = MisclassificationRate().apply(y.flatten(), y_hat)
    error_rate.name = "error"

    train_set = MNIST(('train', ))
    test_set = MNIST(("test",))

    # Both streams use the same kind of scheme; only the class differs
    # depending on ``iteration``.
    if iteration == "slice":
        scheme_cls = SequentialScheme_slice
    else:
        scheme_cls = SequentialScheme

    def make_stream(dataset):
        scheme = scheme_cls(dataset.num_examples, batch_size)
        return DataStream.default_stream(dataset, iteration_scheme=scheme)

    data_stream = make_stream(train_set)
    data_stream_test = make_stream(test_set)

    step_rule = Momentum(learning_rate=learning_rate, momentum=momentum)

    # Shared variable updated by the timing extension so that the elapsed
    # time can be monitored like any other variable.
    start = time.clock()
    time_spent = shared_floatx(np.float32(0.), name="time_spent")
    time_extension = Time_reference(start, time_spent, every_n_batches=1)

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_epochs=iter_batch)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate, time_spent],
        data_stream=data_stream_test,
        prefix="valid",
        every_n_epochs=iter_batch)

    extensions = [
        monitor_train,
        monitor_valid,
        FinishAfter(after_n_epochs=epoch_end),
        Printing(every_n_epochs=iter_batch),
        time_extension,
    ]

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=algorithm, model=Model(cost),
                         extensions=extensions)
    main_loop.run()
def apply(self, input_labeled, target_labeled, input_unlabeled):
    """Build the encoder passes, the decoder and every cost of the model.

    The labeled and unlabeled inputs are concatenated along axis 0 and run
    through the nested ``encoder`` twice (path names ``'clean'`` and
    ``'corr'``).  The decoder loop then walks ``self.layers`` in reverse
    order and creates one denoising cost per layer for which ``self.g``
    returns an estimate.  Results are stored on the instance
    (``self.costs``, ``self.act``, ``self.error``); nothing is returned.
    """
    self.target_labeled = target_labeled
    self.layer_counter = 0
    input_dim = self.p.encoder_layers[0]

    # Store the dimension tuples in the same order as layers.
    layers = self.layers
    self.layer_dims = {0: input_dim}

    self.lr = self.default_lr

    # ``costs`` is a local alias of ``self.costs``.
    self.costs = costs = AttributeDict()
    self.costs.denois = AttributeDict()

    self.act = AttributeDict()
    self.error = AttributeDict()

    # Index of the last (top) layer.
    top = len(layers) - 1

    # Number of labeled samples; they occupy the first N rows after joining.
    N = input_labeled.shape[0]
    self.join = lambda l, u: T.concatenate([l, u], axis=0)
    self.labeled = lambda x: x[:N] if x is not None else x
    self.unlabeled = lambda x: x[N:] if x is not None else x
    self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

    input_concat = self.join(input_labeled, input_unlabeled)

    # NOTE: ``noise_std=[]`` is a mutable default, but it is only read here.
    def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
        """Run one encoder pass and collect per-layer activations.

        Returns an ``AttributeDict`` with ``labeled`` and ``unlabeled``
        activation dictionaries.
        """
        h = input_

        logger.info(' 0: noise %g' % input_noise_std)
        if input_noise_std > 0.:
            h = h + self.noise_like(h) * input_noise_std

        d = AttributeDict()
        d.unlabeled = self.new_activation_dict()
        d.labeled = self.new_activation_dict()
        d.labeled.z[0] = self.labeled(h)
        d.unlabeled.z[0] = self.unlabeled(h)
        prev_dim = input_dim
        for i, (spec, _, act_f) in layers[1:]:
            # Record the input of layer i as the output of layer i - 1.
            d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
            # Layers without an entry in ``noise_std`` get no noise.
            noise = noise_std[i] if i < len(noise_std) else 0.
            curr_dim, z, m, s, h = self.f(h, prev_dim, spec, i, act_f,
                                          path_name=path_name,
                                          noise_std=noise)
            # Both encoder passes must agree on the layer dimensions.
            assert self.layer_dims.get(i) in (None, curr_dim)
            self.layer_dims[i] = curr_dim
            d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
            # ``s`` and ``m`` are stored for the unlabeled part only.
            d.unlabeled.s[i] = s
            d.unlabeled.m[i] = m
            prev_dim = curr_dim
        # Output of the last layer (``i`` keeps its final loop value).
        d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
        return d

    # Clean, supervised
    logger.info('Encoder: clean, labeled')
    clean = self.act.clean = encoder(input_concat, 'clean')

    # Corrupted, supervised
    logger.info('Encoder: corr, labeled')
    corr = self.act.corr = encoder(input_concat, 'corr',
                                   input_noise_std=self.p.super_noise_std,
                                   noise_std=self.p.f_local_noise_std)
    est = self.act.est = self.new_activation_dict()

    # Decoder path in opposite order
    logger.info('Decoder: z_corr -> z_est')
    for i, ((_, spec), l_type, act_f) in layers[::-1]:
        z_corr = corr.unlabeled.z[i]
        z_clean = clean.unlabeled.z[i]
        z_clean_s = clean.unlabeled.s.get(i)
        z_clean_m = clean.unlabeled.m.get(i)
        # Spec taken from the layer above, or a pair of ``None`` at the top.
        fspec = layers[i+1][1][0] if len(layers) > i+1 else (None, None)

        if i == top:
            # The top layer takes the corrupted encoder output as the
            # signal from above.
            ver = corr.unlabeled.h[i]
            ver_dim = self.layer_dims[i]
            top_g = True
        else:
            # Lower layers take the estimate of the layer above.
            ver = est.z.get(i + 1)
            ver_dim = self.layer_dims.get(i + 1)
            top_g = False

        z_est = self.g(z_lat=z_corr,
                       z_ver=ver,
                       in_dims=ver_dim,
                       out_dims=self.layer_dims[i],
                       l_type=l_type,
                       num=i,
                       fspec=fspec,
                       top_g=top_g)

        if z_est is not None:
            # Denoising cost

            # NOTE(review): ``z_clean_s`` is tested by truthiness here and
            # by ``is None`` below - confirm both agree for the values
            # ``self.f`` can return.
            if z_clean_s and self.p.zestbn == 'bugfix':
                z_est_norm = (z_est - z_clean_m) / T.sqrt(z_clean_s + np.float32(1e-10))
            elif z_clean_s is None or self.p.zestbn == 'no':
                z_est_norm = z_est
            else:
                assert False, 'Not supported path'

            # Squared error divided by the product of the layer dimensions.
            se = SquaredError('denois' + str(i))
            costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                       z_clean.flatten(2)) \
                / np.prod(self.layer_dims[i], dtype=floatX)
            costs.denois[i].name = 'denois' + str(i)
            denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
        else:
            denois_print = ''

        # Store references for later use
        est.h[i] = self.apply_act(z_est, act_f)
        est.z[i] = z_est
        est.s[i] = None
        est.m[i] = None
        logger.info(' g%d: %10s, %s, dim %s -> %s' % (
            i, l_type,
            denois_print,
            self.layer_dims.get(i+1),
            self.layer_dims.get(i)
            ))

    # Costs
    y = target_labeled.flatten()

    costs.class_clean = CategoricalCrossEntropy().apply(y, clean.labeled.h[top])
    costs.class_clean.name = 'cost_class_clean'

    costs.class_corr = CategoricalCrossEntropy().apply(y, corr.labeled.h[top])
    costs.class_corr.name = 'cost_class_corr'

    # This will be used for training
    costs.total = costs.class_corr * 1.0
    for i in range(top + 1):
        # Only layers that have a denoising cost and a positive weight
        # contribute to the total.
        if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
            costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
    costs.total.name = 'cost_total'

    # Classification error
    mr = MisclassificationRate()
    # Multiplied by 100 to express the rate in percent.
    self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
    self.error.clean.name = 'error_rate_clean'
def apply(self, input_labeled, target_labeled, input_unlabeled):
    """Build the encoder passes, the decoder and every cost of the model.

    The labeled and unlabeled inputs are concatenated along axis 0 and run
    through the nested ``encoder`` for the ``'clean'`` and ``'corr'``
    paths.  The decoder loop walks ``self.layers`` in reverse order and
    creates a denoising cost for each layer where ``self.g`` returns an
    estimate.  Everything is stored on the instance (``self.costs``,
    ``self.act``, ``self.error``, ``self.lr``); nothing is returned.
    """
    self.layer_counter = 0
    input_dim = self.p.encoder_layers[0]

    # Store the dimension tuples in the same order as layers.
    layers = self.layers
    self.layer_dims = {0: input_dim}

    self.lr = self.shared(self.default_lr, 'learning_rate', role=None)

    # ``costs`` is a local alias of ``self.costs``.
    self.costs = costs = AttributeDict()
    self.costs.denois = AttributeDict()

    self.act = AttributeDict()
    self.error = AttributeDict()

    # Index of the last (top) layer.
    top = len(layers) - 1

    # Number of labeled samples; they occupy the first N rows after joining.
    N = input_labeled.shape[0]
    self.join = lambda l, u: T.concatenate([l, u], axis=0)
    self.labeled = lambda x: x[:N] if x is not None else x
    self.unlabeled = lambda x: x[N:] if x is not None else x
    self.split_lu = lambda x: (self.labeled(x), self.unlabeled(x))

    input_concat = self.join(input_labeled, input_unlabeled)

    # NOTE: ``noise_std=[]`` is a mutable default, but it is only read here.
    def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
        """Run one encoder pass and collect per-layer activations.

        Returns an ``AttributeDict`` with ``labeled`` and ``unlabeled``
        activation dictionaries.
        """
        h = input_

        logger.info(' 0: noise %g' % input_noise_std)
        if input_noise_std > 0.:
            h = h + self.noise_like(h) * input_noise_std

        d = AttributeDict()
        d.unlabeled = self.new_activation_dict()
        d.labeled = self.new_activation_dict()
        d.labeled.z[0] = self.labeled(h)
        d.unlabeled.z[0] = self.unlabeled(h)
        prev_dim = input_dim
        for i, (spec, _, act_f) in layers[1:]:
            # Record the input of layer i as the output of layer i - 1.
            d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
            # Layers without an entry in ``noise_std`` get no noise.
            noise = noise_std[i] if i < len(noise_std) else 0.
            curr_dim, z, m, s, h = self.f(h, prev_dim, spec, i, act_f,
                                          path_name=path_name,
                                          noise_std=noise)
            # Both encoder passes must agree on the layer dimensions.
            assert self.layer_dims.get(i) in (None, curr_dim)
            self.layer_dims[i] = curr_dim
            d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
            # ``s`` and ``m`` are stored for the unlabeled part only.
            d.unlabeled.s[i] = s
            d.unlabeled.m[i] = m
            prev_dim = curr_dim
        # Output of the last layer (``i`` keeps its final loop value).
        d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
        return d

    # Clean, supervised
    logger.info('Encoder: clean, labeled')
    clean = self.act.clean = encoder(input_concat, 'clean')

    # Corrupted, supervised
    logger.info('Encoder: corr, labeled')
    corr = self.act.corr = encoder(input_concat, 'corr',
                                   input_noise_std=self.p.super_noise_std,
                                   noise_std=self.p.f_local_noise_std)
    est = self.act.est = self.new_activation_dict()

    # Decoder path in opposite order
    logger.info('Decoder: z_corr -> z_est')
    for i, ((_, spec), l_type, act_f) in layers[::-1]:
        z_corr = corr.unlabeled.z[i]
        z_clean = clean.unlabeled.z[i]
        z_clean_s = clean.unlabeled.s.get(i)
        z_clean_m = clean.unlabeled.m.get(i)
        # Spec taken from the layer above, or a pair of ``None`` at the top.
        fspec = layers[i + 1][1][0] if len(layers) > i + 1 else (None, None)

        if i == top:
            # The top layer takes the corrupted encoder output as the
            # signal from above.
            ver = corr.unlabeled.h[i]
            ver_dim = self.layer_dims[i]
            top_g = True
        else:
            # Lower layers take the estimate of the layer above.
            ver = est.z.get(i + 1)
            ver_dim = self.layer_dims.get(i + 1)
            top_g = False

        z_est = self.g(z_lat=z_corr,
                       z_ver=ver,
                       in_dims=ver_dim,
                       out_dims=self.layer_dims[i],
                       l_type=l_type,
                       num=i,
                       fspec=fspec,
                       top_g=top_g)

        if z_est is not None:
            # Denoising cost
            # NOTE(review): the estimate is divided by ``z_clean_s`` as is,
            # and ``z_clean_s`` is tested by truthiness - confirm what
            # ``self.f`` returns for ``s`` (scale vs. variance, ``None``).
            if z_clean_s:
                z_est_norm = (z_est - z_clean_m) / z_clean_s
            else:
                z_est_norm = z_est

            # Squared error divided by the product of the layer dimensions.
            se = SquaredError('denois' + str(i))
            costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                       z_clean.flatten(2)) \
                / np.prod(self.layer_dims[i], dtype=floatX)
            costs.denois[i].name = 'denois' + str(i)
            denois_print = 'denois %.2f' % self.p.denoising_cost_x[i]
        else:
            denois_print = ''

        # Store references for later use
        est.h[i] = self.apply_act(z_est, act_f)
        est.z[i] = z_est
        est.s[i] = None
        est.m[i] = None
        logger.info(' g%d: %10s, %s, dim %s -> %s' %
                    (i, l_type, denois_print,
                     self.layer_dims.get(i + 1), self.layer_dims.get(i)))

    # Costs
    y = target_labeled.flatten()

    costs.class_clean = CategoricalCrossEntropy().apply(
        y, clean.labeled.h[top])
    costs.class_clean.name = 'cost_class_clean'

    costs.class_corr = CategoricalCrossEntropy().apply(
        y, corr.labeled.h[top])
    costs.class_corr.name = 'cost_class_corr'

    # This will be used for training
    costs.total = costs.class_corr * 1.0
    for i in range(top + 1):
        # Only layers that have a denoising cost and a positive weight
        # contribute to the total.
        if costs.denois.get(i) and self.p.denoising_cost_x[i] > 0:
            costs.total += costs.denois[i] * self.p.denoising_cost_x[i]
    costs.total.name = 'cost_total'

    # Classification error
    mr = MisclassificationRate()
    # Multiplied by 100 to express the rate in percent.
    self.error.clean = mr.apply(y, clean.labeled.h[top]) * np.float32(100.)
    self.error.clean.name = 'error_rate_clean'
def _activation_from_name(activation_str):
    """Return the ``apply`` method of the brick named by ``activation_str``.

    The lookup is case-insensitive.  Raises ``Exception`` for unknown names.
    """
    # TO DO : leaky relu
    name = activation_str.lower()
    if name == 'rectifier':
        return Rectifier().apply
    if name == 'tanh':
        return Tanh().apply
    if name in ('sigmoid', 'logistic'):
        return Logistic().apply
    if name in ('id', 'identity'):
        return Identity().apply
    # The name is interpolated into the message (it used to be passed as a
    # second exception argument and was never formatted).
    raise Exception("unknown activation function : %s" % activation_str)


def build_submodel(input_shape,
                   output_dim,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   L_exo_dropout_conv_layers,
                   L_exo_dropout_full_layers,
                   L_endo_dropout_conv_layers,
                   L_endo_dropout_full_layers,
                   L_border_mode=None,
                   L_filter_step=None,
                   L_pool_step=None):
    """Build a convolutional + fully-connected classifier graph.

    Layer widths are shrunk according to the "exo" dropout rates
    (``width - int(width * rate)``), while the "endo" dropout rates are
    applied to the computation graph with ``apply_dropout``.

    Parameters
    ----------
    input_shape
        Sequence of length 3: number of channels followed by the image size.
    output_dim
        Number of outputs of the final linear layer.
    L_dim_conv_layers, L_filter_size, L_pool_size, L_activation_conv
        Per-convolutional-layer number of filters, filter size, pooling
        size (``None`` for no pooling) and activation name.
    L_dim_full_layers, L_activation_full
        Per-fully-connected-layer width and activation name.
    L_exo_dropout_conv_layers, L_exo_dropout_full_layers
        Rates used to shrink layer widths.  The first conv value must be
        0.0; the full-layer list has one more entry than there are full
        layers.
    L_endo_dropout_conv_layers, L_endo_dropout_full_layers
        Rates applied with ``apply_dropout`` on the layer inputs.
    L_border_mode, L_filter_step, L_pool_step
        Optional per-convolutional-layer border mode (default ``"valid"``),
        filter step and pooling step.

    Returns
    -------
    tuple
        ``(cg, error_rate, cost, D_params, D_kind)``.

    Raises
    ------
    AssertionError
        If the per-layer lists have inconsistent lengths or a dropout rate
        is outside ``[0, 1)``.
    Exception
        If an activation name is not recognised.
    """
    # TO DO : target size and name of the features
    x = T.tensor4('features')
    y = T.imatrix('targets')

    assert len(input_shape) == 3, "input_shape must be a 3d tensor"

    num_channels = input_shape[0]
    image_size = tuple(input_shape[1:])
    print(image_size)
    print(num_channels)
    prediction = output_dim

    # CONVOLUTION
    output_conv = x
    output_dim = num_channels*np.prod(image_size)
    conv_layers = []
    n_conv = len(L_dim_conv_layers)

    assert n_conv == len(L_filter_size)
    if L_filter_step is None:
        L_filter_step = [None] * n_conv
    assert n_conv == len(L_pool_size)
    if L_pool_step is None:
        L_pool_step = [None] * n_conv
    assert n_conv == len(L_pool_step)
    assert n_conv == len(L_activation_conv)
    if L_border_mode is None:
        L_border_mode = ["valid"] * n_conv
    assert n_conv == len(L_border_mode)
    assert n_conv == len(L_endo_dropout_conv_layers)
    assert n_conv == len(L_exo_dropout_conv_layers)

    # regarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we need to have the first dropout value of
    # L_exo_dropout_full_layers

    # the first value has to be 0.0 in this context, and we'll
    # assume that it is, but let's have an assert
    assert L_exo_dropout_conv_layers[0] == 0.0, (
        "L_exo_dropout_conv_layers[0] has to be 0.0 in this context. "
        "There are ways to make it work, of course, but we don't support "
        "this with this scripts.")

    # here modification of L_exo_dropout_conv_layers
    L_exo_dropout_conv_layers = (L_exo_dropout_conv_layers[1:]
                                 + [L_exo_dropout_full_layers[0]])

    if n_conv:
        conv_specs = zip(L_dim_conv_layers,
                         L_filter_size,
                         L_filter_step,
                         L_pool_size,
                         L_pool_step,
                         L_activation_conv,
                         L_border_mode,
                         L_exo_dropout_conv_layers)
        for index, (num_filters, filter_size, filter_step,
                    pool_size, pool_step, activation_str,
                    border_mode, dropout) in enumerate(conv_specs):

            # convert filter_size and pool_size in tuple
            filter_size = tuple(filter_size)

            if filter_step is None:
                filter_step = (1, 1)
            else:
                filter_step = tuple(filter_step)

            # (0, 0) stands for "no pooling".
            if pool_size is None:
                pool_size = (0, 0)
            else:
                pool_size = tuple(pool_size)

            activation = _activation_from_name(activation_str)

            assert 0.0 <= dropout < 1.0
            num_filters = num_filters - int(num_filters*dropout)

            print("border_mode : %s" % border_mode)

            # filter_step
            # http://blocks.readthedocs.org/en/latest/api/bricks.html#module-blocks.bricks.conv
            kwargs = {}
            if filter_step != (1, 1):
                # there's a bit of a mix of names because `Convolutional`
                # takes a "step" argument, but `ConvolutionActivation`
                # takes "conv_step" argument
                kwargs['conv_step'] = filter_step

            if pool_size[0] == 0 and pool_size[1] == 0:
                layer_conv = ConvolutionalActivation(
                    activation=activation,
                    filter_size=filter_size,
                    num_filters=num_filters,
                    border_mode=border_mode,
                    name="layer_%d" % index,
                    **kwargs)
            else:
                if pool_step is not None:
                    kwargs['pooling_step'] = tuple(pool_step)

                layer_conv = ConvolutionalLayer(
                    activation=activation,
                    filter_size=filter_size,
                    num_filters=num_filters,
                    border_mode=border_mode,
                    pooling_size=pool_size,
                    name="layer_%d" % index,
                    **kwargs)

            conv_layers.append(layer_conv)

        convnet = ConvolutionalSequence(conv_layers,
                                        num_channels=num_channels,
                                        image_size=image_size,
                                        weights_init=Uniform(width=0.1),
                                        biases_init=Constant(0.0),
                                        name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))
        output_conv = convnet.apply(output_conv)

    output_conv = Flattener().apply(output_conv)

    # FULLY CONNECTED
    output_mlp = output_conv
    full_layers = []
    n_full = len(L_dim_full_layers)

    assert n_full == len(L_activation_full)
    assert n_full + 1 == len(L_endo_dropout_full_layers)
    assert n_full + 1 == len(L_exo_dropout_full_layers)

    # regarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we throw away the first value of L_exo_dropout_full_layers
    L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]
    pre_dim = output_dim
    print("When constructing the model, the output_dim of the conv "
          "section is %d." % output_dim)
    if n_full:
        full_specs = zip(L_dim_full_layers,
                         L_activation_full,
                         L_exo_dropout_full_layers)
        # Layer numbering continues after the convolutional layers.
        for index, (dim, activation_str, dropout) in enumerate(full_specs,
                                                               n_conv):
            activation = _activation_from_name(activation_str)

            assert 0.0 <= dropout < 1.0
            dim = dim - int(dim*dropout)
            print("When constructing the fully-connected section, we "
                  "apply dropout %f to add an MLP going from pre_dim %d "
                  "to dim %d." % (dropout, pre_dim, dim))

            layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
                             weights_init=Uniform(width=0.1),
                             biases_init=Constant(0.0),
                             name="layer_%d" % index)
            layer_full.initialize()
            full_layers.append(layer_full)
            pre_dim = dim

        for layer in full_layers:
            output_mlp = layer.apply(output_mlp)

        output_dim = (L_dim_full_layers[-1]
                      - int(L_dim_full_layers[-1]
                            * L_exo_dropout_full_layers[-1]))

    # COST FUNCTION
    output_layer = Linear(output_dim, prediction,
                          weights_init=Uniform(width=0.1),
                          biases_init=Constant(0.0),
                          name="layer_" + str(n_conv + n_full))
    output_layer.initialize()
    full_layers.append(output_layer)
    y_pred = output_layer.apply(output_mlp)
    y_hat = Softmax().apply(y_pred)
    # SOFTMAX and log likelihood
    y_pred = Softmax().apply(y_pred)
    # be careful. one version expects the output of a softmax; the other
    # expects just the output of the network
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_pred)
    cost.name = "cost"

    # Misclassification
    error_rate_brick = MisclassificationRate()
    error_rate = error_rate_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # put names
    D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)

    # test computation graph
    cg = ComputationGraph(cost)

    # DROPOUT
    L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers

    cg_dropout = cg
    inputs = VariableFilter(roles=[INPUT])(cg.variables)

    # NOTE(review): every call below starts from the original ``cg``, so
    # only the last applied dropout appears to end up in ``cg_dropout`` -
    # confirm whether the dropouts were meant to accumulate.
    for (index, drop_rate) in enumerate(L_endo_dropout):
        for input_ in inputs:
            m = re.match(r"layer_(\d+)_apply.*", input_.name)
            if m and index == int(m.group(1)):
                if drop_rate < 0.0001:
                    print("Skipped applying dropout on %s because the "
                          "dropout rate was under 0.0001." % input_.name)
                else:
                    cg_dropout = apply_dropout(cg, [input_], drop_rate)
                    print("Applied dropout %f on %s."
                          % (drop_rate, input_.name))
                # Only the first matching input of each layer is handled.
                break

    cg = cg_dropout

    return (cg, error_rate, cost, D_params, D_kind)
def test_communication(path_vae_mnist, path_maxout_mnist):
    """Train an SVHN encoder whose codes are decoded by a saved MNIST VAE.

    A new VAE is built for grayscale 32x32 SVHN inputs.  Its sampled codes
    are passed to the decoder of the VAE loaded from ``path_vae_mnist``,
    reshaped to ``(batch, 1, 28, 28)`` and classified by a ``Maxout``
    brick.  Only parameters whose name does not match ``".*mnist"`` are
    trained, with RMSProp, for 10000 batches.

    ``path_maxout_mnist`` is accepted but not used by this function.
    """
    # load models
    vae_mnist = load(path_vae_mnist)
    # get params : to be removed from the computation graph

    # write an object maxout
    classifier = Maxout()
    # get params : to be removed from the computation graph

    # vae whose prior is a zero mean unit variance normal distribution
    activation = Rectifier()
    full_weights_init = Orthogonal()
    weights_init = full_weights_init

    # SVHN in grayscale
    layers = [32*32, 200, 200, 200, 50]
    encoder_layers = layers[:-1]
    encoder_activations = [activation] * (len(encoder_layers)-1)
    encoder_mlp = MLP(encoder_activations, encoder_layers,
                      name="MLP_SVHN_encode",
                      biases_init=Constant(0.), weights_init=weights_init)

    enc_dim = encoder_layers[-1]
    z_dim = layers[-1]
    sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim,
                       biases_init=Constant(0.),
                       weights_init=full_weights_init)

    # Reversed copy: includes z_dim as first layer.
    decoder_layers = layers[::-1]
    decoder_activations = ([activation] * (len(decoder_layers)-2)
                           + [Rectifier()])
    decoder_mlp = MLP(decoder_activations, decoder_layers,
                      name="MLP_SVHN_decode",
                      biases_init=Constant(0.), weights_init=weights_init)

    vae_svhn = VAEModel(encoder_mlp, sampler, decoder_mlp)
    vae_svhn.initialize()

    # do the connection
    x = T.tensor4('x')  # SVHN samples preprocessed with local contrast normalization
    # Sum over axis 1, then flatten to 2 dimensions.
    x_ = (T.sum(x, axis=1)).flatten(ndim=2)
    y = T.imatrix('y')
    batch_size = 512

    encoded = vae_svhn.encoder_mlp.apply(x_)
    svhn_z, _ = vae_svhn.sampler.sample(encoded)
    mnist_decode = vae_mnist.decoder_mlp.apply(svhn_z)
    # reshape
    shape = mnist_decode.shape
    mnist_decode = mnist_decode.reshape((shape[0], 1, 28, 28))
    prediction = classifier.apply(mnist_decode)
    y_hat = Softmax().apply(prediction)

    # Reconstruction cost of the SVHN VAE (built but not used for training).
    x_recons, kl_terms = vae_svhn.reconstruct(x_)
    recons_term = BinaryCrossEntropy().apply(
        x_, T.clip(x_recons, 1e-4, 1 - 1e-4))
    recons_term.name = "recons_term"
    cost_A = recons_term + kl_terms.mean()
    cost_A.name = "cost_A"

    cost_B = Softmax().categorical_cross_entropy(y.flatten(), prediction)
    cost_B.name = 'cost_B'

    # ``cost`` is the same variable as ``cost_B``, so this renames it.
    cost = cost_B
    cost.name = "cost"

    cg = ComputationGraph(cost)  # probably discard some of the parameters
    parameters = cg.parameters
    params = [t for t in parameters if not re.match(".*mnist", t.name)]

    # Disabled debugging snippet, kept as an inert string expression.
    """ f = theano.function([x], cost_A) value_x = np.random.ranf((1, 3, 32, 32)).astype("float32") print f(value_x) return """

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # training here
    step_rule = RMSProp(0.001, 0.99)

    dataset_hdf5_file = "/Tmp/ducoffem/SVHN/"
    train_set = H5PYDataset(os.path.join(dataset_hdf5_file, "all.h5"),
                            which_set='train')
    test_set = H5PYDataset(os.path.join(dataset_hdf5_file, "all.h5"),
                           which_set='valid')

    train_scheme = SequentialScheme(train_set.num_examples, batch_size)
    data_stream = DataStream.default_stream(
        train_set, iteration_scheme=train_scheme)

    # The validation stream is limited to 2000 examples.
    test_scheme = SequentialScheme(2000, batch_size)
    data_stream_test = DataStream.default_stream(
        test_set, iteration_scheme=test_scheme)

    algorithm = GradientDescent(cost=cost, params=params,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_batches=10)
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test,
        prefix="valid", every_n_batches=10)

    # drawing_samples = ImagesSamplesSave("../data_svhn", vae, (3, 32, 32), every_n_epochs=1)
    extensions = [
        monitor_train,
        monitor_valid,
        FinishAfter(after_n_batches=10000),
        Printing(every_n_batches=10),
    ]

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=algorithm, model=Model(cost),
                         extensions=extensions)
    main_loop.run()
class LookUpTrain(Initializable, Feedforward):
    """Brick wrapping a ``Window`` model with cost, error and scoring helpers.

    The brick owns three children: the ``Window`` that produces the output,
    a ``Softmax`` used for the cross-entropy cost and a
    ``MisclassificationRate`` used for the error.

    Parameters
    ----------
    dwin, n_mot, vect_size
        Forwarded unchanged to ``Window``.
    n_hidden
        An ``int`` (wrapped into a one-element list) or a sequence,
        forwarded to ``Window``.
    n_out
        Forwarded to ``Window``; defaults to 2.
    """

    @lazy(allocation=['dwin', 'n_mot', 'vect_size', 'n_hidden'])
    def __init__(self, dwin, n_mot, vect_size, n_hidden, n_out=2, **kwargs):
        self.dwin = dwin
        self.n_mot = n_mot
        self.vect_size = vect_size
        if isinstance(n_hidden, int):
            self.n_hidden = [n_hidden]
        else:
            self.n_hidden = n_hidden
        self.n_out = n_out
        self.window = Window(self.dwin, self.n_mot, self.vect_size,
                             self.n_hidden, self.n_out,
                             weights_init=IsotropicGaussian(0.001))
        super(LookUpTrain, self).__init__(**kwargs)
        self.softmax = Softmax()
        self.error = MisclassificationRate()
        self.children = [self.window, self.softmax, self.error]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Return the output of the wrapped ``Window`` for ``input_``."""
        return self.window.apply(input_)

    @application(inputs=['x', 'y'], outputs=['output'])
    def cost(self, x, y):
        """Return the categorical cross-entropy of ``apply(x)`` against ``y``."""
        return self.softmax.categorical_cross_entropy(y, self.apply(x))

    @application(inputs=['x', 'y'], outputs=['output'])
    def errors(self, x, y):
        """Return the misclassification rate of ``apply(x)`` against ``y``."""
        return self.error.apply(y, self.apply(x))

    @application(inputs=['x'], outputs=['output'])
    def predict(self, x):
        """Return the index of the largest output along axis 1."""
        return T.argmax(self.apply(x), axis=1)

    @application(inputs=['x'], outputs=['output'])
    def predict_confidency(self, x):
        """Return the largest output value along axis 1."""
        return T.max(self.apply(x), axis=1)

    def update_lookup_weights(self):
        """Delegate to ``Window.update_lookup_weights``."""
        self.window.update_lookup_weights()

    @application(inputs=['input_', 'input_corrupt'], outputs=['output'])
    def score(self, input_, input_corrupt):
        """Return a score combining a genuine and a corrupted input.

        The result is the 2-norm of ``1 - -apply(input_)`` plus the 2-norm
        of ``apply(input_corrupt)``.
        """
        # modify the input_ with an incorrect central word ?
        # NOTE(review): ``1 - -a`` equals ``1 + a``; confirm the double
        # negation is intended.  An alternative formulation used to follow
        # this ``return`` but could never be reached and was removed.
        return (1 - -self.apply(input_)).norm(2) + (
            self.apply(input_corrupt)).norm(2)

    def _initialize(self):
        self.window.initialize()

    @application(inputs=['input_'], outputs=['output'])
    def embedding(self, input_):
        """Return ``Window.embedding`` applied to ``input_``."""
        return self.window.embedding(input_)

    def _allocate(self):
        self.window.allocate()

    def load(self, repo, filename):
        """Restore parameter values from the pickle ``repo/filename``.

        The stored objects are paired in order with the parameters returned
        by ``getParams``; each parameter receives the stored object's
        value.
        """
        params = getParams(self, T.itensor3())
        # NOTE: ``pickle.load`` can execute arbitrary code; only load files
        # from a trusted source.
        with open(os.path.join(repo, filename), 'rb') as f:
            params_value = pickle.load(f)
        for p, p_value in zip(params, params_value):
            p.set_value(p_value.get_value())

    def get_Params(self):
        """Return the parameters reported by the wrapped ``Window``."""
        return self.window.get_Params()

    def save(self, repo, filename):
        """Pickle the parameters to ``repo/filename_<index>``.

        ``<index>`` is the smallest non-negative integer for which no such
        file exists yet, so earlier saves are never overwritten.
        """
        params = getParams(self, T.itensor3())
        index = 0
        while os.path.isfile(os.path.join(repo,
                                          filename + "_" + str(index))):
            index += 1
        filename = filename + "_" + str(index)
        with open(os.path.join(repo, filename), 'wb') as f:
            pickle.dump(params, f, protocol=pickle.HIGHEST_PROTOCOL)