def generate_theano_func(args, network, penalty, input_dict, target_var):
    prediction = get_output(network, input_dict)
    # loss = T.mean(target_var * (T.log(target_var) - prediction))
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    # Earlier regularization experiments, kept for reference:
    # loss += 0.0001 * sum(T.sum(layer_params ** 2) for layer_params in get_all_params(network))
    # penalty = sum(T.sum(lstm_param ** 2) for lstm_param in lstm_params)
    # penalty = regularize_layer_params(l_forward_1_lstm, l2)
    # penalty = T.sum(lstm_param ** 2 for lstm_param in lstm_params)
    # penalty = 0.0001 * sum(T.sum(layer_params ** 2) for layer_params in get_all_params(l_forward_1))
    loss = loss + penalty

    params = get_all_params(network, trainable=True)
    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        # raising a bare string is invalid Python; raise a proper exception
        raise ValueError("optimizer must be one of: sgd, adagrad, adadelta, nesterov, rms, adam")

    test_prediction = get_output(network, input_dict, deterministic=True)
    # test_prediction = get_output(network, deterministic=True)
    # test_loss = T.mean(target_var * (T.log(target_var) - test_prediction))
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))

    # input1_var, input1_mask_var, input2_var and input2_mask_var are assumed
    # to be module-level symbolic variables.
    train_fn = theano.function(
        [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
        loss, updates=updates, allow_input_downcast=True,
    )
    if args.task == "sts":
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_prediction], allow_input_downcast=True,
        )
    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
        #                   dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        val_fn = theano.function(
            [input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
            [test_loss, test_acc], allow_input_downcast=True,
        )
    return train_fn, val_fn
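# The if/elif optimizer chain above (it recurs in event_dr_classifier below)
# can be collapsed into a table lookup. A minimal sketch, assuming the same
# Lasagne update functions are in scope:
OPTIMIZERS = {
    "sgd": sgd, "adagrad": adagrad, "adadelta": adadelta,
    "nesterov": nesterov_momentum, "rms": rmsprop, "adam": adam,
}

def make_updates(name, loss, params, step):
    if name not in OPTIMIZERS:
        raise ValueError("unknown optimizer: %r" % name)
    return OPTIMIZERS[name](loss, params, learning_rate=step)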
def build_model(self):
    import theano.tensor as T
    self.x = T.ftensor4('x')
    self.y = T.lvector('y')
    self.lr = T.scalar('lr')

    net = build_model_vgg16(input_shape=(None, 3, 224, 224), verbose=self.verbose)
    self.output_layer = net['prob']

    from lasagne.layers import get_output
    self.output = lasagne.layers.get_output(self.output_layer, self.x,
                                            deterministic=False)
    self.cost = lasagne.objectives.categorical_crossentropy(self.output, self.y).mean()
    from lasagne.objectives import categorical_accuracy
    self.error = 1 - categorical_accuracy(self.output, self.y, top_k=1).mean()
    self.error_top_5 = 1 - categorical_accuracy(self.output, self.y, top_k=5).mean()
def test_categorical_accuracy():
    from lasagne.objectives import categorical_accuracy
    p = theano.tensor.matrix('p')
    t = theano.tensor.ivector('t')
    c = categorical_accuracy(p, t)

    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(100, 5).astype(floatX)
    cls_predictions = np.argmax(predictions, axis=1)
    # np.random.random_integers is deprecated; randint(0, 5) covers 0..4
    targets = np.random.randint(0, 5, (100,)).astype("int8")
    accuracy = cls_predictions == targets

    # compare
    assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))

    one_hot = np.zeros((100, 5)).astype("int8")
    one_hot[np.arange(100), targets] = 1
    t = theano.tensor.imatrix('t')
    c = categorical_accuracy(p, t)
    assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot}))
def test_categorical_accuracy():
    from lasagne.objectives import categorical_accuracy
    p = theano.tensor.matrix('p')
    t = theano.tensor.ivector('t')
    c = categorical_accuracy(p, t)

    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    cls_predictions = np.argmax(predictions, axis=1)
    # np.random.random_integers is deprecated; randint(0, 20) covers 0..19
    targets = np.random.randint(0, 20, (10,)).astype("int8")
    accuracy = cls_predictions == targets

    # compare
    assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))

    one_hot = np.zeros((10, 20)).astype("int8")
    one_hot[np.arange(10), targets] = 1
    t = theano.tensor.imatrix('t')
    c = categorical_accuracy(p, t)
    assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot}))
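# What the two tests above assert, restated in plain NumPy: top-1 accuracy,
# with one-hot targets reduced to class indices first. A sketch of the checked
# predicate only, not Lasagne's actual implementation.
def categorical_accuracy_np(predictions, targets):
    # assumes `numpy as np`, as in the tests above
    if targets.ndim == predictions.ndim:  # one-hot targets
        targets = np.argmax(targets, axis=-1)
    return np.argmax(predictions, axis=-1) == targets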
def compute_cost(self, deterministic=False):
    output = get_output(self.net, deterministic=deterministic)
    cost = categorical_crossentropy(output, self.tg).mean()
    cost.name = 'negll'
    accuracy = categorical_accuracy(output, self.tg).mean()
    accuracy.name = 'accuracy'
    return cost, accuracy
def compile_val(self):
    if self.verbose:
        print('compiling validation function...')
    import theano
    from lasagne.layers import get_output
    output_val = lasagne.layers.get_output(self.output_layer, self.x,
                                           deterministic=True)
    from lasagne.objectives import categorical_accuracy, categorical_crossentropy
    cost = categorical_crossentropy(output_val, self.y).mean()
    error = 1 - categorical_accuracy(output_val, self.y, top_k=1).mean()
    error_top_5 = 1 - categorical_accuracy(output_val, self.y, top_k=5).mean()

    self.val_fn = theano.function([self.subb_ind],
                                  [cost, error, error_top_5],
                                  updates=[],
                                  givens=[(self.x, self.shared_x_slice),
                                          (self.y, self.shared_y_slice)])
def test_categorical_accuracy_top_k():
    from lasagne.objectives import categorical_accuracy
    p = theano.tensor.matrix('p')
    t = theano.tensor.ivector('t')
    top_k = 4
    c = categorical_accuracy(p, t, top_k=top_k)

    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, 20).astype(floatX)
    cls_predictions = np.argsort(predictions, axis=1).astype("int8")
    # (construct targets such that top-1 to top-10 predictions are in there)
    targets = cls_predictions[np.arange(10), -np.random.permutation(10)]
    top_predictions = cls_predictions[:, -top_k:]
    accuracy = np.any(top_predictions == targets[:, np.newaxis], axis=1)

    # compare
    assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))

    one_hot = np.zeros((10, 20)).astype("int8")
    one_hot[np.arange(10), targets] = 1
    t = theano.tensor.imatrix('t')
    c = categorical_accuracy(p, t, top_k=top_k)
    assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot}))
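# The top_k variant, restated in NumPy: a row counts as correct when the
# target class appears among the k highest-scoring classes. Again a sketch of
# the checked predicate, not the library implementation.
def top_k_accuracy_np(predictions, targets, top_k):
    top = np.argsort(predictions, axis=-1)[:, -top_k:]
    return np.any(top == targets[:, np.newaxis], axis=-1)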
def build(layer_heads, params):
    """Compile per-target train/predict/cost/acc/transform functions."""
    fns = {}  # model methods
    x = T.tensor4('input')
    for target in params['targets']:
        fns[target['name']] = {}
        out_layer = layer_heads[target['name']]
        y = T.matrix('target')
        o = L.get_output(out_layer, inputs=x)
        o_vl = L.get_output(out_layer, inputs=x, deterministic=True)

        if 'class_weight' in params and params['class_weight']:
            loss_fn = partial(weighted_cce, weights=params['class_weight'])
        else:
            loss_fn = obj.categorical_crossentropy

        loss = loss_fn(o, y).mean()
        loss_vl = loss_fn(o_vl, y).mean()
        wd_l2 = reg.regularize_network_params(out_layer, reg.l2)
        wd_l2 *= params['beta']
        acc_vl = obj.categorical_accuracy(o_vl, y).mean()

        updates_ = updates.adam(loss + wd_l2,
                                L.get_all_params(out_layer, trainable=True),
                                learning_rate=params['learning_rate'],
                                epsilon=params['epsilon'])

        fns[target['name']]['train'] = theano.function(
            [x, y], updates=updates_, allow_input_downcast=True)
        fns[target['name']]['predict'] = theano.function(
            [x], o_vl, allow_input_downcast=True)
        fns[target['name']]['cost'] = theano.function(
            [x, y], loss_vl, allow_input_downcast=True)
        fns[target['name']]['acc'] = theano.function(
            [x, y], acc_vl, allow_input_downcast=True)
        fns[target['name']]['transform'] = theano.function(
            [x],
            L.get_output(L.get_all_layers(layer_heads[target['name']])[-2],
                         inputs=x, deterministic=True),
            allow_input_downcast=True)
    return fns, layer_heads
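# Hypothetical use of the dict returned by build(); the batch arrays are
# placeholders, not defined in the snippet above.
fns, layer_heads = build(layer_heads, params)
head = fns[params['targets'][0]['name']]
head['train'](X_batch, Y_batch)          # one parameter update
val_cost = head['cost'](X_val, Y_val)    # validation loss
val_acc = head['acc'](X_val, Y_val)      # validation accuracy
Z = head['transform'](X_val)             # penultimate-layer features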
def build_model(self, train_set, test_set, validation_set=None):
    super(CNN, self).build_model(train_set, test_set, validation_set)

    epsilon = 1e-8
    y_train = T.clip(get_output(self.model, self.sym_x), epsilon, 1)
    loss_cc = aggregate(categorical_crossentropy(y_train, self.sym_t), mode='mean')
    loss_train_acc = categorical_accuracy(y_train, self.sym_t).mean()

    y = T.clip(get_output(self.model, self.sym_x, deterministic=True), epsilon, 1)
    loss_eval = aggregate(categorical_crossentropy(y, self.sym_t), mode='mean')
    loss_acc = categorical_accuracy(y, self.sym_t).mean()

    all_params = get_all_params(self.model, trainable=True)
    sym_beta1 = T.scalar('beta1')
    sym_beta2 = T.scalar('beta2')
    grads = T.grad(loss_cc, all_params)
    grads = [T.clip(g, -5, 5) for g in grads]
    updates = rmsprop(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

    inputs = [self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1, sym_beta2]
    f_train = theano.function(
        inputs, [loss_cc, loss_train_acc],
        updates=updates,
        givens={
            self.sym_x: self.sh_train_x[self.batch_slice],
            self.sym_t: self.sh_train_t[self.batch_slice],
        },
    )
    f_test = theano.function(
        [self.sym_index, self.sym_batchsize], [loss_eval, loss_acc],
        givens={
            self.sym_x: self.sh_test_x[self.batch_slice],
            self.sym_t: self.sh_test_t[self.batch_slice],
        },
    )
    f_validate = None
    if validation_set is not None:
        f_validate = theano.function(
            [self.sym_index, self.sym_batchsize], [loss_eval, loss_acc],
            givens={
                self.sym_x: self.sh_valid_x[self.batch_slice],
                self.sym_t: self.sh_valid_t[self.batch_slice],
            },
        )

    self.train_args['inputs']['batchsize'] = 128
    self.train_args['inputs']['learningrate'] = 1e-3
    self.train_args['inputs']['beta1'] = 0.9
    self.train_args['inputs']['beta2'] = 0.999
    self.train_args['outputs']['loss_cc'] = '%0.6f'
    self.train_args['outputs']['loss_train_acc'] = '%0.6f'

    self.test_args['inputs']['batchsize'] = 128
    self.test_args['outputs']['loss_eval'] = '%0.6f'
    self.test_args['outputs']['loss_acc'] = '%0.6f'

    self.validate_args['inputs']['batchsize'] = 128
    # self.validate_args['outputs']['loss_eval'] = '%0.6f'
    # self.validate_args['outputs']['loss_acc'] = '%0.6f'
    return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args
def build_model(self, train_set, test_set, validation_set=None, weights=None):
    super(BRNN, self).build_model(train_set, test_set, validation_set)

    def brier_score(given, predicted, weight_vector, mask):
        return T.mean(T.power(given - predicted, 2.0).dot(weight_vector) * mask)

    epsilon = 1e-8
    mask = get_output(self.mask, self.sym_x)
    y_train = T.clip(get_output(self.model, self.sym_x), epsilon, 1)
    train_brier = brier_score(y_train, self.sym_t, weights, mask)
    train_cc = aggregate(categorical_crossentropy(y_train, self.sym_t), mode='mean')
    loss_train_acc = categorical_accuracy(y_train, self.sym_t).mean()

    y_test = T.clip(get_output(self.model, self.sym_x, deterministic=True), epsilon, 1)
    test_brier = brier_score(y_test, self.sym_t, weights, mask)
    test_cc = aggregate(categorical_crossentropy(y_test, self.sym_t), mode='mean')
    test_acc = categorical_accuracy(y_test, self.sym_t).mean()

    all_params = get_all_params(self.model, trainable=True)
    sym_beta1 = T.scalar('beta1')
    sym_beta2 = T.scalar('beta2')
    grads = T.grad(train_brier, all_params)
    grads = [T.clip(g, -1, 1) for g in grads]
    updates = adam(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

    inputs = [self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1, sym_beta2]
    f_train = theano.function(
        inputs, [train_cc, train_brier],
        updates=updates,
        givens={
            self.sym_x: self.sh_train_x[self.batch_slice],
            self.sym_t: self.sh_train_t[self.batch_slice],
        },
    )
    f_test = theano.function(
        [], [test_cc, test_brier],
        givens={
            self.sym_x: self.sh_test_x,
            self.sym_t: self.sh_test_t,
        },
    )
    f_validate = None
    if validation_set is not None:
        f_validate = theano.function(
            [self.sym_index, self.sym_batchsize], [test_cc, test_acc],
            givens={
                self.sym_x: self.sh_valid_x[self.batch_slice],
                self.sym_t: self.sh_valid_t[self.batch_slice],
            },
        )
    predict = theano.function([self.sym_x], [y_test])

    self.train_args['inputs']['batchsize'] = 64
    self.train_args['inputs']['learningrate'] = 1e-3
    self.train_args['inputs']['beta1'] = 0.9
    self.train_args['inputs']['beta2'] = 0.999  # 1e-6
    self.train_args['outputs']['train_cc'] = '%0.4f'
    # self.train_args['outputs']['train_acc'] = '%0.4f'
    self.train_args['outputs']['train_brier'] = '%0.4f'

    # self.test_args['inputs']['batchsize'] = 64
    self.test_args['outputs']['test_cc'] = '%0.4f'
    # self.test_args['outputs']['test_acc'] = '%0.4f'
    self.test_args['outputs']['test_brier'] = '%0.4f'

    # self.validate_args['inputs']['batchsize'] = 64
    # self.validate_args['outputs']['loss_eval'] = '%0.6f'
    # self.validate_args['outputs']['test_acc'] = '%0.6f'
    return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args, predict
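# The weighted Brier score used as the training objective above, mirrored in
# NumPy for a quick sanity check (made-up shapes and weights):
def brier_score_np(given, predicted, weight_vector, mask):
    return np.mean(np.power(given - predicted, 2.0).dot(weight_vector) * mask)

probs = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])  # model outputs
onehot = np.array([[1., 0., 0.], [0., 0., 1.]])       # one-hot targets
w = np.array([1., 1., 2.])                            # per-class weights
print(brier_score_np(probs, onehot, w, mask=1.0))     # ~1.21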
def build_model(self, train_set_unlabeled, train_set_labeled, test_set, validation_set=None):
    """
    Build the auxiliary deep generative model from the initialized hyperparameters.
    Define the lower bound term and compile it into a training function.
    :param train_set_unlabeled: Unlabeled train set containing variables x, t.
    :param train_set_labeled: Labeled train set containing variables x, t.
    :param test_set: Test set containing variables x, t.
    :param validation_set: Validation set containing variables x, t.
    :return: train, test, validation function and dicts of arguments.
    """
    super(CSDGM, self).build_model(train_set_unlabeled, test_set, validation_set)

    sh_train_x_l = theano.shared(np.asarray(train_set_labeled[0], dtype=theano.config.floatX), borrow=True)
    sh_train_t_l = theano.shared(np.asarray(train_set_labeled[1], dtype=theano.config.floatX), borrow=True)
    n = self.sh_train_x.shape[0].astype(theano.config.floatX)  # no. of data points
    n_l = sh_train_x_l.shape[0].astype(theano.config.floatX)   # no. of labeled data points

    # Define the layers for the density estimation used in the lower bound.
    l_log_qa = GaussianLogDensityLayer(self.l_qa, self.l_qa_mu, self.l_qa_logvar)
    l_log_qz = GaussianLogDensityLayer(self.l_qz, self.l_qz_mu, self.l_qz_logvar)
    l_log_qy = MultinomialLogDensityLayer(self.l_qy, self.l_y_in, eps=1e-8)
    l_log_pz = StandardNormalLogDensityLayer(self.l_qz)
    l_log_pa = GaussianLogDensityLayer(self.l_qa, self.l_pa_mu, self.l_pa_logvar)

    l_x_in = ReshapeLayer(self.l_x_in, (-1, self.n_l * self.n_c))
    l_px = DimshuffleLayer(self.l_px, (0, 3, 1, 2, 4))
    l_px = ReshapeLayer(l_px, (-1, self.sym_samples, 1, self.n_c))
    if self.x_dist == 'bernoulli':
        l_log_px = BernoulliLogDensityLayer(self.l_px, self.l_x_in)
    elif self.x_dist == 'multinomial':
        l_log_px = MultinomialLogDensityLayer(l_px, l_x_in)
        l_log_px = ReshapeLayer(l_log_px, (-1, self.n_l, 1, 1, 1))
        l_log_px = MeanLayer(l_log_px, axis=1)
    elif self.x_dist == 'gaussian':
        l_px_mu = ReshapeLayer(
            DimshuffleLayer(self.l_px_mu, (0, 2, 3, 1, 4)),
            (-1, self.sym_samples, 1, self.n_l * self.n_c))
        l_px_logvar = ReshapeLayer(
            DimshuffleLayer(self.l_px_logvar, (0, 2, 3, 1, 4)),
            (-1, self.sym_samples, 1, self.n_l * self.n_c))
        l_log_px = GaussianLogDensityLayer(l_x_in, l_px_mu, l_px_logvar)

    def lower_bound(log_pa, log_qa, log_pz, log_qz, log_py, log_px):
        lb = log_px + log_py + (log_pz + log_pa - log_qa - log_qz) * (1.1 - self.sym_warmup)
        return lb

    # Lower bound for labeled data
    out_layers = [l_log_pa, l_log_pz, l_log_qa, l_log_qz, l_log_px, l_log_qy]
    inputs = {self.l_x_in: self.sym_x_l, self.l_y_in: self.sym_t_l}
    out = get_output(out_layers, inputs,
                     batch_norm_update_averages=False,
                     batch_norm_use_averages=False)
    log_pa_l, log_pz_l, log_qa_x_l, log_qz_axy_l, log_px_zy_l, log_qy_ax_l = out
    # Prior p(y) expecting that all classes are evenly distributed
    py_l = softmax(T.zeros((self.sym_x_l.shape[0], self.n_y)))
    log_py_l = -categorical_crossentropy(py_l, self.sym_t_l).reshape((-1, 1)).dimshuffle((0, 'x', 'x', 1))
    lb_l = lower_bound(log_pa_l, log_qa_x_l, log_pz_l, log_qz_axy_l, log_py_l, log_px_zy_l)
    lb_l = lb_l.mean(axis=(1, 2))  # Mean over the sampling dimensions
    # Scale the supervised cross entropy with the alpha constant
    log_qy_ax_l *= (self.sym_beta * (n / n_l))
    # Collect the lower bound term and mean over sampling dimensions
    lb_l += log_qy_ax_l.mean(axis=(1, 2))

    # Lower bound for unlabeled data
    bs_u = self.sym_x_u.shape[0]
    # For the integrating out approach, we repeat the input matrix x, and
    # construct a target (bs * n_y) x n_y. Example of input and target matrix
    # for a 3 class problem and batch_size=2. 2D tensors of the form
    #   x_repeat                           t_repeat
    #   [[x[0,0], x[0,1], ..., x[0,n_x]]   [[1, 0, 0]
    #    [x[1,0], x[1,1], ..., x[1,n_x]]    [1, 0, 0]
    #    [x[0,0], x[0,1], ..., x[0,n_x]]    [0, 1, 0]
    #    [x[1,0], x[1,1], ..., x[1,n_x]]    [0, 1, 0]
    #    [x[0,0], x[0,1], ..., x[0,n_x]]    [0, 0, 1]
    #    [x[1,0], x[1,1], ..., x[1,n_x]]]   [0, 0, 1]]
    t_eye = T.eye(self.n_y, k=0)
    t_u = t_eye.reshape((self.n_y, 1, self.n_y)).repeat(bs_u, axis=1).reshape((-1, self.n_y))
    x_u = self.sym_x_u.reshape((1, bs_u, self.n_l, self.n_c)).repeat(self.n_y, axis=0).reshape((-1, self.n_l, self.n_c))

    # Since the expectation of var a is outside the integration we calculate E_q(a|x) first
    a_x_u = get_output(self.l_qa, self.sym_x_u,
                       batch_norm_update_averages=True,
                       batch_norm_use_averages=False)
    a_x_u_rep = a_x_u.reshape((1, bs_u * self.sym_samples, self.n_a)).repeat(self.n_y, axis=0).reshape((-1, self.n_a))
    out_layers = [l_log_pa, l_log_pz, l_log_qa, l_log_qz, l_log_px]
    inputs = {self.l_x_in: x_u, self.l_y_in: t_u, self.l_a_in: a_x_u_rep}
    out = get_output(out_layers, inputs,
                     batch_norm_update_averages=False,
                     batch_norm_use_averages=False)
    log_pa_u, log_pz_u, log_qa_x_u, log_qz_axy_u, log_px_zy_u = out
    # Prior p(y) expecting that all classes are evenly distributed
    py_u = softmax(T.zeros((bs_u * self.n_y, self.n_y)))
    log_py_u = -categorical_crossentropy(py_u, t_u).reshape((-1, 1)).dimshuffle((0, 'x', 'x', 1))
    lb_u = lower_bound(log_pa_u, log_qa_x_u, log_pz_u, log_qz_axy_u, log_py_u, log_px_zy_u)
    lb_u = lb_u.reshape((self.n_y, 1, 1, bs_u)).transpose(3, 1, 2, 0).mean(axis=(1, 2))
    inputs = {self.l_x_in: self.sym_x_u, self.l_a_in: a_x_u.reshape((-1, self.n_a))}
    y_u = get_output(self.l_qy, inputs,
                     batch_norm_update_averages=True,
                     batch_norm_use_averages=False).mean(axis=(1, 2))
    y_u += 1e-8  # Ensure that we get no NANs when calculating the entropy
    y_u /= T.sum(y_u, axis=1, keepdims=True)
    lb_u = (y_u * (lb_u - T.log(y_u))).sum(axis=1)

    # Regularizing with weight priors p(theta|N(0,1)), collecting and clipping gradients
    weight_priors = 0.0
    for p in self.trainable_model_params:
        if 'W' not in str(p):
            continue
        weight_priors += log_normal(p, 0, 1).sum()

    # Collect the lower bound and scale it with the weight priors.
    elbo = ((lb_l.mean() + lb_u.mean()) * n + weight_priors) / -n
    lb_labeled = -lb_l.mean()
    lb_unlabeled = -lb_u.mean()
    log_px = log_px_zy_l.mean() + log_px_zy_u.mean()
    log_pz = log_pz_l.mean() + log_pz_u.mean()
    log_qz = log_qz_axy_l.mean() + log_qz_axy_u.mean()
    log_pa = log_pa_l.mean() + log_pa_u.mean()
    log_qa = log_qa_x_l.mean() + log_qa_x_u.mean()

    grads_collect = T.grad(elbo, self.trainable_model_params)
    params_collect = self.trainable_model_params
    sym_beta1 = T.scalar('beta1')
    sym_beta2 = T.scalar('beta2')
    clip_grad, max_norm = 1, 5
    mgrads = total_norm_constraint(grads_collect, max_norm=max_norm)
    mgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]
    updates = adam(mgrads, params_collect, self.sym_lr, sym_beta1, sym_beta2)

    # Training function
    indices = self._srng.choice(size=[self.sym_bs_l], a=sh_train_x_l.shape[0], replace=False)
    x_batch_l = sh_train_x_l[indices]
    t_batch_l = sh_train_t_l[indices]
    x_batch_u = self.sh_train_x[self.batch_slice]
    if self.x_dist == 'bernoulli':  # Sample bernoulli input.
        x_batch_u = self._srng.binomial(size=x_batch_u.shape, n=1, p=x_batch_u,
                                        dtype=theano.config.floatX)
        x_batch_l = self._srng.binomial(size=x_batch_l.shape, n=1, p=x_batch_l,
                                        dtype=theano.config.floatX)
    givens = {self.sym_x_l: x_batch_l,
              self.sym_x_u: x_batch_u,
              self.sym_t_l: t_batch_l}
    inputs = [self.sym_index, self.sym_batchsize, self.sym_bs_l, self.sym_beta,
              self.sym_lr, sym_beta1, sym_beta2, self.sym_samples, self.sym_warmup]
    outputs = [elbo, lb_labeled, lb_unlabeled, log_px, log_pz, log_qz, log_pa, log_qa]
    f_train = theano.function(inputs=inputs, outputs=outputs, givens=givens, updates=updates)

    # Default training args. Note that these can be changed during or prior to training.
    self.train_args['inputs']['batchsize_unlabeled'] = 100
    self.train_args['inputs']['batchsize_labeled'] = 100
    self.train_args['inputs']['beta'] = 0.1
    self.train_args['inputs']['learningrate'] = 3e-4
    self.train_args['inputs']['beta1'] = 0.9
    self.train_args['inputs']['beta2'] = 0.999
    self.train_args['inputs']['samples'] = 1
    self.train_args['inputs']['warmup'] = 0.1
    self.train_args['outputs']['lb'] = '%0.3f'
    self.train_args['outputs']['lb-l'] = '%0.3f'
    self.train_args['outputs']['lb-u'] = '%0.3f'
    self.train_args['outputs']['px'] = '%0.3f'
    self.train_args['outputs']['pz'] = '%0.3f'
    self.train_args['outputs']['qz'] = '%0.3f'
    self.train_args['outputs']['pa'] = '%0.3f'
    self.train_args['outputs']['qa'] = '%0.3f'

    # Validation and test function
    y = get_output(self.l_qy, self.sym_x_l, deterministic=True).mean(axis=(1, 2))
    class_err = (1. - categorical_accuracy(y, self.sym_t_l).mean()) * 100
    givens = {self.sym_x_l: self.sh_test_x, self.sym_t_l: self.sh_test_t}
    f_test = theano.function(inputs=[self.sym_samples], outputs=[class_err], givens=givens)
    # Test args. Note that these can be changed during or prior to training.
    self.test_args['inputs']['samples'] = 1
    self.test_args['outputs']['test'] = '%0.2f%%'

    f_validate = None
    if validation_set is not None:
        givens = {self.sym_x_l: self.sh_valid_x, self.sym_t_l: self.sh_valid_t}
        f_validate = theano.function(inputs=[self.sym_samples], outputs=[class_err], givens=givens)
    # Default validation args. Note that these can be changed during or prior to training.
    self.validate_args['inputs']['samples'] = 1
    self.validate_args['outputs']['validation'] = '%0.2f%%'

    return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args
loss_gen_source = T.mean(categorical_crossentropy(
    predictions=softmax(source_gen),
    targets=T.zeros(shape=(args.batch_size,), dtype='int32')))
loss_lab_class = T.mean(categorical_crossentropy(
    predictions=softmax(class_lab), targets=labels))
loss_lab_source = T.mean(categorical_crossentropy(
    predictions=softmax(source_lab),
    targets=T.zeros(shape=(args.batch_size,), dtype='int32'))) + \
    T.mean(categorical_crossentropy(
        predictions=softmax(source_gen),
        targets=T.ones(shape=(args.batch_size,), dtype='int32')))

weight_gen_loss = th.shared(np.float32(0.))
# output_lab = ll.get_output(disc_layers[-2], x_lab)
# output_gen = ll.get_output(disc_layers[-2], gen_dat)
# m1 = T.mean(output_lab, axis=0)
# m2 = T.mean(output_gen, axis=0)
# feature_loss = T.mean(abs(m1 - m2))
loss_gen = (1 - weight_gen_loss) * loss_gen_source
# loss_gen = (1 - weight_gen_loss) * feature_loss
loss_lab = (1 - weight_gen_loss) * loss_lab_source + \
    weight_gen_loss * (loss_lab_class + 0.5 * loss_gen_class)

# network performance
D_acc_on_real = T.mean(categorical_accuracy(
    predictions=source_lab, targets=T.zeros(shape=(args.batch_size,))))
D_acc_on_fake = T.mean(categorical_accuracy(
    predictions=source_gen, targets=T.ones(shape=(args.batch_size,))))
G_acc_on_fake = T.mean(categorical_accuracy(
    predictions=source_gen, targets=T.zeros(shape=(args.batch_size,))))
performfun = th.function(inputs=[x_lab, labels, labels_gen],
                         outputs=[D_acc_on_real, D_acc_on_fake, G_acc_on_fake])

train_err = T.mean(T.neq(T.argmax(class_lab, axis=1), labels))

# Theano functions for training the disc net
learning_rate_var = th.shared(np.cast[th.config.floatX](args.learning_rate))
disc_params = ll.get_all_params(disc_layers[-1], trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_lab,
                                     lr=learning_rate_var, mom1=0.5)
disc_param_avg = [th.shared(np.cast[th.config.floatX](0. * p.get_value()),
                            broadcastable=p.broadcastable) for p in disc_params]
disc_avg_updates = [(a, a + 0.01 * (p - a)) for p, a in zip(disc_params, disc_param_avg)]
disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
# init_param = th.function(inputs=[x_lab], outputs=None, updates=init_updates,
#                          on_unused_input='ignore')
train_batch_disc = th.function(inputs=[x_lab, labels, labels_gen],
                               outputs=[loss_lab, train_err],
                               updates=disc_param_updates + disc_avg_updates)
test_batch = th.function(inputs=[x_lab, labels],
                         outputs=train_err,
                         givens=disc_avg_givens)
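# The three probes above read the 2-way source head as a real(0)/fake(1)
# classifier. A NumPy restatement of the same predicate (illustrative only;
# the *_values arrays are placeholders for evaluated head outputs):
def source_accuracy(source_logits, target_class):
    return np.mean(np.argmax(source_logits, axis=1) == target_class)

# D_acc_on_real ~ source_accuracy(source_lab_values, 0)
# D_acc_on_fake ~ source_accuracy(source_gen_values, 1)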
def test_categorical_accuracy_invalid():  # fixed typo: "categorial"
    from lasagne.objectives import categorical_accuracy
    with pytest.raises(TypeError) as exc:
        categorical_accuracy(theano.tensor.vector(), theano.tensor.matrix())
    assert 'rank mismatch' in exc.value.args[0]
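# For contrast with the failing call above, the rank pairings the other tests
# show to be valid:
p = theano.tensor.matrix('p')                                 # (batch, classes)
ok_int = categorical_accuracy(p, theano.tensor.ivector('t'))  # integer class targets
ok_hot = categorical_accuracy(p, theano.tensor.imatrix('t'))  # one-hot targets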
def build_model(self, train_set, test_set, validation_set=None, weights=None):
    super(wconvRNN, self).build_model(train_set, test_set, validation_set)

    def brier_score(given, predicted, weight_vector):
        return T.power(given - predicted, 2.0).dot(weight_vector).mean()

    epsilon = 1e-8
    y_train = T.clip(get_output(self.model, self.sym_x), epsilon, 1)
    loss_brier_train = brier_score(y_train, self.sym_t, weights)
    loss_cc = aggregate(categorical_crossentropy(y_train, self.sym_t), mode='mean')
    loss_train_acc = categorical_accuracy(y_train, self.sym_t).mean()

    y_test = T.clip(get_output(self.model, self.sym_x, deterministic=True), epsilon, 1)
    loss_brier_test = brier_score(y_test, self.sym_t, weights)
    loss_eval = aggregate(categorical_crossentropy(y_test, self.sym_t), mode='mean')
    loss_acc = categorical_accuracy(y_test, self.sym_t).mean()

    all_params = get_all_params(self.model, trainable=True)
    sym_beta1 = T.scalar('beta1')
    sym_beta2 = T.scalar('beta2')
    grads = T.grad(loss_brier_train, all_params)
    grads = [T.clip(g, -5, 5) for g in grads]
    updates = rmsprop(grads, all_params, self.sym_lr, sym_beta1, sym_beta2)

    inputs = [self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1, sym_beta2]
    f_train = theano.function(
        inputs, [loss_brier_train],
        updates=updates,
        givens={
            self.sym_x: self.sh_train_x[self.batch_slice],
            self.sym_t: self.sh_train_t[self.batch_slice],
        },
    )
    f_test = theano.function(
        [], [loss_brier_test],
        givens={
            self.sym_x: self.sh_test_x,
            self.sym_t: self.sh_test_t,
        },
        on_unused_input='ignore',
    )
    f_validate = None
    if validation_set is not None:
        f_validate = theano.function(
            [self.sym_batchsize], [loss_brier_test],
            givens={
                self.sym_x: self.sh_valid_x,
                self.sym_t: self.sh_valid_t,
            },
            on_unused_input='ignore',
        )
    predict = theano.function([self.sym_x], [y_test])

    self.train_args['inputs']['batchsize'] = 128
    self.train_args['inputs']['learningrate'] = 1e-3
    self.train_args['inputs']['beta1'] = 0.9
    self.train_args['inputs']['beta2'] = 0.999
    self.train_args['outputs']['loss_brier_train'] = '%0.6f'
    # self.train_args['outputs']['loss_train_acc'] = '%0.6f'

    # self.test_args['inputs']['batchsize'] = 128
    self.test_args['outputs']['loss_brier_test'] = '%0.6f'
    # self.test_args['outputs']['loss_acc'] = '%0.6f'

    # self.validate_args['inputs']['batchsize'] = 128
    # self.validate_args['outputs']['loss_eval'] = '%0.6f'
    # self.validate_args['outputs']['loss_acc'] = '%0.6f'
    return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args, predict
def transfer(sources, learning_rate, epsilon, beta, n_epochs, batch_sz,
             train_id=None, test_sources=None):
    """Train a transfer model on concatenated source features and evaluate it."""
    if train_id is None:
        train_id = uuid.uuid4()

    # launch logger
    logger = tblog.Logger('runs/{}'.format(train_id))

    # launch model
    # net = fcn_transfer({'inputs': sources})
    net = fcn_transfer({'inputs': range(2)})
    input = T.matrix('input')
    target = T.matrix('target')
    o = L.get_output(net, inputs=input)
    o_vl = L.get_output(net, inputs=input, deterministic=True)
    loss = obj.categorical_crossentropy(o, target).mean()
    loss_vl = obj.categorical_crossentropy(o_vl, target).mean()
    wd_l2 = reg.regularize_network_params(net, reg.l2)
    wd_l2 *= beta
    acc_vl = obj.categorical_accuracy(o_vl, target).mean()
    updates_ = updates.adam(loss + wd_l2,
                            L.get_all_params(net, trainable=True),
                            learning_rate=learning_rate, epsilon=epsilon)

    Model = namedtuple('Model', 'partial_fit predict cost acc')
    model = Model(
        partial_fit=theano.function([input, target], updates=updates_,
                                    allow_input_downcast=True),
        predict=theano.function([input], o_vl, allow_input_downcast=True),
        cost=theano.function([input, target], loss_vl, allow_input_downcast=True),
        acc=theano.function([input, target], acc_vl, allow_input_downcast=True))

    # load data
    D = [joblib.load(fn) for fn in sources]

    # prepare data
    trn_ix = np.where(D[0][2] == 'train')[0]
    val_ix = np.where(D[0][2] == 'valid')[0]
    ids_map = copy.deepcopy(D[0][3])
    y = copy.deepcopy(D[0][1])
    X = np.concatenate([d[0] for d in D], axis=1)
    lb = LabelBinarizer().fit(y)
    del D

    # TRAIN!
    iters = 0
    try:
        epoch = trange(n_epochs, desc='[Loss : -.--] Epoch', ncols=80)
        for n in epoch:
            np.random.shuffle(trn_ix)
            np.random.shuffle(val_ix)
            for i, X_, y_ in prepare_batch(X, y, batch_sz, lb):
                if iters % 100 == 0:
                    ix = np.random.choice(val_ix, batch_sz, replace=False)
                    X_v, y_v = X[ix], lb.transform(y[ix])
                    c = model.cost(X_, y_).item()
                    cv = model.cost(X_v, y_v).item()
                    a = model.acc(X_, y_).item()
                    av = model.acc(X_v, y_v).item()
                    logger.log_value('trns_cost_tr', c, iters)
                    logger.log_value('trns_cost_vl', cv, iters)
                    logger.log_value('trns_acc_tr', a, iters)
                    logger.log_value('trns_acc_vl', av, iters)
                    epoch.set_description(
                        '[v_loss: {:.4f} / v_acc: {:.4f}]Epoch'.format(cv, av))
                model.partial_fit(X_, y_)
                iters += 1
    except KeyboardInterrupt:
        print('User Stopped!')

    # evaluate
    uniq_ix_set = list(set(ids_map[val_ix]))
    Y_pred = []
    y_true = []
    Xvl = X[val_ix]
    yvl = y[val_ix]
    for i in tqdm(uniq_ix_set):
        ix = np.where(ids_map[val_ix] == i)[0]
        Y_pred.append(model.predict(Xvl[ix]).mean(axis=0))
        y_true.append(yvl[ix][0])
    Y_true = lb.transform(y_true)
    y_pred = [lb.classes_[i] for i in np.argmax(Y_pred, axis=1)]
    f1 = f1_score(y_true, y_pred, average='macro')
    ll = -np.mean(np.sum(Y_true * np.log(np.maximum(Y_pred, 1e-8)), axis=1))

    # TODO: this is under-dev functionality. not generally working
    if test_sources is not None:
        del X, y  # delete training data from memory
        # process test data
        test_fns = glob.glob(os.path.join(TEST_ROOT, '*.npy'))
        tids = map(lambda fn: os.path.basename(fn).split('.')[0], test_fns)
        Y_pred = []
        for fn in tqdm(test_fns, ncols=80):
            X = np.load(fn)
            Y_pred.append(model.predict(X).mean(axis=0))
        out_df = pd.DataFrame(Y_pred, columns=lb.classes_, index=tids)
        out_df.index.name = 'file_id'
        out_df.sort_index(inplace=True)
        out_df.to_csv('results/{}.csv'.format(train_id))

    # return result
    return train_id, f1, ll
    forgetgate=forget_gate_parameters,
    cell=cell_parameters,
    outgate=gate_parameters,
    learn_init=True,
    grad_clipping=100.0)
l_merge = ConcatLayer([l_lstm_up, l_lstm_down])
l_rshp1 = ReshapeLayer(l_merge, (-1, 2 * num_states))
l_dense = DenseLayer(l_rshp1, 1, W=HeNormal(gain='relu'), nonlinearity=elu)
l_rshp2 = ReshapeLayer(l_dense, (-1, 20))
l_y = NonlinearityLayer(l_rshp2, softmax)

prediction = get_output(l_y)
loss = T.mean(categorical_crossentropy(prediction, y))
accuracy = T.mean(categorical_accuracy(prediction, y))

params = get_all_params(l_y, trainable=True)
updates = adam(loss, params, learning_rate=3e-4)

print "... compiling"
train_fn = theano.function(inputs=[X, y], outputs=loss, updates=updates)
val_fn = theano.function(inputs=[X, y], outputs=[loss, accuracy])
op_fn = theano.function([X], outputs=prediction)

meta_data = {}
meta_data["n_iter"] = 50000
meta_data["num_output"] = 20

meta_data, params = train(train_fn, val_fn,
#
# Define and compile theano functions
#
print "Defining and compiling training functions"
prediction = lasagne.layers.get_output(simple_net_output[0])
loss = categorical_crossentropy(prediction, target_var)
loss = loss.mean()
if weight_decay > 0:
    weightsl2 = regularize_network_params(simple_net_output,
                                          lasagne.regularization.l2)
    loss += weight_decay * weightsl2
train_acc = T.mean(categorical_accuracy(prediction, target_var))

params = lasagne.layers.get_all_params(simple_net_output, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=learn_step)

train_fn = theano.function([input_var, target_var], [loss, train_acc],
                           updates=updates)
print "Done"

print "Defining and compiling valid functions"
valid_prediction = lasagne.layers.get_output(simple_net_output[0],
                                             deterministic=True)
valid_loss = categorical_crossentropy(valid_prediction, target_var)
def event_dr_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):
    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1

    # important context words as channels
    # CNN_sentence config
    filter_size = wordDim
    pool_size = seqlen - filter_size + 1

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                         W=wordEmbeddings.T)
    # emb.params[emb.W].remove('trainable')
    # (batchsize, seqlen, wordDim)
    # print get_output_shape(emb)

    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats * wordDim))
    # print get_output_shape(reshape)
    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                         stride=1, nonlinearity=tanh, W=GlorotUniform())
    # nOutputFrame = num_filters,
    # nOutputFrameSize = (num_feats*wordDim - filter_size)/stride + 1
    # print get_output_shape(conv1d)
    conv1d = DimshuffleLayer(conv1d, (0, 2, 1))
    # print get_output_shape(conv1d)

    pool_size = num_filters
    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size)
    # print get_output_shape(maxpool)
    # forward = FlattenLayer(maxpool)
    # print get_output_shape(forward)

    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network = DenseLayer(hid, num_units=5, nonlinearity=softmax)

    prediction = get_output(network)
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4
    layers = {emb: lambda_val, conv1d: lambda_val, hid: lambda_val,
              network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)
    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        # raising a bare string is invalid Python; raise a proper exception
        raise ValueError("optimizer must be one of: sgd, adagrad, adadelta, nesterov, rms, adam")

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))
    train_fn = theano.function([input_var, target_var], loss, updates=updates,
                               allow_input_downcast=True)
    test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)
    return train_fn, val_fn, network
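# Hypothetical driver loop for the returned functions; `iterate_minibatches`
# and the arrays are placeholders, not part of the snippet above.
train_fn, val_fn, network = event_dr_classifier(
    args, input_var, target_var, wordEmbeddings, seqlen, num_feats)
for epoch in range(args.epochs):
    train_loss = np.mean([train_fn(xb, yb)
                          for xb, yb in iterate_minibatches(X_train, y_train, 32)])
    val_loss, val_acc = val_fn(X_dev, y_dev)
    print("epoch %d: train %.4f val %.4f acc %.4f"
          % (epoch, train_loss, val_loss, val_acc))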
def main():
    args = build_parser().parse_args()
    assert args.num_individuals >= 1, 'Must have at least one member in ensemble'
    assert args.max_epochs >= 1, 'Must have at least 1 epoch.'
    assert args.base_power >= 0, 'Cannot have fractional filters!'

    import lasagne
    np.random.seed(args.seed)
    lasagne.random.set_rng(np.random.RandomState(args.seed))

    experiment_timestamp = str(time.time()).replace('.', '-')
    experiment_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   'experiments', experiment_timestamp)
    if os.path.exists(experiment_path):
        print('Experiment directory exists!')
        sys.exit(1)
    else:
        os.makedirs(experiment_path)

    # Save the commit hash used for these experiments.
    commit_hash = str(subprocess.check_output(['git', 'rev-parse', 'HEAD']),
                      'utf-8')
    commit_file_path = os.path.join(experiment_path, 'exp_commit.txt')
    with open(commit_file_path, 'w') as fd:
        fd.write('\n'.join((__file__, commit_hash)))
    args_file_path = os.path.join(experiment_path, 'provided_args.json')
    with open(args_file_path, 'w') as fd:
        json.dump(vars(args), fd, indent=4)

    # Initial dataset setup
    dataset_mean = load_mean(args.mean_path)
    X, y = load_data(args.dataset_directory, dataset_mean,
                     mean_normalise=True, four_dim=True)
    train_X, train_y, val_X, val_y = train_val_split(X, y)
    print('Train X shape: {}\ttrain y shape: {}'
          'Test X shape: {}\tTest y shape: {}'
          ''.format(*(mat.shape for mat in (train_X, train_y, val_X, val_y))))

    # Network setup
    input_var = T.tensor4('input', dtype=theano.config.floatX)
    target = T.vector('target', dtype='int32')

    for lbd_val in np.linspace(0., 1., 6):
        path_for_lambda = os.path.join(experiment_path, '{:.2f}'.format(lbd_val))
        os.makedirs(path_for_lambda)
        print('Lambda of {:.2f}'.format(lbd_val))
        for num_individuals in range(1, args.num_individuals + 1):
            network_kwargs = {
                'input_var': input_var,
                'base_power': args.base_power,
                'num_individuals': num_individuals
            }
            model = NCEnsemble(**network_kwargs)
            network = model.network
            prediction = get_output(network['output'])
            # TODO Remove hardcoded number of classes.
            loss = model.get_loss(
                target, np.array(lbd_val, dtype=theano.config.floatX), 10
            ).mean()
            accuracy = np.array(100., dtype=theano.config.floatX) * (
                categorical_accuracy(prediction, target).mean())
            params = get_all_params(network['output'], trainable=True)
            updates = adam(loss, params)

            print('Starting theano function compilation')
            train_function = theano.function([input_var, target], loss,
                                             updates=updates)
            loss_function = theano.function([input_var, target], loss)
            accuracy_function = theano.function([input_var, target], accuracy)
            print('Finished theano function compilation')

            train_network = make_training_function(
                train_function, loss_function, accuracy_function, network,
                val_X, val_y, args.max_epochs, args.early_stopping_epochs)
            (best_params, training_losses, validation_losses,
             validation_accuracies) = train_network(train_X, train_y, True, False)
            ensemble_accuracy = validation_accuracies[np.argmin(validation_losses)]
            print('Ensemble at {:.2f}% with {} members'
                  ''.format(ensemble_accuracy, num_individuals))
            print()
            sys.stdout.flush()

            member_path = os.path.join(path_for_lambda,
                                       'ensemble_{}'.format(num_individuals))
            os.makedirs(member_path)
            stats = {
                'training_losses': training_losses,
                'validation_losses': validation_losses,
                'validation_accuracies': validation_accuracies
            }
            stats_path = os.path.join(member_path, 'train_stats.json')
            with open(stats_path, 'w') as fd:
                json.dump(stats, fd, indent=4)
            model_save_path = os.path.join(member_path, 'model.npz')
            np.savez(model_save_path, *get_all_param_values(model.final_layer))
            model_hash = md5(model_save_path)
            model_hash_path = os.path.join(member_path, 'model_hash.txt')
            with open(model_hash_path, 'w') as fd:
                fd.write(model_hash + '\n')
def model_class(ds, paths, param_arch, param_cost, param_updates, param_train):
    # create a log file containing the architecture configuration
    formatter = logging.Formatter('%(message)s')
    logger = logging.getLogger('log_config')
    if 'start_from_epoch' in param_train:
        name_tmp = 'config_from_epoch=%04d.log' % (param_train['start_from_epoch'])
    else:
        name_tmp = 'config.log'
    path_tmp = os.path.join(paths['exp'], name_tmp)
    if not os.path.isfile(path_tmp):
        # to append at the end of the file use: mode='a'
        handler = logging.FileHandler(path_tmp, mode='w')
    else:
        # Exception takes a single message; the original passed a tuple
        raise Exception('[e] the log file ' + name_tmp + ' already exists!')
    handler.setFormatter(formatter)
    handler.setLevel(logging.INFO)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    # input dimensions
    dim_desc = ds.descs_train[0].shape[1]
    dim_labels = ds.labels_train[0].shape[0]
    print(dim_labels)

    # architecture definition:
    print(("[i] architecture definition... "), end=' ')
    tic = time.time()
    if param_arch['type'] == 0:
        desc, patch_op, cla, net, logger = arch_class_00(dim_desc, dim_labels,
                                                         param_arch, logger)
    elif param_arch['type'] == 1:
        desc, patch_op, cla, net, logger = arch_class_01(dim_desc, dim_labels,
                                                         param_arch, logger)
    elif param_arch['type'] == 2:
        desc, patch_op, cla, net, logger = arch_class_02(dim_desc, dim_labels,
                                                         param_arch, logger)
    else:
        raise Exception('[e] architecture not supported!')
    print(("%02.2fs" % (time.time() - tic)))

    # cost function definition:
    print(("[i] cost function definition... "), end=' ')
    tic = time.time()
    pred = LL.get_output(cla, deterministic=True)  # in case we use dropout
    feat = LL.get_output(net)
    target = T.ivector('target')
    # data term
    if param_cost['cost_func'] == 'cross_entropy':
        if param_arch['non_linearity'] == 'softmax':
            # in the original code we were using *.mean() instead of T.mean(*)
            cost_dataterm = T.mean(LO.categorical_crossentropy(pred, target))
        elif param_arch['non_linearity'] == 'log_softmax':
            cost_dataterm = T.mean(categorical_crossentropy_logdomain(pred, target))
    elif param_cost['cost_func'] == 'cross_entropy_stable':
        if param_arch['non_linearity'] == 'softmax':
            cost_dataterm = T.mean(categorical_crossentropy_stable(pred, target))
        else:
            raise Exception('[e] the chosen cost function is not implemented '
                            'for the chosen non-linearity!')
    else:
        raise Exception('[e] the chosen cost function is not supported!')
    # classification accuracy
    acc = LO.categorical_accuracy(pred, target).mean()
    # regularization
    cost_reg = param_cost['mu'] * LR.regularize_network_params(cla, LR.l2)
    # cost function
    cost = cost_dataterm + cost_reg
    # get params
    params = LL.get_all_params(cla)
    # gradient definition
    grad = T.grad(cost, params)
    grad_norm = T.nlinalg.norm(T.concatenate([g.flatten() for g in grad]), 2)
    print(("%02.2fs" % (time.time() - tic)))

    # updates definition:
    print(("[i] gradient updates definition... "), end=' ')
    tic = time.time()
    if param_updates['method'] == 'momentum':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        if param_updates.get('momentum') is not None:
            momentum = param_updates['momentum']  # default: 0.9
        else:
            # the original message said "learning_rate" here, a copy-paste slip
            raise Exception('[e] missing momentum parameter!')
        updates = LU.momentum(grad, params, learning_rate, momentum)
    elif param_updates['method'] == 'adagrad':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        updates = LU.adagrad(grad, params, learning_rate)
    elif param_updates['method'] == 'adadelta':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        updates = LU.adadelta(grad, params, learning_rate)
    elif param_updates['method'] == 'adam':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1e-03
        else:
            raise Exception('[e] missing learning_rate parameter!')
        if param_updates.get('beta1') is not None:
            beta1 = param_updates['beta1']  # default: 0.9
        else:
            raise Exception('[e] missing beta1 parameter!')
        if param_updates.get('beta2') is not None:
            beta2 = param_updates['beta2']  # default: 0.999
        else:
            raise Exception('[e] missing beta2 parameter!')
        if param_updates.get('epsilon') is not None:
            epsilon = param_updates['epsilon']  # default: 1e-08
        else:
            raise Exception('[e] missing epsilon parameter!')
        updates = LU.adam(grad, params, learning_rate, beta1, beta2, epsilon)
    else:
        raise Exception('[e] updates method not supported!')
    print(("%02.2fs" % (time.time() - tic)))

    # train / test functions:
    funcs = dict()
    print(("[i] compiling function 'train'... "), end=' ')
    tic = time.time()
    funcs['train'] = theano.function(
        [desc.input_var, patch_op.input_var, target],
        [cost, cost_dataterm, cost_reg, grad_norm, acc],
        updates=updates,
        allow_input_downcast=True,
        on_unused_input='warn')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'fwd'... "), end=' ')
    tic = time.time()
    funcs['fwd'] = theano.function(
        [desc.input_var, patch_op.input_var, target],
        [cost, grad_norm, acc],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'pred'... "), end=' ')
    tic = time.time()
    funcs['pred'] = theano.function(
        [desc.input_var, patch_op.input_var, target],
        [pred],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'feat'... "), end=' ')
    tic = time.time()
    funcs['feat'] = theano.function(
        [desc.input_var, patch_op.input_var, target],
        [feat],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))

    # save cost function parameters to a config file
    logger.info('\nCost function parameters:')
    logger.info('   cost function = %s' % param_cost['cost_func'])
    logger.info('   mu            = %e' % param_cost['mu'])
    # save updates parameters to a config file
    logger.info('\nUpdates parameters:')
    logger.info('   method        = %s' % param_updates['method'])
    logger.info('   learning rate = %e' % param_updates['learning_rate'])
    if param_updates['method'] == 'momentum':
        logger.info('   momentum      = %e' % param_updates['momentum'])
    if param_updates['method'] == 'adam':
        logger.info('   beta1         = %e' % param_updates['beta1'])
        logger.info('   beta2         = %e' % param_updates['beta2'])
        logger.info('   epsilon       = %e' % param_updates['epsilon'])
    # save training parameters to a config file
    logger.info('\nTraining parameters:')
    logger.info('   epoch size = %d' % ds.epoch_size)

    return funcs, cla, updates
def main():
    setup_train_experiment(logger, FLAGS, "%(model)s_at")

    logger.info("Loading data...")
    data = mnist_load(FLAGS.train_size, FLAGS.seed)
    X_train, y_train = data.X_train, data.y_train
    X_val, y_val = data.X_val, data.y_val
    X_test, y_test = data.X_test, data.y_test

    img_shape = [None, 1, 28, 28]
    train_images = T.tensor4('train_images')
    train_labels = T.lvector('train_labels')
    val_images = T.tensor4('valid_images')  # was 'valid_labels', a copy-paste slip
    val_labels = T.lvector('valid_labels')

    layer_dims = [int(dim) for dim in FLAGS.layer_dims.split("-")]
    num_classes = layer_dims[-1]
    net = create_network(FLAGS.model, img_shape, layer_dims=layer_dims)
    model = with_end_points(net)

    train_outputs = model(train_images)
    val_outputs = model(val_images, deterministic=True)

    # losses
    train_ce = categorical_crossentropy(train_outputs['prob'], train_labels).mean()
    train_at = adversarial_training(lambda x: model(x)['prob'],
                                    train_images, train_labels,
                                    epsilon=FLAGS.epsilon).mean()
    train_loss = train_ce + FLAGS.lmbd * train_at
    val_ce = categorical_crossentropy(val_outputs['prob'], val_labels).mean()
    val_deepfool_images = deepfool(
        lambda x: model(x, deterministic=True)['logits'],
        val_images, val_labels, num_classes,
        max_iter=FLAGS.deepfool_iter,
        clip_dist=FLAGS.deepfool_clip,
        over_shoot=FLAGS.deepfool_overshoot)

    # metrics
    train_acc = categorical_accuracy(train_outputs['logits'], train_labels).mean()
    train_err = 1.0 - train_acc
    val_acc = categorical_accuracy(val_outputs['logits'], val_labels).mean()
    val_err = 1.0 - val_acc

    # deepfool robustness
    reduc_ind = range(1, train_images.ndim)
    l2_deepfool = (val_deepfool_images - val_images).norm(2, axis=reduc_ind)
    l2_deepfool_norm = l2_deepfool / val_images.norm(2, axis=reduc_ind)

    train_metrics = OrderedDict([('loss', train_loss), ('nll', train_ce),
                                 ('at', train_at), ('err', train_err)])
    val_metrics = OrderedDict([('nll', val_ce), ('err', val_err)])
    summary_metrics = OrderedDict([('l2', l2_deepfool.mean()),
                                   ('l2_norm', l2_deepfool_norm.mean())])

    lr = theano.shared(floatX(FLAGS.initial_learning_rate), 'learning_rate')
    train_params = get_all_params(net, trainable=True)
    train_updates = adam(train_loss, train_params, lr)

    logger.info("Compiling theano functions...")
    train_fn = theano.function([train_images, train_labels],
                               outputs=train_metrics.values(),
                               updates=train_updates)
    val_fn = theano.function([val_images, val_labels],
                             outputs=val_metrics.values())
    summary_fn = theano.function([val_images, val_labels],
                                 outputs=summary_metrics.values() + [val_deepfool_images])

    logger.info("Starting training...")
    try:
        samples_per_class = FLAGS.summary_samples_per_class
        summary_images, summary_labels = select_balanced_subset(
            X_val, y_val, num_classes, samples_per_class)
        save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
        save_images(summary_images, save_path)

        epoch = 0
        batch_index = 0
        while epoch < FLAGS.num_epochs:
            epoch += 1
            start_time = time.time()
            train_iterator = batch_iterator(X_train, y_train, FLAGS.batch_size,
                                            shuffle=True)
            epoch_outputs = np.zeros(len(train_fn.outputs))
            for batch_index, (images, labels) in enumerate(train_iterator,
                                                           batch_index + 1):
                batch_outputs = train_fn(images, labels)
                epoch_outputs += batch_outputs
            epoch_outputs /= X_train.shape[0] // FLAGS.batch_size
            logger.info(
                build_result_str(
                    "Train epoch [{}, {:.2f}s]:".format(epoch, time.time() - start_time),
                    train_metrics.keys(), epoch_outputs))

            # update learning rate
            if epoch > FLAGS.start_learning_rate_decay:
                new_lr_value = lr.get_value() * FLAGS.learning_rate_decay_factor
                lr.set_value(floatX(new_lr_value))
                logger.debug("learning rate was changed to {:.10f}".format(new_lr_value))

            # validation
            start_time = time.time()
            val_iterator = batch_iterator(X_val, y_val, FLAGS.test_batch_size,
                                          shuffle=False)
            val_epoch_outputs = np.zeros(len(val_fn.outputs))
            for images, labels in val_iterator:
                val_epoch_outputs += val_fn(images, labels)
            val_epoch_outputs /= X_val.shape[0] // FLAGS.test_batch_size
            logger.info(
                build_result_str(
                    "Test epoch [{}, {:.2f}s]:".format(epoch, time.time() - start_time),
                    val_metrics.keys(), val_epoch_outputs))

            if epoch % FLAGS.summary_frequency == 0:
                summary = summary_fn(summary_images, summary_labels)
                logger.info(
                    build_result_str(
                        "Epoch [{}] adversarial statistics:".format(epoch),
                        summary_metrics.keys(), summary[:-1]))
                save_path = os.path.join(FLAGS.samples_dir, 'epoch-%d.png' % epoch)
                df_images = summary[-1]
                save_images(df_images, save_path)

            if epoch % FLAGS.checkpoint_frequency == 0:
                save_network(net, epoch=epoch)
    except KeyboardInterrupt:
        logger.debug("Keyboard interrupt. Stopping training...")
    finally:
        save_network(net)

    # evaluate final model on test set
    test_iterator = batch_iterator(X_test, y_test, FLAGS.test_batch_size,
                                   shuffle=False)
    test_results = np.zeros(len(val_fn.outputs))
    for images, labels in test_iterator:
        test_results += val_fn(images, labels)
    test_results /= X_test.shape[0] // FLAGS.test_batch_size
    logger.info(build_result_str("Final test results:", val_metrics.keys(),
                                 test_results))
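# batch_iterator is used above but defined elsewhere in this project; a
# hypothetical stand-in matching the call sites, for reference:
def batch_iterator(X, y, batch_size, shuffle=False):
    idx = np.arange(len(X))
    if shuffle:
        np.random.shuffle(idx)
    for start in range(0, len(idx) - batch_size + 1, batch_size):
        batch = idx[start:start + batch_size]
        yield X[batch], y[batch]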
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats, lambda_val=0.5 * 1e-4): print("Building multi task model with 1D Convolution") vocab_size = wordEmbeddings.shape[1] wordDim = wordEmbeddings.shape[0] kw = 2 num_filters = seqlen - kw + 1 stride = 1 filter_size = wordDim pool_size = num_filters input = InputLayer((None, seqlen, num_feats), input_var=input_var) batchsize, _, _ = input.input_var.shape #span emb1 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape1 = ReshapeLayer(emb1, (batchsize, seqlen, num_feats * wordDim)) conv1d_1 = DimshuffleLayer( Conv1DLayer(reshape1, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size) hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid) network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax) """ #DocTimeRel emb2 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape2 = ReshapeLayer(emb2, (batchsize, seqlen, num_feats*wordDim)) conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape2, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size) hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid) network_2 = DenseLayer(hid_2, num_units=5, nonlinearity=softmax) """ #Type emb3 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape3 = ReshapeLayer(emb3, (batchsize, seqlen, num_feats * wordDim)) conv1d_3 = DimshuffleLayer( Conv1DLayer(reshape3, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size) hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid) network_3 = DenseLayer(hid_3, num_units=4, nonlinearity=softmax) #Degree emb4 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape4 = ReshapeLayer(emb4, (batchsize, seqlen, num_feats * wordDim)) conv1d_4 = DimshuffleLayer( Conv1DLayer(reshape4, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size) hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid) network_4 = DenseLayer(hid_4, num_units=4, nonlinearity=softmax) #Polarity emb5 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape5 = ReshapeLayer(emb5, (batchsize, seqlen, num_feats * wordDim)) conv1d_5 = DimshuffleLayer( Conv1DLayer(reshape5, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size) hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid) network_5 = DenseLayer(hid_5, num_units=3, nonlinearity=softmax) #ContextualModality emb6 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape6 = ReshapeLayer(emb6, (batchsize, seqlen, num_feats * wordDim)) conv1d_6 = DimshuffleLayer( Conv1DLayer(reshape6, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh, W=GlorotUniform()), (0, 2, 1)) maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size) hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, 
nonlinearity=sigmoid) network_6 = DenseLayer(hid_6, num_units=5, nonlinearity=softmax) """ #ContextualAspect emb7 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape7 = ReshapeLayer(emb7, (batchsize, seqlen, num_feats*wordDim)) conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape7, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size) hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid) network_7 = DenseLayer(hid_7, num_units=4, nonlinearity=softmax) """ """ #Permanence emb8 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape8 = ReshapeLayer(emb8, (batchsize, seqlen, num_feats*wordDim)) conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape8, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size) hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid) network_8 = DenseLayer(hid_8, num_units=4, nonlinearity=softmax) """ # Is this important? """ network_1_out, network_2_out, network_3_out, network_4_out, \ network_5_out, network_6_out, network_7_out, network_8_out = \ get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8]) """ network_1_out = get_output(network_1) network_3_out = get_output(network_3) network_4_out = get_output(network_4) network_5_out = get_output(network_5) network_6_out = get_output(network_6) loss_1 = T.mean(binary_crossentropy( network_1_out, target_var)) + regularize_layer_params_weighted( { emb1: lambda_val, conv1d_1: lambda_val, hid_1: lambda_val, network_1: lambda_val }, l2) updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True), learning_rate=args.step) train_fn_1 = theano.function([input_var, target_var], loss_1, updates=updates_1, allow_input_downcast=True) val_acc_1 = T.mean( binary_accuracy(get_output(network_1, deterministic=True), target_var)) val_fn_1 = theano.function([input_var, target_var], val_acc_1, allow_input_downcast=True) """ loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb2:lambda_val, conv1d_2:lambda_val, hid_2:lambda_val, network_2:lambda_val} , l2) updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step) train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True) val_acc_2 = T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var)) val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True) """ loss_3 = T.mean(categorical_crossentropy( network_3_out, target_var)) + regularize_layer_params_weighted( { emb3: lambda_val, conv1d_3: lambda_val, hid_3: lambda_val, network_3: lambda_val }, l2) updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True), learning_rate=args.step) train_fn_3 = theano.function([input_var, target_var], loss_3, updates=updates_3, allow_input_downcast=True) val_acc_3 = T.mean( categorical_accuracy(get_output(network_3, deterministic=True), target_var)) val_fn_3 = theano.function([input_var, target_var], val_acc_3, allow_input_downcast=True) loss_4 = T.mean(categorical_crossentropy( network_4_out, target_var)) + regularize_layer_params_weighted( { emb4: lambda_val, conv1d_4: lambda_val, hid_4: lambda_val, network_4: lambda_val }, 
l2) updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True), learning_rate=args.step) train_fn_4 = theano.function([input_var, target_var], loss_4, updates=updates_4, allow_input_downcast=True) val_acc_4 = T.mean( categorical_accuracy(get_output(network_4, deterministic=True), target_var)) val_fn_4 = theano.function([input_var, target_var], val_acc_4, allow_input_downcast=True) loss_5 = T.mean(categorical_crossentropy( network_5_out, target_var)) + regularize_layer_params_weighted( { emb5: lambda_val, conv1d_5: lambda_val, hid_5: lambda_val, network_5: lambda_val }, l2) updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True), learning_rate=args.step) train_fn_5 = theano.function([input_var, target_var], loss_5, updates=updates_5, allow_input_downcast=True) val_acc_5 = T.mean( categorical_accuracy(get_output(network_5, deterministic=True), target_var)) val_fn_5 = theano.function([input_var, target_var], val_acc_5, allow_input_downcast=True) loss_6 = T.mean(categorical_crossentropy( network_6_out, target_var)) + regularize_layer_params_weighted( { emb6: lambda_val, conv1d_6: lambda_val, hid_6: lambda_val, network_6: lambda_val }, l2) updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True), learning_rate=args.step) train_fn_6 = theano.function([input_var, target_var], loss_6, updates=updates_6, allow_input_downcast=True) val_acc_6 = T.mean( categorical_accuracy(get_output(network_6, deterministic=True), target_var)) val_fn_6 = theano.function([input_var, target_var], val_acc_6, allow_input_downcast=True) """ loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb7:lambda_val, conv1d_7:lambda_val, hid_7:lambda_val, network_7:lambda_val} , l2) updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step) train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True) val_acc_7 = T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var)) val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True) loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb8:lambda_val, conv1d_8:lambda_val, hid_8:lambda_val, network_8:lambda_val} , l2) updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step) train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True) val_acc_8 = T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var)) val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True) """ """ return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \ network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \ train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8 """ return train_fn_1, val_fn_1, network_1, train_fn_3, val_fn_3, \ network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \ train_fn_6, val_fn_6, network_6
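# Usage sketch (not from the original source): driving the train/val pairs
# returned above. X is an integer index tensor of shape (n, seqlen, num_feats)
# and y holds the labels for one task head; the simple slicing batcher below
# is a stand-in for the real pipeline's batching.
import numpy as np

def run_epoch(train_fn, val_fn, X, y, X_val, y_val, batch_size=32):
    losses = []
    for start in range(0, len(X) - batch_size + 1, batch_size):
        batch = slice(start, start + batch_size)
        losses.append(train_fn(X[batch], y[batch]))   # one adagrad update
    return np.mean(losses), val_fn(X_val, y_val)      # mean accuracy on val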
def main():
    args = build_parser().parse_args()
    assert args.num_individuals >= 1, ('Must have at least one member in ensemble')
    assert args.max_epochs >= 1, ('Must have at least 1 epoch.')
    assert args.base_power >= 0, ('Cannot have fractional filters!')
    np.random.seed(args.seed)
    import lasagne
    lasagne.random.set_rng(np.random.RandomState(args.seed))
    experiment_timestamp = str(time.time()).replace('.', '-')
    experiment_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   'experiments', experiment_timestamp)
    if os.path.exists(experiment_path):
        print('Experiment directory exists!')
        sys.exit(1)
    else:
        os.makedirs(experiment_path)
    # Save the commit hash used for these experiments.
    commit_hash = str(subprocess.check_output(['git', 'rev-parse', 'HEAD']),
                      'utf-8')
    commit_file_path = os.path.join(experiment_path, 'exp_commit.txt')
    with open(commit_file_path, 'w') as fd:
        fd.write(commit_hash)
    args_file_path = os.path.join(experiment_path, 'provided_args.json')
    with open(args_file_path, 'w') as fd:
        json.dump(vars(args), fd, indent=4)
    # Initial dataset setup
    dataset_mean = load_mean(args.mean_path)
    X, y = load_data(args.dataset_directory, dataset_mean,
                     mean_normalise=True, four_dim=True)
    train_X, train_y, val_X, val_y = train_val_split(X, y)
    print('Train X shape: {}\tTrain y shape: {}\n'
          'Val X shape: {}\tVal y shape: {}'
          ''.format(*(mat.shape for mat in (train_X, train_y, val_X, val_y))))
    # Network setup
    input_var = T.tensor4('input', dtype=theano.config.floatX)
    target = T.vector('target', dtype='int32')
    network_kwargs = {'input_var': input_var, 'base_power': args.base_power}
    model = MiniVGG(**network_kwargs)
    model.pretty_print_network()
    network = model.network
    prediction = get_output(network['output'])
    loss = categorical_crossentropy(prediction, target).mean()
    accuracy = np.array(100., dtype=theano.config.floatX) * (
        categorical_accuracy(prediction, target).mean())
    params = get_all_params(network['output'], trainable=True)
    updates = adam(loss, params)
    print('Starting theano function compilation')
    train_function = theano.function([input_var, target], loss, updates=updates)
    loss_function = theano.function([input_var, target], loss)
    accuracy_function = theano.function([input_var, target], accuracy)
    pred_function = theano.function([input_var], prediction)
    print('Finished theano function compilation')
    ensemble_prediction = make_ens_predictor(network, pred_function, val_X, val_y)
    train_network = make_training_function(train_function, loss_function,
                                           accuracy_function, network, val_X,
                                           val_y, args.max_epochs,
                                           args.early_stopping_epochs)
    # Setup bootstraps
    initialisations = get_k_network_initialisations(args.num_individuals,
                                                    input_var=input_var,
                                                    base_power=args.base_power)
    bootstraps = [
        get_bootstrap(train_X, train_y) for _ in range(args.num_individuals)
    ]
    ensembles = zip(initialisations, bootstraps)
    # Train models
    trained_parameters = []
    for index, (initialisation, bootstrap) in enumerate(ensembles):
        (best_params, training_losses, validation_losses,
         validation_accuracies) = train_network(*bootstrap, initialisation,
                                                True, False)
        trained_parameters.append(best_params)
        max_accuracy = validation_accuracies[np.argmin(validation_losses)]
        ensemble_accuracy = ensemble_prediction(trained_parameters)
        print('New member at {:.2f}% validation accuracy'.format(max_accuracy))
        print('Ensemble at {:.2f}% with {} members'
              ''.format(ensemble_accuracy, len(trained_parameters)))
        print()
        sys.stdout.flush()
        member_path = os.path.join(experiment_path, 'model_{}'.format(index))
        os.makedirs(member_path)
        stats = {
            'training_losses': training_losses,
            'validation_losses': validation_losses,
            'validation_accuracies': validation_accuracies
        }
        with open(os.path.join(member_path, 'train_stats.json'), 'w') as fd:
            json.dump(stats, fd, indent=4)
        model_save_path = os.path.join(member_path, 'model.npz')
        np.savez(model_save_path, *get_all_param_values(model.final_layer))
        model_hash = md5(model_save_path)
        model_hash_path = os.path.join(member_path, 'model_hash.txt')
        with open(model_hash_path, 'w') as fd:
            fd.write(model_hash + '\n')
    ensemble_accuracies = {}
    for num_models in range(1, args.num_individuals + 1):
        parameter_combinations = combinations(trained_parameters, num_models)
        validation_accuracies = [
            ensemble_prediction(parameter_combination)
            for parameter_combination in parameter_combinations
        ]
        ensemble_accuracies[num_models] = {
            'mean': np.mean(validation_accuracies),
            'std': np.std(validation_accuracies),
            'raw': validation_accuracies
        }
    results_path = os.path.join(experiment_path, 'results.json')
    with open(results_path, 'w') as fd:
        json.dump(ensemble_accuracies, fd, indent=4)
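# A plausible shape (an assumption, not the original make_ens_predictor) for
# the soft-voting scorer used above: load each member's weights, average the
# softmax outputs, then score the argmax against the labels.
import numpy as np
from lasagne.layers import set_all_param_values

def ensemble_accuracy(network, pred_function, member_params, X, y):
    member_probs = []
    for params in member_params:
        set_all_param_values(network['output'], params)  # load one member
        member_probs.append(pred_function(X))
    mean_probs = np.mean(member_probs, axis=0)           # soft voting
    return 100.0 * np.mean(np.argmax(mean_probs, axis=1) == y)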
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats, lambda_val = 0.5 * 1e-4): print("Building multi task model with 1D Convolution") vocab_size = wordEmbeddings.shape[1] wordDim = wordEmbeddings.shape[0] kw = 2 num_filters = seqlen-kw+1 stride = 1 filter_size=wordDim pool_size=num_filters input = InputLayer((None, seqlen, num_feats),input_var=input_var) batchsize, _, _ = input.input_var.shape emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T) reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim)) conv1d_1 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size) hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid) network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax) conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size) hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid) network_2 = DenseLayer(hid_2, num_units=4, nonlinearity=softmax) conv1d_3 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size) hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid) network_3 = DenseLayer(hid_3, num_units=3, nonlinearity=softmax) conv1d_4 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size) hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid) network_4 = DenseLayer(hid_4, num_units=3, nonlinearity=softmax) conv1d_5 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size) hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid) network_5 = DenseLayer(hid_5, num_units=2, nonlinearity=softmax) conv1d_6 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size) hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, nonlinearity=sigmoid) network_6 = DenseLayer(hid_6, num_units=4, nonlinearity=softmax) conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size) hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid) network_7 = DenseLayer(hid_7, num_units=3, nonlinearity=softmax) conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, nonlinearity=tanh,W=GlorotUniform()), (0,2,1)) maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size) hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid) network_8 = DenseLayer(hid_8, num_units=3, nonlinearity=softmax) # Is this important? 
network_1_out, network_2_out, network_3_out, network_4_out, \ network_5_out, network_6_out, network_7_out, network_8_out = \ get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8]) loss_1 = T.mean(binary_crossentropy(network_1_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_1:lambda_val, hid_1:lambda_val, network_1:lambda_val} , l2) updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True), learning_rate=args.step) train_fn_1 = theano.function([input_var, target_var], loss_1, updates=updates_1, allow_input_downcast=True) val_acc_1 = T.mean(binary_accuracy(get_output(network_1, deterministic=True), target_var)) val_fn_1 = theano.function([input_var, target_var], val_acc_1, allow_input_downcast=True) loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_2:lambda_val, hid_2:lambda_val, network_2:lambda_val} , l2) updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step) train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True) val_acc_2 = T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var)) val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True) loss_3 = T.mean(categorical_crossentropy(network_3_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_3:lambda_val, hid_3:lambda_val, network_3:lambda_val} , l2) updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True), learning_rate=args.step) train_fn_3 = theano.function([input_var, target_var], loss_3, updates=updates_3, allow_input_downcast=True) val_acc_3 = T.mean(categorical_accuracy(get_output(network_3, deterministic=True), target_var)) val_fn_3 = theano.function([input_var, target_var], val_acc_3, allow_input_downcast=True) loss_4 = T.mean(categorical_crossentropy(network_4_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_4:lambda_val, hid_4:lambda_val, network_4:lambda_val} , l2) updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True), learning_rate=args.step) train_fn_4 = theano.function([input_var, target_var], loss_4, updates=updates_4, allow_input_downcast=True) val_acc_4 = T.mean(categorical_accuracy(get_output(network_4, deterministic=True), target_var)) val_fn_4 = theano.function([input_var, target_var], val_acc_4, allow_input_downcast=True) loss_5 = T.mean(binary_crossentropy(network_5_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_5:lambda_val, hid_5:lambda_val, network_5:lambda_val} , l2) updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True), learning_rate=args.step) train_fn_5 = theano.function([input_var, target_var], loss_5, updates=updates_5, allow_input_downcast=True) val_acc_5 = T.mean(binary_accuracy(get_output(network_5, deterministic=True), target_var)) val_fn_5 = theano.function([input_var, target_var], val_acc_5, allow_input_downcast=True) loss_6 = T.mean(categorical_crossentropy(network_6_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_6:lambda_val, hid_6:lambda_val, network_6:lambda_val} , l2) updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True), learning_rate=args.step) train_fn_6 = theano.function([input_var, target_var], loss_6, updates=updates_6, allow_input_downcast=True) val_acc_6 = T.mean(categorical_accuracy(get_output(network_6, 
deterministic=True), target_var)) val_fn_6 = theano.function([input_var, target_var], val_acc_6, allow_input_downcast=True) loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_7:lambda_val, hid_7:lambda_val, network_7:lambda_val} , l2) updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step) train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True) val_acc_7 = T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var)) val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True) loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_8:lambda_val, hid_8:lambda_val, network_8:lambda_val} , l2) updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step) train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True) val_acc_8 = T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var)) val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True) return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \ network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \ train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
def event_modality_classifier(args, input_var, target_var, wordEmbeddings,
                              seqlen, num_feats):
    print("Building model with 1D Convolution")
    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1
    filter_size = wordDim
    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim,
                         W=wordEmbeddings.T)
    # emb.params[emb.W].remove('trainable')
    # (batchsize, seqlen, wordDim)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats * wordDim))
    # nOutputFrame = num_filters,
    # nOutputFrameSize = (num_feats*wordDim - filter_size)/stride + 1
    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim,
                         stride=1, nonlinearity=tanh, W=GlorotUniform())
    conv1d = DimshuffleLayer(conv1d, (0, 2, 1))
    pool_size = num_filters
    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size)
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network = DenseLayer(hid, num_units=5, nonlinearity=softmax)
    prediction = get_output(network)
    # Categorical cross-entropy matches the 5-way softmax output (the original
    # used binary_crossentropy here, inconsistent with the test loss below).
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4
    layers = {emb: lambda_val, conv1d: lambda_val, hid: lambda_val,
              network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty
    params = get_all_params(network, trainable=True)
    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        # Raising a bare string is invalid; raise a proper exception instead.
        raise ValueError("Please set the optimizer correctly")
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))
    train_fn = theano.function([input_var, target_var], loss, updates=updates,
                               allow_input_downcast=True)
    test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)
    return train_fn, val_fn, network
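# The if/elif optimizer ladder recurs throughout these snippets; a table-driven
# variant (a sketch using the same lasagne.updates functions imported above)
# keeps the dispatch in one place and raises a proper exception:
OPTIMIZERS = {
    "sgd": sgd, "adagrad": adagrad, "adadelta": adadelta,
    "nesterov": nesterov_momentum, "rms": rmsprop, "adam": adam,
}

def make_updates(name, loss, params, step):
    if name not in OPTIMIZERS:
        raise ValueError("Unknown optimizer: %r" % name)
    return OPTIMIZERS[name](loss, params, learning_rate=step)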
def main(): setup_experiment() data = mnist_load() X_test = data.X_test y_test = data.y_test if FLAGS.sort_labels: ys_indices = np.argsort(y_test) X_test = X_test[ys_indices] y_test = y_test[ys_indices] img_shape = [None, 1, 28, 28] test_images = T.tensor4('test_images') test_labels = T.lvector('test_labels') # loaded discriminator number of classes and dims layer_dims = [int(dim) for dim in FLAGS.layer_dims.split("-")] num_classes = layer_dims[-1] # create and load discriminator net = create_network(FLAGS.model, img_shape, layer_dims=layer_dims) load_network(net, epoch=FLAGS.load_epoch) model = with_end_points(net) test_outputs = model(test_images, deterministic=True) # deepfool images test_df_images = deepfool(lambda x: model(x, deterministic=True)['logits'], test_images, test_labels, num_classes, max_iter=FLAGS.deepfool_iter, clip_dist=FLAGS.deepfool_clip, over_shoot=FLAGS.deepfool_overshoot) test_df_images_all = deepfool( lambda x: model(x, deterministic=True)['logits'], test_images, num_classes=num_classes, max_iter=FLAGS.deepfool_iter, clip_dist=FLAGS.deepfool_clip, over_shoot=FLAGS.deepfool_overshoot) test_df_outputs = model(test_df_images, deterministic=True) # fast gradient sign images test_fgsm_images = test_images + fast_gradient_perturbation( test_images, test_outputs['logits'], test_labels, FLAGS.fgsm_epsilon) test_at_outputs = model(test_fgsm_images, deterministic=True) # test metrics test_acc = categorical_accuracy(test_outputs['logits'], test_labels).mean() test_err = 1 - test_acc test_fgsm_acc = categorical_accuracy(test_at_outputs['logits'], test_labels).mean() test_fgsm_err = 1 - test_fgsm_acc test_df_acc = categorical_accuracy(test_df_outputs['logits'], test_labels).mean() test_df_err = 1 - test_df_acc # adversarial noise statistics reduc_ind = range(1, test_images.ndim) test_l2_df = T.sqrt( T.sum((test_df_images - test_images)**2, axis=reduc_ind)) test_l2_df_norm = test_l2_df / T.sqrt(T.sum(test_images**2, axis=reduc_ind)) test_l2_df_skip = test_l2_df.sum() / T.sum(test_l2_df > 0) test_l2_df_skip_norm = test_l2_df_norm.sum() / T.sum(test_l2_df_norm > 0) test_l2_df_all = T.sqrt( T.sum((test_df_images_all - test_images)**2, axis=reduc_ind)) test_l2_df_all_norm = test_l2_df_all / T.sqrt( T.sum(test_images**2, axis=reduc_ind)) test_metrics = OrderedDict([('err', test_err), ('err_fgsm', test_fgsm_err), ('err_df', test_df_err), ('l2_df', test_l2_df.mean()), ('l2_df_norm', test_l2_df_norm.mean()), ('l2_df_skip', test_l2_df_skip), ('l2_df_skip_norm', test_l2_df_skip_norm), ('l2_df_all', test_l2_df_all.mean()), ('l2_df_all_norm', test_l2_df_all_norm.mean()) ]) logger.info("Compiling theano functions...") test_fn = theano.function([test_images, test_labels], outputs=test_metrics.values()) generate_fn = theano.function([test_images, test_labels], [test_df_images, test_df_images_all], on_unused_input='ignore') logger.info("Generate samples...") samples_per_class = 10 summary_images, summary_labels = select_balanced_subset( X_test, y_test, num_classes, samples_per_class) save_path = os.path.join(FLAGS.samples_dir, 'orig.png') save_images(summary_images, save_path) df_images, df_images_all = generate_fn(summary_images, summary_labels) save_path = os.path.join(FLAGS.samples_dir, 'deepfool.png') save_images(df_images, save_path) save_path = os.path.join(FLAGS.samples_dir, 'deepfool_all.png') save_images(df_images_all, save_path) logger.info("Starting...") test_iterator = batch_iterator(X_test, y_test, FLAGS.batch_size, shuffle=False) test_results = np.zeros(len(test_fn.outputs)) 
start_time = time.time() for batch_index, (images, labels) in enumerate(test_iterator, 1): batch_results = test_fn(images, labels) test_results += batch_results if batch_index % FLAGS.summary_frequency == 0: df_images, df_images_all = generate_fn(images, labels) save_path = os.path.join(FLAGS.samples_dir, 'b%d-df.png' % batch_index) save_images(df_images, save_path) save_path = os.path.join(FLAGS.samples_dir, 'b%d-df_all.png' % batch_index) save_images(df_images_all, save_path) logger.info( build_result_str( "Batch [{}] adversarial statistics:".format(batch_index), test_metrics.keys(), batch_results)) test_results /= batch_index logger.info( build_result_str( "Test results [{:.2f}s]:".format(time.time() - start_time), test_metrics.keys(), test_results))
def __init__(self, config): self.clouds = T.tensor3(dtype='float32') self.norms = [ T.tensor3(dtype='float32') for step in xrange(config['steps']) ] self.target = T.vector(dtype='int64') KDNet = {} if config['input_features'] == 'no': KDNet['input'] = InputLayer((None, 1, 2**config['steps']), input_var=self.clouds) else: KDNet['input'] = InputLayer((None, 3, 2**config['steps']), input_var=self.clouds) for i in xrange(config['steps']): KDNet['norm{}_r'.format(i + 1)] = InputLayer( (None, 3, 2**(config['steps'] - 1 - i)), input_var=self.norms[i]) KDNet['norm{}_l'.format(i + 1)] = ExpressionLayer( KDNet['norm{}_r'.format(i + 1)], lambda X: -X) KDNet['norm{}_l_X-'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_l'.format(i + 1)], '-', 0, config['n_f'][i + 1]) KDNet['norm{}_l_Y-'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_l'.format(i + 1)], '-', 1, config['n_f'][i + 1]) KDNet['norm{}_l_Z-'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_l'.format(i + 1)], '-', 2, config['n_f'][i + 1]) KDNet['norm{}_l_X+'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_l'.format(i + 1)], '+', 0, config['n_f'][i + 1]) KDNet['norm{}_l_Y+'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_l'.format(i + 1)], '+', 1, config['n_f'][i + 1]) KDNet['norm{}_l_Z+'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_l'.format(i + 1)], '+', 2, config['n_f'][i + 1]) KDNet['norm{}_r_X-'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_r'.format(i + 1)], '-', 0, config['n_f'][i + 1]) KDNet['norm{}_r_Y-'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_r'.format(i + 1)], '-', 1, config['n_f'][i + 1]) KDNet['norm{}_r_Z-'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_r'.format(i + 1)], '-', 2, config['n_f'][i + 1]) KDNet['norm{}_r_X+'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_r'.format(i + 1)], '+', 0, config['n_f'][i + 1]) KDNet['norm{}_r_Y+'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_r'.format(i + 1)], '+', 1, config['n_f'][i + 1]) KDNet['norm{}_r_Z+'.format(i + 1)] = SPTNormReshapeLayer( KDNet['norm{}_r'.format(i + 1)], '+', 2, config['n_f'][i + 1]) KDNet['cloud{}'.format(i+1)] = SharedDotLayer(KDNet['input'], config['n_f'][i]) if i == 0 else \ ElemwiseSumLayer([KDNet['cloud{}_l_X-_masked'.format(i)], KDNet['cloud{}_l_Y-_masked'.format(i)], KDNet['cloud{}_l_Z-_masked'.format(i)], KDNet['cloud{}_l_X+_masked'.format(i)], KDNet['cloud{}_l_Y+_masked'.format(i)], KDNet['cloud{}_l_Z+_masked'.format(i)], KDNet['cloud{}_r_X-_masked'.format(i)], KDNet['cloud{}_r_Y-_masked'.format(i)], KDNet['cloud{}_r_Z-_masked'.format(i)], KDNet['cloud{}_r_X+_masked'.format(i)], KDNet['cloud{}_r_Y+_masked'.format(i)], KDNet['cloud{}_r_Z+_masked'.format(i)]]) KDNet['cloud{}_bn'.format(i + 1)] = BatchNormDNNLayer( KDNet['cloud{}'.format(i + 1)]) KDNet['cloud{}_relu'.format(i + 1)] = NonlinearityLayer( KDNet['cloud{}_bn'.format(i + 1)], rectify) KDNet['cloud{}_r'.format(i + 1)] = ExpressionLayer( KDNet['cloud{}_relu'.format(i + 1)], lambda X: X[:, :, 1::2], (None, config['n_f'][i], 2**(config['steps'] - i - 1))) KDNet['cloud{}_l'.format(i + 1)] = ExpressionLayer( KDNet['cloud{}_relu'.format(i + 1)], lambda X: X[:, :, ::2], (None, config['n_f'][i], 2**(config['steps'] - i - 1))) KDNet['cloud{}_l_X-'.format(i + 1)] = SharedDotLayer( KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1]) KDNet['cloud{}_l_Y-'.format(i + 1)] = SharedDotLayer( KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1]) KDNet['cloud{}_l_Z-'.format(i + 1)] = SharedDotLayer( KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1]) 
        KDNet['cloud{}_l_X+'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])
        KDNet['cloud{}_l_Y+'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])
        KDNet['cloud{}_l_Z+'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_l'.format(i + 1)], config['n_f'][i + 1])
        # Right branches share W and b with the matching left branches. (The
        # original assigned cloud{}_r_X- twice verbatim; once is enough.)
        KDNet['cloud{}_r_X-'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_r'.format(i + 1)], config['n_f'][i + 1],
            W=KDNet['cloud{}_l_X-'.format(i + 1)].W,
            b=KDNet['cloud{}_l_X-'.format(i + 1)].b)
        KDNet['cloud{}_r_Y-'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_r'.format(i + 1)], config['n_f'][i + 1],
            W=KDNet['cloud{}_l_Y-'.format(i + 1)].W,
            b=KDNet['cloud{}_l_Y-'.format(i + 1)].b)
        KDNet['cloud{}_r_Z-'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_r'.format(i + 1)], config['n_f'][i + 1],
            W=KDNet['cloud{}_l_Z-'.format(i + 1)].W,
            b=KDNet['cloud{}_l_Z-'.format(i + 1)].b)
        KDNet['cloud{}_r_X+'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_r'.format(i + 1)], config['n_f'][i + 1],
            W=KDNet['cloud{}_l_X+'.format(i + 1)].W,
            b=KDNet['cloud{}_l_X+'.format(i + 1)].b)
        KDNet['cloud{}_r_Y+'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_r'.format(i + 1)], config['n_f'][i + 1],
            W=KDNet['cloud{}_l_Y+'.format(i + 1)].W,
            b=KDNet['cloud{}_l_Y+'.format(i + 1)].b)
        KDNet['cloud{}_r_Z+'.format(i + 1)] = SharedDotLayer(
            KDNet['cloud{}_r'.format(i + 1)], config['n_f'][i + 1],
            W=KDNet['cloud{}_l_Z+'.format(i + 1)].W,
            b=KDNet['cloud{}_l_Z+'.format(i + 1)].b)
        KDNet['cloud{}_l_X-_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_l_X-'.format(i + 1)],
             KDNet['norm{}_l_X-'.format(i + 1)]], T.mul)
        KDNet['cloud{}_l_Y-_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_l_Y-'.format(i + 1)],
             KDNet['norm{}_l_Y-'.format(i + 1)]], T.mul)
        KDNet['cloud{}_l_Z-_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_l_Z-'.format(i + 1)],
             KDNet['norm{}_l_Z-'.format(i + 1)]], T.mul)
        KDNet['cloud{}_l_X+_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_l_X+'.format(i + 1)],
             KDNet['norm{}_l_X+'.format(i + 1)]], T.mul)
        KDNet['cloud{}_l_Y+_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_l_Y+'.format(i + 1)],
             KDNet['norm{}_l_Y+'.format(i + 1)]], T.mul)
        KDNet['cloud{}_l_Z+_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_l_Z+'.format(i + 1)],
             KDNet['norm{}_l_Z+'.format(i + 1)]], T.mul)
        KDNet['cloud{}_r_X-_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_r_X-'.format(i + 1)],
             KDNet['norm{}_r_X-'.format(i + 1)]], T.mul)
        KDNet['cloud{}_r_Y-_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_r_Y-'.format(i + 1)],
             KDNet['norm{}_r_Y-'.format(i + 1)]], T.mul)
        KDNet['cloud{}_r_Z-_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_r_Z-'.format(i + 1)],
             KDNet['norm{}_r_Z-'.format(i + 1)]], T.mul)
        KDNet['cloud{}_r_X+_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_r_X+'.format(i + 1)],
             KDNet['norm{}_r_X+'.format(i + 1)]], T.mul)
        KDNet['cloud{}_r_Y+_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_r_Y+'.format(i + 1)],
             KDNet['norm{}_r_Y+'.format(i + 1)]], T.mul)
        KDNet['cloud{}_r_Z+_masked'.format(i + 1)] = ElemwiseMergeLayer(
            [KDNet['cloud{}_r_Z+'.format(i + 1)],
             KDNet['norm{}_r_Z+'.format(i + 1)]], T.mul)
    KDNet['cloud_fin'] = ElemwiseSumLayer([
        KDNet['cloud{}_l_X-_masked'.format(config['steps'])],
KDNet['cloud{}_l_Y-_masked'.format(config['steps'])], KDNet['cloud{}_l_Z-_masked'.format(config['steps'])], KDNet['cloud{}_l_X+_masked'.format(config['steps'])], KDNet['cloud{}_l_Y+_masked'.format(config['steps'])], KDNet['cloud{}_l_Z+_masked'.format(config['steps'])], KDNet['cloud{}_r_X-_masked'.format(config['steps'])], KDNet['cloud{}_r_Y-_masked'.format(config['steps'])], KDNet['cloud{}_r_Z-_masked'.format(config['steps'])], KDNet['cloud{}_r_X+_masked'.format(config['steps'])], KDNet['cloud{}_r_Y+_masked'.format(config['steps'])], KDNet['cloud{}_r_Z+_masked'.format(config['steps'])] ]) KDNet['cloud_fin_bn'] = BatchNormDNNLayer(KDNet['cloud_fin']) KDNet['cloud_fin_relu'] = NonlinearityLayer(KDNet['cloud_fin_bn'], rectify) KDNet['cloud_fin_reshape'] = ReshapeLayer(KDNet['cloud_fin_relu'], (-1, config['n_f'][-1])) KDNet['output'] = DenseLayer(KDNet['cloud_fin_reshape'], config['num_classes'], nonlinearity=softmax) prob = get_output(KDNet['output']) prob_det = get_output(KDNet['output'], deterministic=True) weights = get_all_params(KDNet['output'], trainable=True) l2_pen = regularize_network_params(KDNet['output'], l2) loss = categorical_crossentropy( prob, self.target).mean() + config['l2'] * l2_pen accuracy = categorical_accuracy(prob, self.target).mean() lr = theano.shared(np.float32(config['learning_rate'])) updates = adam(loss, weights, learning_rate=lr) self.train_fun = theano.function([self.clouds] + self.norms + [self.target], [loss, accuracy], updates=updates) self.prob_fun = theano.function([self.clouds] + self.norms + [self.target], [loss, prob_det]) self.KDNet = KDNet
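# Usage sketch (assumptions: random arrays in place of real kd-tree splits,
# 3-channel input features, 40 classes). Shows the tensors train_fun expects.
import numpy as np

steps, batch = 10, 16
clouds = np.random.rand(batch, 3, 2 ** steps).astype('float32')
norms = [np.random.rand(batch, 3, 2 ** (steps - 1 - i)).astype('float32')
         for i in range(steps)]
labels = np.random.randint(0, 40, size=batch).astype('int64')
# With `net` an instance of the class above:
# loss, acc = net.train_fun(clouds, *(norms + [labels]))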
patch_op = LL.InputLayer(input_var=Tsp.csc_fmatrix('patch_op'),
                         shape=(None, None))
ffn = get_model(inp, patch_op)

# L.layers.get_output -> theano variable representing the network output
output = LL.get_output(ffn)
pred = LL.get_output(ffn, deterministic=True)  # in case we use dropout

# target theano variable indicating the index a vertex should be mapped to
# w.r.t. the latent space
target = T.ivector('idxs')

# to work with logit predictions, better behaved numerically
cla = utils_lasagne.categorical_crossentropy_logdomain(output, target,
                                                       nclasses).mean()
acc = LO.categorical_accuracy(pred, target).mean()

# a bit of regularization is commonly used
regL2 = L.regularization.regularize_network_params(ffn, L.regularization.l2)
cost = cla + l2_weight * regL2

''' Define the update rule, how to train '''
params = LL.get_all_params(ffn, trainable=True)
grads = T.grad(cost, params)
# computes the L2 norm of the gradient to better inspect training
grads_norm = T.nlinalg.norm(T.concatenate([g.flatten() for g in grads]), 2)
# Adam turned out to be a very good choice for correspondence
updates = L.updates.adam(grads, params, learning_rate=0.001)

''' Compile '''
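# The excerpt stops at the compile step; a plausible continuation (an
# assumption, reusing the symbolic variables defined above -- `inp` and
# `theano` come from earlier in the original file) would be:
funcs = {}
funcs['train'] = theano.function([inp.input_var, patch_op.input_var, target],
                                 [cost, cla, grads_norm], updates=updates)
funcs['acc_loss'] = theano.function([inp.input_var, patch_op.input_var, target],
                                    [acc, cla])
funcs['predict'] = theano.function([inp.input_var, patch_op.input_var], pred)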
def build_network_2dconv(args, input1_var, input1_mask_var, input2_var,
                         input2_mask_var, target_var, wordEmbeddings, maxlen=36):
    print("Building model with 2D Convolution")
    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    num_filters = 100
    stride = 1
    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)
    # two conv pool layer
    # filter_size=(10, 100)
    # pool_size=(4,4)
    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    # input_1_mask = InputLayer((None, maxlen), input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size, output_size=wordDim,
                           W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")  # (batchsize, maxlen, wordDim)
    reshape_1 = ReshapeLayer(emb_1, (batchsize, 1, maxlen, wordDim))
    conv2d_1 = Conv2DLayer(reshape_1, num_filters=num_filters,
                           filter_size=filter_size, stride=stride,
                           nonlinearity=rectify,
                           W=GlorotUniform())  # (None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size)  # (None, 100, 1, 1)
    """
    filter_size_2=(4, 10)
    pool_size_2=(2,2)
    conv2d_1 = Conv2DLayer(maxpool_1, num_filters=num_filters, filter_size=filter_size_2, stride=stride, nonlinearity=rectify, W=GlorotUniform())  # (None, 100, 34, 1)
    maxpool_1 = MaxPool2DLayer(conv2d_1, pool_size=pool_size_2)  # (None, 100, 1, 1) (None, 100, 1, 20)
    """
    forward_1 = FlattenLayer(maxpool_1)  # (None, 100)  # (None, 50400)
    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    # input_2_mask = InputLayer((None, maxlen), input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size, output_size=wordDim,
                           W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")
    reshape_2 = ReshapeLayer(emb_2, (batchsize, 1, maxlen, wordDim))
    conv2d_2 = Conv2DLayer(reshape_2, num_filters=num_filters,
                           filter_size=filter_size, stride=stride,
                           nonlinearity=rectify,
                           W=GlorotUniform())  # (None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size)  # (None, 100, 1, 1)
    """
    conv2d_2 = Conv2DLayer(maxpool_2, num_filters=num_filters, filter_size=filter_size_2, stride=stride, nonlinearity=rectify, W=GlorotUniform())  # (None, 100, 34, 1)
    maxpool_2 = MaxPool2DLayer(conv2d_2, pool_size=pool_size_2)  # (None, 100, 1, 1)
    """
    forward_2 = FlattenLayer(maxpool_2)  # (None, 100)
    # elementwise merge needs a fixed sequence length
    mul = ElemwiseMergeLayer([forward_1, forward_2], merge_function=T.mul)
    sub = AbsSubLayer([forward_1, forward_2], merge_function=T.sub)
    concat = ConcatLayer([mul, sub])
    # NOTE: the next line overrides the [mul, sub] features above, so only the
    # raw sentence vectors are concatenated; drop it to use the merged features.
    concat = ConcatLayer([forward_1, forward_2])
    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)
    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=softmax)
    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=softmax)
    # prediction = get_output(network, {input_1: input1_var, input_2: input2_var})
    prediction = get_output(network)
    loss = T.mean(categorical_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4
    layers = {conv2d_1: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty
    params = get_all_params(network, trainable=True)
    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        # Raising a bare string is invalid; raise a proper exception instead.
        raise ValueError("Please set the optimizer correctly")
    # test_prediction = get_output(network, {input_1: input1_var, input_2: input2_var}, deterministic=True)
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(categorical_crossentropy(test_prediction, target_var))
    """
    train_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
                               loss, updates=updates, allow_input_downcast=True)
    """
    train_fn = theano.function([input1_var, input2_var, target_var], loss,
                               updates=updates, allow_input_downcast=True)
    if args.task == "sts":
        """
        val_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
                                 [test_loss, test_prediction], allow_input_downcast=True)
        """
        val_fn = theano.function([input1_var, input2_var, target_var],
                                 [test_loss, test_prediction],
                                 allow_input_downcast=True)
    elif args.task == "ent":
        # test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
        test_acc = T.mean(categorical_accuracy(test_prediction, target_var))
        """
        val_fn = theano.function([input1_var, input1_mask_var, input2_var, input2_mask_var, target_var],
                                 [test_loss, test_acc], allow_input_downcast=True)
        """
        val_fn = theano.function([input1_var, input2_var, target_var],
                                 [test_loss, test_acc],
                                 allow_input_downcast=True)
    return train_fn, val_fn
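# The pair model consumes fixed-length index matrices (maxlen=36 by default),
# so a padding helper is needed upstream; this one is a sketch, not from the
# original code.
import numpy as np

def pad_to_maxlen(seqs, maxlen=36, pad_idx=0):
    # Right-pad (or truncate) each token-index sequence to exactly maxlen.
    out = np.full((len(seqs), maxlen), pad_idx, dtype='int32')
    for i, seq in enumerate(seqs):
        out[i, :min(len(seq), maxlen)] = np.asarray(seq[:maxlen], dtype='int32')
    return out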
def build_model(self, train_set, test_set, validation_set=None): super(UFCNN, self).build_model(train_set, test_set, validation_set) epsilon = 1e-8 loss_cc = aggregate(categorical_crossentropy( T.clip(get_output(self.model, self.sym_x), epsilon, 1), self.sym_t), mode='mean') y = T.clip(get_output(self.model, self.sym_x, deterministic=True), epsilon, 1) loss_eval = aggregate(categorical_crossentropy(y, self.sym_t), mode='mean') loss_acc = categorical_accuracy(y, self.sym_t).mean() all_params = get_all_params(self.model, trainable=True) grads = T.grad(loss_cc, all_params) for idx, param in enumerate(all_params): param_name = param.name if ('h2.W' in param_name) or ('g2.W' in param_name): print(param_name) grads[idx] *= self.l2_mask if ('h3.W' in param_name) or ('g3.W' in param_name): print(param_name) grads[idx] *= self.l3_mask sym_beta1 = T.scalar('beta1') sym_beta2 = T.scalar('beta2') updates = adam(grads, all_params, self.sym_lr, sym_beta1, sym_beta2) inputs = [ self.sym_index, self.sym_batchsize, self.sym_lr, sym_beta1, sym_beta2 ] f_train = theano.function( inputs, [loss_cc], updates=updates, givens={ self.sym_x: self.sh_train_x[self.batch_slice], self.sym_t: self.sh_train_t[self.batch_slice], }, ) f_test = theano.function( [self.sym_index, self.sym_batchsize], [loss_eval], givens={ self.sym_x: self.sh_test_x[self.batch_slice], self.sym_t: self.sh_test_t[self.batch_slice], }, ) f_validate = None if validation_set is not None: f_validate = theano.function( [self.sym_index, self.sym_batchsize], [loss_eval, loss_acc], givens={ self.sym_x: self.sh_valid_x[self.batch_slice], self.sym_t: self.sh_valid_t[self.batch_slice], }, ) self.train_args['inputs']['batchsize'] = 128 self.train_args['inputs']['learningrate'] = 1e-3 self.train_args['inputs']['beta1'] = 0.9 self.train_args['inputs']['beta2'] = 0.999 self.train_args['outputs']['loss_cc'] = '%0.6f' self.test_args['inputs']['batchsize'] = 128 self.test_args['outputs']['loss_eval'] = '%0.6f' self.validate_args['inputs']['batchsize'] = 128 self.validate_args['outputs']['loss_eval'] = '%0.6f' self.validate_args['outputs']['loss_acc'] = '%0.6f%%' return f_train, f_test, f_validate, self.train_args, self.test_args, self.validate_args
loss_lab_source = T.mean(categorical_crossentropy(predictions=softmax(source_lab), targets=T.zeros(shape=(args.batch_size,), dtype='int32'))) +\ T.mean(categorical_crossentropy(predictions=softmax(source_gen), targets=T.ones(shape=(args.batch_size,), dtype='int32'))) weight_gen_loss = th.shared(np.float32(0.)) output_lab = ll.get_output(disc_layers[-2], x_lab, deterministic=False) output_gen = ll.get_output(disc_layers[-2], gen_dat, deterministic=False) m1 = T.mean(output_lab, axis=0) m2 = T.mean(output_gen, axis=0) feature_loss = T.mean(abs(m1 - m2)) loss_gen = (1 - weight_gen_loss) * (loss_gen_source + 0.5 * feature_loss) loss_lab = (1 - weight_gen_loss) * loss_lab_source + weight_gen_loss * ( loss_lab_class + loss_gen_class) #network performance D_acc_on_real = T.mean( categorical_accuracy(predictions=source_lab, targets=T.zeros(shape=(args.batch_size, )))) D_acc_on_fake = T.mean( categorical_accuracy(predictions=source_gen, targets=T.ones(shape=(args.batch_size, )))) G_acc_on_fake = T.mean( categorical_accuracy(predictions=source_gen, targets=T.zeros(shape=(args.batch_size, )))) performfun = th.function(inputs=[x_lab, labels, labels_gen], outputs=[D_acc_on_real, D_acc_on_fake, G_acc_on_fake]) train_err = T.mean(T.neq(T.argmax(class_lab, axis=1), labels)) # test error output_before_softmax = ll.get_output(disc_layers[-1], x_lab, deterministic=True) test_class_lab = T.batched_dot( T.reshape(output_before_softmax,
def calc_accuracy(prediction, targets):
    # We can use the lasagne objective categorical_accuracy to determine
    # the top-1 single-label accuracy.
    a = T.mean(objectives.categorical_accuracy(prediction, targets, top_k=1))
    return a
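# Quick numeric check (a sketch; the imports mirror what the snippet assumes):
# two predictions, one correct, so the expected top-1 accuracy is 0.5.
import numpy as np
import theano.tensor as T
from lasagne import objectives

preds = T.constant(np.array([[0.8, 0.2], [0.3, 0.7]], dtype='float32'))
tgts = T.constant(np.array([0, 0], dtype='int32'))
print(calc_accuracy(preds, tgts).eval())  # -> 0.5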