def create_KmaxPooling_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''One-layer convolution with different filter sizes followed by k-max pooling.'''
    k = int(config[pref + '_kpool'])
    filter_width_list = [int(fw) for fw in config[pref + '_filterwidth'].split()]
    logger.info('filter widths: %s', filter_width_list)
    num_filters = int(config[pref + '_num_filters'])
    # num_filters /= len(filter_width_list)
    totfilters = 0
    logger.info('input_len: %d, embedding_size: %d, num_filters: %d',
                input_len, embedding_size, num_filters)
    for i, fw in enumerate(filter_width_list):
        num_feature_map = input_len - fw + 1  # only needed by the commented-out initialize2
        conv = Convolutional(image_size=(input_len, embedding_size),
                             filter_size=(fw, embedding_size),
                             num_filters=min(int(config[pref + '_maxfilter']),
                                             num_filters * fw),
                             num_channels=1)
        totfilters += conv.num_filters * k
        # initialize2(conv, num_feature_map)
        initialize([conv])
        conv.name = pref + 'conv_' + str(fw)
        layer0_input = debug_print(layer0_input, 'inp', False)
        convout = conv.apply(layer0_input)
        convout = debug_print(convout, 'convout', False)
        # keep the k largest activations of each feature map
        kpoolout = KmaxPooling(convout, k).apply().flatten(2)
        kpoolout = debug_print(kpoolout, 'poolout', False)
        if i == 0:
            outpools = kpoolout
        else:
            outpools = T.concatenate([outpools, kpoolout], axis=1)
    name_rep_len = totfilters
    return outpools, name_rep_len
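# KmaxPooling above keeps, for each feature map, the k largest activations
# (in the usual k-max-pooling convention, in their original order). A
# plain-numpy reference of that operation, added only for clarity; it is not
# part of the original model, which uses the KmaxPooling brick in the graph:
def _kmax_pooling_reference(feature_maps, k):
    import numpy as np
    # feature_maps: (num_filters, num_positions) conv activations
    top_idx = np.sort(np.argsort(feature_maps, axis=1)[:, -k:], axis=1)
    rows = np.arange(feature_maps.shape[0])[:, None]
    return feature_maps[rows, top_idx]
# e.g. _kmax_pooling_reference(np.array([[0.1, 0.9, 0.3, 0.7, 0.2]]), 2)
# returns [[0.9, 0.7]]: each filter contributes k values, which is why
# totfilters accumulates conv.num_filters * k per filter width.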
def softmax_layer(h, y, hidden_size, num_targets, cost_fn='cross'):
    hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = None
    if 'ranking' in cost_fn:
        cost, updates = ranking_loss(linear_output, y)
        logger.info('using ranking loss function!')
    else:
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    misclassify_rate = MultiMisclassificationRate().apply(y, T.ge(linear_output, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, updates, misclassify_rate
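# Hypothetical usage sketch for softmax_layer, assuming a (batch, hidden)
# feature matrix and a multi-hot int32 target matrix; the dimensions and
# variable names here are illustrative, not taken from the original code.
def _softmax_layer_usage_example():
    from blocks.graph import ComputationGraph
    h = T.matrix('h')                       # (batch, 300) feature vectors
    y = T.matrix('targets', dtype='int32')  # (batch, 102) multi-hot labels
    cost, pat1, updates, err = softmax_layer(h, y, hidden_size=300,
                                             num_targets=102, cost_fn='cross')
    # the cost graph is then handed to the training loop machinery
    return ComputationGraph(cost).parameters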
def __init__(self, input, n_in, n_out):
    """Initialize the parameters of the sigmoid output layer."""
    # compute the vector of class-membership probabilities in symbolic form
    self.s_y_given_x = T.nnet.sigmoid(input)
    self.s_y_given_x = debug_print(self.s_y_given_x, 'scores', False)
    super(SigmoidLoss, self).__init__(input, n_in, n_out)
def build_model(self, x, config):
    logger.info('building %s model for: %s ', self.nn_model, self.name)
    vocabsize = self.get_vocab_size()
    logger.info('%s vocab size is: %d', self.name, vocabsize)
    self.embeddings, self.dim_emb = self.get_embeddings()
    if self.tune_tune:
        logger.info('%s lookuptable with size (%d, %d) will be tuned.',
                    self.name, vocabsize, self.dim_emb)
        lookup = LookupTable(length=vocabsize, dim=self.dim_emb)
        lookup.allocate()
        # add_role(lookup.W, WEIGHT)
        lookup.W.name = 'lt.W'
    else:
        logger.info('%s lookuptable with size (%d, %d) will NOT be tuned.',
                    self.name, vocabsize, self.dim_emb)
        lookup = MyLookupTable(length=vocabsize, dim=self.dim_emb)
        lookup.allocate()
    lookup.name = self.name + 'lookuptable'
    lookup.W.set_value(self.embeddings)
    xemb = lookup.apply(x)
    xemb = debug_print(xemb, 'xemb', False)
    if 'cnn' in self.nn_model:
        logger.info('CNN')
        feature_vec, feature_vec_len = create_cnn_general(
            xemb, self.dim_emb, self.max_len, config, self.name)
    elif self.nn_model == 'lstm':
        feature_vec, feature_vec_len = create_lstm(
            xemb, self.dim_emb, False, config, self.name)
    elif self.nn_model == 'bilstm':
        feature_vec, feature_vec_len = create_lstm(
            xemb, self.dim_emb, True, config, self.name)
    elif self.nn_model == 'rnn':
        feature_vec, feature_vec_len = create_rnn(
            xemb, self.dim_emb, config, self.name)
    elif self.nn_model == 'ff':
        feature_vec, feature_vec_len = create_ff(
            xemb, self.dim_emb, self.max_len, config)
    elif self.nn_model == 'mean':
        feature_vec, feature_vec_len = create_mean(
            xemb, self.dim_emb, self.max_len, config)
    else:
        # guard against returning an unbound feature_vec for unknown models
        raise ValueError('unknown nn_model: %s' % self.nn_model)
    return feature_vec, feature_vec_len
def create_rnn(xemb, embedding_size, config, pref):
    hiddensize = int(config[pref + '_h_rnn'])
    for i in range(1):
        xemb = rnn_layer(embedding_size, xemb, hiddensize, 1, pref)
        embedding_size = hiddensize
        xemb.name = 'rnn' + str(i) + pref
    h = xemb
    lstm_outsize = hiddensize
    h = debug_print(h[h.shape[0] - 1], 'rnn', False)
    return h, lstm_outsize
def create_cnn_general(xemb, embedding_size, input_len, config, pref):
    numConvLayers = int(config[pref + '_convlayers'])
    xemb = debug_print(xemb, 'afterLookup', False)
    # reshape to (batch, 1 channel, input_len, embedding_size) for 2D convolution
    layer0_input = xemb.flatten().reshape(
        (xemb.shape[0], 1, input_len, embedding_size))
    if numConvLayers == 1:
        # return create_kim_cnn(layer0_input, embedding_size, input_len, config, pref)
        return create_OLD_kim_cnn(layer0_input, embedding_size, input_len,
                                  config, pref)
    elif numConvLayers == -1:
        return create_KmaxPooling_cnn(layer0_input, embedding_size, input_len,
                                      config, pref)
    else:
        return create_yy_cnn(numConvLayers, layer0_input, embedding_size,
                             input_len, config, pref)
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder],
               name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu', input_dim=hu_encoder,
                  output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma', input_dim=hu_encoder,
                  output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
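# Note on the asymmetric initialization above (an observation, not from the
# original comments): the sigma head lin2 is initialized with a much smaller
# weight std (rndstd=0.001), presumably so the predicted noise scale starts
# near zero and early training is dominated by mu; sampler() below shows how
# log_sigma is consumed.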
def softmax_layer_old(h, y, hidden_size, num_targets, cost_fn='softmax'):
    hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = {}
    if 'softmax' in cost_fn:
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    else:
        cost, updates = ranking_loss(linear_output, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    return cost, pat1, updates
def sampler(mu, log_sigma, deterministic=False, use_noise=True, input_log=False):
    log_sigma = debug_print(log_sigma, 'log_sigma', False)
    logger.info('deterministic: %s --- use noise: %s', deterministic, use_noise)
    if deterministic and use_noise:
        return mu
    if deterministic:
        # return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
    eps = srng.normal(size=mu.shape, std=1)
    # reparametrize
    if use_noise:
        if input_log:
            return mu + T.exp(0.5 * log_sigma) * eps
        else:
            return mu + log_sigma * eps
    else:
        # return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
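# sampler() implements the reparameterization trick: instead of sampling
# z ~ N(mu, sigma^2) directly (which would block gradients), it draws
# eps ~ N(0, 1) and returns mu + sigma * eps, so gradients flow through both
# heads. With input_log=True the second head is read as log(sigma^2), hence
# sigma = exp(0.5 * log_sigma); otherwise it is used as sigma directly.
# A plain-numpy sketch of the same computation (illustration only):
def _reparameterize_reference(mu, log_sigma, input_log=False):
    import numpy as np
    sigma = np.exp(0.5 * log_sigma) if input_log else log_sigma
    eps = np.random.standard_normal(mu.shape)
    return mu + sigma * eps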
def create_lstm(xemb, embedding_size, bidirectional, config, pref):
    hiddensize = int(config[pref + '_h_lstm'])
    inpsize = embedding_size
    if bidirectional:
        for i in range(1):
            xemb = bilstm_layer(inpsize, xemb, hiddensize, i, pref)
            xemb.name = 'bilstm' + str(i) + pref
            inpsize = hiddensize * 2
        lstm_outsize = hiddensize * 2
        h = xemb
    else:
        for i in range(1):
            xemb = lstm_layer(embedding_size, xemb, hiddensize, 1, pref)
            embedding_size = hiddensize
            xemb.name = 'lstm' + str(i) + pref
        h = xemb
        lstm_outsize = hiddensize
    h = debug_print(h[h.shape[0] - 1], 'outlstm', False)
    return h, lstm_outsize
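# h[h.shape[0] - 1] above relies on the time-major layout of Blocks'
# recurrent bricks, (time, batch, features): indexing the first axis at the
# last step yields the final hidden state of every sequence in the batch,
# which then serves as the fixed-length representation.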
def build_model_new(fea2obj, num_targets, config, kl_weight, entropy_weight,
                    deterministic=False, test=False):
    hidden_size = config['hidden_units'].split()
    use_highway = str_to_bool(config['use_highway']) if 'use_highway' in config else False
    use_gaus = str_to_bool(config['use_gaus']) if 'use_gaus' in config else False
    use_rec = str_to_bool(config['use_rec']) if 'use_rec' in config else True
    n_latent_z = int(config['n_latent']) if 'n_latent' in config else 0
    use_noise = str_to_bool(config['use_noise']) if 'use_noise' in config else False
    use_vae = str_to_bool(config['use_vae']) if 'use_vae' in config else False
    hu_decoder = int(config['hu_decoder']) if 'hu_decoder' in config else hidden_size
    logger.info('use_gaus: %s, use_rec: %s, use_noise: %s, use_vae: %s, '
                'hidden_size: %s, n_latent_z: %d, hu_decoder: %s, hu_encoder: %s',
                use_gaus, use_rec, use_noise, use_vae, hidden_size, n_latent_z,
                hu_decoder, hidden_size)
    init_with_type = str_to_bool(config['init_with_type']) if 'init_with_type' in config else False
    y = T.matrix('targets', dtype='int32')
    drop_prob = float(config['dropout']) if 'dropout' in config else 0

    # build the feature vector with one model, e.g., with cnn, mean, or lstm
    feature_vec, feature_vec_len = build_feature_vec(fea2obj, config)

    # dropout: apply the binary mask in training, rescale at test time
    if drop_prob > 0:
        mask = T.cast(srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape),
                      'float32')
        if test:
            feature_vec *= (1 - drop_prob)
        else:
            feature_vec *= mask

    # highway network
    if use_highway:
        g_mlp = MLP(activations=[Rectifier()],
                    dims=[feature_vec_len, feature_vec_len], name='g_mlp')
        t_mlp = MLP(activations=[Logistic()],
                    dims=[feature_vec_len, feature_vec_len], name='t_mlp')
        initialize([g_mlp, t_mlp])
        t = t_mlp.apply(feature_vec)
        z = t * g_mlp.apply(feature_vec) + (1. - t) * feature_vec
        feature_vec = z

    # MLP(s)
    logger.info('feature vec length = %s and hidden layer units = %s',
                feature_vec_len, ' '.join(hidden_size))
    if len(hidden_size) > 1:
        # two MLP layers on the feature vector
        mlp = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[feature_vec_len, int(hidden_size[0]), int(hidden_size[1])],
                  name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = int(hidden_size[1])
    else:
        hidden_size = int(hidden_size[0])
        mlp = MLP(activations=[Rectifier()], dims=[feature_vec_len, hidden_size],
                  name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = hidden_size

    # compute the initial guess for y_hat
    hidden_to_output = Linear(name='hidden_to_output', input_dim=last_hidden_size,
                              output_dim=num_targets)
    typemfile = None
    if init_with_type:
        typemfile = config['dsdir'] + '/_typematrix.npy'
        # typemfile = config['dsdir'] + '/_typeCooccurrMatrix.npy'
    initialize_lasthid(hidden_to_output, typemfile)
    # initialize([hidden_to_output])

    y_hat_init = Logistic().apply(hidden_to_output.apply(before_out))
    y_hat_init.name = 'y_hat_init'
    y_hat_init = debug_print(y_hat_init, 'yhat_init', False)
    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    logpy_xz = logpy_xz_init
    y_hat_recog = y_hat_init
    y_hat = y_hat_init
    KLD = 0

    if use_gaus:
        if use_vae:
            logger.info('using VAE')
            vae_conditional = str_to_bool(config['vae_cond'])
            if vae_conditional:
                y_hat, logpy_xz, KLD, y_hat_recog = build_vae_conditoinal(
                    kl_weight, entropy_weight, y_hat_init, feature_vec,
                    feature_vec_len, config, y, test=test,
                    deterministic=deterministic, num_targets=num_targets,
                    n_latent_z=n_latent_z, hidden_size=hidden_size,
                    hu_decoder=hu_decoder)
            else:
                y_hat, logpy_xz, KLD = build_vae_basic(
                    kl_weight, feature_vec, feature_vec_len, config, y,
                    test=test, deterministic=deterministic,
                    num_targets=num_targets, n_latent_z=n_latent_z,
                    hidden_size=hidden_size, hu_decoder=hu_decoder)
                y_hat_recog = y_hat
        else:
            if use_rec:
                logger.info('Not using VAE... but using recursion')
                prior_in = T.concatenate([feature_vec, y_hat_init], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in, n_input=feature_vec_len + num_targets,
                    hu_encoder=hidden_size, n_latent=n_latent_z)
                z_prior = sampler(mu_prior, log_sigma_prior,
                                  deterministic=deterministic, use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl, n_latent=n_latent_z + feature_vec_len,
                                             hu_decoder=hu_decoder,
                                             n_out=num_targets, y=y)
                y_hat = (y_hat + y_hat_init) / 2.
                logpy_xz = (logpy_xz + logpy_xz_init) / 2.
            else:
                prior_in = T.concatenate([feature_vec], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in, n_input=feature_vec_len,
                    hu_encoder=hidden_size, n_latent=n_latent_z)
                z_prior = sampler(mu_prior, log_sigma_prior,
                                  deterministic=deterministic, use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl, n_latent=n_latent_z + feature_vec_len,
                                             hu_decoder=hu_decoder,
                                             n_out=num_targets, y=y)
            y_hat_recog = y_hat

    y_hat = debug_print(y_hat, 'y_hat', False)
    pat1 = T.mean(y[T.arange(y.shape[0]), T.argmax(y_hat, axis=1)])
    max_type = debug_print(T.argmax(y_hat_recog, axis=1), 'max_type', False)
    pat1_recog = T.mean(y[T.arange(y.shape[0]), max_type])
    mean_cross = T.mean(logpy_xz)
    mean_kld = T.mean(KLD)
    cost = mean_kld + mean_cross
    cost.name = 'cost'
    mean_kld.name = 'kld'
    mean_cross.name = 'cross_entropy_loss'
    pat1.name = 'p@1'
    pat1_recog.name = 'p@1_recog'
    misclassify_rate = MultiMisclassificationRate().apply(y, T.ge(y_hat, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, y_hat, mean_kld, mean_cross, pat1_recog, misclassify_rate
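# The final cost above is the usual VAE-style decomposition: a reconstruction
# term (multi-label cross-entropy) plus a KL term returned by the VAE
# builders (not shown here), with kl_weight presumably applied inside them.
# For a diagonal Gaussian posterior against a unit Gaussian prior the KL has
# the standard closed form; a plain-numpy reference, added for clarity only:
def _gaussian_kl_reference(mu, log_sigma_sq):
    import numpy as np
    # KL( N(mu, sigma^2) || N(0, I) ), summed over latent dimensions
    return -0.5 * np.sum(1 + log_sigma_sq - mu ** 2 - np.exp(log_sigma_sq), axis=1)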