def get_comb_stream(fea2obj, which_set, batch_size=None, shuffle=True, num_examples=None):
    streams = []
    for fea in fea2obj:
        obj = fea2obj[fea]
        dataset = H5PYDataset(obj.fuelfile, which_sets=(which_set,), load_in_memory=True)
        if batch_size is None:
            batch_size = dataset.num_examples
        if num_examples is None:
            num_examples = dataset.num_examples
        if shuffle:
            iterschema = ShuffledScheme(examples=num_examples, batch_size=batch_size,
                                        rng=numpy.random.RandomState(seed))
        else:
            iterschema = SequentialScheme(examples=num_examples, batch_size=batch_size)
        stream = DataStream(dataset=dataset, iteration_scheme=iterschema)
        if fea in seq_features:
            stream = CutInput(stream, obj.max_len)
            if obj.rec:
                logger.info('transforming data for recursive input')
                # required because recurrent bricks expect input of shape
                # [sequence, batch, features]
                stream = LettersTransposer(stream, which_sources=fea)
        streams.append(stream)
    stream = Merge(streams, tuple(fea2obj.keys()))
    return stream, num_examples
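# --- Hedged sketch (not from the original source): ShuffledScheme and
# SequentialScheme essentially partition a permuted (or ordered) index range
# into minibatches. The toy helper below is hypothetical and only mimics that
# behavior with plain numpy, to make the scheme choice above concrete.
import numpy

def batch_indices(num_examples, batch_size, shuffle, rng=None):
    idx = numpy.arange(num_examples)
    if shuffle:
        (rng or numpy.random.RandomState(0)).shuffle(idx)
    return [idx[i:i + batch_size] for i in range(0, num_examples, batch_size)]

# e.g. batch_indices(10, 4, shuffle=True) -> three index batches of sizes 4, 4, 2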
def build_model(self, x, config):
    logger.info('building %s model for: %s', self.nn_model, self.name)
    vocabsize = self.get_vocab_size()
    logger.info('%s vocab size is: %d', self.name, vocabsize)
    self.embeddings, self.dim_emb = self.get_embeddings()
    if self.tune_tune:
        logger.info('%s lookuptable with size (%d, %d) will be tuned.',
                    self.name, vocabsize, self.dim_emb)
        lookup = LookupTable(length=vocabsize, dim=self.dim_emb)
        lookup.allocate()
        # add_role(lookup.W, WEIGHT)
        lookup.W.name = 'lt.W'
    else:
        logger.info('%s lookuptable with size (%d, %d) will NOT be tuned.',
                    self.name, vocabsize, self.dim_emb)
        lookup = MyLookupTable(length=vocabsize, dim=self.dim_emb)
        lookup.allocate()
        lookup.name = self.name + 'lookuptable'
        lookup.W.set_value(self.embeddings)
    xemb = lookup.apply(x)
    xemb = debug_print(xemb, 'xemb', False)
    if 'cnn' in self.nn_model:
        logger.info('CNN')
        feature_vec, feature_vec_len = create_cnn_general(xemb, self.dim_emb,
                                                          self.max_len, config, self.name)
    elif self.nn_model == 'lstm':
        feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, False, config, self.name)
    elif self.nn_model == 'bilstm':
        feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, True, config, self.name)
    elif self.nn_model == 'rnn':
        feature_vec, feature_vec_len = create_rnn(xemb, self.dim_emb, config, self.name)
    elif self.nn_model == 'ff':
        feature_vec, feature_vec_len = create_ff(xemb, self.dim_emb, self.max_len, config)
    elif self.nn_model == 'mean':
        feature_vec, feature_vec_len = create_mean(xemb, self.dim_emb, self.max_len, config)
    return feature_vec, feature_vec_len
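# --- Hedged sketch (assumption, not from the source): a LookupTable brick maps
# integer token ids to rows of its weight matrix W, exactly like fancy indexing
# in numpy. The names below (toy_W, toy_ids) are illustrative only.
import numpy

vocabsize, dim_emb = 5, 3
toy_W = numpy.random.RandomState(1).randn(vocabsize, dim_emb)  # stand-in for pretrained embeddings
toy_ids = numpy.array([[0, 2, 4], [1, 1, 3]])                  # (batch, max_len) integer ids
xemb_toy = toy_W[toy_ids]                                      # (batch, max_len, dim_emb)
assert xemb_toy.shape == (2, 3, 3)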
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent, hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output', input_dim=hu_decoder, output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for z in z_list:
        y_hat = mysigmoid.apply(hid_to_out.apply(mlp1.apply(z)))  # reconstructed y
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat
    # average over the latent samples (Monte Carlo estimate)
    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
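# --- Hedged sketch: generation() averages the decoder output and the
# reconstruction loss over the list of latent samples, i.e. a Monte Carlo
# estimate of E_z[p(y|x,z)]. A minimal numpy analogue with a one-layer
# sigmoid decoder (all weights below are illustrative placeholders):
import numpy

def sigmoid(a):
    return 1.0 / (1.0 + numpy.exp(-a))

rng = numpy.random.RandomState(0)
W_dec = rng.randn(4, 2)                         # hypothetical decoder weights (n_latent=4, n_out=2)
z_samples = [rng.randn(3, 4) for _ in range(10)]  # 10 samples of z for a batch of 3
y_hat_avg = sum(sigmoid(z.dot(W_dec)) for z in z_samples) / len(z_samples)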
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder],
               name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu', input_dim=hu_encoder,
                  output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma', input_dim=hu_encoder,
                  output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
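# --- Hedged sketch: the prior network is an MLP trunk with two linear heads,
# one for the mean and one for the log-variance of a diagonal Gaussian. A toy
# numpy version (all weights are illustrative; the small scale of W_ls mirrors
# the rndstd=0.001 initialization of the sigma head above):
import numpy

rng = numpy.random.RandomState(0)
h = numpy.maximum(0, rng.randn(3, 8))                   # ReLU hidden state, batch of 3, hu_encoder=8
W_mu, W_ls = rng.randn(8, 2), 0.001 * rng.randn(8, 2)   # mu head and (small-init) log-sigma head
mu, log_sigma = h.dot(W_mu), h.dot(W_ls)                # each of shape (batch, n_latent)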
def generation_simple(z_list, n_latent, n_out, y):
    logger.info('generate output without MLP')
    hid_to_out = Linear(name='hidDecoder_to_output', input_dim=n_latent, output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for z in z_list:
        lin_out = hid_to_out.apply(z)
        y_hat = mysigmoid.apply(lin_out)  # reconstructed y
        # accumulate the (positive) reconstruction loss, consistent with generation()
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat
    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
def build_vae_basic(kl_weight, feature_vec, feature_vec_len, config, y,
                    test=False, deterministic=False, num_targets=102, n_latent_z=50,
                    hidden_size=400, hu_decoder=200):
    logger.info('build VAE recognition network using basic prior: p(z)')
    y_as_float = T.cast(y, 'float32')
    drop_prob = float(config['dropprob']) if 'dropprob' in config else 0
    mask = T.cast(srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape), 'float32')
    KLD = 0
    if test:
        # at test time y is unknown: draw z from the standard-normal prior
        gen_inp = []
        for _ in range(10):
            z_sampled = srng.normal([feature_vec.shape[0], n_latent_z])
            # z_sampled = T.cast(srng.binomial(n=1, p=0, size=[feature_vec.shape[0], n_latent_z]), 'float32')
            gen_inp.append(T.concatenate([z_sampled, feature_vec], axis=1))
    else:
        recog_input = T.concatenate([feature_vec * mask, y_as_float], axis=1)
        mu_recog, log_sigma_recog = recognition_network(
            x=recog_input, n_input=feature_vec_len + num_targets,
            hu_encoder=hidden_size, n_latent=n_latent_z)
        z_sampled = sampler(mu_recog, log_sigma_recog, deterministic=deterministic,
                            use_noise=True, input_log=True)
        gen_inp = [T.concatenate([z_sampled, feature_vec], axis=1)]
        # closed-form KL(q(z|x,y) || N(0, I)), weighted for KL annealing
        KLD = kl_weight * -0.5 * T.sum(
            1 + log_sigma_recog - mu_recog ** 2 - T.exp(log_sigma_recog), axis=1)
    y_hat, logpy_z = generation(gen_inp, n_latent=n_latent_z + feature_vec_len,
                                hu_decoder=hu_decoder, n_out=num_targets, y=y)
    # logpy_z *= 1 - T.nnet.sigmoid(kl_weight)
    return y_hat, logpy_z, KLD
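# --- Hedged sketch: the KLD term above is the closed-form KL divergence between
# the recognition Gaussian N(mu, sigma^2) (with log_sigma holding the
# log-variance, matching input_log=True in the sampler) and the standard-normal
# prior p(z) = N(0, I). A numpy check of the same formula:
import numpy

mu = numpy.array([[0.5, -1.0]])
log_sigma = numpy.array([[0.1, -0.2]])   # log of the variance
kld = -0.5 * numpy.sum(1 + log_sigma - mu**2 - numpy.exp(log_sigma), axis=1)
# kld >= 0, and kld == 0 exactly when mu == 0 and log_sigma == 0 (q equals the prior)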
def sampler(mu, log_sigma, deterministic=False, use_noise=True, input_log=False):
    log_sigma = debug_print(log_sigma, 'log_sigma', False)
    logger.info('deterministic: %s --- use noise: %s', deterministic, use_noise)
    if deterministic and use_noise:
        return mu
    if deterministic:
        # return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
    eps = srng.normal(size=mu.shape, std=1)
    if use_noise:
        # reparameterization trick: z = mu + sigma * eps
        if input_log:
            # log_sigma holds the log-variance, so sigma = exp(0.5 * log_sigma)
            return mu + T.exp(0.5 * log_sigma) * eps
        else:
            return mu + log_sigma * eps
    else:
        # return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
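# --- Hedged sketch: with input_log=True the sampler implements the standard
# reparameterization trick, z = mu + exp(0.5 * log_sigma) * eps with
# eps ~ N(0, 1), so gradients can flow through mu and log_sigma. Numpy check
# that the samples have the intended moments:
import numpy

rng = numpy.random.RandomState(0)
mu, log_sigma = 2.0, numpy.log(0.25)     # target mean 2.0, target std sqrt(0.25) = 0.5
z = mu + numpy.exp(0.5 * log_sigma) * rng.randn(100000)
print(z.mean(), z.std())                 # approximately 2.0 and 0.5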
def build_vae_conditional(kl_weight, entropy_weight, y_hat_init, feature_vec,
                          feature_vec_len, config, y, test=False, deterministic=False,
                          num_targets=102, n_latent_z=50, hidden_size=400, hu_decoder=200):
    logger.info('build VAE recognition network using conditional modeling: q(z|x,y)')
    y_as_float = T.cast(y, 'float32')
    drop_prob = float(config['dropprob']) if 'dropprob' in config else 0
    logger.info('drop out probability: %f', drop_prob)
    if test == False or True:  # always true: the recognition path is built at test time too
        mask = T.cast(srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape), 'float32')
        # feature_vec *= mask
        recog_input = T.concatenate([feature_vec * mask, y_as_float], axis=1)
    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    # recognition network q(z|x,y): sample z from the recognition distribution
    mu_recog, log_sigma_recog = recognition_network(
        x=recog_input, n_input=feature_vec_len + num_targets,
        hu_encoder=hidden_size, n_latent=n_latent_z)
    z_recog = sampler(mu_recog, log_sigma_recog, deterministic=deterministic, input_log=True)
    # prior network conditioned on the features and the initial guess y_hat_init
    prior_input = T.concatenate([feature_vec, y_hat_init], axis=1)
    prinlen = feature_vec_len + num_targets
    mu_prior, log_sigma_prior = prior_network(x=prior_input, n_input=prinlen,
                                              hu_encoder=hidden_size, n_latent=n_latent_z)
    z_prior = sampler(mu_prior, log_sigma_prior, deterministic=deterministic,
                      use_noise=True, input_log=True)
    if test:
        geninputs = [T.concatenate([z_prior, feature_vec], axis=1)]
        if not deterministic:
            for _ in range(500):
                geninputs.append(T.concatenate(
                    [sampler(mu_prior, log_sigma_prior, deterministic=False, use_noise=True),
                     feature_vec], axis=1))
        y_hat, logpy_z = generation(geninputs, n_latent=n_latent_z + feature_vec_len,
                                    hu_decoder=hu_decoder, n_out=num_targets, y=y)
        y_hat_init = 0.5 * (y_hat + y_hat_init)
        # y_hat_init = y_hat
    else:
        gen_inp = [T.concatenate([z_recog, feature_vec], axis=1)]
        y_hat, logpy_z = generation(gen_inp, n_latent=n_latent_z + feature_vec_len,
                                    hu_decoder=hu_decoder, n_out=num_targets, y=y)
    logpy_z = (logpy_xz_init + logpy_z) / 2.
    KLD = kl_weight * compute_KLD_old(mu_recog, log_sigma_recog, mu_prior, log_sigma_prior)
    entropy_weight = T.nnet.sigmoid(-kl_weight)
    entropy_weight = T.switch(entropy_weight > 0, entropy_weight, 1)
    # logpy_z *= 1 - T.nnet.sigmoid(kl_weight)
    return y_hat_init, logpy_z, KLD, y_hat
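# --- Hedged sketch: compute_KLD_old is not shown in this section. For two
# diagonal Gaussians q = N(mu_q, s_q^2) and p = N(mu_p, s_p^2) the standard
# closed form (assumed here, with log-variance inputs as elsewhere in this
# code) is the one below; the helper name is illustrative only.
import numpy

def kl_diag_gaussians(mu_q, log_var_q, mu_p, log_var_p):
    # KL(q || p) summed over the latent dimensions, one value per batch row
    return 0.5 * numpy.sum(
        log_var_p - log_var_q
        + (numpy.exp(log_var_q) + (mu_q - mu_p) ** 2) / numpy.exp(log_var_p)
        - 1, axis=1)

# sanity check: KL(q || q) == 0
m = numpy.zeros((1, 3))
v = numpy.zeros((1, 3))
assert numpy.allclose(kl_diag_gaussians(m, v, m, v), 0)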
def build_model_new(fea2obj, num_targets, config, kl_weight, entropy_weight,
                    deterministic=False, test=False):
    hidden_size = config['hidden_units'].split()
    use_highway = str_to_bool(config['use_highway']) if 'use_highway' in config else False
    use_gaus = str_to_bool(config['use_gaus']) if 'use_gaus' in config else False
    use_rec = str_to_bool(config['use_rec']) if 'use_rec' in config else True
    n_latent_z = int(config['n_latent']) if 'n_latent' in config else 0
    use_noise = str_to_bool(config['use_noise']) if 'use_noise' in config else False
    use_vae = str_to_bool(config['use_vae']) if 'use_vae' in config else False
    hu_decoder = int(config['hu_decoder']) if 'hu_decoder' in config else hidden_size
    logger.info('use_gaus: %s, use_rec: %s, use_noise: %s, use_vae: %s, hidden_size: %s, '
                'n_latent_z: %d, hu_decoder: %s, hu_encoder: %s',
                use_gaus, use_rec, use_noise, use_vae, hidden_size, n_latent_z,
                hu_decoder, hidden_size)
    init_with_type = str_to_bool(config['init_with_type']) if 'init_with_type' in config else False
    y = T.matrix('targets', dtype='int32')

    drop_prob = float(config['dropout']) if 'dropout' in config else 0

    # build the feature vector with one model, e.g., with cnn or mean or lstm
    feature_vec, feature_vec_len = build_feature_vec(fea2obj, config)

    # dropout: mask at training time, scale by the keep probability at test time
    if drop_prob > 0:
        mask = T.cast(srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape), 'float32')
        if test:
            feature_vec *= (1 - drop_prob)
        else:
            feature_vec *= mask

    # highway network: z = t * g(x) + (1 - t) * x
    if use_highway:
        g_mlp = MLP(activations=[Rectifier()], dims=[feature_vec_len, feature_vec_len],
                    name='g_mlp')
        t_mlp = MLP(activations=[Logistic()], dims=[feature_vec_len, feature_vec_len],
                    name='t_mlp')
        initialize([g_mlp, t_mlp])
        t = t_mlp.apply(feature_vec)
        z = t * g_mlp.apply(feature_vec) + (1. - t) * feature_vec
        feature_vec = z

    # MLP(s)
    logger.info('feature vec length = %s and hidden layer units = %s',
                feature_vec_len, ' '.join(hidden_size))
    if len(hidden_size) > 1:
        # 2-layer MLP on the feature vector
        mlp = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[feature_vec_len, int(hidden_size[0]), int(hidden_size[1])],
                  name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = int(hidden_size[1])
    else:
        hidden_size = int(hidden_size[0])
        mlp = MLP(activations=[Rectifier()], dims=[feature_vec_len, hidden_size],
                  name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = hidden_size

    # compute the initial guess for y_hat
    hidden_to_output = Linear(name='hidden_to_output', input_dim=last_hidden_size,
                              output_dim=num_targets)
    typemfile = None
    if init_with_type:
        typemfile = config['dsdir'] + '/_typematrix.npy'
        # typemfile = config['dsdir'] + '/_typeCooccurrMatrix.npy'
    initialize_lasthid(hidden_to_output, typemfile)
    # initialize([hidden_to_output])
    y_hat_init = Logistic().apply(hidden_to_output.apply(before_out))
    y_hat_init.name = 'y_hat_init'
    y_hat_init = debug_print(y_hat_init, 'yhat_init', False)

    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    logpy_xz = logpy_xz_init
    y_hat_recog = y_hat_init
    y_hat = y_hat_init
    KLD = 0

    if use_gaus:
        if use_vae:
            logger.info('using VAE')
            vae_conditional = str_to_bool(config['vae_cond'])
            if vae_conditional:
                y_hat, logpy_xz, KLD, y_hat_recog = build_vae_conditional(
                    kl_weight, entropy_weight, y_hat_init, feature_vec, feature_vec_len,
                    config, y, test=test, deterministic=deterministic,
                    num_targets=num_targets, n_latent_z=n_latent_z,
                    hidden_size=hidden_size, hu_decoder=hu_decoder)
            else:
                y_hat, logpy_xz, KLD = build_vae_basic(
                    kl_weight, feature_vec, feature_vec_len, config, y, test=test,
                    deterministic=deterministic, num_targets=num_targets,
                    n_latent_z=n_latent_z, hidden_size=hidden_size, hu_decoder=hu_decoder)
                y_hat_recog = y_hat
        else:
            if use_rec:
                logger.info('Not using VAE... but using recursion')
                prior_in = T.concatenate([feature_vec, y_hat_init], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in, n_input=feature_vec_len + num_targets,
                    hu_encoder=hidden_size, n_latent=n_latent_z)
                z_prior = sampler(mu_prior, log_sigma_prior, deterministic=deterministic,
                                  use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl, n_latent=n_latent_z + feature_vec_len,
                                             hu_decoder=hu_decoder, n_out=num_targets, y=y)
                y_hat = (y_hat + y_hat_init) / 2.
                logpy_xz = (logpy_xz + logpy_xz_init) / 2.
            else:
                prior_in = T.concatenate([feature_vec], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in, n_input=feature_vec_len,
                    hu_encoder=hidden_size, n_latent=n_latent_z)
                z_prior = sampler(mu_prior, log_sigma_prior, deterministic=deterministic,
                                  use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl, n_latent=n_latent_z + feature_vec_len,
                                             hu_decoder=hu_decoder, n_out=num_targets, y=y)
            y_hat_recog = y_hat

    y_hat = debug_print(y_hat, 'y_hat', False)

    # p@1: is the top-scoring predicted type among the gold types?
    pat1 = T.mean(y[T.arange(y.shape[0]), T.argmax(y_hat, axis=1)])
    max_type = debug_print(T.argmax(y_hat_recog, axis=1), 'max_type', False)
    pat1_recog = T.mean(y[T.arange(y.shape[0]), max_type])
    mean_cross = T.mean(logpy_xz)
    mean_kld = T.mean(KLD)
    cost = mean_kld + mean_cross
    cost.name = 'cost'
    mean_kld.name = 'kld'
    mean_cross.name = 'cross_entropy_loss'
    pat1.name = 'p@1'
    pat1_recog.name = 'p@1_recog'
    misclassify_rate = MultiMisclassificationRate().apply(y, T.ge(y_hat, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, y_hat, mean_kld, mean_cross, pat1_recog, misclassify_rate
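# --- Hedged sketch: the p@1 metric above checks, per example, whether the
# single highest-scoring predicted type is among the gold types. Numpy analogue
# with toy multi-hot labels (values are illustrative only):
import numpy

y_gold = numpy.array([[1, 0, 1], [0, 1, 0]])            # multi-hot gold labels
y_pred = numpy.array([[0.9, 0.2, 0.1], [0.3, 0.2, 0.8]])
pat1 = y_gold[numpy.arange(y_gold.shape[0]), numpy.argmax(y_pred, axis=1)].mean()
print(pat1)  # 0.5: the top prediction hits a gold type for the first example only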
wlev = False
if 'all' in nn_type:
    x_seqwords, x_let, y, x_emb = model.inputs
    xin = [x_let, x_emb, x_seqwords]
elif 'ch_only' in nn_type or 'ch_bilstm' in nn_type:
    x_let, y = model.inputs
    xin = [x_let]
elif 'ch_men_cnn' in nn_type:
    x_seqw, x_let, y = model.inputs
    xin = [x_seqw, x_let]
    wlev = True
elif 'w_lev' in nn_type:
    x_mnt, y = model.inputs
    xin = [x_mnt]
else:
    x_let, y, x_emb = model.inputs
    xin = [x_let, x_emb]

get_mlp_out = theano.function(xin, mlp_output)
edev, etest = get_entity_metadata(hdf5_file, feature_name='letters')
logger.info('%d dev and %d test entities', len(edev), len(etest))
logger.info('Starting to apply on test inputs')
applypredict(get_mlp_out, test_stream, etest, num_samples_test, batch_size,
             sys.argv[1] + '.mlpouts', ix_to_target, len(xin), wlev)
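# --- Hedged sketch: theano.function compiles a symbolic graph into a callable,
# which is how get_mlp_out turns the model inputs into mlp_output above. A
# minimal self-contained example (requires Theano; the names and the sigmoid
# graph are illustrative, not the model's actual graph):
import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
predict = theano.function([x], T.nnet.sigmoid(x))
print(predict(numpy.zeros((2, 3), dtype=theano.config.floatX)))  # all entries 0.5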