Example #1
def get_comb_stream(fea2obj,
                    which_set,
                    batch_size=None,
                    shuffle=True,
                    num_examples=None):
    streams = []
    for fea in fea2obj:
        obj = fea2obj[fea]
        dataset = H5PYDataset(obj.fuelfile,
                              which_sets=(which_set, ),
                              load_in_memory=True)
        if batch_size is None: batch_size = dataset.num_examples
        if num_examples is None: num_examples = dataset.num_examples
        if shuffle:
            iterschema = ShuffledScheme(examples=num_examples,
                                        batch_size=batch_size,
                                        rng=numpy.random.RandomState(seed))
        else:
            iterschema = SequentialScheme(examples=num_examples,
                                          batch_size=batch_size)
        stream = DataStream(dataset=dataset, iteration_scheme=iterschema)
        if fea in seq_features:
            stream = CutInput(stream, obj.max_len)
            if obj.rec:
                logger.info('transforming data for recurrent input')
                stream = LettersTransposer(
                    stream, which_sources=fea
                )  # Recurrent bricks expect input shaped [sequence, batch, features]
        streams.append(stream)
    stream = Merge(streams, tuple(fea2obj.keys()))
    return stream, num_examples
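For context, a minimal self-contained sketch of the Fuel scheme/stream pattern used above, with a tiny synthetic dataset standing in for the H5PYDataset (note that get_comb_stream itself also relies on the globals seed and seq_features, which are not shown in this listing):

import numpy
from collections import OrderedDict
from fuel.datasets import IndexableDataset
from fuel.schemes import ShuffledScheme
from fuel.streams import DataStream

# toy in-memory dataset standing in for the H5PYDataset above
features = numpy.arange(20, dtype='float32').reshape(10, 2)
dataset = IndexableDataset(OrderedDict([('features', features)]))
scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=4,
                        rng=numpy.random.RandomState(1234))
stream = DataStream(dataset=dataset, iteration_scheme=scheme)
for (batch,) in stream.get_epoch_iterator():
    print(batch.shape)  # (4, 2), (4, 2), (2, 2)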
Example #2
    def build_model(self, x, config):
        logger.info('building %s model for: %s ', self.nn_model, self.name)
        vocabsize = self.get_vocab_size()
        logger.info('%s vocab size is: %d', self.name, vocabsize)
        self.embeddings, self.dim_emb = self.get_embeddings() 
        if self.tune_tune:
            logger.info('%s lookuptable with size (%d, %d) will be tuned.', self.name, vocabsize, self.dim_emb)
            lookup = LookupTable(length=vocabsize, dim=self.dim_emb)
            lookup.allocate()
#             add_role(lookup.W, WEIGHT)
            lookup.W.name = 'lt.W'
        else:
            logger.info('%s lookuptable with size (%d, %d) will NOT be tuned.', self.name, vocabsize, self.dim_emb)
            lookup = MyLookupTable(length=vocabsize, dim=self.dim_emb)
            lookup.allocate()
        lookup.name = self.name + 'lookuptable'
        lookup.W.set_value(self.embeddings)
        xemb = lookup.apply(x)
        xemb = debug_print(xemb, 'xemb', False)
        if 'cnn' in self.nn_model:
            logger.info('CNN')
            feature_vec, feature_vec_len = create_cnn_general(xemb, self.dim_emb, self.max_len, config, self.name)
        elif self.nn_model == 'lstm':
            feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, False, config, self.name)
        elif self.nn_model == 'bilstm':
            feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, True, config, self.name)
        elif self.nn_model == 'rnn':
            feature_vec, feature_vec_len = create_rnn(xemb, self.dim_emb, config, self.name)
        elif self.nn_model == 'ff':
            feature_vec, feature_vec_len = create_ff(xemb, self.dim_emb, self.max_len, config)
        elif self.nn_model == 'mean':
            feature_vec, feature_vec_len = create_mean(xemb, self.dim_emb, self.max_len, config)
        return feature_vec, feature_vec_len
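A minimal sketch of the lookup-table path in build_model, assuming Blocks' LookupTable and a small random matrix in place of the pretrained embeddings returned by get_embeddings():

import numpy
import theano
import theano.tensor as T
from blocks.bricks.lookup import LookupTable

vocabsize, dim_emb = 100, 8
embeddings = numpy.random.randn(vocabsize, dim_emb).astype('float32')
x = T.imatrix('x')                      # (batch, sequence) of word indices
lookup = LookupTable(length=vocabsize, dim=dim_emb)
lookup.allocate()
lookup.W.set_value(embeddings)          # initialize from (pre-trained) vectors
xemb = lookup.apply(x)                  # (batch, sequence, dim_emb)
f = theano.function([x], xemb)
print(f(numpy.zeros((2, 5), dtype='int32')).shape)  # (2, 5, 8)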
Example #3
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent,
                hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()],
               dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder,
                        output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(
            mlp1.apply(z)))  #reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
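The loop above averages the reconstruction loss and the predictions over the list of latent samples. A plain-numpy sketch of that averaging, assuming cross_entropy_loss is a standard binary cross-entropy and using a bare sigmoid decoder in place of the MLP + Linear + Logistic stack:

import numpy as np

def bce(y_hat, y):
    # mean binary cross-entropy (what cross_entropy_loss presumably computes)
    return -np.mean(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

rng = np.random.RandomState(0)
y = rng.binomial(1, 0.5, size=(4, 3)).astype('float32')
z_list = [rng.randn(4, 5).astype('float32') for _ in range(3)]
W = rng.randn(5, 3) * 0.1               # hypothetical decoder weights
agg_logpy_xz, agg_y_hat = 0.0, 0.0
for z in z_list:
    y_hat = 1.0 / (1.0 + np.exp(-z.dot(W)))  # sigmoid decoder
    agg_logpy_xz += bce(y_hat, y)
    agg_y_hat += y_hat
agg_logpy_xz /= len(z_list)
agg_y_hat /= len(z_list)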
Example #4
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder], name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu', input_dim=hu_encoder, output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma', input_dim=hu_encoder, output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
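Shape-wise, prior_network is one shared ReLU layer feeding two separate linear heads for mu and log_sigma; a small numpy sketch (all shapes and weights here are illustrative assumptions):

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(4, 10)                    # batch of 4, n_input = 10
W_h = rng.randn(10, 8) * 0.1            # hidden layer, hu_encoder = 8
h = np.maximum(0, x.dot(W_h))           # Rectifier, as in mlp1
W_mu = rng.randn(8, 5) * 0.1            # mu head, n_latent = 5
W_ls = rng.randn(8, 5) * 0.001          # log_sigma head, small init like rndstd=0.001
mu, log_sigma = h.dot(W_mu), h.dot(W_ls)  # both (4, 5)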
Example #5
def generation_simple(z_list, n_latent, n_out, y):
    logger.info('generate output without MLP')
    hid_to_out = Linear(name='hidDecoder_to_output', input_dim=n_latent, output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for z in z_list:
        lin_out = hid_to_out.apply(z)
        y_hat = mysigmoid.apply(lin_out) #reconstructed x
        logpy_xz = -cross_entropy_loss(y_hat, y)
        agg_logpy_xz += logpy_xz
        agg_y_hat += y_hat
    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
Example #6
def build_vae_basic(kl_weight,
                    feature_vec,
                    feature_vec_len,
                    config,
                    y,
                    test=False,
                    deterministic=False,
                    num_targets=102,
                    n_latent_z=50,
                    hidden_size=400,
                    hu_decoder=200):
    logger.info('build VAE recognition network using basic prior: p(z)')
    y_as_float = T.cast(y, 'float32')
    drop_prob = float(config['dropprob']) if 'dropprob' in config else 0
    mask = T.cast(srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape),
                  'float32')
    KLD = 0
    if test:
        gen_inp = []
        for _ in range(10):
            z_sampled = srng.normal([feature_vec.shape[0], n_latent_z])
            #z_sampled = T.cast(srng.binomial(n=1, p=0, size=[feature_vec.shape[0], n_latent_z]), 'float32')
            gen_inp.append(T.concatenate([z_sampled, feature_vec], axis=1))
    else:
        recog_input = T.concatenate([feature_vec * mask, y_as_float], axis=1)
        mu_recog, log_sigma_recog = recognition_network(
            x=recog_input,
            n_input=feature_vec_len + num_targets,
            hu_encoder=hidden_size,
            n_latent=n_latent_z)
        z_sampled = sampler(mu_recog,
                            log_sigma_recog,
                            deterministic=deterministic,
                            use_noise=True,
                            input_log=True)
        gen_inp = [T.concatenate([z_sampled, feature_vec], axis=1)]
        KLD = kl_weight * -0.5 * T.sum(
            1 + log_sigma_recog - mu_recog**2 - T.exp(log_sigma_recog), axis=1)
    y_hat, logpy_z = generation(gen_inp,
                                n_latent=n_latent_z + feature_vec_len,
                                hu_decoder=hu_decoder,
                                n_out=num_targets,
                                y=y)
    #logpy_z *= 1 - T.nnet.sigmoid(kl_weight)
    return y_hat, logpy_z, KLD
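The KLD line implements the closed-form KL divergence between the recognition Gaussian q(z|x, y) = N(mu, sigma^2), with log_sigma = log(sigma^2), and the standard-normal prior p(z). A small numpy check of that formula against a Monte Carlo estimate (kl_weight omitted):

import numpy as np

rng = np.random.RandomState(0)
mu = np.array([0.5, -1.0])
log_sigma = np.array([0.1, -0.3])       # log(sigma^2), as the code assumes
sigma = np.exp(0.5 * log_sigma)

# closed form, matching the KLD expression above
kld = -0.5 * np.sum(1 + log_sigma - mu**2 - np.exp(log_sigma))

# Monte Carlo estimate of E_q[log q(z) - log p(z)]
z = mu + sigma * rng.randn(200000, 2)
log_q = -0.5 * (np.log(2 * np.pi) + log_sigma + (z - mu)**2 / sigma**2)
log_p = -0.5 * (np.log(2 * np.pi) + z**2)
print(kld, np.mean(np.sum(log_q - log_p, axis=1)))  # agree to ~2 decimals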
Example #7
def generation_simple(z_list, n_latent, n_out, y):
    logger.info('generate output without MLP')
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=n_latent,
                        output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for z in z_list:
        lin_out = hid_to_out.apply(z)
        y_hat = mysigmoid.apply(lin_out)  #reconstructed x
        logpy_xz = -cross_entropy_loss(y_hat, y)
        agg_logpy_xz += logpy_xz
        agg_y_hat += y_hat
    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
Example #8
def sampler(mu, log_sigma, deterministic=False, use_noise=True, input_log=False):
    log_sigma = debug_print(log_sigma, 'log_sigma', False)
    logger.info('deterministic: %s --- use noise: %s', deterministic, use_noise)
    if deterministic and use_noise:
        return mu
    if deterministic:
        #return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
    eps = srng.normal(size=mu.shape, std=1)
    # Reparametrize
    if use_noise:
        if input_log:
            return mu + T.exp(0.5 * log_sigma) * eps
        else: 
            return mu + log_sigma * eps
    else:
        #return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
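A numpy sketch of the reparameterization step in the input_log=True branch: z = mu + exp(0.5 * log_sigma) * eps produces samples whose mean and standard deviation match mu and exp(0.5 * log_sigma):

import numpy as np

rng = np.random.RandomState(0)
mu, log_sigma = 1.5, -0.5               # log_sigma interpreted as log(sigma^2)
eps = rng.randn(100000)
z = mu + np.exp(0.5 * log_sigma) * eps  # reparameterization trick
print(z.mean(), z.std())                # ~1.5 and ~exp(-0.25) ~= 0.779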
Example #9
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent, hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_latent, hu_decoder], name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output', input_dim=hu_decoder, output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(mlp1.apply(z))) #reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat
    
    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
Example #10
    def build_model(self, x, config):
        logger.info('building %s model for: %s ', self.nn_model, self.name)
        vocabsize = self.get_vocab_size()
        logger.info('%s vocab size is: %d', self.name, vocabsize)
        self.embeddings, self.dim_emb = self.get_embeddings()
        if self.tune_tune:
            logger.info('%s lookuptable with size (%d, %d) will be tuned.',
                        self.name, vocabsize, self.dim_emb)
            lookup = LookupTable(length=vocabsize, dim=self.dim_emb)
            lookup.allocate()
            #             add_role(lookup.W, WEIGHT)
            lookup.W.name = 'lt.W'
        else:
            logger.info('%s lookuptable with size (%d, %d) will NOT be tuned.',
                        self.name, vocabsize, self.dim_emb)
            lookup = MyLookupTable(length=vocabsize, dim=self.dim_emb)
            lookup.allocate()
        lookup.name = self.name + 'lookuptable'
        lookup.W.set_value(self.embeddings)
        xemb = lookup.apply(x)
        xemb = debug_print(xemb, 'xemb', False)
        if 'cnn' in self.nn_model:
            logger.info('CNN')
            feature_vec, feature_vec_len = create_cnn_general(
                xemb, self.dim_emb, self.max_len, config, self.name)
        elif self.nn_model == 'lstm':
            feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb,
                                                       False, config,
                                                       self.name)
        elif self.nn_model == 'bilstm':
            feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb,
                                                       True, config, self.name)
        elif self.nn_model == 'rnn':
            feature_vec, feature_vec_len = create_rnn(xemb, self.dim_emb,
                                                      config, self.name)
        elif self.nn_model == 'ff':
            feature_vec, feature_vec_len = create_ff(xemb, self.dim_emb,
                                                     self.max_len, config)
        elif self.nn_model == 'mean':
            feature_vec, feature_vec_len = create_mean(xemb, self.dim_emb,
                                                       self.max_len, config)
        return feature_vec, feature_vec_len
Example #11
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input,
                hu_encoder)
    mlp1 = MLP(activations=[Rectifier()],
               dims=[n_input, hu_encoder],
               name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu',
                  input_dim=hu_encoder,
                  output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma',
                  input_dim=hu_encoder,
                  output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
Example #12
def get_comb_stream(fea2obj, which_set, batch_size=None, shuffle=True, num_examples=None):
    streams = []
    for fea in fea2obj:
        obj = fea2obj[fea]
        dataset = H5PYDataset(obj.fuelfile, which_sets=(which_set,),load_in_memory=True)
        if batch_size is None: batch_size = dataset.num_examples
        if num_examples is None: num_examples = dataset.num_examples
        if shuffle:
            iterschema = ShuffledScheme(examples=num_examples, batch_size=batch_size, rng=numpy.random.RandomState(seed))
        else:
            iterschema = SequentialScheme(examples=num_examples, batch_size=batch_size)
        stream = DataStream(dataset=dataset, iteration_scheme=iterschema)
        if fea in seq_features:
            stream = CutInput(stream, obj.max_len)
            if obj.rec:
                logger.info('transforming data for recurrent input')
                stream = LettersTransposer(stream, which_sources=fea)  # Recurrent bricks expect input shaped [sequence, batch, features]
        streams.append(stream)
    stream = Merge(streams, tuple(fea2obj.keys()))
    return stream, num_examples
Example #13
def build_vae_basic(kl_weight, feature_vec, feature_vec_len, config, y, test=False, deterministic=False, num_targets=102, n_latent_z=50, hidden_size=400, hu_decoder=200):
    logger.info('build VAE recognition network using basic prior: p(z)')
    y_as_float = T.cast(y, 'float32')
    drop_prob = float(config['dropprob']) if 'dropprob' in config else 0
    mask = T.cast(srng.binomial(n=1, p=1-drop_prob, size=feature_vec.shape), 'float32')
    KLD = 0
    if test:
        gen_inp = []
        for _ in range(10):
            z_sampled = srng.normal([feature_vec.shape[0], n_latent_z])
            #z_sampled = T.cast(srng.binomial(n=1, p=0, size=[feature_vec.shape[0], n_latent_z]), 'float32')
            gen_inp.append(T.concatenate([z_sampled, feature_vec], axis=1))
    else:
        recog_input = T.concatenate([feature_vec * mask, y_as_float], axis=1)
        mu_recog, log_sigma_recog = recognition_network(x=recog_input, n_input=feature_vec_len + num_targets, hu_encoder=hidden_size, n_latent=n_latent_z)
        z_sampled = sampler(mu_recog, log_sigma_recog, deterministic=deterministic, use_noise=True, input_log=True)
        gen_inp = [T.concatenate([z_sampled, feature_vec], axis=1)]
        KLD = kl_weight * -0.5 * T.sum(1 + log_sigma_recog - mu_recog**2 - T.exp(log_sigma_recog), axis=1)
    y_hat, logpy_z = generation(gen_inp, n_latent=n_latent_z+feature_vec_len, hu_decoder=hu_decoder, n_out=num_targets, y=y)
    #logpy_z *= 1 - T.nnet.sigmoid(kl_weight)
    return y_hat, logpy_z, KLD
Example #14
def sampler(mu,
            log_sigma,
            deterministic=False,
            use_noise=True,
            input_log=False):
    log_sigma = debug_print(log_sigma, 'log_sigma', False)
    logger.info('deterministic: %s --- use noise: %s', deterministic,
                use_noise)
    if deterministic and use_noise:
        return mu
    if deterministic:
        #return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
    eps = srng.normal(size=mu.shape, std=1)
    # Reparametrize
    if use_noise:
        if input_log:
            return mu + T.exp(0.5 * log_sigma) * eps
        else:
            return mu + log_sigma * eps
    else:
        #return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
Example #15
def build_vae_conditoinal(kl_weight, entropy_weight, y_hat_init, feature_vec, feature_vec_len, config, y,
        test=False, deterministic=False, num_targets=102, n_latent_z=50, hidden_size=400, hu_decoder=200):
    logger.info('build VAE recognition network using conditional modeling: q(z|x,y)')
    y_as_float = T.cast(y, 'float32')
    drop_prob = float(config['dropprob']) if 'dropprob' in config else 0
    logger.info('drop out probability: %s', drop_prob)
    # the original condition here was `if test == False or True:`, which is
    # always True, so the dropout mask is always built
    mask = T.cast(srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape), 'float32')
#         feature_vec *= mask
    recog_input = T.concatenate([feature_vec * mask, y_as_float], axis=1)
    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    # recognition network q(z|x,y) #sampling z from recognition
    mu_recog, log_sigma_recog = recognition_network(x=recog_input, n_input=feature_vec_len+num_targets, hu_encoder=hidden_size, n_latent=n_latent_z)
    z_recog = sampler(mu_recog, log_sigma_recog, deterministic=deterministic, input_log=True)
    
    prior_input = T.concatenate([feature_vec, y_hat_init], axis=1)
    prinlen = feature_vec_len + num_targets
    mu_prior, log_sigma_prior = prior_network(x=prior_input, n_input=prinlen, hu_encoder=hidden_size, n_latent=n_latent_z)
    z_prior = sampler(mu_prior, log_sigma_prior, deterministic=deterministic, use_noise=True, input_log=True)
    
    if test:
        geninputs = [T.concatenate([z_prior, feature_vec], axis=1)]
        if not deterministic:
            for _ in range(500):
                geninputs.append(T.concatenate([sampler(mu_prior, log_sigma_prior, deterministic=False, use_noise=True), feature_vec], axis=1))
        y_hat, logpy_z = generation(geninputs, n_latent=n_latent_z+feature_vec_len, hu_decoder=hu_decoder, n_out=num_targets, y=y)
        y_hat_init = 0.5 * (y_hat + y_hat_init)
#         y_hat_init = y_hat 
    else:
        gen_inp = [T.concatenate([z_recog, feature_vec], axis=1)]
        y_hat, logpy_z = generation(gen_inp, n_latent=n_latent_z+feature_vec_len, hu_decoder=hu_decoder, n_out=num_targets, y=y)
    logpy_z = (logpy_xz_init + logpy_z) / 2.
    KLD = kl_weight * compute_KLD_old(mu_recog, log_sigma_recog, mu_prior, log_sigma_prior)
    # note: entropy_weight is recomputed here but never used in the return value
    entropy_weight = T.nnet.sigmoid(-kl_weight)
    entropy_weight = T.switch(entropy_weight > 0, entropy_weight, 1)
#     logpy_z *= 1 - T.nnet.sigmoid(kl_weight)
    return y_hat_init, logpy_z, KLD, y_hat
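compute_KLD_old is not shown in this listing; for two diagonal Gaussians q = N(mu_q, sigma_q^2) and p = N(mu_p, sigma_p^2) the closed form it presumably implements is, as a hedged numpy sketch:

import numpy as np

def kld_gauss(mu_q, log_sigma_q, mu_p, log_sigma_p):
    # KL(q || p) for diagonal Gaussians, with log_sigma = log(sigma^2);
    # an assumption about what compute_KLD_old computes, not its actual code
    var_q, var_p = np.exp(log_sigma_q), np.exp(log_sigma_p)
    return 0.5 * np.sum(log_sigma_p - log_sigma_q
                        + (var_q + (mu_q - mu_p)**2) / var_p - 1, axis=-1)

print(kld_gauss(np.zeros(2), np.zeros(2), np.zeros(2), np.zeros(2)))  # 0.0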
Example #16
def build_vae_conditoinal(kl_weight,
                          entropy_weight,
                          y_hat_init,
                          feature_vec,
                          feature_vec_len,
                          config,
                          y,
                          test=False,
                          deterministic=False,
                          num_targets=102,
                          n_latent_z=50,
                          hidden_size=400,
                          hu_decoder=200):
    logger.info(
        'build VAE recognition network using conditional modeling: q(z|x,y)')
    y_as_float = T.cast(y, 'float32')
    drop_prob = float(config['dropprob']) if 'dropprob' in config else 0
    logger.info('drop out probability: %s', drop_prob)
    # the original condition here was `if test == False or True:`, which is
    # always True, so the dropout mask is always built
    mask = T.cast(
        srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape),
        'float32')
#         feature_vec *= mask
    recog_input = T.concatenate([feature_vec * mask, y_as_float], axis=1)
    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    # recognition network q(z|x,y) #sampling z from recognition
    mu_recog, log_sigma_recog = recognition_network(x=recog_input,
                                                    n_input=feature_vec_len +
                                                    num_targets,
                                                    hu_encoder=hidden_size,
                                                    n_latent=n_latent_z)
    z_recog = sampler(mu_recog,
                      log_sigma_recog,
                      deterministic=deterministic,
                      input_log=True)

    prior_input = T.concatenate([feature_vec, y_hat_init], axis=1)
    prinlen = feature_vec_len + num_targets
    mu_prior, log_sigma_prior = prior_network(x=prior_input,
                                              n_input=prinlen,
                                              hu_encoder=hidden_size,
                                              n_latent=n_latent_z)
    z_prior = sampler(mu_prior,
                      log_sigma_prior,
                      deterministic=deterministic,
                      use_noise=True,
                      input_log=True)

    if test:
        geninputs = [T.concatenate([z_prior, feature_vec], axis=1)]
        if not deterministic:
            for _ in range(500):
                geninputs.append(
                    T.concatenate([
                        sampler(mu_prior,
                                log_sigma_prior,
                                deterministic=False,
                                use_noise=True), feature_vec
                    ],
                                  axis=1))
        y_hat, logpy_z = generation(geninputs,
                                    n_latent=n_latent_z + feature_vec_len,
                                    hu_decoder=hu_decoder,
                                    n_out=num_targets,
                                    y=y)
        y_hat_init = 0.5 * (y_hat + y_hat_init)
        #         y_hat_init = y_hat
    else:
        gen_inp = [T.concatenate([z_recog, feature_vec], axis=1)]
        y_hat, logpy_z = generation(gen_inp,
                                    n_latent=n_latent_z + feature_vec_len,
                                    hu_decoder=hu_decoder,
                                    n_out=num_targets,
                                    y=y)
    logpy_z = (logpy_xz_init + logpy_z) / 2.
    KLD = kl_weight * compute_KLD_old(mu_recog, log_sigma_recog, mu_prior,
                                      log_sigma_prior)
    # note: entropy_weight is recomputed here but never used in the return value
    entropy_weight = T.nnet.sigmoid(-kl_weight)
    entropy_weight = T.switch(entropy_weight > 0, entropy_weight, 1)
    #     logpy_z *= 1 - T.nnet.sigmoid(kl_weight)
    return y_hat_init, logpy_z, KLD, y_hat
Example #17
def build_model_new(fea2obj, num_targets, config, kl_weight, entropy_weight, deterministic=False, test=False ):
    hidden_size = config['hidden_units'].split()
    use_highway = str_to_bool(config['use_highway']) if 'use_highway' in config else False
    use_gaus = str_to_bool(config['use_gaus']) if 'use_gaus' in config else False 
    use_rec = str_to_bool(config['use_rec']) if 'use_rec' in config else True
    n_latent_z = int(config['n_latent']) if 'use_gaus' in config else 0  # note: guards on 'use_gaus' but reads 'n_latent'
    use_noise = str_to_bool(config['use_noise']) if 'use_noise' in config else False
    use_vae = str_to_bool(config['use_vae']) if 'use_vae' in config else False
    hu_decoder = int(config['hu_decoder']) if 'hu_decoder' in config else hidden_size
    logger.info('use_gaus: %s, use_rec: %s, use_noise: %s, use_vae: %s, hidden_size: %s, n_latent_z: %d, hu_decoder: %s, hu_encoder: %s', use_gaus, use_rec, use_noise, use_vae, hidden_size, n_latent_z, hu_decoder, hidden_size)
    init_with_type = str_to_bool(config['init_with_type']) if 'init_with_type' in config else False
    y = T.matrix('targets', dtype='int32')
    
    drop_prob = float(config['dropout']) if 'dropout' in config else 0
    
    #build the feature vector with one model, e.g., with cnn or mean or lstm
    feature_vec, feature_vec_len = build_feature_vec(fea2obj, config)
    
    #drop out
    if drop_prob > 0:
        mask = T.cast(srng.binomial(n=1, p=1-drop_prob, size=feature_vec.shape), 'float32')
        if test:
            feature_vec *= (1 - drop_prob)
        else:
            feature_vec *= mask
            

    #Highway network
    if use_highway:
        g_mlp = MLP(activations=[Rectifier()], dims=[feature_vec_len, feature_vec_len], name='g_mlp')
        t_mlp = MLP(activations=[Logistic()], dims=[feature_vec_len, feature_vec_len], name='t_mlp')
        initialize([g_mlp, t_mlp])
        t = t_mlp.apply(feature_vec)
        z = t * g_mlp.apply(feature_vec) + (1. - t) * feature_vec
        feature_vec = z
        
    #MLP(s)         
    logger.info('feature vec length = %s and hidden layer units = %s', feature_vec_len, ' '.join(hidden_size))
    if len(hidden_size) > 1:
        #2 MLPs on the feature vector
        mlp = MLP(activations=[Rectifier(), Rectifier()], dims=[feature_vec_len, int(hidden_size[0]), int(hidden_size[1])], name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = int(hidden_size[1])
    else:
        hidden_size = int(hidden_size[0])
        mlp = MLP(activations=[Rectifier()], dims=[feature_vec_len, hidden_size], name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = hidden_size

        
    #compute y_hat initial guess
    hidden_to_output = Linear(name='hidden_to_output', input_dim=last_hidden_size, output_dim=num_targets)
    
    typemfile = None
    if init_with_type:
        typemfile = config['dsdir'] + '/_typematrix.npy'
        #typemfile = config['dsdir'] + '/_typeCooccurrMatrix.npy'
        
    initialize_lasthid(hidden_to_output, typemfile)
#         initialize([hidden_to_output])
    
    y_hat_init = Logistic().apply(hidden_to_output.apply(before_out))
    y_hat_init.name = 'y_hat_init'
    y_hat_init = debug_print(y_hat_init, 'yhat_init', False)
    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    logpy_xz = logpy_xz_init  
    y_hat_recog = y_hat_init
    y_hat = y_hat_init
    KLD = 0
    
    if use_gaus:     
        if use_vae:
            logger.info('using VAE')
            vae_conditional = str_to_bool(config['vae_cond'])
            if vae_conditional:
                y_hat, logpy_xz, KLD, y_hat_recog = build_vae_conditoinal(kl_weight, entropy_weight, y_hat_init, feature_vec, feature_vec_len, config, y,
                    test=test, deterministic=deterministic, num_targets=num_targets, n_latent_z=n_latent_z, hidden_size=hidden_size, hu_decoder=hu_decoder)
            else:
                y_hat, logpy_xz, KLD = build_vae_basic(kl_weight, feature_vec, feature_vec_len, config, y, 
                    test=test, deterministic=deterministic, num_targets=num_targets, n_latent_z=n_latent_z, hidden_size=hidden_size, hu_decoder=hu_decoder)
                y_hat_recog = y_hat
        else:
            if use_rec:
                logger.info('Not using VAE... but using recursion')
                prior_in = T.concatenate([feature_vec, y_hat_init], axis=1)
                mu_prior, log_sigma_prior = prior_network(x=prior_in, n_input=feature_vec_len+num_targets, hu_encoder=hidden_size, n_latent=n_latent_z)
                z_prior = sampler(mu_prior, log_sigma_prior, deterministic=deterministic, use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl, n_latent=n_latent_z+feature_vec_len, hu_decoder=hu_decoder, n_out=num_targets, y=y)
                y_hat = (y_hat + y_hat_init) / 2. 
                logpy_xz = (logpy_xz + logpy_xz_init) / 2.
            else:
                prior_in = T.concatenate([feature_vec], axis=1)
                mu_prior, log_sigma_prior = prior_network(x=prior_in, n_input=feature_vec_len, hu_encoder=hidden_size, n_latent=n_latent_z)
                z_prior = sampler(mu_prior, log_sigma_prior, deterministic=deterministic, use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl, n_latent=n_latent_z+feature_vec_len, hu_decoder=hu_decoder, n_out=num_targets, y=y)
            
            y_hat_recog = y_hat
                

    y_hat = debug_print(y_hat, 'y_hat', False)

    pat1 = T.mean(y[T.arange(y.shape[0]), T.argmax(y_hat, axis=1)])
    max_type = debug_print(T.argmax(y_hat_recog, axis=1), 'max_type', False)
    pat1_recog = T.mean(y[T.arange(y.shape[0]), max_type])
    mean_cross = T.mean(logpy_xz)
    mean_kld = T.mean(KLD)
    cost = mean_kld + mean_cross 
    cost.name = 'cost'
    mean_kld.name = 'kld'
    mean_cross.name = 'cross_entropy_loss'
    pat1.name = 'p@1'
    pat1_recog.name = 'p@1_recog'
    misclassify_rate = MultiMisclassificationRate().apply(y, T.ge(y_hat, 0.5))
    misclassify_rate.name = 'error_rate'

    return cost, pat1, y_hat, mean_kld, mean_cross, pat1_recog, misclassify_rate
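The highway block in the middle of build_model_new computes z = t * g(x) + (1 - t) * x, where t is a sigmoid gate that interpolates between the transformed and the untouched features; a numpy sketch with hypothetical weights:

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(4, 6)                     # a batch of feature vectors
W_g = rng.randn(6, 6) * 0.1             # hypothetical transform weights (g_mlp)
W_t = rng.randn(6, 6) * 0.1             # hypothetical gate weights (t_mlp)

g = np.maximum(0, x.dot(W_g))           # Rectifier branch
t = 1.0 / (1.0 + np.exp(-x.dot(W_t)))   # Logistic gate
z = t * g + (1.0 - t) * x               # highway mix of transform and input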
Example #18
wlev = False
if 'all' in nn_type:
    x_seqwords, x_let, y, x_emb = model.inputs
    xin = [x_let, x_emb, x_seqwords]
elif 'ch_only' in nn_type or 'ch_bilstm' in nn_type:
    x_let, y = model.inputs
    xin = [x_let]
elif 'ch_men_cnn' in nn_type:
    x_seqw, x_let, y = model.inputs
    xin = [x_seqw, x_let]
    wlev = True
elif 'w_lev' in nn_type: 
    x_mnt, y = model.inputs
    xin = [x_mnt]
else:
    x_let, y, x_emb = model.inputs
    xin = [x_let, x_emb]

get_mlp_out = theano.function(xin, mlp_output)

edev, etest = get_entity_metadata(hdf5_file, feature_name='letters')
print(len(edev), len(etest))

logger.info('Starting to apply on test inputs')
applypredict(get_mlp_out, test_stream, etest, num_samples_test, batch_size, sys.argv[1] + '.mlpouts', ix_to_target, len(xin), wlev)
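theano.function compiles the symbolic graph into an ordinary callable; a self-contained sketch of the pattern behind get_mlp_out (the shared weight here is a stand-in, not the model's actual parameters):

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
W = theano.shared(numpy.ones((3, 2), dtype=theano.config.floatX), name='W')
mlp_output = T.nnet.sigmoid(T.dot(x, W))
f = theano.function([x], mlp_output)    # like theano.function(xin, mlp_output)
print(f(numpy.ones((4, 3), dtype=theano.config.floatX)).shape)  # (4, 2)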
Example #19
def build_model_new(fea2obj,
                    num_targets,
                    config,
                    kl_weight,
                    entropy_weight,
                    deterministic=False,
                    test=False):
    hidden_size = config['hidden_units'].split()
    use_highway = str_to_bool(
        config['use_highway']) if 'use_highway' in config else False
    use_gaus = str_to_bool(
        config['use_gaus']) if 'use_gaus' in config else False
    use_rec = str_to_bool(config['use_rec']) if 'use_rec' in config else True
    n_latent_z = int(config['n_latent']) if 'use_gaus' in config else 0  # note: guards on 'use_gaus' but reads 'n_latent'
    use_noise = str_to_bool(
        config['use_noise']) if 'use_noise' in config else False
    use_vae = str_to_bool(config['use_vae']) if 'use_vae' in config else False
    hu_decoder = int(
        config['hu_decoder']) if 'hu_decoder' in config else hidden_size
    logger.info(
        'use_gaus: %s, use_rec: %s, use_noise: %s, use_vae: %s, hidden_size: %s, n_latent_z: %d, hu_decoder: %s, hu_encoder: %s',
        use_gaus, use_rec, use_noise, use_vae, hidden_size, n_latent_z,
        hu_decoder, hidden_size)
    init_with_type = str_to_bool(
        config['init_with_type']) if 'init_with_type' in config else False
    y = T.matrix('targets', dtype='int32')

    drop_prob = float(config['dropout']) if 'dropout' in config else 0

    #build the feature vector with one model, e.g., with cnn or mean or lstm
    feature_vec, feature_vec_len = build_feature_vec(fea2obj, config)

    #drop out
    if drop_prob > 0:
        mask = T.cast(
            srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape),
            'float32')
        if test:
            feature_vec *= (1 - drop_prob)
        else:
            feature_vec *= mask

    #Highway network
    if use_highway:
        g_mlp = MLP(activations=[Rectifier()],
                    dims=[feature_vec_len, feature_vec_len],
                    name='g_mlp')
        t_mlp = MLP(activations=[Logistic()],
                    dims=[feature_vec_len, feature_vec_len],
                    name='t_mlp')
        initialize([g_mlp, t_mlp])
        t = t_mlp.apply(feature_vec)
        z = t * g_mlp.apply(feature_vec) + (1. - t) * feature_vec
        feature_vec = z

    #MLP(s)
    logger.info('feature vec length = %s and hidden layer units = %s',
                feature_vec_len, ' '.join(hidden_size))
    if len(hidden_size) > 1:
        #2 MLPs on the feature vector
        mlp = MLP(
            activations=[Rectifier(), Rectifier()],
            dims=[feature_vec_len,
                  int(hidden_size[0]),
                  int(hidden_size[1])],
            name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = int(hidden_size[1])
    else:
        hidden_size = int(hidden_size[0])
        mlp = MLP(activations=[Rectifier()],
                  dims=[feature_vec_len, hidden_size],
                  name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = hidden_size

    #compute y_hat initial guess
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=last_hidden_size,
                              output_dim=num_targets)

    typemfile = None
    if init_with_type:
        typemfile = config['dsdir'] + '/_typematrix.npy'
        #typemfile = config['dsdir'] + '/_typeCooccurrMatrix.npy'

    initialize_lasthid(hidden_to_output, typemfile)
    #         initialize([hidden_to_output])

    y_hat_init = Logistic().apply(hidden_to_output.apply(before_out))
    y_hat_init.name = 'y_hat_init'
    y_hat_init = debug_print(y_hat_init, 'yhat_init', False)
    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    logpy_xz = logpy_xz_init
    y_hat_recog = y_hat_init
    y_hat = y_hat_init
    KLD = 0

    if use_gaus:
        if use_vae:
            logger.info('using VAE')
            vae_conditional = str_to_bool(config['vae_cond'])
            if vae_conditional:
                y_hat, logpy_xz, KLD, y_hat_recog = build_vae_conditoinal(
                    kl_weight,
                    entropy_weight,
                    y_hat_init,
                    feature_vec,
                    feature_vec_len,
                    config,
                    y,
                    test=test,
                    deterministic=deterministic,
                    num_targets=num_targets,
                    n_latent_z=n_latent_z,
                    hidden_size=hidden_size,
                    hu_decoder=hu_decoder)
            else:
                y_hat, logpy_xz, KLD = build_vae_basic(
                    kl_weight,
                    feature_vec,
                    feature_vec_len,
                    config,
                    y,
                    test=test,
                    deterministic=deterministic,
                    num_targets=num_targets,
                    n_latent_z=n_latent_z,
                    hidden_size=hidden_size,
                    hu_decoder=hu_decoder)
                y_hat_recog = y_hat
        else:
            if use_rec:
                logger.info('Not using VAE... but using recursion')
                prior_in = T.concatenate([feature_vec, y_hat_init], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in,
                    n_input=feature_vec_len + num_targets,
                    hu_encoder=hidden_size,
                    n_latent=n_latent_z)
                z_prior = sampler(mu_prior,
                                  log_sigma_prior,
                                  deterministic=deterministic,
                                  use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl,
                                             n_latent=n_latent_z +
                                             feature_vec_len,
                                             hu_decoder=hu_decoder,
                                             n_out=num_targets,
                                             y=y)
                y_hat = (y_hat + y_hat_init) / 2.
                logpy_xz = (logpy_xz + logpy_xz_init) / 2.
            else:
                prior_in = T.concatenate([feature_vec], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in,
                    n_input=feature_vec_len,
                    hu_encoder=hidden_size,
                    n_latent=n_latent_z)
                z_prior = sampler(mu_prior,
                                  log_sigma_prior,
                                  deterministic=deterministic,
                                  use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl,
                                             n_latent=n_latent_z +
                                             feature_vec_len,
                                             hu_decoder=hu_decoder,
                                             n_out=num_targets,
                                             y=y)

            y_hat_recog = y_hat

    y_hat = debug_print(y_hat, 'y_hat', False)

    pat1 = T.mean(y[T.arange(y.shape[0]), T.argmax(y_hat, axis=1)])
    max_type = debug_print(T.argmax(y_hat_recog, axis=1), 'max_type', False)
    pat1_recog = T.mean(y[T.arange(y.shape[0]), max_type])
    mean_cross = T.mean(logpy_xz)
    mean_kld = T.mean(KLD)
    cost = mean_kld + mean_cross
    cost.name = 'cost'
    mean_kld.name = 'kld'
    mean_cross.name = 'cross_entropy_loss'
    pat1.name = 'p@1'
    pat1_recog.name = 'p@1_recog'
    misclassify_rate = MultiMisclassificationRate().apply(y, T.ge(y_hat, 0.5))
    misclassify_rate.name = 'error_rate'

    return cost, pat1, y_hat, mean_kld, mean_cross, pat1_recog, misclassify_rate
wlev = False
if 'all' in nn_type:
    x_seqwords, x_let, y, x_emb = model.inputs
    xin = [x_let, x_emb, x_seqwords]
elif 'ch_only' in nn_type or 'ch_bilstm' in nn_type:
    x_let, y = model.inputs
    xin = [x_let]
elif 'ch_men_cnn' in nn_type:
    x_seqw, x_let, y = model.inputs
    xin = [x_seqw, x_let]
    wlev = True
elif 'w_lev' in nn_type:
    x_mnt, y = model.inputs
    xin = [x_mnt]
else:
    x_let, y, x_emb = model.inputs
    xin = [x_let, x_emb]

get_mlp_out = theano.function(xin, mlp_output)

edev, etest = get_entity_metadata(hdf5_file, feature_name='letters')
print(len(edev), len(etest))

logger.info('Starting to apply on test inputs')
applypredict(get_mlp_out, test_stream, etest, num_samples_test, batch_size,
             sys.argv[1] + '.mlpouts', ix_to_target, len(xin), wlev)
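applypredict itself is not shown in this listing; a hypothetical sketch of what such a batch-prediction loop might look like (every name below is an assumption for illustration, not the original implementation):

def applypredict_sketch(fn, stream, outfile, n_inputs):
    # iterate the test stream, feed the first n_inputs sources to the
    # compiled function, and write one output row per example
    with open(outfile, 'w') as out:
        for batch in stream.get_epoch_iterator():
            outputs = fn(*batch[:n_inputs])
            for row in outputs:
                out.write(' '.join('%g' % v for v in row) + '\n')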