Example #1
def create_KmaxPooling_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''
        One convolutional layer with multiple filter widths, each followed by
        k-max pooling; the pooled outputs are concatenated into one feature vector.
    '''
    k = int(config[pref + '_kpool'])
    filter_width_list = [int(fw) for fw in config[pref + '_filterwidth'].split()]
    print(filter_width_list)
    num_filters = int(config[pref+'_num_filters'])
    #num_filters /= len(filter_width_list)
    totfilters = 0
    print(input_len, embedding_size, num_filters)
    for i, fw in enumerate(filter_width_list):
        # a valid convolution of width fw yields input_len - fw + 1 positions
        num_feature_map = input_len - fw + 1
        conv = Convolutional(
            image_size=(input_len, embedding_size),
            filter_size=(fw, embedding_size),
            num_filters=min(int(config[pref + '_maxfilter']), num_filters * fw),
            num_channels=1
        )
        totfilters += conv.num_filters * k
#         initialize2(conv, num_feature_map)
        initialize([conv])
        conv.name = pref + 'conv_' + str(fw)
        layer0_input = debug_print(layer0_input, 'inp', False)
        convout = conv.apply(layer0_input)
        convout = debug_print(convout, 'convout', False)
        # keep the k largest activations of each feature map, then flatten
        kpoolout = KmaxPooling(convout, k).apply().flatten(2)
        kpoolout = debug_print(kpoolout, 'poolout', False)
        
        if i == 0:
            outpools = kpoolout
        else:
            outpools = T.concatenate([outpools, kpoolout], axis=1)
    name_rep_len = totfilters
    return outpools, name_rep_len
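
KmaxPooling here is a project-specific brick; the standard operation (Kalchbrenner et al., 2014) keeps the k largest activations of each feature map. A minimal NumPy sketch of the idea, under the assumption that pooling runs over the position axis (this simplified version does not preserve positional order, which the canonical k-max pooling does):

import numpy as np

def kmax_pool(x, k):
    # keep the k largest values along the last (position) axis
    idx = np.argpartition(x, -k, axis=-1)[..., -k:]
    return np.take_along_axis(x, idx, axis=-1)

feature_maps = np.random.randn(2, 4, 10)  # (batch, num_filters, positions)
pooled = kmax_pool(feature_maps, k=3)
assert pooled.shape == (2, 4, 3)          # flatten(2) would then give (2, 12)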
Example #2
def softmax_layer(h, y, hidden_size, num_targets, cost_fn='cross'):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = None
    if 'ranking' in cost_fn:
        cost, updates = ranking_loss(linear_output, y)
        print('using ranking loss function!')
    else:
        # element-wise sigmoid per target: multi-label, not an actual softmax
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    # note: the 0.5 threshold is applied to the raw linear scores, not to y_hat
    misclassify_rate = MultiMisclassificationRate().apply(
        y, T.ge(linear_output, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, updates, misclassify_rate
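
Despite its name, softmax_layer applies an element-wise Logistic (sigmoid) with a cross-entropy cost, i.e., multi-label classification rather than a true softmax. The pat1 value mirrors the y[T.arange(y.shape[0]), y_pred] indexing above: it measures how often the top-scoring label is one of the gold labels. A hedged NumPy equivalent:

import numpy as np

def precision_at_1(y_true, scores):
    # fraction of rows whose argmax-scored label is a gold (1) label
    top = scores.argmax(axis=1)
    return y_true[np.arange(y_true.shape[0]), top].mean()

y = np.array([[0, 1, 1], [1, 0, 0]])             # (batch, num_targets) binary
s = np.array([[0.1, 0.8, 0.3], [0.2, 0.9, 0.1]])
print(precision_at_1(y, s))                      # 0.5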
Example #3
    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression
        """
        # compute vector of class-membership probabilities in symbolic form
        self.s_y_given_x = T.nnet.sigmoid(input)
        self.s_y_given_x = debug_print(self.s_y_given_x, 'scores', False)
        super(SigmoidLoss, self).__init__(input, n_in, n_out)
Example #4
    def build_model(self, x, config):
        logger.info('building %s model for: %s ', self.nn_model, self.name)
        vocabsize = self.get_vocab_size()
        logger.info('%s vocab size is: %d', self.name, vocabsize)
        self.embeddings, self.dim_emb = self.get_embeddings() 
        if self.tune_tune:
            logger.info('%s lookuptable with size (%d, %d) will be tuned.', self.name, vocabsize, self.dim_emb)
            lookup = LookupTable(length=vocabsize, dim=self.dim_emb)
            lookup.allocate()
#             add_role(lookup.W, WEIGHT)
            lookup.W.name = 'lt.W'
        else:
            logger.info('%s lookuptable with size (%d, %d) will NOT be tuned.', self.name, vocabsize, self.dim_emb)
            lookup = MyLookupTable(length=vocabsize, dim=self.dim_emb)
            lookup.allocate()
        lookup.name = self.name + 'lookuptable'
        lookup.W.set_value(self.embeddings)
        xemb = lookup.apply(x)
        xemb = debug_print(xemb, 'xemb', False)
        if 'cnn' in self.nn_model:
            logger.info('CNN')
            feature_vec, feature_vec_len = create_cnn_general(xemb, self.dim_emb, self.max_len, config, self.name)
        elif self.nn_model == 'lstm':
            feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, False, config, self.name)
        elif self.nn_model == 'bilstm':
            feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, True, config, self.name)
        elif self.nn_model == 'rnn':
            feature_vec, feature_vec_len = create_rnn(xemb, self.dim_emb, config, self.name)
        elif self.nn_model == 'ff':
            feature_vec, feature_vec_len = create_ff(xemb, self.dim_emb, self.max_len, config)
        elif self.nn_model == 'mean':
            feature_vec, feature_vec_len = create_mean(xemb, self.dim_emb, self.max_len, config)
        return feature_vec, feature_vec_len
Example #5
def create_rnn(xemb, embedding_size, config, pref):
    hiddensize = int(config[pref + '_h_rnn'])
    # single recurrent layer; the loop form makes it easy to stack more layers
    for i in range(1):
        xemb = rnn_layer(embedding_size, xemb, hiddensize, 1, pref)
        embedding_size = hiddensize
        xemb.name = 'rnn' + str(i) + pref
    h = xemb
    # keep only the final time step as the sequence representation
    h = debug_print(h[h.shape[0] - 1], 'rnn', False)
    return h, hiddensize
Example #6
def create_cnn_general(xemb, embedding_size, input_len, config, pref):
    numConvLayers = int(config[pref + '_convlayers'])
    xemb = debug_print(xemb, 'afterLookup', False)
    # reshape the lookup output to the 4D (batch, channel, height, width)
    # layout that the Convolutional brick expects, with one input channel
    layer0_input = xemb.flatten().reshape((xemb.shape[0], 1, input_len, embedding_size))
    if numConvLayers == 1:
        # return create_kim_cnn(layer0_input, embedding_size, input_len, config, pref)
        return create_OLD_kim_cnn(layer0_input, embedding_size, input_len, config, pref)
    elif numConvLayers == -1:
        # -1 selects the k-max pooling variant (see Example #1)
        return create_KmaxPooling_cnn(layer0_input, embedding_size, input_len, config, pref)
    else:
        return create_yy_cnn(numConvLayers, layer0_input, embedding_size, input_len, config, pref)
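
The flatten().reshape(...) above turns the (batch, length, embedding) tensor produced by the lookup table into the 4D (batch, channel, height, width) layout that the convolution consumes, with a single input channel. The same layout change in NumPy, with made-up sizes:

import numpy as np

batch, input_len, emb = 8, 40, 50
xemb = np.random.randn(batch, input_len, emb).astype('float32')

layer0_input = xemb.reshape(batch, 1, input_len, emb)
assert layer0_input.shape == (8, 1, 40, 50)  # (batch, channel, height, width)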
Example #7
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder], name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu', input_dim=hu_encoder, output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma', input_dim=hu_encoder, output_dim=n_latent)
    initialize([lin1])
    # smaller init std keeps the predicted (log) sigma close to zero at the start
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
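
prior_network parameterizes a diagonal Gaussian via mu and log_sigma. In VAE-style training this pair usually feeds a KL term against a standard normal. A sketch of the usual closed form, assuming log_sigma holds log(sigma^2) (whether that holds here depends on the sampler's input_log flag):

import numpy as np

def kl_standard_normal(mu, log_sigma):
    # KL( N(mu, diag(exp(log_sigma))) || N(0, I) ), summed over latent dims
    return -0.5 * np.sum(1.0 + log_sigma - mu ** 2 - np.exp(log_sigma), axis=-1)

mu = np.zeros((4, 8))
log_sigma = np.zeros((4, 8))
print(kl_standard_normal(mu, log_sigma))  # all zeros: prior matched exactly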
Example #8
def softmax_layer_old(h, y, hidden_size, num_targets, cost_fn='softmax'):
    hidden_to_output = Linear(name='hidden_to_output', input_dim=hidden_size, output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred], 'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = {}
    if 'softmax' in cost_fn:
        # despite the name: element-wise sigmoid plus cross-entropy (multi-label)
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    else:
        cost, updates = ranking_loss(linear_output, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    return cost, pat1, updates
Example #9
def sampler(mu, log_sigma, deterministic=False, use_noise=True, input_log=False):
    log_sigma = debug_print(log_sigma, 'log_sigma', False)
    logger.info('deterministic: %s --- use noise: %s', deterministic, use_noise)
    if deterministic and use_noise:
        return mu
    if deterministic:
        #return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
    eps = srng.normal(size=mu.shape, std=1)
    # reparameterization trick: z = mu + sigma * eps stays differentiable in mu, sigma
    if use_noise:
        if input_log:
            # input_log: log_sigma holds log(sigma^2), so sigma = exp(0.5 * log_sigma)
            return mu + T.exp(0.5 * log_sigma) * eps
        else:
            return mu + log_sigma * eps
    else:
        #return mu + T.exp(0.5 * log_sigma)
        return mu + log_sigma
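
The use_noise branch is the reparameterization trick: instead of sampling z ~ N(mu, sigma^2) directly, it draws eps ~ N(0, I) and computes z = mu + sigma * eps, which keeps gradients flowing into mu and sigma. A minimal NumPy sketch of the input_log=True case:

import numpy as np

rng = np.random.default_rng(0)

def sample_z(mu, log_sigma):
    # z = mu + sigma * eps with sigma = exp(0.5 * log_sigma), eps ~ N(0, I)
    eps = rng.standard_normal(mu.shape)
    return mu + np.exp(0.5 * log_sigma) * eps

z = sample_z(np.zeros((4, 8)), np.zeros((4, 8)))  # samples from N(0, I)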
Example #10
def create_lstm(xemb, embedding_size, bidirectional, config, pref):
    hiddensize = int(config[pref + '_h_lstm'])
    inpsize = embedding_size
    if bidirectional:
        for i in range(1):
            xemb = bilstm_layer(inpsize, xemb, hiddensize, i, pref)
            xemb.name = 'bilstm' + str(i) + pref
            inpsize = hiddensize * 2
        lstm_outsize = hiddensize * 2
        h = xemb
    else:
        for i in range(1):
            xemb = lstm_layer(embedding_size, xemb, hiddensize, 1, pref)
            embedding_size = hiddensize
            xemb.name = 'lstm' + str(i) + pref
        h = xemb
        lstm_outsize = hiddensize
    # keep only the final time step as the sequence representation
    h = debug_print(h[h.shape[0] - 1], 'outlstm', False)
    return h, lstm_outsize
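
Both recurrent builders return only the final hidden state, h[h.shape[0] - 1], which assumes the recurrent output is laid out (time, batch, hidden) as in Blocks' recurrent bricks; the last step serves as the fixed-size sequence representation. The equivalent slicing on a NumPy array:

import numpy as np

time_steps, batch, hidden = 40, 8, 128
h_seq = np.random.randn(time_steps, batch, hidden)
h_last = h_seq[h_seq.shape[0] - 1]      # same as h_seq[-1]
assert h_last.shape == (batch, hidden)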
Example #11
def build_model_new(fea2obj,
                    num_targets,
                    config,
                    kl_weight,
                    entropy_weight,
                    deterministic=False,
                    test=False):
    hidden_size = config['hidden_units'].split()
    use_highway = str_to_bool(
        config['use_highway']) if 'use_highway' in config else False
    use_gaus = str_to_bool(
        config['use_gaus']) if 'use_gaus' in config else False
    use_rec = str_to_bool(config['use_rec']) if 'use_rec' in config else True
    # the latent code is only used on the Gaussian path, hence the 'use_gaus' guard
    n_latent_z = int(config['n_latent']) if 'use_gaus' in config else 0
    use_noise = str_to_bool(
        config['use_noise']) if 'use_noise' in config else False
    use_vae = str_to_bool(config['use_vae']) if 'use_vae' in config else False
    hu_decoder = int(
        config['hu_decoder']) if 'hu_decoder' in config else hidden_size
    logger.info(
        'use_gaus: %s, use_rec: %s, use_noise: %s, use_vae: %s, hidden_size: %s, n_latent_z: %d, hu_decoder: %s, hu_encoder: %s',
        use_gaus, use_rec, use_noise, use_vae, hidden_size, n_latent_z,
        hu_decoder, hidden_size)
    init_with_type = str_to_bool(
        config['init_with_type']) if 'init_with_type' in config else False
    y = T.matrix('targets', dtype='int32')

    drop_prob = float(config['dropout']) if 'dropout' in config else 0

    #build the feature vector with one model, e.g., with cnn or mean or lstm
    feature_vec, feature_vec_len = build_feature_vec(fea2obj, config)

    # dropout: mask activations at train time, rescale by the keep prob at test time
    if drop_prob > 0:
        mask = T.cast(
            srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape),
            'float32')
        if test:
            feature_vec *= (1 - drop_prob)
        else:
            feature_vec *= mask

    #Highway network
    if use_highway:
        g_mlp = MLP(activations=[Rectifier()],
                    dims=[feature_vec_len, feature_vec_len],
                    name='g_mlp')
        t_mlp = MLP(activations=[Logistic()],
                    dims=[feature_vec_len, feature_vec_len],
                    name='t_mlp')
        initialize([g_mlp, t_mlp])
        t = t_mlp.apply(feature_vec)
        z = t * g_mlp.apply(feature_vec) + (1. - t) * feature_vec
        feature_vec = z

    #MLP(s)
    logger.info('feature vec length = %s and hidden layer units = %s',
                feature_vec_len, ' '.join(hidden_size))
    if len(hidden_size) > 1:
        # two-layer MLP on the feature vector
        mlp = MLP(
            activations=[Rectifier(), Rectifier()],
            dims=[feature_vec_len,
                  int(hidden_size[0]),
                  int(hidden_size[1])],
            name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = int(hidden_size[1])
    else:
        hidden_size = int(hidden_size[0])
        mlp = MLP(activations=[Rectifier()],
                  dims=[feature_vec_len, hidden_size],
                  name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = hidden_size

    #compute y_hat initial guess
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=last_hidden_size,
                              output_dim=num_targets)

    typemfile = None
    if init_with_type:
        typemfile = config['dsdir'] + '/_typematrix.npy'
        #typemfile = config['dsdir'] + '/_typeCooccurrMatrix.npy'

    initialize_lasthid(hidden_to_output, typemfile)
    #         initialize([hidden_to_output])

    y_hat_init = Logistic().apply(hidden_to_output.apply(before_out))
    y_hat_init.name = 'y_hat_init'
    y_hat_init = debug_print(y_hat_init, 'yhat_init', False)
    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    logpy_xz = logpy_xz_init
    y_hat_recog = y_hat_init
    y_hat = y_hat_init
    KLD = 0

    if use_gaus:
        if use_vae:
            logger.info('using VAE')
            vae_conditional = str_to_bool(config['vae_cond'])
            if vae_conditional:
                y_hat, logpy_xz, KLD, y_hat_recog = build_vae_conditoinal(
                    kl_weight,
                    entropy_weight,
                    y_hat_init,
                    feature_vec,
                    feature_vec_len,
                    config,
                    y,
                    test=test,
                    deterministic=deterministic,
                    num_targets=num_targets,
                    n_latent_z=n_latent_z,
                    hidden_size=hidden_size,
                    hu_decoder=hu_decoder)
            else:
                y_hat, logpy_xz, KLD = build_vae_basic(
                    kl_weight,
                    feature_vec,
                    feature_vec_len,
                    config,
                    y,
                    test=test,
                    deterministic=deterministic,
                    num_targets=num_targets,
                    n_latent_z=n_latent_z,
                    hidden_size=hidden_size,
                    hu_decoder=hu_decoder)
                y_hat_recog = y_hat
        else:
            if use_rec:
                logger.info('Not using VAE... but using recursion')
                prior_in = T.concatenate([feature_vec, y_hat_init], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in,
                    n_input=feature_vec_len + num_targets,
                    hu_encoder=hidden_size,
                    n_latent=n_latent_z)
                z_prior = sampler(mu_prior,
                                  log_sigma_prior,
                                  deterministic=deterministic,
                                  use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl,
                                             n_latent=n_latent_z +
                                             feature_vec_len,
                                             hu_decoder=hu_decoder,
                                             n_out=num_targets,
                                             y=y)
                y_hat = (y_hat + y_hat_init) / 2.
                logpy_xz = (logpy_xz + logpy_xz_init) / 2.
            else:
                prior_in = T.concatenate([feature_vec], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in,
                    n_input=feature_vec_len,
                    hu_encoder=hidden_size,
                    n_latent=n_latent_z)
                z_prior = sampler(mu_prior,
                                  log_sigma_prior,
                                  deterministic=deterministic,
                                  use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl,
                                             n_latent=n_latent_z +
                                             feature_vec_len,
                                             hu_decoder=hu_decoder,
                                             n_out=num_targets,
                                             y=y)

            y_hat_recog = y_hat

    y_hat = debug_print(y_hat, 'y_hat', False)

    pat1 = T.mean(y[T.arange(y.shape[0]), T.argmax(y_hat, axis=1)])
    max_type = debug_print(T.argmax(y_hat_recog, axis=1), 'max_type', False)
    pat1_recog = T.mean(y[T.arange(y.shape[0]), max_type])
    mean_cross = T.mean(logpy_xz)
    mean_kld = T.mean(KLD)
    cost = mean_kld + mean_cross
    cost.name = 'cost'
    mean_kld.name = 'kld'
    mean_cross.name = 'cross_entropy_loss'
    pat1.name = 'p@1'
    pat1_recog.name = 'p@1_recog'
    misclassify_rate = MultiMisclassificationRate().apply(y, T.ge(y_hat, 0.5))
    misclassify_rate.name = 'error_rate'

    return cost, pat1, y_hat, mean_kld, mean_cross, pat1_recog, misclassify_rate
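
The highway block in build_model_new computes z = t * g(x) + (1 - t) * x, where t is a sigmoid transform gate and g a Rectifier transform (Srivastava et al., 2015). A minimal NumPy sketch of the same gating, with random matrices standing in for the trained g_mlp / t_mlp weights:

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

dim = 16
x = np.random.randn(4, dim)
Wg = np.random.randn(dim, dim) * 0.1   # stand-in for g_mlp weights
Wt = np.random.randn(dim, dim) * 0.1   # stand-in for t_mlp weights

t = sigmoid(x @ Wt)                    # transform gate in (0, 1)
g = np.maximum(x @ Wg, 0.0)            # Rectifier branch
z = t * g + (1.0 - t) * x              # gated mix: transform vs. carry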