def build_network(self,
                      num_labels,
                      features,
                      max_len=None,
                      hidden_units=None,
                      l2=None,
                      use_cnn=None,
                      cnn_filter_size=None,
                      cnn_pool_size=None,
                      cnn_num_filters=None,
                      cnn_filter_sizes=None,
                      embedding_size=None,
                      DEBUG=False):
        """ Build the neural network used for training.

        :param num_labels:      Number of labels to classify
        :param features:        the input features we use
        :param max_len:     Configured window-size
        :param hidden_units:    Number of units in the MLP's hiddden layer
        :returns:               The cost function, the misclassification rate
                                function, the computation graph of the cost
                                function and the prediction function
        """
        logger.info(
            'building the network, with one CNN for left and one for right')
        hidden_units = hidden_units or self._config['hidden_units']
        logger.info('#hidden units: %d', hidden_units)
        # building the feature vector from input.
        mlp_in_e1, mlp_in_e2, mlp_in_dim = self.build_feature_vector_noMention(
            features)
        logger.info('feature vector size: %d', mlp_in_dim)

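        # One MLP with shared (tied) weights transforms both entity feature vectors.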
        mlp = MLP(activations=[Rectifier()],
                  dims=[mlp_in_dim, hidden_units],
                  seed=self.curSeed)
        initialize([mlp])
        before_out_e1 = mlp.apply(mlp_in_e1)
        before_out_e2 = mlp.apply(mlp_in_e2)
        hidden_to_output = Linear(name='hidden_to_output',
                                  input_dim=hidden_units,
                                  output_dim=num_labels)
        initialize([hidden_to_output])
        linear_output_e1 = hidden_to_output.apply(before_out_e1)
        linear_output_e2 = hidden_to_output.apply(before_out_e2)
        linear_output_e1.name = 'linear_output_e1'
        linear_output_e2.name = 'linear_output_e2'
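        # Element-wise sigmoid outputs: each label probability is predicted
        # independently (multi-label classification).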

        y_hat_e1 = Logistic(name='logistic1').apply(linear_output_e1)
        y_hat_e2 = Logistic(name='logistic2').apply(linear_output_e2)
        y_hat_e1.name = 'y_hat_e1'
        y_hat_e2.name = 'y_hat_e2'
        y_hat_e1 = debug_print(y_hat_e1, 'y_1', DEBUG)
        return y_hat_e1, y_hat_e2, before_out_e1, before_out_e2
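A hedged usage sketch for the outputs above; cross_entropy_loss and the int32 target matrices y_e1 and y_e2 are assumed to exist in the surrounding codebase and are illustrative only:

y_hat_e1, y_hat_e2, before_e1, before_e2 = self.build_network(num_labels, features)
cost = cross_entropy_loss(y_hat_e1, y_e1) + cross_entropy_loss(y_hat_e2, y_e2)
cost.name = 'cost'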
Example #2
def softmax_layer(h, y, hidden_size, num_targets, cost_fn='cross'):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
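    # precision@1: look up the gold label at the top-scoring index; its batch
    # mean is the fraction of examples whose top prediction is correct.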
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = None
    if 'ranking' in cost_fn:
        cost, updates = ranking_loss(linear_output, y)
        print('using ranking loss function!')
    else:
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    misclassify_rate = MultiMisclassificationRate().apply(
        y, T.ge(linear_output, 0.5))
    misclassify_rate.name = 'error_rate'
    return cost, pat1, updates, misclassify_rate
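A minimal call sketch, assuming Theano (import theano.tensor as T) and the Blocks helpers above are in scope; the sizes are illustrative:

h = T.matrix('hidden')    # (batch, hidden_size) hidden representation
y = T.imatrix('targets')  # (batch, num_targets) binary indicator matrix
cost, pat1, updates, error_rate = softmax_layer(h, y, hidden_size=256, num_targets=102)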
Example #3
def softmax_layer_old(h, y, hidden_size, num_targets, cost_fn='softmax'):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=num_targets)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    y_pred = T.argmax(linear_output, axis=1)
    label_of_predicted = debug_print(y[T.arange(y.shape[0]), y_pred],
                                     'label_of_predicted', False)
    pat1 = T.mean(label_of_predicted)
    updates = {}
    if 'softmax' in cost_fn:
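        # Despite the 'softmax' name, Logistic is an element-wise sigmoid
        # (independent per-label probabilities).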
        y_hat = Logistic().apply(linear_output)
        y_hat.name = 'y_hat'
        cost = cross_entropy_loss(y_hat, y)
    else:
        cost, updates = ranking_loss(linear_output, y)
    cost.name = 'cost'
    pat1.name = 'precision@1'
    return cost, pat1, updates
Example #4
def build_model_new(fea2obj,
                    num_targets,
                    config,
                    kl_weight,
                    entropy_weight,
                    deterministic=False,
                    test=False):
    hidden_size = config['hidden_units'].split()
    use_highway = str_to_bool(
        config['use_highway']) if 'use_highway' in config else False
    use_gaus = str_to_bool(
        config['use_gaus']) if 'use_gaus' in config else False
    use_rec = str_to_bool(config['use_rec']) if 'use_rec' in config else True
    n_latent_z = int(config['n_latent']) if 'n_latent' in config else 0
    use_noise = str_to_bool(
        config['use_noise']) if 'use_noise' in config else False
    use_vae = str_to_bool(config['use_vae']) if 'use_vae' in config else False
    hu_decoder = int(
        config['hu_decoder']) if 'hu_decoder' in config else hidden_size
    logger.info(
        'use_gaus: %s, use_rec: %s, use_noise: %s, use_vae: %s, hidden_size: %s, n_latent_z: %d, hu_decoder: %s, hu_encoder: %s',
        use_gaus, use_rec, use_noise, use_vae, hidden_size, n_latent_z,
        hu_decoder, hidden_size)
    init_with_type = str_to_bool(
        config['init_with_type']) if 'init_with_type' in config else False
    y = T.matrix('targets', dtype='int32')

    drop_prob = float(config['dropout']) if 'dropout' in config else 0

    # Build the feature vector with a single encoder, e.g. a CNN, mean pooling, or an LSTM.
    feature_vec, feature_vec_len = build_feature_vec(fea2obj, config)

    # Dropout: apply a Bernoulli mask at training time; at test time, scale by the keep probability instead.
    if drop_prob > 0:
        mask = T.cast(
            srng.binomial(n=1, p=1 - drop_prob, size=feature_vec.shape),
            'float32')
        if test:
            feature_vec *= (1 - drop_prob)
        else:
            feature_vec *= mask

    # Highway layer: z = t * g(x) + (1 - t) * x, with a learned sigmoid transform gate t.
    if use_highway:
        g_mlp = MLP(activations=[Rectifier()],
                    dims=[feature_vec_len, feature_vec_len],
                    name='g_mlp')
        t_mlp = MLP(activations=[Logistic()],
                    dims=[feature_vec_len, feature_vec_len],
                    name='t_mlp')
        initialize([g_mlp, t_mlp])
        t = t_mlp.apply(feature_vec)
        z = t * g_mlp.apply(feature_vec) + (1. - t) * feature_vec
        feature_vec = z

    #MLP(s)
    logger.info('feature vec length = %s and hidden layer units = %s',
                feature_vec_len, ' '.join(hidden_size))
    if len(hidden_size) > 1:
        # Two-layer MLP on the feature vector
        mlp = MLP(
            activations=[Rectifier(), Rectifier()],
            dims=[feature_vec_len,
                  int(hidden_size[0]),
                  int(hidden_size[1])],
            name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = int(hidden_size[1])
    else:
        hidden_size = int(hidden_size[0])
        mlp = MLP(activations=[Rectifier()],
                  dims=[feature_vec_len, hidden_size],
                  name='joint_mlp')
        initialize([mlp])
        before_out = mlp.apply(feature_vec)
        last_hidden_size = hidden_size

    #compute y_hat initial guess
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=last_hidden_size,
                              output_dim=num_targets)

    typemfile = None
    if init_with_type:
        typemfile = config['dsdir'] + '/_typematrix.npy'
        #typemfile = config['dsdir'] + '/_typeCooccurrMatrix.npy'

    initialize_lasthid(hidden_to_output, typemfile)
    #         initialize([hidden_to_output])

    y_hat_init = Logistic().apply(hidden_to_output.apply(before_out))
    y_hat_init.name = 'y_hat_init'
    y_hat_init = debug_print(y_hat_init, 'yhat_init', False)
    logpy_xz_init = cross_entropy_loss(y_hat_init, y)
    logpy_xz = logpy_xz_init
    y_hat_recog = y_hat_init
    y_hat = y_hat_init
    KLD = 0

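    # Optional Gaussian latent variable: either a (conditional) VAE, or a prior
    # network whose sample is concatenated with the features and decoded.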
    if use_gaus:
        if use_vae:
            logger.info('using VAE')
            vae_conditional = str_to_bool(config['vae_cond'])
            if vae_conditional:
                y_hat, logpy_xz, KLD, y_hat_recog = build_vae_conditoinal(
                    kl_weight,
                    entropy_weight,
                    y_hat_init,
                    feature_vec,
                    feature_vec_len,
                    config,
                    y,
                    test=test,
                    deterministic=deterministic,
                    num_targets=num_targets,
                    n_latent_z=n_latent_z,
                    hidden_size=hidden_size,
                    hu_decoder=hu_decoder)
            else:
                y_hat, logpy_xz, KLD = build_vae_basic(
                    kl_weight,
                    feature_vec,
                    feature_vec_len,
                    config,
                    y,
                    test=test,
                    deterministic=deterministic,
                    num_targets=num_targets,
                    n_latent_z=n_latent_z,
                    hidden_size=hidden_size,
                    hu_decoder=hu_decoder)
                y_hat_recog = y_hat
        else:
            if use_rec:
                logger.info('Not using VAE... but using recursion')
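                # Condition the prior network on both the features and the
                # initial prediction; the decoder output is averaged with
                # y_hat_init below.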
                prior_in = T.concatenate([feature_vec, y_hat_init], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in,
                    n_input=feature_vec_len + num_targets,
                    hu_encoder=hidden_size,
                    n_latent=n_latent_z)
                z_prior = sampler(mu_prior,
                                  log_sigma_prior,
                                  deterministic=deterministic,
                                  use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl,
                                             n_latent=n_latent_z +
                                             feature_vec_len,
                                             hu_decoder=hu_decoder,
                                             n_out=num_targets,
                                             y=y)
                y_hat = (y_hat + y_hat_init) / 2.
                logpy_xz = (logpy_xz + logpy_xz_init) / 2.
            else:
                prior_in = T.concatenate([feature_vec], axis=1)
                mu_prior, log_sigma_prior = prior_network(
                    x=prior_in,
                    n_input=feature_vec_len,
                    hu_encoder=hidden_size,
                    n_latent=n_latent_z)
                z_prior = sampler(mu_prior,
                                  log_sigma_prior,
                                  deterministic=deterministic,
                                  use_noise=use_noise)
                zl = [T.concatenate([z_prior, feature_vec], axis=1)]
                y_hat, logpy_xz = generation(zl,
                                             n_latent=n_latent_z +
                                             feature_vec_len,
                                             hu_decoder=hu_decoder,
                                             n_out=num_targets,
                                             y=y)

            y_hat_recog = y_hat

    y_hat = debug_print(y_hat, 'y_hat', False)

    pat1 = T.mean(y[T.arange(y.shape[0]), T.argmax(y_hat, axis=1)])
    max_type = debug_print(T.argmax(y_hat_recog, axis=1), 'max_type', False)
    pat1_recog = T.mean(y[T.arange(y.shape[0]), max_type])
    mean_cross = T.mean(logpy_xz)
    mean_kld = T.mean(KLD)
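    # Total cost: KL term plus reconstruction cross-entropy (a negative-ELBO-style
    # objective when the Gaussian latent variable is enabled; KLD is 0 otherwise).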
    cost = mean_kld + mean_cross
    cost.name = 'cost'
    mean_kld.name = 'kld'
    mean_cross.name = 'cross_entropy_loss'
    pat1.name = 'p@1'
    pat1_recog.name = 'p@1_recog'
    misclassify_rate = MultiMisclassificationRate().apply(y, T.ge(y_hat, 0.5))
    misclassify_rate.name = 'error_rate'

    return cost, pat1, y_hat, mean_kld, mean_cross, pat1_recog, misclassify_rate
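A hedged call-site sketch; fea2obj is presumably the feature-name-to-object mapping consumed by build_feature_vec, and the config values below are purely illustrative:

config = {'hidden_units': '512', 'dropout': '0.5'}  # keys read by build_model_new
cost, pat1, y_hat, kld, cross, pat1_recog, err = build_model_new(
    fea2obj, num_targets=102, config=config, kl_weight=1.0, entropy_weight=0.0)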