def create_encoder(opt):
    # Construct the encoder
    encoder = network.Encoder(opt)
    # Initialize the network weights
    network.weights_init(encoder,
                         init_type=opt.init_type,
                         init_gain=opt.init_gain)
    print('Encoder is created!')
    return encoder
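
A minimal usage sketch (not part of the original snippet), assuming opt is an argparse-style namespace carrying the fields read by network.Encoder and network.weights_init; the values below are illustrative only:

from argparse import Namespace

# Hypothetical options object; the real project builds opt with its own option parser,
# and network.Encoder may read additional fields from it.
opt = Namespace(init_type='normal', init_gain=0.02)
encoder = create_encoder(opt)
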
Example #2
    def build_model(self):
        # Define generators and discriminators
        self.E = network.Encoder(self.e_conv_dim)
        self.G = network.Generator(self.g_conv_dim)
        for i in self.cls:
            setattr(
                self, "D_" + i,
                net.Discriminator(self.img_size, self.d_conv_dim,
                                  self.d_repeat_num, self.norm))

        # Define vgg for perceptual loss
        self.vgg = net.VGG()
        self.vgg.load_state_dict(torch.load('addings/vgg_conv.pth'))

        # Define loss
        self.criterionL1 = torch.nn.L1Loss()
        self.criterionL2 = torch.nn.MSELoss()
        self.criterionGAN = GANLoss(use_lsgan=True,
                                    tensor=torch.cuda.FloatTensor)

        # Optimizers
        self.e_optimizer = torch.optim.Adam(self.E.parameters(), self.e_lr,
                                            [self.beta1, self.beta2])
        self.g_optimizer = torch.optim.Adam(self.G.parameters(), self.g_lr,
                                            [self.beta1, self.beta2])
        for i in self.cls:
            setattr(self, "d_" + i + "_optimizer", \
                    torch.optim.Adam(filter(lambda p: p.requires_grad, getattr(self, "D_" + i).parameters()), \
                                     self.d_lr, [self.beta1, self.beta2]))

        # Weights initialization
        self.E.apply(self.weights_init_xavier)
        self.G.apply(self.weights_init_xavier)
        for i in self.cls:
            getattr(self, "D_" + i).apply(self.weights_init_xavier)

        # Print networks
        self.print_network(self.E, 'E')
        self.print_network(self.G, 'G')
        for i in self.cls:
            self.print_network(getattr(self, "D_" + i), "D_" + i)

        if torch.cuda.is_available():
            self.E.cuda()
            self.G.cuda()
            self.vgg.cuda()
            for i in self.cls:
                getattr(self, "D_" + i).cuda()
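
For reference, GANLoss(use_lsgan=True, ...) is typically a mean-squared-error criterion evaluated against constant real/fake targets. The sketch below illustrates that common pattern; it is an assumption, not the project's actual GANLoss class:

import torch
import torch.nn as nn

class LSGANLoss(nn.Module):
    # Least-squares GAN criterion: MSE against all-ones (real) or all-zeros (fake) targets
    def __init__(self, use_lsgan=True):
        super().__init__()
        self.loss = nn.MSELoss() if use_lsgan else nn.BCEWithLogitsLoss()

    def forward(self, prediction, target_is_real):
        target = torch.ones_like(prediction) if target_is_real else torch.zeros_like(prediction)
        return self.loss(prediction, target)
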
Example #3
def __init__(self, opt):
    super(TrainModel, self).__init__()
    self.isTrain = opt.isTrain
    if self.isTrain:
        # Encoder, decoder (generator) and discriminator, each initialized with the shared helper
        self.netE = network.Encoder(opt.input_nc, opt.ngf,
                                    opt.n_downsampling)
        self.netE.apply(network.weights_init)
        self.netG = network.Decoder(opt.input_nc, opt.output_nc, opt.ngf,
                                    opt.n_downsampling)
        self.netG.apply(network.weights_init)
        self.netD = network.Discriminator(opt.input_nc, opt.ngf,
                                          opt.n_layer)
        self.netD.apply(network.weights_init)
        # GAN, KL-divergence and reconstruction criteria
        self.criterionGAN = nn.BCELoss()
        self.criterionKL = network.KLLoss
        self.criterionRecon = network.ReconLoss
    else:
        pass
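
The snippet above only stores references to network.KLLoss and network.ReconLoss, whose definitions are not shown. A typical VAE-style pair, given purely as an assumption about what such helpers compute, would look like:

import torch
import torch.nn.functional as F

def kl_loss(mu, logvar):
    # KL divergence between N(mu, exp(logvar)) and a standard normal, averaged over the batch
    return -0.5 * torch.mean(torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1))

def recon_loss(reconstruction, target):
    # L1 reconstruction term; some implementations use MSE instead
    return F.l1_loss(reconstruction, target)
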
Example #4
        gene_exp = dataset_list[i]['mz_exp'].transpose()
        labels = dataset_list[i]['labels']  

        # construct DataLoader list
        if cuda:
            torch_dataset = torch.utils.data.TensorDataset(
                torch.FloatTensor(gene_exp).cuda(), torch.LongTensor(labels).cuda())
        else:
            torch_dataset = torch.utils.data.TensorDataset(
                torch.FloatTensor(gene_exp), torch.LongTensor(labels))
        data_loader = torch.utils.data.DataLoader(torch_dataset, batch_size=batch_size,
                                                  shuffle=True, drop_last=True)
        batch_loader_dict[i+1] = data_loader

    # create model
    encoder = models.Encoder(num_inputs=num_inputs)
    decoder_a = models.Decoder_a(num_inputs=num_inputs)
    decoder_b = models.Decoder_b(num_inputs=num_inputs)
    discriminator = models.Discriminator(num_inputs=num_inputs)

    if cuda:
        encoder.cuda()
        decoder_a.cuda()
        decoder_b.cuda()
        discriminator.cuda()

    # training
    loss_total_list = []  # list of total loss
    loss_reconstruct_list = []
    loss_transfer_list = []
    loss_classifier_list = []
Example #5
print('total train models: {}; total train batches: {}'.format(
    len(train_set), len(train_loader)))

val_loader = udata.DataLoader(dataset=val_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers,
                              drop_last=True)

print('total val models: {}; total val batches: {}'.format(
    len(val_set), len(val_loader)))

import network

encoder = network.Encoder().cuda()
convrnn = network.ConvRNN3d().cuda()
decoder = network.Decoder().cuda()

NLL = torch.nn.NLLLoss()
solver = optim.Adam([
    {'params': encoder.parameters()},
    {'params': convrnn.parameters()},
    {'params': decoder.parameters()},
],
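
The snippet is cut off inside the optim.Adam call. A hedged completion, with lr=1e-3 standing in for whatever learning rate the original script actually uses, would read:

solver = optim.Adam([
    {'params': encoder.parameters()},
    {'params': convrnn.parameters()},
    {'params': decoder.parameters()},
], lr=1e-3)  # the learning rate is an assumed value, not taken from the source
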
Example #6
def _create_model(dim_input, dim_output,
        embedding_dim, hidden_dim, alignment_dim,
        optimization_method, activation):
    """ Create a neural network to train a new model
    
    Args:
        dim_input (int)
        dim_output (int)
        embedding_dim (int): Bigger is better
            300 is a reasonable choice, according to Kann and Schütze (2016)
        hidden_dim (int): 100 achieved the best (among 50, 100, 200, 400)
            in Kann and Schütze (2016)
        alignment_dim (int)
    """
    
    input_seq = T.dmatrix('input_seq')
    output_seq = T.dmatrix('output_seq')

    h_init = theano.shared(np.zeros(hidden_dim))
    h_init_rev = theano.shared(np.zeros(hidden_dim))
    out_init = T.dvector('out_init')
    context_init = T.dvector('context_init')

    encoder = network.Encoder(dim_input, embedding_dim, hidden_dim,
        with_bidirectional=True)
    decoder = network.Decoder(dim_output, embedding_dim, hidden_dim * 2,
        with_attention=True, alignment_dim=alignment_dim)

    params = encoder.params + decoder.params
    
    ###
    # Loop for encoder
    ###
    prediction, _ = theano.scan(fn=network.Encoder.create_step_bidi(activation),
        sequences=[input_seq, input_seq[::-1]],
        outputs_info=[context_init, h_init, h_init_rev],
        non_sequences=encoder.params,
        strict=True)

    context_vec = prediction[0][-1]
    
    ###
    # Loop for decoder
    ###
    annotations = prediction[0]
    prediction, _ = theano.scan(fn=network.Decoder.create_step_attention(activation),
        outputs_info=[out_init, context_vec],
        non_sequences=[annotations] + decoder.params,
        n_steps=128,
        strict=True)

    predicted_seq = prediction[0]
    
    ###
    # Compute loss tensor
    ###
    loss = network.loss_seq_cross_entropy(output_seq, predicted_seq)
    loss.name = 'loss'
    
    ###
    # Update weights
    ###
    updating = optimization_method(loss, params)
    
    current_loss = 0.0
    
    ###
    # Realize the function
    ###
    _update = theano.function(inputs=[input_seq, output_seq,
                context_init, out_init],
                outputs=loss,
                updates=updating)

    _predict = theano.function(inputs=[input_seq, context_init, out_init],
                outputs=predicted_seq)
    
    out_init_zeros = np.zeros((dim_output,))
    context_init_zeros = np.zeros((hidden_dim * 2,))
    h_init_zeros = np.zeros((hidden_dim,))

    return (_update, _predict)
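
A usage sketch for the two compiled functions returned above. The optimizer, the one-hot sequence matrices, and the dimension values are hypothetical placeholders; the zero vectors mirror the *_zeros arrays created (but left unused) just before the return:

import numpy as np
import theano.tensor as T

# sgd_updates and the *_one_hot matrices are placeholders, not names from the snippet
update_fn, predict_fn = _create_model(dim_input=40, dim_output=40,
                                      embedding_dim=300, hidden_dim=100, alignment_dim=64,
                                      optimization_method=sgd_updates, activation=T.tanh)

context0 = np.zeros(2 * 100)  # hidden_dim * 2, matching context_init
out0 = np.zeros(40)           # dim_output, matching out_init
loss_value = update_fn(source_one_hot, target_one_hot, context0, out0)
predicted = predict_fn(source_one_hot, context0, out0)
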
Example #7
def run_AA(data,
           n_archetypes,
           true_archetypal_coords=None,
           true_archetypes=None,
           method='PCHA',
           n_subsample=None,
           n_batches=40000,
           latent_noise=0.05,
           arch=[1024, 512, 256, 128],
           seed=42):
    """Runs Chen at al. 2014 on input data and calculates errors on the
    data in the archetypal space and the error between the learned vs true
    archetypes.

    Parameters
    ----------
    data : [samples, features]
        Data in the feature space
    true_archetypal_coords : [samples, archetypes]
        Ground truth archetypal coordinates. Rows must sum to 1.
    true_archetypes : [archetypes, features]
        Ground truth archetypes in the feature space
    n_archetypes : int
        Number of archetypes to be learned
    method : ['PCHA', 'kernelPCHA', 'Chen', 'Javadi', 'NMF', 'PCHA_on_AE', 'AAnet']
        The method to use for archetypal analysis
    n_subsample : int
        Number of data points to subsample
    seed : int
        Random seed
    n_batches : int
        Number of batches used to train AAnet or the autoencoder
    Returns
    -------
    mse_archetypes: float
        Mean squared error between the learned archetypes and the ground
        truth archetypes as calculated in the feature space
    mse_encoding: float
        Mean squared error between the true coordinates of the data in the
        archetypal space and the coordinates in the learned space
    new_archetypal_coords: [samples, archetypes]
        Learned encoding of the samples in the archetypal space
    new_archetypes: [archetypes, features]
        Learned archetypes in the feature space
    """
    tic = time.time()
    # Select a subsample of the data
    np.random.seed(seed)
    if n_subsample is not None:
        r_idx = np.random.choice(data.shape[0], n_subsample, replace=False)
        data = data[r_idx, :]  # otherwise really slow
        true_archetypal_coords = true_archetypal_coords[r_idx, :]

    if method == 'Chen':
        '''AA as implemented in Chen et al. 2014 https://arxiv.org/abs/1405.6472'''
        new_archetypes, new_archetypal_coords, _ = sp.archetypalAnalysis(
            np.asfortranarray(data.T),
            p=n_archetypes,
            returnAB=True,
            numThreads=-1)

        # Fix transposition
        new_archetypal_coords = new_archetypal_coords.toarray().T
        new_archetypes = new_archetypes.T
    elif method == 'Javadi':
        '''AA as implemented in Javadi et al. 2017 https://arxiv.org/abs/1705.02994'''

        new_archetypal_coords, new_archetypes, _, _ = javadi.acc_palm_nmf(
            data, r=n_archetypes, maxiter=25, plotloss=False, ploterror=False)
    elif method == 'PCHA':
        '''Principal convex hull analysis as implemented by Morup and Hansen 2012.
        https://www.sciencedirect.com/science/article/pii/S0925231211006060 '''
        new_archetypes, new_archetypal_coords, _, _, _ = PCHA(data.T,
                                                              noc=n_archetypes)
        new_archetypes = np.array(new_archetypes.T)
        new_archetypal_coords = np.array(new_archetypal_coords.T)

    elif method == 'kernelPCHA':
        '''PCHA in a kernel space as described by Morup and Hansen 2012.
        https://www.sciencedirect.com/science/article/pii/S0925231211006060 '''
        D = scipy.spatial.distance.pdist(data)
        D = scipy.spatial.distance.squareform(D)
        sigma = np.std(D)
        #K = np.exp(-((D**2)/sigma))
        K = data @ data.T
        _, new_archetypal_coords, C, _, _ = PCHA(K, noc=n_archetypes)
        new_archetypes = np.array(data.T @ C).T
        new_archetypal_coords = np.array(new_archetypal_coords.T)

    elif method == 'NMF':
        '''Factor analysis using non-negative matrix factorization (NMF)'''
        nnmf = NMF(n_components=n_archetypes,
                   init='nndsvda',
                   tol=1e-4,
                   max_iter=1000)
        new_archetypal_coords = nnmf.fit_transform(data - np.min(data))
        new_archetypes = nnmf.components_

    elif method == 'PCHA_on_AE':
        ##############
        # MODEL PARAMS
        ##############
        noise_z_std = 0
        z_dim = arch
        act_out = tf.nn.tanh
        input_dim = data.shape[1]

        enc_AE = network.Encoder(num_at=n_archetypes, z_dim=z_dim)
        dec_AE = network.Decoder(x_dim=input_dim,
                                 noise_z_std=noise_z_std,
                                 z_dim=z_dim,
                                 act_out=act_out)

        # By setting both gammas to zero, we arrive at the standard autoencoder
        AE = AAnet.AAnet(enc_AE, dec_AE, gamma_convex=0, gamma_nn=0)
        ##########
        # TRAINING
        ##########
        # AE
        AE.train(data, batch_size=256, num_batches=n_batches)
        latent_encoding = AE.data2z(data)

        # PCHA learns an encoding into a simplex
        new_archetypes, new_archetypal_coords, _, _, _ = PCHA(
            latent_encoding.T, noc=n_archetypes)
        new_archetypes = np.array(new_archetypes.T)
        new_archetypal_coords = np.array(new_archetypal_coords.T)

        # Decode ATs
        new_archetypes = AE.z2data(new_archetypes)

    elif method == 'AAnet':
        ##############
        # MODEL PARAMS
        ##############

        noise_z_std = latent_noise
        z_dim = arch
        act_out = tf.nn.tanh
        input_dim = data.shape[1]

        enc_net = network.Encoder(num_at=n_archetypes, z_dim=z_dim)
        dec_net = network.Decoder(x_dim=input_dim,
                                  noise_z_std=noise_z_std,
                                  z_dim=z_dim,
                                  act_out=act_out)
        model = AAnet.AAnet(enc_net, dec_net)

        ##########
        # TRAINING
        ##########

        model.train(data, batch_size=256, num_batches=n_batches)

        ###################
        # GETTING OUTPUT
        ###################

        new_archetypal_coords = model.data2at(data)
        new_archetypes = model.get_ats_x()
    else:
        raise ValueError('{} is not a valid method'.format(method))
    toc = time.time() - tic

    # Calculate MSE if given ground truth
    if true_archetypes is not None:
        mse_archetypes, _, _ = calc_MSE(new_archetypes, true_archetypes)
    else:
        mse_archetypes = None
    if true_archetypal_coords is not None:
        mse_encoding, _, _ = calc_MSE(new_archetypal_coords.T,
                                      true_archetypal_coords.T)
    else:
        mse_encoding = None

    return mse_archetypes, mse_encoding, new_archetypal_coords, new_archetypes, toc
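
An illustrative call (not from the original source), assuming data is a samples-by-features NumPy array loaded elsewhere. Ground-truth arguments and n_subsample are omitted, so the two MSE return values come back as None (the subsampling branch above assumes ground-truth coordinates are provided):

mse_ats, mse_enc, at_coords, ats, runtime = run_AA(data, n_archetypes=3, method='PCHA')
print('learned {} archetypes in {:.1f}s'.format(ats.shape[0], runtime))
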
Example #8
def main(input_path, training_mode='mle-gan'):
    # Preprocess the data
    data_obj = pr.Preprocessing()
    data_obj.training_mode = training_mode
    data_obj.run(input_path)

    # Create the network objects
    # Parameters
    input_size = len(data_obj.selected_columns)
    batch = data_obj.batch_size
    hidden_size = 200
    num_layers = 5
    num_directions = 1  # set to 2 for a bidirectional RNN
    beam_width = [1, 3, 5, 7, 10, 15]  # beam widths to evaluate with beam search

    # Create the networks
    enc = nw.Encoder(input_size, batch, hidden_size, num_layers,
                     num_directions).cuda()
    dec = nw.Decoder(input_size, batch, hidden_size, num_layers,
                     dropout=.3).cuda()
    dec.duration_time_loc = data_obj.duration_time_loc
    rnnD = nw.Discriminator(input_size,
                            batch,
                            hidden_size,
                            num_layers,
                            dropout=.3).cuda()
    model = nw.Seq2Seq(enc, dec).cuda()
    # Initialize model parameters
    model.apply(nw.init_weights)
    rnnD.apply(nw.init_weights)
    # Create the optimizers
    optimizerG = torch.optim.RMSprop(model.parameters(), lr=5e-5)
    optimizerD = torch.optim.RMSprop(rnnD.parameters(), lr=5e-5)
    # Optionally train on multiple GPUs (left disabled)
    # if torch.cuda.device_count() > 1:
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     model = torch.nn.DataParallel(model, device_ids= range(0, torch.cuda.device_count()))
    #     enc = torch.nn.DataParallel(enc, device_ids= range(0, torch.cuda.device_count()))
    #     dec = torch.nn.DataParallel(dec, device_ids= range(0, torch.cuda.device_count()))
    #     rnnD = torch.nn.DataParallel(rnnD, device_ids= range(0, torch.cuda.device_count()))

    #--------------------------------------------
    if training_mode == 'mle':
        print("Training via MLE")
        nw.train_mle(model, optimizerG, data_obj)
        # Load the best model saved during training
        path = os.path.join(data_obj.output_dir, 'rnnG(validation entropy).m')
        model.load_state_dict(torch.load(path))
        nw.model_eval_test(model, data_obj, mode='test')

    elif training_mode == 'mle-gan':
        print("Training via MLE-GAN")
        # Training via MLE-GAN
        nw.train_gan(model, rnnD, optimizerG, optimizerD, data_obj)
        # Loading the best model saved during training
        path = os.path.join(data_obj.output_dir,
                            'rnnG(validation entropy gan).m')
        model.load_state_dict(torch.load(path))
        nw.model_eval_test(model, data_obj, mode='test')
    #-------------------------------------------

    # Generate suffixes
    print("Start generating suffixes using beam search!")
    for i in beam_width:
        sf.suffix_generate(model, data_obj, candidate_num=i)
        sf.suffix_similarity(data_obj, beam_size=i)

    return data_obj, model
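
A hypothetical entry point; the CSV path is a placeholder and the expected input format is defined by pr.Preprocessing, which is not shown here:

if __name__ == '__main__':
    # 'event_log.csv' is an illustrative path, not a file referenced by the original code
    data_obj, model = main('event_log.csv', training_mode='mle-gan')
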