def create_encoder(opt):
    """Build an encoder from ``opt`` and initialise its weights.

    ``opt`` is handed to ``network.Encoder`` unchanged. Its ``init_type`` and
    ``init_gain`` attributes are forwarded to ``network.weights_init``.

    Returns the initialised encoder after printing a confirmation line.
    """
    net = network.Encoder(opt)

    # Weight initialisation is driven entirely by the option object.
    init_settings = {'init_type': opt.init_type, 'init_gain': opt.init_gain}
    network.weights_init(net, **init_settings)

    print('Encoder is created!')
    return net
def build_model(self):
    """Create the networks, losses and optimizers used for training.

    Sets on ``self``:
      * ``E`` / ``G``: encoder and generator.
      * ``D_<name>``: one discriminator per entry of ``self.cls``.
      * ``vgg``: VGG network loaded from ``addings/vgg_conv.pth``.
      * ``criterionL1`` / ``criterionL2`` / ``criterionGAN``: loss objects.
      * ``e_optimizer`` / ``g_optimizer`` / ``d_<name>_optimizer``: Adam
        optimizers.

    All networks are moved to the GPU when CUDA is available. On a CPU-only
    host the method still completes: the checkpoint is loaded onto the CPU
    and the GAN loss is given a CPU tensor type.

    NOTE(review): both ``network`` and ``net`` are referenced here; confirm
    that both names are imported by the module.
    """
    use_cuda = torch.cuda.is_available()

    # Define generators and discriminators
    self.E = network.Encoder(self.e_conv_dim)
    self.G = network.Generator(self.g_conv_dim)
    for i in self.cls:
        setattr(self, "D_" + i,
                net.Discriminator(self.img_size, self.d_conv_dim,
                                  self.d_repeat_num, self.norm))

    # Define vgg for perceptual loss. Load onto the CPU so a checkpoint saved
    # from a GPU can be read on any host; the module is moved to the GPU
    # below when one is available.
    self.vgg = net.VGG()
    self.vgg.load_state_dict(
        torch.load('addings/vgg_conv.pth',
                   map_location=lambda storage, loc: storage))

    # Define loss
    self.criterionL1 = torch.nn.L1Loss()
    self.criterionL2 = torch.nn.MSELoss()
    # Match the tensor type to the device the networks will live on.
    gan_tensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    self.criterionGAN = GANLoss(use_lsgan=True, tensor=gan_tensor)

    # Optimizers
    betas = [self.beta1, self.beta2]
    self.e_optimizer = torch.optim.Adam(self.E.parameters(), self.e_lr, betas)
    self.g_optimizer = torch.optim.Adam(self.G.parameters(), self.g_lr, betas)
    for i in self.cls:
        discriminator = getattr(self, "D_" + i)
        # Only parameters that require gradients are optimised.
        trainable = (p for p in discriminator.parameters() if p.requires_grad)
        setattr(self, "d_" + i + "_optimizer",
                torch.optim.Adam(trainable, self.d_lr, betas))

    # Weights initialization
    self.E.apply(self.weights_init_xavier)
    self.G.apply(self.weights_init_xavier)
    for i in self.cls:
        getattr(self, "D_" + i).apply(self.weights_init_xavier)

    # Print networks
    self.print_network(self.E, 'E')
    self.print_network(self.G, 'G')
    for i in self.cls:
        self.print_network(getattr(self, "D_" + i), "D_" + i)

    if use_cuda:
        self.E.cuda()
        self.G.cuda()
        self.vgg.cuda()
        for i in self.cls:
            getattr(self, "D_" + i).cuda()
def __init__(self, opt):
    """Set up the training model from the option object ``opt``.

    ``self.isTrain`` mirrors ``opt.isTrain``. When it is truthy, the encoder
    (``netE``), decoder (``netG``) and discriminator (``netD``) are built and
    initialised with ``network.weights_init``, and the GAN, KL and
    reconstruction criteria are attached. Otherwise nothing else is created.
    """
    super(TrainModel, self).__init__()
    self.isTrain = opt.isTrain

    if not self.isTrain:
        # Nothing is built outside training mode.
        return

    # Each network is initialised right after it is constructed, in the
    # order encoder -> decoder -> discriminator.
    encoder = network.Encoder(opt.input_nc, opt.ngf, opt.n_downsampling)
    self.netE = encoder
    self.netE.apply(network.weights_init)

    decoder = network.Decoder(opt.input_nc, opt.output_nc, opt.ngf,
                              opt.n_downsampling)
    self.netG = decoder
    self.netG.apply(network.weights_init)

    discriminator = network.Discriminator(opt.input_nc, opt.ngf, opt.n_layer)
    self.netD = discriminator
    self.netD.apply(network.weights_init)

    # Loss criteria. KLLoss and ReconLoss are stored as-is, not called here.
    self.criterionGAN = nn.BCELoss()
    self.criterionKL = network.KLLoss
    self.criterionRecon = network.ReconLoss
# NOTE(review): this is an excerpt from a larger body. `i`, `dataset_list`,
# `cuda`, `batch_size`, `batch_loader_dict` and `num_inputs` are defined
# outside the visible code, and the original nesting (e.g. an enclosing loop
# over datasets) cannot be recovered from here.

# Expression matrix (transposed) and labels of dataset `i`.
# presumably the transpose puts samples on the first axis - TODO confirm
gene_exp = dataset_list[i]['mz_exp'].transpose()
labels = dataset_list[i]['labels']

# construct DataLoader list
# When `cuda` is set, the tensors are moved to the GPU before being wrapped.
if cuda:
    torch_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(gene_exp).cuda(), torch.LongTensor(labels).cuda())
else:
    torch_dataset = torch.utils.data.TensorDataset(
        torch.FloatTensor(gene_exp), torch.LongTensor(labels))
# Shuffled batches; drop_last=True discards a final incomplete batch.
data_loader = torch.utils.data.DataLoader(torch_dataset, batch_size=batch_size,
                                          shuffle=True, drop_last=True)
# Loaders are stored under 1-based keys.
batch_loader_dict[i+1] = data_loader

# create model
encoder = models.Encoder(num_inputs=num_inputs)
decoder_a = models.Decoder_a(num_inputs=num_inputs)
decoder_b = models.Decoder_b(num_inputs=num_inputs)
discriminator = models.Discriminator(num_inputs=num_inputs)
if cuda:
    encoder.cuda()
    decoder_a.cuda()
    decoder_b.cuda()
    discriminator.cuda()

# training
# Per-loss histories, all starting empty.
loss_total_list = []  # list of total loss
loss_reconstruct_list = []
loss_transfer_list = []
loss_classifier_list = []
print('total train models: {}; total train batches: {}'.format( len(train_set), len(train_loader))) val_loader = udata.DataLoader(dataset=val_set, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True) print('total val models: {}; total val batches: {}'.format( len(val_set), len(val_loader))) import network encoder = network.Encoder().cuda() convrnn = network.ConvRNN3d().cuda() decoder = network.Decoder().cuda() NLL = torch.nn.NLLLoss() solver = optim.Adam([ { 'params': encoder.parameters() }, { 'params': convrnn.parameters() }, { 'params': decoder.parameters() }, ],
def _create_model(dim_input, dim_output, embedding_dim, hidden_dim, alignment_dim,
                  optimization_method, activation):
    """
    Create a neural network to train a new model

    Builds an encoder (constructed with ``with_bidirectional=True``) and a
    decoder (constructed with ``with_attention=True``), wires them together
    with two ``theano.scan`` loops and compiles the training and prediction
    functions. The decoder loop always runs for 128 steps.

    Args:
        dim_input (int)
        dim_output (int)
        embedding_dim (int): Bigger is better
            300 is a reasonable choice, according to Kann and Schütze (2016)
        hidden_dim (int): 100 achieved the best (among 50, 100, 200, 400)
            in Kann and Schütze (2016)
        alignment_dim (int)
        optimization_method (callable): called as
            ``optimization_method(loss, params)``; its result is passed as
            ``updates`` to ``theano.function``.
        activation: forwarded to ``network.Encoder.create_step_bidi`` and
            ``network.Decoder.create_step_attention``.

    Returns:
        tuple: ``(_update, _predict)`` where

        * ``_update(input_seq, output_seq, context_init, out_init)`` applies
          one optimisation step and returns the loss;
        * ``_predict(input_seq, context_init, out_init)`` returns the
          predicted sequence.

        NOTE(review): earlier revisions built (and discarded) zero vectors of
        shape ``(dim_output,)`` for ``out_init`` and ``(hidden_dim * 2,)``
        for ``context_init``; presumably callers are expected to pass initial
        values of those shapes - confirm against the call sites.
    """
    input_seq = T.dmatrix('input_seq')
    output_seq = T.dmatrix('output_seq')
    # Initial hidden states of the forward and backward encoder passes.
    h_init = theano.shared(np.zeros(hidden_dim))
    h_init_rev = theano.shared(np.zeros(hidden_dim))
    out_init = T.dvector('out_init')
    context_init = T.dvector('context_init')

    encoder = network.Encoder(dim_input, embedding_dim, hidden_dim,
                              with_bidirectional=True)
    # The decoder is given twice the encoder's hidden size.
    decoder = network.Decoder(dim_output, embedding_dim, hidden_dim * 2,
                              with_attention=True, alignment_dim=alignment_dim)
    params = encoder.params + decoder.params

    ###
    # Loop for encoder
    ###
    # The input is scanned together with its reversed copy.
    encoder_outputs, _ = theano.scan(
        fn=network.Encoder.create_step_bidi(activation),
        sequences=[input_seq, input_seq[::-1]],
        outputs_info=[context_init, h_init, h_init_rev],
        non_sequences=encoder.params,
        strict=True)
    annotations = encoder_outputs[0]
    # The last annotation seeds the decoder's context.
    context_vec = annotations[-1]

    ###
    # Loop for decoder
    ###
    decoder_outputs, _ = theano.scan(
        fn=network.Decoder.create_step_attention(activation),
        outputs_info=[out_init, context_vec],
        non_sequences=[annotations] + decoder.params,
        n_steps=128,
        strict=True)
    predicted_seq = decoder_outputs[0]

    ###
    # Compute loss tensor
    ###
    loss = network.loss_seq_cross_entropy(output_seq, predicted_seq)
    loss.name = 'loss'

    ###
    # Update weights
    ###
    updating = optimization_method(loss, params)

    ###
    # Realize the function
    ###
    _update = theano.function(
        inputs=[input_seq, output_seq, context_init, out_init],
        outputs=loss,
        updates=updating)
    _predict = theano.function(
        inputs=[input_seq, context_init, out_init],
        outputs=predicted_seq)

    return (_update, _predict)
def run_AA(data, n_archetypes, true_archetypal_coords=None, true_archetypes=None,
           method='PCHA', n_subsample=None, n_batches=40000, latent_noise=0.05,
           arch=[1024, 512, 256, 128], seed=42):
    """Runs the selected archetypal analysis method on input data and
    calculates errors on the data in the archetypal space and the error
    between the learned vs true archetypes.

    Parameters
    ----------
    data : [samples, features]
        Data in the feature space
    n_archetypes : int
        Number of archetypes to be learned
    true_archetypal_coords : [samples, archetypes], optional
        Ground truth archetypal coordinates. Rows must sum to 1.
        Subsampled together with `data` when `n_subsample` is given.
    true_archetypes : [archetypes, features], optional
        Ground truth archetypes in the feature space
    method : ['PCHA', 'kernelPCHA', 'Chen', 'Javadi', 'NMF', 'PCHA_on_AE', 'AAnet']
        The method to use for archetypal analysis
    n_subsample : int, optional
        Number of data points to subsample (without replacement)
    n_batches : int
        Number of batches used to train AAnet or AutoEncoder
    latent_noise : float
        Passed to the decoder as `noise_z_std` (method 'AAnet' only)
    arch : list of int
        Passed as `z_dim` to the encoder and decoder
        ('PCHA_on_AE' and 'AAnet' only). The list is copied before use.
    seed : int
        Random seed. Note that this reseeds NumPy's global random state.

    Returns
    -------
    mse_archetypes: float or None
        Mean squared error between the learned archetypes and the ground truth
        archetypes as calculated in the feature space. None when
        `true_archetypes` is not given.
    mse_encoding: float or None
        Mean squared error between the true coordinates of the data in the
        archetypal space and the coordinates in the learned space. None when
        `true_archetypal_coords` is not given.
    new_archetypal_coords: [samples, archetypes]
        Learned encoding of the samples in the archetypal space
    new_archetypes: [archetypes, features]
        Learned archetypes in the feature space
    toc: float
        Wall-clock seconds spent on subsampling and fitting

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    tic = time.time()

    # Select a subsample of the data
    np.random.seed(seed)
    if n_subsample is not None:
        r_idx = np.random.choice(data.shape[0], n_subsample, replace=False)
        data = data[r_idx, :]  # otherwise really slow
        # Ground truth is optional; only subsample it when it was provided.
        if true_archetypal_coords is not None:
            true_archetypal_coords = true_archetypal_coords[r_idx, :]

    if method == 'Chen':
        # AA as implemented in Chen et al. 2014
        # https://arxiv.org/abs/1405.6472
        new_archetypes, new_archetypal_coords, _ = sp.archetypalAnalysis(
            np.asfortranarray(data.T), p=n_archetypes, returnAB=True,
            numThreads=-1)
        # Fix transposition
        new_archetypal_coords = new_archetypal_coords.toarray().T
        new_archetypes = new_archetypes.T
    elif method == 'Javadi':
        # AA as implemented in Javadi et al. 2017
        # https://arxiv.org/abs/1705.02994
        new_archetypal_coords, new_archetypes, _, _ = javadi.acc_palm_nmf(
            data, r=n_archetypes, maxiter=25, plotloss=False, ploterror=False)
    elif method == 'PCHA':
        # Principal convex hull analysis as implemented by Morup and Hansen 2012.
        # https://www.sciencedirect.com/science/article/pii/S0925231211006060
        new_archetypes, new_archetypal_coords, _, _, _ = PCHA(
            data.T, noc=n_archetypes)
        new_archetypes = np.array(new_archetypes.T)
        new_archetypal_coords = np.array(new_archetypal_coords.T)
    elif method == 'kernelPCHA':
        # PCHA in a kernel space as described by Morup and Hansen 2012.
        # https://www.sciencedirect.com/science/article/pii/S0925231211006060
        # A linear kernel is used. An RBF alternative would be
        # K = exp(-(D**2) / sigma) with D the pairwise distance matrix and
        # sigma = std(D); it is not computed because it was never used.
        K = data @ data.T
        _, new_archetypal_coords, C, _, _ = PCHA(K, noc=n_archetypes)
        new_archetypes = np.array(data.T @ C).T
        new_archetypal_coords = np.array(new_archetypal_coords.T)
    elif method == 'NMF':
        # Factor analysis using non-negative matrix factorization (NMF)
        nnmf = NMF(n_components=n_archetypes, init='nndsvda', tol=1e-4,
                   max_iter=1000)
        # Shift the data so that its minimum is zero (NMF needs
        # non-negative input).
        new_archetypal_coords = nnmf.fit_transform(data - np.min(data))
        new_archetypes = nnmf.components_
    elif method == 'PCHA_on_AE':
        ##############
        # MODEL PARAMS
        ##############
        noise_z_std = 0
        # Copy so the networks can never mutate the shared default list.
        z_dim = list(arch)
        act_out = tf.nn.tanh
        input_dim = data.shape[1]

        enc_AE = network.Encoder(num_at=n_archetypes, z_dim=z_dim)
        dec_AE = network.Decoder(x_dim=input_dim, noise_z_std=noise_z_std,
                                 z_dim=z_dim, act_out=act_out)

        # By setting both gammas to zero, we arrive at the standard autoencoder
        AE = AAnet.AAnet(enc_AE, dec_AE, gamma_convex=0, gamma_nn=0)

        ##########
        # TRAINING
        ##########
        AE.train(data, batch_size=256, num_batches=n_batches)
        latent_encoding = AE.data2z(data)

        # PCHA learns an encoding into a simplex
        new_archetypes, new_archetypal_coords, _, _, _ = PCHA(
            latent_encoding.T, noc=n_archetypes)
        new_archetypes = np.array(new_archetypes.T)
        new_archetypal_coords = np.array(new_archetypal_coords.T)

        # Decode ATs
        new_archetypes = AE.z2data(new_archetypes)
    elif method == 'AAnet':
        ##############
        # MODEL PARAMS
        ##############
        noise_z_std = latent_noise
        # Copy so the networks can never mutate the shared default list.
        z_dim = list(arch)
        act_out = tf.nn.tanh
        input_dim = data.shape[1]

        enc_net = network.Encoder(num_at=n_archetypes, z_dim=z_dim)
        dec_net = network.Decoder(x_dim=input_dim, noise_z_std=noise_z_std,
                                  z_dim=z_dim, act_out=act_out)
        model = AAnet.AAnet(enc_net, dec_net)

        ##########
        # TRAINING
        ##########
        model.train(data, batch_size=256, num_batches=n_batches)

        ###################
        # GETTING OUTPUT
        ###################
        new_archetypal_coords = model.data2at(data)
        new_archetypes = model.get_ats_x()
    else:
        raise ValueError('{} is not a valid method'.format(method))

    toc = time.time() - tic

    # Calculate MSE if given ground truth
    if true_archetypes is not None:
        mse_archetypes, _, _ = calc_MSE(new_archetypes, true_archetypes)
    else:
        mse_archetypes = None

    if true_archetypal_coords is not None:
        mse_encoding, _, _ = calc_MSE(new_archetypal_coords.T,
                                      true_archetypal_coords.T)
    else:
        mse_encoding = None

    return mse_archetypes, mse_encoding, new_archetypal_coords, new_archetypes, toc
def main(input_path, training_mode='mle-gan'):
    """Preprocess the data, train a seq2seq model and generate suffixes.

    ``training_mode`` selects the training routine: ``'mle'`` runs
    ``nw.train_mle``, ``'mle-gan'`` runs ``nw.train_gan``. After training, the
    best checkpoint saved in ``data_obj.output_dir`` is reloaded and evaluated
    on the test split. Any other value skips training, reloading and
    evaluation. Suffixes are then generated and scored for every beam width.

    Returns the preprocessing object and the model as ``(data_obj, model)``.
    """
    # Preprocessing data
    data_obj = pr.Preprocessing()
    data_obj.training_mode = training_mode
    data_obj.run(input_path)

    # Network hyper-parameters
    input_size = len(data_obj.selected_columns)
    batch_size = data_obj.batch_size
    hidden_size = 200
    num_layers = 5
    num_directions = 1  # It should be 2 if we use bidirectional
    beam_widths = [1, 3, 5, 7, 10, 15]  # Window size of beam search

    # Creating networks (everything is placed on the GPU)
    enc = nw.Encoder(input_size, batch_size, hidden_size, num_layers,
                     num_directions).cuda()
    dec = nw.Decoder(input_size, batch_size, hidden_size, num_layers,
                     dropout=.3).cuda()
    dec.duration_time_loc = data_obj.duration_time_loc
    rnnD = nw.Discriminator(input_size, batch_size, hidden_size, num_layers,
                            dropout=.3).cuda()
    model = nw.Seq2Seq(enc, dec).cuda()

    # Initializing model parameters
    for module in (model, rnnD):
        module.apply(nw.init_weights)

    # Creating optimizers
    optimizerG = torch.optim.RMSprop(model.parameters(), lr=5e-5)
    optimizerD = torch.optim.RMSprop(rnnD.parameters(), lr=5e-5)

    # Multi-GPU wrapping (torch.nn.DataParallel around model / enc / dec /
    # rnnD) was left disabled in the original code and is not performed here.

    # --------------------------------------------
    # Train, remembering which checkpoint the chosen routine saves.
    best_checkpoint = None
    if training_mode == 'mle':
        print("Training via MLE")
        nw.train_mle(model, optimizerG, data_obj)
        best_checkpoint = 'rnnG(validation entropy).m'
    elif training_mode == 'mle-gan':
        print("Training via MLE-GAN")
        nw.train_gan(model, rnnD, optimizerG, optimizerD, data_obj)
        best_checkpoint = 'rnnG(validation entropy gan).m'

    if best_checkpoint is not None:
        # Loading the best model saved during training
        path = os.path.join(data_obj.output_dir, best_checkpoint)
        model.load_state_dict(torch.load(path))
        nw.model_eval_test(model, data_obj, mode='test')

    # -------------------------------------------
    # Generating suffixes
    print("start generating suffixes using beam search!")
    for width in beam_widths:
        sf.suffix_generate(model, data_obj, candidate_num=width)
        sf.suffix_similarity(data_obj, beam_size=width)

    return data_obj, model