# -- VAE-GAN setup fragment (reconstructed formatting) -------------------------
# NOTE(review): this chunk arrived with every statement collapsed onto one
# physical line (a syntax error as-is); reformatted into valid Python below.

import vaegan_copy_model
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import KMeans


def loss_function(recon_x, x, mu, logvar):
    """Standard VAE loss: reconstruction BCE plus KL divergence.

    recon_x, x: reconstructed and original inputs (same shape).
    mu, logvar: latent Gaussian parameters produced by the encoder.
    """
    BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
    # KL(q(z|x) || N(0, I)); Kingma & Welling, https://arxiv.org/abs/1312.6114.
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD


ctype = 'weighted'  # 'average'

datasets = torch_dataset_cancer.CANCER_TYPES
trainset = CancerTypesDataset(
    dataset_names=torch_dataset_cancer.CANCER_TYPES,
    meta_groups_files=torch_dataset_cancer.META_GROUPS,
    metagroups_names=torch_dataset_cancer.CANCER_TYPES)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=10, shuffle=True, num_workers=5, pin_memory=True)
# No held-out split in this fragment: the test set aliases the training set.
testset = trainset
testloader = trainloader

n_latent_vector = 2
encoder = vaegan_copy_model.Encoder(n_latent_vector=n_latent_vector)
decoder = vaegan_copy_model.Decoder(n_latent_vector=n_latent_vector)
discriminator = vaegan_copy_model.Discriminator(n_latent_vector=n_latent_vector)

model_base_folder = "/home/hag007/Desktop/nn/"
PATH_DISCRIMINATOR = model_base_folder + "GAN_DIS_mdl"
PATH_ENCODER = model_base_folder + "GAN_ENC_mdl"
PATH_DECODER = model_base_folder + "GAN_DEC_mdl"

load_model = True
if load_model and os.path.exists(PATH_ENCODER):
    # NOTE(review): the body of this `if` was missing from the fragment.
    # Reconstructed from the identical checkpoint-loading pattern used
    # elsewhere in this file (the GAN_*_model loads) -- confirm against the
    # original source before relying on it.
    encoder.load_state_dict(torch.load(PATH_ENCODER))
    encoder.eval()
    decoder.load_state_dict(torch.load(PATH_DECODER))
    decoder.eval()
    discriminator.load_state_dict(torch.load(PATH_DISCRIMINATOR))
    discriminator.eval()
# -- GAN/full-pipeline composition and per-dataset expression files ------------
# NOTE(review): statements were collapsed onto one physical line; reformatted.

m_GAN = nn.Sequential(decoder, discriminator)
m_FULL = nn.Sequential(encoder, decoder, discriminator)

# Collect one gene-expression CSV path per cancer type.
csv_files = []
datasets = CANCER_TYPES
for cur_ds in datasets:
    dataset = cur_ds
    constants.update_dirs(DATASET_NAME_u=dataset)
    data_normalizaton = "fpkm"
    gene_expression_file_name, phenotype_file_name, survival_file_name, \
        mutation_file_name, mirna_file_name, pval_preprocessing_file_name = \
        build_gdc_params(dataset=dataset, data_normalizaton=data_normalizaton)
    csv_files.append(
        os.path.join(constants.DATA_DIR, gene_expression_file_name))

trainset = CancerTypesDataset(csv_files=csv_files, labels=datasets)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=100, shuffle=True, num_workers=40, pin_memory=True)
# Test set aliases the training set in this fragment.
testset = trainset  # CancerTypesDataset(csv_files=csv_files, labels=datasets)
testloader = trainloader  # torch.utils.data.DataLoader(trainset, batch_size=10,
#                           shuffle=True, num_workers=10)

criterion = nn.BCELoss()

# create your optimizer
# NOTE(review): `m_VAE` is not defined anywhere in this fragment -- presumably
# built earlier in the full file (e.g. nn.Sequential(encoder, decoder));
# verify before running.
vae_optimizer = optim.Adam(m_VAE.parameters(), lr=0.00001)
gan_optimizer = optim.Adam(m_GAN.parameters(), lr=0.00001)
# NOTE(review): truncated fragment -- it begins mid-function (the enclosing
# `def` header for the BCE/KLD statements is missing) and ends mid-call
# (the trailing `torch.utils.data.DataLoader(...` is cut off). Statements are
# collapsed onto one physical line and use a Python-2 `print` statement.
# Left byte-identical because a safe reconstruction is not possible from here.
BCE = F.binary_cross_entropy(recon_x, x, reduction='sum') # see Appendix B from VAE paper: # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 # https://arxiv.org/abs/1312.6114 # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) return BCE + 0.1 * KLD datasets = torch_dataset_cancer.CANCER_TYPES torch_dataset = CancerTypesDataset( dataset_names=torch_dataset_cancer.CANCER_TYPES, meta_groups_files=torch_dataset_cancer.META_GROUPS, metagroups_names=[ "{}_{}".format(x.split("/")[1].split(".")[0], i_x) for i_x, x in enumerate(torch_dataset_cancer.META_GROUPS) ]) train_dataset, test_dataset = torch.utils.data.random_split( torch_dataset, [ torch_dataset.__len__() - torch_dataset.__len__() / 100, torch_dataset.__len__() / 100 ]) print "train: {}, test: {}".format(len(train_dataset), len(test_dataset)) trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True, num_workers=num_workers,
# -- VAE training setup: config, loss, data split, model load ------------------
# NOTE(review): statements were collapsed onto one physical line; reformatted.

num_workers = 25
batch_size_train = 100
batch_size_val = 10


def loss_function(recon_x, x, mu, logvar):
    """Standard VAE loss: reconstruction BCE plus KL divergence.

    recon_x, x: reconstructed and original inputs (same shape).
    mu, logvar: latent Gaussian parameters produced by the encoder.
    """
    BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
    # KL(q(z|x) || N(0, I)); Kingma & Welling, https://arxiv.org/abs/1312.6114.
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD


datasets = torch_dataset_cancer.CANCER_TYPES
torch_dataset = CancerTypesDataset(
    dataset_names=torch_dataset_cancer.CANCER_TYPES,
    meta_groups_files=torch_dataset_cancer.META_GROUPS,
    # The original passed the enumerate index as an extra `format` arg, which
    # str.format silently ignores -- the labels are just the type names.
    metagroups_names=["{}".format(x)
                      for x in torch_dataset_cancer.CANCER_TYPES])

# Hold out ~1% for validation. `//` (floor division) is required: the original
# `/` yields a float under Python 3 and random_split rejects non-int lengths;
# under Python 2 int/int already floors, so behavior is unchanged there.
n_test = len(torch_dataset) // 100
train_dataset, test_dataset = torch.utils.data.random_split(
    torch_dataset, [len(torch_dataset) - n_test, n_test])
print("train: {}, test: {}".format(len(train_dataset), len(test_dataset)))

trainloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size_train, shuffle=True,
    num_workers=num_workers, pin_memory=True)
testloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size_val, shuffle=True,
    num_workers=num_workers, pin_memory=True)

net = vae_bn_after_relu_flex_model.Net(
    n_reduction_layers=2, factor=0.5, n_latent_vector=2)

load_model = True  # False
if load_model:
    # "/specific/netapp5/gaga/hagailevi/evaluation/bnet/output/VAE_model"
    PATH = "/home/hag007/Desktop/nn/VAE_model"
    net.load_state_dict(torch.load(PATH))
def main():
    """Train the VAE-GAN.

    Alternates encoder, decoder (generator) and discriminator updates each
    meta-epoch and checkpoints the three sub-models under
    constants.OUTPUT_GLOBAL_DIR.
    """
    # NOTE(review): this block was collapsed onto long physical lines and the
    # `m_VAE = ` assignment was split across a line boundary; reformatted.
    parser = argparse.ArgumentParser(description='args')
    parser.add_argument('--n_latent_vector', dest='n_latent_vector',
                        default='100')
    parser.add_argument('--load_model', dest='load_model', default="false")
    args = parser.parse_args()
    load_model = args.load_model == 'true'
    n_latent_vector = int(args.n_latent_vector)

    torch_dataset = CancerTypesDataset(
        dataset_names=torch_dataset_cancer.CANCER_TYPES,
        meta_groups_files=torch_dataset_cancer.META_GROUPS,
        metagroups_names=["{}".format(x)
                          for x in torch_dataset_cancer.CANCER_TYPES])
    # Hold out ~1% for validation. `//` keeps the lengths integral: the
    # original `/` produces floats under Python 3, which random_split rejects
    # (under Python 2 int/int already floors, so behavior is unchanged).
    n_test = len(torch_dataset) // 100
    train_dataset, test_dataset = torch.utils.data.random_split(
        torch_dataset, [len(torch_dataset) - n_test, n_test])
    print("n_train samples: {}, n_test samples: {}".format(
        len(train_dataset), len(test_dataset)))

    # NOTE(review): batch_size_train, batch_size_val, num_workers, ALPHA and
    # the backprop_* helpers are not defined in this fragment -- assumed to be
    # module-level names elsewhere in the file; verify.
    trainloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size_train, shuffle=True,
        num_workers=num_workers, pin_memory=True)
    testloader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size_val, shuffle=True,
        num_workers=num_workers, pin_memory=True)

    m_encoder = vae_gan_bn_after_relu_flex_model.Encoder(
        n_latent_vector=n_latent_vector)
    m_decoder = vae_gan_bn_after_relu_flex_model.Decoder(
        n_latent_vector=n_latent_vector)
    m_discriminator = vae_gan_bn_after_relu_flex_model.Discriminator(
        n_latent_vector=n_latent_vector)

    model_base_folder = constants.OUTPUT_GLOBAL_DIR
    PATH_DISCRIMINATOR = os.path.join(
        model_base_folder, "GAN_DIS_model"
    )  # os.path.join(constants.OUTPUT_GLOBAL_DIR, "VAE_model")
    PATH_ENCODER = os.path.join(model_base_folder, "GAN_ENC_model")
    PATH_DECODER = os.path.join(model_base_folder, "GAN_DEC_model")

    if load_model and os.path.exists(PATH_ENCODER):
        # Resume from checkpoints; eval() freezes batch-norm/dropout behavior.
        m_encoder.load_state_dict(torch.load(PATH_ENCODER))
        m_encoder.eval()
        m_decoder.load_state_dict(torch.load(PATH_DECODER))
        m_decoder.eval()
        m_discriminator.load_state_dict(torch.load(PATH_DISCRIMINATOR))
        m_discriminator.eval()

    m_VAE = vae_gan_bn_after_relu_flex_model.VAE(m_encoder, m_decoder)
    m_GAN = vae_gan_bn_after_relu_flex_model.GAN(m_decoder, m_discriminator)
    m_VAEGAN = vae_gan_bn_after_relu_flex_model.VAEGAN(m_VAE, m_discriminator)

    # create your optimizer: one per sub-model; the discriminator learns at
    # lr * ALPHA.
    lr = 0.001
    o_discriminator = optim.Adam(m_discriminator.parameters(), lr=lr * ALPHA)
    o_encoder = optim.Adam(m_encoder.parameters(), lr=lr)
    o_decoder = optim.Adam(m_decoder.parameters(), lr=lr)

    n_epoches = 1
    for meta_epoch in range(0, 1000):  # loop over the dataset multiple times
        print("meta_epoch: {}".format(meta_epoch))
        discrimination_loss = 100
        gen_loss = 100
        print("start backprop_vae..")
        # KL weight ramps linearly from 0 to 1 over the first 100 meta-epochs.
        backprop_encoder(m_VAEGAN, [o_encoder], n_epoches, trainloader,
                         testloader, min(meta_epoch / 100.0, 1.0))
        print("start backprop_gen..")
        gen_loss = backprop_gen(m_VAEGAN, o_decoder, n_epoches, trainloader)
        print("start backprop_dis..")
        discrimination_loss, discrimination_accuracy = backprop_dis(
            m_VAEGAN, o_discriminator, n_epoches, trainloader)

        # Checkpoint all three sub-models every meta-epoch.
        torch.save(m_encoder.state_dict(),
                   os.path.join(constants.OUTPUT_GLOBAL_DIR, "GAN_ENC_model"))
        torch.save(m_decoder.state_dict(),
                   os.path.join(constants.OUTPUT_GLOBAL_DIR, "GAN_DEC_model"))
        torch.save(m_discriminator.state_dict(),
                   os.path.join(constants.OUTPUT_GLOBAL_DIR, "GAN_DIS_model"))