def main(argv=None):
    opt = parse_args(argv)

    # Set the seed. Note: only the torch RNGs are seeded here; numpy and
    # Python's `random` module are still unseeded (the original TODO).
    seed = opt.seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)

    exp_dir = opt.load_folder
    if exp_dir is None:  # we create a new folder if we don't load.
        exp_dir = monitoring.create_experiment_folder(opt)

    # Creating the dataset.
    print("Getting the dataset...")
    dataset = datasets.get_dataset(opt, exp_dir)

    # Creating a model.
    print("Getting the model...")
    my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
        exp_dir, opt, dataset.dataset.input_size(),
        dataset.dataset.additional_info())

    # Training criterion.
    criterion = torch.nn.MSELoss()

    if not opt.cpu:
        print("Putting the model on gpu...")
        my_model.cuda(opt.gpu_selection)

    # The training.
    print("Start training.")

    # Monitoring and predictions.
    predictions = np.zeros(
        (dataset.dataset.nb_patient, dataset.dataset.nb_gene))
    indices_patients = np.arange(dataset.dataset.nb_patient)
    indices_genes = np.arange(dataset.dataset.nb_gene)
    # All (gene, patient) index pairs, precomputed for full-matrix prediction.
    xdata = np.transpose([
        np.tile(indices_genes, len(indices_patients)),
        np.repeat(indices_patients, len(indices_genes))
    ])
    progress_bar_modulo = max(1, len(dataset) // 100)

    monitoring_dic = {}
    monitoring_dic['train_loss'] = []

    for t in range(epoch, opt.epoch):
        start_timer = time.time()
        thisepoch_trainloss = []

        with tqdm(dataset, unit="batch") as tepoch:
            for mini in tepoch:
                tepoch.set_description(f"Epoch {t}")
                inputs, targets = mini[0], mini[1]

                inputs = Variable(inputs, requires_grad=False).float()
                targets = Variable(targets, requires_grad=False).float()

                if not opt.cpu:
                    inputs = inputs.cuda(opt.gpu_selection)
                    targets = targets.cuda(opt.gpu_selection)

                # Forward pass: compute predicted y by passing x to the model.
                y_pred = my_model(inputs).float()
                y_pred = y_pred.squeeze()
                targets = torch.reshape(targets, (targets.shape[0], ))

                # Compute the loss.
                loss = criterion(y_pred, targets)
                thisepoch_trainloss.append(loss.item())
                tepoch.set_postfix(loss=loss.item())

                # Saving the embeddings.
                np.save(os.path.join(exp_dir, 'pixel_epoch_{}'.format(t)),
                        my_model.emb_1.weight.cpu().data.numpy())
                np.save(os.path.join(exp_dir, 'digit_epoch_{}'.format(t)),
                        my_model.emb_2.weight.cpu().data.numpy())

                # Zero gradients, perform a backward pass, and update the weights.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        monitoring.save_checkpoint(my_model, optimizer, t, opt, exp_dir)
        monitoring_dic['train_loss'].append(np.mean(thisepoch_trainloss))
        np.save(f'{exp_dir}/train_loss.npy', monitoring_dic['train_loss'])
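# The TODO above flags that seeding is incomplete: only the torch RNGs are
# seeded, so numpy and Python's `random` can still vary between runs. A
# minimal sketch of a fuller seeding helper (a hypothetical addition, not
# part of this repo):

import random

import numpy as np
import torch


def set_seed(seed):
    """Seed every RNG a training loop like the one above may touch (sketch)."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)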
def main(argv=None):
    opt = parse_args(argv)

    # Set the seed (torch RNGs only; see the seeding note above).
    seed = opt.seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)

    exp_dir = opt.load_folder
    if exp_dir is None:  # we create a new folder if we don't load.
        exp_dir = monitoring.create_experiment_folder(opt)

    # Creating the dataset.
    print("Getting the dataset...")
    dataset = datasets.get_dataset(opt)

    # Creating a model.
    print("Getting the model...")
    my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
        exp_dir, opt, dataset.dataset.input_size())

    # Training criterion.
    criterion = torch.nn.MSELoss()

    if not opt.cpu:
        print("Putting the model on gpu...")
        my_model.cuda()

    # The training.
    print("Start training.")
    progress_bar_modulo = max(1, len(dataset) // 100)

    for t in range(epoch, opt.epoch):
        start_timer = time.time()

        if opt.save_error:
            outfname_g = '_'.join(['gene_epoch', str(t), 'prediction.npy'])
            outfname_g = os.path.join(exp_dir, outfname_g)
            outfname_t = '_'.join(['tissue_epoch', str(t), 'prediction.npy'])
            outfname_t = os.path.join(exp_dir, outfname_t)
            train_trace = np.zeros(
                (dataset.dataset.nb_gene, dataset.dataset.nb_patient))

        for no_b, mini in enumerate(dataset):
            inputs, targets = mini[0], mini[1]

            inputs = Variable(inputs, requires_grad=False).float()
            targets = Variable(targets, requires_grad=False).float()

            if not opt.cpu:
                inputs = inputs.cuda()
                targets = targets.cuda()

            # Forward pass: compute predicted y by passing x to the model.
            y_pred = my_model(inputs).float()

            if opt.save_error:
                # Log the predicted values per sample and per gene (S.L. validation).
                batch_inputs = mini[0].numpy()
                predicted_values = y_pred.data.cpu().numpy()
                train_trace[batch_inputs[:, 0],
                            batch_inputs[:, 1]] = predicted_values[:, 0]

            # Compute and print the loss.
            loss = criterion(y_pred, targets)
            if no_b % progress_bar_modulo == 0:
                print("Doing epoch {}, examples {}/{}. Loss: {}".format(
                    t, no_b, len(dataset), loss.item()))

            # Saving the embeddings.
            monitoring.save_everything(exp_dir, t, my_model, dataset.dataset)

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if opt.save_error:
            monitoring.dump_error_by_tissue(train_trace, dataset.dataset.data,
                                            outfname_t, exp_dir,
                                            dataset.dataset.data_type,
                                            dataset.dataset.nb_patient)
            monitoring.dump_error_by_gene(train_trace, dataset.dataset.data,
                                          outfname_g, exp_dir)

        print("Saving the model...")
        monitoring.save_checkpoint(my_model, optimizer, t, opt, exp_dir)

    print("Done!")
def main(argv=None):
    opt = parse_args(argv)

    # Set the seed (torch RNGs only).
    seed = opt.seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)

    if opt.cache == 0:
        opt.cache = random.getrandbits(128)

    exp_dir = opt.load_folder
    if exp_dir is None:  # we create a new folder if we don't load.
        exp_dir = monitoring.create_experiment_folder(opt)

    if opt.model == 'RNN':
        print('This model is deprecated - please use TCRonly from now on')

    # Creating the dataset.
    print("Getting the dataset...")
    if 'cached_dataset' not in os.listdir('.'):
        os.mkdir('cached_dataset')

    opt.dataset = 'binary_thome'
    tenth = opt.tenth
    #dataset = datasets.get_dataset(opt, exp_dir, tenth=opt.tenth)
    dataset = old_datasets.get_dataset(opt, exp_dir, test=True)

    # Creating a model.
    print("Getting the model...")
    my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
        exp_dir, opt, dataset.dataset.input_size())

    # Training criterion. Note: in the original, an NLLLoss assignment for
    # the 'NLL'/'allseq_bin' case was immediately overwritten; BCELoss is
    # what is actually used.
    criterion = torch.nn.MSELoss()
    if opt.loss == 'NLL' or opt.model == 'allseq_bin':
        criterion = torch.nn.BCELoss()

    if 'tcr_embs' not in os.listdir(exp_dir):
        if opt.model == 'TCRonly':
            os.mkdir(f'{exp_dir}/tcr_embs/')
        elif opt.model == 'allseq' or opt.model == 'allseq_bin':
            os.mkdir(f'{exp_dir}/tcr_embs/')
            os.mkdir(f'{exp_dir}/hla_embs/')

    if not opt.cpu:
        print("Putting the model on gpu...")
        my_model.cuda(opt.gpu_selection)

    loss_dict = {}
    loss_dict['train_losses'] = []

    def estimate_batch_accuracy(y, yhat):
        return np.sum([i == j for i, j in zip(y, yhat)]) / y.shape[0]

    if opt.model == 'allseq' or opt.model == 'allseq_bin':
        # Batches whose index is in this list are held out for validation.
        valid_list = np.load(
            '/u/trofimov/Emerson/processed_data/valid_list.npy')
        loss_dict['valid_losses'] = []

    # A single pass over the dataset to dump the model likelihoods per batch.
    print("Getting the likelihood")
    os.mkdir(f'{exp_dir}/Thome_tenth{tenth}_preds_100/')

    for t in range(1):
        loss_dict = monitoring.update_loss_dict(loss_dict, start=True)
        if opt.model == 'allseq_bin':
            good = 0

        for no_b, mini in enumerate(dataset):
            if opt.model == 'TCRonly':
                y_pred, my_model, targets = training.TCRonly_batch(
                    mini, opt, my_model)
                np.save(f'{exp_dir}/preds_100/likelihood_batch{no_b}.npy',
                        y_pred.data.cpu().numpy())
                if no_b % 5 == 0:
                    print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}")

            elif opt.model == 'allseq':
                inputs_k, inputs_h1, inputs_h2, inputs_h3, inputs_h4, targets = \
                    training.allseq_batch(mini, opt)
                y_pred = my_model(inputs_k, inputs_h1, inputs_h2, inputs_h3,
                                  inputs_h4).float()
                # Saved once under the enumeration index and once under the
                # dataset's own batch number, as in the original script.
                np.save(f'{exp_dir}/preds_100/likelihood_batch{no_b}.npy',
                        y_pred.data.cpu().numpy())
                batch_number = dataset.dataset.data[no_b]
                bn = batch_number[0]
                np.save(f'{exp_dir}/preds_100/likelihood_batch{bn}.npy',
                        y_pred.data.cpu().numpy())
                if no_b % 5 == 0:
                    print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}")

            elif opt.model == 'allseq_bin':
                inputs_k, inputs_h1, inputs_h2, inputs_h3, inputs_h4, targets = \
                    training.binallseq_batch(mini, opt)
                y_pred = my_model(inputs_k, inputs_h1, inputs_h2, inputs_h3,
                                  inputs_h4).float()
                batch_number = dataset.dataset.data[no_b]
                bn = batch_number[0]
                np.save(
                    f'{exp_dir}/Thome_tenth{tenth}_preds_100/likelihood_batch{bn}.npy',
                    y_pred.data.cpu().numpy())
                if no_b % 5 == 0:
                    print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}")
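# The pass above leaves one likelihood array on disk per batch. A minimal
# sketch (hypothetical helper, pure numpy) of collecting those dumps back
# into a single matrix for downstream analysis:

import glob

import numpy as np


def load_likelihoods(pred_dir):
    """Concatenate every likelihood_batch*.npy dump found in `pred_dir`."""
    files = sorted(glob.glob(f'{pred_dir}/likelihood_batch*.npy'))
    return np.concatenate([np.load(f) for f in files], axis=0)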
def main(argv=None): """Main.""" opt = parse_args(argv) seed = opt.seed torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) torch.manual_seed(seed) exp_dir = opt.load_folder if exp_dir is None: # we create a new folder if we don't load. exp_dir = monitoring.create_experiment_folder(opt) # creating the dataset print("Getting the dataset...") dataset = datasets.get_dataset(opt, exp_dir) # Creating a model print("Getting the model...") my_model, optimizer, epoch, opt = monitoring.load_checkpoint(exp_dir, opt) # Training optimizer and stuff criterion = nn.NLLLoss() criterion = nn.CrossEntropyLoss() if not opt.cpu: print("Putting the model on gpu...") my_model.cuda(opt.gpu_selection) # The training. print("Start training.") monitoring_dic = {} monitoring_dic['train_loss'] = [] monitoring_dic['valid_loss'] = [] monitoring_dic['train_accuracy'] = [] monitoring_dic['valid_accuracy'] = [] max_accuracy = 0 min_loss = 10000 patience = 5 for t in range(epoch, opt.epoch): if patience == 0: break thisepoch_trainloss = [] thisepoch_trainaccuracy = [] with tqdm(dataset, unit="batch") as tepoch: for mini in tepoch: tepoch.set_description(f"Epoch {t}") inputs, targets = mini[0], mini[1] inputs = Variable(inputs, requires_grad=False).long() targets = Variable(targets, requires_grad=False).long() if not opt.cpu: inputs = inputs.cuda(opt.gpu_selection) targets = targets.cuda(opt.gpu_selection) optimizer.zero_grad() y_pred = my_model(inputs).float() #y_pred = torch.reshape(y_pred, (y_pred.shape[0], )) #targets = torch.reshape(targets, (targets.shape[1], 1)) loss = criterion(y_pred, targets) to_list = loss.cpu().data.numpy().reshape((1, ))[0] thisepoch_trainloss.append(to_list) loss.backward() optimizer.step() accuracy = np.sum( np.argmax(y_pred.cpu().data.numpy(), axis=1) == targets.cpu().data.numpy()) / targets.shape[0] thisepoch_trainaccuracy.append(accuracy) tepoch.set_postfix(loss=loss.item(), accuracy=accuracy) inputs = dataset.dataset.valid_inputs inputs = torch.FloatTensor(inputs) inputs = Variable(inputs, requires_grad=False).long() targets = dataset.dataset.valid_targets targets = torch.FloatTensor(targets) targets = Variable(targets, requires_grad=False).long() if not opt.cpu: inputs = inputs.cuda(opt.gpu_selection) targets = targets.cuda(opt.gpu_selection) vlosses = [] accuracies = [] split = np.sum(targets.cpu().data.numpy()) / targets.shape[0] for i in tqdm(range(0, inputs.shape[0], 100)): with torch.no_grad(): y_pred = my_model(inputs[i:i + 100]) vloss = criterion(y_pred, targets[i:i + 100]) accuracy = np.sum( np.argmax(y_pred.cpu().data.numpy(), axis=1) == targets[i:i + 100].cpu().data.numpy()) / y_pred.shape[0] vloss = vloss.cpu().data.numpy().reshape((1, ))[0] vlosses.append(vloss) accuracies.append(accuracy) vloss = np.mean(vlosses) accuracy = np.mean(accuracies) if vloss < min_loss: monitoring.save_checkpoint(my_model, optimizer, t, opt, exp_dir) print( f'*** new min loss*** Validation loss: epoch {t}, loss: {vloss}, accuracy: {accuracy}, split {split}' ) min_loss = vloss patience = 5 else: print( f'Validation loss: epoch {t}, loss: {vloss}, accuracy:{accuracy}, split {split}' ) patience -= 1 monitoring_dic['valid_loss'].append(vloss) monitoring_dic['valid_accuracy'].append(accuracy) monitoring_dic['train_loss'].append(np.mean(thisepoch_trainloss)) monitoring_dic['train_accuracy'].append( np.mean(thisepoch_trainaccuracy)) trainacc = np.mean(thisepoch_trainaccuracy) print(f'Training accuracy: {trainacc}') np.save(f'{exp_dir}/train_loss.npy', monitoring_dic['train_loss']) 
np.save(f'{exp_dir}/valid_loss.npy', monitoring_dic['valid_loss']) np.save(f'{exp_dir}/train_accuracy.npy', monitoring_dic['train_accuracy']) np.save(f'{exp_dir}/valid_accuracy.npy', monitoring_dic['valid_accuracy']) print('Done training!')
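# The validation block above implements patience-based early stopping inline
# (reset the budget on improvement, decrement otherwise, stop at zero). A
# minimal self-contained sketch of the same pattern as a reusable helper
# (hypothetical, not part of this repo):


class EarlyStopping:
    """Stop when the validation loss hasn't improved for `patience` epochs."""

    def __init__(self, patience=5):
        self.patience = patience
        self.remaining = patience
        self.min_loss = float('inf')

    def step(self, vloss):
        """Record one epoch's validation loss; return True to stop training."""
        if vloss < self.min_loss:  # improvement: reset the budget
            self.min_loss = vloss
            self.remaining = self.patience
            return False
        self.remaining -= 1
        return self.remaining <= 0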
def main(argv=None):
    opt = parse_args(argv)

    # Set the seed (torch RNGs only).
    seed = opt.seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)

    if opt.cache == 0:
        opt.cache = random.getrandbits(128)

    exp_dir = opt.load_folder
    if exp_dir is None:  # we create a new folder if we don't load.
        exp_dir = monitoring.create_experiment_folder(opt)

    if opt.model == 'RNN':
        print('This model is deprecated - please use TCRonly from now on')

    # Creating the dataset.
    print("Getting the dataset...")
    if 'cached_dataset' not in os.listdir('.'):
        os.mkdir('cached_dataset')
    dataset = datasets.get_dataset(opt, exp_dir)

    # Creating a model.
    print("Getting the model...")
    my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
        exp_dir, opt, dataset.dataset.input_size())

    # Training criterion. Note: in the original, an NLLLoss assignment for
    # the 'NLL'/'allseq_bin' case was immediately overwritten; BCELoss is
    # what is actually used.
    criterion = torch.nn.MSELoss()
    if opt.loss == 'NLL' or opt.model == 'allseq_bin':
        criterion = torch.nn.BCELoss()

    if 'tcr_embs' not in os.listdir(exp_dir):
        if opt.model == 'TCRonly':
            os.mkdir(f'{exp_dir}/tcr_embs/')
        elif opt.model == 'allseq' or opt.model == 'allseq_bin':
            os.mkdir(f'{exp_dir}/tcr_embs/')
            os.mkdir(f'{exp_dir}/hla_embs/')
            os.mkdir(f'{exp_dir}/predictions')

    if not opt.cpu:
        print("Putting the model on gpu...")
        my_model.cuda(opt.gpu_selection)

    loss_dict = {}
    loss_dict['train_losses'] = []

    def estimate_batch_accuracy(y, yhat):
        return np.sum([i == j for i, j in zip(y, yhat)]) / y.shape[0]

    if opt.model == 'allseq' or opt.model == 'allseq_bin':
        # Batches whose index is in this list are held out for validation.
        valid_list = np.load(
            '/u/trofimov/Emerson/processed_data/valid_list.npy')
        loss_dict['valid_losses'] = []

    # The training.
    print("Start training.")

    for t in range(epoch, opt.epoch):
        loss_dict = monitoring.update_loss_dict(loss_dict, start=True)
        if opt.model == 'allseq_bin':
            good = 0

        for no_b, mini in enumerate(dataset):
            if opt.model == 'TCRonly':
                y_pred, my_model, targets = training.TCRonly_batch(
                    mini, opt, my_model)
                loss = criterion(y_pred, targets)
                loss_save = loss.item()
                loss_dict['train_losses_epoch'].append(loss_save)
                if no_b % 5 == 0:
                    print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}. "
                          f"Loss: {loss_save}")

                # Saving the embeddings.
                np.save(os.path.join(exp_dir, 'pixel_epoch_{}'.format(t)),
                        my_model.emb_1.weight.cpu().data.numpy())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # NOTE: `inputs_k` and `inputs_s` are not defined in this
                # branch; they need to be unpacked from the mini-batch. This
                # was a latent bug in the original script.
                kmerembs = my_model.get_embeddings(inputs_k,
                                                   inputs_s)[0].squeeze()
                np.save(f'{exp_dir}/kmer_embs/kmer_embs_batch_{no_b}',
                        kmerembs.cpu().data.numpy())

            elif opt.model == 'allseq':
                inputs_k, inputs_h1, inputs_h2, inputs_h3, inputs_h4, targets = \
                    training.allseq_batch(mini, opt)
                y_pred = my_model(inputs_k, inputs_h1, inputs_h2, inputs_h3,
                                  inputs_h4).float()
                loss = criterion(y_pred, targets)
                loss_save = loss.item()

                # Train losses are only recorded every fifth batch here.
                if no_b in valid_list:
                    loss_dict['valid_losses_epoch'].append(loss_save)
                    print(f"Validation error {t}, examples {no_b}/{len(dataset)}. "
                          f"Loss: {loss_save}")
                elif no_b % 5 == 0:
                    loss_dict['train_losses_epoch'].append(loss_save)
                    print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}. "
                          f"Loss: {loss_save}")

                # NOTE: as in the original, the update also runs on batches
                # from valid_list.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Saving the embeddings under the dataset's own batch number.
                batch_number = dataset.dataset.data[no_b]
                bn = batch_number[0]
                kmerembs = my_model.get_embeddings(inputs_k, inputs_h1,
                                                   inputs_h2, inputs_h3,
                                                   inputs_h4)
                kmerembs1 = kmerembs[0].squeeze()
                np.save(f'{exp_dir}/tcr_embs/tcr_embs_batch_{bn}',
                        kmerembs1.cpu().data.numpy())
                for i in range(4):
                    kmerembs1 = kmerembs[i + 1].squeeze()
                    kmerembs1 = kmerembs1[0]
                    np.save(f'{exp_dir}/hla_embs/hla_embs_batch_{bn}_h{i + 1}',
                            kmerembs1.cpu().data.numpy())
                kmerembs1 = my_model.hla_representation
                kmerembs1 = kmerembs1[0].squeeze()
                np.save(f'{exp_dir}/hla_embs/ppl_embs_batch_{bn}',
                        kmerembs1.cpu().data.numpy())

            elif opt.model == 'allseq_bin':
                inputs_k, inputs_h1, inputs_h2, inputs_h3, inputs_h4, targets = \
                    training.binallseq_batch(mini, opt)
                y_pred = my_model(inputs_k, inputs_h1, inputs_h2, inputs_h3,
                                  inputs_h4).float()
                loss = criterion(y_pred, targets)
                #nb_pos = (np.sum(np.argmax(y_pred.cpu().detach().numpy(), axis=1)) / y_pred.shape[0])
                #b_accuracy = (estimate_batch_accuracy(np.argmax(y_pred.cpu().detach().numpy(), axis=1),
                #                                      np.argmax(targets.cpu().detach().numpy(), axis=1)))
                #if no_b % 10 == 0:
                #    print(f'predicted proportion: {nb_pos} - accuracy: {b_accuracy}')
                #if b_accuracy > 0.75:
                #    good += 1
                loss_save = loss.item()

                if no_b in valid_list:
                    loss_dict['valid_losses_epoch'].append(loss_save)
                    print(f"Validation error {t}, examples {no_b}/{len(dataset)}. "
                          f"Loss: {loss_save}")
                else:
                    loss_dict['train_losses_epoch'].append(loss_save)
                    if no_b % 50 == 0:
                        print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}. "
                              f"Loss: {loss_save}")

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                batch_number = dataset.dataset.data[no_b]
                bn = batch_number[0]

                # Refit this batch with 100 extra passes before dumping its
                # predictions, as in the original script.
                for newpass in range(100):
                    y_pred = my_model(inputs_k, inputs_h1, inputs_h2,
                                      inputs_h3, inputs_h4).float()
                    loss = criterion(y_pred, targets)
                    print(f'batch {bn} pass {newpass} loss: {loss.item()}')
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                preds_targets = np.hstack((y_pred.cpu().data.numpy(),
                                           targets.cpu().data.numpy()))
                np.save(f'{exp_dir}/predictions/batch_{bn}_{no_b}.npy',
                        preds_targets)
                monitoring.save_checkpoint(my_model, optimizer, t, opt,
                                           exp_dir)

                # Saving the embeddings; only the first half of each batch
                # holds real examples.
                kmerembs = my_model.get_embeddings(inputs_k, inputs_h1,
                                                   inputs_h2, inputs_h3,
                                                   inputs_h4)
                kmerembs1 = kmerembs[0].squeeze()
                true_size = int(kmerembs1.shape[0] / 2)
                np.save(f'{exp_dir}/tcr_embs/tcr_embs_batch_{bn}',
                        kmerembs1.cpu().data.numpy()[:true_size])
                for i in range(4):
                    kmerembs1 = kmerembs[i + 1].squeeze()
                    kmerembs1 = kmerembs1[0]
                    np.save(f'{exp_dir}/hla_embs/hla_embs_batch_{bn}_h{i + 1}',
                            kmerembs1.cpu().data.numpy()[:true_size])
                kmerembs1 = my_model.hla_representation
                kmerembs1 = kmerembs1[0].squeeze()
                np.save(f'{exp_dir}/hla_embs/ppl_embs_batch_{bn}',
                        kmerembs1.cpu().data.numpy()[:true_size])

        print("Saving the model...")
        if opt.model == 'allseq_bin' or opt.model == 'allseq':
            validation_scores = loss_dict['valid_losses_epoch']
        else:
            validation_scores = None

        #print(f'number correct examples: {good}')
        if opt.model == 'allseq' or opt.model == 'allseq_bin':
            toprint = loss_dict['valid_losses_epoch']
            print(f'validation loss matrix: {toprint}')

        monitoring.save_checkpoint(my_model, optimizer, t, opt, exp_dir)
        monitoring.update_loss_dict(loss_dict, start=False)
        monitoring.save_loss(loss_dict, exp_dir)
        if t % opt.plot_frequency == 0:
            monitoring.plot_training_curve(exp_dir, loss_dict)

    print('Finished training! Starting evaluations')
    tcr_rep_dir = f'{exp_dir}/tcr_embs'
    patient_to_index = 'data/hla_for_model_eval/pt_names.csv'
    original_data_dir = '/u/trofimov/Emerson/original'
    validation_scores = np.load(f'{exp_dir}/validation_loss.npy')
    nb_patients = 15
    output = evaluations.evaluate_model(opt, my_model, exp_dir, tcr_rep_dir,
                                        patient_to_index, original_data_dir,
                                        validation_scores, nb_patients,
                                        train_on_index=0)
    with open(f'{exp_dir}/evaluation_results.json', 'w') as json_file:
        json.dump(output, json_file)
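# `estimate_batch_accuracy` above compares labels with a Python loop. For
# 1-D integer label arrays, an equivalent vectorized form (a sketch, same
# semantics) is simply:

import numpy as np


def estimate_batch_accuracy_vec(y, yhat):
    """Fraction of positions where the two label arrays agree."""
    return float(np.mean(np.asarray(y) == np.asarray(yhat)))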
def main(argv=None):
    opt = parse_args(argv)

    # Set the seed (torch RNGs only).
    seed = opt.seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)

    exp_dir = opt.load_folder
    if exp_dir is None:  # we create a new folder if we don't load.
        exp_dir = monitoring.create_experiment_folder(opt)

    # Creating the dataset.
    print("Getting the dataset...")
    dataset = datasets.get_dataset(opt, exp_dir)

    # Creating a model.
    print("Getting the model...")
    my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
        exp_dir, opt, dataset.dataset.input_size(),
        dataset.dataset.additional_info())

    # Training criterion.
    criterion = torch.nn.MSELoss()

    if not opt.cpu:
        print("Putting the model on gpu...")
        my_model.cuda(opt.gpu_selection)

    # The training.
    print("Start training.")

    # Monitoring and predictions.
    predictions = np.zeros(
        (dataset.dataset.nb_patient, dataset.dataset.nb_gene))
    indices_patients = np.arange(dataset.dataset.nb_patient)
    indices_genes = np.arange(dataset.dataset.nb_gene)
    # All (gene, patient) index pairs, precomputed for full-matrix prediction.
    xdata = np.transpose([
        np.tile(indices_genes, len(indices_patients)),
        np.repeat(indices_patients, len(indices_genes))
    ])
    progress_bar_modulo = max(1, len(dataset) // 100)

    for t in range(epoch, opt.epoch):
        start_timer = time.time()

        if opt.save_error:
            outfname_g = '_'.join(['gene_epoch', str(t), 'prediction.npy'])
            outfname_g = os.path.join(exp_dir, outfname_g)
            outfname_t = '_'.join(['tissue_epoch', str(t), 'prediction.npy'])
            outfname_t = os.path.join(exp_dir, outfname_t)
            train_trace = np.zeros(
                (dataset.dataset.nb_gene, dataset.dataset.nb_patient))

        # Making predictions: one protein-expression vector per patient,
        # generated from the patient embeddings.
        nb_proteins = my_model.emb_3.weight.cpu().data.numpy().shape[0]
        nb_patients = my_model.emb_2.weight.cpu().data.numpy().shape[0]
        predictions_protein = np.zeros((nb_patients, nb_proteins))
        patient_embs = my_model.emb_2.weight.cpu().data.numpy()
        for patient in np.arange(nb_patients):
            # NOTE: as in the original, the gpu id is hardcoded (gpu=2)
            # rather than taken from opt.gpu_selection, and the file below is
            # saved under the starting `epoch` rather than the current `t`,
            # in the working directory rather than exp_dir.
            new = my_model.generate_datapoint_protein(
                patient_embs[patient, :], gpu=2)
            new = new.cpu().data.numpy()
            predictions_protein[patient, :] = new[:, 0]
        np.save(f'predictions_protein_{epoch}.npy', predictions_protein)

        for no_b, mini in enumerate(dataset):
            inputs, targets, inputs2, targets2 = (mini[0], mini[1], mini[2],
                                                  mini[3])

            inputs = Variable(inputs, requires_grad=False).float()
            targets = Variable(targets, requires_grad=False).float()
            inputs2 = Variable(inputs2, requires_grad=False).float()
            targets2 = Variable(targets2, requires_grad=False).float()

            if not opt.cpu:
                inputs = inputs.cuda(opt.gpu_selection)
                targets = targets.cuda(opt.gpu_selection)
                inputs2 = inputs2.cuda(opt.gpu_selection)
                targets2 = targets2.cuda(opt.gpu_selection)

            # Forward pass: compute predicted y by passing x to the model.
            y_pred = my_model([inputs, inputs2])

            #if opt.save_error:
            #    # Log the predicted values per sample and per gene (S.L. validation).
            #    batch_inputs = mini[0].numpy()
            #    predicted_values = y_pred.data.cpu().numpy()
            #    train_trace[batch_inputs[:, 0], batch_inputs[:, 1]] = predicted_values[:, 0]

            targets = torch.reshape(targets, (targets.shape[0], 1))
            targets2 = torch.reshape(targets2, (targets2.shape[0], 1))

            # Compute the loss on both heads.
            loss1 = criterion(y_pred[0], targets)
            loss2 = criterion(y_pred[1], targets2)
            loss = loss1 + loss2

            if no_b % 5 == 0:
                print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}. "
                      f"Loss: {loss.item()}")

            # Saving the embeddings.
            np.save(os.path.join(exp_dir, 'pixel_epoch_{}'.format(t)),
                    my_model.emb_1.weight.cpu().data.numpy())
            np.save(os.path.join(exp_dir, 'digit_epoch_{}'.format(t)),
                    my_model.emb_2.weight.cpu().data.numpy())

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        #my_model.generate_datapoint([0, 0], opt.gpu_selection)
        #monitoring.save_predictions(exp_dir, predictions)
        #for i in range(0, xdata.shape[0], 1000):
        #    inputs = torch.FloatTensor(xdata[i:i + 1000, :])
        #    inputs = Variable(inputs, requires_grad=False).float()
        #    if not opt.cpu:
        #        inputs = inputs.cuda(opt.gpu_selection)
        #    y_pred = my_model(inputs).float()
        #    predictions[inputs.data.cpu().numpy()[:, 1].astype('int32'),
        #                inputs.data.cpu().numpy()[:, 0].astype('int32')] = y_pred.data.cpu().numpy()[:, 0]
        #monitoring.dump_error_by_tissue(train_trace, dataset.dataset.data,
        #                                outfname_t, exp_dir,
        #                                dataset.dataset.data_type,
        #                                dataset.dataset.nb_patient)
        #monitoring.dump_error_by_gene(train_trace, dataset.dataset.data,
        #                              outfname_g, exp_dir)

        #print("Saving the model...")
        monitoring.save_checkpoint(my_model, optimizer, t, opt, exp_dir)
def main(argv=None):
    opt = parse_args(argv)

    # Set the seed (torch RNGs only).
    seed = opt.seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)

    exp_dir = opt.load_folder
    if exp_dir is None:  # we create a new folder if we don't load.
        exp_dir = monitoring.create_experiment_folder(opt)

    # Creating the dataset.
    print("Getting the dataset...")
    dataset = datasets.get_dataset(opt, exp_dir)

    # Creating a model.
    print("Getting the model...")
    my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
        exp_dir, opt, dataset.dataset.input_size())

    # Training criterion.
    criterion = torch.nn.MSELoss()
    if opt.loss == 'NLL':
        criterion = torch.nn.NLLLoss()

    os.mkdir(f'{exp_dir}/kmer_embs/')

    if not opt.cpu:
        print("Putting the model on gpu...")
        my_model.cuda(opt.gpu_selection)

    # The training.
    print("Start training.")

    for t in range(epoch, opt.epoch):
        start_timer = time.time()

        for no_b, mini in enumerate(dataset):
            inputs_s, inputs_k, targets = mini[0], mini[1], mini[2]

            inputs_s = Variable(inputs_s, requires_grad=False).float()
            inputs_k = Variable(inputs_k, requires_grad=False).float()
            targets = Variable(targets, requires_grad=False).float()

            if not opt.cpu:
                inputs_s = inputs_s.cuda(opt.gpu_selection)
                inputs_k = inputs_k.cuda(opt.gpu_selection)
                targets = targets.cuda(opt.gpu_selection)

            # Forward pass: kmer inputs are reshaped to (batch, 31, 4)
            # before going through the model.
            inputs_k = inputs_k.view(-1, 31, 4)
            y_pred = my_model(inputs_k, inputs_s).float()
            #targets = torch.reshape(targets, (targets.shape[0], 1))

            # Compute the loss.
            loss = criterion(y_pred, targets)
            if no_b % 5 == 0:
                print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}. "
                      f"Loss: {loss.item()}")

            # Saving the embeddings.
            np.save(os.path.join(exp_dir, 'pixel_epoch_{}'.format(t)),
                    my_model.emb_1.weight.cpu().data.numpy())

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Keep only the first batch-worth of kmer embeddings (the batch
            # repeats each kmer once per patient).
            kmerembs_batch = my_model.get_embeddings(inputs_k, inputs_s)[0]
            kmerembs = kmerembs_batch[:int(kmerembs_batch.shape[0] /
                                           opt.nb_patient)]
            np.save(f'{exp_dir}/kmer_embs/kmer_embs_batch_{no_b}',
                    kmerembs.cpu().data.numpy())

        monitoring.save_checkpoint(my_model, optimizer, t, opt, exp_dir)
def main(argv=None):
    opt = parse_args(argv)

    # Set the seed (torch RNGs only).
    seed = opt.seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)

    if opt.cache == 0:
        opt.cache = random.getrandbits(128)

    exp_dir = opt.load_folder
    if exp_dir is None:  # we create a new folder if we don't load.
        exp_dir = monitoring.create_experiment_folder(opt)

    # Creating the dataset.
    print("Getting the dataset...")
    if 'cached_dataset' not in os.listdir('.'):
        os.mkdir('cached_dataset')
    dataset = datasets.get_dataset(opt, exp_dir)

    # Creating a model.
    print("Getting the model...")
    my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
        exp_dir, opt, dataset.dataset.input_size())

    # Training criterion.
    criterion = torch.nn.BCELoss()

    if not opt.cpu:
        print("Putting the model on gpu...")
        my_model.cuda(opt.gpu_selection)

    loss_dict = {}
    loss_dict['train_losses'] = []
    # Batches whose index is in this list are held out for validation.
    valid_list = np.load('/u/trofimov/Emerson/processed_data/valid_list.npy')
    loss_dict['valid_losses'] = []

    # The training.
    print("Start training.")

    for t in range(epoch, opt.epoch):
        loss_dict = monitoring.update_loss_dict(loss_dict, start=True)

        for no_b, mini in enumerate(dataset):
            inputs_x, targets = mini[0], mini[1]

            inputs_x = Variable(inputs_x, requires_grad=False).float()
            targets = Variable(targets, requires_grad=False).float()

            if not opt.cpu:
                inputs_x = inputs_x.cuda(opt.gpu_selection)
                targets = targets.cuda(opt.gpu_selection)

            y_pred = my_model(inputs_x)

            loss = criterion(y_pred, targets)
            loss_save = loss.item()

            if no_b in valid_list:
                loss_dict['valid_losses_epoch'].append(loss_save)
                print(f"Validation error {t}, examples {no_b}/{len(dataset)}. "
                      f"Loss: {loss_save}")
            else:
                loss_dict['train_losses_epoch'].append(loss_save)
                print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}. "
                      f"Loss: {loss_save}")

            # NOTE: as in the original, the update also runs on batches from
            # valid_list.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        validation_scores = loss_dict['valid_losses_epoch']
        monitoring.save_checkpoint(my_model, optimizer, t, opt, exp_dir)
        monitoring.update_loss_dict(loss_dict, start=False)
        monitoring.save_loss(loss_dict, exp_dir)
        if t % opt.plot_frequency == 0:
            monitoring.plot_training_curve(exp_dir, loss_dict)
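# `monitoring.update_loss_dict` belongs to this repo; from its call sites it
# appears to open fresh per-epoch lists at the start of an epoch and fold
# them back into the running history at the end. A minimal sketch of that
# contract (an assumption inferred from usage, not the repo's actual
# implementation):

import numpy as np


def update_loss_dict(loss_dict, start=True):
    """Hypothetical re-implementation inferred from the call sites above."""
    if start:
        # Fresh accumulators for the epoch about to run.
        loss_dict['train_losses_epoch'] = []
        loss_dict['valid_losses_epoch'] = []
    else:
        # Fold the per-epoch lists into the running history.
        loss_dict['train_losses'].append(
            float(np.mean(loss_dict['train_losses_epoch'])))
        if 'valid_losses' in loss_dict and loss_dict['valid_losses_epoch']:
            loss_dict['valid_losses'].append(
                float(np.mean(loss_dict['valid_losses_epoch'])))
    return loss_dict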
def impute(argv=None):
    opt = parse_args(argv)

    # Set the seed (torch RNGs only).
    seed = opt.seed
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)

    exp_dir = opt.load_folder
    new_save_dir = monitoring.create_experiment_folder(opt)
    if exp_dir is None:
        print("Experiment doesn't exist!")
        return

    # Creating the dataset.
    print("Getting the dataset...")
    dataset = datasets.get_dataset(opt, exp_dir)
    old_nb_patients = dataset.dataset.nb_patient
    old_data_file = opt.data_file
    new_data_file = opt.new_data_file
    imputation_list = opt.imputation_list
    nb_shuffles = opt.nb_shuffles

    # Creating a model.
    print("Getting the model...")
    my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
        exp_dir, opt, dataset.dataset.input_size(), impute=True)
    old_model_size = dataset.dataset.input_size()

    ### Making sure updates are only on the patient embedding layer
    #my_model.freeze_all()
    #optimizer = torch.optim.RMSprop(my_model.emb_2.parameters(), lr=opt.lr,
    #                                weight_decay=opt.weight_decay)

    ### Resetting the patient embeddings for the new patients to predict.
    my_model.emb_2.weight[:dataset.dataset.nb_patient, :] = Variable(
        torch.FloatTensor(np.zeros((dataset.dataset.nb_patient, 2))),
        requires_grad=False).float()

    # Training criterion.
    criterion = torch.nn.MSELoss()

    if not opt.cpu:
        print("Putting the model on gpu...")
        my_model.cuda(opt.gpu_selection)

    # The training.
    print("Start training.")

    for nb_genes in imputation_list:
        print(f'Imputation with {nb_genes} genes given...')
        for shuffle in range(nb_shuffles):
            opt.data_file = new_data_file
            print("Re-getting the dataset...")
            dataset = datasets.get_dataset(opt, exp_dir, masked=nb_genes)
            new_embs = np.zeros((dataset.dataset.nb_patient, 2))

            # Monitoring and predictions.
            predictions = np.zeros((dataset.dataset.nb_patient,
                                    my_model.emb_1.weight.shape[0]))
            indices_patients = np.arange(predictions.shape[0])
            indices_genes = np.arange(predictions.shape[1])
            # All (gene, patient) index pairs for full-matrix prediction.
            xdata = np.transpose([
                np.tile(indices_genes, len(indices_patients)),
                np.repeat(indices_patients, len(indices_genes))
            ])
            progress_bar_modulo = max(1, len(dataset) // 100)

            # Re-fit the patient embeddings for 25 epochs, reloading the
            # trained checkpoint each epoch but carrying over the embeddings
            # learned so far.
            for t in range(25):
                my_model, optimizer, epoch, opt = monitoring.load_checkpoint(
                    exp_dir, opt, old_model_size, impute=True)
                temp_embs = my_model.emb_2.weight.cpu().data.numpy()
                temp_embs[:dataset.dataset.nb_patient, :] = new_embs
                my_model.emb_2.weight.data = my_model.emb_2.weight.data.copy_(
                    torch.from_numpy(temp_embs))
                #my_model.emb_2.weight[:dataset.dataset.nb_patient, :] = Variable(
                #    torch.FloatTensor(new_embs), requires_grad=False).float()
                start_timer = time.time()

                #if opt.save_error:
                #    outfname_g = f'shuffle_{shuffle}_{nb_genes}_genes_epoch_{t}_prediction_genes.npy'
                #    outfname_g = os.path.join(new_exp_dir, outfname_g)
                #    outfname_t = f'shuffle_{shuffle}_{nb_genes}_genes_epoch_{t}_prediction_tissue.npy'
                #    outfname_t = os.path.join(new_exp_dir, outfname_t)
                #    train_trace = np.zeros((dataset.dataset.nb_gene, dataset.dataset.nb_patient))

                for no_b, mini in enumerate(dataset):
                    inputs, targets = mini[0], mini[1]

                    inputs = Variable(inputs, requires_grad=False).float()
                    targets = Variable(targets, requires_grad=False).float()

                    if not opt.cpu:
                        inputs = inputs.cuda(opt.gpu_selection)
                        targets = targets.cuda(opt.gpu_selection)

                    # Forward pass: compute predicted y by passing x to the model.
                    y_pred = my_model(inputs).float()

                    #if opt.save_error:
                    #    # Log the predicted values per sample and per gene (S.L. validation).
                    #    batch_inputs = mini[0].numpy()
                    #    predicted_values = y_pred.data.cpu().numpy()
                    #    train_trace[batch_inputs[:, 0], batch_inputs[:, 1]] = predicted_values[:, 0]

                    targets = torch.reshape(targets, (targets.shape[0], 1))

                    # Compute the loss.
                    loss = criterion(y_pred, targets)
                    if no_b % 5 == 0:
                        print(f"Doing epoch {t}, examples {no_b}/{len(dataset)}. "
                              f"Loss: {loss.item()}")

                    # Saving the embeddings.
                    np.save(os.path.join(new_save_dir,
                                         'pixel_epoch_{}'.format(t)),
                            my_model.emb_1.weight.cpu().data.numpy())
                    np.save(os.path.join(new_save_dir,
                                         'digit_epoch_{}'.format(t)),
                            my_model.emb_2.weight.cpu().data.numpy())

                    # Zero gradients, perform a backward pass, and update the weights.
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                new_embs = my_model.emb_2.weight.cpu().data.numpy()[
                    :dataset.dataset.nb_patient, :]

            # Predict the full (patient, gene) matrix in chunks of 1000 pairs.
            for i in range(0, xdata.shape[0], 1000):
                inputs = torch.FloatTensor(xdata[i:i + 1000, :])
                inputs = Variable(inputs, requires_grad=False).float()
                if not opt.cpu:
                    inputs = inputs.cuda(opt.gpu_selection)
                y_pred = my_model(inputs).float()
                predictions[inputs.data.cpu().numpy()[:, 1].astype('int32'),
                            inputs.data.cpu().numpy()[:, 0].astype('int32')] = \
                    y_pred.data.cpu().numpy()[:, 0]

            outfname_pred = f'shuffle_{shuffle}_{nb_genes}_genes_prediction.npy'
            outfname_pred = os.path.join(new_save_dir, outfname_pred)
            np.save(outfname_pred, predictions)
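# Several of these scripts build `xdata` with np.tile/np.repeat to enumerate
# every (gene, patient) index pair for full-matrix prediction. A tiny
# self-contained demo of that construction (toy sizes, illustration only):

import numpy as np

indices_genes = np.arange(3)     # genes 0..2
indices_patients = np.arange(2)  # patients 0..1
xdata = np.transpose([
    np.tile(indices_genes, len(indices_patients)),    # 0 1 2 0 1 2
    np.repeat(indices_patients, len(indices_genes)),  # 0 0 0 1 1 1
])
# xdata rows: (0,0) (1,0) (2,0) (0,1) (1,1) (2,1) -- one row per pair,
# in the same (gene, patient) column order the training inputs use.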