def main():
    """CLI entry point: train or evaluate a TCR/peptide-MHC binding model.

    Modes:
      --mode train : 80/10/10 split of --infile, train for --epoch epochs,
                     log validation metrics to result/, save weights to models/.
      --mode test  : load a saved model and evaluate on --indepfile, writing
                     blackbox prediction files under data/.
    """
    parser = argparse.ArgumentParser(description='Prediction of TCR binding to peptide-MHC complexes')
    parser.add_argument('--infile', type=str, help='input file for training')
    parser.add_argument('--indepfile', type=str, default=None, help='independent test file')
    parser.add_argument('--blosum', type=str, default='data/BLOSUM50', help='file with BLOSUM matrix')
    parser.add_argument('--batch_size', type=int, default=50, metavar='N', help='batch size')
    parser.add_argument('--model_name', type=str, default='original.ckpt',
                        help='if train is True, model name to be saved, otherwise model name to be loaded')
    parser.add_argument('--epoch', type=int, default=200, metavar='N', help='number of epoch to train')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate')
    parser.add_argument('--cuda', type=str2bool, default=True, help='enable cuda')
    parser.add_argument('--seed', type=int, default=7405, help='random seed')
    parser.add_argument('--mode', default='train', type=str, help='train or test')
    parser.add_argument('--model', type=str, default='cnn', help='cnn, resnet')
    args = parser.parse_args()

    # BUG FIX: original used `args.mode is 'test'` — identity comparison against
    # a string literal, which is not guaranteed to be True even for equal strings.
    if args.mode == 'test':
        assert args.indepfile is not None, '--indepfile is missing!'

    ## cuda
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    args.cuda = (args.cuda and torch.cuda.is_available())
    device = torch.device('cuda' if args.cuda else 'cpu')

    ## random seed
    # BUG FIX: also seed numpy — the train/valid/test split below uses
    # np.random.shuffle, so without this --seed did not make the split
    # reproducible. (Original also had a redundant
    # `torch.cuda.manual_seed(seed) if args.cuda else None` inside `if args.cuda:`.)
    seed = args.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    if args.cuda:
        torch.cuda.manual_seed(seed)

    # embedding matrix (BLOSUM-based amino-acid encoding)
    embedding = load_embedding(args.blosum)

    ## read data and make an 80/10/10 train/valid/test split
    X_pep, X_tcr, y = data_io_tf.read_pTCR(args.infile)
    y = np.array(y)

    n_total = len(y)
    n_train = int(round(n_total * 0.8))
    n_valid = int(round(n_total * 0.1))
    n_test = n_total - n_train - n_valid
    idx_shuffled = np.arange(n_total)
    np.random.shuffle(idx_shuffled)
    idx_train = idx_shuffled[:n_train]
    idx_valid = idx_shuffled[n_train:(n_train + n_valid)]
    idx_test = idx_shuffled[(n_train + n_valid):]

    ## dataloaders — valid/test reuse the training max sequence lengths so
    ## padding is consistent across splits
    train_loader = define_dataloader(X_pep[idx_train], X_tcr[idx_train], y[idx_train],
                                     None, None, None,
                                     batch_size=args.batch_size, device=device)
    valid_loader = define_dataloader(X_pep[idx_valid], X_tcr[idx_valid], y[idx_valid], None,
                                     maxlen_pep=train_loader['pep_length'],
                                     maxlen_tcr=train_loader['tcr_length'],
                                     batch_size=args.batch_size, device=device)
    test_loader = define_dataloader(X_pep[idx_test], X_tcr[idx_test], y[idx_test], None,
                                    maxlen_pep=train_loader['pep_length'],
                                    maxlen_tcr=train_loader['tcr_length'],
                                    batch_size=args.batch_size, device=device)

    ## independent test data (required in test mode, optional in train mode)
    if args.indepfile is not None:
        X_indep_pep, X_indep_tcr, y_indep = data_io_tf.read_pTCR(args.indepfile)
        y_indep = np.array(y_indep)
        indep_loader = define_dataloader(X_indep_pep, X_indep_tcr, y_indep, None,
                                         maxlen_pep=train_loader['pep_length'],
                                         maxlen_tcr=train_loader['tcr_length'],
                                         batch_size=args.batch_size, device=device)

    # NOTE: only 'cnn' is implemented; 'resnet' (mentioned in --model help)
    # has no import path here.
    if args.model == 'cnn':
        from cnn import Net
    else:
        raise ValueError('unknown model name')

    ## define model and optimizer
    model = Net(embedding,
                train_loader['pep_length'],
                train_loader['tcr_length']).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # output directories (makedirs+exist_ok replaces the os.listdir probes)
    os.makedirs('models', exist_ok=True)
    os.makedirs('result', exist_ok=True)

    ## fit model
    if args.mode == 'train':
        # BUG FIX: the original computed check_model_name(...) twice and then
        # immediately discarded both results with `model_name = args.model_name`.
        model_name = check_model_name(args.model_name)
        model_name = check_model_name(model_name, './models')

        valid_csv = 'result/{}_{}_valid.csv'.format(
            os.path.splitext(os.path.basename(args.infile))[0],
            os.path.splitext(os.path.basename(args.model_name))[0])
        wf_colnames = ['loss', 'accuracy', 'precision1', 'precision0',
                       'recall1', 'recall0', 'f1macro', 'f1micro', 'auc']

        # `with` guarantees the metrics CSV is flushed/closed (original leaked
        # the handle).
        with open(valid_csv, 'w') as wf_open:
            wf = csv.DictWriter(wf_open, wf_colnames, delimiter='\t')
            t0 = time.time()
            for epoch in range(1, args.epoch + 1):
                train(args, model, device, train_loader['loader'], optimizer, epoch)

                ## evaluate performance on train and validation splits
                perf_train = get_performance_batchiter(train_loader['loader'], model, device)
                perf_valid = get_performance_batchiter(valid_loader['loader'], model, device)

                ## print performance
                print('Epoch {} TimeSince {}\n'.format(epoch, timeSince(t0)))
                print('[TRAIN] {} ----------------'.format(epoch))
                print_performance(perf_train)
                print('[VALID] {} ----------------'.format(epoch))
                print_performance(perf_valid, writeif=True, wf=wf)

                ## evaluate and print test-set performance
                print('[TEST ] {} ----------------'.format(epoch))
                perf_test = get_performance_batchiter(test_loader['loader'], model, device)
                print_performance(perf_test)

        torch.save(model.state_dict(), './models/' + model_name)

    elif args.mode == 'test':
        model_name = args.model_name
        assert model_name in os.listdir('./models')
        model.load_state_dict(torch.load('./models/' + model_name))

        ## evaluate and print independent-test-set performance
        # BUG FIX: original printed an unfilled '{}' placeholder here.
        print('[INDEP] ----------------')
        perf_indep = get_performance_batchiter(indep_loader['loader'], model, device)
        print_performance(perf_indep)

        ## write blackbox outputs (labels, then scores) — handles closed via `with`
        with open('data/testblackboxpred_' + os.path.basename(args.indepfile), 'w') as wf_bb_open:
            wf_bb = csv.writer(wf_bb_open, delimiter='\t')
            write_blackbox_output_batchiter(indep_loader, model, wf_bb, device)
        with open('data/testblackboxpredscore_' + os.path.basename(args.indepfile), 'w') as wf_bb_open1:
            wf_bb1 = csv.writer(wf_bb_open1, delimiter='\t')
            write_blackbox_output_batchiter(indep_loader, model, wf_bb1, device, ifscore=True)

    else:
        print('\nError: "--mode train" or "--mode test" expected')
# NOTE(review): tail of a separate DataLoader/training script pasted with its
# line breaks collapsed. The opening tokens (`batch_size=batch_size, ...)`)
# continue a DataLoader(...) call that begins BEFORE this chunk, and the final
# `def read_image(dir):` body is cut off after its transforms.Compose — both
# edges are incomplete, so the code is left byte-identical.
# As written: builds a test DataLoader (batch_size=1), constructs Net(),
# SmoothL1Loss + Adam(lr=1e-5), restores weights via loadModel(model, optimizer),
# then saves state_dict to 'SavedModels/pull_model_saved'; the actual train/test/
# visualization calls are all commented out, so running this only re-saves the
# loaded weights. 'critirion' is a typo for 'criterion' (left as-is: renaming
# would change code, and the commented-out train(...) call uses the same
# spelling as a keyword). `def read_image(dir)` shadows the builtin `dir` —
# TODO(review): rename once the full function is in view.
batch_size=batch_size, shuffle=True, num_workers=0) testLoader = DataLoader(testDataset, batch_size=1, shuffle=True, num_workers=0) model = Net() critirion = torch.nn.SmoothL1Loss() optimizer = optim.Adam(model.parameters(), lr=0.00001) print(model) epoch = 1000 model.cuda() model, optimizer = loadModel(model, optimizer) #train(epoch=epoch, train_loader=trainLoader, optimizer= optimizer, critirion=critirion, model=model, testLoader= testLoader, batch_size= batch_size) torch.save(model.state_dict(), 'SavedModels/pull_model_saved') #test(model=model, testLoader=testLoader, batch_size=batch_size, im_num=12) #model = loadModel(model) #weights = model.conv2.weight.data.numpy() #feature_visualization(weights=weights, image=getImage(iter(testLoader).next()['image'][0]), depth =32) ### Test def read_image(dir): transform = transforms.Compose( [Rescale(224), RandomCrop(223), Normalize(), ToTensor()])
# NOTE(review): fragment of yet another script, collapsed onto one line. It
# opens mid-function: `npimg = img.numpy() ... plt.show()` is presumably the
# tail of an image-display helper whose `def` line is outside this view —
# TODO confirm. Left byte-identical since neither edge is complete.
# As written after that: grabs one batch via `dataiter.next()` (Python 2 /
# old-torch iterator style; modern code would use `next(dataiter)`), builds
# Net() with CrossEntropyLoss + SGD(lr=0.001, momentum=0.9), runs a 2-epoch
# loop printing the average running loss every 2000 minibatches, then saves
# the state_dict to PATH (defined before this chunk).
npimg = img.numpy() plt.imshow(np.transpose(npimg, (1,2,0))) plt.show() dataiter = iter(trainloader) images, labels = dataiter.next() net = Net() # define loss function and optimizer criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) for epoch in range(2): running_loss = 0.0 for i, data in enumerate(trainloader, 0): inputs, labels = data optimizer.zero_grad() outputs = net(inputs) loss = criterion(outputs, labels) loss.backward() optimizer.step() running_loss += loss.item() if i % 2000 == 1999: print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss / 2000)) running_loss = 0.0 print('Finished training') torch.save(net.state_dict(), PATH)
# NOTE(review): training/eval tail of another collapsed script. Every name it
# reads (model, img, nrg, loss, optimizer, obj_vals, disp_epochs, f, files,
# moddir, t_img, t_nrg, enableCuda, outdir, num_epochs) is defined before this
# chunk, so the code is left byte-identical rather than restructured.
# Flow: per-epoch model.backprop(...) accumulating losses into obj_vals, with
# progress printed every disp_epochs epochs; then 'pt'/'tsc' tags are sliced
# out of files[0] via chained str.index() calls — fragile: raises ValueError
# if '_', ' ' or ']' is absent — TODO(review): confirm the expected filename
# format and consider a regex. Finally saves the state_dict to
# moddir + 'model_{pt}{tsc}', runs model.test on the held-out tensors, and
# writes a sorted predicted-vs-true scatter (both axes clamped to 100-400,
# values scaled x1000) to outdir + 'InitialE.png'.
for epoch in range(0, num_epochs): train_val = model.backprop(img, nrg, loss, optimizer) # appending loss values for training dataset obj_vals.append(train_val) if not ((epoch + 1) % disp_epochs): print("file name:{}".format(f) + "\tEpoch [{}/{}]".format(epoch+1, num_epochs) + "\tTraining Loss: {:.5f}".format(train_val)) print("Training Finished") pt = files[0][files[0].index("_"):files[0].index("_")+4] tsc = files[0][(files[0].index(" ")+1)+files[0][files[0].index(" ")+1:].index(" ")+1:files[0].index("]")] print("Saving Model") torch.save(model.state_dict(), moddir + "model_{}{}".format(pt,tsc)) print("Starting Testing") out_nrgs, test_val = model.test(t_img, t_nrg, loss) print("Testing Finished") out_nrgs = convertToNumpy(out_nrgs, enableCuda) plt.plot(np.linspace(100, 400), np.linspace(100, 400), color="black") plt.scatter(np.sort(t_nrg)*1000, np.sort(out_nrgs)*1000, s=0.5, c='#FF0000') plt.xlim((100,400)) plt.ylim((100,400)) plt.savefig(outdir + "InitialE.png") plt.close()