# --- Example 1 (scraped snippet separator; original marker: "Exemple #1", score 0) ---
def main():
    """Train or evaluate a TCR/peptide-MHC binding predictor.

    Command-line driven entry point: parses arguments, builds an
    80/10/10 train/valid/test split of --infile, then either trains a
    model (``--mode train``) or evaluates a saved checkpoint on the
    independent set given by --indepfile (``--mode test``).
    """

    parser = argparse.ArgumentParser(description='Prediction of TCR binding to peptide-MHC complexes')

    parser.add_argument('--infile', type=str,
                        help='input file for training')
    parser.add_argument('--indepfile', type=str, default=None,
                        help='independent test file')
    parser.add_argument('--blosum', type=str, default='data/BLOSUM50',
                        help='file with BLOSUM matrix')
    parser.add_argument('--batch_size', type=int, default=50, metavar='N',
                        help='batch size')
    parser.add_argument('--model_name', type=str, default='original.ckpt',
                        help='if train is True, model name to be saved, otherwise model name to be loaded')
    parser.add_argument('--epoch', type=int, default=200, metavar='N',
                        help='number of epoch to train')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate')
    parser.add_argument('--cuda', type=str2bool, default=True,
                        help='enable cuda')
    parser.add_argument('--seed', type=int, default=7405,
                        help='random seed')
    parser.add_argument('--mode', default='train', type=str,
                        help='train or test')
    parser.add_argument('--model', type=str, default='cnn',
                        help='cnn, resnet')

    args = parser.parse_args()

    # BUGFIX: the original used `args.mode is 'test'`, which tests object
    # identity and only worked by accident of CPython string interning.
    if args.mode == 'test':
        assert args.indepfile is not None, '--indepfile is missing!'

    ## cuda
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    args.cuda = (args.cuda and torch.cuda.is_available())
    device = torch.device('cuda' if args.cuda else 'cpu')

    ## set random seed
    # BUGFIX: the data split below uses np.random.shuffle, which was
    # previously unseeded, so --seed did not make the split reproducible.
    seed = args.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if args.cuda:
        # Redundant inner `... if args.cuda else None` ternary removed.
        torch.cuda.manual_seed(seed)

    # embedding matrix (BLOSUM-based amino-acid embedding)
    embedding = load_embedding(args.blosum)

    ## read data
    X_pep, X_tcr, y = data_io_tf.read_pTCR(args.infile)
    y = np.array(y)

    # 80% train / 10% valid / 10% test split by shuffled index.
    n_total = len(y)
    n_train = int(round(n_total * 0.8))
    n_valid = int(round(n_total * 0.1))
    n_test = n_total - n_train - n_valid
    idx_shuffled = np.arange(n_total)
    np.random.shuffle(idx_shuffled)
    idx_train = idx_shuffled[:n_train]
    idx_valid = idx_shuffled[n_train:(n_train + n_valid)]
    idx_test = idx_shuffled[(n_train + n_valid):]

    ## define dataloader
    # Valid/test loaders reuse the training set's padded sequence lengths
    # so all splits share the same input geometry.
    train_loader = define_dataloader(X_pep[idx_train], X_tcr[idx_train], y[idx_train], None,
                                     None, None,
                                     batch_size=args.batch_size, device=device)
    valid_loader = define_dataloader(X_pep[idx_valid], X_tcr[idx_valid], y[idx_valid], None,
                                     maxlen_pep=train_loader['pep_length'],
                                     maxlen_tcr=train_loader['tcr_length'],
                                     batch_size=args.batch_size, device=device)
    test_loader = define_dataloader(X_pep[idx_test], X_tcr[idx_test], y[idx_test], None,
                                    maxlen_pep=train_loader['pep_length'],
                                    maxlen_tcr=train_loader['tcr_length'],
                                    batch_size=args.batch_size, device=device)

    ## read indep data
    if args.indepfile is not None:
        X_indep_pep, X_indep_tcr, y_indep = data_io_tf.read_pTCR(args.indepfile)
        y_indep = np.array(y_indep)
        indep_loader = define_dataloader(X_indep_pep, X_indep_tcr, y_indep, None,
                                         maxlen_pep=train_loader['pep_length'],
                                         maxlen_tcr=train_loader['tcr_length'],
                                         batch_size=args.batch_size, device=device)

    if args.model == 'cnn':

        from cnn import Net

    #if args.model == 'resnet':
    #
    #    from resnet import Net
    #    Net = models.resnet18

    else:
        raise ValueError('unknown model name')

    ## define model
    model = Net(embedding, train_loader['pep_length'], train_loader['tcr_length']).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Ensure output directories exist (idempotent, no listdir race).
    os.makedirs('models', exist_ok=True)
    os.makedirs('result', exist_ok=True)

    ## fit model
    if args.mode == 'train':

        # BUGFIX: the original computed check_model_name(...) twice and then
        # immediately overwrote the result with args.model_name, making the
        # checks dead code. Keep the checked name (presumably check_model_name
        # validates/uniquifies it — confirm against its definition).
        model_name = check_model_name(args.model_name)
        model_name = check_model_name(model_name, './models')

        valid_csv = ('result/'
                     + os.path.splitext(os.path.basename(args.infile))[0]
                     + '_'
                     + os.path.splitext(os.path.basename(args.model_name))[0]
                     + '_valid.csv')
        wf_colnames = ['loss', 'accuracy',
                       'precision1', 'precision0',
                       'recall1', 'recall0',
                       'f1macro', 'f1micro', 'auc']

        # BUGFIX: the per-epoch validation CSV was opened and never closed;
        # the context manager guarantees it is flushed and closed.
        with open(valid_csv, 'w') as wf_open:
            wf = csv.DictWriter(wf_open, wf_colnames, delimiter='\t')

            t0 = time.time()
            for epoch in range(1, args.epoch + 1):

                train(args, model, device, train_loader['loader'], optimizer, epoch)

                ## evaluate performance
                perf_train = get_performance_batchiter(train_loader['loader'], model, device)
                perf_valid = get_performance_batchiter(valid_loader['loader'], model, device)

                ## print performance
                print('Epoch {} TimeSince {}\n'.format(epoch, timeSince(t0)))
                print('[TRAIN] {} ----------------'.format(epoch))
                print_performance(perf_train)
                print('[VALID] {} ----------------'.format(epoch))
                print_performance(perf_valid, writeif=True, wf=wf)

        ## evaluate and print test-set performance
        print('[TEST ] {} ----------------'.format(epoch))
        perf_test = get_performance_batchiter(test_loader['loader'], model, device)
        print_performance(perf_test)

        model_name = './models/' + model_name
        torch.save(model.state_dict(), model_name)

    elif args.mode == 'test':

        model_name = args.model_name

        assert model_name in os.listdir('./models')

        model_name = './models/' + model_name
        model.load_state_dict(torch.load(model_name))

        ## evaluate and print independent-test-set performance
        # BUGFIX: the original printed a literal '{}' (missing .format call).
        print('[INDEP] {} ----------------'.format(os.path.basename(args.indepfile)))
        perf_indep = get_performance_batchiter(indep_loader['loader'], model, device)
        print_performance(perf_indep)

        ## write blackbox output (labels, then scores); close both files.
        with open('data/testblackboxpred_' + os.path.basename(args.indepfile), 'w') as wf_bb_open:
            wf_bb = csv.writer(wf_bb_open, delimiter='\t')
            write_blackbox_output_batchiter(indep_loader, model, wf_bb, device)

        with open('data/testblackboxpredscore_' + os.path.basename(args.indepfile), 'w') as wf_bb_open1:
            wf_bb1 = csv.writer(wf_bb_open1, delimiter='\t')
            write_blackbox_output_batchiter(indep_loader, model, wf_bb1, device, ifscore=True)

    else:

        print('\nError: "--mode train" or "--mode test" expected')
                         batch_size=batch_size,
                         shuffle=True,
                         num_workers=0)
# Single-sample evaluation loader (testDataset is defined earlier in the script).
testLoader = DataLoader(testDataset, batch_size=1, shuffle=True, num_workers=0)

# Model, loss and optimiser setup.
model = Net()
criterion = torch.nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)
print(model)
epoch = 1000

# Move the model to the GPU and restore the last checkpoint
# before saving / evaluating.
model.cuda()
model, optimizer = loadModel(model, optimizer)

# train(epoch=epoch, train_loader=trainLoader, optimizer=optimizer,
#       critirion=criterion, model=model, testLoader=testLoader,
#       batch_size=batch_size)
torch.save(model.state_dict(), 'SavedModels/pull_model_saved')
# test(model=model, testLoader=testLoader, batch_size=batch_size, im_num=12)

# model = loadModel(model)

# weights = model.conv2.weight.data.numpy()
# feature_visualization(weights=weights,
#                       image=getImage(iter(testLoader).next()['image'][0]),
#                       depth=32)

### Test


def read_image(dir):
    """Build the image preprocessing pipeline (rescale, crop, normalize, to-tensor).

    NOTE(review): the visible body only constructs ``transform`` and neither
    uses nor returns it — the function appears truncated in this snippet;
    confirm against the original source. ``dir`` (which shadows the builtin
    ``dir``) is presumably a path to image files — TODO confirm.
    """
    transform = transforms.Compose(
        [Rescale(224), RandomCrop(223),
         Normalize(), ToTensor()])
# --- Example 3 (scraped snippet separator; original marker: "Exemple #3", score 0) ---
	npimg = img.numpy()
	plt.imshow(np.transpose(npimg, (1,2,0)))
	plt.show()

# Peek at one batch of training data.
dataiter = iter(trainloader)
# BUGFIX: `iterator.next()` was removed in Python 3; use the builtin next().
images, labels = next(dataiter)

net = Net()

# Define loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train for 2 epochs, reporting average loss every 2000 mini-batches.
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # Standard step: reset grads, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:  # every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss / 2000))
            running_loss = 0.0
print('Finished training')

# Persist the trained weights (PATH is defined elsewhere in the script).
torch.save(net.state_dict(), PATH)
# --- Example 4 (scraped snippet separator; original marker: "Exemple #4", score 0) ---
        for epoch in range(0, num_epochs):
            train_val = model.backprop(img, nrg, loss, optimizer)        
            # appending loss values for training dataset
            obj_vals.append(train_val)

            if not ((epoch + 1) % disp_epochs):
                print("file name:{}".format(f) +
                      "\tEpoch [{}/{}]".format(epoch+1, num_epochs) +
                      "\tTraining Loss: {:.5f}".format(train_val))

    print("Training Finished")
    
    pt = files[0][files[0].index("_"):files[0].index("_")+4]
    tsc = files[0][(files[0].index(" ")+1)+files[0][files[0].index(" ")+1:].index(" ")+1:files[0].index("]")]
    print("Saving Model")
    torch.save(model.state_dict(), moddir +
               "model_{}{}".format(pt,tsc))

    print("Starting Testing")
    out_nrgs, test_val = model.test(t_img, t_nrg, loss)
    print("Testing Finished")

    out_nrgs = convertToNumpy(out_nrgs, enableCuda)

    plt.plot(np.linspace(100, 400),
             np.linspace(100, 400), color="black")
    plt.scatter(np.sort(t_nrg)*1000, np.sort(out_nrgs)*1000, s=0.5, c='#FF0000')
    plt.xlim((100,400))
    plt.ylim((100,400))  
    plt.savefig(outdir + "InitialE.png")
    plt.close()