def train(train_files,
          test_files,
          train_batch_size,
          eval_batch_size,
          model_file,
          vocab_size,
          num_classes,
          n_epoch,
          print_every=50,
          eval_every=500):
    """Build the model, loss, optimizer and data loaders, then run training.

    Args:
        train_files: Passed to ``HDF5Dataset`` to build the training set.
        test_files: Passed to ``HDF5Dataset`` to build the evaluation set.
        train_batch_size: Batch size of the (shuffled) training loader.
        eval_batch_size: Batch size of the evaluation loader.
        model_file: Forwarded unchanged to ``_train_loop``.
        vocab_size: Stored in ``CONFIG["vocab_size"]`` before ``Net()`` is
            constructed (presumably read by ``Net`` -- confirm).
        num_classes: Stored in ``CONFIG["num_classes"]`` before ``Net()`` is
            constructed (presumably read by ``Net`` -- confirm).
        n_epoch: Forwarded unchanged to ``_train_loop``.
        print_every: Forwarded unchanged to ``_train_loop``.
        eval_every: Forwarded unchanged to ``_train_loop``.
    """
    torch.multiprocessing.set_sharing_strategy('file_system')
    torch.backends.cudnn.benchmark = True

    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the former `print "..."` statements are a SyntaxError on 3.
    print("Setting seed...")
    seed = 1234
    torch.manual_seed(seed)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(seed)

    # setup CNN model: the configuration must be filled in before Net() runs
    CONFIG["vocab_size"] = vocab_size
    CONFIG["num_classes"] = num_classes
    model = Net()

    if use_cuda:
        print("CUDA is available on this machine. Moving model to GPU...")
        model.cuda()
    print(model)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    train_set = HDF5Dataset(train_files)
    test_set = HDF5Dataset(test_files)

    # Only the training loader shuffles; evaluation keeps the dataset order.
    train_loader = DataLoader(dataset=train_set,
                              batch_size=train_batch_size,
                              shuffle=True,
                              num_workers=2)

    test_loader = DataLoader(dataset=test_set,
                             batch_size=eval_batch_size,
                             num_workers=2)

    _train_loop(train_loader=train_loader,
                test_loader=test_loader,
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                n_epoch=n_epoch,
                print_every=print_every,
                eval_every=eval_every,
                model_file=model_file)
Exemple #2
0
    # constructing a model (converting model to double precision)
    model = Net(red_kernel=rKern,
                nonred_kernel=nKern,
                red_stride=rStride,
                nonred_stride=nStride,
                red_out=rOut,
                nonred_out=nOut,
                d="cuda:0" if enableCuda else "cpu")

    model.double()
    if enableCuda:
        model.cuda()

    # declare optimizer and gradient and loss function
    optimizer = optim.Adadelta(model.parameters(), lr=lr_rate)
    loss = torch.nn.MSELoss(reduction='mean')

    print("Loading model")
    model.load_state_dict(torch.load(modelFile))

    print("Starting Testing")
    out_nrgs, test_val = model.test(images, energies, loss)
    print("Testing Finished")

    out_nrgs = convertToNumpy(out_nrgs, enableCuda)
    # scaling energies to mHa
    energies = 1000 * energies
    out_nrgs = 1000 * out_nrgs

    # calculating median absolute error for energies in range (100-400 mHa)
    logger.setLevel(logging.CRITICAL)

# Write every attribute stored on `args` to the log as one "name: value" line.
for arg in vars(args):
    logger.info("{name}: {value}".format(name=arg, value=getattr(args, arg)))

##
torch.backends.cudnn.enabled = False
momentum = 0.5
learning_rate = 0.01
n_epochs = 3

training_data, testing_data, example_data, example_targets = getData()

# Save
network = Net()
optimizer = optim.SGD(network.parameters(),
                      lr=learning_rate,
                      momentum=momentum)
loss = 0

# Evaluate once before any training, then after every epoch.
test(network, testing_data)
for epoch in range(1, n_epochs + 1):
    train(epoch, network, optimizer, loss, training_data)
    test(network, testing_data)

# Load
continued_network = Net()
# The optimizer must hold the parameters of the network it is going to
# update. It was previously built from `network.parameters()`, so stepping it
# would have modified the original network and left `continued_network`
# untouched.
continued_optimizer = optim.SGD(continued_network.parameters(),
                                lr=learning_rate,
                                momentum=momentum)
continued_loss = 0
    def __execute(self, model: cnn.Net, image_paths: List[str]):
        """Collect gradient-based visualisation data for a batch of images.

        The model is switched to eval mode and the images are loaded with
        ``load_images(image_paths, self.input_size)`` and moved to the
        model's device. For every class index ``i`` in
        ``range(len(self.classes))`` the enabled methods are back-propagated
        for column ``i`` of the ids returned by ``BackPropagation.forward``.

        Args:
            model: Network to explain; hooks are attached to it for the
                duration of the call and removed before returning.
            image_paths: Paths of the images to process.

        Returns:
            dict with the keys ``'vanilla'``, ``'deconv'``, ``'gbp'``,
            ``'gcam'`` and ``'ggcam'``. Each value is a list filled in
            class-major order (outer loop over class index, inner loop over
            images); lists of disabled methods stay empty.
        """
        processed_data = {
            'vanilla': [],
            'deconv': [],
            'gbp': [],
            'gcam': [],
            'ggcam': [],
        }

        device = next(model.parameters()).device
        model.eval()

        images, raw_images = load_images(image_paths, self.input_size)
        images = torch.stack(images).to(device)
        n_images = len(images)

        cls_num = len(self.classes)
        # Nothing is written to disk here any more, but the directory is
        # still created so callers relying on its existence keep working.
        save_dir = Path(self.save_dir)
        save_dir.mkdir(parents=True, exist_ok=True)

        # --- Vanilla Backpropagation ---
        # Always built: its forward pass supplies the class ids used by
        # every other method below.
        bp = BackPropagation(model=model)
        deconv = None
        gcam = None
        gbp = None

        # try/finally guarantees the hooks are detached from `model` even
        # when a forward/backward pass raises.
        try:
            _, ids = bp.forward(images)  # sorted

            # --- Deconvolution ---
            if self.is_deconv:
                deconv = Deconvnet(model=model)
                _ = deconv.forward(images)

            # --- Grad-CAM / Guided Backpropagation / Guided Grad-CAM ---
            if self.is_gradcam:
                gcam = GradCAM(model=model)
                _ = gcam.forward(images)

                gbp = GuidedBackPropagation(model=model)
                _ = gbp.forward(images)

            pbar = tqdm(range(cls_num),
                        total=cls_num,
                        ncols=100,
                        bar_format='{l_bar}{bar:30}{r_bar}',
                        leave=False)
            pbar.set_description('Grad-CAM')

            for i in pbar:
                target_ids = ids[:, [i]]

                if self.is_vanilla:
                    bp.backward(ids=target_ids)
                    gradients = bp.generate()
                    for j in range(n_images):
                        processed_data['vanilla'].append(
                            get_gradient_data(gradients[j]))

                if self.is_deconv:
                    deconv.backward(ids=target_ids)
                    gradients = deconv.generate()
                    for j in range(n_images):
                        processed_data['deconv'].append(
                            get_gradient_data(gradients[j]))

                # Grad-CAM / Guided Grad-CAM / Guided Backpropagation
                if self.is_gradcam:
                    gbp.backward(ids=target_ids)
                    gradients = gbp.generate()

                    gcam.backward(ids=target_ids)
                    regions = gcam.generate(target_layer=self.target_layer)

                    # The product does not depend on the image index, so it
                    # is computed once per class instead of once per image.
                    guided_regions = torch.mul(regions, gradients)

                    for j in range(n_images):
                        processed_data['gbp'].append(
                            get_gradient_data(gradients[j]))
                        processed_data['gcam'].append(
                            get_gradcam_data(regions[j, 0], raw_images[j]))
                        processed_data['ggcam'].append(
                            get_gradient_data(guided_regions[j]))
        finally:
            # Remove all the hook functions registered on 'model'
            bp.remove_hook()
            for explainer in (deconv, gcam, gbp):
                if explainer is not None:
                    explainer.remove_hook()

        return processed_data
def main():
    """Train ``Net`` for one full-batch epoch and print validation accuracy.

    Loads the image array and the label CSV from hard-coded paths, scales
    the pixels by 1/255, holds out 20% of the samples for validation
    (``random_state=42``), runs one optimisation step per epoch on the whole
    training split and finally prints the accuracy on the held-out split.
    Runs on the GPU when CUDA is available and on the CPU otherwise.
    """
    # load images as a numpy array
    # NOTE(review): allow_pickle=True unpickles the file contents, which can
    # execute arbitrary code -- only point this at files you trust.
    train_dataset = np.array(
        np.load('/content/drive/My Drive/McGill/comp551/data/train_max_x',
                allow_pickle=True))
    train_dataset = train_dataset / 255.0
    train_dataset = train_dataset.astype('float32')
    targets = pd.read_csv(
        '/content/drive/My Drive/McGill/comp551/data/train_max_y.csv',
        delimiter=',',
        skipinitialspace=True)
    targets = targets.to_numpy()
    # remove id column
    targets = targets[:, 1]
    targets = targets.astype(int)

    X_train, X_test, y_train, y_test = train_test_split(train_dataset,
                                                        targets,
                                                        test_size=0.2,
                                                        random_state=42)
    # Clean memory: the full array is not needed once it has been split.
    del train_dataset

    # converting training images into torch format (insert a channel axis)
    dim1, dim2, dim3 = X_train.shape
    X_train = X_train.reshape(dim1, 1, dim2, dim3)
    X_train = torch.from_numpy(X_train)
    y_train = torch.from_numpy(y_train)

    # converting validation images into torch format (insert a channel axis)
    dim1, dim2, dim3 = X_test.shape
    X_test = X_test.reshape(dim1, 1, dim2, dim3)
    X_test = torch.from_numpy(X_test)
    y_test = torch.from_numpy(y_test)

    # Pick the device once so that the script also runs without a GPU; the
    # data used to be moved with unconditional .cuda() calls.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # defining the model
    model = Net().to(device)
    criterion = nn.NLLLoss().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
    print(model)

    # Move the data before the loop: it does not change between epochs and
    # x_val / y_val are needed after the loop even if it never runs.
    x_train, y_train = X_train.to(device), y_train.to(device)
    x_val, y_val = X_test.to(device), y_test.to(device)

    time0 = time()
    epochs = 1
    for e in range(epochs):
        model.train()

        # clearing the Gradients of the model parameters
        optimizer.zero_grad()

        # one full-batch step on the training split
        output_train = model(x_train)
        loss_train = criterion(output_train, y_train)
        loss_train.backward()
        optimizer.step()

        # Validation loss is measured in eval mode and without autograd so
        # it neither builds a graph nor runs train-mode layers on held-out
        # data.
        model.eval()
        with torch.no_grad():
            loss_val = criterion(model(x_val), y_val)

        # NLLLoss averages over the batch by default, so the loss is already
        # a per-sample mean. The old code divided by len(train_dataset),
        # which had been set to None and raised TypeError.
        print("Epoch {} - Training loss: {}".format(e, loss_train.item()))
        print("Epoch {} - Validation loss: {}".format(e, loss_val.item()))

    print("\nTraining Time (in minutes) =", (time() - time0) / 60)

    # prediction for validation set (eval mode, no gradients)
    model.eval()
    with torch.no_grad():
        output = model(x_val)

    # NLLLoss is used, so Net presumably returns log-probabilities and exp()
    # turns them into probabilities -- confirm against Net.
    ps = torch.exp(output).cpu()
    predictions = np.argmax(ps.numpy(), axis=1)

    # accuracy on validation set; the labels are copied back to the host
    # because y_val may live on the GPU.
    print("\nModel Accuracy =",
          (accuracy_score(y_val.cpu().numpy(), predictions)))
Exemple #6
0
def main():
    """Command-line entry point: train or test a TCR / peptide-MHC model.

    In ``--mode train`` the data in ``--infile`` is split 80/10/10 into
    train/validation/test, the model is trained for ``--epoch`` epochs,
    validation performance is passed to ``print_performance`` together with
    a tab-separated writer on ``result/<infile>_<model>_valid.csv``, and the
    weights are saved under ``./models/``. In ``--mode test`` the weights
    are loaded from ``./models/`` and the ``--indepfile`` data is evaluated
    and written to ``data/testblackboxpred*_<indepfile>``.

    Raises:
        SystemExit: via ``parser.error`` when ``--mode test`` is given
            without ``--indepfile``.
        ValueError: if ``--model`` is not ``'cnn'``.
        FileNotFoundError: in test mode, if the model file is not present
            in ``./models``.
    """
    parser = argparse.ArgumentParser(description='Prediction of TCR binding to peptide-MHC complexes')

    parser.add_argument('--infile', type=str,
                        help='input file for training')
    parser.add_argument('--indepfile', type=str, default=None,
                        help='independent test file')
    parser.add_argument('--blosum', type=str, default='data/BLOSUM50',
                        help='file with BLOSUM matrix')
    parser.add_argument('--batch_size', type=int, default=50, metavar='N',
                        help='batch size')
    parser.add_argument('--model_name', type=str, default='original.ckpt',
                        help = 'if train is True, model name to be saved, otherwise model name to be loaded')
    parser.add_argument('--epoch', type = int, default=200, metavar='N',
                        help='number of epoch to train')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate')
    parser.add_argument('--cuda', type = str2bool, default=True,
                        help = 'enable cuda')
    parser.add_argument('--seed', type=int, default=7405,
                        help='random seed')
    parser.add_argument('--mode', default = 'train', type=str,
                        help = 'train or test')
    parser.add_argument('--model', type=str, default='cnn',
                        help='cnn, resnet')

    args = parser.parse_args()

    # Compare by value: `args.mode is 'test'` tested object identity, which
    # is not guaranteed for equal strings, so this check could be skipped.
    if args.mode == 'test' and args.indepfile is None:
        parser.error('--indepfile is missing!')

    ## cuda
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    args.cuda = (args.cuda and torch.cuda.is_available())
    device = torch.device('cuda' if args.cuda else 'cpu')

    ## set random seed
    seed = args.seed
    torch.manual_seed(seed)
    # The split below uses NumPy's global RNG, so it has to be seeded as
    # well for --seed to make the split reproducible.
    np.random.seed(seed)
    if args.cuda:
        torch.cuda.manual_seed(seed)

    # embedding matrix
    embedding = load_embedding(args.blosum)

    ## read data
    X_pep, X_tcr, y = data_io_tf.read_pTCR(args.infile)
    y = np.array(y)

    # 80% train / 10% validation / remainder test, over shuffled indices
    n_total = len(y)
    n_train = int(round(n_total * 0.8))
    n_valid = int(round(n_total * 0.1))
    idx_shuffled = np.arange(n_total)
    np.random.shuffle(idx_shuffled)
    idx_train = idx_shuffled[:n_train]
    idx_valid = idx_shuffled[n_train:(n_train + n_valid)]
    idx_test = idx_shuffled[(n_train + n_valid):]

    ## define dataloader
    # The training loader is built first; its 'pep_length' / 'tcr_length'
    # entries are reused as maxlen_* for every other loader.
    train_loader = define_dataloader(X_pep[idx_train], X_tcr[idx_train], y[idx_train], None,
                                     None, None,
                                     batch_size=args.batch_size, device=device)
    valid_loader = define_dataloader(X_pep[idx_valid], X_tcr[idx_valid], y[idx_valid], None,
                                     maxlen_pep=train_loader['pep_length'],
                                     maxlen_tcr=train_loader['tcr_length'],
                                     batch_size=args.batch_size, device=device)
    test_loader = define_dataloader(X_pep[idx_test], X_tcr[idx_test], y[idx_test], None,
                                    maxlen_pep=train_loader['pep_length'],
                                    maxlen_tcr=train_loader['tcr_length'],
                                    batch_size=args.batch_size, device=device)

    ## read indep data
    if args.indepfile is not None:
        X_indep_pep, X_indep_tcr, y_indep = data_io_tf.read_pTCR(args.indepfile)
        y_indep = np.array(y_indep)
        indep_loader = define_dataloader(X_indep_pep, X_indep_tcr, y_indep, None,
                                         maxlen_pep=train_loader['pep_length'],
                                         maxlen_tcr=train_loader['tcr_length'],
                                         batch_size=args.batch_size, device=device)

    # Only the 'cnn' architecture is wired up at the moment.
    if args.model == 'cnn':
        from cnn import Net
    else:
        raise ValueError('unknown model name')

    ## define model
    model = Net(embedding, train_loader['pep_length'], train_loader['tcr_length']).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # exist_ok avoids the check-then-create race of listdir() + mkdir()
    os.makedirs('models', exist_ok=True)
    os.makedirs('result', exist_ok=True)

    ## fit model
    if args.mode == 'train':

        # NOTE(review): the two check_model_name() results are discarded by
        # the reassignment below, so the model is saved under the raw
        # --model_name. Kept as is because check_model_name is defined
        # elsewhere -- confirm whether the checked name should be used.
        model_name = check_model_name(args.model_name)
        model_name = check_model_name(model_name, './models')
        model_name = args.model_name

        result_path = ('result/'
                       + os.path.splitext(os.path.basename(args.infile))[0]
                       + '_'
                       + os.path.splitext(os.path.basename(args.model_name))[0]
                       + '_valid.csv')
        wf_colnames = ['loss', 'accuracy',
                       'precision1', 'precision0',
                       'recall1', 'recall0',
                       'f1macro','f1micro', 'auc']

        t0 = time.time()
        # The context manager closes (and flushes) the results file even if
        # training raises; it was previously never closed.
        with open(result_path, 'w') as wf_open:
            wf = csv.DictWriter(wf_open, wf_colnames, delimiter='\t')

            for epoch in range(1, args.epoch + 1):

                train(args, model, device, train_loader['loader'], optimizer, epoch)

                ## evaluate performance
                perf_train = get_performance_batchiter(train_loader['loader'], model, device)
                perf_valid = get_performance_batchiter(valid_loader['loader'], model, device)

                ## print performance
                print('Epoch {} TimeSince {}\n'.format(epoch, timeSince(t0)))
                print('[TRAIN] {} ----------------'.format(epoch))
                print_performance(perf_train)
                print('[VALID] {} ----------------'.format(epoch))
                print_performance(perf_valid, writeif=True, wf=wf)

        ## evaluate and print test-set performance
        # args.epoch equals the last loop value and, unlike `epoch`, is also
        # defined when the loop body never ran (--epoch 0).
        print('[TEST ] {} ----------------'.format(args.epoch))
        perf_test = get_performance_batchiter(test_loader['loader'], model, device)
        print_performance(perf_test)

        model_name = './models/' + model_name
        torch.save(model.state_dict(), model_name)

    elif args.mode == 'test':

        model_name = args.model_name

        # Explicit check instead of `assert`, which is stripped under -O.
        if model_name not in os.listdir('./models'):
            raise FileNotFoundError(
                "model '{}' not found in ./models".format(model_name))

        model_name = './models/' + model_name
        # map_location lets a checkpoint saved on a GPU load on a CPU-only
        # machine (and vice versa).
        model.load_state_dict(torch.load(model_name, map_location=device))

        ## evaluate and print independent-test-set performance
        # The header used to contain a '{}' placeholder that was never
        # formatted and was printed literally.
        print('[INDEP] ----------------')
        perf_indep = get_performance_batchiter(indep_loader['loader'], model, device)
        print_performance(perf_indep)

        ## write blackbox output
        # NOTE(review): the whole loader dict is passed here, whereas
        # get_performance_batchiter receives indep_loader['loader'] --
        # confirm against write_blackbox_output_batchiter.
        indep_name = os.path.basename(args.indepfile)
        with open('data/testblackboxpred_' + indep_name, 'w') as wf_bb_open:
            wf_bb = csv.writer(wf_bb_open, delimiter='\t')
            write_blackbox_output_batchiter(indep_loader, model, wf_bb, device)

        with open('data/testblackboxpredscore_' + indep_name, 'w') as wf_bb_open1:
            wf_bb1 = csv.writer(wf_bb_open1, delimiter='\t')
            write_blackbox_output_batchiter(indep_loader, model, wf_bb1, device, ifscore=True)

    else:

        print('\nError: "--mode train" or "--mode test" expected')
Exemple #7
0
# Human-readable class names, ten entries.
# NOTE(review): presumably indexed by the integer labels yielded by
# `trainloader` (the names match the CIFAR-10 categories) -- confirm.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

def imshow(img):
    """Show ``img`` in a matplotlib window.

    The input is rescaled with ``img / 2 + 0.5``, converted to a NumPy
    array and its axes are reordered to ``(1, 2, 0)`` before plotting.
    Presumably this undoes a (0.5, 0.5) normalisation and turns a
    channel-first image into channel-last -- confirm against the transform
    used to build the dataset.
    """
    rescaled = img / 2 + 0.5
    pixels = rescaled.numpy().transpose(1, 2, 0)
    plt.imshow(pixels)
    plt.show()

dataiter = iter(trainloader)
# Use the built-in next(): Python 3 iterators implement __next__, and the
# Python 2 style `.next()` method is not available on them.
images, labels = next(dataiter)

net = Net()

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        # standard step: reset gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # report the mean loss of every block of 2000 mini-batches
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss / 2000))
            running_loss = 0.0
sample_x = transform(sample)
display(sample_x['image'], sample_x['keypoints'])

print(sample['keypoints'][0][1])
'''

# Shuffled, batched loader over the training set; num_workers=0 means the
# data is loaded in the main process.
trainLoader = DataLoader(trainDataset,
                         batch_size=batch_size,
                         shuffle=True,
                         num_workers=0)
# The test loader yields one sample at a time.
# NOTE(review): shuffle=True on the test set makes the evaluation order vary
# between runs -- confirm this is intended.
testLoader = DataLoader(testDataset, batch_size=1, shuffle=True, num_workers=0)

model = Net()
# NOTE(review): "critirion" is a misspelling of "criterion"; the name is left
# unchanged because the commented-out train(...) call below passes it by that
# keyword.
critirion = torch.nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)
print(model)
epoch = 1000

# NOTE(review): unconditional .cuda() -- this fails on a machine without CUDA.
model.cuda()
# loadModel is defined elsewhere; whatever (model, optimizer) pair it returns
# replaces the freshly built objects from here on.
model, optimizer = loadModel(model, optimizer)

# With the train(...) call commented out, the state saved below is exactly
# what loadModel returned.
#train(epoch=epoch, train_loader=trainLoader, optimizer= optimizer, critirion=critirion, model=model, testLoader= testLoader, batch_size= batch_size)
torch.save(model.state_dict(), 'SavedModels/pull_model_saved')
#test(model=model, testLoader=testLoader, batch_size=batch_size, im_num=12)

#model = loadModel(model)

#weights = model.conv2.weight.data.numpy()
#feature_visualization(weights=weights, image=getImage(iter(testLoader).next()['image'][0]), depth =32)
Exemple #9
0
def main(model: cnn.Net, classes: List[str], input_size: Tuple[int, int]):
    """Save Guided-Backprop, Grad-CAM and Guided Grad-CAM images for two samples.

    The two hard-coded sample images are loaded with ``load_images`` and
    moved to the model's device. For each of the first (at most three)
    columns of the ids returned by ``BackPropagation.forward`` and for each
    image, three PNG files are written to ``results/``: ``<j>-guided-<class>``,
    ``<j>-gradcam-conv5-<class>`` and ``<j>-guided_gradcam-conv5-<class>``.

    Args:
        model: Network to explain. It is switched to eval mode; hooks are
            attached for the duration of the call and removed afterwards.
        classes: Class names, indexed by the predicted ids.
        input_size: Passed to ``load_images``.
    """
    device = next(model.parameters()).device
    model.eval()

    image_paths = [
        './recognition_datasets/Images/crossing/crossing-samp1_3_4.jpg',
        './recognition_datasets/Images/crossing/crossing-samp1_3_3.jpg'
    ]
    images, raw_images = load_images(image_paths, input_size)
    images = torch.stack(images).to(device)

    target_layer = 'conv5'
    output_dir = Path('results')
    output_dir.mkdir(parents=True, exist_ok=True)

    bp = BackPropagation(model=model)
    gcam = None
    gbp = None

    # try/finally detaches the hooks from `model` even when a pass raises;
    # previously they were never removed.
    try:
        probs, ids = bp.forward(images)  # sorted
        # Keep the tensor `ids` for backward() -- the same way __execute in
        # this file calls it -- and use a NumPy copy only to look up names.
        ids_np = ids.cpu().numpy()

        gcam = GradCAM(model=model)
        _ = gcam.forward(images)

        gbp = GuidedBackPropagation(model=model)
        _ = gbp.forward(images)

        # Never ask for more columns than `ids` has, so models with fewer
        # than three outputs do not index out of range.
        topk = min(3, ids.shape[1])

        for i in range(topk):
            target_ids = ids[:, [i]]

            # Guided Backpropagation
            gbp.backward(ids=target_ids)
            gradients = gbp.generate()

            # Grad-CAM
            gcam.backward(ids=target_ids)
            regions = gcam.generate(target_layer=target_layer)

            # Independent of the image index: compute once per class.
            guided_regions = torch.mul(regions, gradients)

            for j in range(len(images)):
                label = classes[ids_np[j, i]]

                print("\t#{}: {} ({:.5f})".format(j, label, probs[j, i]))

                # Build each file name directly; the former two-step
                # str.format template broke on class names containing
                # braces.

                # Guided Backpropagation
                path = (output_dir / f'{j}-guided-{label}.png').as_posix()
                save_gradient(filename=path, gradient=gradients[j])

                # Grad-CAM
                path = (output_dir
                        / f'{j}-gradcam-{target_layer}-{label}.png').as_posix()
                save_gradcam(filename=path,
                             gcam=regions[j, 0],
                             raw_image=raw_images[j])

                # Guided Grad-CAM
                path = (output_dir
                        / f'{j}-guided_gradcam-{target_layer}-{label}.png'
                        ).as_posix()
                save_gradient(filename=path, gradient=guided_regions[j])
    finally:
        bp.remove_hook()
        for explainer in (gcam, gbp):
            if explainer is not None:
                explainer.remove_hook()