import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
# %matplotlib inline

# Use a white background for matplotlib figures
matplotlib.rcParams['figure.facecolor'] = '#ffffff'

# MNIST digits as [1, 28, 28] float tensors scaled to [0, 1].
dataset = MNIST(root='data/', download=True, transform=ToTensor())

image, label = dataset[0]
print('image.shape:', image.shape)
# BUG FIX: `image.permute(1, 2)` is invalid — permute() on a 3-D tensor
# requires all three dimensions. For a single-channel image, index out the
# channel axis to get the (H, W) array matplotlib expects.
plt.imshow(image[0], cmap='gray')
print('Label:', label)

# Hold out 10k samples for validation; the remainder is used for training.
val_size = 10000
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])
len(train_ds), len(val_ds)

batch_size = 128
# Validation can use a larger batch since no gradients are kept;
# pin_memory speeds up host->GPU copies when CUDA is used.
train_loader = DataLoader(train_ds, batch_size, shuffle=True,
                          num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size*2,
                        num_workers=4, pin_memory=True)
import torch from torch import nn from torchvision import datasets from torchvision.transforms import ToTensor from torch.utils.data import DataLoader device = 'cuda' training_data = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor()) test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor()) batch_size = 100 train_dataloader = DataLoader(training_data, batch_size=batch_size) test_dataloader = DataLoader(test_data, batch_size=batch_size) class NeuralNetwork(nn.Module): def __init__(self): super(NeuralNetwork, self).__init__() self.stack = nn.Sequential( nn.Conv2d(1, 24, 3, 2, 1), nn.ReLU(),
def train_hr_transform(crop_size):
    """Training transform for HR images: random crop to `crop_size`, then tensor."""
    steps = [
        RandomCrop(crop_size),
        ToTensor(),
    ]
    return Compose(steps)
def display_transform():
    """Visualization transform: tensor -> PIL, resize and center-crop to 400, back to tensor."""
    pipeline = [
        ToPILImage(),
        Resize(400),
        CenterCrop(400),
        ToTensor(),
    ]
    return Compose(pipeline)
def train_hr_transform(crop_size):
    """Training transform for HR images: deterministic center crop, then tensor."""
    # A bicubic Resize((128,128)) step was previously tried here and disabled:
    # Resize((128,128), interpolation=Image.BICUBIC)
    ops = [CenterCrop(crop_size)]
    ops.append(ToTensor())
    return Compose(ops)
help='test low resolution image name') parser.add_argument('--model_path', type=str) opt = parser.parse_args() UPSCALE_FACTOR = opt.upscale_factor TEST_MODE = True if opt.test_mode == 'GPU' else False IMAGE_NAME = opt.image_name model = Generator(UPSCALE_FACTOR).eval() if TEST_MODE: model.cuda() model.load_state_dict(torch.load(f'{opt.model_path}')) else: model.load_state_dict( torch.load(f'{opt.model_path}', map_location=lambda storage, loc: storage)) image = Image.open(IMAGE_NAME) image = Variable(ToTensor()(image), volatile=True).unsqueeze(0) if TEST_MODE: image = image.cuda() start = time.clock() out = model(image) elapsed = (time.clock() - start) print('cost' + str(elapsed) + 's') out_img = ToPILImage()(out[0].data.cpu()) out_path = Path('test_outputs') out_path.mkdir(exist_ok=True, parents=True) out_img.save(str(out_path / Path(opt.image_name).name))
normal_factor = 1. # setting to eval mode i2d.eval() #img = Variable(torch.FloatTensor(1), volatile=True) #if args.cuda: # img = img.cuda() # https://discuss.pytorch.org/t/out-of-memory-error-during-evaluation-but-training-works-fine/12274/3 with torch.no_grad(): with open('D:/DataSets/RGB2Depth/20200602_112100/train_images.txt') as f: for line in f: line = line.rstrip('\n') print('line: {}'.format(line)) img_in = ToTensor()( Image.open(line) ).to(device) print('evaluating...') #img = torch.from_numpy(img_in.transpose(2, 0, 1)).float().to(device) img = img_in img = torch.unsqueeze(img, 0) print('img {}'.format(img.shape)) z_fake = i2d(img) z_fake = F.interpolate(z_fake, size=(img.shape[2],img.shape[3]), mode='bilinear', align_corners=True) # resize new line to reduce the computation time z_fake = torch.squeeze(z_fake, 0) z_fake = torch.squeeze(z_fake, 0) img = torch.squeeze(img, 0) print(z_fake) img_color = img.cpu().numpy().transpose(1, 2, 0)
# Resolve input locations: CLI overrides fall back to hard-coded defaults.
root_folder = vars(args)['root']
model_url = r'Opacity_DN169_BCE_SGD\Opacity_DN169_BCE_SGD.pth'
if vars(args)['model_url'] is not None:
    model_url = vars(args)['model_url']
# NOTE(review): default is an absolute user-specific Windows path — only
# works on the original author's machine unless --hm_csv is given.
csv_hm = r"C:\Users\maest\OneDrive\DTU\Semestre 4\Thesis\Code\CheXNet_aproach\Datase_stratification\PADChest_hm_LRUMDP_opacity.csv"
if vars(args)['hm_csv'] is not None:
    csv_hm = vars(args)['hm_csv']

# Heatmaps are generated one image at a time.
batch_size = 1
radiographic_findings_opacity = ['opacity']

# ImageNet normalization statistics (model was pretrained on ImageNet).
transforms_test = transforms.Compose([
    Resize(512),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

hm_dataset = PadChestDataset(csv_hm, radiographic_findings_opacity,
                             root_folder, transform=transforms_test)
hm_loader = torch.utils.data.DataLoader(hm_dataset, batch_size=1)

# Inverse of the Normalize above, used to recover displayable images.
unorm = UnNormalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

for enum, data in enumerate(hm_loader, 0):
    print(enum)
    images, labels = data
    # Keep a CPU copy for heatmap overlay before moving to GPU.
    image_hm = images
    images = images.cuda()
def main():
    """Train/evaluate a multi-hop visual grounding model (ReSC-style).

    Parses CLI options, fixes random seeds, builds ReferIt-family datasets
    and loaders, constructs the model and RMSprop optimizer (with a 10x
    lower LR for the visual/text backbones), then either runs a test pass
    or the full train/validate/checkpoint loop.
    """
    parser = argparse.ArgumentParser(description='Dataloader test')
    parser.add_argument('--gpu', default='0', help='gpu id')
    parser.add_argument('--workers', default=16, type=int,
                        help='num workers for data loading')
    parser.add_argument('--nb_epoch', default=100, type=int,
                        help='training epoch')
    parser.add_argument('--lr', default=1e-4, type=float,
                        help='learning rate')
    parser.add_argument('--power', default=0, type=float,
                        help='lr poly power; 0 indicates step decay by half')
    parser.add_argument('--batch_size', default=8, type=int,
                        help='batch size')
    parser.add_argument('--size', default=256, type=int, help='image size')
    parser.add_argument(
        '--anchor_imsize', default=416, type=int,
        help='scale used to calculate anchors defined in model cfg file')
    parser.add_argument('--data_root', type=str, default='./ln_data/DMS/',
                        help='path to ReferIt splits data folder')
    parser.add_argument('--split_root', type=str, default='data',
                        help='location of pre-parsed dataset info')
    parser.add_argument('--dataset', default='referit', type=str,
                        help='referit/flickr/unc/unc+/gref')
    parser.add_argument('--time', default=20, type=int,
                        help='maximum time steps (lang length) per batch')
    parser.add_argument('--emb_size', default=512, type=int,
                        help='fusion module embedding dimensions')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument(
        '--pretrain', default='', type=str, metavar='PATH',
        help=
        'pretrain support load state_dict that are not identical, while have no loss saved as resume'
    )
    parser.add_argument('--print_freq', '-p', default=2000, type=int,
                        metavar='N', help='print frequency (default: 1e3)')
    parser.add_argument('--savename', default='default', type=str,
                        help='Name head for saved model')
    parser.add_argument('--seed', default=13, type=int, help='random seed')
    parser.add_argument('--bert_model', default='bert-base-uncased',
                        type=str, help='bert model')
    parser.add_argument('--test',
                        dest='test', default=False, action='store_true',
                        help='test')
    parser.add_argument('--nflim', default=3, type=int, help='nflim')
    parser.add_argument('--mstage', dest='mstage', default=False,
                        action='store_true', help='if mstage')
    parser.add_argument('--mstack', dest='mstack', default=False,
                        action='store_true', help='if mstack')
    parser.add_argument('--w_div', default=0.125, type=float,
                        help='weight of the diverge loss')
    parser.add_argument('--fusion', default='prod', type=str,
                        help='prod/cat')
    parser.add_argument('--tunebert', dest='tunebert', default=False,
                        action='store_true', help='if tunebert')
    parser.add_argument('--large', dest='large', default=False,
                        action='store_true',
                        help='if large mode: fpn16, convlstm out, size 512')

    # args/anchors are consumed by module-level helpers, hence global.
    global args, anchors_full
    args = parser.parse_args()
    # Large mode increases both grid size and input resolution.
    if args.large:
        args.gsize = 16
        args.size = 512
    else:
        args.gsize = 8
    print(
        '----------------------------------------------------------------------'
    )
    print(sys.argv[0])
    print(args)
    print(
        '----------------------------------------------------------------------'
    )
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    ## fix seed
    # Deterministic cuDNN + staggered seeds for full reproducibility.
    cudnn.benchmark = False
    cudnn.deterministic = True
    random.seed(args.seed)
    np.random.seed(args.seed + 1)
    torch.manual_seed(args.seed + 2)
    torch.cuda.manual_seed_all(args.seed + 3)
    # NOTE(review): `eps` is never used below in this function.
    eps = 1e-10

    ## following anchor sizes calculated by kmeans under args.anchor_imsize=416
    ## A typo of 'refeit' in original experiments, thus removing the anchor for referit.
    ## Detailed discussion in "https://github.com/zyang-ur/ReSC/issues/5"
    # if args.dataset=='refeit':
    #     anchors = '30,36, 78,46, 48,86, 149,79, 82,148, 331,93, 156,207, 381,163, 329,285'
    # elif args.dataset=='flickr':
    if args.dataset == 'flickr':
        anchors = '29,26, 55,58, 137,71, 82,121, 124,205, 204,132, 209,263, 369,169, 352,294'
    else:
        anchors = '10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326'
    # Parse "w,h, w,h, ..." into (w, h) pairs, largest anchors first.
    anchors = [float(x) for x in anchors.split(',')]
    anchors_full = [(anchors[i], anchors[i + 1])
                    for i in range(0, len(anchors), 2)][::-1]

    ## save logs
    if args.savename == 'default':
        args.savename = 'filmconv_nofpn32_%s_batch%d' % (args.dataset,
                                                         args.batch_size)
    if not os.path.exists('./logs'):
        os.mkdir('logs')
    logging.basicConfig(level=logging.INFO,
                        filename="./logs/%s" % args.savename,
                        filemode="a+",
                        format="%(asctime)-15s %(levelname)-8s %(message)s")
    logging.info(str(sys.argv))
    logging.info(str(args))

    # ImageNet normalization (backbone pretrained on ImageNet).
    input_transform = Compose([
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = ReferDataset(data_root=args.data_root,
                                 split_root=args.split_root,
                                 dataset=args.dataset,
                                 split='train',
                                 imsize=args.size,
                                 transform=input_transform,
                                 max_query_len=args.time,
                                 augment=True)
    val_dataset = ReferDataset(data_root=args.data_root,
                               split_root=args.split_root,
                               dataset=args.dataset,
                               split='val',
                               imsize=args.size,
                               transform=input_transform,
                               max_query_len=args.time)
    ## note certain dataset does not have 'test' set:
    ## 'unc': {'train', 'val', 'trainval', 'testA', 'testB'}
    # testmode=True makes the dataset return extra metadata for evaluation.
    test_dataset = ReferDataset(data_root=args.data_root,
                                split_root=args.split_root,
                                dataset=args.dataset,
                                testmode=True,
                                split='val',
                                imsize=args.size,
                                transform=input_transform,
                                max_query_len=args.time)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True,
                              drop_last=True,
                              num_workers=args.workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            drop_last=True,
                            num_workers=args.workers)
    # Test runs one sample at a time, single-process loading.
    test_loader = DataLoader(test_dataset,
                             batch_size=1,
                             shuffle=False,
                             pin_memory=True,
                             drop_last=True,
                             num_workers=0)

    ## Model
    model = grounding_model_multihop(NFilm=args.nflim, fusion=args.fusion, intmd=args.mstack, mstage=args.mstage, \
        emb_size=args.emb_size, coordmap=True, convlstm=args.large, \
        bert_model=args.bert_model, dataset=args.dataset, tunebert=args.tunebert)
    model = torch.nn.DataParallel(model).cuda()

    if args.pretrain:
        model = load_pretrain(model, args, logging)
    if args.resume:
        model = load_resume(model, args, logging)

    print('Num of parameters:',
          sum([param.nelement() for param in model.parameters()]))
    logging.info('Num of parameters:%d' %
                 int(sum([param.nelement() for param in model.parameters()])))

    # Split parameters into backbone (visual/text) and fusion groups so the
    # pretrained backbones can get a 10x smaller learning rate below.
    # NOTE(review): the `param not in visu_param` membership tests run
    # against a generator on its first use — verify this filtering behaves
    # as intended (generators are exhausted after one pass).
    if args.tunebert:
        visu_param = model.module.visumodel.parameters()
        text_param = model.module.textmodel.parameters()
        rest_param = [
            param for param in model.parameters()
            if ((param not in visu_param) and (param not in text_param))
        ]
        visu_param = list(model.module.visumodel.parameters())
        text_param = list(model.module.textmodel.parameters())
        sum_visu = sum([param.nelement() for param in visu_param])
        sum_text = sum([param.nelement() for param in text_param])
        sum_fusion = sum([param.nelement() for param in rest_param])
        print('visu, text, fusion module parameters:', sum_visu, sum_text,
              sum_fusion)
    else:
        visu_param = model.module.visumodel.parameters()
        rest_param = [
            param for param in model.parameters() if param not in visu_param
        ]
        visu_param = list(model.module.visumodel.parameters())
        sum_visu = sum([param.nelement() for param in visu_param])
        sum_text = sum([
            param.nelement() for param in model.module.textmodel.parameters()
        ])
        sum_fusion = sum([param.nelement() for param in rest_param]) - sum_text
        print('visu, text, fusion module parameters:', sum_visu, sum_text,
              sum_fusion)

    ## optimizer; rmsprop default
    # Backbone groups train at lr/10; fusion layers at the full lr.
    if args.tunebert:
        optimizer = torch.optim.RMSprop([{
            'params': rest_param
        }, {
            'params': visu_param,
            'lr': args.lr / 10.
        }, {
            'params': text_param,
            'lr': args.lr / 10.
        }], lr=args.lr, weight_decay=0.0005)
    else:
        optimizer = torch.optim.RMSprop([{
            'params': rest_param
        }, {
            'params': visu_param,
            'lr': args.lr / 10.
        }], lr=args.lr, weight_decay=0.0005)

    ## training and testing
    best_accu = -float('Inf')
    if args.test:
        _ = test_epoch(test_loader, model)
    else:
        for epoch in range(args.nb_epoch):
            adjust_learning_rate(args, optimizer, epoch)
            train_epoch(train_loader, model, optimizer, epoch)
            accu_new = validate_epoch(val_loader, model)
            ## remember best accu and save checkpoint
            is_best = accu_new > best_accu
            best_accu = max(accu_new, best_accu)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_loss': accu_new,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                args,
                filename=args.savename)
        print('\nBest Accu: %f\n' % best_accu)
        logging.info('\nBest Accu: %f\n' % best_accu)
def main():
    """Entry point for VQA training/inference.

    Parses CLI options, loads the vocabulary, builds the chosen model
    (baseline/attention/bert), and in 'train' mode runs the mixed-precision
    (apex amp) training loop with periodic validation, TensorBoard logging,
    and checkpointing. 'test' mode is not yet implemented.
    """
    parser = argparse.ArgumentParser(description='Visual Question Answering')

    # Experiment params
    parser.add_argument('--mode', type=str, help='train or test mode',
                        required=True, choices=['train', 'test'])
    parser.add_argument('--expt_dir', type=str,
                        help='root directory to save model & summaries',
                        required=True)
    parser.add_argument('--expt_name', type=str,
                        help='expt_dir/expt_name: organize experiments',
                        required=True)
    parser.add_argument(
        '--run_name', type=str,
        help='expt_dir/expt_name/run_name: organize training runs',
        required=True)
    parser.add_argument('--model', type=str, help='VQA model',
                        choices=['baseline', 'attention', 'bert'],
                        required=True)

    # Data params
    parser.add_argument('--train_img', type=str,
                        help='path to training images directory',
                        required=True)
    parser.add_argument('--train_file', type=str,
                        help='training dataset file', required=True)
    parser.add_argument('--val_img', type=str,
                        help='path to validation images directory')
    parser.add_argument('--val_file', type=str,
                        help='validation dataset file')
    parser.add_argument('--num_cls', '-K', type=int_min_two,
                        help='top K answers (labels); min=2', default=1000)

    # Vocab params
    parser.add_argument(
        '--vocab_file', type=str,
        help='vocabulary pickle file (gen. by prepare_data.py)')

    # Training params
    parser.add_argument('--batch_size', '-bs', type=int, help='batch size',
                        default=8)
    parser.add_argument('--num_epochs', '-ep', type=int,
                        help='number of epochs', default=50)
    parser.add_argument('--learning_rate', '-lr', type=float,
                        help='initial learning rate', default=1e-4)
    parser.add_argument('--log_interval', type=int,
                        help='interval size for logging training summaries',
                        default=100)
    parser.add_argument('--save_interval', type=int,
                        help='save model after `n` weight update steps',
                        default=3000)
    parser.add_argument('--val_size', type=int,
                        help='validation set size for evaluating accuracy',
                        default=10000)

    # Evaluation params
    parser.add_argument('--K_eval', type=int,
                        help='top-K labels during evaluation/inference',
                        default=1000)

    # Model params
    parser.add_argument(
        '--model_ckpt', type=str,
        help='resume training/perform inference; e.g. model_1000.pth')
    parser.add_argument('--vgg_wts_path', type=str,
                        help='VGG-11 (bn) pre-trained weights (.pth) file')
    parser.add_argument('--vgg_train', type=str2bool,
                        help='whether to train the VGG encoder',
                        default='false')
    # parser.add_argument('--model_config', type=str, help='model config file - specifies model architecture')

    # GPU params
    # parser.add_argument('--num_gpus', type=int, help='number of GPUs to use for training', default=1)
    parser.add_argument('--gpu_id', type=int,
                        help='cuda:gpu_id (0,1,2,..) if num_gpus = 1',
                        default=0)
    parser.add_argument('--opt_lvl', type=int,
                        help='Automatic-Mixed Precision: opt-level (O_)',
                        default=1, choices=[0, 1, 2, 3])

    # Misc params
    parser.add_argument('--num_workers', type=int,
                        help='number of worker threads for Dataloader',
                        default=1)
    args = parser.parse_args()

    device = torch.device(
        'cuda:{}'.format(args.gpu_id) if torch.cuda.is_available() else 'cpu')
    print('Selected Device: {}'.format(device))
    # torch.cuda.get_device_properties(device).total_memory  # in Bytes

    # Train params
    n_epochs = args.num_epochs
    batch_size = args.batch_size
    lr = args.learning_rate

    # Load vocab (.pickle) file
    vocab = load_vocab(args.vocab_file)
    print('Vocabulary loaded from {}'.format(args.vocab_file))

    # Unpack vocab
    # NOTE(review): relies on the pickle's dict preserving exactly this
    # insertion order of five entries — confirm against prepare_data.py.
    word2idx, idx2word, label2idx, idx2label, max_seq_length = [
        v for k, v in vocab.items()
    ]
    vocab_size = len(word2idx)

    # Model Config
    model_config = setup_model_configs(args, vocab_size)
    image_size = model_config['image_size']

    # TODO: Multi-GPU PyTorch Implementation
    # if args.num_gpus > 1 and torch.cuda.device_count() > 1:
    #     print("Using {} GPUs!".format(torch.cuda.device_count()))
    #     model = nn.DataParallel(model, device_ids=[0, 1])
    # model.to(device)

    # Train
    if args.mode == 'train':
        # Setup train log directory
        log_dir = os.path.join(args.expt_dir, args.expt_name, args.run_name)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        print('Training Log Directory: {}\n'.format(log_dir))

        # TensorBoard summaries setup --> /expt_dir/expt_name/run_name/
        writer = SummaryWriter(log_dir)

        # Train log file
        log_file = setup_logs_file(parser, log_dir)

        # Dataset & Dataloader (ImageNet normalization statistics)
        train_dataset = VQADataset(args.train_file, args.train_img, word2idx,
                                   label2idx, max_seq_length,
                                   transform=Compose([
                                       Resize(image_size),
                                       ToTensor(),
                                       Normalize((0.485, 0.456, 0.406),
                                                 (0.229, 0.224, 0.225))
                                   ]))
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size, shuffle=True, drop_last=True,
            num_workers=args.num_workers)

        print('Question Vocabulary Size: {} \n\n'.format(vocab_size))
        print('Train Data Size: {}'.format(train_dataset.__len__()))

        # Plot data (image, question, answer) for sanity check
        # plot_data(train_loader, idx2word, idx2label, num_plots=10)
        # sys.exit()

        if args.val_file:
            # Use the same word-index dicts as that obtained for the training set
            val_dataset = VQADataset(args.val_file, args.val_img, word2idx,
                                     label2idx, max_seq_length,
                                     transform=Compose([
                                         Resize(image_size),
                                         ToTensor(),
                                         Normalize((0.485, 0.456, 0.406),
                                                   (0.229, 0.224, 0.225))
                                     ]))
            val_loader = torch.utils.data.DataLoader(
                val_dataset, batch_size, shuffle=True, drop_last=True,
                num_workers=args.num_workers)

            log_msg = 'Validation Data Size: {}\n'.format(
                val_dataset.__len__())
            log_msg += 'Validation Accuracy is computed using {} samples. See --val_size\n'.format(
                args.val_size)
            print_and_log(log_msg, log_file)

        # Num of classes = K + 1 (for UNKNOWN)
        num_classes = args.num_cls + 1

        # Setup model params
        question_encoder_params = model_config['question_params']
        image_encoder_params = model_config['image_params']

        # Define model & load to device
        VQANet = model_config['model']
        model = VQANet(question_encoder_params, image_encoder_params,
                       K=num_classes)
        model.to(device)

        # Load model checkpoint file (if specified) from `log_dir`
        if args.model_ckpt:
            model_ckpt_path = os.path.join(log_dir, args.model_ckpt)
            checkpoint = torch.load(model_ckpt_path)
            model.load_state_dict(checkpoint)

            log_msg = 'Model successfully loaded from {}'.format(
                model_ckpt_path) + '\nResuming Training...'
            print_and_log(log_msg, log_file)

        # Loss & Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr)

        # TODO: StepLR Scheduler
        # scheduler = StepLR(optimizer, step_size=1, gamma=0.1)

        # Wrap model/optimizer for NVIDIA apex mixed-precision training.
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level="O{}".format(args.opt_lvl))

        steps_per_epoch = len(train_loader)
        start_time = time()
        curr_step = 0
        # TODO: Save model with best validation accuracy
        best_val_acc = 0.0

        for epoch in range(n_epochs):
            for batch_data in train_loader:
                # Load batch data
                image = batch_data['image']
                question = batch_data['question']
                ques_len = batch_data['ques_len']
                label = batch_data['label']

                # Sort batch based on sequence length
                image, question, label, ques_len = sort_batch(
                    image, question, label, ques_len)

                # Load data onto the available device
                image = image.to(device)  # [B, C, H, W]
                question = question.to(device)  # [B, L]
                ques_len = ques_len.to(device)  # [B]
                label = label.to(device)  # [B]

                # Forward Pass
                label_predict = model(image, question, ques_len)

                # Compute Loss
                loss = criterion(label_predict, label)

                # Backward Pass (amp scales the loss to avoid fp16 underflow)
                optimizer.zero_grad()
                # loss.backward()
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                optimizer.step()

                # Print Results - Loss value & Validation Accuracy
                # NOTE(review): if log_interval == 1 this fires at
                # curr_step == 0 and `time_elapsed / curr_step` below
                # divides by zero — confirm log_interval > 1 is assumed.
                if (curr_step + 1) % args.log_interval == 0 or curr_step == 1:
                    # Validation set accuracy
                    if args.val_file:
                        validation_metrics = compute_validation_metrics(
                            model, val_loader, device, size=args.val_size)

                        # Reset the mode to training
                        model.train()

                        log_msg = 'Validation Accuracy: {:.2f} % || Validation Loss: {:.4f}'.format(
                            validation_metrics['accuracy'],
                            validation_metrics['loss'])
                        print_and_log(log_msg, log_file)

                        # If current model has the best accuracy on the validation set & >= training accuracy,
                        # save model to disk

                        # Add summaries to TensorBoard
                        writer.add_scalar('Val/Accuracy',
                                          validation_metrics['accuracy'],
                                          curr_step)
                        writer.add_scalar('Val/Loss',
                                          validation_metrics['loss'],
                                          curr_step)

                    # Add summaries to TensorBoard
                    writer.add_scalar('Train/Loss', loss.item(), curr_step)

                    # Compute elapsed & remaining time for training to complete
                    time_elapsed = (time() - start_time) / 3600
                    # total time = time_per_step * steps_per_epoch * total_epochs
                    total_time = (time_elapsed /
                                  curr_step) * steps_per_epoch * n_epochs
                    time_left = total_time - time_elapsed

                    log_msg = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f} | time elapsed: {:.2f}h | time left: {:.2f}h'.format(
                        epoch + 1, n_epochs, curr_step + 1, steps_per_epoch,
                        loss.item(), time_elapsed, time_left)
                    print_and_log(log_msg, log_file)

                # Save the model
                if (curr_step + 1) % args.save_interval == 0:
                    print('Saving the model at the {} step to directory:{}'.
                          format(curr_step + 1, log_dir))
                    save_path = os.path.join(
                        log_dir, 'model_' + str(curr_step + 1) + '.pth')
                    torch.save(model.state_dict(), save_path)

                curr_step += 1

            # Validation set accuracy on the entire set
            if args.val_file:
                # Total validation set size
                total_validation_size = val_dataset.__len__()
                validation_metrics = compute_validation_metrics(
                    model, val_loader, device, total_validation_size)

                log_msg = '\nAfter {} epoch:\n'.format(epoch + 1)
                log_msg += 'Validation Accuracy: {:.2f} % || Validation Loss: {:.4f}\n'.format(
                    validation_metrics['accuracy'],
                    validation_metrics['loss'])
                print_and_log(log_msg, log_file)

                # Reset the mode to training
                model.train()

        writer.close()
        log_file.close()

    # TODO: Test/Inference
    elif args.mode == 'test':
        raise NotImplementedError('TODO: test mode')
downsample=opt.downsample)
# 'rbf'/'nerf' model variants share the same relu-based SingleBVPNet setup.
elif opt.model_type == 'rbf' or opt.model_type == 'nerf':
    model = modules.SingleBVPNet(type='relu', mode=opt.model_type,
                                 out_features=img_dataset.img_channels,
                                 sidelength=image_resolution,
                                 downsample=opt.downsample)
else:
    raise NotImplementedError
model.cuda()

root_path = os.path.join(opt.logging_root, opt.experiment_name)

# Build the sampling mask: either load it from an image file, or draw a
# random Bernoulli mask with density opt.sparsity.
if opt.mask_path:
    mask = Image.open(opt.mask_path)
    mask = ToTensor()(mask)
    mask = mask.float().cuda()
    # Fraction of pixels kept by the mask (its sparsity level).
    percentage = torch.sum(mask).cpu().numpy() / np.prod(mask.shape)
    print("mask sparsity %f" % (percentage))
else:
    mask = torch.rand(image_resolution) < opt.sparsity
    mask = mask.float().cuda()

# Define the loss
# The mask is flattened to match the per-pixel loss layout; optional
# TV/FH regularization priors wrap the basic masked MSE.
if opt.prior is None:
    loss_fn = partial(loss_functions.image_mse, mask.view(-1, 1))
elif opt.prior == 'TV':
    loss_fn = partial(loss_functions.image_mse_TV_prior, mask.view(-1, 1),
                      opt.k1, model)
elif opt.prior == 'FH':
    loss_fn = partial(loss_functions.image_mse_FH_prior, mask.view(-1, 1),
def main():
    """Regress 6 pose parameters (3 rotation, 3 translation) of a wrist
    model from a single rendered image, logging and plotting convergence.

    Loads a .npy image/silhouette/parameter database, runs `iterations`
    passes of a ResNet-50-based regressor + differentiable renderer, then
    saves comparison images, a GIF, and convergence plots.
    """
    # ---------- LOAD DATASET AND FILE SELECTION ----------------------------------------------------------------------
    start = time.time()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.cuda.empty_cache()
    print(device)

    # file_name_extension = 'Rotation_centered_im1'
    file_name_extension = 'Rotation_Translation_im1'
    # file_name_extension = 'Translation_im3'

    # choose the corresponding database to use
    cubes_file = 'Npydatabase/wrist_{}.npy'.format(file_name_extension)
    silhouettes_file = 'Npydatabase/sils_{}.npy'.format(file_name_extension)
    parameters_file = 'Npydatabase/params_{}.npy'.format(file_name_extension)

    wrist = np.load(cubes_file)
    sils = np.load(silhouettes_file)
    params = np.load(parameters_file)

    train_im = wrist  # 90% training
    train_sil = sils
    train_param = params

    # Images were normalized to [-1, 1]; mean/std 0.5 undoes [0, 1] scaling.
    normalize = Normalize(mean=[0.5], std=[0.5])
    transforms = Compose([ToTensor(), normalize])
    train_dataset = CubeDataset(train_im, train_sil, train_param, transforms)
    train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True,
                                  num_workers=1)

    # # check to iterate inside the test dataloader
    # for image, sil, param in train_dataloader:
    #
    #     # print(image[2])
    #     print(image.size(), param.size())  # torch.Size([batch, 3, 512, 512]) torch.Size([batch, 6])
    #     im = 0
    #     print(param[im])  # parameter in form tensor([2.5508, 0.0000, 0.0000, 0.0000, 0.0000, 5.0000])
    #
    #     image2show = image[im]  # indexing random one image
    #     print(image2show.size())  # torch.Size([3, 512, 512])
    #     plt.imshow((image2show * 0.5 + 0.5).numpy().transpose(1, 2, 0))
    #     plt.show()
    #     break  # break here just to show 1 batch of data

    # Per-iteration history buffers for the convergence plots below.
    count = 0
    losses = []
    a = []
    b = []
    c = []
    tx = []
    ty = []
    tz = []

    # ground value to be plotted on the graph as line
    alpha_GT = np.array(m.degrees(params[0, 0]))
    beta_GT = np.array(m.degrees(params[0, 1]))
    gamma_GT = np.array(m.degrees(params[0, 2]))  # angle in degree
    tx_GT = np.array(params[0, 3])
    ty_GT = np.array(params[0, 4])
    tz_GT = np.array(params[0, 5])

    iterations = 100

    # ---------- MODEL CREATION ----------------------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument('-io', '--filename_obj', type=str,
                        default=os.path.join(data_dir, 'wrist.obj'))
    parser.add_argument('-or', '--filename_output', type=str,
                        default=os.path.join(result_dir, '{}_regression_animation_6params.gif'.format(file_name_extension)))
    parser.add_argument('-mr', '--make_reference_image', type=int, default=0)
    parser.add_argument('-g', '--gpu', type=int, default=0)
    args = parser.parse_args()

    # resnet50 = models.resnet50(pretrained=True)
    model = Myresnet50(filename_obj=args.filename_obj)
    # model = Model(args.filename_obj, args.filename_ref)
    model.to(device)
    model.train(True)

    bool_first = True
    Lr_start = 0.001
    decreaseat = 40  # halve-point: LR is divided by 10 every `decreaseat` iterations
    lr = Lr_start

    loop = tqdm.tqdm(range(iterations))
    for i in loop:
        for image, silhouette, parameter in train_dataloader:
            image = image.to(device)
            imgGT = image  # keep the ground-truth render for final display
            parameter = parameter.to(device)
            print(parameter)
            silhouette = silhouette.to(device)

            # Predict the 6 pose parameters from the image.
            params = model(image)
            print(params)
            model.t = params[0, 3:6]
            model.R = R2Rmat(params[0, 0:3])  # angle from resnet are in radian
            bool_first = True  # first_
            # print(model.t)
            # print(model.R)

            # regression between computed and ground truth
            image = model.renderer(model.vertices, model.faces, R=model.R,
                                   t=model.t, mode='silhouettes')
            # NOTE(review): a fresh Adam optimizer is constructed on every
            # batch, which discards Adam's moment estimates each step —
            # confirm this is intentional (it effectively degrades to SGD-like
            # behavior with adaptive per-step scaling reset).
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            # Loss is on the parameter vector itself, not the rendered image.
            loss = nn.MSELoss()(params, parameter).to(device)

            if (i % decreaseat == 0 and i > 2):
                lr = lr / 10
                print('update lr, is now {}'.format(lr))

            print('loss is {}'.format(loss))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.detach().cpu().numpy())

            # print(((model.K).detach().cpu().numpy()))
            cp_x = ((model.t).detach().cpu().numpy())[0]
            cp_y = ((model.t).detach().cpu().numpy())[1]
            cp_z = ((model.t).detach().cpu().numpy())[2]

            cp_rotMat = (model.R)
            # cp_rotMat = (model.R).detach().cpu().numpy()
            # Convert the rotation matrix to Euler angles for plotting.
            r = Rot.from_dcm(cp_rotMat.detach().cpu().numpy())
            r_euler = r.as_euler('xyz', degrees=True)

            a.append(r_euler[0, 0])
            # a.append(abs(r_euler[0,0] ))
            b.append(r_euler[0, 1])
            c.append(r_euler[0, 2])
            cp_a = r_euler[0, 0]
            cp_b = r_euler[0, 1]
            cp_c = r_euler[0, 2]
            tx.append(cp_x)
            ty.append(cp_y)
            tz.append(cp_z)  # z axis value

            # Render the textured model with the current pose estimate.
            images, _, _ = model.renderer(model.vertices, model.faces,
                                          torch.tanh(model.textures),
                                          R=model.R, t=model.t)
            img = images.detach().cpu().numpy()[0].transpose(1, 2, 0)

            # On the last iteration, save a side-by-side GT vs regression figure.
            if (i == iterations - 1):
                imgGT = imgGT.squeeze()  # float32 from 0-1
                imgGT = imgGT.detach().cpu()
                imgGT = (imgGT * 0.5 + 0.5).numpy().transpose(1, 2, 0)
                # imgGT = (imgGT * 255).astype(np.uint8)  # cast from float32 255.0 to 255 uint8

                f = plt.subplot(1, 2, 1)
                plt.imshow(imgGT)
                f.set_title('Ground truth \n alpha {:.3f}° tx {}\n'
                            'beta {:.3f}° ty {}\n '
                            'gamma {:.3f}° tz {}'.format(alpha_GT, tx_GT,
                                                         beta_GT, ty_GT,
                                                         gamma_GT, tz_GT))
                plt.xticks([0, 512])
                plt.yticks([])

                f = plt.subplot(1, 2, 2)
                plt.imshow(img)
                f.set_title('Regression \n alpha {:.3f}° tx {:.3f}\n'
                            'beta {:.3f}° ty {:.3f}\n'
                            'gamma {:.3f}° tz {:.3f}'.format(cp_a, cp_x,
                                                             cp_b, cp_y,
                                                             cp_c, cp_z))
                plt.xticks([0, 512])
                plt.yticks([])
                plt.savefig('results/3_6params_regression/Final_regression_6params_{}iterations_{}.png'.format(iterations, file_name_extension),
                            bbox_inches='tight', pad_inches=0.05)

            # Frames for the animation GIF assembled by make_gif() below.
            imsave('/tmp/_tmp_%04d.png' % i, img)
            loop.set_description('Optimizing (loss %.4f)' % loss.data)
            count = count + 1

    end = time.time()
    exectime = round((end - start), 2)  # format in minute
    print('time elapsed is: {} sec'.format(exectime))

    # ----------PLOT SECTION ------------------------------------------------------------------------
    make_gif(args.filename_output)

    fig, (p1, p2, p3) = plt.subplots(3, figsize=(15, 10))  # largeur hauteur
    fig.suptitle("Regression for 1 image, {} epochs in {} sec, rotation and translation, 6 parameters \n lr={} and decrease each {} iterations".format(iterations, exectime, Lr_start, decreaseat), fontsize=14)

    # NOTE(review): log y-scale combined with set_ylim([0, 1]) — a lower
    # bound of 0 is degenerate on a log axis; confirm the intended range.
    p1.plot(np.arange(count), losses, label="Global Loss")
    p1.set(ylabel='BCE Loss')
    p1.set_yscale('log')
    p1.set_ylim([0, 1])
    p1.set(xlabel='Iterations')
    # Place a legend to the right of this smaller subplot.
    p1.legend()

    # Translation components vs. iteration, dashed lines = ground truth.
    p2.plot(np.arange(count), tx, label="x values", color='g')
    p2.axhline(y=tx_GT, color='g', linestyle='--')
    p2.plot(np.arange(count), ty, label="y values", color='y')
    p2.axhline(y=ty_GT, color='y', linestyle='--')
    p2.plot(np.arange(count), tz, label="z values", color='b')
    p2.axhline(y=tz_GT, color='b', linestyle='--')
    p2.set(ylabel='Translation value')
    p2.set_ylim([-5, 10])
    p2.set(xlabel='Iterations')
    p2.legend()

    # Euler angles vs. iteration, dashed lines = ground truth.
    p3.plot(np.arange(count), a, label="alpha values", color='g')
    p3.axhline(y=alpha_GT, color='g', linestyle='--')
    p3.plot(np.arange(count), b, label="beta values", color='y')
    p3.axhline(y=beta_GT, color='y', linestyle='--')
    p3.plot(np.arange(count), c, label="gamma values", color='b')
    p3.axhline(y=gamma_GT, color='b', linestyle='--')
    p3.set(xlabel='iterations', ylabel='Rotation value')
    p3.set_ylim([-180, 180])
    p3.legend()

    fig.savefig('results/3_6params_regression/regression_1image_6params_{}.pdf'.format(file_name_extension), bbox_inches='tight', pad_inches=0.05)
    fig.savefig('results/3_6params_regression/regression_1image_6params_{}.png'.format(file_name_extension), bbox_inches='tight', pad_inches=0.05)
    matplotlib2tikz.save("results/3_6params_regression/regression_1image_6params_{}.tex".format(file_name_extension), figureheight='5.5cm', figurewidth='15cm')
    plt.show()
def __init__(self, root=expanduser("~") + "/.avalanche/data/core50/",
             train=True, transform=ToTensor(), target_transform=None,
             loader=pil_loader, download=True, object_level=True):
    """Load the CORe50 image paths and targets from the on-disk pickles.

    :param root: root for the datasets data.
    :param train: train or test split.
    :param transform: eventual transformations to be applied.
    :param target_transform: eventual transformation to be applied to
        the targets.
    :param loader: data loader method from disk.
    :param download: boolean to automatically download data. Default to
        True.
    :param object_level: if the classification is objects based or
        category based: 50 or 10 way classification problem. Default to True
        (50-way object classification problem)
    """
    self.train = train  # training set or test set
    self.transform = transform
    self.target_transform = target_transform
    self.root = root
    self.loader = loader
    self.object_level = object_level
    self.log = logging.getLogger("avalanche")

    # any scenario and run is good here since we want just to load the
    # train images and targets with no particular order
    scen = 'ni'
    run = 0
    nbatch = 8  # number of incremental batches in the 'ni' scenario

    if download:
        # Triggers the dataset download into `root` if not already present.
        self.core_data = CORE50_DATA(data_folder=root)

    # The metadata pickles map (scenario, run, batch) -> paths / labels /
    # lookup indices into the flat train+test file list.
    self.log.info("Loading paths...")
    with open(os.path.join(root, 'paths.pkl'), 'rb') as f:
        self.train_test_paths = pkl.load(f)

    self.log.info("Loading labels...")
    with open(os.path.join(root, 'labels.pkl'), 'rb') as f:
        self.all_targets = pkl.load(f)
        # Flatten the per-batch target lists of the chosen scenario/run
        # into one list aligned with `train_test_paths`.
        self.train_test_targets = []
        for i in range(nbatch + 1):
            self.train_test_targets += self.all_targets[scen][run][i]

    self.log.info("Loading LUP...")
    with open(os.path.join(root, 'LUP.pkl'), 'rb') as f:
        self.LUP = pkl.load(f)

    self.log.info("Loading labels names...")
    with open(os.path.join(root, 'labels2names.pkl'), 'rb') as f:
        self.labels2names = pkl.load(f)

    # Train split: concatenate the index lists of all batches;
    # test split: the LUP's last entry holds the test indices.
    self.idx_list = []
    if train:
        for i in range(nbatch + 1):
            self.idx_list += self.LUP[scen][run][i]
    else:
        self.idx_list = self.LUP[scen][run][-1]

    self.paths = []
    self.targets = []
    for idx in self.idx_list:
        self.paths.append(self.train_test_paths[idx])
        # Object labels are grouped 5-per-category, so integer division
        # by 5 maps the 50 object ids onto the 10 category ids.
        div = 1
        if not self.object_level:
            div = 5
        self.targets.append(self.train_test_targets[idx] // div)
def resize(x, target_shape): x = ToPILImage()(x.cpu().to(torch.float32)) x = Resize(target_shape)(x) x = ToTensor()(x) return x.cuda()
import torch from PIL.Image import Image from torch import Tensor from torchvision.datasets import MNIST from torchvision.transforms import ToTensor, ToPILImage, Compose, Normalize, \ RandomRotation import numpy as np from avalanche.benchmarks import NCScenario, nc_benchmark from avalanche.benchmarks.classic.classic_benchmarks_utils import \ check_vision_benchmark from avalanche.benchmarks.datasets import default_dataset_location from avalanche.benchmarks.utils import AvalancheDataset _default_mnist_train_transform = Compose( [ToTensor(), Normalize((0.1307, ), (0.3081, ))]) _default_mnist_eval_transform = Compose( [ToTensor(), Normalize((0.1307, ), (0.3081, ))]) class PixelsPermutation(object): """ Apply a fixed permutation to the pixels of the given image. Works with both Tensors and PIL images. Returns an object of the same type of the input element. """ def __init__(self, index_permutation: Sequence[int]): self.permutation = index_permutation self._to_tensor = ToTensor()
else: torch.nn.utils.clip_grad_norm_(params, 5.0) if __name__ == "__main__": opt = parse_options( "CIFAR10 EBM using RSM in flex.", path="/g/korbel/mjendrusch/runs/experimental/cifar10-rsm-exp-14-VP", device="cuda:0", batch_size=128, max_epochs=1000, report_interval=1000, checkpoint_interval=50000, ) cifar10 = CIFAR10("examples/", download=False, transform=ToTensor()) data = CIFAR10Dataset(cifar10) data = DataDistribution(data, batch_size=opt.batch_size, device=opt.device) energy = AdaptedUNetEnergy().to(opt.device) training = relaxed_score_matching_training( energy, data, optimizer=torch.optim.Adam, optimizer_kwargs=dict(lr=2e-4), level_weight=scale_level, level_distribution=VPNormalNoise(lambda t: 1e-3 + t * (1.0 - 1e-3)), noise_distribution=VPNormalNoise( lambda t: 1e-2 * (1e-3 + t * (1.0 - 1e-3)) ), # TruncatedNormalNoise(lambda t: 0.01 * torch.ones_like(t))
def __init__(self, index_permutation: Sequence[int]): self.permutation = index_permutation self._to_tensor = ToTensor() self._to_image = ToPILImage()
def train(args):
    """Train BirdNet's classifier head on the CUB dataset and validate each epoch.

    Only ``model.classifier`` is optimized; metrics are logged to TensorBoard
    and the final classifier weights are saved under ``args.logdir``.

    :param args: namespace with ``logdir``, ``datapath``, ``batch_size``,
        ``number_workers``, ``device``, ``learning_rate``, ``weight_decay``
        and ``number_epochs``.
    :return: dict with the final train and validation accuracy/loss.
    """
    writer = SummaryWriter(log_dir=args.logdir)

    # Datasets: random augmented crops for training, center crops for validation.
    dataset_tr = CUBDataset(
        root=args.datapath,
        train=True,
        transforms=Compose([
            Resize(256),
            RandomCrop((224, 224), pad_if_needed=True),
            RandomHorizontalFlip(),
            ToTensor()
        ])
    )
    data_loader_tr = DataLoader(
        dataset_tr,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.number_workers)
    dataset_val = CUBDataset(
        root=args.datapath,
        train=False,
        transforms=Compose([
            CenterCrop(224),
            ToTensor()
        ])
    )
    data_loader_val = DataLoader(
        dataset_val,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.number_workers)

    # Model
    model = BirdNet(num_classes=20).to(args.device)

    # Optimizer: only the classifier layer is trained (backbone left untouched).
    optimizer = Adam(
        params=model.classifier.parameters(),
        lr=args.learning_rate,
        weight_decay=args.weight_decay)

    # Meters (reused across phases; reset before each pass).
    meter_loss = AverageMeter()
    meter_accuracy = AverageMeter()

    train_accuracy, train_loss, val_accuracy, val_loss = 0, 0, 0, 0
    epoch_bar = tqdm.trange(args.number_epochs, desc='Epoch')
    for epoch in epoch_bar:
        epoch_start_time = time()

        # --- Training phase ---
        model.train()
        torch.set_grad_enabled(True)
        batch_bar = tqdm.tqdm(data_loader_tr, desc='Batch')
        meter_loss.reset()
        meter_accuracy.reset()
        for batch in batch_bar:
            input_batch = batch[0].to(args.device)
            target = batch[1].to(args.device)

            logits = model(input_batch)
            number_samples = target.shape[0]
            predictions = logits.argmax(dim=1)
            accuracy = (predictions == target).float().sum() / number_samples
            loss = F.cross_entropy(logits, target)
            meter_accuracy.update(accuracy, number_samples)
            meter_loss.update(loss, number_samples)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # batch_bar.set_postfix({'loss': loss.item()})

        train_accuracy, train_loss = meter_accuracy.get_average(), meter_loss.get_average()
        epoch_bar.set_postfix({"loss": train_loss, "accuracy": train_accuracy})
        writer.add_scalar("/train/loss", train_loss, epoch)
        writer.add_scalar("/train/accuracy", train_accuracy, epoch)

        # --- Validation phase ---
        # FIX: the original used torch.set_grad_enabled(False), which leaks
        # globally-disabled gradients to the caller after the final epoch.
        # torch.no_grad() restores the previous grad mode when the block exits.
        model.eval()
        with torch.no_grad():
            batch_bar = tqdm.tqdm(data_loader_val, desc='Batch')
            meter_loss.reset()
            meter_accuracy.reset()
            for batch in batch_bar:
                input_batch = batch[0].to(args.device)
                target = batch[1].to(args.device)

                logits = model(input_batch)
                number_samples = target.shape[0]
                predictions = logits.argmax(dim=1)
                accuracy = (predictions == target).float().sum() / number_samples
                loss = F.cross_entropy(logits, target)
                meter_accuracy.update(accuracy, number_samples)
                meter_loss.update(loss, number_samples)

        val_accuracy, val_loss = meter_accuracy.get_average(), meter_loss.get_average()
        epoch_time = time() - epoch_start_time
        epoch_bar.set_postfix({"loss": val_loss, "accuracy": val_accuracy})
        writer.add_scalar("/validation/loss", val_loss, epoch)
        writer.add_scalar("/validation/accuracy", val_accuracy, epoch)
        writer.add_scalar("time_per_epoch", epoch_time, epoch)

    # Persist only the trained classifier head.
    torch.save(model.classifier.state_dict(), str(args.logdir / "final_model.pt"))
    return {"train": {"accuracy": train_accuracy, "loss": train_loss},
            "validation": {"accuracy": val_accuracy, "loss": val_loss}}
def target_transform(crop_size): return Compose([ CenterCrop(crop_size), ToTensor(), ])
def __init__(self, mode, roidb_file=VG_SGG_FN, dict_file=VG_SGG_DICT_FN,
             image_file=IM_DATA_FN, filter_empty_rels=True, num_im=-1,
             num_val_im=5000, filter_duplicate_rels=True,
             filter_non_overlap=True, use_proposals=False):
    """
    Torch dataset for VisualGenome
    :param mode: Must be train, test, or val
    :param roidb_file: HDF5 containing the GT boxes, classes, and relationships
    :param dict_file: JSON Contains mapping of classes/relationships to words
    :param image_file: HDF5 containing image filenames
    :param filter_empty_rels: True if we filter out images without
        relationships between boxes. One might want to set this to false if
        training a detector.
    :param filter_duplicate_rels: Whenever we see a duplicate relationship
        we'll sample instead
    :param num_im: Number of images in the entire dataset. -1 for all images.
    :param num_val_im: Number of images in the validation set (must be less
        than num_im unless num_im is -1.)
    :param proposal_file: If None, we don't provide proposals. Otherwise file
        for where we get RPN proposals
    """
    if mode not in ('test', 'train', 'val'):
        raise ValueError(
            "Mode must be in test, train, or val. Supplied {}".format(
                mode))
    self.mode = mode

    # Initialize
    self.roidb_file = roidb_file
    self.dict_file = dict_file
    self.image_file = image_file
    self.filter_non_overlap = filter_non_overlap
    # Duplicate-relationship filtering only makes sense during training.
    self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

    # NOTE(review): `self.is_train` is read below but never assigned in this
    # constructor — presumably a property defined elsewhere on the class;
    # confirm before refactoring.
    self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs_one_shot(
        self.roidb_file, self.mode, num_im, num_val_im=num_val_im,
        filter_empty_rels=filter_empty_rels,
        filter_non_overlap=self.filter_non_overlap and self.is_train,
    )

    # Keep only the filenames selected by the split mask.
    self.filenames = load_image_filenames(image_file)
    self.filenames = [
        self.filenames[i] for i in np.where(self.split_mask)[0]
    ]

    self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)

    if use_proposals:
        # Load precomputed RPN proposals; each entry pairs the proposal
        # scores with their boxes (score column first) per image.
        print("Loading proposals", flush=True)
        p_h5 = h5py.File(PROPOSAL_FN, 'r')
        rpn_rois = p_h5['rpn_rois']
        rpn_scores = p_h5['rpn_scores']
        rpn_im_to_roi_idx = np.array(
            p_h5['im_to_roi_idx'][self.split_mask])
        rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])
        self.rpn_rois = []
        for i in range(len(self.filenames)):
            rpn_i = np.column_stack((
                rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                           rpn_num_rois[i]],
                rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                         rpn_num_rois[i]],
            ))
            self.rpn_rois.append(rpn_i)
    else:
        self.rpn_rois = None

    # You could add data augmentation here. But we didn't.
    # tform = []
    # if self.is_train:
    #     tform.append(RandomOrder([
    #         Grayscale(),
    #         Brightness(),
    #         Contrast(),
    #         Sharpness(),
    #         Hue(),
    #     ]))

    # Shared image preprocessing: pad to square, resize, tensorize, and
    # normalize with ImageNet statistics.
    tform = [
        SquarePad(),
        Resize(IM_SCALE),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    self.transform_pipeline = Compose(tform)
def load_data(): train_dataset = FashionMNIST(root='./cache', download=True, train=True, transform=ToTensor()) eval_dataset = FashionMNIST(root='./cache', download=False, train=False, transform=ToTensor()) train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4) eval_loader = DataLoader(dataset=eval_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4) return train_loader, eval_loader
from hbconfig import Config from torch.autograd import Variable from torchvision import transforms from torchvision.transforms import ToTensor from torchsummary import summary import utils from AutoAugment.autoaugment import ImageNetPolicy from basic_utils import saving_config from logger import Logger from miniimagenet_loader import read_dataset_test, _sample_mini_dataset, _mini_batches, _split_train_test, _mini_batches_with_augmentation, AutoEncoder from one_shot_aug.module import PretrainedClassifier, MiniImageNetModel from utils import mkdir_p meta_step_size = 1. # stepsize of outer optimization, i.e., meta-optimization meta_step_size_final = 0. tensor = ToTensor() def augments_dataset(batch, k=5): images = [] # labels=[] for _ in range(k): for img_, label in batch: policy = ImageNetPolicy() transformed = policy(img_) # f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,6)) # ax1.imshow(img_) # ax2.imshow(transformed[0]) # plt.show() tensor = ToTensor() if isinstance(transformed, (list, )):
if __name__ == '__main__': P_TRAIN = 0.8 # proportion of examples to use for training BATCH_SIZE = 32 NUM_WORKERS = 6 NUM_EPOCHS = 50 learning_rate = 0.001 DECAY_RATE = 5 # number of epochs after which to decay the learning rate LR_DECAY = 0.5 # amount to decrease the learning rate every 'DECAY_RATE' epochs CHECKPOINT_RATE = 5 # number of epochs after which to checkpoint the model IMAGE_DIR = '/home/mchobanyan/data/emotion/images/imagenet/' MODEL_DIR = '/home/mchobanyan/data/emotion/models/emotion_detect/imagenet/' dataset = ColorAndGrayImages(image_dir=IMAGE_DIR, colored_transform=Compose([ ToTensor(), Normalize(IMAGENET_MEANS, IMAGENET_STDVS) ]), gray_transform=ToTensor()) print(f'Number of images: {len(dataset)}') train_size = int(len(dataset) * P_TRAIN) val_size = len(dataset) - train_size train_data, val_data = random_split(dataset, [train_size, val_size]) train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True) val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
def train_lr_transform(crop_size, upscale_factor): return Compose([ ToPILImage(), Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC), ToTensor() ])
#!/usr/bin/env python3 import torch from torch import nn from torch.utils.data import DataLoader from torchvision import datasets from torchvision.transforms import ToTensor, Lambda, Compose import matplotlib.pyplot as plt training_data = datasets.FashionMNIST( root="data", train=True, download=True, transform=ToTensor(), ) test_data = datasets.FashionMNIST( root="data", train=False, download=True, transform=ToTensor(), ) batch_size = 64 train_dataloader = DataLoader(training_data, batch_size=batch_size) test_dataloader = DataLoader(test_data, batch_size=batch_size) for X, y in test_dataloader: print("Shape of X [N, C, H, W]: ", X.shape) print("Shape of y: ", y.shape, y.dtype) break
precision = correct / (correct + incorrect) return precision if __name__ == '__main__': device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') batch_size = 128 num_classes = 10 fully_supervised = False reload = True # image size 3, 32, 32 # batch size must be an even number # shuffle must be True ds = CIFAR10(r'c:\data\tv', download=True, transform=ToTensor()) len_train = len(ds) // 10 * 9 len_test = len(ds) - len_train train, test = random_split(ds, [len_train, len_test]) train_l = DataLoader(train, batch_size=batch_size, shuffle=True, drop_last=True) test_l = DataLoader(test, batch_size=batch_size, shuffle=True, drop_last=True) if fully_supervised: classifier = nn.Sequential(models.Encoder(), models.Classifier()).to(device)
import torch import torch.nn as nn from torch.utils.data import random_split from torchvision.datasets import MNIST from torchvision.transforms import ToTensor from poutyne import Experiment # Instanciate the MNIST dataset train_valid_dataset = MNIST('./datasets', train=True, download=True, transform=ToTensor()) test_dataset = MNIST('./datasets', train=False, download=True, transform=ToTensor()) train_dataset, valid_dataset = random_split( train_valid_dataset, [50_000, 10_000], generator=torch.Generator().manual_seed(42) ) # Select CUDA device if available cuda_device = 0 device = torch.device('cuda:%d' % cuda_device if torch.cuda.is_available() else 'cpu') # Define the network network = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 100), nn.ReLU(), nn.Linear(100, 10)) epochs = 5 # Define the Experiment and train experiment = Experiment( './simple_model', # Where to log network, optimizer='sgd', loss_function='cross_entropy', device=device, ) experiment.train_dataset(train_dataset, valid_dataset, epochs=epochs)
def fake_data(size=100, image_size=(1, 4, 4), train=False): return FakeData(size=size, image_size=image_size, transform=ToTensor())
parser.add_argument('--batch_size', type=int, default=128) parser.add_argument('--epochs', type=int, default=50) args = parser.parse_args() batch_size = args.batch_size epochs = args.epochs lr = 0.01 weight_decay = 1e-5 print('dataset:', args.dataset) print('epochs:', epochs) print('batch size:', batch_size) transform = transforms.Compose([RandomRotation(20), RandomResizedCrop(size=32, scale=(0.8, 1.1)), ToTensor()]) train_loader = DataLoader(datasets.CIFAR10('../data', train=True, download=True, transform=transform), batch_size=batch_size, shuffle=True) test_loader = DataLoader(datasets.CIFAR10('../data', train=False, transform=transforms.Compose([ToTensor()])), batch_size=batch_size, shuffle=True) model = BasicCNN() model.cuda() optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay) #optimizer = optim.Adam(model.parameters(), lr=lr)
# load the image file lena = Image.open(file_name) lena.resize((5,5)) print lena.mode print lena.getpixel((0,0)) lena_ycbcr =lena.convert("YCbCr") print lena_ycbcr.mode print lena_ycbcr.getpixel((0,0)) import torch import torch.nn.functional as F from torch.autograd import Variable from torchvision.transforms import ToTensor rgb = ToTensor()(lena) rgb = rgb.view(1, rgb.size(0), rgb.size(1), rgb.size(2)) rgb = Variable(rgb) rgb2ycbcr = Variable(torch.FloatTensor([[0.299, 0.587, 0.114], [-0.169, -0.331, 0.5], [0.5, -0.419, -0.081]]).resize_(3,3,1,1)) print rgb2ycbcr print "---- rgb -----" print rgb ycbcr = F.conv2d(rgb, weight=rgb2ycbcr) print "first pixel:", rgb.data[0,0,0,0]*255 print lena.getpixel((0,0)) print "---- ycbcr -----" print ycbcr
def image_to_tensor(path: str) -> torch.Tensor: image = Image.open(path) image = ToTensor()(image) return image