Example #1
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
# %matplotlib inline

# Use a white background for matplotlib figures
matplotlib.rcParams['figure.facecolor'] = '#ffffff'

dataset = MNIST(root='data/', download=True, transform=ToTensor())

image, label = dataset[0]
print('image.shape:', image.shape)
plt.imshow(image[0], cmap='gray')
print('Label:', label)

val_size = 10000
train_size = len(dataset) - val_size

train_ds, val_ds = random_split(dataset, [train_size, val_size])
len(train_ds), len(val_ds)

batch_size = 128
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size*2, num_workers=4, pin_memory=True)
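make_grid is imported in this example but never used; as a brief illustrative sketch (the show_batch helper below is an assumption, not part of the original), it is typically applied to visualize one batch from train_loader:

# Hypothetical helper: visualize a single batch with make_grid.
def show_batch(dl):
    for images, labels in dl:
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.set_xticks([])
        ax.set_yticks([])
        # make_grid tiles the [B, 1, 28, 28] batch into one displayable image
        ax.imshow(make_grid(images, nrow=16).permute(1, 2, 0))
        break

show_batch(train_loader)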
Example #2
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

device = 'cuda' if torch.cuda.is_available() else 'cpu'

training_data = datasets.FashionMNIST(root='data',
                                      train=True,
                                      download=True,
                                      transform=ToTensor())

test_data = datasets.FashionMNIST(root='data',
                                  train=False,
                                  download=True,
                                  transform=ToTensor())

batch_size = 100

train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)


class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()

        self.stack = nn.Sequential(
            nn.Conv2d(1, 24, 3, 2, 1),
            nn.ReLU(),
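The listing cuts this model off after the first Conv2d/ReLU pair, so the rest of the architecture is unknown. Purely as a hedged sketch, a classifier of this shape is commonly finished and wired to a loss and optimizer as follows (every layer after the first two is an assumption):

# Illustrative completion only; the original layers beyond Conv2d/ReLU are not shown.
class SketchNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.stack = nn.Sequential(
            nn.Conv2d(1, 24, 3, 2, 1),   # as in the fragment above: 28x28 -> 14x14
            nn.ReLU(),
            nn.Conv2d(24, 48, 3, 2, 1),  # assumed continuation: 14x14 -> 7x7
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(48 * 7 * 7, 10),   # 10 FashionMNIST classes
        )

    def forward(self, x):
        return self.stack(x)

model = SketchNet().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)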
Example #3
from torchvision.transforms import Compose, RandomCrop, ToTensor


def train_hr_transform(crop_size):
    return Compose([
        RandomCrop(crop_size),
        ToTensor(),
    ])
Example #4
from torchvision.transforms import CenterCrop, Compose, Resize, ToPILImage, ToTensor


def display_transform():
    return Compose([ToPILImage(), Resize(400), CenterCrop(400), ToTensor()])
Example #5
from torchvision.transforms import CenterCrop, Compose, ToTensor


def train_hr_transform(crop_size):
    return Compose([
        CenterCrop(crop_size),
        # Resize((128,128), interpolation=Image.BICUBIC),
        ToTensor()
    ])
Example #6
                    help='test low resolution image name')
parser.add_argument('--model_path', type=str)
opt = parser.parse_args()

UPSCALE_FACTOR = opt.upscale_factor
TEST_MODE = opt.test_mode == 'GPU'
IMAGE_NAME = opt.image_name

model = Generator(UPSCALE_FACTOR).eval()
if TEST_MODE:
    model.cuda()
    model.load_state_dict(torch.load(f'{opt.model_path}'))
else:
    model.load_state_dict(
        torch.load(f'{opt.model_path}',
                   map_location=lambda storage, loc: storage))

image = Image.open(IMAGE_NAME)
image = ToTensor()(image).unsqueeze(0)  # Variable(..., volatile=True) is deprecated; use no_grad below
if TEST_MODE:
    image = image.cuda()

start = time.perf_counter()  # time.clock() was removed in Python 3.8
with torch.no_grad():
    out = model(image)
elapsed = time.perf_counter() - start
print('cost {:.4f}s'.format(elapsed))
out_img = ToPILImage()(out[0].cpu())
out_path = Path('test_outputs')
out_path.mkdir(exist_ok=True, parents=True)
out_img.save(str(out_path / Path(opt.image_name).name))
Example #7
    normal_factor = 1.

    # setting to eval mode
    i2d.eval()

    #img = Variable(torch.FloatTensor(1), volatile=True)
    #if args.cuda:
    #    img = img.cuda()

    # https://discuss.pytorch.org/t/out-of-memory-error-during-evaluation-but-training-works-fine/12274/3
    with torch.no_grad():
        with open('D:/DataSets/RGB2Depth/20200602_112100/train_images.txt') as f:    
            for line in f:
                line = line.rstrip('\n')
                print('line: {}'.format(line))
                img_in = ToTensor()( Image.open(line) ).to(device)

                print('evaluating...')
                #img = torch.from_numpy(img_in.transpose(2, 0, 1)).float().to(device)
                img = img_in
                img = torch.unsqueeze(img, 0)
                print('img {}'.format(img.shape))

                z_fake = i2d(img)
                z_fake = F.interpolate(z_fake, size=(img.shape[2], img.shape[3]),
                                       mode='bilinear', align_corners=True)  # resize the prediction back to the input size
                z_fake = torch.squeeze(z_fake, 0)
                z_fake = torch.squeeze(z_fake, 0)
                img = torch.squeeze(img, 0)
                print(z_fake)

                img_color = img.cpu().numpy().transpose(1, 2, 0)
Example #8
root_folder = vars(args)['root']

model_url = r'Opacity_DN169_BCE_SGD\Opacity_DN169_BCE_SGD.pth'
if vars(args)['model_url'] is not None:
    model_url = vars(args)['model_url']

csv_hm = r"C:\Users\maest\OneDrive\DTU\Semestre 4\Thesis\Code\CheXNet_aproach\Datase_stratification\PADChest_hm_LRUMDP_opacity.csv"
if vars(args)['hm_csv'] is not None:
    csv_hm = vars(args)['hm_csv']

batch_size = 1

radiographic_findings_opacity = ['opacity']
transforms_test = transforms.Compose([
    Resize(512),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
hm_dataset = PadChestDataset(csv_hm,
                             radiographic_findings_opacity,
                             root_folder,
                             transform=transforms_test)
hm_loader = torch.utils.data.DataLoader(hm_dataset, batch_size=1)

unorm = UnNormalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

for enum, data in enumerate(hm_loader, 0):
    print(enum)
    images, labels = data
    image_hm = images
    images = images.cuda()
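UnNormalize is not part of torchvision; a hedged sketch of the hand-rolled inverse-Normalize helper this code presumably relies on (matching the call signature used above):

# Common hand-rolled inverse of transforms.Normalize (assumed implementation).
class UnNormalize:
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # undo channel-wise normalization in place: t = t * std + mean
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor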
Example #9
def main():
    parser = argparse.ArgumentParser(description='Dataloader test')
    parser.add_argument('--gpu', default='0', help='gpu id')
    parser.add_argument('--workers',
                        default=16,
                        type=int,
                        help='num workers for data loading')
    parser.add_argument('--nb_epoch',
                        default=100,
                        type=int,
                        help='training epoch')
    parser.add_argument('--lr', default=1e-4, type=float, help='learning rate')
    parser.add_argument('--power',
                        default=0,
                        type=float,
                        help='lr poly power; 0 indicates step decay by half')
    parser.add_argument('--batch_size', default=8, type=int, help='batch size')
    parser.add_argument('--size', default=256, type=int, help='image size')
    parser.add_argument(
        '--anchor_imsize',
        default=416,
        type=int,
        help='scale used to calculate anchors defined in model cfg file')
    parser.add_argument('--data_root',
                        type=str,
                        default='./ln_data/DMS/',
                        help='path to ReferIt splits data folder')
    parser.add_argument('--split_root',
                        type=str,
                        default='data',
                        help='location of pre-parsed dataset info')
    parser.add_argument('--dataset',
                        default='referit',
                        type=str,
                        help='referit/flickr/unc/unc+/gref')
    parser.add_argument('--time',
                        default=20,
                        type=int,
                        help='maximum time steps (lang length) per batch')
    parser.add_argument('--emb_size',
                        default=512,
                        type=int,
                        help='fusion module embedding dimensions')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument(
        '--pretrain',
        default='',
        type=str,
        metavar='PATH',
        help='pretrained weights; unlike --resume, the loaded state_dict need '
        'not match exactly and no saved loss is restored')
    parser.add_argument('--print_freq',
                        '-p',
                        default=2000,
                        type=int,
                        metavar='N',
                        help='print frequency (default: 2000)')
    parser.add_argument('--savename',
                        default='default',
                        type=str,
                        help='Name head for saved model')
    parser.add_argument('--seed', default=13, type=int, help='random seed')
    parser.add_argument('--bert_model',
                        default='bert-base-uncased',
                        type=str,
                        help='bert model')
    parser.add_argument('--test',
                        dest='test',
                        default=False,
                        action='store_true',
                        help='test')
    parser.add_argument('--nflim', default=3, type=int, help='nflim')
    parser.add_argument('--mstage',
                        dest='mstage',
                        default=False,
                        action='store_true',
                        help='if mstage')
    parser.add_argument('--mstack',
                        dest='mstack',
                        default=False,
                        action='store_true',
                        help='if mstack')
    parser.add_argument('--w_div',
                        default=0.125,
                        type=float,
                        help='weight of the diverge loss')
    parser.add_argument('--fusion', default='prod', type=str, help='prod/cat')
    parser.add_argument('--tunebert',
                        dest='tunebert',
                        default=False,
                        action='store_true',
                        help='if tunebert')
    parser.add_argument('--large',
                        dest='large',
                        default=False,
                        action='store_true',
                        help='if large mode: fpn16, convlstm out, size 512')

    global args, anchors_full
    args = parser.parse_args()
    if args.large:
        args.gsize = 16
        args.size = 512
    else:
        args.gsize = 8
    print(
        '----------------------------------------------------------------------'
    )
    print(sys.argv[0])
    print(args)
    print(
        '----------------------------------------------------------------------'
    )
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    ## fix seed
    cudnn.benchmark = False
    cudnn.deterministic = True
    random.seed(args.seed)
    np.random.seed(args.seed + 1)
    torch.manual_seed(args.seed + 2)
    torch.cuda.manual_seed_all(args.seed + 3)

    eps = 1e-10
    ## the following anchor sizes were calculated by k-means with args.anchor_imsize=416
    ## a typo ('refeit') in the original experiments meant the referit-specific
    ## anchors were never used, so they are removed here; see
    ## https://github.com/zyang-ur/ReSC/issues/5 for details
    # if args.dataset=='refeit':
    #     anchors = '30,36,  78,46,  48,86,  149,79,  82,148,  331,93,  156,207,  381,163,  329,285'
    # elif args.dataset=='flickr':
    if args.dataset == 'flickr':
        anchors = '29,26,  55,58,  137,71,  82,121,  124,205,  204,132,  209,263,  369,169,  352,294'
    else:
        anchors = '10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326'
    anchors = [float(x) for x in anchors.split(',')]
    anchors_full = [(anchors[i], anchors[i + 1])
                    for i in range(0, len(anchors), 2)][::-1]

    ## save logs
    if args.savename == 'default':
        args.savename = 'filmconv_nofpn32_%s_batch%d' % (args.dataset,
                                                         args.batch_size)
    if not os.path.exists('./logs'):
        os.mkdir('logs')
    logging.basicConfig(level=logging.INFO,
                        filename="./logs/%s" % args.savename,
                        filemode="a+",
                        format="%(asctime)-15s %(levelname)-8s %(message)s")
    logging.info(str(sys.argv))
    logging.info(str(args))

    input_transform = Compose([
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = ReferDataset(data_root=args.data_root,
                                 split_root=args.split_root,
                                 dataset=args.dataset,
                                 split='train',
                                 imsize=args.size,
                                 transform=input_transform,
                                 max_query_len=args.time,
                                 augment=True)
    val_dataset = ReferDataset(data_root=args.data_root,
                               split_root=args.split_root,
                               dataset=args.dataset,
                               split='val',
                               imsize=args.size,
                               transform=input_transform,
                               max_query_len=args.time)
    ## note certain dataset does not have 'test' set:
    ## 'unc': {'train', 'val', 'trainval', 'testA', 'testB'}
    test_dataset = ReferDataset(data_root=args.data_root,
                                split_root=args.split_root,
                                dataset=args.dataset,
                                testmode=True,
                                split='val',
                                imsize=args.size,
                                transform=input_transform,
                                max_query_len=args.time)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True,
                              drop_last=True,
                              num_workers=args.workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            drop_last=True,
                            num_workers=args.workers)
    test_loader = DataLoader(test_dataset,
                             batch_size=1,
                             shuffle=False,
                             pin_memory=True,
                             drop_last=True,
                             num_workers=0)

    ## Model
    model = grounding_model_multihop(NFilm=args.nflim, fusion=args.fusion, intmd=args.mstack, mstage=args.mstage, \
        emb_size=args.emb_size, coordmap=True, convlstm=args.large, \
        bert_model=args.bert_model, dataset=args.dataset, tunebert=args.tunebert)
    model = torch.nn.DataParallel(model).cuda()

    if args.pretrain:
        model = load_pretrain(model, args, logging)
    if args.resume:
        model = load_resume(model, args, logging)

    print('Num of parameters:',
          sum([param.nelement() for param in model.parameters()]))
    logging.info('Num of parameters:%d' %
                 int(sum([param.nelement() for param in model.parameters()])))

    if args.tunebert:
        visu_param = list(model.module.visumodel.parameters())
        text_param = list(model.module.textmodel.parameters())
        # compare parameters by identity: `param in <generator>` both exhausts
        # the generator and falls back to elementwise tensor equality, so
        # collect ids up front instead
        visu_ids = {id(p) for p in visu_param}
        text_ids = {id(p) for p in text_param}
        rest_param = [
            param for param in model.parameters()
            if id(param) not in visu_ids and id(param) not in text_ids
        ]
        sum_visu = sum(param.nelement() for param in visu_param)
        sum_text = sum(param.nelement() for param in text_param)
        sum_fusion = sum(param.nelement() for param in rest_param)
        print('visu, text, fusion module parameters:', sum_visu, sum_text,
              sum_fusion)
    else:
        visu_param = list(model.module.visumodel.parameters())
        visu_ids = {id(p) for p in visu_param}
        rest_param = [
            param for param in model.parameters() if id(param) not in visu_ids
        ]
        sum_visu = sum(param.nelement() for param in visu_param)
        sum_text = sum(param.nelement()
                       for param in model.module.textmodel.parameters())
        sum_fusion = sum(param.nelement() for param in rest_param) - sum_text
        print('visu, text, fusion module parameters:', sum_visu, sum_text,
              sum_fusion)

    ## optimizer; rmsprop default
    if args.tunebert:
        optimizer = torch.optim.RMSprop([{
            'params': rest_param
        }, {
            'params': visu_param,
            'lr': args.lr / 10.
        }, {
            'params': text_param,
            'lr': args.lr / 10.
        }],
                                        lr=args.lr,
                                        weight_decay=0.0005)
    else:
        optimizer = torch.optim.RMSprop([{
            'params': rest_param
        }, {
            'params': visu_param,
            'lr': args.lr / 10.
        }],
                                        lr=args.lr,
                                        weight_decay=0.0005)

    ## training and testing
    best_accu = -float('Inf')
    if args.test:
        _ = test_epoch(test_loader, model)
    else:
        for epoch in range(args.nb_epoch):
            adjust_learning_rate(args, optimizer, epoch)
            train_epoch(train_loader, model, optimizer, epoch)
            accu_new = validate_epoch(val_loader, model)
            ## remember best accu and save checkpoint
            is_best = accu_new > best_accu
            best_accu = max(accu_new, best_accu)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_loss': accu_new,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                args,
                filename=args.savename)
        print('\nBest Accu: %f\n' % best_accu)
        logging.info('\nBest Accu: %f\n' % best_accu)
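adjust_learning_rate is called above but defined elsewhere; a hedged sketch consistent with the --power help text ('lr poly power; 0 indicates step decay by half'). The 10-epoch step interval is an assumption, and a faithful version would rescale each param group's own base lr rather than overwrite all groups:

# Sketch under assumptions; the original implementation may differ.
def adjust_learning_rate(args, optimizer, epoch):
    if args.power == 0:
        lr = args.lr * (0.5 ** (epoch // 10))  # step decay by half (interval assumed)
    else:
        lr = args.lr * ((1 - float(epoch) / args.nb_epoch) ** args.power)  # poly decay
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr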
Example #10
def main():
    parser = argparse.ArgumentParser(description='Visual Question Answering')

    # Experiment params
    parser.add_argument('--mode',
                        type=str,
                        help='train or test mode',
                        required=True,
                        choices=['train', 'test'])
    parser.add_argument('--expt_dir',
                        type=str,
                        help='root directory to save model & summaries',
                        required=True)
    parser.add_argument('--expt_name',
                        type=str,
                        help='expt_dir/expt_name: organize experiments',
                        required=True)
    parser.add_argument(
        '--run_name',
        type=str,
        help='expt_dir/expt_name/run_name: organize training runs',
        required=True)
    parser.add_argument('--model',
                        type=str,
                        help='VQA model',
                        choices=['baseline', 'attention', 'bert'],
                        required=True)

    # Data params
    parser.add_argument('--train_img',
                        type=str,
                        help='path to training images directory',
                        required=True)
    parser.add_argument('--train_file',
                        type=str,
                        help='training dataset file',
                        required=True)
    parser.add_argument('--val_img',
                        type=str,
                        help='path to validation images directory')
    parser.add_argument('--val_file', type=str, help='validation dataset file')
    parser.add_argument('--num_cls',
                        '-K',
                        type=int_min_two,
                        help='top K answers (labels); min=2',
                        default=1000)

    # Vocab params
    parser.add_argument(
        '--vocab_file',
        type=str,
        help='vocabulary pickle file (gen. by prepare_data.py)')

    # Training params
    parser.add_argument('--batch_size',
                        '-bs',
                        type=int,
                        help='batch size',
                        default=8)
    parser.add_argument('--num_epochs',
                        '-ep',
                        type=int,
                        help='number of epochs',
                        default=50)
    parser.add_argument('--learning_rate',
                        '-lr',
                        type=float,
                        help='initial learning rate',
                        default=1e-4)
    parser.add_argument('--log_interval',
                        type=int,
                        help='interval size for logging training summaries',
                        default=100)
    parser.add_argument('--save_interval',
                        type=int,
                        help='save model after `n` weight update steps',
                        default=3000)
    parser.add_argument('--val_size',
                        type=int,
                        help='validation set size for evaluating accuracy',
                        default=10000)

    # Evaluation params
    parser.add_argument('--K_eval',
                        type=int,
                        help='top-K labels during evaluation/inference',
                        default=1000)

    # Model params
    parser.add_argument(
        '--model_ckpt',
        type=str,
        help='resume training/perform inference; e.g. model_1000.pth')
    parser.add_argument('--vgg_wts_path',
                        type=str,
                        help='VGG-11 (bn) pre-trained weights (.pth) file')
    parser.add_argument('--vgg_train',
                        type=str2bool,
                        help='whether to train the VGG encoder',
                        default='false')
    # parser.add_argument('--model_config', type=str, help='model config file - specifies model architecture')

    # GPU params
    # parser.add_argument('--num_gpus',   type=int,   help='number of GPUs to use for training', default=1)
    parser.add_argument('--gpu_id',
                        type=int,
                        help='cuda:gpu_id (0,1,2,..) if num_gpus = 1',
                        default=0)
    parser.add_argument('--opt_lvl',
                        type=int,
                        help='Automatic-Mixed Precision: opt-level (O_)',
                        default=1,
                        choices=[0, 1, 2, 3])

    # Misc params
    parser.add_argument('--num_workers',
                        type=int,
                        help='number of worker threads for Dataloader',
                        default=1)

    args = parser.parse_args()

    device = torch.device(
        'cuda:{}'.format(args.gpu_id) if torch.cuda.is_available() else 'cpu')
    print('Selected Device: {}'.format(device))
    # torch.cuda.get_device_properties(device).total_memory  # in Bytes

    # Train params
    n_epochs = args.num_epochs
    batch_size = args.batch_size
    lr = args.learning_rate

    # Load vocab (.pickle) file
    vocab = load_vocab(args.vocab_file)
    print('Vocabulary loaded from {}'.format(args.vocab_file))

    # Unpack vocab
    word2idx, idx2word, label2idx, idx2label, max_seq_length = [
        v for k, v in vocab.items()
    ]
    vocab_size = len(word2idx)

    # Model Config
    model_config = setup_model_configs(args, vocab_size)

    image_size = model_config['image_size']

    # TODO: Multi-GPU PyTorch Implementation
    # if args.num_gpus > 1 and torch.cuda.device_count() > 1:
    #     print("Using {} GPUs!".format(torch.cuda.device_count()))
    #     model = nn.DataParallel(model, device_ids=[0, 1])
    # model.to(device)

    # Train
    if args.mode == 'train':
        # Setup train log directory
        log_dir = os.path.join(args.expt_dir, args.expt_name, args.run_name)

        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

        print('Training Log Directory: {}\n'.format(log_dir))

        # TensorBoard summaries setup  -->  /expt_dir/expt_name/run_name/
        writer = SummaryWriter(log_dir)

        # Train log file
        log_file = setup_logs_file(parser, log_dir)

        # Dataset & Dataloader
        train_dataset = VQADataset(args.train_file,
                                   args.train_img,
                                   word2idx,
                                   label2idx,
                                   max_seq_length,
                                   transform=Compose([
                                       Resize(image_size),
                                       ToTensor(),
                                       Normalize((0.485, 0.456, 0.406),
                                                 (0.229, 0.224, 0.225))
                                   ]))

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=args.num_workers)

        print('Question Vocabulary Size: {} \n\n'.format(vocab_size))

        print('Train Data Size: {}'.format(train_dataset.__len__()))

        # Plot data (image, question, answer) for sanity check
        # plot_data(train_loader, idx2word, idx2label, num_plots=10)
        # sys.exit()

        if args.val_file:
            # Use the same word-index dicts as that obtained for the training set
            val_dataset = VQADataset(args.val_file,
                                     args.val_img,
                                     word2idx,
                                     label2idx,
                                     max_seq_length,
                                     transform=Compose([
                                         Resize(image_size),
                                         ToTensor(),
                                         Normalize((0.485, 0.456, 0.406),
                                                   (0.229, 0.224, 0.225))
                                     ]))

            val_loader = torch.utils.data.DataLoader(
                val_dataset,
                batch_size,
                shuffle=True,
                drop_last=True,
                num_workers=args.num_workers)

            log_msg = 'Validation Data Size: {}\n'.format(
                val_dataset.__len__())
            log_msg += 'Validation Accuracy is computed using {} samples. See --val_size\n'.format(
                args.val_size)

            print_and_log(log_msg, log_file)

        # Num of classes = K + 1 (for UNKNOWN)
        num_classes = args.num_cls + 1

        # Setup model params
        question_encoder_params = model_config['question_params']
        image_encoder_params = model_config['image_params']

        # Define model & load to device
        VQANet = model_config['model']

        model = VQANet(question_encoder_params,
                       image_encoder_params,
                       K=num_classes)
        model.to(device)

        # Load model checkpoint file (if specified) from `log_dir`
        if args.model_ckpt:
            model_ckpt_path = os.path.join(log_dir, args.model_ckpt)
            checkpoint = torch.load(model_ckpt_path)

            model.load_state_dict(checkpoint)

            log_msg = 'Model successfully loaded from {}'.format(
                model_ckpt_path) + '\nResuming Training...'

            print_and_log(log_msg, log_file)

        # Loss & Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr)

        # TODO: StepLR Scheduler
        # scheduler = StepLR(optimizer, step_size=1, gamma=0.1)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O{}".format(args.opt_lvl))

        steps_per_epoch = len(train_loader)
        start_time = time()
        curr_step = 0
        # TODO: Save model with best validation accuracy
        best_val_acc = 0.0

        for epoch in range(n_epochs):
            for batch_data in train_loader:
                # Load batch data
                image = batch_data['image']
                question = batch_data['question']
                ques_len = batch_data['ques_len']
                label = batch_data['label']

                # Sort batch based on sequence length
                image, question, label, ques_len = sort_batch(
                    image, question, label, ques_len)

                # Load data onto the available device
                image = image.to(device)  # [B, C, H, W]
                question = question.to(device)  # [B, L]
                ques_len = ques_len.to(device)  # [B]
                label = label.to(device)  # [B]

                # Forward Pass
                label_predict = model(image, question, ques_len)

                # Compute Loss
                loss = criterion(label_predict, label)

                # Backward Pass
                optimizer.zero_grad()
                #                 loss.backward()

                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()

                optimizer.step()

                # Print Results - Loss value & Validation Accuracy
                if (curr_step + 1) % args.log_interval == 0 or curr_step == 1:
                    # Validation set accuracy
                    if args.val_file:
                        validation_metrics = compute_validation_metrics(
                            model, val_loader, device, size=args.val_size)

                        # Reset the mode to training
                        model.train()

                        log_msg = 'Validation Accuracy: {:.2f} %  || Validation Loss: {:.4f}'.format(
                            validation_metrics['accuracy'],
                            validation_metrics['loss'])

                        print_and_log(log_msg, log_file)

                        # If current model has the best accuracy on the validation set & >= training accuracy,
                        # save model to disk

                        # Add summaries to TensorBoard
                        writer.add_scalar('Val/Accuracy',
                                          validation_metrics['accuracy'],
                                          curr_step)
                        writer.add_scalar('Val/Loss',
                                          validation_metrics['loss'],
                                          curr_step)

                    # Add summaries to TensorBoard
                    writer.add_scalar('Train/Loss', loss.item(), curr_step)

                    # Compute elapsed & remaining time for training to complete
                    time_elapsed = (time() - start_time) / 3600
                    # total time = time_per_step * steps_per_epoch * total_epochs
                    total_time = (time_elapsed /
                                  curr_step) * steps_per_epoch * n_epochs
                    time_left = total_time - time_elapsed

                    log_msg = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f} | time elapsed: {:.2f}h | time left: {:.2f}h'.format(
                        epoch + 1, n_epochs, curr_step + 1, steps_per_epoch,
                        loss.item(), time_elapsed, time_left)

                    print_and_log(log_msg, log_file)

                # Save the model
                if (curr_step + 1) % args.save_interval == 0:
                    print('Saving the model at step {} to directory: {}'.format(
                        curr_step + 1, log_dir))
                    save_path = os.path.join(
                        log_dir, 'model_' + str(curr_step + 1) + '.pth')
                    torch.save(model.state_dict(), save_path)

                curr_step += 1

            # Validation set accuracy on the entire set
            if args.val_file:
                # Total validation set size
                total_validation_size = val_dataset.__len__()
                validation_metrics = compute_validation_metrics(
                    model, val_loader, device, total_validation_size)

                log_msg = '\nAfter {} epoch:\n'.format(epoch + 1)
                log_msg += 'Validation Accuracy: {:.2f} %  || Validation Loss: {:.4f}\n'.format(
                    validation_metrics['accuracy'], validation_metrics['loss'])

                print_and_log(log_msg, log_file)

                # Reset the mode to training
                model.train()

        writer.close()
        log_file.close()

    # TODO: Test/Inference
    elif args.mode == 'test':
        raise NotImplementedError('TODO: test mode')
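sort_batch is used in the training loop above but defined elsewhere; a hedged sketch of what it plausibly does, i.e. sort the batch by question length, descending, as packed RNN encoders require:

# Assumed implementation of sort_batch; the original may differ.
def sort_batch(images, questions, labels, ques_lens):
    ques_lens, sort_idx = ques_lens.sort(descending=True)
    return images[sort_idx], questions[sort_idx], labels[sort_idx], ques_lens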
Example #11
                                 downsample=opt.downsample)
elif opt.model_type == 'rbf' or opt.model_type == 'nerf':
    model = modules.SingleBVPNet(type='relu',
                                 mode=opt.model_type,
                                 out_features=img_dataset.img_channels,
                                 sidelength=image_resolution,
                                 downsample=opt.downsample)
else:
    raise NotImplementedError
model.cuda()

root_path = os.path.join(opt.logging_root, opt.experiment_name)

if opt.mask_path:
    mask = Image.open(opt.mask_path)
    mask = ToTensor()(mask)
    mask = mask.float().cuda()
    percentage = torch.sum(mask).cpu().numpy() / np.prod(mask.shape)
    print("mask sparsity %f" % (percentage))
else:
    mask = torch.rand(image_resolution) < opt.sparsity
    mask = mask.float().cuda()

# Define the loss
if opt.prior is None:
    loss_fn = partial(loss_functions.image_mse, mask.view(-1, 1))
elif opt.prior == 'TV':
    loss_fn = partial(loss_functions.image_mse_TV_prior, mask.view(-1, 1),
                      opt.k1, model)
elif opt.prior == 'FH':
    loss_fn = partial(loss_functions.image_mse_FH_prior, mask.view(-1, 1),
Example #12
def main():

    # ---------- LOAD DATASET AND FILE SELECTION ----------------------------------------------------------------------
    start = time.time()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.cuda.empty_cache()
    print(device)

    # file_name_extension = 'Rotation_centered_im1'
    file_name_extension = 'Rotation_Translation_im1'
    # file_name_extension = 'Translation_im3'  # choose the corresponding database to use

    cubes_file = 'Npydatabase/wrist_{}.npy'.format(file_name_extension)
    silhouettes_file = 'Npydatabase/sils_{}.npy'.format(file_name_extension)
    parameters_file = 'Npydatabase/params_{}.npy'.format(file_name_extension)

    wrist = np.load(cubes_file)
    sils = np.load(silhouettes_file)
    params = np.load(parameters_file)

    train_im = wrist  # the full set is used for training in this script
    train_sil = sils
    train_param = params

    normalize = Normalize(mean=[0.5], std=[0.5])
    transforms = Compose([ToTensor(), normalize])
    train_dataset = CubeDataset(train_im, train_sil, train_param, transforms)


    train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=1)

    # # check to iterate inside the test dataloader
    # for image, sil, param in train_dataloader:
    #
    #     # print(image[2])
    #     print(image.size(), param.size()) #torch.Size([batch, 3, 512, 512]) torch.Size([batch, 6])
    #     im =0
    #     print(param[im])  # parameter in form tensor([2.5508, 0.0000, 0.0000, 0.0000, 0.0000, 5.0000])
    #
    #     image2show = image[im]  # indexing random  one image
    #     print(image2show.size()) #torch.Size([3, 512, 512])
    #     plt.imshow((image2show * 0.5 + 0.5).numpy().transpose(1, 2, 0))
    #     plt.show()
    #     break  # break here just to show 1 batch of data

    count = 0
    losses = []
    a = []
    b = []
    c = []
    tx = []
    ty = []
    tz = []
    # ground-truth values, plotted on the graphs as horizontal lines
    alpha_GT = np.array(m.degrees(params[0, 0]))
    beta_GT = np.array(m.degrees(params[0, 1]))
    gamma_GT = np.array(m.degrees(params[0, 2]))  # angles in degrees
    tx_GT = np.array(params[0, 3])
    ty_GT = np.array(params[0, 4])
    tz_GT = np.array(params[0, 5])

    iterations = 100

    # ---------- MODEL CREATION  ----------------------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument('-io', '--filename_obj', type=str, default=os.path.join(data_dir, 'wrist.obj'))
    parser.add_argument('-or', '--filename_output', type=str, default=os.path.join(result_dir, '{}_regression_animation_6params.gif'.format(file_name_extension)))
    parser.add_argument('-mr', '--make_reference_image', type=int, default=0)
    parser.add_argument('-g', '--gpu', type=int, default=0)
    args = parser.parse_args()

    # resnet50 = models.resnet50(pretrained=True)

    model = Myresnet50(filename_obj=args.filename_obj)
    # model = Model(args.filename_obj, args.filename_ref)

    model.to(device)

    model.train(True)
    bool_first = True
    Lr_start = 0.001
    decreaseat = 40
    lr = Lr_start
    # build the optimizer once; recreating it every batch, as the original
    # code did, silently resets Adam's running moment estimates
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loop = tqdm.tqdm(range(iterations))
    for i in loop:

        for image, silhouette, parameter in train_dataloader:
            image = image.to(device)
            imgGT = image
            parameter = parameter.to(device)
            print(parameter)
            silhouette = silhouette.to(device)
            params = model(image)
            print(params)
            model.t = params[0,3:6]
            model.R = R2Rmat(params[0,0:3]) #angle from resnet are in radian
            bool_first = True
            # first_
            # print(model.t)
            # print(model.R)

            # regression between computed and ground truth
            image = model.renderer(model.vertices, model.faces, R=model.R, t=model.t, mode='silhouettes')
            loss = nn.MSELoss()(params, parameter).to(device)
            if i % decreaseat == 0 and i > 2:
                lr = lr / 10
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                print('update lr, is now {}'.format(lr))

            print('loss is {}'.format(loss))



            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.append(loss.detach().cpu().numpy())
            # print(((model.K).detach().cpu().numpy()))
            cp_x = ((model.t).detach().cpu().numpy())[0]
            cp_y = ((model.t).detach().cpu().numpy())[1]
            cp_z = ((model.t).detach().cpu().numpy())[2]

            cp_rotMat = model.R
            r = Rot.from_matrix(cp_rotMat.detach().cpu().numpy())  # from_dcm was removed in SciPy >= 1.6
            r_euler = r.as_euler('xyz', degrees=True)

            a.append(r_euler[0, 0])
            b.append(r_euler[0, 1])
            c.append(r_euler[0, 2])

            cp_a = r_euler[0, 0]
            cp_b = r_euler[0, 1]
            cp_c = r_euler[0, 2]


            tx.append(cp_x)
            ty.append(cp_y)
            tz.append(cp_z) #z axis value

            images, _, _ = model.renderer(model.vertices, model.faces, torch.tanh(model.textures), R=model.R, t=model.t)

            img = images.detach().cpu().numpy()[0].transpose(1, 2, 0)

            if (i == iterations - 1):
                imgGT = imgGT.squeeze()  # float32 from 0-1
                imgGT = imgGT.detach().cpu()
                imgGT = (imgGT * 0.5 + 0.5).numpy().transpose(1, 2, 0)
                # imgGT = (imgGT * 255).astype(np.uint8)  # cast from float32 255.0 to 255 uint8

                f = plt.subplot(1, 2, 1)
                plt.imshow(imgGT)
                f.set_title('Ground truth \n alpha {:.3f}° tx {}\n'
                            'beta {:.3f}° ty {}\n '
                            'gamma {:.3f}° tz {}'.format(alpha_GT, tx_GT, beta_GT, ty_GT, gamma_GT, tz_GT))
                plt.xticks([0, 512])
                plt.yticks([])
                f = plt.subplot(1, 2, 2)
                plt.imshow(img)
                f.set_title('Regression \n alpha {:.3f}°  tx {:.3f}\n'
                            'beta {:.3f}° ty {:.3f}\n'
                            'gamma {:.3f}° tz {:.3f}'.format(cp_a, cp_x, cp_b, cp_y, cp_c, cp_z))
                plt.xticks([0, 512])
                plt.yticks([])

                plt.savefig('results/3_6params_regression/Final_regression_6params_{}iterations_{}.png'.format(iterations, file_name_extension),
                            bbox_inches='tight', pad_inches=0.05)

            imsave('/tmp/_tmp_%04d.png' % i, img)
            loop.set_description('Optimizing (loss %.4f)' % loss.item())
            count = count + 1

    end = time.time()
    exectime = round((end - start), 2)  # elapsed time in seconds
    print('time elapsed is: {} sec'.format(exectime))


#----------PLOT SECTION ------------------------------------------------------------------------

    make_gif(args.filename_output)
    fig, (p1, p2, p3) = plt.subplots(3, figsize=(15, 10))  # figsize is (width, height)
    fig.suptitle("Regression for 1 image, {} epochs in {} sec, rotation and translation, 6 parameters \n lr={} and decrease each {} iterations".format(iterations,exectime, Lr_start, decreaseat), fontsize=14)

    p1.plot(np.arange(count), losses, label="Global Loss")
    p1.set(ylabel='MSE Loss')  # the training loss above is nn.MSELoss
    p1.set_yscale('log')
    p1.set_ylim([0, 1])
    p1.set(xlabel='Iterations')
    # Place a legend to the right of this smaller subplot.
    p1.legend()

    p2.plot(np.arange(count), tx, label="x values", color = 'g' )
    p2.axhline(y=tx_GT, color = 'g', linestyle= '--' )
    p2.plot(np.arange(count), ty, label="y values", color = 'y')
    p2.axhline(y=ty_GT, color = 'y', linestyle= '--' )
    p2.plot(np.arange(count), tz, label="z values", color = 'b')
    p2.axhline(y=tz_GT, color = 'b', linestyle= '--' )

    p2.set(ylabel='Translation value')
    p2.set_ylim([-5, 10])
    p2.set(xlabel='Iterations')
    p2.legend()

    p3.plot(np.arange(count), a, label="alpha values", color = 'g')
    p3.axhline(y=alpha_GT, color = 'g', linestyle= '--' )
    p3.plot(np.arange(count), b, label="beta values", color = 'y')
    p3.axhline(y=beta_GT, color = 'y', linestyle= '--')
    p3.plot(np.arange(count), c, label="gamma values", color = 'b')
    p3.axhline(y=gamma_GT, color = 'b', linestyle= '--' )

    p3.set(xlabel='iterations', ylabel='Rotation value')
    p3.set_ylim([-180, 180])
    p3.legend()

    fig.savefig('results/3_6params_regression/regression_1image_6params_{}.pdf'.format(file_name_extension), bbox_inches = 'tight', pad_inches = 0.05)
    fig.savefig('results/3_6params_regression/regression_1image_6params_{}.png'.format(file_name_extension), bbox_inches = 'tight', pad_inches = 0.05)
    matplotlib2tikz.save("results/3_6params_regression/regression_1image_6params_{}.tex".format(file_name_extension),figureheight='5.5cm', figurewidth='15cm')
    plt.show()
Example #13
    def __init__(self,
                 root=expanduser("~") + "/.avalanche/data/core50/",
                 train=True,
                 transform=ToTensor(),
                 target_transform=None,
                 loader=pil_loader,
                 download=True,
                 object_level=True):
        """

        :param root: root for the datasets data.
        :param train: train or test split.
        :param transform: eventual transformations to be applied.
        :param target_transform: eventual transformation to be applied to the
            targets.
        :param loader: data loader method from disk.
        :param download: boolean to automatically download data. Default to
            True.
        :param object_level: if the classification is objects based or
            category based: 50 or 10 way classification problem. Default to True
            (50-way object classification problem)
        """

        self.train = train  # training set or test set
        self.transform = transform
        self.target_transform = target_transform
        self.root = root
        self.loader = loader
        self.object_level = object_level
        self.log = logging.getLogger("avalanche")

        # any scenario and run will do here, since we just want to load the
        # train images and targets in no particular order
        scen = 'ni'
        run = 0
        nbatch = 8

        if download:
            self.core_data = CORE50_DATA(data_folder=root)

        self.log.info("Loading paths...")
        with open(os.path.join(root, 'paths.pkl'), 'rb') as f:
            self.train_test_paths = pkl.load(f)

        self.log.info("Loading labels...")
        with open(os.path.join(root, 'labels.pkl'), 'rb') as f:
            self.all_targets = pkl.load(f)
            self.train_test_targets = []
            for i in range(nbatch + 1):
                self.train_test_targets += self.all_targets[scen][run][i]

        self.log.info("Loading LUP...")
        with open(os.path.join(root, 'LUP.pkl'), 'rb') as f:
            self.LUP = pkl.load(f)

        self.log.info("Loading labels names...")
        with open(os.path.join(root, 'labels2names.pkl'), 'rb') as f:
            self.labels2names = pkl.load(f)

        self.idx_list = []
        if train:
            for i in range(nbatch + 1):
                self.idx_list += self.LUP[scen][run][i]
        else:
            self.idx_list = self.LUP[scen][run][-1]

        self.paths = []
        self.targets = []

        for idx in self.idx_list:
            self.paths.append(self.train_test_paths[idx])
            div = 1
            if not self.object_level:
                div = 5
            self.targets.append(self.train_test_targets[idx] // div)
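The fragment ends with __init__; a hedged sketch of the __len__/__getitem__ pair such a paths-and-targets dataset needs (the real CORe50 class has its own versions, and the exact directory layout under self.root is an assumption):

    # Sketch under assumptions; the original methods are not shown here.
    def __len__(self):
        return len(self.targets)

    def __getitem__(self, index):
        target = self.targets[index]
        img = self.loader(os.path.join(self.root, self.paths[index]))
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return img, target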
Example #14
import torch
from torchvision.transforms import Resize, ToPILImage, ToTensor


def resize(x, target_shape):
    # CHW tensor -> PIL image -> resized PIL image -> CHW tensor on the GPU
    x = ToPILImage()(x.cpu().to(torch.float32))
    x = Resize(target_shape)(x)
    x = ToTensor()(x)
    return x.cuda()
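A brief illustrative usage note (assuming a CUDA device is available): the helper takes a CHW tensor, round-trips through PIL, and returns a resized CUDA tensor.

patch = resize(torch.rand(3, 64, 64), (32, 32))
print(patch.shape)  # torch.Size([3, 32, 32])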
Example #15
from typing import Sequence

import torch
from PIL.Image import Image
from torch import Tensor
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor, ToPILImage, Compose, Normalize, \
    RandomRotation
import numpy as np

from avalanche.benchmarks import NCScenario, nc_benchmark
from avalanche.benchmarks.classic.classic_benchmarks_utils import \
    check_vision_benchmark
from avalanche.benchmarks.datasets import default_dataset_location
from avalanche.benchmarks.utils import AvalancheDataset

_default_mnist_train_transform = Compose(
    [ToTensor(), Normalize((0.1307, ), (0.3081, ))])

_default_mnist_eval_transform = Compose(
    [ToTensor(), Normalize((0.1307, ), (0.3081, ))])


class PixelsPermutation(object):
    """
    Apply a fixed permutation to the pixels of the given image.

    Works with both Tensors and PIL images. Returns an object of the same type
    of the input element.
    """
    def __init__(self, index_permutation: Sequence[int]):
        self.permutation = index_permutation
        self._to_tensor = ToTensor()
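The class is truncated after __init__; a hedged sketch of the missing __call__, assuming the tensor/PIL converter pair seen in Example #17:

    # Sketch under assumptions; the original __call__ is not shown here.
    def __call__(self, img):
        is_image = isinstance(img, Image)
        tensor = self._to_tensor(img) if is_image else img
        # flatten the pixels, apply the fixed permutation, restore the shape
        permuted = tensor.view(-1)[self.permutation].view(tensor.shape)
        return ToPILImage()(permuted) if is_image else permuted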
Example #16
        else:
            torch.nn.utils.clip_grad_norm_(params, 5.0)


if __name__ == "__main__":
    opt = parse_options(
        "CIFAR10 EBM using RSM in flex.",
        path="/g/korbel/mjendrusch/runs/experimental/cifar10-rsm-exp-14-VP",
        device="cuda:0",
        batch_size=128,
        max_epochs=1000,
        report_interval=1000,
        checkpoint_interval=50000,
    )

    cifar10 = CIFAR10("examples/", download=False, transform=ToTensor())
    data = CIFAR10Dataset(cifar10)
    data = DataDistribution(data, batch_size=opt.batch_size, device=opt.device)

    energy = AdaptedUNetEnergy().to(opt.device)

    training = relaxed_score_matching_training(
        energy,
        data,
        optimizer=torch.optim.Adam,
        optimizer_kwargs=dict(lr=2e-4),
        level_weight=scale_level,
        level_distribution=VPNormalNoise(lambda t: 1e-3 + t * (1.0 - 1e-3)),
        noise_distribution=VPNormalNoise(
            lambda t: 1e-2 * (1e-3 + t * (1.0 - 1e-3))
        ),  # TruncatedNormalNoise(lambda t: 0.01 * torch.ones_like(t))
Example #17
    def __init__(self, index_permutation: Sequence[int]):
        self.permutation = index_permutation
        self._to_tensor = ToTensor()
        self._to_image = ToPILImage()
Example #18
def train(args):
    writer = SummaryWriter(log_dir=args.logdir)

    # Datasets
    dataset_tr = CUBDataset(
        root=args.datapath,
        train=True,
        transforms=Compose([
            Resize(256),
            RandomCrop((224, 224), pad_if_needed=True),
            RandomHorizontalFlip(),
            ToTensor()
        ])
    )
    data_loader_tr = DataLoader(
        dataset_tr,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.number_workers)

    dataset_val = CUBDataset(
        root=args.datapath,
        train=False,
        transforms=Compose([
            CenterCrop(224),
            ToTensor()
        ])
    )
    data_loader_val = DataLoader(
        dataset_val,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.number_workers)

    # Model
    model = BirdNet(num_classes=20).to(args.device)

    # Optimizer
    optimizer = Adam(
        params=model.classifier.parameters(),   # Optimize only the classifier layer
        lr=args.learning_rate,
        weight_decay=args.weight_decay)

    # Meters
    meter_loss = AverageMeter()
    meter_accuracy = AverageMeter()
    train_accuracy, train_loss, val_accuracy, val_loss = 0, 0, 0, 0

    epoch_bar = tqdm.trange(args.number_epochs, desc='Epoch')
    for epoch in epoch_bar:
        epoch_start_time = time()

        # Training
        model.train()
        torch.set_grad_enabled(True)
        batch_bar = tqdm.tqdm(data_loader_tr, desc='Batch')
        meter_loss.reset()
        meter_accuracy.reset()
        for batch in batch_bar:
            input_batch = batch[0].to(args.device)
            target = batch[1].to(args.device)
            logits = model(input_batch)

            number_samples = target.shape[0]
            predictions = logits.argmax(dim=1)
            accuracy = (predictions == target).float().sum()/number_samples
            loss = F.cross_entropy(logits, target)
            meter_accuracy.update(accuracy, number_samples)
            meter_loss.update(loss, number_samples)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # batch_bar.set_postfix({'loss': loss.item()})

        train_accuracy, train_loss = meter_accuracy.get_average(), meter_loss.get_average()
        epoch_bar.set_postfix({"loss": train_loss,
                               "accuracy": train_accuracy})
        writer.add_scalar("/train/loss", train_loss, epoch)
        writer.add_scalar("/train/accuracy", train_accuracy, epoch)

        # Validation
        model.eval()
        torch.set_grad_enabled(False)
        batch_bar = tqdm.tqdm(data_loader_val, desc='Batch')
        meter_loss.reset()
        meter_accuracy.reset()
        for batch in batch_bar:
            input_batch = batch[0].to(args.device)
            target = batch[1].to(args.device)
            logits = model(input_batch)

            number_samples = target.shape[0]
            predictions = logits.argmax(dim=1)
            accuracy = (predictions == target).float().sum()/number_samples
            loss = F.cross_entropy(logits, target)
            meter_accuracy.update(accuracy, number_samples)
            meter_loss.update(loss, number_samples)

        val_accuracy, val_loss = meter_accuracy.get_average(), meter_loss.get_average()
        epoch_time = time()-epoch_start_time

        epoch_bar.set_postfix({"loss": val_loss,
                               "accuracy": val_accuracy})
        writer.add_scalar("/validation/loss", val_loss, epoch)
        writer.add_scalar("/validation/accuracy", val_accuracy, epoch)
        writer.add_scalar("time_per_epoch", epoch_time, epoch)

    torch.save(model.classifier.state_dict(), str(args.logdir / "final_model.pt"))
    return {"train": {"accuracy": train_accuracy, "loss": train_loss},
            "validation": {"accuracy": val_accuracy, "loss": val_loss}}
Example #19
from torchvision.transforms import CenterCrop, Compose, ToTensor


def target_transform(crop_size):
    return Compose([
        CenterCrop(crop_size),
        ToTensor(),
    ])
Example #20
    def __init__(self,
                 mode,
                 roidb_file=VG_SGG_FN,
                 dict_file=VG_SGG_DICT_FN,
                 image_file=IM_DATA_FN,
                 filter_empty_rels=True,
                 num_im=-1,
                 num_val_im=5000,
                 filter_duplicate_rels=True,
                 filter_non_overlap=True,
                 use_proposals=False):
        """
        Torch dataset for VisualGenome
        :param mode: Must be train, test, or val
        :param roidb_file:  HDF5 containing the GT boxes, classes, and relationships
        :param dict_file: JSON Contains mapping of classes/relationships to words
        :param image_file: HDF5 containing image filenames
        :param filter_empty_rels: True if we filter out images without relationships between
                             boxes. One might want to set this to false if training a detector.
        :param filter_duplicate_rels: Whenever we see a duplicate relationship we'll sample instead
        :param num_im: Number of images in the entire dataset. -1 for all images.
        :param num_val_im: Number of images in the validation set (must be less than num_im
               unless num_im is -1.)
        :param use_proposals: if True, load precomputed RPN proposals from
            PROPOSAL_FN; otherwise no proposals are provided
        """
        if mode not in ('test', 'train', 'val'):
            raise ValueError(
                "Mode must be in test, train, or val. Supplied {}".format(
                    mode))
        self.mode = mode

        # Initialize
        self.roidb_file = roidb_file
        self.dict_file = dict_file
        self.image_file = image_file
        self.filter_non_overlap = filter_non_overlap
        self.filter_duplicate_rels = filter_duplicate_rels and self.mode == 'train'

        self.split_mask, self.gt_boxes, self.gt_classes, self.relationships = load_graphs_one_shot(
            self.roidb_file,
            self.mode,
            num_im,
            num_val_im=num_val_im,
            filter_empty_rels=filter_empty_rels,
            filter_non_overlap=self.filter_non_overlap and self.is_train,
        )

        self.filenames = load_image_filenames(image_file)
        self.filenames = [
            self.filenames[i] for i in np.where(self.split_mask)[0]
        ]

        self.ind_to_classes, self.ind_to_predicates = load_info(dict_file)

        if use_proposals:
            print("Loading proposals", flush=True)
            p_h5 = h5py.File(PROPOSAL_FN, 'r')
            rpn_rois = p_h5['rpn_rois']
            rpn_scores = p_h5['rpn_scores']
            rpn_im_to_roi_idx = np.array(
                p_h5['im_to_roi_idx'][self.split_mask])
            rpn_num_rois = np.array(p_h5['num_rois'][self.split_mask])

            self.rpn_rois = []
            for i in range(len(self.filenames)):
                rpn_i = np.column_stack((
                    rpn_scores[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                               rpn_num_rois[i]],
                    rpn_rois[rpn_im_to_roi_idx[i]:rpn_im_to_roi_idx[i] +
                             rpn_num_rois[i]],
                ))
                self.rpn_rois.append(rpn_i)
        else:
            self.rpn_rois = None

        # You could add data augmentation here. But we didn't.
        # tform = []
        # if self.is_train:
        #     tform.append(RandomOrder([
        #         Grayscale(),
        #         Brightness(),
        #         Contrast(),
        #         Sharpness(),
        #         Hue(),
        #     ]))

        tform = [
            SquarePad(),
            Resize(IM_SCALE),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform_pipeline = Compose(tform)
Example #21
def load_data():
    train_dataset = FashionMNIST(root='./cache', download=True, train=True, transform=ToTensor())
    eval_dataset = FashionMNIST(root='./cache', download=False, train=False, transform=ToTensor())

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    eval_loader = DataLoader(dataset=eval_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    return train_loader, eval_loader
Example #22
from hbconfig import Config
from torch.autograd import Variable
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchsummary import summary
import utils
from AutoAugment.autoaugment import ImageNetPolicy
from basic_utils import saving_config
from logger import Logger
from miniimagenet_loader import read_dataset_test, _sample_mini_dataset, _mini_batches, _split_train_test, _mini_batches_with_augmentation, AutoEncoder
from one_shot_aug.module import PretrainedClassifier, MiniImageNetModel
from utils import mkdir_p

meta_step_size = 1.  # stepsize of outer optimization, i.e., meta-optimization
meta_step_size_final = 0.
tensor = ToTensor()


def augments_dataset(batch, k=5):
    images = []
    # labels=[]
    for _ in range(k):
        for img_, label in batch:
            policy = ImageNetPolicy()
            transformed = policy(img_)
            # f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,6))
            # ax1.imshow(img_)
            # ax2.imshow(transformed[0])
            # plt.show()
            tensor = ToTensor()
            if isinstance(transformed, (list, )):
                # the snippet was truncated here; assumed completion: convert
                # each returned image and collect it
                images.extend(tensor(t) for t in transformed)
            else:
                images.append(tensor(transformed))
    return images
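
# A hedged usage sketch for augments_dataset: the batch is assumed to be an
# iterable of (PIL image, label) pairs, matching how the loop unpacks it.
from PIL import Image

batch = [(Image.new('RGB', (84, 84)), 0), (Image.new('RGB', (84, 84)), 1)]  # hypothetical batch
augmented = augments_dataset(batch, k=5)
print(len(augmented))  # k * len(batch) tensors, given the assumed completion above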
Ejemplo n.º 23
0
if __name__ == '__main__':

    P_TRAIN = 0.8  # proportion of examples to use for training
    BATCH_SIZE = 32
    NUM_WORKERS = 6
    NUM_EPOCHS = 50
    learning_rate = 0.001
    DECAY_RATE = 5  # number of epochs after which to decay the learning rate
    LR_DECAY = 0.5  # amount to decrease the learning rate every 'DECAY_RATE' epochs
    CHECKPOINT_RATE = 5  # number of epochs after which to checkpoint the model
    IMAGE_DIR = '/home/mchobanyan/data/emotion/images/imagenet/'
    MODEL_DIR = '/home/mchobanyan/data/emotion/models/emotion_detect/imagenet/'

    dataset = ColorAndGrayImages(image_dir=IMAGE_DIR,
                                 colored_transform=Compose([
                                     ToTensor(),
                                     Normalize(IMAGENET_MEANS, IMAGENET_STDVS)
                                 ]),
                                 gray_transform=ToTensor())
    print(f'Number of images: {len(dataset)}')

    train_size = int(len(dataset) * P_TRAIN)
    val_size = len(dataset) - train_size
    train_data, val_data = random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_data,
                              batch_size=BATCH_SIZE,
                              num_workers=NUM_WORKERS,
                              shuffle=True)
    val_loader = DataLoader(val_data,
                            batch_size=BATCH_SIZE,
                            num_workers=NUM_WORKERS)
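
# The snippet stops after building the loaders. Assuming the dataset yields
# (colored, gray) pairs in that order (an assumption, since only its two
# transforms are visible), a quick shape check would look like:
color_batch, gray_batch = next(iter(train_loader))
print(color_batch.shape)  # (BATCH_SIZE, 3, H, W), ImageNet-normalized
print(gray_batch.shape)   # likely (BATCH_SIZE, 1, H, W), raw [0, 1] values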
Ejemplo n.º 24
0
def train_lr_transform(crop_size, upscale_factor):
    return Compose([
        ToPILImage(),
        Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC),
        ToTensor()
    ])
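
# Usage sketch pairing this with train_hr_transform from earlier in this
# collection; the image path is hypothetical, and crop_size must be divisible
# by upscale_factor for the shapes to line up.
from PIL import Image

crop_size, upscale_factor = 88, 4
hr = train_hr_transform(crop_size)(Image.open('example.png'))  # tensor of shape (3, 88, 88)
lr = train_lr_transform(crop_size, upscale_factor)(hr)         # tensor of shape (3, 22, 22)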
Ejemplo n.º 25
0
#!/usr/bin/env python3
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

batch_size = 64

train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break
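
# For FashionMNIST (28x28 grayscale) with batch_size = 64, the loop prints:
#   Shape of X [N, C, H, W]:  torch.Size([64, 1, 28, 28])
#   Shape of y:  torch.Size([64]) torch.int64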
Ejemplo n.º 26
0
    precision = correct / (correct + incorrect)
    return precision


if __name__ == '__main__':

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = 128
    num_classes = 10
    fully_supervised = False
    reload = True

    # image size 3, 32, 32
    # batch size must be an even number
    # shuffle must be True
    ds = CIFAR10(r'c:\data\tv', download=True, transform=ToTensor())
    len_train = len(ds) // 10 * 9
    len_test = len(ds) - len_train
    train, test = random_split(ds, [len_train, len_test])
    train_l = DataLoader(train,
                         batch_size=batch_size,
                         shuffle=True,
                         drop_last=True)
    test_l = DataLoader(test,
                        batch_size=batch_size,
                        shuffle=True,
                        drop_last=True)

    if fully_supervised:
        classifier = nn.Sequential(models.Encoder(),
                                   models.Classifier()).to(device)
Ejemplo n.º 27
0
import torch
import torch.nn as nn
from torch.utils.data import random_split
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from poutyne import Experiment

# Instantiate the MNIST dataset
train_valid_dataset = MNIST('./datasets', train=True, download=True, transform=ToTensor())
test_dataset = MNIST('./datasets', train=False, download=True, transform=ToTensor())
train_dataset, valid_dataset = random_split(
    train_valid_dataset, [50_000, 10_000], generator=torch.Generator().manual_seed(42)
)

# Select CUDA device if available
cuda_device = 0
device = torch.device('cuda:%d' % cuda_device if torch.cuda.is_available() else 'cpu')

# Define the network
network = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 100), nn.ReLU(), nn.Linear(100, 10))
epochs = 5

# Define the Experiment and train
experiment = Experiment(
    './simple_model',  # Where to log
    network,
    optimizer='sgd',
    loss_function='cross_entropy',
    device=device,
)
experiment.train_dataset(train_dataset, valid_dataset, epochs=epochs)
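
# The held-out set can then be evaluated with the same Experiment. A minimal
# sketch, assuming this Poutyne version exposes test_dataset as the counterpart
# of the train_dataset call above (check the installed API):
experiment.test_dataset(test_dataset)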
Ejemplo n.º 28
0
def fake_data(size=100, image_size=(1, 4, 4), train=False):
    return FakeData(size=size, image_size=image_size, transform=ToTensor())
Ejemplo n.º 29
0
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--epochs', type=int, default=50)
args = parser.parse_args()


batch_size = args.batch_size
epochs = args.epochs
lr = 0.01
weight_decay = 1e-5

print('dataset:', args.dataset)
print('epochs:', epochs)
print('batch size:', batch_size)


transform = transforms.Compose([
    RandomRotation(20),
    RandomResizedCrop(size=32, scale=(0.8, 1.1)),
    ToTensor(),
])
train_loader = DataLoader(datasets.CIFAR10('../data', train=True, download=True,
                                           transform=transform),
                          batch_size=batch_size, shuffle=True)

test_loader = DataLoader(datasets.CIFAR10('../data', train=False,
                                          transform=transforms.Compose([ToTensor()])),
                         batch_size=batch_size, shuffle=True)


model = BasicCNN()
model.cuda()


optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)	
#optimizer = optim.Adam(model.parameters(), lr=lr)
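
# The snippet stops before the training loop. A minimal epoch loop consistent
# with the setup above might look like this (a sketch, not the original
# author's code; the loss function is an assumption):
from torch import nn

criterion = nn.CrossEntropyLoss()  # assumed loss for CIFAR10 classification

for epoch in range(epochs):
    model.train()
    for data, target in train_loader:
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
    print('epoch {}: last batch loss {:.4f}'.format(epoch, loss.item()))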
Ejemplo n.º 30
0
# load the image file
lena = Image.open(file_name)
lena = lena.resize((5, 5))  # resize() returns a new image; assign it so the prints below stay small
print(lena.mode)
print(lena.getpixel((0, 0)))

lena_ycbcr = lena.convert("YCbCr")
print(lena_ycbcr.mode)
print(lena_ycbcr.getpixel((0, 0)))


import torch
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision.transforms import ToTensor

rgb = ToTensor()(lena)
rgb = rgb.view(1, rgb.size(0), rgb.size(1), rgb.size(2))
rgb = Variable(rgb)
rgb2ycbcr = Variable(torch.FloatTensor([
    [0.299, 0.587, 0.114],
    [-0.169, -0.331, 0.5],
    [0.5, -0.419, -0.081],
]).resize_(3, 3, 1, 1))
print(rgb2ycbcr)


print("---- rgb -----")
print(rgb)
ycbcr = F.conv2d(rgb, weight=rgb2ycbcr)

print("first pixel:", rgb.data[0, 0, 0, 0] * 255)
print(lena.getpixel((0, 0)))

print("---- ycbcr -----")
print(ycbcr)
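
# The 1x1 convolution above is a per-pixel matrix multiply, so the first output
# pixel can be checked against PIL's conversion. PIL's JPEG-style YCbCr offsets
# Cb and Cr by 128, while the matrix produces zero-centered chroma:
y, cb, cr = (ycbcr.data[0, :, 0, 0] * 255).tolist()
print(y, cb + 128, cr + 128)        # should roughly match (up to rounding)...
print(lena_ycbcr.getpixel((0, 0)))  # ...PIL's YCbCr value at the same pixel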
Ejemplo n.º 31
0
def image_to_tensor(path: str) -> torch.Tensor:
    image = Image.open(path)
    image = ToTensor()(image)
    return image
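
# Usage sketch (the path is hypothetical). ToTensor returns a CHW tensor with
# values scaled to [0, 1] and no batch dimension, so unsqueeze before feeding a model:
x = image_to_tensor('images/sample.png')
print(x.shape)      # (C, H, W)
x = x.unsqueeze(0)  # add a batch dimension -> (1, C, H, W)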