def main(args):
    """Train a knowledge-graph-embedding (KGE) model on triples loaded
    from ``args.data_path``.

    Builds entity/relation vocabularies on the fly, trains with
    alternating head/tail negative sampling, periodically checkpoints to
    ``args.save_path``, and optionally evaluates on the training set.

    Raises:
        ValueError: if neither ``init_checkpoint`` nor ``data_path`` is
            provided, or if ``save_path`` is missing.
    """
    if args.init_checkpoint:
        # A checkpoint directory carries its own config; let it override args.
        override_config(args)
    elif args.data_path is None:
        # FIX: corrected typo 'choosed' -> 'chosen' in the error message.
        raise ValueError('one of init_checkpoint/data_path must be chosen.')

    if args.save_path is None:
        raise ValueError('Where do you want to save your trained model?')

    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # Write logs to checkpoint and console
    set_logger(args)

    # Parse (head, relation, tail) rows separated by commas or tabs,
    # assigning contiguous integer ids as new symbols are encountered.
    with open(args.data_path) as fin:
        entity2id = bidict()
        relation2id = bidict()
        train_triples = []
        for line in fin:
            fields = [x.strip() for x in re.split("[,\t]", line) if x.strip()][:3]
            if len(fields) < 3:
                # Skip blank/malformed rows instead of failing the whole load.
                continue
            e1, relation, e2 = fields
            if e1 not in entity2id:
                entity2id[e1] = len(entity2id)
            if e2 not in entity2id:
                entity2id[e2] = len(entity2id)
            if relation not in relation2id:
                relation2id[relation] = len(relation2id)
            train_triples.append((entity2id[e1], relation2id[relation], entity2id[e2]))

    nentity = len(entity2id)
    nrelation = len(relation2id)

    args.nentity = nentity
    args.nrelation = nrelation

    logging.info('Model: %s' % args.model)
    logging.info('Data Path: %s' % args.data_path)
    logging.info('#entity: %d' % nentity)
    logging.info('#relation: %d' % nrelation)

    logging.info('#train: %d' % len(train_triples))

    # All true triples (used by test_step to filter known positives).
    all_true_triples = train_triples

    kge_model = KGEModel(
        model_name=args.model,
        nentity=nentity,
        nrelation=nrelation,
        hidden_dim=args.hidden_dim,
        gamma=args.gamma,
        double_entity_embedding=args.double_entity_embedding,
        double_relation_embedding=args.double_relation_embedding
    )

    logging.info('Model Parameter Configuration:')
    for name, param in kge_model.named_parameters():
        logging.info('Parameter %s: %s, require_grad = %s' % (name, str(param.size()), str(param.requires_grad)))

    if args.cuda:
        kge_model = kge_model.cuda()

    # Two dataloaders corrupt heads and tails respectively; the iterator
    # below alternates between them one batch at a time.
    train_dataloader_head = DataLoader(
        TrainDataset(train_triples, nentity, nrelation, args.negative_sample_size, 'head-batch'),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=max(1, args.cpu_num//2),
        collate_fn=TrainDataset.collate_fn
    )

    train_dataloader_tail = DataLoader(
        TrainDataset(train_triples, nentity, nrelation, args.negative_sample_size, 'tail-batch'),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=max(1, args.cpu_num//2),
        collate_fn=TrainDataset.collate_fn
    )

    train_iterator = BidirectionalOneShotIterator(train_dataloader_head, train_dataloader_tail)

    # Set training configuration
    current_learning_rate = args.learning_rate
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, kge_model.parameters()),
        lr=current_learning_rate
    )
    if args.warm_up_steps:
        warm_up_steps = args.warm_up_steps
    else:
        # Default: first learning-rate decay at mid-training.
        warm_up_steps = args.max_steps // 2

    if args.init_checkpoint:
        # Restore model/optimizer/schedule state from the checkpoint directory.
        logging.info('Loading checkpoint %s...' % args.init_checkpoint)
        checkpoint = torch.load(os.path.join(args.init_checkpoint, 'checkpoint'))
        init_step = checkpoint['step']
        kge_model.load_state_dict(checkpoint['model_state_dict'])

        current_learning_rate = checkpoint['current_learning_rate']
        warm_up_steps = checkpoint['warm_up_steps']
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    else:
        # FIX: corrected typo 'Ramdomly' -> 'Randomly' in the log message.
        logging.info('Randomly Initializing %s Model...' % args.model)
        init_step = 0

    step = init_step

    logging.info('Start Training...')
    logging.info('init_step = %d' % init_step)
    # FIX: the learning rate is a float; '%d' truncated e.g. 0.0001 to 0.
    logging.info('learning_rate = %f' % current_learning_rate)
    logging.info('batch_size = %d' % args.batch_size)
    logging.info('hidden_dim = %d' % args.hidden_dim)
    logging.info('gamma = %f' % args.gamma)
    # FIX: this flag was logged twice (once with '%d'); log it once as a string.
    logging.info('negative_adversarial_sampling = %s' % str(args.negative_adversarial_sampling))
    if args.negative_adversarial_sampling:
        logging.info('adversarial_temperature = %f' % args.adversarial_temperature)

    training_logs = []

    # Training loop
    for step in range(init_step, args.max_steps):

        log = kge_model.train_step(kge_model, optimizer, train_iterator, args)

        training_logs.append(log)

        if step >= warm_up_steps:
            # Decay LR 10x and push the next decay 3x further out.
            current_learning_rate = current_learning_rate / 10
            logging.info('Change learning_rate to %f at step %d' % (current_learning_rate, step))
            optimizer = torch.optim.Adam(
                filter(lambda p: p.requires_grad, kge_model.parameters()),
                lr=current_learning_rate
            )
            warm_up_steps = warm_up_steps * 3

        if step % args.save_checkpoint_steps == 0:
            save_variable_list = {
                'step': step,
                'current_learning_rate': current_learning_rate,
                'warm_up_steps': warm_up_steps
            }
            save_model(kge_model, optimizer, save_variable_list, args, entity2id, relation2id)

        if step % args.log_steps == 0:
            # Report the running average of every logged metric, then reset.
            metrics = {}
            for metric in training_logs[0].keys():
                metrics[metric] = sum(log[metric] for log in training_logs) / len(training_logs)
            log_metrics('Training average', step, metrics)
            training_logs = []

    # Final checkpoint after the last training step.
    save_variable_list = {
        'step': step,
        'current_learning_rate': current_learning_rate,
        'warm_up_steps': warm_up_steps
    }
    save_model(kge_model, optimizer, save_variable_list, args, entity2id, relation2id)

    if args.evaluate_train:
        logging.info('Evaluating on Training Dataset...')
        metrics = kge_model.test_step(kge_model, train_triples, all_true_triples, args)
        log_metrics('Test', step, metrics)
def main():
    """Train a CIFAR-10 classifier configured via the global ``config``.

    Relies on module-level state: ``best_acc1``/``start_epoch`` globals,
    ``config``, ``tpp`` (distributed helper), ``writer`` and
    ``experiment_path``. A checkpoint is saved after every epoch, with the
    best top-1 accuracy run flagged via ``is_best``.
    """
    global best_acc1, start_epoch
    model = get_model(config.get_string('arch'))

    # Move to GPU before (optionally) wrapping in DistributedDataParallel.
    model.cuda()

    # Scale the base learning rate with the batch size (linear scaling rule).
    learning_rate = scale_lr(
        config.get_float('optimizer.lr'),
        config.get_int('dataloader.batch_size')
    )

    optimizer = optim.SGD(
        model.parameters(),
        lr=learning_rate,
        momentum=config.get_float('optimizer.momentum'),
        weight_decay=config.get_float('optimizer.weight_decay'),
        nesterov=config.get_bool('optimizer.nesterov')
    )
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer,
        config.get_list('scheduler.milestones')
    )

    if tpp.distributed:
        model = DistributedDataParallel(model, device_ids=[tpp.local_rank])

    normalize = T.Normalize(
        config.get_list('dataset.mean'),
        config.get_list('dataset.std')
    )
    # Standard CIFAR augmentation: pad-and-crop plus horizontal flip.
    train_transform = T.Compose([
        T.RandomCrop(32, padding=4),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        normalize
    ])

    val_transform = T.Compose([
        T.ToTensor(),
        normalize
    ])

    train_set = CIFAR10(
        config.get_string('dataset.root'), train=True, transform=train_transform, download=True
    )
    val_set = CIFAR10(
        config.get_string('dataset.root'), train=False, transform=val_transform, download=False
    )

    train_sampler = None
    val_sampler = None
    if tpp.distributed:
        # Each rank sees a disjoint shard; shuffling is delegated to the sampler.
        train_sampler = DistributedSampler(train_set)
        val_sampler = DistributedSampler(val_set)

    train_loader = DataLoader(
        train_set,
        batch_size=config.get_int('dataloader.batch_size'),
        pin_memory=True,
        shuffle=(train_sampler is None),
        num_workers=config.get_int('dataloader.num_workers'),
        sampler=train_sampler
    )
    val_loader = DataLoader(
        val_set,
        batch_size=config.get_int('dataloader.batch_size'),
        pin_memory=True,
        num_workers=config.get_int('dataloader.num_workers'),
        sampler=val_sampler
    )

    for epoch in range(start_epoch, config.get_int('strategy.num_epochs')):
        if tpp.distributed:
            # Reseed the sampler so each epoch sees a different shard order.
            train_sampler.set_epoch(epoch)

        train(model, train_loader, criterion, optimizer, epoch)
        acc1 = validate(model, val_loader, criterion, epoch)
        scheduler.step()

        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Unwrap DDP before saving so the checkpoint loads without the wrapper.
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': config.get_string('arch'),
            'state_dict': model.module.state_dict() if tpp.distributed else model.state_dict(),
            'best_acc1': best_acc1,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
        }, is_best=is_best, folder=experiment_path)
def train(load_model, cuda_visible):
    """Train CartoonGAN: a generator/discriminator pair with a VGG19
    content (perceptual) loss.

    Args:
        load_model: if truthy, resume from './model/cartoongan'.
        cuda_visible: CUDA device selection forwarded to init_device_seed.

    The first 10 epochs are an initialization phase that trains the
    generator on content loss only. A checkpoint is written every epoch.
    """
    device = init_device_seed(1234, cuda_visible)

    dataset = CartoonGANDataset('./data/cartoon_dataset', ['photo', 'cartoon', 'cartoon_smoothed'], False)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    os.makedirs('./model', exist_ok=True)

    generator = CartoonGANGenerator().to(device)
    discriminator = CartoonGANDiscriminator().to(device)
    feature_extractor = VGG19().to(device)

    epoch = 0

    if load_model:
        # NOTE(review): optimizer states are not checkpointed, so Adam
        # moment estimates restart from zero on resume — confirm intended.
        checkpoint = torch.load('./model/cartoongan', map_location=device)
        generator.load_state_dict(checkpoint['generator_state_dict'])
        discriminator.load_state_dict(checkpoint['discriminator_state_dict'])
        epoch = checkpoint['epoch']

    optimizer_gen = optim.Adam(generator.parameters(), lr=2e-4, betas=(0.5, 0.999))
    optimizer_disc = optim.Adam(discriminator.parameters(), lr=2e-4, betas=(0.5, 0.999))

    criterion_gen = nn.L1Loss()
    criterion_disc = nn.BCEWithLogitsLoss()

    # FIX: '<' instead of '<=' — the original ran a 201st epoch (and a
    # resumed run at epoch 200 trained one extra epoch).
    while epoch < 200:
        epoch += 1

        generator.train()
        discriminator.train()

        pbar = tqdm(range(len(dataloader)))
        pbar.set_description('Epoch {}'.format(epoch))
        total_loss_gen = .0
        total_loss_con = .0
        total_loss_disc = .0

        for idx, (img_photo, [img_cartoon, img_cartoon_blur]) in enumerate(dataloader):
            img_photo = img_photo.to(device, dtype=torch.float32)
            img_cartoon = img_cartoon.to(device, dtype=torch.float32)
            img_cartoon_blur = img_cartoon_blur.to(device, dtype=torch.float32)

            # Initialization phase: generator only, content loss only.
            # Images are in [-1, 1]; VGG expects [0, 1], hence (x + 1) / 2.
            if epoch <= 10:
                optimizer_gen.zero_grad()

                gen_photo = generator(img_photo)
                x_features = feature_extractor((img_photo + 1) / 2).detach()
                Gx_features = feature_extractor((gen_photo + 1) / 2)

                loss_con = criterion_gen(Gx_features, x_features) * 10
                loss_con.backward()
                optimizer_gen.step()

                total_loss_con += loss_con.item()
                pbar.set_postfix_str('CLoss: ' + str(np.around(total_loss_con / (idx + 1), 4)))
                pbar.update()
                continue

            # Discriminator loss and update: real cartoons are positive,
            # generated photos and blurred cartoons are negatives.
            optimizer_disc.zero_grad()

            gen_photo = generator(img_photo).detach()
            label_gen = discriminator(gen_photo)
            label_cartoon = discriminator(img_cartoon)
            label_cartoon_blur = discriminator(img_cartoon_blur)

            loss_generated_disc = criterion_disc(label_gen, torch.zeros_like(label_gen))
            loss_cartoon_disc = criterion_disc(label_cartoon, torch.ones_like(label_cartoon))
            loss_blur_disc = criterion_disc(label_cartoon_blur, torch.zeros_like(label_cartoon_blur))
            loss_disc = loss_generated_disc + loss_cartoon_disc + loss_blur_disc

            loss_disc.backward()
            optimizer_disc.step()

            # Generator loss and update: adversarial + weighted content loss.
            optimizer_gen.zero_grad()
            gen_photo = generator(img_photo)

            x_features = feature_extractor((img_photo + 1) / 2).detach()
            Gx_features = feature_extractor((gen_photo + 1) / 2)

            loss_con = criterion_gen(Gx_features, x_features) * 10
            label_gen = discriminator(gen_photo)
            loss_generated_gen = criterion_disc(label_gen, torch.ones_like(label_gen))
            loss_gen = loss_generated_gen + loss_con

            loss_gen.backward()
            optimizer_gen.step()
            # FIX: removed a redundant optimizer_gen.zero_grad() that
            # immediately followed step(); gradients are already zeroed
            # at the top of each update.

            # Loss display (running epoch averages).
            total_loss_gen += loss_generated_gen.item()
            total_loss_con += loss_con.item()
            total_loss_disc += loss_disc.item()
            pbar.set_postfix_str('G_GAN: {}, G_Content: {}, D: {}'.format(
                np.around(total_loss_gen / (idx + 1), 4),
                np.around(total_loss_con / (idx + 1), 4),
                np.around(total_loss_disc / (idx + 1), 4)))
            pbar.update()

        # Save checkpoint per epoch
        torch.save({
            'generator_state_dict': generator.state_dict(),
            'discriminator_state_dict': discriminator.state_dict(),
            'epoch': epoch,
        }, './model/cartoongan')
Beispiel #4
0
def train(opt):
    """Train an EfficientDet detector on COCO/shape or KITTI data.

    Reads per-project hyperparameters from ``projects/{opt.project}.yml``,
    optionally resumes from saved weights, checkpoints every
    ``opt.save_interval`` steps and on each validation improvement, and
    supports early stopping. KeyboardInterrupt triggers a final
    checkpoint before exit.
    """
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Fixed seed for reproducibility.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {'batch_size': opt.batch_size,
                       'shuffle': True,
                       'drop_last': True,
                       'collate_fn': collater,
                       'num_workers': opt.num_workers}

    val_params = {'batch_size': opt.batch_size,
                  'shuffle': False,
                  'drop_last': True,
                  'collate_fn': collater,
                  'num_workers': opt.num_workers}

    # Input resolution per compound coefficient (d0..d7).
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    if params.project_name == 'coco' or params.project_name == 'shape':
        training_set = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name), set=params.train_set,
            transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std), Augmenter(), Resizer(input_sizes[opt.compound_coef])]))

        val_set = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name), set=params.val_set,
            transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std), Resizer(input_sizes[opt.compound_coef])]))
    else:
        training_set = KITTIDataset(data_path=params.train_data_path, class_list=params.obj_list,
            transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std), Augmenter(), Resizer(input_sizes[opt.compound_coef])]))

        val_set = KITTIDataset(data_path=params.val_data_path, class_list=params.obj_list,
            transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std), Resizer(input_sizes[opt.compound_coef])]))

    training_generator = DataLoader(training_set, **training_params)
    val_generator = DataLoader(val_set, **val_params)

    # SECURITY NOTE(review): eval() executes arbitrary code from the project
    # yml. The config is assumed trusted here; prefer ast.literal_eval if
    # configs can ever come from untrusted sources.
    model = EfficientDetBackbone(num_classes=len(params.obj_list), compound_coef=opt.compound_coef,
        ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            # Weight files are named ..._{epoch}_{step}.pth; recover the step.
            last_step = int(os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (IndexError, ValueError):
            # FIX: narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit.
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.')

        print(f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}')
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:
        def freeze_backbone(m):
            # Freeze parameters of any EfficientNet/BiFPN submodule.
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.9, nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            # When resuming, skip whole epochs already covered by `step`.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            # FIX: loop variable renamed from `iter` (shadowed the builtin).
            for batch_idx, data in enumerate(progress_bar):
                # Skip iterations already covered in the partially-done epoch.
                if batch_idx < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs, annot, obj_list=params.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    # Skip degenerate batches (no loss or NaN/Inf).
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'.format(
                            step, epoch, opt.num_epochs, batch_idx + 1, num_iter_per_epoch, cls_loss.item(),
                            reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss}, step)
                    writer.add_scalars('Classfication_loss', {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
                        print('checkpoint...')

                except Exception as e:
                    # Best-effort: log the failure and keep training.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            # FIX: guard against an empty epoch (every batch skipped/failed),
            # which would feed NaN into ReduceLROnPlateau.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in val_generator:
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs, annot, obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'.format(
                        epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss}, step)

                # Save whenever validation loss improves by more than es_min_delta.
                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print('[Info] Stop training at epoch {}. The lowest loss achieved is {}'.format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()
Beispiel #5
0
def train_model(model,
                inputs,
                outputs,
                partition,
                loss_f,
                eval_f,
                opt,
                epochs,
                args,
                model_name='model'):
    """Train `model` on a train/eval split and track per-epoch losses.

    The targets fed to the model are the inputs themselves (see
    `batch_outputs = batch_inputs` below), i.e. this is self-supervised
    sequence training; `outputs` is only used to build the TensorDataset.

    Args:
        model: torch module; moved to CUDA in place, returned on CPU.
        inputs, outputs: full data tensors, indexed by `partition`.
        partition: pair of index arrays — partition[0] selects training
            rows, partition[1] evaluation rows.
        loss_f: training loss, consumed by the external `step` helper.
        eval_f: element-wise evaluation loss.
        opt: optimizer; also serialized alongside checkpoints.
        epochs: number of FINAL epochs to actually run — the first
            (args.epochs - epochs) loop iterations are skipped, though the
            batch-size annealing schedule still advances for them.
        args: namespace with epochs, batch_size, batch_size_annealing,
            n_layers, save_rate, model_path.
        model_name: file stem for checkpoint files.

    Returns:
        (model.cpu(), losses.cpu()) on success. On any exception or
        Ctrl-C, a checkpoint is saved and (exception, losses.cpu()) is
        returned instead of raising.
    """
    model.cuda()
    model_path = args.model_path + '/' + model_name

    train_inputs = inputs[partition[0]]
    train_outputs = outputs[partition[0]]
    eval_inputs = inputs[partition[1]]
    eval_outputs = outputs[partition[1]]

    train_data = TensorDataset(train_inputs, train_outputs)

    # Resample train set to a normal distribution over targets
    #if isinstance(outputs, torch.FloatTensor):
    #    resample_probs = build_resample_probs(train_outputs)
    #    resample_indices = torch.multinomial(resample_probs, train_outputs.size(0), replacement=True)
    trainval_inputs = train_inputs  #copy.deepcopy(train_inputs)
    trainval_outputs = train_outputs  #copy.deepcopy(train_outputs)
    #if isinstance(outputs, torch.FloatTensor):
    #    train_inputs = train_inputs[resample_indices]
    #    train_outputs = train_outputs[resample_indices]

    # Pre-calculate the mean of the eval data and from it the error for R^2
    #try:
    #    data_mean = torch.mean(eval_outputs).detach()
    #    data_error = eval_f(eval_outputs.detach(), torch.ones(eval_outputs.size(0))*data_mean).detach() / eval_outputs.size(0)
    #except:
    #data_mean = torch.mode(eval_outputs, dim=0).values.view(-1).detach()
    # R^2 baseline error fixed at 1 (the commented-out data-driven estimate
    # above is disabled), so the printed R^2 is relative to this constant.
    data_error = torch.FloatTensor([1]).to(eval_inputs)

    # NOTE(review): train_losses is normalized at the end of every epoch but
    # never reset to 0, so each epoch's sum accumulates on top of the
    # previous epoch's mean — confirm whether this carry-over is intentional.
    train_losses = 0
    # Per-epoch [train, eval, trainval] losses.
    losses = torch.zeros(args.epochs, 3)

    try:
        # Run train/eval loop over specified number of epochs
        for i_epoch in range(args.epochs):
            # Increase batch size according to specified schedule
            args.batch_size = int(args.batch_size + args.batch_size_annealing)
            # Skip all but the last `epochs` iterations (annealing above
            # still advances during the skipped ones).
            if i_epoch < (args.epochs - epochs):
                continue

            # Prep Data Loader (rebuilt each epoch since batch_size anneals)
            train_loader = DataLoader(train_data,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      pin_memory=True)

            # Set model to training mode
            model.train()

            i_shuffle = shuffle_data(train_inputs, train_outputs)

            # Batch training data
            for i_batch, batch in enumerate(train_loader):
                batch_inputs, batch_outputs = batch
                batch_inputs = get_sequences(batch_inputs, rand=True)
                # Targets are the inputs themselves (self-supervised).
                batch_outputs = batch_inputs
                '''
                # Build batches
                batch_indices = slice(i_batch*args.batch_size, (i_batch+1)*args.batch_size)
                batch_inputs = train_inputs[i_shuffle[batch_indices]].cuda()
                batch_outputs = train_outputs[i_shuffle[batch_indices]].cuda()
                '''

                # If the last batch is size 0, just skip it
                if batch_outputs.size(0) == 0:
                    continue

                # Perform gradient update on batch
                batch_losses = step(model, batch_inputs, batch_outputs, loss_f,
                                    opt, args.n_layers)
                train_losses += torch.sum(batch_losses).detach().cpu().item()
            train_losses = train_losses / train_inputs.size(0)

            # Set model to evaluation mode (turn off dropout and stuff)
            model.eval()

            # Cap evaluation at 10 batches to bound per-epoch eval cost.
            n_batches_eval = min((eval_inputs.size(0) // args.batch_size), 10)
            sum_loss = 0

            # Batch the eval data
            #eval_inputs, eval_outputs = shuffle_data(eval_inputs, eval_outputs)
            i_shuffle = shuffle_data(eval_inputs, eval_outputs)
            for i_batch in range(n_batches_eval):
                batch_indices = slice(i_batch * args.batch_size,
                                      (i_batch + 1) * args.batch_size)
                batch_inputs = eval_inputs[i_shuffle[batch_indices]]
                batch_inputs = get_sequences(batch_inputs, rand=True)
                batch_outputs = batch_inputs

                # Same reasoning as training: sometimes encounter 0-size batches
                if batch_outputs.size(0) == 0:
                    continue

                # Don't need to track operations/gradients for evaluation
                with torch.no_grad():
                    # Build a sum of evaluation losses to average over later.
                    # Presumably values >= 4 are padding/masked tokens that
                    # are excluded from the loss — TODO confirm.
                    predictions, _ = model(batch_inputs, args.n_layers)
                    predictions = predictions.permute(0, 2,
                                                      1)[batch_outputs < 4]
                    weighting = normpdf(batch_outputs.shape)[batch_outputs < 4]
                    sum_loss += torch.sum(
                        eval_f(predictions.squeeze(),
                               batch_outputs[batch_outputs < 4].squeeze()) *
                        weighting).item()

            # Same capped evaluation, but on (un-resampled) training data.
            n_batches_trainval = min(
                (trainval_inputs.size(0) // args.batch_size), 10)
            sum_loss2 = 0
            # Batch the eval data
            #trainval_inputs, trainval_outputs = shuffle_data(trainval_inputs, trainval_outputs)
            i_shuffle = shuffle_data(trainval_inputs, trainval_outputs)
            for i_batch in range(n_batches_trainval):
                batch_indices = slice(i_batch * args.batch_size,
                                      (i_batch + 1) * args.batch_size)
                batch_inputs = trainval_inputs[i_shuffle[batch_indices]]
                batch_inputs = get_sequences(batch_inputs, rand=True)
                batch_outputs = batch_inputs

                # Same reasoning as training: sometimes encounter 0-size batches
                if batch_outputs.size(0) == 0:
                    continue

                # Don't need to track operations/gradients for evaluation
                with torch.no_grad():
                    # Build a sum of evaluation losses to average over later
                    predictions, _ = model(batch_inputs, args.n_layers)
                    predictions = predictions.permute(0, 2,
                                                      1)[batch_outputs < 4]
                    weighting = normpdf(batch_outputs.shape)[batch_outputs < 4]
                    sum_loss2 += torch.sum(
                        eval_f(predictions.squeeze(),
                               batch_outputs[batch_outputs < 4].squeeze()) *
                        weighting).item()

            # Calculate and print mean train and eval loss over the epoch
            # (the +1 avoids division by zero when no batches were run)
            mean_loss = sum_loss / (args.batch_size * n_batches_eval + 1
                                    )  #eval_inputs.size(0)#
            mean_loss2 = sum_loss2 / (args.batch_size * n_batches_trainval + 1
                                      )  #trainval_inputs.size(0)#
            losses[i_epoch, 0] = train_losses
            losses[i_epoch, 1] = mean_loss
            losses[i_epoch, 2] = mean_loss2
            print(
                'Epoch %d Mean Train / TrainVal / Eval Loss and R^2 Value: %.3f / %.3f / %.3f / %.3f '
                % (i_epoch + 1, losses[i_epoch, 0], losses[i_epoch, 2],
                   losses[i_epoch, 1], 1 - (mean_loss / data_error).item()),
                end='\r')

            # Periodic checkpoint.
            if (i_epoch + 1) % args.save_rate == 0:
                save_model(model, opt, model_path + '_%d.ptm' % (i_epoch + 1))

        print('')  # to keep only the final epoch losses from each fold
        return model.cpu(), losses.cpu()
    except (Exception, KeyboardInterrupt) as e:
        # NOTE(review): if the failure happens before the first loop
        # iteration, i_epoch is unbound and this handler itself raises
        # NameError, masking the original exception.
        save_model(model, opt, model_path + '_%d.ptm' % i_epoch)
        #raise e
        return e, losses.cpu()
Beispiel #6
0

# Build Caltech-101 train/val/test datasets from a shared image folder; the
# held-out files are split in half for testing and validation.
train_dataset = datasets.ImageFolder('/home/mshah1/workhorse3/caltech-101/',
                                     transform=image_transforms['train'],
                                     is_valid_file=is_train_file)

test_dataset = datasets.ImageFolder('/home/mshah1/workhorse3/caltech-101/',
                                    transform=image_transforms['valid'],
                                    is_valid_file=is_test_file)
test_len = len(test_dataset) // 2
val_len = len(test_dataset) - test_len
test_dataset, val_dataset = random_split(test_dataset, [test_len, val_len])

# Dataloader iterators; only the training split is shuffled.
dataloaders = {
    'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
    'val': DataLoader(val_dataset, batch_size=batch_size, shuffle=False),
    # FIX: the 'test' loader previously iterated val_dataset, so reported
    # test metrics were actually computed on the validation split.
    'test': DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
}

# NOTE(review): 102 output classes for Caltech-101 — presumably 101
# categories plus the background class; confirm against the label set.
model = get_pretrained_model('vgg16', 102)

# Find total parameters and trainable parameters
total_params = sum(p.numel() for p in model.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(p.numel() for p in model.parameters()
                             if p.requires_grad)
print(f'{total_trainable_params:,} training parameters.')

if train_on_gpu:
    model = model.to('cuda')
Beispiel #7
0
def dataloader(args):
    """Return train/test DataLoaders for the dataset selected in ``args``.

    Supported datasets: MNIST, FashionMNIST, CIFAR10, CIFAR100, SVHN,
    CelebA (read from ``data/celeba``), STL10, LSUN and FakeData.

    Parameters
    ----------
    args : argparse.Namespace
        Uses ``dataset``, ``batch_size``, ``img_resize``, ``img_size`` and,
        for LSUN, ``lsun_classes``.  Side effect: ``args.img_channels`` is
        always set, and ``args.img_size`` is reset to the dataset's native
        size when ``img_resize`` is disabled.

    Returns
    -------
    tr_set : DataLoader
        Training-set loader (shuffled, last incomplete batch dropped).
    te_set : DataLoader
        Test-set loader (same shuffle/drop_last settings, as in the
        original code).

    Raises
    ------
    ValueError
        If ``args.dataset`` is not a recognised dataset name.
    """
    # Build [-1, 1]-normalising transforms for 3-channel / 1-channel images,
    # optionally resizing + center-cropping first (CenterCrop handles H != W).
    if args.img_resize:
        transform3c = transforms.Compose([
            transforms.Resize(args.img_size),
            transforms.CenterCrop(args.img_size),  # if H != W
            transforms.ToTensor(),
            transforms.Normalize((.5, .5, .5), (.5, .5, .5))])
        transform1c = transforms.Compose([
            transforms.Resize(args.img_size),
            transforms.CenterCrop(args.img_size),  # if H != W
            transforms.ToTensor(),
            # proper one-element tuples (the originals passed bare floats)
            transforms.Normalize((.5,), (.5,))])
    else:
        transform3c = transforms.Compose([transforms.ToTensor(),
                                         transforms.Normalize((.5, .5, .5),
                                                              (.5, .5, .5))])
        transform1c = transforms.Compose([transforms.ToTensor(),
                                         transforms.Normalize((.5,), (.5,))])
    # create datasets
    datapath, dataset_name, batch_size = 'data', args.dataset, args.batch_size
    if dataset_name == 'mnist':  # handwritten digits, (1, 28, 28)
        tr_set = thv.datasets.MNIST(datapath, train=True, download=True,
                                    transform=transform1c)
        te_set = thv.datasets.MNIST(datapath, train=False, download=True,
                                    transform=transform1c)
    elif dataset_name == 'fashion-mnist':  # fashion (Zalando), (1, 28, 28)
        tr_set = thv.datasets.FashionMNIST(datapath, train=True, download=True,
                                           transform=transform1c)
        te_set = thv.datasets.FashionMNIST(datapath, train=False,
                                           download=True,
                                           transform=transform1c)
    elif dataset_name == 'cifar10':  # 10-class image recognition, (3, 32, 32)
        tr_set = thv.datasets.CIFAR10(datapath, train=True, download=True,
                                      transform=transform3c)
        te_set = thv.datasets.CIFAR10(datapath, train=False, download=True,
                                      transform=transform3c)
    elif dataset_name == 'cifar100':  # 100-class image recognition, (3, 32, 32)
        tr_set = thv.datasets.CIFAR100(datapath, train=True, download=True,
                                       transform=transform3c)
        te_set = thv.datasets.CIFAR100(datapath, train=False, download=True,
                                       transform=transform3c)
    elif dataset_name == 'svhn':  # digit recognition, (3, 32, 32)
        tr_set = thv.datasets.SVHN(os.path.join(datapath, 'SVHN'),
                                   split='train', download=True,
                                   transform=transform3c)
        te_set = thv.datasets.SVHN(os.path.join(datapath, 'SVHN'),
                                   split='test', download=True,
                                   transform=transform3c)
    elif dataset_name == 'celeba':  # celebrity face, (3, 218, 178)
        # No official train/test split here: 80/20 random split of the folder.
        celeba = dset.ImageFolder(root='data/celeba', transform=transform3c)
        tr_len = int(len(celeba) * 0.8)
        te_len = len(celeba) - tr_len
        tr_set, te_set = torch.utils.data.random_split(celeba,
                                                       [tr_len, te_len])
    elif dataset_name == 'stl10':  # 10-class image recognition, (3, 96, 96)
        tr_set = thv.datasets.STL10(datapath, split='train', download=True,
                                    transform=transform3c)
        te_set = thv.datasets.STL10(datapath, split='test', download=True,
                                    transform=transform3c)
    elif dataset_name == 'lsun':
        # LSUN class names are suffixed per split, e.g. 'bedroom_train'.
        tr_classes = [c + '_train' for c in args.lsun_classes.split(',')]
        te_classes = [c + '_test' for c in args.lsun_classes.split(',')]
        tr_set = dset.LSUN(root='data/lsun', classes=tr_classes)
        te_set = dset.LSUN(root='data/lsun', classes=te_classes)
    elif dataset_name == 'fake':
        tr_set = dset.FakeData(
                               image_size=(3, args.img_size, args.img_size),
                               transform=transforms.ToTensor())
        te_set = dset.FakeData(size=1024,
                               image_size=(3, args.img_size, args.img_size),
                               transform=transforms.ToTensor())
    else:
        # BUG FIX: unknown names previously fell through and crashed with
        # NameError on tr_set; fail fast with a clear message instead.
        raise ValueError('unknown dataset: %s' % dataset_name)
    # wrap datasets in loaders
    tr_set = DataLoader(tr_set, batch_size=batch_size, shuffle=True,
                        drop_last=True)
    te_set = DataLoader(te_set, batch_size=batch_size, shuffle=True,
                        drop_last=True)
    # Record channel count / native image size back onto args.
    args.img_channels = 1 if dataset_name in ['mnist', 'fashion-mnist'] else 3
    if not args.img_resize:  # use original size
        if dataset_name in ['mnist', 'fashion-mnist']:
            args.img_size = 28
        elif dataset_name in ['cifar10', 'cifar100', 'svhn']:
            args.img_size = 32
        elif dataset_name == 'celeba':
            args.img_size = [218, 178]
        elif dataset_name == 'stl10':
            args.img_size = 96
    return tr_set, te_set
    # NOTE(review): everything below sits after the `return` statement above,
    # so it is unreachable dead code — apparently a stray Sort-of-CLEVR demo
    # pasted into this function (``sortofclevr``/``title`` are not defined
    # here). Candidate for removal; kept byte-identical and documented only.
    batch_size = 64
    print('Number of episodes to run to cover the set once: {}'.format(
        sortofclevr.get_epoch_size(batch_size)))

    # get a sample
    sample = sortofclevr[0]
    print(repr(sample))
    print('__getitem__ works.')

    # wrap DataLoader on top of this Dataset subclass
    from torch.utils.data import DataLoader

    dataloader = DataLoader(dataset=sortofclevr,
                            collate_fn=sortofclevr.collate_fn,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=0)

    # try to see if there is a speed up when generating batches w/ multiple workers
    import time

    # Time one full pass over the dataset with the configured worker count.
    s = time.time()
    for i, batch in enumerate(dataloader):
        print('Batch # {} - {}'.format(i, type(batch)))

    print('Number of workers: {}'.format(dataloader.num_workers))
    print(
        'time taken to exhaust the dataset for a batch size of {}: {}s'.format(
            batch_size,
            time.time() - s))
Beispiel #9
0
# HDF5 group paths: seven signal mass points plus one background set.
datasets = ['signal/ecal/m1/xyz_64','signal/ecal/m5/xyz_64','signal/ecal/m10/xyz_64',
    'signal/ecal/m50/xyz_64','signal/ecal/m100/xyz_64','signal/ecal/m500/xyz_64', 'signal/ecal/m1000/xyz_64', 'background/ecal/data1/xyz_64']

#original list of hits (not reconstructed images)
#m_ori_file_tab = ['data/ecal/signal/ecal_sign_10_test.npy','data/ecal/signal/ecal_sign_10.npy']
# Particle masses matching `datasets` entry-for-entry (0 = background).
mass_tab = [1,5,10,50,100,500,1000,0]

# Name under which this model's outputs are appended into the HDF5 file.
net_name = 'model_multimass_1'

all_transforms = transforms.Compose([transforms.ToTensor()])

# NOTE(review): range(7) evaluates only the seven signal sets and skips the
# background entry (index 7) — confirm this is intentional.
for j in range(7): #evaluate trained network on data
    
    # Number of test events, read from the stored image array's first axis.
    with h5py.File('dataset.hdf5', 'a') as f:
        n_test = len(f[datasets[j]][:,0,0,0])
    
    nam = title[j]  # description to use as the title of figures

    # NOTE(review): duplicates the identical Compose assignment above.
    all_transforms = transforms.Compose([transforms.ToTensor()]) #extra transf. to apply to images

    event_test = my_Dataset_test_single(n_test,datasets[j],mass_tab[j],transform=all_transforms) #create test dataset

    test_loader = DataLoader(dataset=event_test, batch_size=batch_size, shuffle=False) #create iterator

    outs,labels,im_tab,m_tab = test_any_model(model,test_loader,nam,batch_size,dim,depth=3) #evaluate model

    # append group with new dataset with results
    with h5py.File('dataset.hdf5', 'a') as f:
        #append file with array of images
        new_data = f.create_dataset(datasets[j]+'/'+net_name,data=outs)
        new_data.attrs['date'] = time.time()  # timestamp the stored results
Beispiel #10
0
        idx_to_word = {v:k for k,v in word_to_idx.items()}
        return word_to_idx, idx_to_word

    def __len__(self):
        """Number of samples, i.e. rows in the embedding matrix."""
        rows = self.embeddings.shape
        return rows[0]

    def __getitem__(self, i):
        """Return sample ``i`` as a dict with its float32 embedding vector
        under 'embedding' and the corresponding word string under 'word'."""
        vec = self.embeddings[i].astype(np.float32)
        return {
            'embedding': vec,
            'word': self.idx_to_word[i],
        }


if __name__=='__main__':
    # Directory holding the concatenated GloVe + visual embedding files.
    concat_dir = os.path.join(
        os.getcwd(),
        'symlinks/exp/google_images/' + \
        'normalized_resnet_embeddings_recon_loss_trained_on_google/' + \
        'concat_glove_and_visual')
    data_const = ConcatEmbedDatasetConstants(concat_dir)
    dataset = ConcatEmbedDataset(data_const)
    dataloader = DataLoader(dataset,batch_size=100)
    # NOTE(review): interactive debugger breakpoint fires on every batch —
    # leftover debugging aid; remove before unattended runs.
    for data in dataloader:
        import pdb; pdb.set_trace()

def get_train_val_loader(opt):
    """Build the MNMT training DataLoader and, for small-scale or
    adaptation runs, a validation DataLoader (otherwise ``valid_loader``
    is ``None`` and a length-sorted batch sampler drives training)."""

    def worker_init_fn(worker_id):
        # Seed every loader worker deterministically from the run seed.
        random.seed(worker_id + opt.random_seed)
        np.random.seed(worker_id + opt.random_seed)

    use_adapt = opt.adapt_init_MNMT or opt.adapt_prop_MNMT

    def build_dataset(mode):
        # All dataset arguments are shared except the train/valid mode.
        return BT_MNMTDataset(
            data_path=opt.data_path,
            words_limit=opt.eval_words_limit,
            src_lang=opt.src_lang,
            tgt_lang=opt.tgt_lang,
            mode=mode,
            scale=opt.d_scale,
            bpe=opt.bpe,
            adapt=use_adapt,
        )

    train_dataset = build_dataset("train")
    common = dict(num_workers=opt.workers, worker_init_fn=worker_init_fn)

    if opt.d_scale == "small" or use_adapt:
        train_loader = DataLoader(
            train_dataset,
            batch_size=opt.MNMT_batch_size,
            shuffle=True,
            collate_fn=train_dataset.train_collate_fn,
            **common,
        )
        valid_dataset = build_dataset("valid")
        valid_loader = DataLoader(
            valid_dataset,
            batch_size=opt.MNMT_batch_size,
            shuffle=False,
            collate_fn=valid_dataset.val_eval_collate_fn,
            **common,
        )
    else:
        # Large-scale runs batch by sorted target length instead; no
        # validation loader is produced in this mode.
        sampler = MyBatchSampler(
            sorted_insts=train_dataset.MNMT_tgt_insts,
            batch_size=opt.MNMT_batch_size,
            shuffle=True,
            drop_last=False,
        )
        train_loader = DataLoader(
            train_dataset,
            batch_sampler=sampler,
            collate_fn=train_dataset.train_collate_fn,
            **common,
        )
        valid_loader = None

    return train_loader, valid_loader
def entangled_loss(targets, receiver_output_1, receiver_output_2):
    """Joint two-attribute classification loss.

    Parameters
    ----------
    targets : LongTensor, shape (batch, 2)
        Column 0 / column 1 hold the class index for each attribute.
    receiver_output_1, receiver_output_2 : FloatTensor, shape (batch, n_classes)
        Logits for the first / second attribute.

    Returns
    -------
    loss : FloatTensor, shape (batch,)
        Per-sample sum of the two cross-entropy losses (not reduced).
    logs : dict
        'accuracy' (fraction with BOTH attributes correct),
        'first_accuracy', 'second_accuracy' as Python floats.
    """
    # Per-sample hit indicators; detached so metrics never backpropagate.
    acc_1 = (receiver_output_1.argmax(dim=1) == targets[:, 0]).detach().float()
    acc_2 = (receiver_output_2.argmax(dim=1) == targets[:, 1]).detach().float()
    loss_1 = F.cross_entropy(receiver_output_1, targets[:, 0], reduction="none")
    loss_2 = F.cross_entropy(receiver_output_2, targets[:, 1], reduction="none")
    # A sample only counts as correct when both attributes are right.
    acc = (acc_1 * acc_2).mean(dim=0)
    loss = loss_1 + loss_2
    # Plain string keys (the original f-strings had no placeholders).
    return loss, {'accuracy': acc.item(),
                  'first_accuracy': acc_1.mean(dim=0).item(),
                  'second_accuracy': acc_2.mean(dim=0).item()}


if __name__ == "__main__":
    # Train the Vision model until it reaches 99% joint accuracy.
    full, _, _ = prepare_datasets(5, 2)
    loader = DataLoader(full, batch_size=32, shuffle=True)
    model = Vision()
    optimizer = torch.optim.Adam(model.parameters())
    for epoch in range(100):
        acc = 0.0
        n_batches = 0
        for input, target in tqdm(loader):
            output_1, output_2 = model(input)
            loss, logs = entangled_loss(target, output_1, output_2)
            acc += logs['accuracy']
            optimizer.zero_grad()
            loss.mean().backward()
            optimizer.step()
            n_batches += 1
        # BUG FIX: the epoch mean previously divided by the last enumerate
        # index i (= len(loader) - 1), overestimating the accuracy.
        mean_acc = acc / n_batches
        print(mean_acc)
        if mean_acc > 0.99:
            break
    torch.save(model.state_dict(), 'vision_model.pth')
Beispiel #13
0
    num_hops=args.num_hops, 
    percent=args.train_percent, 
    split='train', 
    use_coalesce=use_coalesce, 
    node_label=args.node_label, 
    ratio_per_hop=args.ratio_per_hop, 
    max_nodes_per_hop=args.max_nodes_per_hop, 
    directed=directed, 
) 
if False:  # visualize some graphs (debug-only block, disabled via constant)
    import networkx as nx
    from torch_geometric.utils import to_networkx
    import matplotlib
    matplotlib.use("Agg")  # headless backend so savefig works w/o a display
    import matplotlib.pyplot as plt
    loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
    for g in loader:
        f = plt.figure(figsize=(20, 20))
        limits = plt.axis('off')
        g = g.to(device)
        node_size = 100
        with_labels = True
        # Convert the PyG graph to networkx, carrying node attribute 'z'.
        G = to_networkx(g, node_attrs=['z'])
        labels = {i: G.nodes[i]['z'] for i in range(len(G))}
        nx.draw(G, node_size=node_size, arrows=True, with_labels=with_labels,
                labels=labels)
        f.savefig('tmp_vis.png')
        pdb.set_trace()  # pause after each graph so the image can be inspected

dataset_class = 'SEALDynamicDataset' if args.dynamic_val else 'SEALDataset'
val_dataset = eval(dataset_class)(
        if n < NN-NN2:
            partition['train'].remove(f'{n}')
        else:
            partition['validation'].remove(f'{n}')
            
    
# Wrap both partitions as datasets (labels dictionary is shared).
training_set = MyDataSet(partition['train'], labels)
validation_set = MyDataSet(partition['validation'], labels)

bs = 10

# Shared loader settings: shuffle both splits, single-process loading.
params = {'batch_size': bs, 'shuffle': True, 'num_workers': 0}

training_loader = DataLoader(training_set, **params)
validation_loader = DataLoader(validation_set, **params)

##### Optimization

model = BackwardNN().to(device)

# RMSprop with a small learning rate; smoothing/epsilon at their defaults,
# no weight decay, no momentum, non-centered variant.
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-5, alpha=0.99,
                                eps=1e-8, weight_decay=0, momentum=0,
                                centered=False)

max_epoch = 30

# Per-epoch mean train / validation losses, filled in by train().
loss_plot = torch.zeros(max_epoch)
loss_val_plot = torch.zeros(max_epoch)

for epoch in range(max_epoch):
    loss_plot[epoch], loss_val_plot[epoch] = train(
        model, device, training_loader, optimizer, epoch)
# create pytorch compatible datasets that have an API for automated loaders
def _as_image_dataset(x, y):
    """Wrap arrays as a TensorDataset of (N, 1, 28, 28) floats + long labels."""
    images = torch.Tensor(x.tolist()).view(-1, 1, 28, 28)
    targets = torch.Tensor(y.tolist()).long()
    return TensorDataset(images, targets)

trainset = _as_image_dataset(x_train, y_train)
valset = _as_image_dataset(x_val, y_val)
testset = _as_image_dataset(x_test, y_test)

# mini-batch loaders; train/val streams are reshuffled each epoch
trainloader = DataLoader(trainset, batch_size=250, shuffle=True)
valloader = DataLoader(valset, batch_size=250, shuffle=True)

# the test set keeps its original order via an explicit sequential sampler
testsampler = SequentialSampler(testset)
testloader = DataLoader(testset, batch_size=250, shuffle=False,
                        sampler=testsampler)


# define and initialize a multilayer-perceptron, a criterion, and an optimizer
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
Beispiel #16
0
            total_loss_test += loss.data
        accuracy = float(correct) / len(data_tar.dataset)
        res = 'Test: total loss: {:.6f}, correct: [{}/{}], testing accuracy: {:.4f}'.format(
            total_loss_test, correct, len(data_tar.dataset), accuracy
        )
        res1 = '{:} {:.6f} {:.4f}'.format(e,total_loss_test,accuracy)
    tqdm.write(res)
    RESULT_TEST.append([e, total_loss_test, accuracy])
    log_test.write(res1 + '\n')


if __name__ == '__main__':
    #rootdir = '../../../data/office_caltech_10/'
    torch.manual_seed(1)  # fixed seed for reproducibility
    i = 0  # index selecting which source/target data split to load
    data_src = DataLoader(dataset = MyTrainData_src(i),batch_size=BATCH_SIZE[0],shuffle=True, drop_last= True)
    data_tar = DataLoader(dataset = MyTrainData_tar(i),batch_size=BATCH_SIZE[1],shuffle=True, drop_last= True)
    '''
    data_src = data_loader.load_data(
        root_dir=rootdir, domain='amazon', batch_size=BATCH_SIZE[0])
    data_tar = data_loader.load_test(
        root_dir=rootdir, domain='webcam', batch_size=BATCH_SIZE[1])
    '''
    # DaNN: 2048-d input features, 256 hidden units, 65 output classes.
    model = DaNN.DaNN(n_input=2048, n_hidden=256, n_class=65)
    model = model.to(DEVICE)
    # SGD with momentum and L2 regularisation from module-level constants.
    optimizer = optim.SGD(
        model.parameters(),
        lr=LEARNING_RATE,
        momentum=MOMEMTUN,
        weight_decay=L2_WEIGHT
    )
Beispiel #17
0
model = Classifier().cuda()
loss = nn.CrossEntropyLoss(
)  # classification task, so use CrossEntropyLoss
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer: Adam
optimizer = torch.optim.SGD(model.parameters(), lr=0.001,
                            momentum=0.9)  # optimizer with SGDM

num_epoch = 100

batch_size = 64
# Retrain on train + validation combined (common final-stage trick).
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, None)
train_val_loader = DataLoader(train_val_set,
                              batch_size=batch_size,
                              shuffle=True)

# NOTE(review): `loss` and `optimizer` are re-assigned below for model_best;
# the objects created above for `model` are never used in this snippet.
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss(
)  # classification task, so use CrossEntropyLoss
# optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer: Adam
optimizer = torch.optim.SGD(model_best.parameters(), lr=0.001,
                            momentum=0.9)  # optimizer with SGDM
num_epoch = 100
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    def __init__(self,
                 use_gpu,
                 source_names,
                 target_names,
                 **kwargs
                 ):
        """Build train/query/gallery dataloaders for person re-identification.

        Merges every dataset in ``source_names`` into one training list
        (person and camera IDs are offset so they stay globally unique),
        then creates per-target query/gallery test loaders.
        """
        super(ImageDataManager, self).__init__(use_gpu, source_names, target_names, **kwargs)

        print('=> Initializing TRAIN (source) datasets')
        train = []
        self._num_train_pids = 0  # running offset for person IDs
        self._num_train_cams = 0  # running offset for camera IDs

        for name in self.source_names:
            dataset = init_imgreid_dataset(
                root=self.root, name=name)

            # Shift this dataset's IDs past all previously merged datasets
            # so (pid, camid) never collide across sources.
            for img_path, pid, camid in dataset.train:
                pid += self._num_train_pids
                camid += self._num_train_cams
                train.append((img_path, pid, camid))

            self._num_train_pids += dataset.num_train_pids
            self._num_train_cams += dataset.num_train_cams

        # NOTE(review): self.train_sampler (set by the base class, presumably
        # a sampler-name/spec) is replaced here by the built sampler object.
        self.train_sampler = build_train_sampler(
            train, self.train_sampler,
            train_batch_size=self.train_batch_size,
            num_instances=self.num_instances,
        )
        # shuffle=False because ordering is delegated to the sampler.
        self.trainloader = DataLoader(
            ImageDataset(train, transform=self.transform_train), sampler=self.train_sampler,
            batch_size=self.train_batch_size, shuffle=False, num_workers=self.workers,
            pin_memory=self.use_gpu, drop_last=True
        )
        # Channel-wise statistics over the whole training stream (side effect:
        # consumes one full pass over the loader).
        mean, std = calculate_mean_and_std(self.trainloader, len(train))
        print('mean and std:', mean, std)

        print('=> Initializing TEST (target) datasets')
        self.testloader_dict = {name: {'query': None, 'gallery': None} for name in target_names}
        self.testdataset_dict = {name: {'query': None, 'gallery': None} for name in target_names}

        for name in self.target_names:
            dataset = init_imgreid_dataset(
                root=self.root, name=name)

            self.testloader_dict[name]['query'] = DataLoader(
                ImageDataset(dataset.query, transform=self.transform_test),
                batch_size=self.test_batch_size, shuffle=False, num_workers=self.workers,
                pin_memory=self.use_gpu, drop_last=False
            )

            self.testloader_dict[name]['gallery'] = DataLoader(
                ImageDataset(dataset.gallery, transform=self.transform_test),
                batch_size=self.test_batch_size, shuffle=False, num_workers=self.workers,
                pin_memory=self.use_gpu, drop_last=False
            )

            # Keep raw sample lists too, for evaluation code that needs paths.
            self.testdataset_dict[name]['query'] = dataset.query
            self.testdataset_dict[name]['gallery'] = dataset.gallery

        print('\n')
        print('  **************** Summary ****************')
        print('  train names      : {}'.format(self.source_names))
        print('  # train datasets : {}'.format(len(self.source_names)))
        print('  # train ids      : {}'.format(self.num_train_pids))
        print('  # train images   : {}'.format(len(train)))
        print('  # train cameras  : {}'.format(self.num_train_cams))
        print('  test names       : {}'.format(self.target_names))
        print('  *****************************************')
        print('\n')
def test(
        cfg,
        data,
        weights=None,
        batch_size=16,
        img_size=416,
        conf_thres=0.001,
        iou_thres=0.6,  # for nms
        save_json=False,
        single_cls=False,
        augment=False,
        model=None,
        dataloader=None):
    """Run YOLOv3-style evaluation on the validation images listed in `data`.

    When `model` is None a Darknet model is built from `cfg` and loaded from
    `weights`; otherwise the supplied (in-training) model is evaluated.
    Returns ((mp, mr, mAP, mf1, *mean GIoU/obj/cls losses), per-class mAPs).
    """
    # Initialize/load model and set device
    if model is None:
        device = torch_utils.select_device(opt.device, batch_size=batch_size)
        verbose = opt.task == 'test'

        # Remove previous batch visualisations
        for f in glob.glob('test_batch*.png'):
            os.remove(f)

        # Initialize model
        model = Darknet(cfg, img_size)

        # Load weights
        attempt_download(weights)
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            load_darknet_weights(model, weights)

        # Fuse conv + batchnorm layers for faster inference
        model.fuse()
        model.to(device)

        if device.type != 'cpu' and torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:  # called by train.py
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_cfg(data)
    nc = 1 if single_cls else int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names
    # IoU thresholds 0.5:0.95 for COCO-style mAP (comment text restored
    # from garbled '[email protected]' extraction artifacts).
    iouv = torch.linspace(0.5, 0.95,
                          10).to(device)  # iou vector for mAP@0.5:0.95
    iouv = iouv[0].view(1)  # comment this line out for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if dataloader is None:
        dataset = LoadImagesAndLabels(path,
                                      img_size,
                                      batch_size,
                                      rect=True,
                                      single_cls=opt.single_cls)
        batch_size = min(batch_size, len(dataset))
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                num_workers=min([
                                    os.cpu_count(),
                                    batch_size if batch_size > 1 else 0, 8
                                ]),
                                pin_memory=True,
                                collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    # Warm-up forward pass on GPU (allocates CUDA buffers once up front).
    _ = model(torch.zeros(
        (1, 3, img_size, img_size),
        device=device)) if device.type != 'cpu' else None  # run once
    coco91class = coco80_to_coco91_class()
    # NOTE(review): the '[email protected]' column header below looks like a
    # garbled 'mAP@0.5' from text extraction — confirm against the original.
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 '[email protected]', 'F1')
    p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)  # GIoU, obj, cls accumulators
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        imgs = imgs.to(
            device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = imgs.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Plot images with bounding boxes (first batch only)
        f = 'test_batch%g.png' % batch_i  # filename
        if batch_i < 1 and not os.path.exists(f):
            plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(
                imgs, augment=augment)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Compute loss
            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_out, targets,
                                     model)[1][:3]  # GIoU, obj, cls

            # Run NMS
            t = torch_utils.time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # No detections but targets exist: record an all-miss entry.
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[si].shape[1:], box, shapes[si][0],
                             shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(p[5])],
                        'bbox': [round(x, 3) for x in b],
                        'score': round(p[4], 5)
                    })

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    # (comments fixed: ti indexes targets, pi predictions)
                    ti = (cls == tcls_tensor).nonzero().view(
                        -1)  # target indices
                    pi = (cls == pred[:,
                                      5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(
                            1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(
                1), ap[:, 0]  # [P, R, mAP@0.5:0.95, mAP@0.5]
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose or save_json:
        t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
            img_size, img_size, batch_size)  # tuple
        print(
            'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
            % t)

    # Save JSON
    if save_json and map and len(jdict):
        print('\nCOCO mAP with pycocotools...')
        imgIds = [
            int(Path(x).stem.split('_')[-1])
            for x in dataloader.dataset.img_files
        ]
        with open('results.json', 'w') as file:
            json.dump(jdict, file)

        # NOTE(review): if this import fails, execution still falls through to
        # the COCO(...) call below and raises NameError — consider returning
        # early from the except block.
        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval
        except:
            print(
                'WARNING: missing pycocotools package, can not compute official COCO mAP. See requirements.txt.'
            )

        # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
        cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')
                      [0])  # initialize COCO ground truth api
        cocoDt = cocoGt.loadRes('results.json')  # initialize COCO pred api

        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        # mf1, map = cocoEval.stats[:2]  # update to pycocotools results (mAP@0.5:0.95, mAP@0.5)

    # Return results
    # NOTE(review): `map` shadows the builtin here; it is the scalar mAP.
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps
Beispiel #20
0
def main(args):
    """Train (or, with ``args.eval``, only evaluate) a DETR-style detection model.

    Sets up distributed training, builds the model/criterion/postprocessors,
    optionally resumes from a checkpoint with the classification and query
    heads stripped (for fine-tuning on a different class set), then runs the
    epoch loop with per-epoch evaluation and checkpointing.

    Fix: the resume path below deletes ``class_embed.*`` and
    ``query_embed.weight`` from the checkpoint, so the subsequent
    ``load_state_dict`` must use ``strict=False`` — a strict load raises
    ``RuntimeError`` on the missing keys.
    """
    utils.init_distributed_mode(args)
    print("git:\n  {}\n".format(utils.get_sha()))

    if args.frozen_weights is not None:
        assert args.masks, "Frozen training is meant for segmentation only"
    print(args)

    device = torch.device(args.device)

    # fix the seed for reproducibility; offset by rank so workers differ
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    model, criterion, postprocessors = build_model(args)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    # Backbone parameters get their own (typically smaller) learning rate.
    param_dicts = [
        {"params": [p for n, p in model_without_ddp.named_parameters() if "backbone" not in n and p.requires_grad]},
        {
            "params": [p for n, p in model_without_ddp.named_parameters() if "backbone" in n and p.requires_grad],
            "lr": args.lr_backbone,
        },
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
                                  weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)

    dataset_train = build_dataset(image_set='train', args=args)
    dataset_val = build_dataset(image_set='val', args=args)

    if args.distributed:
        sampler_train = DistributedSampler(dataset_train)
        sampler_val = DistributedSampler(dataset_val, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)
        sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True)

    data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn, num_workers=args.num_workers)
    data_loader_val = DataLoader(dataset_val, args.batch_size, sampler=sampler_val,
                                 drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers)

    if args.dataset_file == "coco_panoptic":
        # We also evaluate AP during panoptic training, on original coco DS
        coco_val = datasets.coco.build("val", args)
        base_ds = get_coco_api_from_dataset(coco_val)
    else:
        base_ds = get_coco_api_from_dataset(dataset_val)

    if args.frozen_weights is not None:
        checkpoint = torch.load(args.frozen_weights, map_location='cpu')
        model_without_ddp.detr.load_state_dict(checkpoint['model'])

    output_dir = Path(args.output_dir)
    if args.resume:
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location='cpu', check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')

        # Drop the classification and object-query heads so the checkpoint
        # can be fine-tuned on a dataset with a different number of classes.
        del checkpoint['model']['class_embed.weight']
        del checkpoint['model']['class_embed.bias']
        del checkpoint['model']['query_embed.weight']

        # strict=False: the keys deleted above are absent from the state dict;
        # the default strict load would raise RuntimeError on missing keys.
        model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
        if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            args.start_epoch = checkpoint['epoch'] + 1

    if args.eval:
        test_stats, coco_evaluator = evaluate(model, criterion, postprocessors,
                                              data_loader_val, base_ds, device, args.output_dir)
        if args.output_dir:
            utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, output_dir / "eval.pth")
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reshuffle differently each epoch across workers.
            sampler_train.set_epoch(epoch)
        train_stats = train_one_epoch(
            model, criterion, data_loader_train, optimizer, device, epoch,
            args.clip_max_norm)
        lr_scheduler.step()
        if args.output_dir:
            checkpoint_paths = [output_dir / 'checkpoint.pth']
            # extra checkpoint before LR drop and every 100 epochs
            if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 100 == 0:
                checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')
            for checkpoint_path in checkpoint_paths:
                utils.save_on_master({
                    'model': model_without_ddp.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'epoch': epoch,
                    'args': args,
                }, checkpoint_path)

        test_stats, coco_evaluator = evaluate(
            model, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir
        )

        log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
                     **{f'test_{k}': v for k, v in test_stats.items()},
                     'epoch': epoch,
                     'n_parameters': n_parameters}

        if args.output_dir and utils.is_main_process():
            with (output_dir / "log.txt").open("a") as f:
                f.write(json.dumps(log_stats) + "\n")

            # for evaluation logs
            if coco_evaluator is not None:
                (output_dir / 'eval').mkdir(exist_ok=True)
                if "bbox" in coco_evaluator.coco_eval:
                    filenames = ['latest.pth']
                    if epoch % 50 == 0:
                        filenames.append(f'{epoch:03}.pth')
                    for name in filenames:
                        torch.save(coco_evaluator.coco_eval["bbox"].eval,
                                   output_dir / "eval" / name)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
Beispiel #21
0
    def predict(self, x):
        """Run a forward pass and return the index of the highest-scoring class per sample."""
        logits = self.forward(x)
        return logits.argmax(dim=1)

# MNIST classification setup: data, model, optimizer and logging.
# NOTE(review): Net and Logger are defined elsewhere in this file/project.
dataset_dir = './MNIST/'
# Resize 28x28 MNIST digits to 32x32 before tensor conversion
# (presumably a LeNet-style input size — confirm against Net()).
transform = transforms.Compose([transforms.Resize((32, 32)),
                                transforms.ToTensor()])
batch_size = 64

# download=True fetches MNIST into dataset_dir on first run.
train_dataset = torchvision.datasets.MNIST(root=dataset_dir, train=True, transform=transform, download=True)
val_dataset = torchvision.datasets.MNIST(root=dataset_dir, train=False, transform=transform, download=True)

print('train dataset: {} \nval dataset: {}'.format(len(train_dataset), len(val_dataset)))

# Shuffle only the training split; validation order stays fixed.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Prefer the first CUDA device when available.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')
net = Net()
net.to(device)
# SGD with momentum; halve the learning rate every 10 epochs.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
loss_fc = nn.CrossEntropyLoss()
logger = Logger('./logs')

# Global iteration counter used by the training loop below.
iter_count = 0
NUM_EPOCH = 300
for epoch in range(NUM_EPOCH):
Beispiel #22
0
def main(args=None):
    """Train a visual-relationship/attribute model on top of a frozen (or
    fine-tuned) detector.

    Parses command-line options, builds the OpenImages VRD dataset with a
    balanced sampler, optionally restores detector/relationship/attribute
    checkpoints, and runs the training loop with gradient accumulation
    (``--iterations`` minibatches per optimizer step).

    Fix: ``--lr`` was declared ``type=int`` with a float default (1e-4);
    any explicit ``--lr 0.0001`` would crash in ``int()`` and an integer
    learning rate is meaningless — it is now ``type=float``.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv, coco or openimages')
    parser.add_argument('--data_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--resume', help='Checkpoint to load the model from')
    parser.add_argument('--resume_attr', help='Checkpoint to load the attributes model from')
    parser.add_argument('--resume_rel', help='Checkpoint to load the relationships from')
    parser.add_argument('--detector_snapshot', help='Detector snapshot')
    parser.add_argument('--finetune_detector', action='store_true', default=False, help='Enable finetuning the detector')
    parser.add_argument('--lr_step_size', type=int, default=20, help="After how many epochs the lr is decreased")
    # type=float (was type=int): int('0.0001') raises and an int lr is wrong.
    parser.add_argument('--lr', type=float, default=1e-4, help="Initial learning rate")

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--bs', help='Batch size', type=int, default=64)
    parser.add_argument('--net', help='Network to use', default='fasterrcnn')
    parser.add_argument('--train_rel', action='store_true', default=False, help='Enable training relationships')
    parser.add_argument('--train_attr', action='store_true', default=False, help='Enable training attributes')

    parser.add_argument('--log_interval', help='Iterations before outputting stats', type=int, default=1)
    parser.add_argument('--checkpoint_interval', help='Iterations before saving an intermediate checkpoint', type=int,
                        default=80)
    parser.add_argument('--iterations', type=int, help='Iterations for every batch', default=32)

    # NOTE(review): `parser` is rebound to the parsed namespace from here on.
    parser = parser.parse_args()

    # asserts
    assert parser.train_rel or parser.train_attr, "You have to train one of attribute or relation networks!"
    assert not (not parser.train_rel and parser.resume_rel), "It is useless to load relationships when you do not train them!"
    assert not (not parser.train_attr and parser.resume_attr), "It is useless to load attributes when you do not train them!"

    # This becomes the minibatch size: gradients are accumulated over
    # `iterations` minibatches, keeping the effective batch size at --bs.
    parser.bs = parser.bs // parser.iterations
    print('With {} iterations the effective batch size is {}'.format(parser.iterations, parser.bs))

    # Create the data loaders
    if parser.dataset == 'openimages':
        if parser.data_path is None:
            raise ValueError('Must provide --data_path when training on OpenImages')

        dataset_train = OidDatasetVRD(parser.data_path, subset='train',
                                   transform=Compose(
                                       [ToTensor(), Augment(), Resizer(min_side=600, max_side=1000)]))
        # dataset_val = OidDatasetVRD(parser.data_path, subset='validation',
        #                         transform=Compose([ToTensor(), Resizer(min_side=600, max_side=1000)]))

    elif parser.dataset == 'dummy':
        # dummy dataset used only for debugging purposes
        raise NotImplementedError()

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # if training one of relationships or attributes, balance!
    # if not (parser.train_attr and parser.train_rel):
    print('Dataloader is using the BalancedSampler!')
    sampler_train = BalancedSampler(dataset_train, batch_size=parser.bs, train_rel=parser.train_rel, train_attr=parser.train_attr)
    dataloader_train = DataLoader(dataset_train, num_workers=8, collate_fn=collate_fn, batch_sampler=sampler_train)
    # dataloader_train = DataLoader(dataset_train, num_workers=8, batch_size=parser.bs, collate_fn=collate_fn, shuffle=True)

    # if dataset_val is not None:
    #    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #    dataloader_val = DataLoader(dataset_val, num_workers=12, collate_fn=collate_fn, batch_sampler=sampler_val)

    # Create the detection model
    detector = create_detection_model(dataset_train.num_classes(), parser)

    # Create the experiment folder (name encodes mode/net/dataset/depth/timestamp)
    if parser.train_attr and parser.train_rel:
        mode = 'attr-and-rel'
    elif parser.train_attr:
        mode = 'only-attr'
    elif parser.train_rel:
        mode = 'only-rel'
    experiment_fld = 'vrd_{}_experiment_{}_{}_resnet{}_{}'.format(mode, parser.net, parser.dataset, parser.depth,
                                                        time.strftime("%Y%m%d%H%M%S", time.localtime()))
    experiment_fld = os.path.join('outputs', experiment_fld)
    if not os.path.exists(experiment_fld):
        os.makedirs(experiment_fld)

    logger = SummaryWriter(experiment_fld)

    use_gpu = True

    #if use_gpu:
    #    detector = detector.cuda()
    #    detector = torch.nn.DataParallel(detector).cuda()

    if parser.detector_snapshot:
        checkpoint = torch.load(parser.detector_snapshot)
        weights = checkpoint['model']
        # Strip a possible DataParallel 'module.' prefix from the keys.
        weights = {k.replace('module.', ''): v for k, v in weights.items()}
        detector.load_state_dict(weights)
        print('Correctly loaded the detector checkpoint {}'.format(parser.detector_snapshot))

    # Create the VRD model given the detector
    model = VRD(detector, dataset=dataset_train, train_relationships=parser.train_rel,
                train_attributes=parser.train_attr, finetune_detector=parser.finetune_detector)
    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(model).cuda()

    optimizer = optim.Adam(model.parameters(), lr=parser.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=parser.lr_step_size)

    # Load checkpoint if needed
    start_epoch = 0
    # load relationships; optimizer/scheduler state is resumed only when a
    # single checkpoint kind is given, to avoid conflicting restores.
    if parser.resume_rel:
        print('Loading relationship checkpoint {}'.format(parser.resume_rel))
        rel_checkpoint = torch.load(parser.resume_rel)
        model.module.relationships_net.load_state_dict(rel_checkpoint['model_rel'])
        if not parser.resume_attr:
            print('Resuming also scheduler and optimizer...')
            start_epoch = rel_checkpoint['epoch']
            optimizer.load_state_dict(rel_checkpoint['optimizer'])
            scheduler.load_state_dict(rel_checkpoint['scheduler'])
    if parser.resume_attr:
        print('Loading attributes checkpoint {}'.format(parser.resume_attr))
        attr_checkpoint = torch.load(parser.resume_attr)
        model.module.attributes_net.load_state_dict(attr_checkpoint['model_attr'])
        if not parser.resume_rel:
            print('Resuming also scheduler and optimizer...')
            start_epoch = attr_checkpoint['epoch']
            optimizer.load_state_dict(attr_checkpoint['optimizer'])
            scheduler.load_state_dict(attr_checkpoint['scheduler'])
    if parser.resume:
        print('Loading both attributes and relationships models {}'.format(parser.resume))
        checkpoint = torch.load(parser.resume)
        model.module.relationships_net.load_state_dict(checkpoint['model_rel'])
        model.module.attributes_net.load_state_dict(checkpoint['model_attr'])
        start_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
    print('Checkpoint loaded!')

    loss_hist = collections.deque(maxlen=500)

    model.train()
    # model.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in tqdm.trange(start_epoch, parser.epochs):
        logger.add_scalar("learning_rate", optimizer.param_groups[0]['lr'],
                          epoch_num * len(dataloader_train))

        model.train()
        # model.module.freeze_bn()

        epoch_loss = []
        log_losses_mean = {}
        running_loss_sum = 0

        data_progress = tqdm.tqdm(dataloader_train)
        old_tensors_set = {}
        optimizer.zero_grad()
        for minibatch_idx, data in enumerate(data_progress):

            images, targets = data

            images = list(image.cuda().float() for image in images)
            targets = [{k: v.cuda() for k, v in t.items()} for t in targets]
            #images, targets = images.cuda(), targets.cuda()

            loss_dict = model(images, targets)
            #classification_loss = classification_loss.mean()
            #regression_loss = regression_loss.mean()
            #loss = classification_loss + regression_loss
            loss = sum(loss for loss in loss_dict.values())
            monitor_loss = loss.clone().detach()
            # Scale down so accumulated gradients match the effective batch.
            loss /= parser.iterations
            running_loss_sum += float(loss.item())

            loss.backward()

            if len(log_losses_mean) == 0:
                log_losses_mean = clone_tensor_dict(loss_dict)
                log_losses_mean['total_loss'] = float(monitor_loss.item())
            else:
                loss_dict['total_loss'] = monitor_loss
                log_losses_mean = Counter(clone_tensor_dict(loss_dict)) + Counter(log_losses_mean)

            if (minibatch_idx + 1) % parser.iterations == 0:
                data_progress.set_postfix(dict(it=minibatch_idx // parser.iterations, loss=running_loss_sum))

                # all minibatches have been accumulated. Zero the grad
                optimizer.step()
                optimizer.zero_grad()
                running_loss_sum = 0

            # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)

            # loss_hist.append(float(loss))
            # epoch_loss.append(float(loss))

            if (minibatch_idx + 1) % (parser.log_interval * parser.iterations) == 0:
                # compute the mean
                log_losses_mean = {k: (v / (parser.log_interval * parser.iterations)) for k, v in log_losses_mean.items()}

                logger.add_scalars("logs/losses", log_losses_mean,
                                   epoch_num * len(dataloader_train) + minibatch_idx)
                log_losses_mean = {}
            # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

            if (minibatch_idx + 1) % (parser.checkpoint_interval * parser.iterations) == 0:
                # Save an intermediate checkpoint
                save_checkpoint({
                    'model_rel': model.module.relationships_net.state_dict() if parser.train_rel else None,
                    'model_attr': model.module.attributes_net.state_dict() if parser.train_attr else None,
                    'model_det': model.module.detector.state_dict() if parser.finetune_detector else None,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'epoch': epoch_num
                }, experiment_fld, overwrite=True)

            if (minibatch_idx + 1) % 5 == 0:
                # flush cuda memory every tot iterations
                torch.cuda.empty_cache()

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, model)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, model)

        # TODO: write evaluation code for openimages
        scheduler.step()

        # End-of-epoch checkpoint (kept, not overwritten).
        save_checkpoint({
            'model_rel': model.module.relationships_net.state_dict() if parser.train_rel else None,
            'model_attr': model.module.attributes_net.state_dict() if parser.train_attr else None,
            'model_det': model.module.detector.state_dict() if parser.finetune_detector else None,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'epoch': epoch_num
        }, experiment_fld, overwrite=False)

    model.eval()
                'LEARN_RATE': LEARN_RATE,
                'EPOCHS': EPOCHS,
                'WARMUP_STEPS': WARMUP_STEPS,
                'SEQUENCE_LENGTH': SEQUENCE_LENGTH,
            },
            config_file,
            sort_keys=True,
            indent=4,
            separators=(',', ': '))

# Load and initialize model
# load_model returns a (tokenizer_cls, config_cls, model_cls) triple for MODEL_PREFIX.
MODEL_CLASS = load_model(MODEL_PREFIX)
TOKENIZER = MODEL_CLASS[0].from_pretrained(MODEL_NAME)
# num_labels=3: presumably a 3-way sentiment task — confirm against load_semeval.
CONFIG = MODEL_CLASS[1].from_pretrained(MODEL_NAME, num_labels=3)
MODEL = MODEL_CLASS[2].from_pretrained(MODEL_NAME, config=CONFIG)

# Load training data
# Concatenate the SemEval training splits of all configured languages,
# tokenize to fixed-length sequences, then wrap in a shuffled DataLoader.
# NOTE(review): `train_dataset` is rebound from Dataset to DataLoader below.
train_dataset = dataset(
    tokenize(chain(*(load_semeval(DATASET, 'train', lang) for lang in LANGS)),
             TOKENIZER, SEQUENCE_LENGTH))
train_sampler = RandomSampler(train_dataset)
train_dataset = DataLoader(train_dataset,
                           sampler=train_sampler,
                           batch_size=TRAIN_BATCH_SIZE,
                           drop_last=True)

# Run Training
training(train_dataset, val_datasets(TOKENIZER, SEQUENCE_LENGTH), MODEL,
         EXPERIMENT, LEARN_RATE, WARMUP_STEPS, TRAIN_BATCH_SIZE, EPOCHS,
         ACCUMULATION_STEPS)
Beispiel #24
0
def train(args):
    """Train a 1-D TCN to invert seismic traces to acoustic impedance.

    Loads the train/val split, fits the model with MSE loss, logs losses and
    intermediate inversions to TensorBoard, then saves the final inversion of
    the Marmousi section to ``results/``.

    Fixes: the loop counter no longer shadows the ``iter`` builtin, and the
    duplicated ``results`` directory branches are collapsed.
    """
    # Create a writer object to log events during training
    writer = SummaryWriter(pjoin('runs', 'exp_1'))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load splits
    x_train, y_train, x_val, y_val, seismic = train_val_split(args)

    # Convert to torch tensors in the form (N, C, L)
    x_train = torch.from_numpy(np.expand_dims(x_train, 1)).float().to(device)
    y_train = torch.from_numpy(np.expand_dims(y_train, 1)).float().to(device)
    x_val = torch.from_numpy(np.expand_dims(x_val, 1)).float().to(device)
    y_val = torch.from_numpy(np.expand_dims(y_val, 1)).float().to(device)
    seismic = torch.from_numpy(np.expand_dims(seismic, 1)).float().to(device)

    # Set up the dataloader for training dataset
    dataset = SeismicLoader(x_train, y_train)
    train_loader = DataLoader(dataset=dataset,
                              batch_size=args.batch_size,
                              shuffle=False)

    # import tcn
    model = TCN(1, 1, args.tcn_layer_channels, args.kernel_size,
                args.dropout).to(device)

    # Set up loss
    criterion = torch.nn.MSELoss()

    # Define Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 weight_decay=args.weight_decay,
                                 lr=args.lr)

    # Set up list to store the losses (seeded with inf so index -1 is valid
    # before the first measurement)
    train_loss = [np.inf]
    val_loss = [np.inf]
    # Renamed from `iter` (shadowed the builtin). NOTE(review): it is
    # incremented once per epoch, not per batch, so TensorBoard global_step
    # advances per epoch — confirm this is intended.
    step = 0
    # Start training
    for epoch in range(args.n_epoch):
        for x, y in train_loader:
            model.train()
            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
            writer.add_scalar(tag='Training Loss',
                              scalar_value=loss.item(),
                              global_step=step)
            if epoch % 20 == 0:
                with torch.no_grad():
                    model.eval()
                    y_pred = model(x_val)
                    loss = criterion(y_pred, y_val)
                    val_loss.append(loss.item())
                    writer.add_scalar(tag='Validation Loss',
                                      scalar_value=loss.item(),
                                      global_step=step)
            print(
                'epoch:{} - Training loss: {:0.4f} | Validation loss: {:0.4f}'.
                format(epoch, train_loss[-1], val_loss[-1]))

            if epoch % 100 == 0:
                # Log an image of the current full-section inversion.
                with torch.no_grad():
                    model.eval()
                    AI_inv = model(seismic)
                fig, ax = plt.subplots()
                ax.imshow(AI_inv[:, 0].detach().cpu().numpy().squeeze().T,
                          cmap="rainbow")
                ax.set_aspect(4)
                writer.add_figure('Inverted Acoustic Impedance', fig, step)
        step += 1

    writer.close()

    # Set up directory to save results
    results_directory = 'results'
    # Standardize the Marmousi seismic section before the final inference.
    seismic_offsets = np.expand_dims(marmousi_seismic().squeeze()[:, 100:600],
                                     1)
    seismic_offsets = torch.from_numpy(
        (seismic_offsets - seismic_offsets.mean()) /
        seismic_offsets.std()).float()
    with torch.no_grad():
        model.cpu()
        model.eval()
        AI_inv = model(seismic_offsets)

    # Make results directory if it doesn't already exist (both branches of the
    # original check printed the same message).
    os.makedirs(results_directory, exist_ok=True)
    print('Saving results...')

    np.save(pjoin(results_directory, 'AI.npy'), marmousi_model().T[:, 100:600])
    np.save(pjoin(results_directory, 'AI_inv.npy'),
            AI_inv.detach().numpy().squeeze())
    print('Results successfully saved.')
Beispiel #25
0
from torchsummary import summary

if __name__ == "__main__":

    # make data
    n_samples = 1000
    n_length = 2048
    n_channel = 18
    n_classes = 6
    data, label = read_data_generated(n_samples=n_samples,
                                      n_length=n_length,
                                      n_channel=n_channel,
                                      n_classes=n_classes)
    print(data.shape, Counter(label))
    dataset = MyDataset(data, label)
    dataloader = DataLoader(dataset, batch_size=64)

    # make model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    ## change the hyper-parameters for your own data
    # (n_block, downsample_gap, increasefilter_gap) = (8, 1, 2)
    # 34 layer (16*2+2): 16, 2, 4
    # 98 layer (48*2+2): 48, 6, 12
    model = ResNet1D(in_channels=n_channel,
                     base_filters=128,
                     kernel_size=16,
                     stride=2,
                     n_block=48,
                     groups=32,
                     n_classes=n_classes,
                     downsample_gap=6,
Beispiel #26
0
    # Set up model
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

    if opt.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt.weights_path))

    model.eval()  # Set in evaluation mode

    dataloader = DataLoader(
        ImageFolder(opt.image_folder, transform= \
            transforms.Compose([DEFAULT_TRANSFORMS, Resize(opt.img_size)])),
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_cpu,
    )

    classes = load_classes(opt.class_path)  # Extracts class labels from file

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    imgs = []  # Stores image paths
    img_detections = []  # Stores detections for each image index

    print("\nPerforming object detection:")
    prev_time = time.time()
    for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
        # Configure input
Beispiel #27
0
        intersection_genes = utils.parse_gene_file(args.gene_file)

        disease_train_data_dirs = []
        disease_tune_data_dirs = []
        for disease in classes:
            _, disease_dirs = datasets.extract_dirs_with_label(data_dirs, disease, sample_to_label)
            train_dirs, tune_dirs = utils.train_tune_split(disease_dirs, args.tune_study_count)

            disease_train_data_dirs.extend(train_dirs)
            disease_tune_data_dirs.extend(tune_dirs)

        train_loaders = []
        for data_dir in disease_train_data_dirs:
            train_dataset = datasets.RefineBioDataset([data_dir], classes, sample_to_label,
                                                      intersection_genes)
            train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, pin_memory=True)
            train_loaders.append(train_loader)

        tune_dataset = datasets.RefineBioDataset(disease_tune_data_dirs, classes, sample_to_label,
                                                 intersection_genes)
        tune_loader = DataLoader(tune_dataset, batch_size=16, num_workers=2, pin_memory=True)

        classifier = models.ThreeLayerNet(len(intersection_genes))

        results = train_with_irm(classifier, train_loaders, tune_loader, args.num_epochs,
                                 args.loss_scaling_factor, logger)

    if mode == 'erm':
        disease_train_data_dirs = []
        disease_tune_data_dirs = []
        for disease in classes:
import utils

# Model path
MODEL_PATH = os.path.join(os.getcwd(), "models")

# Labels
LABELS = utils.get_label_dict()

# Hyperparameters
EPOCHS = 50
BATCH_SIZE = 10
LEARNING_RATE = 0.0001

# Dataset
# utils.load_dataset returns (train, test, bad) splits; drop_last avoids a
# ragged final training batch.
train_set, test_set, bad_set = utils.load_dataset()
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, 
    shuffle=True, drop_last=True)
test_loader = DataLoader(test_set, shuffle=True)

# Device
# NOTE(review): hard-coded CUDA — this crashes on CPU-only machines.
DEVICE = torch.device("cuda")

# Model
# Restore pretrained VAE weights, then build a classifier on top of it.
vae = VAE(512)
vae.load_state_dict(torch.load(os.path.join(MODEL_PATH, "vae_epoch50.pth")))
classifier = VAEClassifier(vae, len(LABELS), 256).to(DEVICE)

# Loss function
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(
 def getBalancedLoader(self, P=14, K=10):
   """Return a DataLoader over this dataset using a balanced batch sampler
   (presumably P classes x K samples per batch — confirm BalancedBatchSampler)."""
   sampler = BalancedBatchSampler(self, n_classes=P, n_samples=K)
   return DataLoader(self, batch_sampler=sampler, num_workers=self.num_workers)
Beispiel #30
0
def readFolder(dataset, folder):
    """Attach an index->class mapping to ``dataset`` and return a DataLoader over it.

    NOTE(review): ``folder`` is unused here; ``collate_fn`` and ``workers``
    come from module scope elsewhere in this file.
    """
    inverse = {}
    for class_name, class_index in dataset.class_to_idx.items():
        inverse[class_index] = class_name
    dataset.idx_to_class = inverse
    return DataLoader(dataset, collate_fn=collate_fn, num_workers=workers)