Example #1
    def train(self, data, emb_dim, hidden_size, p_dropout, lr, l2_penalty,
              epochs):

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        encoder, dual_encoder = self.create_model(emb_dim, hidden_size,
                                                  p_dropout)
        encoder.to(device)
        dual_encoder.to(device)
        self.initialize_df(data)
        training_dataframe, validation_dataframe = train_test_split(
            self.df, test_size=0.05)

        optimizer = torch.optim.Adam(dual_encoder.parameters(),
                                     lr=lr,
                                     weight_decay=l2_penalty)

        loss_func = torch.nn.BCEWithLogitsLoss()
        loss_func.to(device)

        for epoch in range(epochs):
            train(epoch, training_dataframe, self.embed_dict, dual_encoder,
                  optimizer, loss_func, device)
            val(epoch, validation_dataframe, self.embed_dict, dual_encoder,
                optimizer, loss_func, device)
        self.dual_encoder = dual_encoder
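The per-epoch train and val helpers that Example #1 calls are not shown in the original. A minimal sketch of what they might look like, assuming the dataframe holds 'context', 'response', and 'label' columns, that embed_dict maps each text to a tensor, and that dual_encoder(context, response) returns a single logit; all of these names and shapes are assumptions inferred from the call site, not the author's code.

import torch

def train(epoch, df, embed_dict, dual_encoder, optimizer, loss_func, device):
    dual_encoder.train()
    total_loss = 0.0
    for _, row in df.iterrows():
        # column names and embedding lookup are assumed, not from the source
        context = embed_dict[row['context']].to(device)
        response = embed_dict[row['response']].to(device)
        label = torch.tensor([row['label']], dtype=torch.float, device=device)
        optimizer.zero_grad()
        logit = dual_encoder(context, response)  # assumed forward signature
        loss = loss_func(logit, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'epoch {epoch}: train loss {total_loss / len(df):.4f}')

def val(epoch, df, embed_dict, dual_encoder, optimizer, loss_func, device):
    # optimizer is accepted only to mirror the call site; it is not used here
    dual_encoder.eval()
    total_loss = 0.0
    with torch.no_grad():
        for _, row in df.iterrows():
            context = embed_dict[row['context']].to(device)
            response = embed_dict[row['response']].to(device)
            label = torch.tensor([row['label']], dtype=torch.float, device=device)
            total_loss += loss_func(dual_encoder(context, response), label).item()
    print(f'epoch {epoch}: val loss {total_loss / len(df):.4f}')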
Example #2
    def fit(self, x_train, y_train, x_val, y_val):

        x_train, x_val = self.preprocess_data(x_train, x_val)
        train((x_train, x_train, x_val, x_val),
              self.network,
              self.train_fn,
              self.val_fn,
              hlayer_fn=self.hlayer_fn,
              pred_fn=self.pred_fn,
              salmap_fn=self.salmap_fn,
              epochs=self.network_kwargs['epochs'],
              batchsize=self.network_kwargs['batch_size'],
              save_path=self.save_dir)
Example #4
def main():
    global args, best_metric

    # specify dataset
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics400':
        num_class = 400
    elif args.dataset == 'kinetics200':
        num_class = 200
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    data_root = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "data/{}/access".format(args.dataset))

    # create model
    org_model = VideoModule(num_class=num_class,
                            base_model_name=args.arch,
                            dropout=args.dropout,
                            pretrained=args.pretrained,
                            pretrained_model=args.pretrained_model)
    num_params = 0
    for param in org_model.parameters():
        num_params += param.numel()
    print("Model Size is {:.3f}M".format(num_params / 1000000))

    model = torch.nn.DataParallel(org_model).cuda()
    # model = org_model

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_metric = checkpoint['best_metric']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    # Data loading code
    ## train data
    train_transform = torchvision.transforms.Compose([
        org_model.get_augmentation(),
        Stack(mode=args.mode),
        ToTorchFormatTensor(),
        GroupNormalize(),
    ])
    train_dataset = VideoDataSet(root_path=data_root,
                                 list_file=args.train_list,
                                 t_length=args.t_length,
                                 t_stride=args.t_stride,
                                 num_segments=args.num_segments,
                                 image_tmpl=args.image_tmpl,
                                 transform=train_transform,
                                 phase="Train")
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               drop_last=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    ## val data
    val_transform = torchvision.transforms.Compose([
        GroupScale(256),
        GroupCenterCrop(224),
        Stack(mode=args.mode),
        ToTorchFormatTensor(),
        GroupNormalize(),
    ])
    val_dataset = VideoDataSet(root_path=data_root,
                               list_file=args.val_list,
                               t_length=args.t_length,
                               t_stride=args.t_stride,
                               num_segments=args.num_segments,
                               image_tmpl=args.image_tmpl,
                               transform=val_transform,
                               phase="Val")
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.mode != "3D":
        cudnn.benchmark = True

    validate(val_loader, model, criterion, args.print_freq, args.start_epoch)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, args.lr, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch,
              args.print_freq)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            metric = validate(val_loader, model, criterion, args.print_freq,
                              epoch + 1)

            # remember best prec@1 and save checkpoint
            is_best = metric > best_metric
            best_metric = max(metric, best_metric)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_metric': best_metric,
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch + 1, args.experiment_root)
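Example #4 relies on an adjust_learning_rate helper that is not shown. A plausible sketch, assuming the common step-decay scheme of dividing the base LR by 10 at each milestone in lr_steps; the decay factor is an assumption, not taken from the original code.

def adjust_learning_rate(optimizer, base_lr, epoch, lr_steps):
    # 10x decay at every milestone the current epoch has passed (assumed factor)
    decay = 0.1 ** sum(1 for step in lr_steps if epoch >= int(step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = base_lr * decay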
Example #5
def main():
    parser = init_parser()
    args = parser.parse_args()

    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    model = VarPred(in_channels=args.in_channels,
                    out_dim=args.out_dim,
                    input_mode=args.input_mode)

    model.cuda()
    model = nn.DataParallel(model)
    cudnn.benchmark = True
    # optimizer = torch.optim.SGD(model.module.parameters(),
    # lr=args.lr,
    # momentum=0.9)
    optimizer = torch.optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss().cuda()

    train_list, test_list = split_indices(args.data_dir, args.test_ratio)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    train_loader = torch.utils.data.DataLoader(PairDataset(
        args.data_dir,
        train_list,
        image_tmpl='pair_{:06d}.jpg',
        transform=transform),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               worker_init_fn=worker_init_fn)
    test_loader = torch.utils.data.DataLoader(PairDataset(
        args.data_dir,
        test_list,
        image_tmpl='pair_{:06d}.jpg',
        transform=transform),
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              worker_init_fn=worker_init_fn)
    train_logger = os.path.join(args.result_dir, 'train.log')
    val_logger = os.path.join(args.result_dir, 'val.log')

    best_prec1 = 0
    for epoch in range(args.epochs):
        # adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader,
              model,
              criterion,
              optimizer,
              epoch,
              train_logger=train_logger,
              args=args)
        with open(train_logger, 'a') as f:
            f.write('\n')

        save_checkpoint(state={
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        },
                        is_best=False,
                        result_dir=args.result_dir,
                        filename='ep_' + str(epoch) + '_checkpoint.pth.tar')

        # evaluate on validation set (every epoch)
        prec1 = validate(test_loader,
                         model,
                         criterion,
                         val_logger=val_logger,
                         epoch=epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if is_best:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                },
                is_best=is_best,
                result_dir=args.result_dir)
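Example #5 passes a worker_init_fn to both DataLoaders but never defines it. A minimal sketch, assuming its purpose is the usual one of giving each loader worker a distinct, reproducible seed; the seeding scheme is an assumption.

import random

import numpy as np
import torch

def worker_init_fn(worker_id):
    # derive a per-worker seed from the torch seed so NumPy/random draws
    # differ across workers but stay reproducible across runs
    seed = torch.initial_seed() % 2**32
    np.random.seed(seed + worker_id)
    random.seed(seed + worker_id)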
Example #6
def main(config_path):
    path_to_config = Path(config_path)

    if not path_to_config.exists():
        raise ValueError("{} doesn't exist".format(path_to_config))
    elif path_to_config.suffix.lower() != '.json' or not path_to_config.is_file():
        raise ValueError('{} is not a .json config file'.format(path_to_config))

    model_configs = load_json(path_to_config)
    path_to_data = model_configs['path_to_data']
    train_model = model_configs['train_model']
    workers_num = model_configs['workers_num']
    batch_size = model_configs['batch_size']
    img_size = model_configs['img_size']

    transforms = Compose([
        Resize(*img_size),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    transforms_val = Compose([
        Resize(*img_size),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    data_loaders = get_data_loaders(path_to_data, transforms, transforms_val,
                                    batch_size, workers_num)

    model_selector = get_models_selector()
    model_type = model_configs['model_type']
    model = model_selector[model_type](True)
    criterion = nn.BCEWithLogitsLoss()
    metric = ConfMatrix()

    device = 'cpu'
    if cuda.is_available() and model_configs['cuda_usage']:
        device = 'cuda'

    criterion.to(device)
    metric.to(device)

    if device != 'cpu' and cuda.device_count() > 1:
        model = nn.DataParallel(model).cuda()
    elif device != 'cpu':
        model = model.cuda()

    optimizer = optim.SGD(model.parameters(),
                          lr=model_configs['learning_rate'],
                          momentum=0.9)

    info_paths = model_configs['info_paths']

    writer = SummaryWriter(logdir=info_paths['log_dir'])
    total_epochs = model_configs['epochs']

    best_f1_score = 0.
    for epoch in range(total_epochs):
        model.train()
        train(model,
              data_loaders['train'],
              epoch,
              optimizer,
              criterion,
              metric,
              writer,
              device=device)
        model.eval()
        pr, recall, f1_score = val(model,
                                   criterion,
                                   metric,
                                   data_loaders['val'],
                                   epoch,
                                   writer,
                                   device=device)
        if f1_score > best_f1_score:
            best_f1_score = f1_score
            path_to_save = os.path.join(model_configs['path_to_save_model'],
                                        'best_model_{}.pth'.format(epoch))
            save(model.state_dict(), path_to_save)
Example #7
" training "
criterion = torch.nn.L1Loss(reduction='mean')  #MSELoss()  sum
optimizer = optim.Adam(model.parameters())
CLIP = 1
N_EPOCHS = 100
batch_size = 1
val_batch_size = 1
train_loss_list = []
valid_loss_list = []

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, input_data, label_data, optimizer, criterion,
                       CLIP, batch_size, device, epoch, N_EPOCHS)
    valid_loss = evaluate(model, input_data, label_data, criterion,
                          val_batch_size, device, epoch, N_EPOCHS)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.6f}')
    print(f'\t Val. Loss: {valid_loss:.6f}')

    train_loss_list.append(train_loss)
    valid_loss_list.append(valid_loss)

torch.save(model.state_dict(), 'tut2-model.pt')
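The epoch_time helper is used but not defined in Example #7. A sketch assuming the conventional minutes/seconds split seen in the tutorials this snippet resembles.

def epoch_time(start_time, end_time):
    # split an elapsed wall-clock interval into whole minutes and seconds
    elapsed = end_time - start_time
    minutes = int(elapsed / 60)
    seconds = int(elapsed - minutes * 60)
    return minutes, seconds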
Example #8
		assert os.path.isfile(args.resume), "=> no checkpoint found at '{}'".format(args.resume)
		print("=> loading checkpoint '{}'".format(args.resume))
		checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage)
		args.start_epoch = checkpoint['epoch']
		model.load_state_dict(checkpoint['state_dict'])
		optimizer.load_state_dict(checkpoint['optimizer'])
		scheduler.load_state_dict(checkpoint['scheduler'])
		print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
	if args.multigpu:
		model = nn.DataParallel(model)
	iteration = (args.start_epoch - 1) * len(train_loader)
	
	# Training & Validation
	for epoch in range(1, args.epochs + 1):
		print("\nepoch {}".format(epoch))
		train(args, model, device, train_loader, optimizer, epoch, iteration)
		scheduler.step()
		iteration += len(train_loader)
		validation_loss, validation_accuracy = val(args, model, device, test_loader, iteration)
		if epoch % args.save_interval == 0:
			saved_weight = os.path.join(args.path2weight, "pt_"+args.dataset+"_ft_"+args.ft_dataset+"_"+args.usenet+"_epoch"+ str(epoch) +".pth")
			# build the state dict once, then save it
			if args.multigpu:
				model_state = model.module.cpu().state_dict()
			else:
				model_state = model.cpu().state_dict()
			torch.save(model_state, saved_weight)

			# Save checkpoint
			checkpoint = "{}/{}_{}_checkpoint.pth.tar".format(args.path2weight, args.dataset, args.usenet)
			torch.save({'epoch': epoch + 1,
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume,
                                map_location=lambda storage, loc: storage)
        args.start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    if not args.no_multigpu:
        model = nn.DataParallel(model)

    # FractalDB Pre-training
    iteration = (args.start_epoch - 1) * len(train_loader)
    for epoch in range(args.start_epoch, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, criterion, epoch)
        scheduler.step()
        iteration += len(train_loader)
        if args.val:
            validation_loss = validate(args, model, device, val_loader,
                                       criterion, iteration)
        if epoch % args.save_interval == 0:
            if args.no_multigpu:
                model_state = model.cpu().state_dict()
            else:
                model_state = model.module.cpu().state_dict()
            saved_weight = "{}/{}_{}_epoch{}.pth.tar".format(
                args.path2weight, args.dataset, args.usenet, epoch)
            torch.save(model_state, saved_weight.replace('.tar', ''))
            checkpoint = "{}/{}_{}_checkpoint.pth.tar".format(
                args.path2weight, args.dataset, args.usenet)
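The validate helper called in Examples #8 and #9 is likewise absent. A sketch of a standard evaluation loop matching Example #9's validate(args, model, device, val_loader, criterion, iteration) signature; the batch format and loss averaging are assumptions.

import torch

def validate(args, model, device, val_loader, criterion, iteration):
    model.eval()
    total_loss, n_batches = 0.0, 0
    with torch.no_grad():
        for data, target in val_loader:  # assumed (input, label) batches
            data, target = data.to(device), target.to(device)
            total_loss += criterion(model(data), target).item()
            n_batches += 1
    avg_loss = total_loss / max(n_batches, 1)
    print(f'iteration {iteration}: validation loss {avg_loss:.4f}')
    return avg_loss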
Example #10
def train_model(args):
    global best_metric, epoch_resume
    epoch_resume = 0
    best_metric = 0
    model = get_model(args)

    if args.distribute:
        model = model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank])
    else:
        model = torch.nn.DataParallel(model).cuda()
    writer = None
    if is_main_process():
        log_file = (f"{args.model_name}_{args.dataset}"
                    f"_t_length_{args.t_length}_t_stride_{args.t_stride}"
                    f"_batch_{args.batch_size}_lr_{args.lr}"
                    f"_logfile_{time.strftime('%d_%b_%Y_%H:%M:%S', time.localtime())}")
        log_file = os.path.join(args.log_dir, args.model_name, log_file)
        writer = SummaryWriter(log_dir=log_file)
        print(model)
    dataloaders, dataset_sizes, samplers = get_dataloader(args)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                weight_decay=1e-4,
                                momentum=0.9)
    criterion = nn.CrossEntropyLoss().cuda()

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.num_epochs)
    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        epoch_resume = checkpoint['epoch']
        best_metric = checkpoint['best_metric']
        model_dict = model.state_dict()
        idx = 0
        print(len(model_dict))
        print(len(checkpoint['state_dict']))
        for k, v in checkpoint['state_dict'].items():
            k = k.replace('module.', '')
            if k in model_dict:
                if v.shape == model_dict[k].shape:
                    model_dict[k] = v.cuda()
                    idx += 1
        print(idx)
        print('parameters loaded')
        model.load_state_dict(model_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print(("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch'])))
        print(best_metric)
    elif is_main_process():
        print(("=> no checkpoint found at '{}'".format(args.resume)))

    for epoch in range(epoch_resume, args.num_epochs):
        if args.distribute:
            samplers['train'].set_epoch(epoch)
            samplers['val'].set_epoch(epoch)
        end = time.time()
        train(dataloaders['train'],
              model,
              criterion,
              optimizer,
              epoch,
              args.print_freq,
              writer,
              args=args)
        scheduler.step()
        if epoch >= 0:
            metric = validate(dataloaders['val'],
                              model,
                              criterion,
                              args.print_freq,
                              epoch + 1,
                              writer,
                              args=args)
            if is_main_process():
                print(metric)
                #       remember best prec@1 and save checkpoint
                is_best = metric > best_metric
                best_metric = max(metric, best_metric)
                print(best_metric)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_metric': best_metric,
                        'optimizer': optimizer.state_dict(),
                    },
                    is_best,
                    str('current'),
                    args.check_dir,
                    args=args,
                    name=args.model_name)

        # `end` is reset at the top of each epoch, so this times one epoch
        time_elapsed = time.time() - end
        if is_main_process():
            print(f"Epoch completed in {time_elapsed // 3600:.0f}h "
                  f"{(time_elapsed % 3600) // 60:.0f}m {time_elapsed % 60:.0f}s")
Example #11
def main(args, models_mngr):
    best_prec1 = 0

    # Check the save_dir exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    model = torch.nn.DataParallel(models_mngr.get_model(args.arch))
    if args.cpu:
        model.cpu()
    else:
        model.cuda()

    if args.logs:
        if args.logs_dir == 'logs_dir':
            writer = SummaryWriter(f'log_dir/{args.arch}')
        else:
            writer = SummaryWriter(args.logs_dir)
    else:
        writer = None

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.evaluate, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    trn_loader = train_loader(args.workers, args.batch_size, normalize)
    val_loader = validate_loader(args.workers, args.batch_size, normalize)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    if args.cpu:
        criterion = criterion.cpu()
    else:
        criterion = criterion.cuda()

    if args.half:
        model.half()
        criterion.half()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, args.cpu, args.half, args.print_freq)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr)

        # train for one epoch
        train(trn_loader, model, criterion, optimizer, epoch, args.cpu,
              args.half, args.print_freq, writer)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, args.cpu, args.half,
                         args.print_freq)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if epoch > 0 and epoch % args.save_every == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best, filename=os.path.join(args.save_dir, 'checkpoint_{}.tar'.format(epoch)))

        save_checkpoint({
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, filename=os.path.join(args.save_dir, 'model.th'))
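Example #11 calls save_checkpoint(state, is_best, filename=...) without showing it. A minimal sketch matching that call signature; keeping a 'model_best' copy alongside the latest checkpoint is an assumption, not the author's implementation.

import os
import shutil

import torch

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # persist the latest state; keep a separate copy of the best-scoring one
    torch.save(state, filename)
    if is_best:
        best_path = os.path.join(os.path.dirname(filename) or '.',
                                 'model_best.pth.tar')
        shutil.copyfile(filename, best_path)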