Esempio n. 1
0
 def build_dataset(self, mode):
     """Builds the train or val dataset plus its iterating data loader.

     Args:
         mode: Dataset split to build, `train` or `val` (case-insensitive).

     Raises:
         ValueError: If `mode` is not `train` or `val`.
     """
     # Nothing to build when the config carries no dataset section.
     if not hasattr(self.config, 'data'):
         return
     assert isinstance(mode, str)
     mode = mode.lower()
     self.logger.info(f'Building `{mode}` dataset ...')
     if mode not in ['train', 'val']:
         raise ValueError(f'Invalid dataset mode `{mode}`!')
     dataset = BaseDataset(**self.config.data[mode])
     if mode == 'train':
         self.train_loader = IterDataLoader(
             dataset=dataset,
             batch_size=self.batch_size,
             shuffle=True,
             num_workers=self.config.data.get('num_workers', 2),
             current_iter=self.iter,
             repeat=self.config.data.get('repeat', 1))
     else:
         # mode == 'val'; any other value was already rejected above, so the
         # former `elif 'val' / else raise NotImplementedError` dead branch
         # has been removed.
         self.val_loader = IterDataLoader(
             dataset=dataset,
             batch_size=self.val_batch_size,
             shuffle=False,
             num_workers=self.config.data.get('num_workers', 2),
             current_iter=0,
             repeat=1)
     self.logger.info(f'Finish building `{mode}` dataset.')
Esempio n. 2
0
def cmapss_hyperopt(fd,
                    window_size,
                    batch_size=1,
                    shuffle=True,
                    percent_fail_runs=None,
                    percent_broken=None,
                    normalization='minmax'):
    """Split the CMAPSS training data into dev/eval sets for hyperopt."""
    full_train = CMAPSSDataset(fd,
                               'train',
                               window_size=window_size,
                               normalization=normalization,
                               percent_fail_runs=percent_fail_runs,
                               percent_broken=percent_broken)
    # Indices where the target equals 1 — presumably run boundaries in the
    # concatenated training data; TODO confirm against CMAPSSDataset.
    boundary_idx = np.argwhere(full_train.targets == 1)
    cutoff = int(0.8 * len(boundary_idx))

    # First 80% of boundaries become the dev split, the remainder eval.
    dev_subset = torch.utils.data.Subset(full_train,
                                         np.arange(boundary_idx[cutoff]))
    eval_subset = torch.utils.data.Subset(
        full_train, np.arange(boundary_idx[cutoff], boundary_idx[-1] + 1))

    return BaseDataset(dev_subset,
                       eval_subset,
                       batch_size,
                       shuffle,
                       dvc_file=os.path.join(DATA_ROOT, 'CMAPSS.dvc'))
Esempio n. 3
0
def cmapss(fd,
           window_size,
           batch_size=1,
           shuffle=True,
           percent_fail_runs=None,
           percent_broken=None,
           normalization='minmax'):
    """Construct a BaseDataset holding the CMAPSS train and test splits."""
    # Both splits share every option except the split name.
    shared_kwargs = dict(window_size=window_size,
                         normalization=normalization,
                         percent_fail_runs=percent_fail_runs,
                         percent_broken=percent_broken)
    train_split = CMAPSSDataset(fd, 'train', **shared_kwargs)
    test_split = CMAPSSDataset(fd, 'test', **shared_kwargs)

    return BaseDataset(train_split,
                       test_split,
                       batch_size,
                       shuffle,
                       dvc_file=os.path.join(DATA_ROOT, 'CMAPSS.dvc'))
def no_adaption_dataset(source_dataset, target_dataset, **kwargs):
    """Combine source training data with target eval data (no adaption)."""
    # When both arguments are config dicts, materialize them first.
    both_are_configs = isinstance(source_dataset, dict) and isinstance(
        target_dataset, dict)
    if both_are_configs:
        source_dataset, target_dataset = _build_datasets(
            source_dataset, target_dataset, kwargs)

    dvc_files = (source_dataset.dvc_file, target_dataset.dvc_file)
    return BaseDataset(source_dataset.train_data,
                       target_dataset.eval_data,
                       dvc_file=dvc_files,
                       **kwargs)
Esempio n. 5
0
def sample_batch() -> torch.Tensor:
    """Fetch one batch of demo images and move it onto the GPU."""
    resolution = 64
    dataset_kwargs = dict(root_dir='data/demo.zip',
                          data_format='zip',
                          resolution=resolution)
    demo_dataset = BaseDataset(**dataset_kwargs)
    # NOTE(review): `args` is read from the enclosing scope — confirm it is
    # defined wherever this helper runs.
    loader = LocalIterDataloader(dataset=demo_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 current_iter=0,
                                 repeat=1)

    return next(loader)['image'].cuda()
def office_dslr_dataset(batch_size, shuffle):
    """Build a BaseDataset over the dslr images of Office-31."""
    # NOTE(review): CenterCrop(244) looks like a typo for the usual 224, but
    # 244 is what this pipeline has always used — confirm before changing.
    preprocessing = transforms.Compose([
        transforms.CenterCrop(244),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
    ])

    image_dir = os.path.join(DATA_ROOT, 'Office-31', 'dslr', 'images')
    folder_data = torchvision.datasets.ImageFolder(image_dir,
                                                   transform=preprocessing)

    # The same image folder serves as both the train and the eval split.
    return BaseDataset(folder_data,
                       folder_data,
                       batch_size,
                       shuffle,
                       dvc_file=os.path.join(DATA_ROOT, 'Office-31.dvc'))
Esempio n. 7
0
def main(args):
    """Evaluate a ResNet-18 checkpoint on the validation split of a dataset."""
    transform = getTransforms()

    data_path = os.path.join('data', args.data)
    if not os.path.exists(data_path):
        print('ERROR: No dataset named {}'.format(args.data))
        exit(1)

    val_set = BaseDataset(list_path=os.path.join(data_path, 'val.lst'),
                          transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=1,
                                             shuffle=True,
                                             num_workers=1)

    class_list = getClassList(data_path)

    model = ResNet(num_layers=18, num_classes=len(class_list)).to(DEVICE)
    model.eval()

    # Fall back to the checkpoint in the output directory when no explicit
    # model file was passed on the command line.
    output_dir = os.path.join('outputs', args.data)
    model_state_file = os.path.join(output_dir, 'checkpoint.pth.tar')
    model_file = args.model_file if len(args.model_file) > 0 \
        else model_state_file

    if not os.path.exists(model_file):
        print('model_file "{}" does not exists.'.format(model_file))
        exit(1)

    checkpoint = torch.load(model_file)
    # Accept both full checkpoints and bare state dicts.
    weights = checkpoint['state_dict'] if 'state_dict' in checkpoint.keys() \
        else checkpoint
    model.load_state_dict(weights, strict=False)
    print('=> loaded {}'.format(model_file))

    accuracy = test(model=model,
                    dataloader=val_loader,
                    device=DEVICE,
                    classes=class_list)

    print('Accuracy: {:.2f}%'.format(100 * accuracy))
Esempio n. 8
0
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()


if __name__ == '__main__':
    args = parser.parse_args()
    # Parse the JSON config into an immutable namedtuple of options.
    with open(args.config, 'r') as f:
        raw_options = json.load(f)
        options = namedtuple('options', raw_options.keys())(**raw_options)
    # Build the CMR model on top of a mesh, from a pretrained checkpoint.
    mesh = Mesh()
    model = CMR(mesh,
                options.num_layers,
                options.num_channels,
                pretrained_checkpoint=args.checkpoint)
    # Evaluation dataset and evaluation loop.
    dataset = BaseDataset(options, args.dataset, is_train=False)
    run_evaluation(model,
                   args.dataset,
                   dataset,
                   mesh,
                   batch_size=args.batch_size,
                   shuffle=args.shuffle,
                   log_freq=args.log_freq)
Esempio n. 9
0
        # params +=['--num_workers',0]

        args = parser.parse_args(params)
        args.batch_size =128
        args.num_workers =0
        

    model = hmr(config.SMPL_MEAN_PARAMS)
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)
    model.cuda()
    model.eval()

    # Setup evaluation dataset
    # dataset = BaseDataset(None, '3dpw', is_train=False, bMiniTest=False)
    dataset = BaseDataset(None, '3dpw', is_train=False, bMiniTest=False, bEnforceUpperOnly=False)
    
    # Run evaluation
    # result_file_name = '/run/media/hjoo/disk/data/cocoPose3D_amt/0_SPIN/result_3dpw_urs_11_04_59961_4030.pkl'
    result_file_name = '/run/media/hjoo/disk/data/cocoPose3D_amt/0_SPIN/spin_11-06-42861-upper0_2_ours_lc3d_all-8935.pkl'
    run_evaluation(model, '3dpw',dataset , result_file_name,
                   batch_size=args.batch_size,
                   shuffle=args.shuffle,
                   log_freq=args.log_freq, num_workers=args.num_workers)

    # # Setup evaluation dataset
    # dataset = BaseDataset(None, 'h36m-p1', is_train=False, bMiniTest=False)
    
    # # Run evaluation
    # args.result_file = '/run/media/hjoo/disk/data/cocoPose3D_amt/0_SPIN/result_h36m_spin'
    # run_evaluation(model, 'h36m-p1', dataset, args.result_file,
Esempio n. 10
0
    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
        print()

if __name__ == '__main__':
    args = parser.parse_args()

    # Restore the HMR regressor weights and switch to inference mode.
    model = hmr(config.SMPL_MEAN_PARAMS)
    saved_state = torch.load(args.checkpoint)
    model.load_state_dict(saved_state['model'], strict=False)
    model.eval()

    # Setup evaluation dataset
    eval_dataset = BaseDataset(None, args.dataset, is_train=False)
    # Run evaluation
    run_evaluation(model,
                   args.dataset,
                   eval_dataset,
                   args.result_file,
                   batch_size=args.batch_size,
                   shuffle=args.shuffle,
                   log_freq=args.log_freq)
Esempio n. 11
0
    args = docopt(__doc__)

    dataset_file = args['--dataset']
    VERBOSE = True

    ENC_EMB_DIM = 256  #768
    DEC_EMB_DIM = 256
    ENC_HID_DIM = 512
    DEC_HID_DIM = 512
    DROPOUT = 0
    FORCING_RATIO = 1
    BATCH_SIZE = 8

    #dataset = BaseDataset([dataset_file])
    #dataloader = SequenceLoader(dataset, BATCH_SIZE, 'train')
    dataset = BaseDataset([dataset_file])
    dataset.convert()
    dataloader = SequenceLoader(dataset, BATCH_SIZE, 'train')
    #embedding_model = ContextualEmbeddingModel('bert', dataset.max_num_variables, dataset.max_num_constants)
    '''
    print(dataset.questions)
    questions = [q for q in dataset.src_vocab.indices2words(dataset.questions[0].tolist())]
    equations = [e for e in dataset.tgt_vocab.indices2words(dataset.equations[0].tolist())]
    print(questions)
    print(equations)
    print(dataset.alignments)
    print(dataset.solutions)
    '''

    #criterion = nn.CrossEntropyLoss() #TODO: add ignore index for pad token?
    model = RNNModel(len(dataset.src_vocab),
Esempio n. 12
0
if __name__ == '__main__':
    args = parser.parse_args()

    # Load DaNet configuration and promote nested dicts to EasyDicts.
    cfg_from_file(args.danet_cfg_file)
    cfg.DANET.REFINEMENT = EasyDict(cfg.DANET.REFINEMENT)
    cfg.MSRES_MODEL.EXTRA = EasyDict(cfg.MSRES_MODEL.EXTRA)

    # Build the selected regressor. Previously an unrecognized value left
    # `model` unbound and crashed later with a NameError; fail fast instead.
    if args.regressor == 'hmr':
        model = hmr(path_config.SMPL_MEAN_PARAMS)
    elif args.regressor == 'danet':
        model = DaNet(args, path_config.SMPL_MEAN_PARAMS, pretrained=False)
    else:
        raise ValueError('Unknown regressor `{}`!'.format(args.regressor))

    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)

    model.eval()

    # Setup evaluation dataset
    dataset = BaseDataset(args, args.dataset, is_train=False)

    # Run evaluation
    run_evaluation(model,
                   args.dataset,
                   dataset,
                   args.result_file,
                   batch_size=args.batch_size,
                   shuffle=args.shuffle,
                   log_freq=args.log_freq,
                   options=args)
Esempio n. 13
0
def main(cli_args):
    """Train and/or evaluate a transformer classifier per the JSON config.

    Args:
        cli_args: Command-line namespace providing the config location,
            dataset directory, model/transformer modes, GPU id, margin and
            result directory.
    """
    # Read from config file and make args
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))
    logger.info("cliargs parameters {}".format(cli_args))

    args.output_dir = os.path.join(args.ckpt_dir, cli_args.result_dir)
    args.model_mode = cli_args.model_mode
    args.margin = cli_args.margin

    init_logger()
    set_seed(args)

    # Map the requested transformer family to its HuggingFace model id.
    model_link = None
    if cli_args.transformer_mode.upper() == "T5":
        model_link = "t5-base"
    elif cli_args.transformer_mode.upper() == "ELECTRA":
        model_link = "google/electra-base-discriminator"
    elif cli_args.transformer_mode.upper() == "ALBERT":
        model_link = "albert-base-v2"
    elif cli_args.transformer_mode.upper() == "ROBERTA":
        model_link = "roberta-base"
    elif cli_args.transformer_mode.upper() == "BERT":
        model_link = "bert-base-uncased"

    print(model_link)
    tokenizer = AutoTokenizer.from_pretrained(model_link)

    args.test_file = os.path.join(cli_args.dataset, args.test_file)
    args.dev_file = os.path.join(cli_args.dataset, args.dev_file)
    args.train_file = os.path.join(cli_args.dataset, args.train_file)
    # Load dataset
    train_dataset = BaseDataset(args, tokenizer,
                                mode="train") if args.train_file else None
    dev_dataset = BaseDataset(args, tokenizer,
                              mode="dev") if args.dev_file else None
    test_dataset = BaseDataset(args, tokenizer,
                               mode="test") if args.test_file else None

    # Fixed `== None` to the identity-test idiom (PEP 8).
    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use testset

    args.logging_steps = int(len(train_dataset) / args.train_batch_size) + 1
    args.save_steps = args.logging_steps
    labelNumber = train_dataset.getLabelNumber()

    labels = [str(i) for i in range(labelNumber)]
    config = AutoConfig.from_pretrained(model_link)

    # GPU or CPU
    args.device = "cuda:{}".format(
        cli_args.gpu
    ) if torch.cuda.is_available() and not args.no_cuda else "cpu"
    config.device = args.device
    args.model_mode = cli_args.model_mode

    model = MODEL_LIST[cli_args.model_mode](model_link, args.model_type,
                                            args.model_name_or_path, config,
                                            labelNumber, args.margin)
    model.to(args.device)

    if args.do_train:
        global_step, tr_loss = train(args, model, train_dataset, dev_dataset,
                                     test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(
            global_step, tr_loss))

    results = {}
    if args.do_eval:
        # Collect every saved checkpoint directory under the output dir.
        checkpoints = list(
            os.path.dirname(c) for c in sorted(
                glob.glob(args.output_dir + "/**/" + "pytorch_model.bin",
                          recursive=True)))
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(
                logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1]
            # NOTE(review): training built the model via
            # MODEL_LIST[cli_args.model_mode] but evaluation indexes with
            # args.model_type — confirm these keys are meant to differ.
            model = MODEL_LIST[args.model_type].from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args,
                              model,
                              test_dataset,
                              mode="test",
                              global_step=global_step)
            # Suffix every metric with the checkpoint's global step.
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
Esempio n. 14
0
def main(args):
    """Train a ResNet-18 classifier with checkpoint/resume and periodic plots.

    Builds train/val loaders from ``data/<args.data>``, resumes from a saved
    checkpoint when one exists, then per epoch saves the best model (by
    accuracy) plus a full resume checkpoint; every 100 epochs it renders a
    combined loss/accuracy figure into the output directory.
    """
    transform = getTransforms()

    data_path = os.path.join('data', args.data)
    if not os.path.exists(data_path):
        print('ERROR: No dataset named {}'.format(args.data))
        exit(1)

    trainset = BaseDataset(list_path=os.path.join(data_path, 'train.lst'),
                           transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.train_batch,
                                              shuffle=True,
                                              num_workers=1)

    # NOTE(review): the val loader shuffles too; harmless for aggregate
    # loss/accuracy but usually unnecessary — confirm intent.
    testset = BaseDataset(list_path=os.path.join(data_path, 'val.lst'),
                          transform=transform)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.val_batch,
                                             shuffle=True,
                                             num_workers=1)

    model = ResNet(num_layers=18,
                   num_classes=args.num_classes,
                   pretrained=True).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)

    # Prints a layer summary; assumes 3x32x32 inputs — TODO confirm this
    # matches what getTransforms() actually produces.
    summary(model, input_size=(3, 32, 32))

    max_epoch = args.max_epoch
    last_epoch = 0
    best_val_loss = None
    best_accuracy = None
    train_losses = []
    val_losses = []
    accuracies = []

    output_dir = os.path.join('outputs', args.data)
    model_state_file = os.path.join(output_dir, 'checkpoint.pth.tar')
    os.makedirs(output_dir, exist_ok=True)

    # Resume all training state (epoch counter, metric history, model and
    # optimizer weights) when a checkpoint already exists.
    if os.path.exists(model_state_file):
        checkpoint = torch.load(model_state_file)
        last_epoch = checkpoint['epoch']
        best_val_loss = checkpoint['best_val_loss']
        best_accuracy = checkpoint['best_accuracy']
        train_losses = checkpoint['train_losses']
        val_losses = checkpoint['val_losses']
        accuracies = checkpoint['accuracies']
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('=> loaded checkpoint (epoch {})'.format(last_epoch))

    for epoch in range(last_epoch, max_epoch):
        print('Epoch {}'.format(epoch))

        train_loss = train(model=model,
                           dataloader=trainloader,
                           criterion=criterion,
                           optimizer=optimizer,
                           device=DEVICE)
        val_loss = val(model=model,
                       dataloader=testloader,
                       criterion=criterion,
                       device=DEVICE)
        accuracy = test(model=model, dataloader=testloader, device=DEVICE)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        accuracies.append(accuracy)

        print('Loss: train = {}, val = {}, acc. = {}'.format(
            train_loss, val_loss, accuracy))

        # if best_val_loss is None or val_loss < best_val_loss:
        #     best_val_loss = val_loss
        #     torch.save(
        #         model.state_dict(),
        #         os.path.join(output_dir, 'best.pth')
        #     )
        # Best model is selected by accuracy (the loss-based rule above was
        # retired); only the bare state dict is written to best.pth.
        if best_accuracy is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(),
                       os.path.join(output_dir, 'best.pth'))

        # Full resume checkpoint is rewritten every epoch; 'epoch' stores the
        # NEXT epoch to run, matching the range() resume above.
        print('=> saving checkpoint to {}'.format(model_state_file))
        torch.save(
            {
                'epoch': epoch + 1,
                'best_val_loss': best_val_loss,
                'best_accuracy': best_accuracy,
                'train_losses': train_losses,
                'val_losses': val_losses,
                'accuracies': accuracies,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, model_state_file)

        if (epoch + 1) % 100 == 0:
            # plt.plot(range(epoch+1), train_losses, label="train")
            # plt.plot(range(epoch+1), val_losses, label="val")
            # plt.yscale('log')
            # plt.legend()
            # plt.savefig(os.path.join(output_dir, 'losses.png'))
            # plt.clf()

            # Losses (log-log, left axis) and accuracy (right axis) share one
            # figure with a merged legend.
            fig, ax1 = plt.subplots()
            ax2 = ax1.twinx()
            ax1.plot(range(epoch + 1), train_losses, label='train')
            ax1.plot(range(epoch + 1), val_losses, label='val')
            ax1.set_xscale('log')
            ax1.set_yscale('log')
            ax2.plot(range(epoch + 1),
                     accuracies,
                     color='red',
                     label='accuracy')
            ax2.set_xscale('log')
            handler1, label1 = ax1.get_legend_handles_labels()
            handler2, label2 = ax2.get_legend_handles_labels()
            ax1.legend(handler1 + handler2,
                       label1 + label2,
                       loc=3,
                       borderaxespad=0.)
            plt.savefig(os.path.join(output_dir, 'losses.png'))
            plt.clf()
Esempio n. 15
0
def main(cli_args):
    """Run the best checkpoint on the test set and dump per-example results.

    Loads the training-time args saved next to the best checkpoint, rebuilds
    the tokenizer and model, evaluates on the test split, and writes an .xlsx
    with predictions, gold labels and tokenized inputs.
    """
    # Read from config file and make args
    max_checkpoint = "checkpoint-best"

    args = torch.load(os.path.join("ckpt", cli_args.result_dir,
                                   max_checkpoint, "training_args.bin"))
    args.test_file = cli_args.test_file
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        config = json.load(f)
        args.data_dir = config["data_dir"]
        # Fixed `== None` to the identity-test idiom (PEP 8).
        if args.test_file is None:
            args.test_file = config["test_file"]
    logger.info("Testing parameters {}".format(args))

    args.model_mode = cli_args.model_mode
    args.device = "cuda:"+str(cli_args.gpu)

    init_logger()

    labels = ["0", "1"]
    config = CONFIG_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        num_labels=2,
        id2label={str(i): label for i, label in enumerate(labels)},
        label2id={label: i for i, label in enumerate(labels)},
    )

    tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        do_lower_case=args.do_lower_case
    )
    # The device string set above is overwritten here once CUDA availability
    # has actually been checked.
    args.device = "cuda:{}".format(cli_args.gpu) if torch.cuda.is_available() and not args.no_cuda else "cpu"
    config.device = args.device
    print(args.test_file)
    # Load dataset
    test_dataset = BaseDataset(args, tokenizer, mode="test") if args.test_file else None

    logger.info("Testing model checkpoint to {}".format(max_checkpoint))
    global_step = max_checkpoint.split("-")[-1]
    model = MODEL_LIST[cli_args.model_mode](args.model_type,
                                            args.model_name_or_path, config)
    model.load_state_dict(
        torch.load(os.path.join("ckpt", cli_args.result_dir, max_checkpoint,
                                "training_model.bin")))

    model.to(args.device)

    # KOSAC-style models additionally return polarity/intensity ids; note
    # `labels` is intentionally rebound from the placeholder list above.
    if "KOSAC" in args.model_mode:
        preds, labels, result, txt_all, polarity_ids, intensity_ids = evaluate(
            args, model, test_dataset, mode="test", global_step=global_step)
    else:
        preds, labels, result, txt_all = evaluate(
            args, model, test_dataset, mode="test", global_step=global_step)

    # Assemble a per-example report: text, prediction, gold label, match flag.
    pred_and_labels = pd.DataFrame([])
    pred_and_labels["data"] = txt_all
    pred_and_labels["pred"] = preds
    pred_and_labels["label"] = labels
    pred_and_labels["result"] = preds == labels
    decode_result = list(
        pred_and_labels["data"].apply(lambda x: tokenizer.convert_ids_to_tokens(tokenizer(x)["input_ids"])))
    pred_and_labels["tokenizer"] = decode_result

    if "KOSAC" in args.model_mode:
        tok_an = [list(zip(x, test_dataset.convert_ids_to_polarity(y)[:len(x) + 1], test_dataset.convert_ids_to_intensity(z)[:len(x) + 1])) for x, y, z in
                  zip(decode_result, polarity_ids, intensity_ids)]
        pred_and_labels["tokenizer_analysis(token,polarity,intensitiy)"] = tok_an

    # NOTE(review): the `encoding` parameter was removed from
    # DataFrame.to_excel in modern pandas; this call only works on older
    # versions — confirm the pandas pin before upgrading.
    pred_and_labels.to_excel(os.path.join("ckpt", cli_args.result_dir, "test_result_" + max_checkpoint + ".xlsx"),
                             encoding="cp949")
Esempio n. 16
0
    def init_fn(self):
        """Build model, losses, optimizer, datasets and loaders for training.

        Runs once at trainer start-up; honors the distributed settings in
        ``self.options`` and stores every component on ``self``.
        """
        if self.options.rank == 0:
            self.summary_writer.add_text('command_args', print_args())

        if self.options.regressor == 'hmr':
            # HMR/SPIN model
            self.model = hmr(path_config.SMPL_MEAN_PARAMS, pretrained=True)
            self.smpl = SMPL(path_config.SMPL_MODEL_DIR,
                             batch_size=cfg.TRAIN.BATCH_SIZE,
                             create_transl=False).to(self.device)
        elif self.options.regressor == 'pymaf_net':
            # PyMAF model
            self.model = pymaf_net(path_config.SMPL_MEAN_PARAMS,
                                   pretrained=True)
            self.smpl = self.model.regressor[0].smpl

        if self.options.distributed:
            # For multiprocessing distributed, DistributedDataParallel constructor
            # should always set the single device scope, otherwise,
            # DistributedDataParallel will use all available devices.
            if self.options.gpu is not None:
                torch.cuda.set_device(self.options.gpu)
                self.model.cuda(self.options.gpu)
                # When using a single GPU per process and per
                # DistributedDataParallel, we need to divide the batch size
                # ourselves based on the total number of GPUs we have
                self.options.batch_size = int(self.options.batch_size /
                                              self.options.ngpus_per_node)
                self.options.workers = int(
                    (self.options.workers + self.options.ngpus_per_node - 1) /
                    self.options.ngpus_per_node)
                self.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                    self.model)
                self.model = torch.nn.parallel.DistributedDataParallel(
                    self.model,
                    device_ids=[self.options.gpu],
                    output_device=self.options.gpu,
                    find_unused_parameters=True)
            else:
                self.model.cuda()
                # DistributedDataParallel will divide and allocate batch_size to all
                # available GPUs if device_ids are not set
                self.model = torch.nn.parallel.DistributedDataParallel(
                    self.model, find_unused_parameters=True)
            self.models_dict = {'model': self.model.module}
        else:
            self.model = self.model.to(self.device)
            self.models_dict = {'model': self.model}

        cudnn.benchmark = True

        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint (2D and 3D) loss
        # No reduction because confidence weighting needs to be applied
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # Loss for SMPL parameter regression
        self.criterion_regr = nn.MSELoss().to(self.device)
        self.focal_length = constants.FOCAL_LENGTH

        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)

        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=cfg.SOLVER.BASE_LR,
                                          weight_decay=0)

        self.optimizers_dict = {'optimizer': self.optimizer}

        if self.options.single_dataset:
            self.train_ds = BaseDataset(self.options,
                                        self.options.single_dataname,
                                        is_train=True)
        else:
            self.train_ds = MixedDataset(self.options, is_train=True)

        self.valid_ds = BaseDataset(self.options,
                                    self.options.eval_dataset,
                                    is_train=False)

        if self.options.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                self.train_ds)
            val_sampler = None
        else:
            train_sampler = None
            val_sampler = None

        self.train_data_loader = DataLoader(self.train_ds,
                                            batch_size=self.options.batch_size,
                                            num_workers=self.options.workers,
                                            pin_memory=cfg.TRAIN.PIN_MEMORY,
                                            shuffle=(train_sampler is None),
                                            sampler=train_sampler)

        self.valid_loader = DataLoader(dataset=self.valid_ds,
                                       batch_size=cfg.TEST.BATCH_SIZE,
                                       shuffle=False,
                                       num_workers=cfg.TRAIN.NUM_WORKERS,
                                       pin_memory=cfg.TRAIN.PIN_MEMORY,
                                       sampler=val_sampler)

        # Load dictionary of fits
        self.fits_dict = FitsDict(self.options, self.train_ds)
        self.evaluation_accumulators = dict.fromkeys([
            'pred_j3d', 'target_j3d', 'target_theta', 'pred_verts',
            'target_verts'
        ])

        # Create renderer. Visualization stays best-effort, but the bare
        # `except:` previously swallowed SystemExit/KeyboardInterrupt too;
        # narrowed to Exception.
        try:
            self.renderer = OpenDRenderer()
        except Exception:
            print('No renderer for visualization.')
            self.renderer = None

        if cfg.MODEL.PyMAF.AUX_SUPV_ON:
            self.iuv_maker = IUV_Renderer(
                output_size=cfg.MODEL.PyMAF.DP_HEATMAP_SIZE)

        self.decay_steps_ind = 1
        self.decay_epochs_ind = 1
Esempio n. 17
0
def main(cli_args):
    """Evaluate the best saved checkpoint on the test set and dump predictions.

    Reads hyper-parameters from a JSON config file, rebuilds the tokenizer /
    datasets / model, loads ``checkpoint-best`` weights, runs ``evaluate`` on
    the test split and writes a per-example CSV of predictions next to the
    checkpoint directory.

    Args:
        cli_args: Parsed command-line arguments (config_dir, config_file,
            result_dir, dataset, gpu, model_mode, transformer_mode, ...).
    """
    max_checkpoint = "checkpoint-best"

    # Read from config file and make args.
    # NOTE: a previous version also loaded ``training_args.bin`` here, but the
    # result was immediately overwritten by the JSON config, so that dead load
    # has been removed.
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))
    logger.info("cliargs parameters {}".format(cli_args))

    args.output_dir = os.path.join(args.ckpt_dir, cli_args.result_dir)
    args.model_mode = cli_args.model_mode
    # GPU or CPU (set once; the original repeated this block three times).
    args.device = "cuda:{}".format(
        cli_args.gpu
    ) if torch.cuda.is_available() and not args.no_cuda else "cpu"

    init_logger()
    set_seed(args)

    # Map the requested transformer family to a Hugging Face model id.
    # An unknown mode yields None, which makes from_pretrained fail loudly
    # (same behavior as the original if/elif chain).
    model_links = {
        "T5": "t5-base",
        "ELECTRA": "google/electra-base-discriminator",
        "ALBERT": "albert-base-v2",
        "ROBERTA": "roberta-base",
        "BERT": "bert-base-uncased",
    }
    model_link = model_links.get(cli_args.transformer_mode.upper())

    tokenizer = AutoTokenizer.from_pretrained(model_link)

    # Resolve data files relative to the dataset directory.
    args.test_file = os.path.join(cli_args.dataset, args.test_file)
    # BUG FIX: the dev path was previously built from ``args.train_file``.
    args.dev_file = os.path.join(cli_args.dataset, args.dev_file)
    args.train_file = os.path.join(cli_args.dataset, args.train_file)

    # Load dataset
    train_dataset = BaseDataset(args, tokenizer,
                                mode="train") if args.train_file else None
    dev_dataset = BaseDataset(args, tokenizer,
                              mode="dev") if args.dev_file else None
    test_dataset = BaseDataset(args, tokenizer,
                               mode="test") if args.test_file else None

    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use testset

    args.logging_steps = int(len(train_dataset) / args.train_batch_size) + 1
    args.save_steps = args.logging_steps
    labelNumber = train_dataset.getLabelNumber()

    labels = [str(i) for i in range(labelNumber)]
    config = AutoConfig.from_pretrained(model_link)
    config.device = args.device

    logger.info("Testing model checkpoint to {}".format(max_checkpoint))
    global_step = max_checkpoint.split("-")[-1]

    # Build the model and restore the best-checkpoint weights.
    model = MODEL_LIST[cli_args.model_mode](model_link, args.model_type,
                                            args.model_name_or_path, config,
                                            labelNumber, -0.75)
    model.load_state_dict(
        torch.load(
            os.path.join("ckpt", cli_args.result_dir, max_checkpoint,
                         "training_model.bin")))

    model.to(args.device)

    preds, labels, result, txt_all = evaluate(args,
                                              model,
                                              test_dataset,
                                              mode="test",
                                              global_step=global_step)

    # Persist per-example predictions (raw text, prediction, gold label,
    # correctness flag and the tokenized input) for later inspection.
    pred_and_labels = pd.DataFrame([])
    pred_and_labels["data"] = txt_all
    pred_and_labels["pred"] = preds
    pred_and_labels["label"] = labels
    pred_and_labels["result"] = preds == labels
    decode_result = list(pred_and_labels["data"].apply(
        lambda x: tokenizer.convert_ids_to_tokens(tokenizer(x)["input_ids"])))
    pred_and_labels["tokenizer"] = decode_result

    pred_and_labels.to_csv(os.path.join(
        "ckpt", cli_args.result_dir, "test_result_" + max_checkpoint + ".csv"),
                           encoding="utf-8")
def main(cli_args):
    """Train and/or evaluate a two-label classifier per the JSON config.

    Builds config/tokenizer/model from ``args.model_type``, constructs the
    train/dev/test datasets, optionally trains, then evaluates the saved
    checkpoints and writes ``eval_results.txt`` into the output directory.

    Args:
        cli_args: Parsed command-line arguments (config_dir, config_file,
            result_dir, gpu, model_mode, ...).
    """
    # Read from config file and make args
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))
    logger.info("cliargs parameters {}".format(cli_args))

    args.output_dir = os.path.join(args.ckpt_dir, cli_args.result_dir)
    args.model_mode = cli_args.model_mode

    init_logger()
    set_seed(args)

    # Binary classification: fixed label set "0"/"1".
    labels = ["0", "1"]
    config = CONFIG_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        num_labels=2,
        id2label={str(i): label
                  for i, label in enumerate(labels)},
        label2id={label: i
                  for i, label in enumerate(labels)},
    )

    tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path, do_lower_case=args.do_lower_case)
    # GPU or CPU
    args.device = "cuda:{}".format(
        cli_args.gpu
    ) if torch.cuda.is_available() and not args.no_cuda else "cpu"
    config.device = args.device
    args.model_mode = cli_args.model_mode

    model = MODEL_LIST[cli_args.model_mode](args.model_type,
                                            args.model_name_or_path, config)
    model.to(args.device)

    # Load dataset
    train_dataset = DATASET_LIST[cli_args.model_mode](
        args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = BaseDataset(args, tokenizer,
                              mode="dev") if args.dev_file else None
    test_dataset = BaseDataset(args, tokenizer,
                               mode="test") if args.test_file else None

    # BUG FIX: guard against train_dataset being None — the original called
    # len(None) unconditionally, which raises TypeError when no train file
    # is configured.
    if train_dataset is not None:
        args.logging_steps = int(
            len(train_dataset) / args.train_batch_size) + 2
        args.save_steps = args.logging_steps

    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use testset

    if args.do_train:
        global_step, tr_loss = train(args, model, train_dataset, dev_dataset,
                                     test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(
            global_step, tr_loss))

    results = {}
    if args.do_eval:
        # Collect every checkpoint directory that contains saved weights.
        checkpoints = list(
            os.path.dirname(c) for c in sorted(
                glob.glob(args.output_dir + "/**/" + "pytorch_model.bin",
                          recursive=True)))
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(
                logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1]
            # NOTE(review): MODEL_LIST is indexed by args.model_type here but
            # by cli_args.model_mode above — confirm which key is intended.
            model = MODEL_LIST[args.model_type].from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args,
                              model,
                              test_dataset,
                              mode="test",
                              global_step=global_step)
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
Esempio n. 19
0
 def __init__(self, base_path):
     """Construct the dataset by delegating to ``BaseDataset.__init__``.

     Args:
         base_path: Passed through unchanged to ``BaseDataset``.
     """
     # Explicit base-class call (not super()) — preserved as written, since
     # the enclosing class header / MRO is not visible here.
     BaseDataset.__init__(self, base_path)
Esempio n. 20
0
                                  args.crop_setting, 'keypoints.pkl')
        log_dir = os.path.join(cfg.BASE_DATA_DIR, 'cmr_pck_results.txt')
        with open(gt_kp_path, 'rb') as f:
            gt = pkl.load(f)

        calc = CalcPCK(
            all_kps,
            gt,
            num_imgs=cfg.DATASET_SIZES[args.dataset][args.crop_setting],
            log_dir=log_dir,
            dataset=args.dataset,
            crop_setting=args.crop_setting,
            pck_eval_threshold=args.pck_eval_threshold)
        calc.eval()


if __name__ == '__main__':
    # Parse command-line options for the evaluation run.
    cli_args = parser.parse_args()

    # Build the mesh and the CMR model, optionally restoring a checkpoint.
    template_mesh = Mesh()
    cmr_model = CMR(template_mesh,
                    cli_args.num_layers,
                    cli_args.num_channels,
                    pretrained_checkpoint=cli_args.pretrained_checkpoint)

    # Dataset for the requested evaluation split.
    eval_dataset = BaseDataset(cli_args, cli_args.dataset)

    # Run the evaluation loop.
    run_evaluation(cmr_model, cli_args, eval_dataset, template_mesh)