Example #1
    def __init__(self,
                 config: ConfigParser,
                 model: nn.Module,
                 precision_threshold: float = 0.0,
                 recall_threshold: float = 0.0,
                 logger=None):
        self.config = config
        self.logger = logger if logger else config.get_logger('inference')
        self.p_threshold: float = precision_threshold
        self.r_threshold: float = recall_threshold

        self.device, self.device_ids = prepare_device(config['n_gpu'],
                                                      self.logger)
        self.state_dicts = []
        checkpoints = [config.resume] if config.resume is not None else list(
            config.save_dir.glob('**/model_best.pth'))
        for checkpoint in checkpoints:
            self.logger.info(f'Loading checkpoint: {checkpoint} ...')
            state_dict = torch.load(checkpoint,
                                    map_location=self.device)['state_dict']
            self.state_dicts.append(
                {k.replace('module.', ''): v
                 for k, v in state_dict.items()})

        self.model = model
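
The loop above collects one state dict per discovered model_best.pth, which suggests ensemble inference (Example #11 also exposes an --ens/--ensemble flag). Below is a minimal sketch of how such a list might be consumed; the method name, the batch argument, and the averaging rule are assumptions and not part of the original class:

    def ensemble_predict(self, batch):
        # Hypothetical helper: run the same model under every loaded checkpoint
        # and average the outputs. Not taken from the source repository.
        outputs = []
        for state_dict in self.state_dicts:
            self.model.load_state_dict(state_dict)
            self.model.to(self.device).eval()
            with torch.no_grad():
                outputs.append(self.model(batch.to(self.device)))
        return torch.stack(outputs).mean(dim=0)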
Example #2
    def __init__(self,
                 data_dir,
                 batch_size,
                 shuffle=True,
                 validation_split=0.0,
                 num_batches=0,
                 training=True,
                 num_workers=4,
                 pin_memory=True,
                 config=None,
                 teacher_idx=None,
                 seed=8888):

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.num_batches = num_batches
        self.training = training

        self.transform_train = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.6959, 0.6537, 0.6371),
                                 (0.3113, 0.3192, 0.3214)),
        ])
        self.transform_val = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize((0.6959, 0.6537, 0.6371),
                                 (0.3113, 0.3192, 0.3214)),
        ])

        self.data_dir = data_dir
        if config is None:
            config = ConfigParser.get_instance()
        cfg_trainer = config['trainer']
        self.train_dataset, self.val_dataset = get_clothing1m(
            config['data_loader']['args']['data_dir'],
            cfg_trainer,
            num_samples=self.num_batches * self.batch_size,
            train=training,
            transform_train=self.transform_train,
            transform_val=self.transform_val,
            teacher_idx=teacher_idx,
            seed=seed)

        super().__init__(self.train_dataset,
                         batch_size,
                         shuffle,
                         validation_split,
                         num_workers,
                         pin_memory,
                         val_dataset=self.val_dataset)
Example #3
    def __init__(self, num_examp, num_classes=10, beta=0.3):
        super().__init__()
        self.num_classes = num_classes
        self.config = ConfigParser.get_instance()
        self.USE_CUDA = torch.cuda.is_available()
        self.target = torch.zeros(num_examp, self.num_classes)
        if self.USE_CUDA:
            self.target = self.target.cuda()
        self.beta = beta
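
Example #9 calls this constructor as getattr(module_loss, 'ELRLoss')(num_examp=..., num_classes=..., beta=...), so the buffer above is a per-example soft target. The following sketch shows the running-average update such a buffer is typically maintained with; the forward signature and the placeholder loss are assumptions rather than code from this repository:

    def forward(self, index, output, label):
        # Hypothetical sketch: keep an exponential moving average of the softmax
        # predictions for each training example, addressed by its dataset index.
        pred = torch.softmax(output, dim=1).detach()
        self.target[index] = self.beta * self.target[index] + (1 - self.beta) * pred
        # The actual loss presumably combines cross entropy with a regularizer
        # built from self.target; plain cross entropy is used here as a placeholder.
        return torch.nn.functional.cross_entropy(output, label)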
Example #4
    def __init__(self,
                 data_dir,
                 batch_size,
                 shuffle=True,
                 validation_split=0.0,
                 num_batches=0,
                 training=True,
                 num_workers=4,
                 pin_memory=True,
                 num_class=50,
                 teacher_idx=None):

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.num_batches = num_batches
        self.training = training

        self.transform_train = transforms.Compose([
            transforms.RandomCrop(227),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        self.transform_val = transforms.Compose([
            transforms.CenterCrop(227),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        self.transform_imagenet = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(227),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])

        self.data_dir = data_dir
        config = ConfigParser.get_instance()
        cfg_trainer = config['trainer']
        self.train_dataset, self.val_dataset = get_webvision(
            config['data_loader']['args']['data_dir'],
            cfg_trainer,
            num_samples=self.num_batches * self.batch_size,
            train=training,
            transform_train=self.transform_train,
            transform_val=self.transform_val,
            num_class=num_class,
            teacher_idx=teacher_idx)

        super().__init__(self.train_dataset,
                         batch_size,
                         shuffle,
                         validation_split,
                         num_workers,
                         pin_memory,
                         val_dataset=self.val_dataset)
Example #5
    def __init__(self,
                 data_dir,
                 batch_size,
                 shuffle=True,
                 validation_split=0.0,
                 num_batches=0,
                 training=True,
                 num_workers=4,
                 pin_memory=True,
                 config=None,
                 teacher_idx=None,
                 seed=888):

        if config is None:
            config = ConfigParser.get_instance()
        cfg_trainer = config['trainer']

        transform_train = transforms.Compose([
            #transforms.ColorJitter(brightness= 0.4, contrast= 0.4, saturation= 0.4, hue= 0.1),
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5071, 0.4867, 0.4408),
                                 (0.2675, 0.2565, 0.2761)),
        ])
        transform_val = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5071, 0.4867, 0.4408),
                                 (0.2675, 0.2565, 0.2761)),
        ])
        self.data_dir = data_dir
        #         cfg_trainer = config['trainer']

        noise_file = '%sCIFAR100_%.1f_Asym_%s.json' % (
            config['data_loader']['args']['data_dir'], cfg_trainer['percent'],
            cfg_trainer['asym'])

        self.train_dataset, self.val_dataset = get_cifar100(
            config['data_loader']['args']['data_dir'],
            cfg_trainer,
            train=training,
            transform_train=transform_train,
            transform_val=transform_val,
            noise_file=noise_file,
            teacher_idx=teacher_idx,
            seed=seed)

        super().__init__(self.train_dataset,
                         batch_size,
                         shuffle,
                         validation_split,
                         num_workers,
                         pin_memory,
                         val_dataset=self.val_dataset)
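
For reference, with hypothetical values cfg_trainer['percent'] = 0.4 and cfg_trainer['asym'] = False, the noise_file expression above evaluates to '<data_dir>CIFAR100_0.4_Asym_False.json', i.e. the noise file is looked up inside the configured data directory.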
Example #6
    def __init__(self,
                 data_dir,
                 batch_size,
                 shuffle=True,
                 validation_split=0.0,
                 num_batches=0,
                 training=True,
                 num_workers=4,
                 pin_memory=True,
                 config=None,
                 teacher_idx=None,
                 seed=888):
        if config is None:
            config = ConfigParser.get_instance()
        cfg_trainer = config['trainer']

        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
        transform_val = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])
        self.data_dir = data_dir

        noise_file = '%sCIFAR10_%.1f_Asym_%s.json' % (
            config['data_loader']['args']['data_dir'], cfg_trainer['percent'],
            cfg_trainer['asym'])

        self.train_dataset, self.val_dataset = get_cifar10(
            config['data_loader']['args']['data_dir'],
            cfg_trainer,
            train=training,
            transform_train=transform_train,
            transform_val=transform_val,
            noise_file=noise_file,
            teacher_idx=teacher_idx,
            seed=seed)

        super().__init__(self.train_dataset,
                         batch_size,
                         shuffle,
                         validation_split,
                         num_workers,
                         pin_memory,
                         val_dataset=self.val_dataset)
Example #7
def run(config_file):
    # Command-line argument parser
    args = argparse.ArgumentParser(description='text classification')

    # Add arguments: config file, resume checkpoint, device
    args.add_argument('-c',
                      '--config',
                      default=config_file,
                      type=str,
                      help='config file path (default: None)')
    args.add_argument('-r',
                      '--resume',
                      default=None,
                      type=str,
                      help='path to latest checkpoint (default: None)')
    args.add_argument('-d',
                      '--device',
                      default='0,1',
                      type=str,
                      help='indices of GPUs to enable (default: all)')

    # Custom arguments: config values that can be overridden from the CLI
    CustomArgs = collections.namedtuple('CustomArgs', 'flags type target')
    # Add custom arguments: learning rate, batch size
    options = [
        CustomArgs(['--lr', '--learning_rate'],
                   type=float,
                   target='optimizer;args;lr'),
        CustomArgs(['--bs', '--batch_size'],
                   type=int,
                   target='data_process;args;batch_size')
    ]
    # Build the config parser
    config = ConfigParser.from_args(args, options)
    # Print the type of model architecture to train
    print(config.config['model_arch']['type'].lower())

    # Train
    if 'bert' in config.config['model_arch']['type'].lower():
        main(config, use_transformers=True)
    else:
        main(config, use_transformers=False)
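
The ';'-separated target strings above ('optimizer;args;lr', 'data_process;args;batch_size') read like nested key paths into the JSON config. Below is a minimal sketch of how such a target is usually resolved when a CLI override is applied; the helper name and this exact resolution logic are assumptions, not taken from the ConfigParser shown here:

def _set_by_path(config_dict, target, value):
    # Hypothetical helper: walk 'optimizer;args;lr' down to config['optimizer']['args']
    # and overwrite the final key with the value parsed from the command line.
    keys = target.split(';')
    node = config_dict
    for key in keys[:-1]:
        node = node[key]
    node[keys[-1]] = value


# Example: _set_by_path(config_dict, 'optimizer;args;lr', 0.0005)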
Example #8
def run(config_file, model_path, text_list):
    args = argparse.ArgumentParser(description='text classification')

    # Config file, model path, compute device
    args.add_argument('-c', '--config', default=config_file, type=str, help='config file path (default: None)')
    args.add_argument('-r', '--resume', default=model_path, type=str, help='path to latest checkpoint (default: None)')
    args.add_argument('-d', '--device', default='0', type=str, help='indices of GPUs to enable (default: all)')

    CustomArgs = collections.namedtuple('CustomArgs', 'flags type target')
    options = [
        CustomArgs(['--lr', '--learning_rate'], type=float, target='optimizer;args;lr'),
        CustomArgs(['--bs', '--batch_size'], type=int, target='data_process;args;batch_size')
    ]
    config = ConfigParser.from_args(args, options)
    print(config.config['model_arch']['type'].lower())

    if 'bert' in config.config['model_arch']['type'].lower():
        main(config, use_transformers=True, text_list=text_list)
    else:
        main(config, use_transformers=False, text_list=text_list)
Example #9
def trainClothing1m(parse, config: ConfigParser):
    # implementation for WandB
    wandb_run_name_list = wandbRunlist(config, parse)
    wandb_run_name = '_'.join(wandb_run_name_list)

    if parse.no_wandb:
        wandb.init(config=config, project='noisylabel', entity='goguryeo', name=wandb_run_name)
    
    # By default, pytorch utilizes multi-threaded cpu
    # Set to handle whole procedures on a single core
    numthread = torch.get_num_threads()
    torch.set_num_threads(numthread)
    logger = config.get_logger('train')
    
    # Set seed for reproducibility
    fix_seed(config['seed'])
    
    data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=config['data_loader']['args']['batch_size'],
        shuffle=False if parse.distillation else config['data_loader']['args']['shuffle'],
        validation_split=0.0,
        num_batches=config['data_loader']['args']['num_batches'],
        training=True,
        num_workers=config['data_loader']['args']['num_workers'],
        pin_memory=config['data_loader']['args']['pin_memory'])

    # valid_data_loader = data_loader.split_validation()

    valid_data_loader = None
    
    # test_data_loader = None

    test_data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=128,
        shuffle=False,
        validation_split=0.0,
        training=False,
        num_workers=0
    ).split_validation()

    print('---------')
    # build model architecture, then print to console
#     model = config.initialize('arch', module_arch)
    model = getattr(module_arch, 'resnet50')(pretrained=True,
                                             num_classes=config["num_classes"])
    
    if parse.no_wandb: wandb.watch(model)
    
    if parse.distillation:
        teacher = config.initialize('arch', module_arch)

        data_loader = getattr(module_data, config['data_loader']['type'])(
            config['data_loader']['args']['data_dir'],
            batch_size=config['data_loader']['args']['batch_size'],
            shuffle=config['data_loader']['args']['shuffle'],
            # validation_split=config['data_loader']['args']['validation_split'],
            validation_split=0.0,
            num_batches=config['data_loader']['args']['num_batches'],
            training=True,
            num_workers=config['data_loader']['args']['num_workers'],
            pin_memory=config['data_loader']['args']['pin_memory'],
            teacher_idx=extract_cleanidx(teacher, data_loader, parse))
    else:
        teacher = None

    # get function handles of loss and metrics
    logger.info(config.config)
    if hasattr(data_loader.dataset, 'num_raw_example'):
        num_examp = data_loader.dataset.num_raw_example
    else:
        num_examp = len(data_loader.dataset)
    
    if config['train_loss']['type'] == 'ELRLoss':
        train_loss = getattr(module_loss, 'ELRLoss')(num_examp=num_examp,
                                                     num_classes=config['num_classes'],
                                                     beta=config['train_loss']['args']['beta'])
    elif config['train_loss']['type'] == 'SCELoss':
        train_loss = getattr(module_loss, 'SCELoss')(alpha=config['train_loss']['args']['alpha'],
                                                     beta=config['train_loss']['args']['beta'],
                                                     num_classes=config['num_classes'])
    elif config['train_loss']['type'] == 'GCELoss':
        train_loss = getattr(module_loss, 'GCELoss')(q=config['train_loss']['args']['q'],
                                                     k=config['train_loss']['args']['k'],
                                                     trainset_size=num_examp,
                                                     truncated=config['train_loss']['args']['truncated'])
    elif config['train_loss']['type'] == 'GTLoss':
        train_loss = getattr(module_loss, 'GTLoss')()
    else:
        train_loss = getattr(module_loss, 'CCELoss')()

    print(train_loss)

    val_loss = getattr(module_loss, config['val_loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer, learning rate scheduler. delete every lines containing lr_scheduler for disabling scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())

    optimizer = config.initialize('optimizer', torch.optim, [{'params': trainable_params}])

    lr_scheduler = config.initialize('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    # Every train_loss type uses the same trainer arguments, so construct the trainer once.
    trainer = RealDatasetTrainer(model, train_loss, metrics, optimizer,
                                 config=config,
                                 data_loader=data_loader,
                                 parse=parse,
                                 teacher=teacher,
                                 valid_data_loader=valid_data_loader,
                                 test_data_loader=test_data_loader,
                                 lr_scheduler=lr_scheduler,
                                 val_criterion=val_loss,
                                 mode=parse.mode,
                                 entropy=parse.entropy,
                                 threshold=parse.threshold)

    trainer.train()
    
    logger = config.get_logger('trainer', config['trainer']['verbosity'])
    cfg_trainer = config['trainer']
Example #10
def main(config: ConfigParser, args: argparse.Namespace):
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True
    # torch.autograd.set_detect_anomaly(True)

    print("n_gpu", str(torch.cuda.device_count()))
    logger = config.get_logger('train')

    # setup data_loader instances
    train_datasets = []
    for corpus in config['train_datasets'].keys():
        train_datasets.append(
            config.init_obj(f'train_datasets.{corpus}',
                            module_dataset,
                            logger=logger))
    train_dataset = ConcatDataset(train_datasets)

    valid_datasets = {}
    for corpus in config['valid_datasets'].keys():
        valid_datasets[corpus] = config.init_obj(f'valid_datasets.{corpus}',
                                                 module_dataset,
                                                 logger=logger)

    # build model architecture, then print to console
    model: nn.Module = config.init_obj('arch', module_arch)
    logger.info(model)

    # get function handles of metrics
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # freeze encoder's parameters if args.freeze_encoder is true
    frozen_parameters = []
    not_frozen_parameters = []
    if args.freeze_encoder:
        for name, p in model.named_parameters():
            if "encoder" in name:
                p.requires_grad = False
                frozen_parameters.append(name)
            else:
                not_frozen_parameters.append(name)

        # Log frozen/not frozen parameters
        logger.info("Following parameters are frozen.")
        logger.info(frozen_parameters)
        logger.info("Following parameters are not frozen.")
        logger.info(not_frozen_parameters)

    # build optimizer, learning rate scheduler
    # Materialize as a list: the filtered parameters are iterated twice below.
    trainable_named_params = list(
        filter(lambda x: x[1].requires_grad, model.named_parameters()))
    no_decay = ('bias', 'LayerNorm.weight')
    optimizer_grouped_parameters = [
        {
            'params': [p for n, p in trainable_named_params
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': config['optimizer']['args']['weight_decay'],
        },
        {
            'params': [p for n, p in trainable_named_params
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
        },
    ]
    optimizer = config.init_obj('optimizer', module_optim,
                                optimizer_grouped_parameters)

    lr_scheduler = config.init_obj('lr_scheduler', module_optim, optimizer)

    trainer = Trainer(model,
                      metrics,
                      optimizer,
                      config=config,
                      train_dataset=train_dataset,
                      valid_datasets=valid_datasets,
                      lr_scheduler=lr_scheduler)

    trainer.train()
Example #11
        type=float,
        help=
        'threshold for argument existence. The higher you set, the higher precision gets. [0, 1]'
    )
    parser.add_argument(
        '--recall-threshold',
        default=0.0,
        type=float,
        help=
        'threshold for argument non-existence. The higher you set, the higher recall gets [0, 1]'
    )
    parser.add_argument('--result-suffix',
                        default='',
                        type=str,
                        help='custom evaluation result directory name')
    parser.add_argument('--run-id',
                        default=None,
                        type=str,
                        help='custom experiment directory name')
    parser.add_argument('--oracle',
                        action='store_true',
                        default=False,
                        help='use oracle dependency labels')
    parsed_args = parser.parse_args()
    inherit_save_dir = (parsed_args.resume is not None
                        and parsed_args.run_id is None)
    main(
        ConfigParser.from_args(parsed_args,
                               run_id=parsed_args.run_id,
                               inherit_save_dir=inherit_save_dir), parsed_args)
Example #12
    prediction_writer = PredictionKNPWriter(dataset, logger)
    with io.StringIO() as string:
        _ = prediction_writer.write(arguments_set, string, skip_untagged=False)
        knp_result = string.getvalue()
    with log_dir.joinpath('pas.knp').open('wt') as f:
        f.write(knp_result)
    return knp_result


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--resume', '-m', '--model', default=None, type=str,
                        help='path to trained checkpoint')
    parser.add_argument('--ens', '--ensemble', default=None, type=str,
                        help='path to directory where checkpoints to ensemble exist')
    parser.add_argument('-d', '--device', default='', type=str,
                        help='indices of GPUs to enable (default: all)')
    parser.add_argument('-c', '--config', default=None, type=str,
                        help='config file path (default: None)')
    parser.add_argument('--host', default='0.0.0.0', type=str,
                        help='host ip address (default: 0.0.0.0)')
    parser.add_argument('--port', default=12345, type=int,
                        help='host port number (default: 12345)')
    args = parser.parse_args()
    config = ConfigParser.from_args(args, run_id='')
    analyzer = Analyzer(config, logger=logger)

    server = SimpleXMLRPCServer((args.host, args.port))
    server.register_function(analyze_raw_data_from_client)
    server.serve_forever()
Example #13
def coteachingtrain(parse, config: ConfigParser):
    # implementation for WandB
    wandb_run_name_list = wandbRunlist(config, parse)
    wandb_run_name = '_'.join(wandb_run_name_list)

    if parse.no_wandb:
        wandb.init(config=config,
                   project='noisylabel',
                   entity='goguryeo',
                   name=wandb_run_name)

    # By default, pytorch utilizes multi-threaded cpu
    numthread = torch.get_num_threads()
    torch.set_num_threads(numthread)
    logger = config.get_logger('train')

    # Set seed for reproducibility
    fix_seed(config['seed'])

    data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=config['data_loader']['args']['batch_size'],
        shuffle=False
        if parse.distillation else config['data_loader']['args']['shuffle'],
        validation_split=0.0,
        num_batches=config['data_loader']['args']['num_batches'],
        training=True,
        num_workers=config['data_loader']['args']['num_workers'],
        pin_memory=config['data_loader']['args']['pin_memory'],
        seed=parse.dataseed  # parse.seed
    )

    valid_data_loader = None
    test_data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=128,
        shuffle=False,
        validation_split=0.0,
        training=False,
        num_workers=2).split_validation()

    # build model architecture, then print to console
    model = config.initialize('arch', module_arch)

    if parse.no_wandb: wandb.watch(model)

    if parse.distillation:
        teacher = config.initialize('teacher_arch', module_arch)

        data_loader = getattr(module_data, config['data_loader']['type'])(
            config['data_loader']['args']['data_dir'],
            batch_size=config['data_loader']['args']['batch_size'],
            shuffle=config['data_loader']['args']['shuffle'],
            validation_split=0.0,
            num_batches=config['data_loader']['args']['num_batches'],
            training=True,
            num_workers=config['data_loader']['args']['num_workers'],
            pin_memory=config['data_loader']['args']['pin_memory'],
            seed=parse.dataseed,
            teacher_idx=extract_cleanidx(teacher, data_loader, parse))
    else:
        teacher = None

    # get function handles of loss and metrics
    logger.info(config.config)
    if hasattr(data_loader.dataset, 'num_raw_example'):
        num_examp = data_loader.dataset.num_raw_example
    else:
        num_examp = len(data_loader.dataset)

    # F-coteaching
    if config['train_loss']['type'] == 'CCELoss':
        train_loss = getattr(module_loss, 'CCELoss')()

    # coteaching
    elif config['train_loss']['type'] == 'CoteachingLoss':
        train_loss = getattr(module_loss, 'CoteachingLoss')(
            forget_rate=config['trainer']['percent'],
            num_gradual=int(config['train_loss']['args']['num_gradual']),
            n_epoch=config['trainer']['epochs'])

    # coteaching_plus
    elif config['train_loss']['type'] == 'CoteachingPlusLoss':
        train_loss = getattr(module_loss, 'CoteachingPlusLoss')(
            forget_rate=config['trainer']['percent'],
            num_gradual=int(config['train_loss']['args']['num_gradual']),
            n_epoch=config['trainer']['epochs'])

    val_loss = getattr(module_loss, config['val_loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # F-coteaching
    if config['train_loss']['type'] == 'CCELoss':

        model = config.initialize('arch', module_arch)
        trainer = FCoteachingTrainer(model,
                                     train_loss,
                                     metrics,
                                     None,
                                     config=config,
                                     data_loader=data_loader,
                                     parse=parse,
                                     teacher=teacher,
                                     valid_data_loader=valid_data_loader,
                                     test_data_loader=test_data_loader,
                                     lr_scheduler=None,
                                     val_criterion=val_loss,
                                     mode=parse.mode,
                                     entropy=parse.entropy,
                                     threshold=parse.threshold)

    # coteaching / coteaching+ / distillation variants all share the same
    # two-model, two-optimizer setup, so handle them in a single branch.
    elif config['train_loss']['type'] in ('CoteachingLoss', 'CoteachingPlusLoss',
                                          'CoteachingDistillLoss',
                                          'CoteachingPlusDistillLoss'):

        model1 = config.initialize('arch', module_arch)
        model2 = config.initialize('arch', module_arch)

        trainable_params1 = filter(lambda p: p.requires_grad, model1.parameters())
        trainable_params2 = filter(lambda p: p.requires_grad, model2.parameters())

        optimizer1 = config.initialize('optimizer', torch.optim,
                                       [{'params': trainable_params1}])
        optimizer2 = config.initialize('optimizer', torch.optim,
                                       [{'params': trainable_params2}])

        if isinstance(optimizer1, torch.optim.Adam):
            lr_scheduler = None
        else:
            lr_scheduler1 = config.initialize('lr_scheduler',
                                              torch.optim.lr_scheduler, optimizer1)
            lr_scheduler2 = config.initialize('lr_scheduler',
                                              torch.optim.lr_scheduler, optimizer2)
            lr_scheduler = [lr_scheduler1, lr_scheduler2]

        trainer = CoteachingTrainer(
            [model1, model2],
            train_loss,
            metrics,
            [optimizer1, optimizer2],
            config=config,
            data_loader=data_loader,
            parse=parse,
            teacher=teacher,
            valid_data_loader=valid_data_loader,
            test_data_loader=test_data_loader,
            lr_scheduler=lr_scheduler,
            val_criterion=val_loss,
            mode=parse.mode,
            entropy=parse.entropy,
            threshold=parse.threshold,
            epoch_decay_start=config['trainer']['epoch_decay_start'],
            n_epoch=config['trainer']['epochs'],
            learning_rate=config['optimizer']['args']['lr'])

    trainer.train()

    logger = config.get_logger('trainer', config['trainer']['verbosity'])
    cfg_trainer = config['trainer']
Example #14
        help='path to the directory where parsed documents are saved; '
        'in case parsed files exist here, KNP is skipped')
    parser.add_argument('--export-dir',
                        default=None,
                        type=str,
                        help='directory where analysis result is exported')
    parser.add_argument('-tab',
                        '--tab',
                        action='store_true',
                        default=False,
                        help='whether to output details')
    parser.add_argument(
        '--remote-knp',
        action='store_true',
        default=False,
        help='Use KNP running on remote host. '
        'Make sure you specify host address and port in analyzer/config.ini')
    parser.add_argument(
        '--skip-untagged',
        action='store_true',
        default=False,
        help='If set, do not export documents which failed to be analyzed')
    parser.add_argument(
        '--rel-only',
        action='store_true',
        default=False,
        help='If set, do not add <述語項構造> tag besides <rel> tag to system output'
    )
    parsed_args = parser.parse_args()
    main(ConfigParser.from_args(parsed_args, run_id=''), parsed_args)
Example #15
            if metric.__name__ in ['precision', 'recall', 'f1_score'] \
                    and type(algorithm) not in [Coma, SemProp]:  # Do not use the 1-1 match filter on Coma and SemProp
                final_metrics[metric.__name__] = metric(
                    matches, golden_standard, True)
            else:
                final_metrics[metric.__name__] = metric(
                    matches, golden_standard)
        else:
            for n in config['metrics']['args']['n']:
                final_metrics[metric.__name__.replace(
                    '_n_',
                    '_' + str(n) + '_')] = metric(matches, golden_standard, n)

    print("Metrics: ", final_metrics)

    write_output(config['name'], config['algorithm']['type'], matches,
                 final_metrics, run_times)


if __name__ == '__main__':
    print("Running job")
    args = argparse.ArgumentParser(description='Schema matching job')
    args.add_argument('-c',
                      '--config',
                      default=None,
                      type=str,
                      help='config file path (default: None)')

    configuration = ConfigParser(args)
    main(configuration)
Example #16
def gtrobustlosstrain(parse, config: ConfigParser):
    dataset_name = config['name'].split('_')[0]
    lr_scheduler_name = config['lr_scheduler']['type']
    loss_fn_name = config['train_loss']['type']

    wandb_run_name_list = []

    if parse.distillation:
        if parse.distill_mode == 'eigen':
            wandb_run_name_list.append('distil')
        else:
            wandb_run_name_list.append('kmeans')
    else:
        wandb_run_name_list.append('baseline')
    wandb_run_name_list.append(dataset_name)
    wandb_run_name_list.append(lr_scheduler_name)
    wandb_run_name_list.append(loss_fn_name)
    wandb_run_name_list.append(str(config['trainer']['asym']))
    wandb_run_name_list.append(str(config['trainer']['percent']))
    wandb_run_name = '_'.join(wandb_run_name_list)

    if parse.no_wandb:
        wandb.init(config=config,
                   project='noisylabel',
                   entity='goguryeo',
                   name=wandb_run_name)

    # By default, pytorch utilizes multi-threaded cpu
    # Set to handle whole procedures on a single core
    torch.set_num_threads(1)

    logger = config.get_logger('train')

    # Set seed for reproducibility
    random.seed(config['seed'])
    torch.manual_seed(config['seed'])
    torch.cuda.manual_seed_all(config['seed'])
    torch.backends.cudnn.deterministic = True
    np.random.seed(config['seed'])

    data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=config['data_loader']['args']['batch_size'],
        shuffle=False
        if parse.distillation else config['data_loader']['args']['shuffle'],
        #         validation_split=config['data_loader']['args']['validation_split'],
        validation_split=0.0,
        num_batches=config['data_loader']['args']['num_batches'],
        training=True,
        num_workers=config['data_loader']['args']['num_workers'],
        pin_memory=config['data_loader']['args']['pin_memory'])

    # valid_data_loader = data_loader.split_validation()

    valid_data_loader = None

    # test_data_loader = None

    test_data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=128,
        shuffle=False,
        validation_split=0.0,
        training=False,
        num_workers=2).split_validation()

    # build model architecture, then print to console
    model = config.initialize('arch', module_arch)

    if parse.no_wandb:
        wandb.watch(model)

    if parse.distillation:
        teacher = config.initialize('arch', module_arch)
        teacher.load_state_dict(
            torch.load('./checkpoint/' + parse.load_name)['state_dict'])
        if not parse.reinit:
            model.load_state_dict(
                torch.load('./checkpoint/' + parse.load_name)['state_dict'])
        for params in teacher.parameters():
            params.requires_grad = False
        if parse.distill_mode == 'eigen':
            tea_label_list, tea_out_list = get_out_list(teacher, data_loader)
            singular_dict, v_ortho_dict = get_singular_value_vector(
                tea_label_list, tea_out_list)

            for key in v_ortho_dict.keys():
                v_ortho_dict[key] = v_ortho_dict[key].cuda()

            teacher_idx = singular_label(v_ortho_dict, tea_out_list,
                                         tea_label_list)
        else:
            teacher_idx = get_out_list(teacher, data_loader)

        data_loader = getattr(module_data, config['data_loader']['type'])(
            config['data_loader']['args']['data_dir'],
            batch_size=config['data_loader']['args']['batch_size'],
            shuffle=config['data_loader']['args']['shuffle'],
            #         validation_split=config['data_loader']['args']['validation_split'],
            validation_split=0.0,
            num_batches=config['data_loader']['args']['num_batches'],
            training=True,
            num_workers=config['data_loader']['args']['num_workers'],
            pin_memory=config['data_loader']['args']['pin_memory'],
            teacher_idx=teacher_idx)
    else:
        teacher = None

    # get function handles of loss and metrics
    logger.info(config.config)
    if hasattr(data_loader.dataset, 'num_raw_example'):
        num_examp = data_loader.dataset.num_raw_example
    else:
        num_examp = len(data_loader.dataset)

    if config['train_loss']['type'] == 'ELR_GTLoss':
        train_loss = getattr(module_loss, 'ELR_GTLoss')(
            num_examp=num_examp,
            num_classes=config['num_classes'],
            beta=config['train_loss']['args']['beta'])
    elif config['train_loss']['type'] == 'SCE_GTLoss':
        train_loss = getattr(module_loss, 'SCE_GTLoss')(
            alpha=config['train_loss']['args']['alpha'],
            beta=config['train_loss']['args']['beta'],
            num_classes=config['num_classes'])
    elif config['train_loss']['type'] == 'GCE_GTLoss':
        train_loss = getattr(module_loss, 'GCE_GTLoss')(
            q=config['train_loss']['args']['q'],
            k=config['train_loss']['args']['k'],
            trainset_size=num_examp,
            truncated=config['train_loss']['args']['truncated'])
    elif config['train_loss']['type'] == 'CCE_GTLoss':
        train_loss = getattr(module_loss, 'CCE_GTLoss')()

    val_loss = getattr(module_loss, config['val_loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer, learning rate scheduler. delete every lines containing lr_scheduler for disabling scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())

    optimizer = config.initialize('optimizer', torch.optim,
                                  [{
                                      'params': trainable_params
                                  }])

    lr_scheduler = config.initialize('lr_scheduler', torch.optim.lr_scheduler,
                                     optimizer)

    # All four GT losses take the same trainer arguments, so construct the trainer once.
    trainer = GroundTruthTrainer(model,
                                 train_loss,
                                 metrics,
                                 optimizer,
                                 config=config,
                                 data_loader=data_loader,
                                 teacher=teacher,
                                 valid_data_loader=valid_data_loader,
                                 test_data_loader=test_data_loader,
                                 lr_scheduler=lr_scheduler,
                                 val_criterion=val_loss,
                                 mode=parse.mode,
                                 entropy=parse.entropy,
                                 threshold=parse.threshold)

    trainer.train()

    logger = config.get_logger('trainer', config['trainer']['verbosity'])
    cfg_trainer = config['trainer']
Example #17

if __name__ == '__main__':
    print("n_gpu", str(torch.cuda.device_count()))
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--config',
                        default=None,
                        type=str,
                        help='config file path (default: None)')
    parser.add_argument('-r',
                        '--resume',
                        default=None,
                        type=str,
                        help='path to latest checkpoint (default: None)')
    parser.add_argument('-d',
                        '--device',
                        default='',
                        type=str,
                        help='indices of GPUs to enable (default: "")')
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help='random seed for initialization')
    parser.add_argument("-f",
                        '--freeze_encoder',
                        action='store_true',
                        help='Freeze encoder during training')
    parsed_args = parser.parse_args()
    main(ConfigParser.from_args(parsed_args), parsed_args)
Example #18
    args.add_argument('-c',
                      '--config',
                      default=None,
                      type=str,
                      help='config file path (default: None)')
    args.add_argument('-r',
                      '--resume',
                      default=None,
                      type=str,
                      help='path to latest checkpoint (default: None)')

    # custom cli options to modify configuration from default values given in json file.
    CustomArgs = collections.namedtuple('CustomArgs', 'flags type target')
    options = [
        CustomArgs(['-n', '--name'], type=str, target='name'),
        CustomArgs(['--lr', '--learning_rate'],
                   type=float,
                   target='optimizer;args;lr'),
        CustomArgs(['--bs', '--batch_size'],
                   type=int,
                   target='dataloader;args;batch_size'),
        CustomArgs(['--u2w', '--utt2wav_val'],
                   type=str,
                   target='valid_dataset;args;wav_scp'),
        CustomArgs(['--u2l', '--utt2label_val'],
                   type=str,
                   target='valid_dataset;args;utt2label')
    ]
    config = ConfigParser.from_args(args, options)
    main(config)