Ejemplo n.º 1
0
def main():
    """Debug entry point: print the first three collated training batches.

    Reads the YAML config from a hard-coded path, passes the configured seed
    to ``set_random_seed``, loads the train and dev datasets through
    ``MTDataLoader``, builds the vocabulary and wraps the first training
    dataset in a ``DataLoader`` (batch size 4) that collates with
    ``my_collate_fn(batch, vocab)``.
    """
    # NOTE(review): machine-specific absolute path; replace it (e.g. with
    # sys.argv[1]) before running this anywhere else.
    config_file_path = '/Users/zx/Documents/GitHub/NMT/config/translation-transformer/medical-ende.yaml'

    print('read config')
    with open(config_file_path, 'r') as config_file:
        # safe_load instead of yaml.load(f): PyYAML >= 6 requires an explicit
        # Loader, and the default loader of older versions could construct
        # arbitrary Python objects from the file.
        config = yaml.safe_load(config_file)

    set_random_seed(config['Train']['random_seed'])

    print('load dataset ...')
    mt_data_loader = MTDataLoader(config)
    mt_data_loader.load_datasets(load_train=True,
                                 load_dev=True,
                                 load_test=False)
    mt_data_loader.build_vocab()
    vocab = mt_data_loader.vocab

    train_dataset = mt_data_loader.train_datasets[0]
    data_loader = DataLoader(train_dataset,
                             batch_size=4,
                             collate_fn=lambda x: my_collate_fn(x, vocab))

    # Only the first three batches are of interest for inspection.
    for batch_index, batch in enumerate(data_loader):
        if batch_index >= 3:
            break
        print(batch)
Ejemplo n.º 2
0
def main():
    """Train the adapters of one domain in ``transformer_with_parallel_adapter``.

    The path of the YAML config is taken from ``sys.argv[1]``.  After the
    datasets, vocabulary and iterators are built, every parameter is frozen
    except those whose name contains both ``'adapter'`` and
    ``config['Train']['training_domain']``.  Only the trainable parameters are
    handed to the optimizer, and training runs through
    ``MultiAdapterTrainer``.
    """
    import shutil  # local import: only needed to snapshot the config file

    config_file_path = sys.argv[1]

    print('read config')
    with open(config_file_path, 'r') as config_file:
        # safe_load instead of yaml.load(f): PyYAML >= 6 requires an explicit
        # Loader, and the default loader of older versions could construct
        # arbitrary Python objects from the file.
        config = yaml.safe_load(config_file)
    record_dir = config['Record']['training_record_path']
    create_path(record_dir)
    train_config = config['Train']

    set_random_seed(train_config['random_seed'])
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load dataset
    print('load dataset ...')
    mt_data_loader = MTDataLoader(config)
    mt_data_loader.load_datasets(load_train=True,
                                 load_dev=True,
                                 load_test=False)
    mt_data_loader.build_vocab()
    mt_data_loader.build_iterators(device=device,
                                   build_train=True,
                                   build_dev=True,
                                   build_test=False)

    vocab = mt_data_loader.vocab
    train_iterators = mt_data_loader.train_iterators
    dev_iterators = mt_data_loader.dev_iterators
    dev_test_iterators = mt_data_loader.dev_test_iterators

    model_builder = ModelBuilder()
    model = model_builder.build_model(
        model_name='transformer_with_parallel_adapter',
        model_config=config['Model'],
        vocab=vocab,
        device=device,
        load_pretrained=train_config['load_exist_model'],
        pretrain_path=train_config['model_load_path'])
    criterion = model_builder.build_criterion(
        criterion_config=config['Criterion'], vocab=vocab)

    # Train only the adapter parameters of the configured domain and list them.
    training_domain = train_config['training_domain']
    for name, param in model.named_parameters():
        param.requires_grad = 'adapter' in name and training_domain in name
        if param.requires_grad:
            print(name, param.shape)

    # A list rather than a one-shot filter() iterator, so the collection can be
    # traversed more than once by the builder.
    parameters = [p for p in model.parameters() if p.requires_grad]

    optimizer = model_builder.build_optimizer(
        parameters=parameters,
        optimizer_config=config['Optimizer'],
        load_pretrained=train_config['load_optimizer'],
        pretrain_path=train_config['optimizer_path'])
    lr_scheduler = model_builder.build_lr_scheduler(
        optimizer=optimizer,
        lr_scheduler_config=config['Optimizer']['lr_scheduler'],
        load_pretrained=train_config['load_lr_scheduler'],
        pretrain_path=train_config['lr_scheduler_path'])

    # Keep a copy of the config next to the training records.  shutil.copy
    # replaces a shell `cp` command string that broke on paths containing
    # spaces or shell metacharacters and hid failures.
    try:
        shutil.copy(config_file_path,
                    os.path.join(record_dir, 'model_config.txt'))
    except OSError as err:
        # Best effort, as before: a failed snapshot must not abort training.
        print('warning: could not copy config file:', err)

    trainer = MultiAdapterTrainer(
        model=model,
        criterion=criterion,
        vocab=vocab,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        train_iterators=train_iterators,
        validation_iterators=dev_iterators,
        validation_test_iterators=dev_test_iterators,
        optimizer_config=config['Optimizer'],
        train_config=train_config,
        validation_config=config['Validation'],
        record_config=config['Record'],
        device=device,
        used_domain_list=train_config['used_domain_list'],
    )

    trainer.train()
Ejemplo n.º 3
0
def main():
    """Train the ``transformer_with_split_position`` model from a YAML config.

    The path of the YAML config is taken from ``sys.argv[1]``.  The function
    builds the datasets, vocabulary and iterators with ``MTDataLoader``, the
    model, criterion, optimizer and learning-rate scheduler with
    ``ModelBuilder``, stores a copy of the config in the training record
    directory and runs ``Split_Position_Trainer.train()``.
    """
    import shutil  # local import: only needed to snapshot the config file

    config_file_path = sys.argv[1]

    print('read config')
    with open(config_file_path, 'r') as config_file:
        # safe_load instead of yaml.load(f): PyYAML >= 6 requires an explicit
        # Loader, and the default loader of older versions could construct
        # arbitrary Python objects from the file.
        config = yaml.safe_load(config_file)
    record_dir = config['Record']['training_record_path']
    create_path(record_dir)
    train_config = config['Train']

    # set random seed
    set_random_seed(train_config['random_seed'])

    # set the device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load dataset
    print('load dataset ...')
    mt_data_loader = MTDataLoader(config)
    mt_data_loader.load_datasets(load_train=True,
                                 load_dev=True,
                                 load_test=False)
    mt_data_loader.build_vocab()
    mt_data_loader.build_iterators(device=device,
                                   build_train=True,
                                   build_dev=True,
                                   build_test=False)

    vocab = mt_data_loader.vocab
    train_iterators = mt_data_loader.train_iterators
    dev_iterators = mt_data_loader.dev_iterators
    dev_test_iterators = mt_data_loader.dev_test_iterators

    # make model and criterion
    model_builder = ModelBuilder()
    model = model_builder.build_model(
        model_name='transformer_with_split_position',
        model_config=config['Model'],
        vocab=vocab,
        device=device,
        load_pretrained=train_config['load_exist_model'],
        pretrain_path=train_config['model_load_path'])
    criterion = model_builder.build_criterion(
        criterion_config=config['Criterion'], vocab=vocab)

    # make optimizer
    optimizer = model_builder.build_optimizer(
        parameters=model.parameters(),
        optimizer_config=config['Optimizer'],
        load_pretrained=train_config['load_optimizer'],
        pretrain_path=train_config['optimizer_path'])
    # make learning-rate scheduler
    lr_scheduler = model_builder.build_lr_scheduler(
        optimizer=optimizer,
        lr_scheduler_config=config['Optimizer']['lr_scheduler'],
        load_pretrained=train_config['load_lr_scheduler'],
        pretrain_path=train_config['lr_scheduler_path'])

    # Keep a copy of the config next to the training records.  shutil.copy
    # replaces a shell `cp` command string that broke on paths containing
    # spaces or shell metacharacters and hid failures.
    try:
        shutil.copy(config_file_path,
                    os.path.join(record_dir, 'model_config.txt'))
    except OSError as err:
        # Best effort, as before: a failed snapshot must not abort training.
        print('warning: could not copy config file:', err)

    trainer = Split_Position_Trainer(
        model=model,
        criterion=criterion,
        vocab=vocab,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        train_iterators=train_iterators,
        validation_iterators=dev_iterators,
        validation_test_iterators=dev_test_iterators,
        optimizer_config=config['Optimizer'],
        train_config=train_config,
        validation_config=config['Validation'],
        record_config=config['Record'],
        device=device,
    )

    trainer.train()
def main():
    """Train selected parameters of ``transformer_with_mix_adapter_update``.

    The path of the YAML config is taken from ``sys.argv[1]``.
    ``config['Train']['params']`` is a list of filters applied to parameter
    names: a string filter must be a substring of the name, a list filter
    must contain at least one substring of the name.  A parameter stays
    trainable only when it satisfies every filter; all others are frozen.
    Only the trainable parameters are handed to the optimizer, and training
    runs through ``Mix_Adapter_Trainer``.
    """
    import shutil  # local import: only needed to snapshot the config file

    config_file_path = sys.argv[1]

    print('read config')
    with open(config_file_path, 'r') as config_file:
        # safe_load instead of yaml.load(f): PyYAML >= 6 requires an explicit
        # Loader, and the default loader of older versions could construct
        # arbitrary Python objects from the file.
        config = yaml.safe_load(config_file)
    record_dir = config['Record']['training_record_path']
    create_path(record_dir)
    train_config = config['Train']

    set_random_seed(train_config['random_seed'])
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load dataset
    print('load dataset ...')
    mt_data_loader = MTDataLoader(config)
    mt_data_loader.load_datasets(load_train=True,
                                 load_dev=True,
                                 load_test=False)
    mt_data_loader.build_vocab()
    mt_data_loader.build_iterators(device=device,
                                   build_train=True,
                                   build_dev=True,
                                   build_test=False)

    vocab = mt_data_loader.vocab
    train_iterators = mt_data_loader.train_iterators
    dev_iterators = mt_data_loader.dev_iterators
    dev_test_iterators = mt_data_loader.dev_test_iterators

    model_builder = ModelBuilder()
    model = model_builder.build_model(
        model_name='transformer_with_mix_adapter_update',
        model_config=config['Model'],
        vocab=vocab,
        device=device,
        load_pretrained=train_config['load_exist_model'],
        pretrain_path=train_config['model_load_path'])

    criterion = model_builder.build_criterion(
        criterion_config=config['Criterion'], vocab=vocab)
    # The validation criterion is built without label smoothing.
    validation_criterion = model_builder.build_criterion(
        criterion_config={
            'name': 'kl_divergence',
            'label_smoothing': 0,
        },
        vocab=vocab)

    train_params = train_config['params']

    def _passes_all_filters(name):
        """Return True when ``name`` satisfies every entry of ``train_params``."""
        for param_filter in train_params:
            if isinstance(param_filter, str):
                if param_filter not in name:
                    return False
            elif isinstance(param_filter, list):
                if not any(domain in name for domain in param_filter):
                    return False
        return True

    # With no filters configured the model's requires_grad flags are left
    # exactly as build_model produced them.
    if train_params:
        for name, param in model.named_parameters():
            param.requires_grad = _passes_all_filters(name)

    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.shape)

    # A list rather than a one-shot filter() iterator, so the collection can be
    # traversed more than once by the builder.
    parameters = [p for p in model.parameters() if p.requires_grad]

    optimizer = model_builder.build_optimizer(
        parameters=parameters,
        optimizer_config=config['Optimizer'],
        load_pretrained=train_config['load_optimizer'],
        pretrain_path=train_config['optimizer_path'])
    lr_scheduler = model_builder.build_lr_scheduler(
        optimizer=optimizer,
        lr_scheduler_config=config['Optimizer']['lr_scheduler'],
        load_pretrained=train_config['load_lr_scheduler'],
        pretrain_path=train_config['lr_scheduler_path'])

    # Keep a copy of the config next to the training records.  shutil.copy
    # replaces a shell `cp` command string that broke on paths containing
    # spaces or shell metacharacters and hid failures.
    try:
        shutil.copy(config_file_path,
                    os.path.join(record_dir, 'model_config.txt'))
    except OSError as err:
        # Best effort, as before: a failed snapshot must not abort training.
        print('warning: could not copy config file:', err)

    trainer = Mix_Adapter_Trainer(
        model=model,
        criterion=criterion,
        validation_criterion=validation_criterion,
        vocab=vocab,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        train_iterators=train_iterators,
        validation_iterators=dev_iterators,
        validation_test_iterators=dev_test_iterators,
        optimizer_config=config['Optimizer'],
        train_config=train_config,
        validation_config=config['Validation'],
        record_config=config['Record'],
        device=device,
    )

    trainer.train()
Ejemplo n.º 5
0
def main():
    """Train a ``transformer`` model with a second, pretrained reference model.

    The path of the YAML config is taken from ``sys.argv[1]``.  Two
    ``transformer`` models are built: the one being trained, and a reference
    model that is always loaded from ``config['Train']['ref_model_load_path']``.
    When ``config['Train']`` has a ``'params'`` entry, it is used as a list of
    name filters (a string must be a substring of the parameter name, a list
    must contain at least one substring of it); parameters failing any filter
    are frozen.  Training runs through ``Kd_Trainer``, which also receives
    ``ref_temperature`` and ``ref_factor`` from the train config.
    """
    import shutil  # local import: only needed to snapshot the config file

    config_file_path = sys.argv[1]

    print('read config')
    with open(config_file_path, 'r') as config_file:
        # safe_load instead of yaml.load(f): PyYAML >= 6 requires an explicit
        # Loader, and the default loader of older versions could construct
        # arbitrary Python objects from the file.
        config = yaml.safe_load(config_file)
    record_dir = config['Record']['training_record_path']
    create_path(record_dir)
    train_config = config['Train']

    # set random seed
    set_random_seed(train_config['random_seed'])

    # set the device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load dataset
    print('load dataset ...')
    mt_data_loader = MTDataLoader(config)
    mt_data_loader.load_datasets(load_train=True, load_dev=True, load_test=False)
    mt_data_loader.build_vocab()
    mt_data_loader.build_iterators(device=device, build_train=True, build_dev=True, build_test=False)

    vocab = mt_data_loader.vocab
    train_iterators = mt_data_loader.train_iterators
    dev_iterators = mt_data_loader.dev_iterators
    dev_test_iterators = mt_data_loader.dev_test_iterators

    # make the trained model and the reference model
    model_builder = ModelBuilder()
    model = model_builder.build_model(model_name='transformer',
                                      model_config=config['Model'],
                                      vocab=vocab,
                                      device=device,
                                      load_pretrained=train_config['load_exist_model'],
                                      pretrain_path=train_config['model_load_path'])
    ref_model = model_builder.build_model(model_name='transformer',
                                          model_config=config['Model'],
                                          vocab=vocab,
                                          device=device,
                                          load_pretrained=True,
                                          pretrain_path=train_config['ref_model_load_path'])

    train_params = train_config.get('params')

    def _passes_all_filters(name):
        """Return True when ``name`` satisfies every entry of ``train_params``."""
        for param_filter in train_params:
            if isinstance(param_filter, str):
                if param_filter not in name:
                    return False
            elif isinstance(param_filter, list):
                if not any(domain in name for domain in param_filter):
                    return False
        return True

    # Without configured filters the model's requires_grad flags are left
    # exactly as build_model produced them.
    if train_params:
        for name, param in model.named_parameters():
            param.requires_grad = _passes_all_filters(name)

    # The heading is printed right before the listing it announces.
    print('trained parameters: ')
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.shape)

    # make criterion
    # the label_smoothing of validation criterion is always set to 0
    criterion = model_builder.build_criterion(criterion_config=config['Criterion'], vocab=vocab)
    validation_criterion = model_builder.build_criterion(criterion_config={
        'name': 'kl_divergence',
        'label_smoothing': 0,
    }, vocab=vocab)

    # make optimizer
    # NOTE(review): the optimizer receives all parameters, including frozen
    # ones; kept as in the original so existing optimizer checkpoints stay
    # loadable.
    optimizer = model_builder.build_optimizer(parameters=model.parameters(),
                                              optimizer_config=config['Optimizer'],
                                              load_pretrained=train_config['load_optimizer'],
                                              pretrain_path=train_config['optimizer_path'])
    # make learning-rate scheduler
    lr_scheduler = model_builder.build_lr_scheduler(optimizer=optimizer,
                                                    lr_scheduler_config=config['Optimizer']['lr_scheduler'],
                                                    load_pretrained=train_config['load_lr_scheduler'],
                                                    pretrain_path=train_config['lr_scheduler_path'])

    # Keep a copy of the config next to the training records.  shutil.copy
    # replaces a shell `cp` command string that broke on paths containing
    # spaces or shell metacharacters and hid failures.
    try:
        shutil.copy(config_file_path,
                    os.path.join(record_dir, 'model_config.txt'))
    except OSError as err:
        # Best effort, as before: a failed snapshot must not abort training.
        print('warning: could not copy config file:', err)

    trainer = Kd_Trainer(
        model=model,
        criterion=criterion,
        validation_criterion=validation_criterion,
        vocab=vocab,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        train_iterators=train_iterators,
        validation_iterators=dev_iterators,
        validation_test_iterators=dev_test_iterators,
        optimizer_config=config['Optimizer'],
        train_config=train_config,
        validation_config=config['Validation'],
        record_config=config['Record'],
        device=device,
        ref_model=ref_model,
        ref_temperature=train_config['ref_temperature'],
        ref_factor=train_config['ref_factor'],
    )

    trainer.train()
def main():
    """Run one training stage of the ``transformer_with_mix_adapter`` model.

    The path of the YAML config is taken from ``sys.argv[1]``.
    ``config['Train']['stage']`` selects which parameters stay trainable and
    which trainer class runs ("domain" below means any entry of
    ``config['Dataset']['train_dataset_domain']`` occurring in the name):

    * ``'classify'``: names containing ``'classifier'``; ``ClassifierTrainer``.
    * ``'mixture_of_experts'``: names containing ``'adapter'``;
      ``Mix_Adapter_Trainer``.
    * ``'train_gate'``: names containing ``'inner_gate'`` and a domain;
      ``Mix_Adapter_Trainer``.
    * ``'mix_adapter_translation'``: names containing ``'adapter'`` and a
      domain; ``Mix_Adapter_Trainer``.
    * ``'kd'``: names containing ``'adapter'`` and a domain;
      ``Kd_Adapter_Trainer``.
    * any other value: names containing ``'adapter'`` and a domain;
      ``Adapter_Trainer``.

    Only the trainable parameters are handed to the optimizer.
    """
    import shutil  # local import: only needed to snapshot the config file

    config_file_path = sys.argv[1]

    print('read config')
    with open(config_file_path, 'r') as config_file:
        # safe_load instead of yaml.load(f): PyYAML >= 6 requires an explicit
        # Loader, and the default loader of older versions could construct
        # arbitrary Python objects from the file.
        config = yaml.safe_load(config_file)
    record_dir = config['Record']['training_record_path']
    create_path(record_dir)
    train_config = config['Train']

    set_random_seed(train_config['random_seed'])
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load dataset
    print('load dataset ...')
    mt_data_loader = MTDataLoader(config)
    mt_data_loader.load_datasets(load_train=True,
                                 load_dev=True,
                                 load_test=False)
    mt_data_loader.build_vocab()
    mt_data_loader.build_iterators(device=device,
                                   build_train=True,
                                   build_dev=True,
                                   build_test=False)

    vocab = mt_data_loader.vocab
    train_iterators = mt_data_loader.train_iterators
    train_iterator_domain = config['Dataset']['train_dataset_domain']
    dev_iterators = mt_data_loader.dev_iterators
    dev_test_iterators = mt_data_loader.dev_test_iterators
    dev_iterator_domain = config['Dataset']['dev_dataset_domain']

    model_builder = ModelBuilder()
    model = model_builder.build_model(
        model_name='transformer_with_mix_adapter',
        model_config=config['Model'],
        vocab=vocab,
        device=device,
        load_pretrained=train_config['load_exist_model'],
        pretrain_path=train_config['model_load_path'])
    model.classify_domain_mask = model.classify_domain_mask.to(device)

    criterion = model_builder.build_criterion(
        criterion_config=config['Criterion'], vocab=vocab)
    # The validation criterion is built without label smoothing.
    validation_criterion = model_builder.build_criterion(
        criterion_config={
            'name': 'kl_divergence',
            'label_smoothing': 0,
        },
        vocab=vocab)

    training_stage = train_config['stage']

    def _in_training_domain(name):
        """Return True when ``name`` mentions one of the training domains."""
        return any(domain in name for domain in train_iterator_domain)

    def _is_trainable(name):
        """Return whether parameter ``name`` is updated in the current stage."""
        if training_stage == 'classify':
            return 'classifier' in name
        if training_stage == 'mixture_of_experts':
            return 'adapter' in name
        if training_stage == 'train_gate':
            return 'inner_gate' in name and _in_training_domain(name)
        # every other stage updates the adapters of the training domains
        return 'adapter' in name and _in_training_domain(name)

    for name, param in model.named_parameters():
        param.requires_grad = _is_trainable(name)
        if param.requires_grad:
            print(name, param.shape)

    # A list rather than a one-shot filter() iterator, so the collection can be
    # traversed more than once by the builder.
    parameters = [p for p in model.parameters() if p.requires_grad]

    optimizer = model_builder.build_optimizer(
        parameters=parameters,
        optimizer_config=config['Optimizer'],
        load_pretrained=train_config['load_optimizer'],
        pretrain_path=train_config['optimizer_path'])
    lr_scheduler = model_builder.build_lr_scheduler(
        optimizer=optimizer,
        lr_scheduler_config=config['Optimizer']['lr_scheduler'],
        load_pretrained=train_config['load_lr_scheduler'],
        pretrain_path=train_config['lr_scheduler_path'])

    # Keep a copy of the config next to the training records.  shutil.copy
    # replaces a shell `cp` command string that broke on paths containing
    # spaces or shell metacharacters and hid failures.
    try:
        shutil.copy(config_file_path,
                    os.path.join(record_dir, 'model_config.txt'))
    except OSError as err:
        # Best effort, as before: a failed snapshot must not abort training.
        print('warning: could not copy config file:', err)

    # Keyword arguments accepted by every trainer class used below.
    shared_kwargs = dict(
        model=model,
        criterion=criterion,
        vocab=vocab,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        train_iterators=train_iterators,
        validation_iterators=dev_iterators,
        optimizer_config=config['Optimizer'],
        train_config=train_config,
        validation_config=config['Validation'],
        record_config=config['Record'],
        device=device,
    )

    if training_stage == 'classify':
        trainer = ClassifierTrainer(
            train_iterators_domain_list=train_iterator_domain,
            validation_iterators_domain_list=dev_iterator_domain,
            domain_dict=config['Model']['domain_dict'],
            **shared_kwargs)
    else:
        # The translation trainers additionally share these three arguments.
        translation_kwargs = dict(
            shared_kwargs,
            validation_criterion=validation_criterion,
            validation_test_iterators=dev_test_iterators,
            target_domain=train_config['target_domain'],
        )
        if training_stage in ('mix_adapter_translation', 'train_gate',
                              'mixture_of_experts'):
            trainer = Mix_Adapter_Trainer(
                used_domain_list=train_config['used_domain_list'],
                used_inner_gate=train_config['used_inner_gate'],
                **translation_kwargs)
        elif training_stage == 'kd':
            trainer = Kd_Adapter_Trainer(
                ref_domain_dict=train_config['kd_ref_domain'],
                **translation_kwargs)
        else:
            trainer = Adapter_Trainer(**translation_kwargs)

    trainer.train()
Ejemplo n.º 7
0
def main():
    """Train the ``transformer_language_model`` model from a YAML config.

    The path of the YAML config is taken from ``sys.argv[1]``.  The function
    builds the datasets, vocabulary and iterators with ``LMDataLoader``,
    prints the ``text`` of up to five examples of the first training dataset,
    builds the model, criteria, optimizer and learning-rate scheduler with
    ``ModelBuilder``, stores a copy of the config in the training record
    directory and runs ``Trainer.train()``.
    """
    import itertools  # local imports: used for the preview and the snapshot
    import shutil

    config_file_path = sys.argv[1]

    print('read config')
    with open(config_file_path, 'r') as config_file:
        # safe_load instead of yaml.load(f): PyYAML >= 6 requires an explicit
        # Loader, and the default loader of older versions could construct
        # arbitrary Python objects from the file.
        config = yaml.safe_load(config_file)
    record_dir = config['Record']['training_record_path']
    create_path(record_dir)
    train_config = config['Train']

    # set random seed
    set_random_seed(train_config['random_seed'])

    # set the device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load dataset
    print('load dataset ...')
    lm_data_loader = LMDataLoader(config)
    lm_data_loader.load_datasets(load_train=True,
                                 load_dev=True,
                                 load_test=False)
    lm_data_loader.build_vocab()
    lm_data_loader.build_iterators(device=device,
                                   build_train=True,
                                   build_dev=True,
                                   build_test=False)

    # Preview at most five examples; islice stops early on smaller datasets
    # instead of raising IndexError.
    preview_examples = lm_data_loader.train_datasets[0].examples
    for example in itertools.islice(preview_examples, 5):
        print(example.text)

    vocab = lm_data_loader.vocab
    train_iterators = lm_data_loader.train_iterators
    dev_iterators = lm_data_loader.dev_iterators

    # make model
    model_builder = ModelBuilder()
    model = model_builder.build_model(
        model_name='transformer_language_model',
        model_config=config['Model'],
        vocab=vocab,
        device=device,
        load_pretrained=train_config['load_exist_model'],
        pretrain_path=train_config['model_load_path'])
    print('trained parameters: ')
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.shape)

    # make criterion
    # the label_smoothing of validation criterion is always set to 0
    criterion = model_builder.build_criterion(
        criterion_config=config['Criterion'], vocab=vocab)
    validation_criterion = model_builder.build_criterion(
        criterion_config={
            'name': 'kl_divergence',
            'label_smoothing': 0,
        },
        vocab=vocab)

    # make optimizer
    optimizer = model_builder.build_optimizer(
        parameters=model.parameters(),
        optimizer_config=config['Optimizer'],
        load_pretrained=train_config['load_optimizer'],
        pretrain_path=train_config['optimizer_path'])
    # make learning-rate scheduler
    lr_scheduler = model_builder.build_lr_scheduler(
        optimizer=optimizer,
        lr_scheduler_config=config['Optimizer']['lr_scheduler'],
        load_pretrained=train_config['load_lr_scheduler'],
        pretrain_path=train_config['lr_scheduler_path'])

    # Keep a copy of the config next to the training records.  shutil.copy
    # replaces a shell `cp` command string that broke on paths containing
    # spaces or shell metacharacters and hid failures.
    try:
        shutil.copy(config_file_path,
                    os.path.join(record_dir, 'model_config.txt'))
    except OSError as err:
        # Best effort, as before: a failed snapshot must not abort training.
        print('warning: could not copy config file:', err)

    trainer = Trainer(
        model=model,
        criterion=criterion,
        validation_criterion=validation_criterion,
        vocab=vocab,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        train_iterators=train_iterators,
        validation_iterators=dev_iterators,
        optimizer_config=config['Optimizer'],
        train_config=train_config,
        validation_config=config['Validation'],
        record_config=config['Record'],
        device=device,
    )

    trainer.train()