Beispiel #1
0
def run(config):
    """Runs the model for either pretrain or segmentation/classification tasks.

    Loads the ResNet backbone and, based on the task specification, performs
    either pretraining or segmentation/classification finetuning.

    Args:
        config: instance of config class
    Raises:
        ValueError: In segmentation/classification tasks if number of classes
            are not specified
    """

    # load the Resnet backbone
    model = load_ResNet(config.model,
                        config.imagenet_path,
                        include_top=False,
                        cifar10=config.cifar10,
                        weight_decay=config.weight_decay)

    if config.task == 'pretrain':

        print("Pretraining model")

        if config.cifar10:
            pretrain_cifar10(model, config)
        else:
            pretrain(model, config)

    # BUG FIX: the original condition was
    # `config.task == 'segmentation' or 'classification'`, which is always
    # truthy (the non-empty string 'classification' evaluates to True), so
    # ANY task name other than 'pretrain' fell into this branch.
    elif config.task in ('segmentation', 'classification'):

        print("Fine-tuning model")

        if config.num_classes is None:
            raise ValueError("The number of classes must be set")

        if config.cifar10:
            finetune_cifar10(model, config)
        else:
            finetune(model, config)
Beispiel #2
0
def main():
    """Run adversarial domain adaptation with knowledge distillation.

    Parses command-line arguments, builds source/target datasets and
    dataloaders, pretrains the source encoder + classifier, trains the target
    encoder adversarially (GAN-style) against a domain discriminator, and
    evaluates on the target domain.
    """
    args = parse_arguments()
    # argument setting
    print("=== Argument Setting ===")
    print("src: " + args.src)
    print("tgt: " + args.tgt)
    print("seed: " + str(args.seed))
    print("train_seed: " + str(args.train_seed))
    print("model_type: " + str(args.model))
    print("max_seq_length: " + str(args.max_seq_length))
    print("batch_size: " + str(args.batch_size))
    print("pre_epochs: " + str(args.pre_epochs))
    print("num_epochs: " + str(args.num_epochs))
    print("AD weight: " + str(args.alpha))
    print("KD weight: " + str(args.beta))
    print("temperature: " + str(args.temperature))
    set_seed(args.train_seed)

    # RoBERTa-family models need their own tokenizer; everything else is BERT.
    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data: blog/airline/imdb ship as CSV, the rest as XML reviews
    print("=== Processing datasets ===")
    if args.src in ['blog', 'airline', 'imdb']:
        src_x, src_y = CSV2Array(
            os.path.join('data', args.src, args.src + '.csv'))
    else:
        src_x, src_y = XML2Array(
            os.path.join('data', args.src, 'negative.review'),
            os.path.join('data', args.src, 'positive.review'))

    src_x, src_test_x, src_y, src_test_y = train_test_split(
        src_x, src_y, test_size=0.2, stratify=src_y, random_state=args.seed)

    if args.tgt in ['blog', 'airline', 'imdb']:
        tgt_x, tgt_y = CSV2Array(
            os.path.join('data', args.tgt, args.tgt + '.csv'))
    else:
        tgt_x, tgt_y = XML2Array(
            os.path.join('data', args.tgt, 'negative.review'),
            os.path.join('data', args.tgt, 'positive.review'))

    # BUG FIX: the second unpacking target used to be `tgt_test_y`, which left
    # `tgt_test_x` undefined and silently overwrote the test labels
    # (sklearn returns train_x, test_x, train_y, test_y in that order).
    tgt_train_x, tgt_test_x, tgt_train_y, tgt_test_y = train_test_split(
        tgt_x, tgt_y, test_size=0.2, stratify=tgt_y, random_state=args.seed)

    # tokenize into model-ready features
    if args.model in ['roberta', 'distilroberta']:
        src_features = roberta_convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = roberta_convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = roberta_convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
        tgt_train_features = roberta_convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)
    else:
        src_features = convert_examples_to_features(src_x, src_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        src_test_features = convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = convert_examples_to_features(tgt_x, tgt_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        tgt_train_features = convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)

    # load dataset
    src_data_loader = get_data_loader(src_features, args.batch_size)
    src_data_eval_loader = get_data_loader(src_test_features, args.batch_size)
    tgt_data_train_loader = get_data_loader(tgt_train_features,
                                            args.batch_size)
    tgt_data_all_loader = get_data_loader(tgt_features, args.batch_size)

    # load models: src/tgt encoders share an architecture per model type
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()

    # either restore previously saved checkpoints or initialize from scratch
    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore=param.src_encoder_path)
        src_classifier = init_model(args,
                                    src_classifier,
                                    restore=param.src_classifier_path)
        tgt_encoder = init_model(args,
                                 tgt_encoder,
                                 restore=param.tgt_encoder_path)
        discriminator = init_model(args,
                                   discriminator,
                                   restore=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(args, src_encoder,
                                               src_classifier, src_data_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(src_encoder, src_classifier, src_data_loader)
    evaluate(src_encoder, src_classifier, src_data_eval_loader)
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)

    # freeze the source model before adversarial adaptation
    for params in src_encoder.parameters():
        params.requires_grad = False

    for params in src_classifier.parameters():
        params.requires_grad = False

    # train target encoder by GAN
    print("=== Training encoder for target domain ===")
    if args.adapt:
        # start the target encoder from the (pretrained) source weights
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adapt(args, src_encoder, tgt_encoder, discriminator,
                            src_classifier, src_data_loader,
                            tgt_data_train_loader, tgt_data_all_loader)

    # eval target encoder on lambda0.1 set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)
    print(">>> domain adaption <<<")
    evaluate(tgt_encoder, src_classifier, tgt_data_all_loader)
Beispiel #3
0
def main(args, f):
    """SHOT-style domain-adaptation entry point.

    Builds tokenizer, dataloaders and the encoder/classifier/discriminator
    models, optionally pretrains on the source domain, adapts with
    ``shot_adapt`` and appends accuracy results to ``f``.

    Args:
        args: parsed command-line arguments (model, src, tgt, load, pretrain,
            adapt, batch_size, ...).
        f: open, writable file object that result lines are appended to.
    """
    set_seed(args.train_seed)
    # RoBERTa-family models need their own tokenizer; everything else is BERT.
    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader, tgt_te = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()
    discriminator = Discriminator()

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        discriminator = nn.DataParallel(discriminator)

    # either restore saved checkpoints or initialize from scratch
    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.src_encoder_path)
        classifier = init_model(args,
                                classifier,
                                restore_path=param.src_classifier_path)
    else:
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)

    discriminator = init_model(args, discriminator)

    # train source model
    if args.pretrain:
        print("=== Training classifier for source domain ===")
        src_encoder, classifier = pretrain(args, src_encoder, classifier,
                                           src_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, classifier, src_loader)
    src_acc = evaluate(args, src_encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: No adapt acc on src data: {src_acc}\n')

    # adapt
    print("=== Adapt tgt encoder ===")
    # BUG FIX: `encoder` used to be bound only inside the `if args.adapt:`
    # branch, so the final evaluation below raised a NameError whenever
    # adaptation was disabled; fall back to the source encoder in that case.
    encoder = src_encoder
    if args.adapt:
        encoder, classifier = shot_adapt(args, src_encoder, classifier,
                                         tgt_train_loader, tgt_all_loader,
                                         tgt_te)

    # argument setting
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; src: {args.src}; tgt: {args.tgt}; "
        f'src_free: {args.src_free}; dp: {args.dp}')

    # eval target encoder on lambda0.1 set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt}; dp: {args.dp}\n\n")
Beispiel #4
0
def _load_finetune_evaluate(base_encoder, sup_head, dataloaders, args):
    """Load the pretrained encoder checkpoint, finetune the supervised head,
    evaluate on the test split and tear down distributed training.

    This is the tail shared by both the pretrain path and the finetune-only
    path of ``main`` (it was previously duplicated verbatim in both branches).
    """
    print("\n\nLoading the model: {}\n\n".format(args.load_checkpoint_dir))

    # Load the pretrained model
    checkpoint = torch.load(args.load_checkpoint_dir)

    # Load the encoder parameters
    base_encoder.load_state_dict(checkpoint['encoder'])

    # Initalize weights of the supervised / classification head
    sup_head.apply(init_weights)

    # Supervised Finetuning of the supervised classification head
    finetune(base_encoder, sup_head, dataloaders, args)

    # Evaluate the pretrained model and trained supervised head
    test_loss, test_acc, test_acc_top5 = evaluate(base_encoder, sup_head,
                                                  dataloaders, 'test',
                                                  args.finetune_epochs,
                                                  args)

    print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format(
        test_loss, test_acc, test_acc_top5))

    if args.distributed:  # cleanup
        torch.distributed.destroy_process_group()


def main():
    """Parse arguments, build the encoder/heads, place them on the GPU(s)
    (optionally distributed), then pretrain and/or finetune and evaluate."""

    # Arguments
    args = parser.parse_args()

    # Setup Distributed Training
    device, local_rank = setup(distributed=args.distributed)

    # Get Dataloaders for Dataset of choice
    dataloaders, args = get_dataloaders(args)

    # Setup logging, saving models, summaries
    args = experiment_config(parser, args)

    # Get available models from /model/network.py
    model_names = sorted(name for name in models.__dict__
                         if name.islower() and not name.startswith("__")
                         and callable(models.__dict__[name]))

    # If model exists
    if any(args.model in model_name for model_name in model_names):

        # Load model
        base_encoder = getattr(models, args.model)(
            args, num_classes=args.n_classes)  # Encoder

        proj_head = models.projection_MLP(args)
        sup_head = models.Sup_Head(args)

    else:
        raise NotImplementedError("Model Not Implemented: {}".format(
            args.model))

    # Remove last FC layer from resnet
    base_encoder.fc = nn.Sequential()

    # Place model onto GPU(s)
    if args.distributed:
        torch.cuda.set_device(device)
        torch.set_num_threads(6)  # n cpu threads / n processes per node

        base_encoder = DistributedDataParallel(base_encoder.cuda(),
                                               device_ids=[local_rank],
                                               output_device=local_rank,
                                               find_unused_parameters=True,
                                               broadcast_buffers=False)
        proj_head = DistributedDataParallel(proj_head.cuda(),
                                            device_ids=[local_rank],
                                            output_device=local_rank,
                                            find_unused_parameters=True,
                                            broadcast_buffers=False)

        sup_head = DistributedDataParallel(sup_head.cuda(),
                                           device_ids=[local_rank],
                                           output_device=local_rank,
                                           find_unused_parameters=True,
                                           broadcast_buffers=False)

        # Only print from process (rank) 0
        args.print_progress = True if int(
            os.environ.get('RANK')) == 0 else False
    else:
        # If non Distributed use DataParallel
        if torch.cuda.device_count() > 1:
            base_encoder = nn.DataParallel(base_encoder)
            proj_head = nn.DataParallel(proj_head)
            sup_head = nn.DataParallel(sup_head)

        print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n')

        base_encoder.to(device)
        proj_head.to(device)
        sup_head.to(device)

        args.print_progress = True

    # Print Network Structure and Params
    if args.print_progress:
        print_network(base_encoder,
                      args)  # prints out the network architecture etc
        logging.info('\npretrain/train: {} - valid: {} - test: {}'.format(
            len(dataloaders['train'].dataset),
            len(dataloaders['valid'].dataset),
            len(dataloaders['test'].dataset)))

    # launch model training or inference
    if not args.finetune:
        ''' Pretraining / Finetuning / Evaluate '''

        if not args.supervised:
            # Pretrain the encoder and projection head
            proj_head.apply(init_weights)

            pretrain(base_encoder, proj_head, dataloaders, args)
        else:
            supervised(base_encoder, sup_head, dataloaders, args)

        _load_finetune_evaluate(base_encoder, sup_head, dataloaders, args)
    else:
        ''' Finetuning / Evaluate '''

        # Do not Pretrain, just finetune and inference
        _load_finetune_evaluate(base_encoder, sup_head, dataloaders, args)
Beispiel #5
0
def main(args, f):
    """CDAN domain-adaptation pipeline (optionally source-free / data-free).

    Builds the tokenizer, dataloaders and the encoder / classifier /
    discriminator models, pretrains on the source domain, adapts the encoder
    to the target domain with one of three CDAN variants, and appends
    accuracy results to the open file handle ``f``.

    Args:
        args: parsed command-line arguments (model, src, tgt, batch_size,
            src_free, data_free, load, pretrain, ...).
        f: open, writable file object that result lines are appended to.
    """
    # args = parse_arguments()
    set_seed(args.train_seed)

    # RoBERTa-family models need their own tokenizer; everything else is BERT.
    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models: `encoder` is the one that gets adapted; `src_encoder`
    # receives a copy of the pretrained weights before adaptation (see the
    # load_state_dict call below); `classifier` is shared across domains.
    if args.model == 'bert':
        encoder = BertEncoder()
        src_encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        encoder = DistilBertEncoder()
        src_encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        encoder = RobertaEncoder()
        src_encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        encoder = DistilRobertaEncoder()
        src_encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()

    # domain discriminator (CDAN conditions on features x predicted labels,
    # hence the input_dim * num_labels input size)
    discriminator = AdversarialNetworkCdan(param.input_dim * param.num_labels,
                                           param.hidden_dim)

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder)
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        discriminator = nn.DataParallel(discriminator)

    # either restore saved checkpoints or initialize from scratch
    if args.load:
        encoder = init_model(args,
                             encoder,
                             restore_path=param.src_encoder_path)
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.tgt_encoder_path)
        classifier = init_model(args,
                                classifier,
                                restore_path=param.src_classifier_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        encoder = init_model(args, encoder)
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)

    # the discriminator is always freshly initialized (never restored)
    discriminator = init_model(args, discriminator)

    # train source model
    print("=== Pretrain encoder for source domain ===")
    if args.pretrain:
        encoder, classifier = pretrain(args, encoder, classifier, src_loader)

    # eval source model (source accuracy + no-adaptation target baseline)
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, encoder, classifier, src_loader)
    src_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt} no adapt acc on src data: {src_acc}\n')
    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_pretrain'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_pretrain'), x, y)

    # adapt: snapshot the pretrained weights into src_encoder, then run the
    # CDAN variant selected by the flags (src_free / data_free / vanilla)
    print("=== Adapt encoder for target domain ===")
    src_encoder.load_state_dict(encoder.state_dict())
    if args.src_free:
        # NOTE: reusing the same encoder vs. copying it into src_encoder
        # yields different baseline results
        s_res_features = src_gmm(args, encoder, src_loader)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_src_free(args, encoder, src_encoder,
                                                  discriminator, classifier,
                                                  src_loader, tgt_train_loader,
                                                  tgt_all_loader)
    elif args.data_free:
        # synthesize both source and target features from GMMs instead of
        # using the real data loaders
        s_res_features = src_gmm(args, encoder, src_loader)
        t_res_features = tgt_gmm(encoder, tgt_all_loader, 1)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        tgt_train_loader = t_numpy_dataloader(t_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_data_free(args, encoder,
                                                   discriminator, classifier,
                                                   src_loader,
                                                   tgt_train_loader,
                                                   tgt_all_loader)
    else:
        encoder, classifier = cdan_adapt(args, encoder, discriminator,
                                         classifier, src_loader,
                                         tgt_train_loader, tgt_all_loader)
    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_adapt_cdan'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_adapt_cdan'), x, y)

    # argument setting
    print(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}"
    )

    # eval target encoder on lambda0.1 set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}\n\n"
    )
Beispiel #6
0
    # NOTE(review): this chunk is the interior of a larger CLI-dispatch
    # function whose `def` and first `if` branch are outside this view.
    elif args.mode == 'urdf':
        # Collect every .obj mesh under the objects directory...
        object_paths = [
            str(path) for path in Path(args.objects).rglob('*.obj')
        ]
        # ...excluding precomputed "*collision.obj" meshes.
        object_paths = list(
            filter(lambda path: not path.endswith("collision.obj"),
                   object_paths))
        # Generate grasp-object URDFs in parallel (48 worker processes).
        with Pool(48) as p:
            rv = list(
                tqdm(p.imap(create_grasp_object_urdf, object_paths),
                     total=len(object_paths)))
        # URDF generation is standalone: no config or training follows.
        exit()

    # All remaining modes operate on the shared experiment config.
    config = load_config(args.config)
    if args.mode == 'cotrain':
        cotrain(*setup(args, config))
    elif args.mode == 'vae':
        train_vae(*setup_train_vae(args, config))
    elif args.mode == 'pretrain':
        pretrain(args, config)
    elif args.mode == 'pretrain_gn':
        pretrain_gn(args, config)
    elif args.mode == 'pretrain_dataset':
        generate_pretrain_data(*setup(args, config))
    elif args.mode == 'pretrain_imprint_dataset':
        generate_pretrain_imprint_data(*setup(args, config))
    elif args.mode == 'grasp_objects':
        create_grasp_objects(*setup(args, config))
    elif args.mode == 'imprint_baseline':
        generate_imprints(*setup(args, config))
Beispiel #7
0
def _finetune_and_test(base_encoder, dataloaders, args):
    """Finetune the classification head on top of ``base_encoder``, evaluate
    on the test split and tear down distributed training.

    Shared tail of both the pretrain path and the finetune-only path of
    ``main`` (it was previously duplicated verbatim in both branches).
    """
    # Supervised Finetuning of the supervised classification head
    finetune(base_encoder, dataloaders, args)

    # Evaluate the pretrained model and trained supervised head
    test_loss, test_acc, test_acc_top5 = evaluate(base_encoder,
                                                  dataloaders, 'test',
                                                  args.finetune_epochs,
                                                  args)

    print('[Test] loss {:.4f} - acc {:.4f} - acc_top5 {:.4f}'.format(
        test_loss, test_acc, test_acc_top5))

    if args.distributed:  # cleanup
        torch.distributed.destroy_process_group()


def main():
    """Parse arguments, build the base encoder and the MoCo model, place them
    on the GPU(s) (optionally distributed), then pretrain (MoCo or supervised)
    and/or finetune and evaluate."""

    # Arguments
    args = parser.parse_args()

    # Setup Distributed Training
    device, local_rank = setup(distributed=args.distributed)

    # Get Dataloaders for Dataset of choice
    dataloaders, args = get_dataloaders(args)

    # Setup logging, saving models, summaries
    args = experiment_config(parser, args)
    ''' Base Encoder '''

    # Get available models from /model/network.py
    model_names = sorted(name for name in models.__dict__
                         if name.islower() and not name.startswith("__")
                         and callable(models.__dict__[name]))

    # If model exists
    if any(args.model in model_name for model_name in model_names):
        # Load model
        base_encoder = getattr(models, args.model)(
            args, num_classes=args.n_classes)  # Encoder

    else:
        raise NotImplementedError("Model Not Implemented: {}".format(
            args.model))

    if not args.supervised:
        # freeze all layers but the last fc
        for name, param in base_encoder.named_parameters():
            if name not in ['fc.weight', 'fc.bias']:
                param.requires_grad = False
        # init the fc layer
        init_weights(base_encoder)
    ''' MoCo Model '''
    moco = MoCo_Model(args,
                      queue_size=args.queue_size,
                      momentum=args.queue_momentum,
                      temperature=args.temperature)

    # Place model onto GPU(s)
    if args.distributed:
        torch.cuda.set_device(device)
        torch.set_num_threads(6)  # n cpu threads / n processes per node

        moco = DistributedDataParallel(moco.cuda(),
                                       device_ids=[local_rank],
                                       output_device=local_rank,
                                       find_unused_parameters=True,
                                       broadcast_buffers=False)
        base_encoder = DistributedDataParallel(base_encoder.cuda(),
                                               device_ids=[local_rank],
                                               output_device=local_rank,
                                               find_unused_parameters=True,
                                               broadcast_buffers=False)

        # Only print from process (rank) 0
        args.print_progress = True if int(
            os.environ.get('RANK')) == 0 else False
    else:
        # If non Distributed use DataParallel
        if torch.cuda.device_count() > 1:
            moco = nn.DataParallel(moco)
            base_encoder = nn.DataParallel(base_encoder)

        print('\nUsing', torch.cuda.device_count(), 'GPU(s).\n')

        moco.to(device)
        base_encoder.to(device)

        args.print_progress = True

    # Print Network Structure and Params
    if args.print_progress:
        print_network(moco, args)  # prints out the network architecture etc
        logging.info('\npretrain/train: {} - valid: {} - test: {}'.format(
            len(dataloaders['train'].dataset),
            len(dataloaders['valid'].dataset),
            len(dataloaders['test'].dataset)))

    # launch model training or inference
    if not args.finetune:
        ''' Pretraining / Finetuning / Evaluate '''

        if not args.supervised:
            # Pretrain the encoder and projection head
            pretrain(moco, dataloaders, args)

            # Load the state_dict from query encoder and load it on finetune net
            base_encoder = load_moco(base_encoder, args)

        else:
            supervised(base_encoder, dataloaders, args)

            # Load the state_dict from query encoder and load it on finetune net
            base_encoder = load_sup(base_encoder, args)

        _finetune_and_test(base_encoder, dataloaders, args)
    else:
        ''' Finetuning / Evaluate '''

        # Do not Pretrain, just finetune and inference
        # Load the state_dict from query encoder and load it on finetune net
        base_encoder = load_moco(base_encoder, args)

        _finetune_and_test(base_encoder, dataloaders, args)
Beispiel #8
0
def main():
    """SERIL entry point: parse arguments, then either run the pretrain +
    lifelong-adaptation pipeline (``--do train``) or evaluation
    (``--do test``).

    Raises:
        ValueError: if the configured training loss is not supported.
    """
    parser = argparse.ArgumentParser(
        description='Argument Parser for SERIL.')
    parser.add_argument('--logdir', default='log',
                        help='Name of current experiment.')
    parser.add_argument('--n_jobs', default=2, type=int)
    parser.add_argument(
        '--do', choices=['train', 'test'], default='train', type=str)
    parser.add_argument(
        '--mode', choices=['seril', 'finetune'], default='seril', type=str)
    parser.add_argument(
        '--model', choices=['LSTM', 'Residual', 'IRM'], default='LSTM', type=str)

    # Options
    parser.add_argument(
        '--config', default='config/config.yaml', required=False)
    parser.add_argument('--seed', default=1126, type=int,
                        help='Random seed for reproducable results.', required=False)
    parser.add_argument('--gpu', default='2', type=int,
                        help='Assigning GPU id.')
    args = parser.parse_args()

    # seed every RNG we rely on for reproducibility
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # build log directory
    os.makedirs(args.logdir, exist_ok=True)

    # load configure
    config = yaml.load(open(args.config, 'r'), Loader=yaml.FullLoader)

    if config['train']['loss'] == 'sisdr':
        loss_func = SingleSrcNegSDR("sisdr", zero_mean=False,
                                    reduction='mean')
    else:
        # BUG FIX: previously an unsupported loss left `loss_func` undefined,
        # causing a confusing NameError when the model was later constructed.
        raise ValueError(
            "Unsupported train loss: {}".format(config['train']['loss']))

    if args.do == 'train':
        torch.cuda.set_device(args.gpu)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # every clean utterance list must have a matching noisy list
        assert len(config['dataset']['train']['clean']) == len(
            config['dataset']['train']['noisy']) and len(config['dataset']['train']['clean']) >= 1

        model_path = f'{args.logdir}/pretrain/{args.model}_model_T0.pth'
        lifelong_agent_path = f'{args.logdir}/pretrain/{args.model}_synapses_T0.pth'

        # reuse an existing pretrained model + synapses if both checkpoints exist
        if os.path.exists(model_path) and os.path.exists(lifelong_agent_path):
            print('[Runner] - pretrain model has already existed!')
            model = torch.load(model_path).to(device)
            lifelong_agent = torch.load(lifelong_agent_path).to(device)
            lifelong_agent.load_config(**config['train']['strategies'])

        else:
            print('[Runner] - run pretrain process!')
            preprocessor = OnlinePreprocessor(feat_list=feat_list).to(device)
            # args.model is restricted by argparse `choices`, so eval() here is
            # bounded to the three known model class names
            model = eval(args.model)(loss_func, preprocessor, **config['model']).to(device)
            lifelong_agent = LifeLongAgent(model, **config['train']['strategies'])
            pretrain(args, config, model, lifelong_agent)

        print('[Runner] - run adaptation process!')
        args.logdir = f'{args.logdir}/{args.mode}'
        if args.mode == 'seril':
            # SERIL adaptation regularized by the lifelong agent's synapses
            adapt(args, config, model, lifelong_agent)
        elif args.mode == 'finetune':
            # plain finetuning baseline, no lifelong regularization
            adapt(args, config, model)

    if args.do == 'test':
        test(args, config)
Beispiel #9
0
def _build_models(args):
    """Instantiate the four networks for the chosen backbone.

    Args:
        args: parsed namespace; ``args.model`` selects the backbone
            ('bert', 'distilbert', 'roberta', anything else -> distilroberta).

    Returns:
        Tuple of (src_encoder, tgt_encoder, src_classifier, discriminator).
    """
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()  # output dims is 2 instead of 1
    return src_encoder, tgt_encoder, src_classifier, discriminator


def main(args, f):
    """Run ADDA-style adversarial domain adaptation for text classification.

    Pipeline: seed -> tokenize -> build dataloaders -> build/restore models
    -> (optionally) pretrain the source classifier -> evaluate it on source
    and target -> adversarially adapt the target encoder -> evaluate again.

    Args:
        args: parsed command-line namespace (model, src, tgt, train_seed,
            max_seq_length, batch_size, pre_epochs, num_epochs, load,
            pretrain, adapt, src_free, ...).
        f: open, writable file handle; accuracy results are appended to it.
    """
    set_seed(args.train_seed)

    # RoBERTa-family backbones need the RoBERTa BPE tokenizer; the BERT
    # WordPiece tokenizer is used for everything else.
    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models
    src_encoder, tgt_encoder, src_classifier, discriminator = _build_models(args)

    # Either restore previously saved weights or run fresh initialization.
    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.src_encoder_path)
        src_classifier = init_model(args,
                                    src_classifier,
                                    restore_path=param.src_classifier_path)
        tgt_encoder = init_model(args,
                                 tgt_encoder,
                                 restore_path=param.tgt_encoder_path)
        discriminator = init_model(args,
                                   discriminator,
                                   restore_path=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        src_classifier = nn.DataParallel(src_classifier)
        tgt_encoder = nn.DataParallel(tgt_encoder)
        discriminator = nn.DataParallel(discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(args, src_encoder,
                                               src_classifier, src_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, src_classifier, src_loader)
    # NOTE(review): src_acc is measured on tgt_all_loader, so the logged
    # "acc on src data" label below looks misleading (it is the no-adaptation
    # accuracy on *target* data) — confirm intent before renaming the message.
    src_acc = evaluate(args, src_encoder, src_classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: No adapt acc on src data: {src_acc}\n')

    # Freeze the source encoder: ADDA trains only the target encoder against
    # the discriminator while the source side stays fixed.
    for params in src_encoder.parameters():
        params.requires_grad = False

    # train target encoder by ADDA
    print("=== Training encoder for target domain ===")
    if args.adapt:
        # Initialize the target encoder from the trained source weights.
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adda_adapt(args, src_encoder, tgt_encoder, discriminator,
                                 src_loader, tgt_train_loader)

    # argument setting
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; src: {args.src}; tgt: {args.tgt}"
    )

    # eval target encoder on lambda0.1 set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, tgt_encoder, src_classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt};\n\n")