コード例 #1
0
async def local_training(config, train_loader, test_loader):
    """Run one local training round and exchange the result with the master.

    A ``MyNet`` is built from ``config.model`` and initialised with the
    weights in ``config.para``, trained through the module-level ``train``
    helper, and written back into ``config`` (``acc``, ``model``, ``para``).
    The updated ``config`` is sent to the master, and every attribute of the
    object received in reply is copied onto ``config``.

    Args:
        config: Mutable job description; updated in place.
        train_loader: Training data, forwarded to ``train``.
        test_loader: Evaluation data, forwarded to ``train``.
    """
    # Rebuild the network from the received description and weights.
    net = MyNet(config.model)
    net.load_state_dict(config.para)
    net = net.to(device)

    sgd = optim.SGD(net.parameters(), lr=0.1)

    # Whatever `train` returns is published as this worker's `acc`.
    config.acc = train(args, config, net, device, train_loader, test_loader,
                       sgd, config.epoch_num)

    # Publish the trained network back into the config before sending it.
    config.model = models.Net2Tuple(net)
    config.para = dict(net.named_parameters())

    print("before send")
    await send_data(config, MASTER_IP, MASTER_LISTEN_PORT)
    print("after send")
    reply = await get_data(LISTEN_PORT, LOCAL_IP)

    # Adopt every attribute of the reply on the local config object.
    for attr_name, attr_value in reply.__dict__.items():
        setattr(config, attr_name, attr_value)
コード例 #2
0
ファイル: main.py プロジェクト: helboukkouri/recital_2020
def main(args):
    """Fine-tune and/or evaluate a BERT model on the configured task.

    Loads the ``train`` and ``test`` splits for ``args.task``, takes the
    first ``args.validation_ratio`` share of the training examples as the
    validation split, builds the input features, loads the pretrained model
    from ``pretrained-models/<args.embedding>``, then trains
    (``args.do_train``) and/or evaluates on the test split
    (``args.do_predict``). Test metrics are written to
    ``<args.output_dir>/performance_on_test_set.txt``.

    Args:
        args: Parsed options. Attributes read here: ``embedding``,
            ``do_lower_case``, ``task``, ``validation_ratio``, ``device``,
            ``do_train``, ``do_predict`` and ``output_dir``. The whole object
            is also forwarded to the feature, train and evaluate helpers.

    Raises:
        NotImplementedError: If ``args.task`` is neither
            ``'classification'`` nor ``'sequence_labelling'``.
    """

    # --------------------------------- DATA ---------------------------------

    pretrained_path = os.path.join('pretrained-models', args.embedding)

    # Tokenizer (INFO-level logging is silenced while it loads)
    logging.disable(logging.INFO)
    tokenizer = BertTokenizer.from_pretrained(
        pretrained_path, do_lower_case=args.do_lower_case)
    logging.disable(logging.NOTSET)

    tokenization_function = tokenizer.tokenize

    # Resolve everything that depends on the task once, so the later stages
    # cannot disagree about which loader / model class is in use.
    if args.task == 'classification':
        load_dataset = load_classification_dataset
        model_class = BertForSequenceClassification
    elif args.task == 'sequence_labelling':
        load_dataset = load_sequence_labelling_dataset
        model_class = BertForTokenClassification
    else:
        raise NotImplementedError
    is_classification = args.task == 'classification'

    # Pre-processsing: apply basic tokenization (both) then split into wordpieces (BERT only)
    data = {}
    for split in ['train', 'test']:
        data[split] = load_dataset(step=split,
                                   do_lower_case=args.do_lower_case)
        retokenize(data[split], tokenization_function)

    logging.info('Splitting training data into train / validation sets...')
    n_validation = int(args.validation_ratio * len(data['train']))
    data['validation'] = data['train'][:n_validation]
    data['train'] = data['train'][n_validation:]
    logging.info('New number of training sequences: %d', len(data['train']))
    logging.info('New number of validation sequences: %d',
                 len(data['validation']))

    # Every example of every split; built once and reused for the label
    # inventory and the maximum sequence length.
    all_examples = data['train'] + data['validation'] + data['test']

    # Count target labels or classes.
    # Maximum sequence length is either 512 or maximum token sequence length + 5
    if is_classification:
        counter_all = Counter(example.label for example in all_examples)
        counter = Counter(example.label for example in data['train'])
        longest = max(
            len(e.tokens_a if e.tokens_b is None else e.tokens_a + e.tokens_b)
            for e in all_examples)
    else:
        counter_all = Counter(label for example in all_examples
                              for label in example.label_sequence)
        counter = Counter(label for example in data['train']
                          for label in example.label_sequence)
        longest = max(len(e.token_sequence) for e in all_examples)
    max_seq_length = min(512, 5 + longest)
    del all_examples  # Only needed for the statistics above

    labels = sorted(counter_all.keys())
    num_labels = len(labels)

    logging.info("Goal: predict the following labels")
    for i, label in enumerate(labels):
        logging.info("* %s: %s (count: %s)", label, i, counter[label])

    # Input features: list[token indices]
    pad_token_id = tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0]
    pad_token_label_id = None
    if not is_classification:
        # Padded label positions use the loss' ignore_index value.
        pad_token_label_id = CrossEntropyLoss().ignore_index

    dataset = {}
    logging.info("Maximum sequence lenght: %s", max_seq_length)
    for split in data:
        dataset[split] = build_and_cache_features(
            args,
            split=split,
            tokenizer=tokenizer,
            examples=data[split],
            labels=labels,
            pad_token_id=pad_token_id,
            pad_token_label_id=pad_token_label_id,
            max_seq_length=max_seq_length)

    del data  # Not used anymore

    # --------------------------------- MODEL ---------------------------------

    logging.info('Loading `%s` model...', args.embedding)
    logging.disable(logging.INFO)
    config = BertConfig.from_pretrained(pretrained_path,
                                        num_labels=num_labels)
    model = model_class.from_pretrained(pretrained_path, config=config)
    logging.disable(logging.NOTSET)

    model.to(args.device)
    logging.info('Model:\n%s', model)

    # ------------------------------ TRAIN / EVAL ------------------------------

    # Log args
    logging.info('Using the following arguments for training:')
    for k, v in vars(args).items():
        logging.info("* %s: %s", k, v)

    # Only set by training. Initialised here so that running with
    # --do_predict alone no longer fails with a NameError further down.
    best_val_metric = None
    best_val_epoch = None

    # Training
    if args.do_train:
        global_step, train_loss, best_val_metric, best_val_epoch = train(
            args=args,
            dataset=dataset,
            model=model,
            tokenizer=tokenizer,
            labels=labels,
            pad_token_label_id=pad_token_label_id)
        logging.info("global_step = %s, average training loss = %s",
                     global_step, train_loss)
        logging.info("Best performance: Epoch=%d, Value=%s", best_val_epoch,
                     best_val_metric)

    # Evaluation on test data
    if args.do_predict:

        # Load the model stored in the output directory
        logging.disable(logging.INFO)
        model = model_class.from_pretrained(args.output_dir)
        logging.disable(logging.NOTSET)
        model.to(args.device)

        # Compute predictions and metrics
        results, _ = evaluate(args=args,
                              eval_dataset=dataset["test"],
                              model=model,
                              labels=labels,
                              pad_token_label_id=pad_token_label_id)

        # Save metrics
        with open(os.path.join(args.output_dir, 'performance_on_test_set.txt'),
                  'w') as f:
            if args.do_train:
                # Validation figures exist only when training ran in this
                # invocation.
                f.write(f'best validation score: {best_val_metric}\n')
                f.write(f'best validation epoch: {best_val_epoch}\n')
            f.write('--- Performance on test set ---\n')
            for k, v in results.items():
                f.write(f'{k}: {v}\n')
コード例 #3
0
# Train on each training loader in turn, logging every run to its own
# TensorBoard directory (runs/noise2noise_<loader_name>).
# NOTE(review): `net` and `optimizer` are not re-created between loaders, so
# each run appears to continue from the previous run's weights — confirm this
# is intended.
loaders = [(train_loader_poisson, 'train_loader_poisson'),
           (train_loader_noise, 'train_loader_noise'),
           (train_loader_clean, 'train_loader_clean')]

n_epochs = 40
for train_loader, loader_name in loaders:
    # One SummaryWriter per loader so the runs stay separate.
    logger = SummaryWriter(f'runs/noise2noise_{loader_name}')
    print(
        f"\n\nTraining noise2noise for {n_epochs} epochs with loader {loader_name}"
    )
    for epoch in tqdm.tqdm(range(n_epochs), total=n_epochs):
        # train
        train(net,
              train_loader,
              optimizer,
              LOSS_CRITERION,
              epoch,
              log_interval=25,
              tb_logger=logger,
              device=device)
        # Step value handed to validate(): epoch index times the number of
        # samples in the training dataset.
        step = epoch * len(train_loader.dataset)
        # validate
        validate(net,
                 val_loader,
                 LOSS_CRITERION,
                 EVAL_METRIC,
                 step=step,
                 tb_logger=logger,
                 device=device)
"""## Exercises

1. Train a separate denoising model using clean targets and compare its PSNR scores with those obtained with the noise2noise model. Compare the results of the two models visually in TensorBoard.
    # Log a timestamp and the sizes of the training / validation lists.
    print(datetime.now(), len(train_id_type_list), len(val_id_type_list))
    # The two lists must not share any element.
    assert len(to_set(train_id_type_list)
               & to_set(val_id_type_list)) == 0, "WTF"

    # Instantiate the network class stored in params['network']; both
    # `params` itself and params['network_kwargs'] are expanded as keyword
    # arguments.
    cnn = params['network'](lr=params['lr_kwargs']['lr'],
                            **params,
                            **params['network_kwargs'])
    # Derive the checkpoint prefix from the model name and the fold index
    # (val_fold_index shifted down by one).
    params['save_prefix'] = params['save_prefix_template'].format(
        cnn_name=cnn.name, fold_index=val_fold_index - 1)
    print("\n {} - Loaded {} model ...".format(datetime.now(), cnn.name))

    # Optional warm start, only when the caller configured one.
    if 'pretrained_model' in params:
        load_pretrained_model(cnn, **params)

    print("\n {} - Start training ...".format(datetime.now()))
    h = train(cnn, train_id_type_list, val_id_type_list, **params)
    # train() returning None means there is nothing to record for this
    # iteration of the enclosing loop (loop header not visible here).
    if h is None:
        continue
    hists.append(h)

# ### Validation all classes

# Settings for the validation runs (presumably n_runs repetitions of an
# n_folds-fold cross-validation — confirm against the loop that uses them).
n_runs = 2
n_folds = 5
run_counter = 0
# Score table with one row per run and one column per fold.
cv_mean_scores = np.zeros((n_runs, n_folds))
val_fold_indices = []  # !!! CHECK BEFORE LOAD TO FLOYD

params['pretrained_model'] = 'load_best'

# Array copy of the train+validation id/type list.
_trainval_id_type_list = np.array(trainval_id_type_list)
        # build the dice coefficient metric
        metric = DiceCoefficient()
        # train for n_epochs epochs (n_epochs is set outside this snippet)
        start = int(time.time())
        stop = 0
        best_accuracy = 0.
        # Checkpoint file name is specific to the model name and loss name.
        checkpoint_name = './best_checkpoint_{name}_{loss_name}.tar'.format(
            name=name, loss_name=loss_name)
        best_epoch = 0
        for epoch in tqdm.tqdm(range(n_epochs), total=n_epochs):
            # train
            train(net,
                  train_loader,
                  optimizer,
                  loss_function,
                  epoch,
                  tb_logger=logger,
                  device=device)

            # Step value handed to validate(): epoch index times the number
            # of samples in the training dataset.
            step = epoch * len(train_loader.dataset)
            # validate; only the second returned value is kept (as `acc`)
            _, acc = validate(net,
                              val_loader,
                              loss_function,
                              metric,
                              step=step,
                              tb_logger=logger,
                              device=device,
                              optimizer=optimizer)
コード例 #6
0
ファイル: main.py プロジェクト: markdjthomas/ssl_experiments
def main():
    """Train a CIFAR-10 classifier and checkpoint it after every epoch.

    Command-line options come from the module-level ``parser``. The function
    creates a TensorBoard ``SummaryWriter`` under
    ``./logs/tensorboard/<run_id>`` and optionally seeds the RNGs. It builds
    the architecture named by ``args.arch`` wrapped in ``DataParallel``, then
    for each of ``args.epochs`` epochs it trains, validates, steps a
    ``ReduceLROnPlateau`` scheduler on the validation loss and saves a
    checkpoint.

    Side effects: mutates the module-level ``best_metrics``, downloads
    CIFAR-10 into ``./data`` if needed, and attaches ``writer``,
    ``train_loader_len``, ``validation_loader_len`` and ``current_epoch`` to
    ``args`` for use by the helpers.
    """
    global best_metrics

    # Parse the arguments
    args = parser.parse_args()

    # Create the SummaryWriter for Tensorboard
    args.writer = SummaryWriter('./logs/tensorboard/{}'.format(args.run_id))

    # Set the RNG seeds
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. \
                       This will turn on the CUDNN deterministic setting, \
                       which can slow down your training considerably! \
                       You may see unexpected behavior when restarting \
                       from checkpoints.')

    # Print out the training setup
    print('New training run...\n')
    print('   Run ID:            {}'.format(args.run_id))
    print('   Architecture:      {}'.format(args.arch))
    print('   Batch size:        {}'.format(args.batch_size))
    print('   Learning rate:     {}'.format(args.learning_rate))
    print('   Decay rate:        {}\n'.format(args.decay_rate))

    # Create the model
    print("=> creating model...")
    device = torch.device('cuda')
    model = models.__dict__[args.arch](pretrained=False,
                                       num_classes=args.classes).to(device)

    # For AlexNet / VGG only the `features` sub-module is parallelised;
    # every other architecture is wrapped whole.
    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=args.decay_rate, patience=10)
    # cuDNN autotuning may pick different kernels from run to run, which
    # would undo the deterministic setting requested by --seed. Enable it
    # only for unseeded runs.
    cudnn.benchmark = args.seed is None

    # Create the datasets and loaders
    print('=> creating the datasets and iterators')

    # Both pipelines share the same normalisation.
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.247, 0.243, 0.261))

    # Create the training dataset and loader (with augmentation)
    training_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    training_dataset = datasets.CIFAR10('./data',
                                        train=True,
                                        download=True,
                                        transform=training_transform)
    training_loader = torch.utils.data.DataLoader(training_dataset,
                                                  batch_size=args.batch_size,
                                                  shuffle=True)

    # Create the validation dataset and loader (no augmentation)
    validation_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    validation_dataset = datasets.CIFAR10('./data',
                                          train=False,
                                          transform=validation_transform)
    validation_loader = torch.utils.data.DataLoader(validation_dataset,
                                                    batch_size=args.batch_size,
                                                    shuffle=True)

    # Save the lengths of the data loaders for Tensorboard
    args.train_loader_len = len(training_loader)
    args.validation_loader_len = len(validation_loader)

    # Train the model
    print('=> starting the training\n')
    for epoch in range(args.epochs):
        # Set the current epoch to be used by Tensorboard
        args.current_epoch = epoch

        # Take a training step
        train(training_loader, model, criterion, optimizer, epoch, device,
              args)

        # Evaluate on validation set and check if it is the current best
        val_loss, metrics = validate(validation_loader, model, criterion,
                                     device, args)
        best_metrics, is_best = test_best_metrics(metrics, best_metrics)

        # Take a step using the learning rate scheduler
        lr_scheduler.step(val_loss)

        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_acc': best_metrics[0],
                'best_pre': best_metrics[1],
                'best_rec': best_metrics[2]
            }, is_best, args)

    # Close the Tensorboard writer
    args.writer.close()
コード例 #7
0
        # All-zero timestep (20 input values, 5 target values) handed to the
        # bucketing helper as `placeholder_timestep` — presumably used for
        # padding; confirm in training_utils.bucketing.
        placeholder_x = [0] * 20
        placeholder_y = [0] * 5

        # Bucket the training episodes with bucket size mini_batch_size.
        train_episodes = training_utils.bucketing(
            bucket_size=mini_batch_size,
            episodes=train_episodes,
            placeholder_timestep=[placeholder_x, placeholder_y])
        #test_episodes = training_utils.bucketing(bucket_size=mini_batch_size, episodes=test_episodes, placeholder_timestep=[placeholder_x, placeholder_y])
        # Same bucketing for the validation episodes.
        validation_episodes = training_utils.bucketing(
            bucket_size=mini_batch_size,
            episodes=validation_episodes,
            placeholder_timestep=[placeholder_x, placeholder_y])

        training_utils.train(train_data_xy=train_episodes,
                             validation_data_xy=validation_episodes,
                             model=model,
                             batch_size=mini_batch_size,
                             G=G)

        # Reload the model with batch size 1 for the evaluation below.
        model = reload_model(model, batch_size=1)

        # Run the optimality evaluation five times and count the runs in
        # which both returned durations are present (not None).
        success = 0
        for i in range(0, 5):
            ml_dur, gt_dur = run_optimality_evaluation(G)

            if ml_dur != None and gt_dur != None:
                success += 1

        print("Success: ", success)

    # arg_parser = create_arg_parser()
コード例 #8
0
def main():
    """Entry point: validate the arguments, then train and/or evaluate.

    Steps, in order: argument sanity checks; optional remote-debugger
    attach; device / distributed setup; logging and seeding; loading of the
    config, tokenizer and model; training (``--do_train``) followed by saving
    and reloading the result from ``args.output_dir``; evaluation
    (``--do_eval``) of one or all checkpoints.

    Returns:
        dict: Evaluation results collected over the evaluated checkpoints,
        each key suffixed with ``_<global_step>``; empty when no evaluation
        ran.

    Raises:
        ValueError: On inconsistent arguments (missing ``--mlm``-style flag
            for BERT-like models, ``--do_eval`` without an eval file,
            ``--should_continue`` without a checkpoint, non-empty output
            directory without ``--overwrite_output_dir``, or no config /
            tokenizer source).
        NotImplementedError: When a model has to be reloaded and none of the
            mlm / token-discrimination flags is set.
    """
    args = get_args()

    if args.model_type in ["bert", "roberta", "distilbert", "camembert"] and not \
            (args.mlm or args.token_discrimination or args.mask_token_discrimination):
        raise ValueError(
            "BERT and RoBERTa-like models do not have LM heads but masked LM heads. They must be run using the --mlm "
            "flag (masked language modeling).")
    if args.eval_data_file is None and args.do_eval:
        raise ValueError(
            "Cannot do evaluation without an evaluation data file. Either supply a file to --eval_data_file "
            "or remove the --do_eval argument.")
    if args.should_continue:
        # Resume from the last entry of the sorted checkpoint list.
        sorted_checkpoints = _sorted_checkpoints(args)
        if len(sorted_checkpoints) == 0:
            raise ValueError(
                "Used --should_continue but no checkpoint was found in --output_dir."
            )
        else:
            args.model_name_or_path = sorted_checkpoints[-1]

    # Refuse to train into a non-empty output directory unless told to.
    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Barrier to make sure only the first process in distributed training download model & vocab

    # Config source: --config_name takes precedence over the model path.
    if args.config_name:
        config = AutoConfig.from_pretrained(args.config_name,
                                            cache_dir=args.cache_dir)
    elif args.model_name_or_path:
        config = AutoConfig.from_pretrained(args.model_name_or_path,
                                            cache_dir=args.cache_dir)
    else:
        # When we release a pip version exposing CONFIG_MAPPING,
        # we can do `config = CONFIG_MAPPING[args.model_type]()`.
        raise ValueError(
            "You are instantiating a new config instance from scratch. This is not supported, but you can do it from another script, save it,"
            "and load it from here, using --config_name")

    # Tokenizer source: --tokenizer_name takes precedence over the model path.
    if args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name,
                                                  cache_dir=args.cache_dir)
    elif args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path,
                                                  cache_dir=args.cache_dir)
    else:
        raise ValueError(
            "You are instantiating a new tokenizer from scratch. This is not supported, but you can do it from another script, save it,"
            "and load it from here, using --tokenizer_name")

    # Clamp the block size to the tokenizer's maximum length.
    if args.block_size <= 0:
        args.block_size = tokenizer.max_len
        # Our input block size will be the max possible for the model
    else:
        args.block_size = min(args.block_size, tokenizer.max_len)

    # Model selection: token-discrimination head, pretrained LM head, or a
    # freshly initialised LM-head model built from the config.
    if args.model_name_or_path and (args.token_discrimination
                                    or args.mask_token_discrimination):
        model = RobertaForTokenDiscrimination.from_pretrained(
            args.model_name_or_path,
            from_tf=bool(".ckpt" in args.model_name_or_path),
            config=config,
            cache_dir=args.cache_dir,
        )
    elif args.model_name_or_path and args.mlm:
        model = AutoModelWithLMHead.from_pretrained(
            args.model_name_or_path,
            from_tf=bool(".ckpt" in args.model_name_or_path),
            config=config,
            cache_dir=args.cache_dir,
        )
    else:
        logger.info("Training new model from scratch")
        model = AutoModelWithLMHead.from_config(config)

    model.to(args.device)

    if args.local_rank == 0:
        torch.distributed.barrier()  # End of barrier
        # to make sure only the first process
        # in distributed training download model & vocab

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        if args.local_rank not in [-1, 0]:
            torch.distributed.barrier(
            )  # Barrier to make sure only the first process in distributed training process the dataset, and the others will use the cache

        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                evaluate=False)

        # Rank 0 reaches its barrier only after the dataset is loaded,
        # releasing the ranks waiting above.
        if args.local_rank == 0:
            torch.distributed.barrier()

        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practices: if you use save_pretrained for the model and tokenizer,
    # you can reload them using from_pretrained()
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir, exist_ok=True)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (model.module if hasattr(model, "module") else model
                         )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        # NOTE(review): a run with none of the three flags set trains and
        # saves successfully, then raises here — confirm this is intended.
        if args.mlm:
            model = AutoModelWithLMHead.from_pretrained(args.output_dir)
        elif args.token_discrimination or args.mask_token_discrimination:
            model = RobertaForTokenDiscrimination.from_pretrained(
                args.output_dir)
        else:
            raise NotImplementedError(
                'only mlm and token discrimination loss supported')

        tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            # Every directory under output_dir that contains a weights file.
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            # Step suffix is taken from the text after the last "-" in the
            # checkpoint path; empty when only one checkpoint is evaluated.
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split(
                "/")[-1] if checkpoint.find("checkpoint") != -1 else ""

            if args.mlm:
                model = AutoModelWithLMHead.from_pretrained(checkpoint)
            elif args.token_discrimination or args.mask_token_discrimination:
                model = RobertaForTokenDiscrimination.from_pretrained(
                    checkpoint)
            else:
                raise NotImplementedError(
                    'only mlm and token discrimination loss supported')

            model.to(args.device)
            result = evaluate(args, model, tokenizer, prefix=prefix)
            # Suffix each metric name with the step so results from several
            # checkpoints do not overwrite one another.
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

    return results
コード例 #9
0
from training_utils import train
import sys

MORGAN_FP_SIZE = 1024
MORGAN_FP_RADIUS = 2
N_EPISODES = 2000
EPISODE_LENGTH = 45
BOOTSTRAP_HEADS = 5

# Task names accepted as the first command-line argument.
TASKS = ('qed', 'penalized_logp', 'benchmark')


def parse_task(argv):
    """Return the task name found in *argv*, or ``None`` if it is unusable.

    Args:
        argv: Argument vector in ``sys.argv`` layout (program name first).

    Returns:
        ``argv[1]`` when it is present and one of ``TASKS``; otherwise
        ``None``. A missing argument is therefore reported the same way as
        an unknown one, not as an ``IndexError``.
    """
    if len(argv) < 2 or argv[1] not in TASKS:
        return None
    return argv[1]


if __name__ == '__main__':
    agent = BootstrappedDQN(MORGAN_FP_SIZE,
                            MORGAN_FP_SIZE,
                            n_heads=BOOTSTRAP_HEADS)

    task = parse_task(sys.argv)
    if task is None:
        print('BAD ARGS')
        sys.exit(1)

    # Every environment is built with the same atom set and step limit.
    if task == 'qed':
        env = QEDMolEnv({'C', 'O', 'N', 'Cl'}, max_steps=EPISODE_LENGTH)
    elif task == 'penalized_logp':
        env = PenalizedLogpEnv({'C', 'O', 'N', 'Cl'}, max_steps=EPISODE_LENGTH)
    else:  # 'benchmark'
        env = BenchmarkEnv({'C', 'O', 'N', 'Cl'}, max_steps=EPISODE_LENGTH)
    mfp = MorganFingerprintProvider(MORGAN_FP_SIZE, MORGAN_FP_RADIUS)

    molecule_pool = list(train(env, agent, mfp, N_EPISODES, EPISODE_LENGTH))

    # One entry per line; None entries yielded by train() are skipped.
    with open('OUT_MOLS_%s.smiles' % task, 'w') as f:
        for m in molecule_pool:
            if m is None:
                continue
            f.write('%s\n' % m)
    print('DONE')
コード例 #10
0
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(64),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(64),
        nn.AdaptiveMaxPool2d(32),
        ## 32x32
        Flatten(),
        nn.Linear(64*32*32, 1024),
        nn.ReLU(inplace=True),
        nn.Linear(1024, 17)
    )
    # Cast the model to the tensor type selected by `dtype`.
    model.type(dtype)

    loss_fn = nn.MultiLabelSoftMarginLoss().type(dtype)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    ## don't load model params from file - instead retrain the model
    if not from_pickle:
        train(train_loader, model, loss_fn, optimizer, dtype, print_every=10)
        ## serialize model data and save as .pkl file
        torch.save(model.state_dict(), save_model_path)
        print("model saved as {}".format(os.path.abspath(save_model_path)))
    ## load model params from file
    else:
        # map_location returns the storage unchanged, so tensors are loaded
        # onto the CPU regardless of the device they were saved from.
        state_dict = torch.load(save_model_path,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(state_dict)
        print("model loaded from {}".format(os.path.abspath(save_model_path)))

    # Evaluate on the validation loader and print the returned value.
    acc = validate_epoch(model, val_loader, dtype)
    print(acc)
コード例 #11
0
    ## loader
    train_loader = DataLoader(
        training_dataset,
        batch_size=256,
        shuffle=True,
        num_workers=4  # 0 for CUDA
    )
    ## small convolutional model: two conv / ReLU / batch-norm / pool stages
    ## followed by two fully connected layers with 17 outputs
    model = nn.Sequential(nn.Conv2d(4, 16, kernel_size=3, stride=1),
                          nn.ReLU(inplace=True), nn.BatchNorm2d(16),
                          nn.AdaptiveMaxPool2d(128),
                          nn.Conv2d(16, 32, kernel_size=3, stride=1),
                          nn.ReLU(inplace=True), nn.BatchNorm2d(32),
                          nn.AdaptiveMaxPool2d(64), Flatten(),
                          nn.Linear(32 * 64 * 64, 1024), nn.ReLU(inplace=True),
                          nn.Linear(1024, 17))
    model.type(dtype)

    # NOTE(review): BCELoss expects inputs in [0, 1], but the model above
    # ends in a plain Linear layer. Confirm that train() applies a sigmoid,
    # or switch to a logits-based loss.
    loss_fn = nn.BCELoss().type(dtype)
    optimizer = optim.Adam(model.parameters(), lr=5e-2)
    ## don't load model params from file - instead retrain the model
    if not from_pickle:
        train(train_loader, model, loss_fn, optimizer, dtype)
        ## serialize model data and save as .pkl file
        torch.save(model.state_dict(), save_model_path)
        # Call abspath on the path; formatting the function object itself
        # printed "<function abspath at 0x...>" instead of the location.
        print("model saved as {}".format(os.path.abspath(save_model_path)))
    ## load model params from file
    else:
        state_dict = torch.load(save_model_path)
        model.load_state_dict(state_dict)
コード例 #12
0
# Push only the first batch through `temp_model` to learn the size of its
# output; `size[1]` is used below as the input width of the first Linear.
# NOTE(review): the batch is moved with .cuda(), so this needs a GPU.
for t, (x, y) in enumerate(train_loader):
	x_var = Variable(x.type(dtype)).cuda()
	size=temp_model(x_var).size()
	if(t==0):
		break

# Two conv / ReLU / batch-norm / pool stages followed by two fully connected
# layers with 17 outputs.
model = nn.Sequential(
nn.Conv2d(4, 16, kernel_size=3, stride=1),
nn.ReLU(inplace=True),
nn.BatchNorm2d(16),
nn.AdaptiveMaxPool2d(128),
nn.Conv2d(16, 32, kernel_size=3, stride=1),
nn.ReLU(inplace=True),
nn.BatchNorm2d(32),
nn.AdaptiveMaxPool2d(64),
Flatten(),
nn.Linear(size[1], 1024),
nn.ReLU(inplace=True),
nn.Linear(1024, 17))

model.type(dtype)
model.train()
loss_fn = nn.MultiLabelSoftMarginLoss().type(dtype)
optimizer = optim.Adam(model.parameters(), lr=5e-2)
torch.cuda.synchronize()
train(train_loader, model, loss_fn, optimizer, dtype,num_epochs=1, print_every=10)

# Save the trained weights, then immediately read them back into the same
# model (a save / load round trip).
torch.save(model.state_dict(), save_model_path)
state_dict = torch.load(save_model_path)
model.load_state_dict(state_dict)
コード例 #13
0
def main():
    """Train, select and test one ``Dasnet`` model per fold.

    For each of ``N_FOLDS`` folds a fresh model is trained for up to
    ``N_EPOCHS`` epochs and validated after every epoch. Training stops
    early once the validation loss has failed to improve
    ``MAX_ITER_NO_IMPROVE`` times since the last improvement. The weights of
    the best epoch are restored, saved to ``model_weights_<fold>.pth`` under
    ``RESULTS_WEIGHTS_PATH`` and evaluated on the fold's test generator.
    Per-fold metrics are written to ``results.txt`` in the same directory,
    and the test targets and predictions of all folds are concatenated and
    passed to ``save_results``.
    """
    # Local import: only needed to snapshot the best weights below.
    import copy

    # The context manager closes the results file even if a fold raises.
    with open(RESULTS_WEIGHTS_PATH.joinpath("results.txt"),
              "w") as results_file:

        age_criterion = nn.MSELoss()
        sex_criterion = nn.BCELoss()

        # Accumulators for the test targets / predictions of every fold.
        age_pred = torch.empty(0).to(DEVICE)
        age_data = torch.empty(0).to(DEVICE)
        sex_pred = torch.empty(0).to(DEVICE)
        sex_data = torch.empty(0).to(DEVICE)

        for i in range(N_FOLDS):
            model = m.Dasnet().to(DEVICE)
            optimizer = torch.optim.Adadelta(model.parameters(),
                                             lr=1.0,
                                             rho=0.95,
                                             eps=1e-06)

            # Any configuration other than FIXED_GROUPS uses the simple
            # k-fold generator for fold i.
            if FIXED_GROUPS:
                training_gen, eval_gen, test_gen = (
                    dat_ut.fixed_dataset_generator())
            else:
                training_gen, eval_gen, test_gen = (
                    dat_ut.kfold_generator_simple(i))

            # -1 in slot 0 marks "no epoch evaluated yet".
            best_epoch_result = [-1, -1, -1, -1]
            best_epoch = 0
            best_epoch_model = None
            no_upgrade_cont = 0

            for k in range(1, N_EPOCHS + 1):

                train_ut.train(model, training_gen, age_criterion,
                               sex_criterion, optimizer)
                (_, _, _, _, total_loss, age_loss, sex_loss, avg_age_diff,
                 avg_sex_diff) = train_ut.validate(model, eval_gen,
                                                   age_criterion,
                                                   sex_criterion)
                if (best_epoch_result[0] >= total_loss
                        or best_epoch_result[0] == -1):
                    best_epoch_result = [
                        total_loss, age_loss, sex_loss, avg_age_diff,
                        avg_sex_diff
                    ]
                    # state_dict() returns references to the live tensors,
                    # which later epochs would overwrite. Deep-copy to keep
                    # a real snapshot of the best weights.
                    best_epoch_model = copy.deepcopy(model.state_dict())
                    best_epoch = k
                    no_upgrade_cont = 0

                if best_epoch_result[0] < total_loss:
                    no_upgrade_cont += 1

                if no_upgrade_cont == MAX_ITER_NO_IMPROVE:
                    print("UPGRADE FIN / EPOCH: {}".format(best_epoch),
                          file=results_file)
                    print("FINAL EPOCH: {}".format(k), file=results_file)
                    break

            # Nothing to restore if no epoch ran (N_EPOCHS < 1).
            if best_epoch_model is not None:
                model.load_state_dict(best_epoch_model)

            # One weights file per fold. The previous name had no
            # placeholder, so every fold overwrote "model_weights.pth".
            torch.save(
                model.state_dict(),
                RESULTS_WEIGHTS_PATH.joinpath(
                    "model_weights_{}.pth".format(i)))
            (age, age_out, sex, sex_out, total_test_loss, age_test_loss,
             sex_test_loss, avg_age_diff,
             avg_sex_diff) = train_ut.validate(model, test_gen, age_criterion,
                                               sex_criterion, 'test')

            print(
                "TEST :: TOTAL LOSS = {} \nAGE_LOSS = {} / SEX_LOSS = {} \nAVG_AGE_DIFF = {} / AVG_SEX_DIFF = {}"
                .format(total_test_loss, age_test_loss, sex_test_loss,
                        avg_age_diff, avg_sex_diff),
                file=results_file)

            age_data = torch.cat((age_data, age), 0)
            age_pred = torch.cat((age_pred, age_out), 0)
            sex_data = torch.cat((sex_data, sex), 0)
            sex_pred = torch.cat((sex_pred, sex_out), 0)

            print_metrics(age, age_out, sex, sex_out, results_file)

        save_results(age_data, age_pred, sex_data, sex_pred)