Example 1
def init(config, _run):
    args = SimpleNamespace(**config)
    assertions.validate_hypers(args)
    mlh.seed_all(args.seed)

    args.data_path = assertions.validate_dataset_path(args)

    if args.activation is not None:
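        # NB: 'relu' must be matched before 'elu', since 'elu' is a substring of 'relu'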
        if 'relu' in args.activation:
            args.activation = torch.nn.ReLU()
        elif 'elu' in args.activation:
            args.activation = torch.nn.ELU()
        else:
            args.activation = torch.nn.ReLU()  # default for unrecognized names

    args._run = _run

    Path(args.artifact_dir).mkdir(exist_ok=True)

    args.loss_name = args.loss

    if args.cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
        args.cuda = True
    else:
        args.device = torch.device('cpu')
        args.cuda = False

    args.partition_scheduler = updates.get_partition_scheduler(args)
    args.partition = util.get_partition(args)

    args.data_path = Path(args.data_path)
    return args
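
For context, a minimal sketch of how an init like the ones above might be invoked; the config keys are illustrative placeholders rather than the project's actual schema, and _run is assumed to be the experiment framework's run object (the underscore naming matches Sacred's injection convention):

# Hypothetical call; the real keys are whatever assertions.validate_hypers expects.
config = {
    'seed': 0,
    'activation': 'relu',
    'artifact_dir': 'artifacts',
    'loss': 'tvo',
    'cuda': True,
}
args = init(config, _run)
print(args.device, args.loss_name)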
Example 2
def init(config, _run):
    args = SimpleNamespace(**config)
    assertions.validate_hypers(args)
    mlh.seed_all(args.seed)

    args.data_path = assertions.validate_dataset_path(args)

    if args.activation is not None:
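        # order matters: 'elu' would also match 'relu', so check 'relu' first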
        if 'relu' in args.activation:
            args.activation = torch.nn.ReLU()
        elif 'elu' in args.activation:
            args.activation = torch.nn.ELU()
        else:
            args.activation = torch.nn.ReLU()  # default for unrecognized names

    args._run = _run
    args.model_dir = args.artifact_dir

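    # give each checkpointed/recorded run its own UUID directory so runs never collide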
    if args.checkpoint or args.record:
        unique_directory = Path(args.model_dir) / str(uuid.uuid4())
        unique_directory.mkdir(parents=True)
        args.unique_directory = unique_directory

        # Save args json for grepability
        with open(args.unique_directory / 'args.json', 'w') as outfile:
            json.dump(dict(config), outfile, indent=4)

    args.loss_name = args.loss

    if args.cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
        args.cuda = True
    else:
        args.device = torch.device('cpu')
        args.cuda = False

    args.partition_scheduler = updates.get_partition_scheduler(args)
    args.partition = util.get_partition(args)

    # per_sample takes precedence: disable per_batch when both flags are set
    if args.per_batch and args.per_sample:
        args.per_batch = False
    args.data_path = Path(args.data_path)
    return args
Example 3
def init(config, run):
    # general init
    args = SimpleNamespace(**config)
    args = assertions.validate_args(args)
    mlh.seed_all(args.seed)
    args._run = run
    args.wandb = wandb

    # init scheduler
    args.partition_scheduler = schedules.get_partition_scheduler(args)
    args.partition = util.get_partition(args)

    # init data
    train_data_loader, test_data_loader = get_data(args)
    args.train_data_loader = train_data_loader
    args.test_data_loader = test_data_loader

    # init model
    model = get_model(train_data_loader, args)

    # init optimizer
    model.init_optimizer()

    return model, args
Example 4
def train(args):
    # read data (open_resource also records the file as a tracked run resource)
    with args._run.open_resource(args.data_path, 'rb') as file_handle:
        data = pickle.load(file_handle)

    train_image = data['train_image']
    test_image = data['test_image']

    train_data_loader = get_data_loader(train_image, args.batch_size, args)
    test_data_loader = get_data_loader(test_image, args.test_batch_size, args)

    # Make models
    train_obs_mean = util.tensor(np.mean(train_image, axis=0), args)
    generative_model, inference_network = util.init_models(
        train_obs_mean, args)

    # Make partition
    args.partition = util.get_partition(args.K, args.partition_type,
                                        args.log_beta_min, args.device)

    # Make optimizer
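    # chain both models' parameters so a single Adam instance updates them jointly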
    parameters = itertools.chain.from_iterable(
        [x.parameters() for x in [generative_model, inference_network]])
    optimizer = torch.optim.Adam(parameters, lr=args.lr)

    for epoch in range(args.epochs):
        epoch_train_elbo = 0
        for batch in train_data_loader:
            optimizer.zero_grad()
            loss, elbo = args.loss(generative_model, inference_network, batch,
                                   args,
                                   args.valid_S)  # TODO: add alpha lower bound
            loss.backward()
            optimizer.step()
            epoch_train_elbo += elbo.item()

        if (args.save_grads and (epoch % args.test_frequency) == 0):
            # estimate gradient variance on the last training batch
            grad_variance = util.calculate_grad_variance(
                generative_model, inference_network, batch, args)
            log_scalar("grad.variance", grad_variance, epoch, verbose=True)

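        # stop training early if the last batch produced a NaN loss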
        if torch.isnan(loss):
            break

        epoch_train_elbo = epoch_train_elbo / len(train_data_loader)
        log_scalar("train.elbo", epoch_train_elbo, epoch)

        if (args.checkpoint and (epoch != 0)
                and ((epoch % args.checkpoint_frequency) == 0)):
            save_checkpoint(generative_model, inference_network, epoch,
                            epoch_train_elbo, optimizer, args)

        if args.train_only:
            continue

        # run test set
        if (epoch == (args.epochs - 1)) or \
           (args.test_during_training and ((epoch % args.test_frequency) == 0)):
            print("Running test set...")
            test_elbo = 0
            with torch.no_grad():
                for batch in test_data_loader:
                    _, elbo = args.loss(generative_model, inference_network,
                                        batch, args, args.test_S)
                    test_elbo += elbo.item()

            test_elbo = test_elbo / len(test_data_loader)
            log_scalar("test.elbo", test_elbo, epoch)

        # ------ end of training loop ---------

    # Save trained model
    if args.checkpoint:
        save_checkpoint(generative_model, inference_network, epoch,
                        epoch_train_elbo, optimizer, args)

    if args.train_only:
        return None
    # train_only is False here, so the final-epoch test pass set test_elbo
    return {"test_elbo": test_elbo}