Example #1
def main(args):
    # cuda
    cuda_exp = args.cuda_exp == "true"

    # Notebook experiment settings
    experiment_name = args.experiment_name
    experiment_results_folder = args.results_folder
    results_path = os.path.join("../", experiment_results_folder)
    data_folder = args.data_folder
    data_file = args.data_file

    # Regularization settings
    if args.noise_reg_scheduler == "constant":
        noise_reg_schedule = constant_regularization_schedule
    elif args.noise_reg_scheduler == "sqrt":
        noise_reg_schedule = square_root_noise_schedule
    elif args.noise_reg_scheduler == "rot":
        noise_reg_schedule = rule_of_thumb_noise_schedule
    else:
        noise_reg_schedule = constant_regularization_schedule

    noise_reg_sigma = args.noise_reg_sigma  # Sigma for the rule-of-thumb schedule; fixed noise level for the constant schedule

    l2_reg = args.l2_reg
    initial_lr = args.initial_lr
    lr_factor = args.lr_factor
    lr_patience = args.lr_patience
    min_lr = args.min_lr

    # Data settings
    obs_cols = args.obs_cols
    context_cols = args.context_cols

    # Training settings
    epochs = args.epochs
    batch_size = args.batch_size
    clipped_adam = args.clipped_adam

    # Dimensions of problem
    problem_dim = len(args.obs_cols)
    context_dim = len(context_cols)

    # Flow settings
    flow_depth = args.flow_depth
    c_net_depth = args.c_net_depth
    c_net_h_dim = args.c_net_h_dim
    batchnorm_momentum = args.batchnorm_momentum

    # Define context conditioner
    context_n_depth = args.context_n_depth
    context_n_h_dim = args.context_n_h_dim
    rich_context_dim = args.rich_context_dim

    settings_dict = {
        "epochs": epochs,
        "batch_size": batch_size,
        "problem_dim": problem_dim,
        "context_dim": context_dim,
        "flow_depth": flow_depth,
        "c_net_depth": c_net_depth,
        "c_net_h_dim": c_net_h_dim,
        "context_n_depth": context_n_depth,
        "context_n_h_dim": context_n_h_dim,
        "rich_context_dim": rich_context_dim,
        "obs_cols": obs_cols,
        "context_cols": context_cols,
        "batchnorm_momentum": batchnorm_momentum,
        "l2_reg": l2_reg,
        "clipped_adam": clipped_adam,
        "noise_reg_schedule": args.noise_reg_scheduler,
        "noise_reg_sigma": noise_reg_sigma,
        "initial_lr": initial_lr,
        "lr_factor": lr_factor,
        "lr_patience": lr_patience,
        "min_lr": min_lr
    }

    print(f"Settings:\n{settings_dict}")

    # Load data
    csv_path = os.path.join(data_folder, data_file)
    df = pd.read_csv(csv_path)

    train_dataloader, test_dataloader, obs_scaler, context_scaler = simple_data_split_conditional(
        df=df,
        obs_cols=obs_cols,
        context_cols=context_cols,
        batch_size=batch_size,
        cuda_exp=cuda_exp)

    # Define quantities used by the noise regularization schedule
    data_size = len(train_dataloader)
    data_dim = problem_dim + context_dim

    # Define normalizing flow
    normalizing_flow = combi_conditional_normalizing_flow_factory(
        flow_depth=flow_depth,
        problem_dim=problem_dim,
        c_net_depth=c_net_depth,
        c_net_h_dim=c_net_h_dim,
        context_dim=context_dim,
        context_n_h_dim=context_n_h_dim,
        context_n_depth=context_n_depth,
        rich_context_dim=rich_context_dim,
        cuda=cuda_exp,
        batchnorm_momentum=batchnorm_momentum)

    # Setup Optimizer
    if clipped_adam is None:
        if l2_reg is None:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr)
        else:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr,
                                   weight_decay=l2_reg)
    else:
        if l2_reg is None:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    clip_norm=clipped_adam)
        else:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    weight_decay=l2_reg,
                                    clip_norm=clipped_adam)

    if lr_factor is not None:
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                         factor=lr_factor,
                                                         patience=lr_patience,
                                                         min_lr=min_lr,
                                                         verbose=True)

    # Setup regularization
    h = noise_reg_schedule(data_size, data_dim, noise_reg_sigma)
    noise_reg = NoiseRegularizer(discrete_dims=None, h=h, cuda=cuda_exp)

    # Train and test sizes
    n_train = train_dataloader.dataset.shape[0]
    n_test = test_dataloader.dataset.shape[0]

    # Training loop
    full_train_losses = []
    train_losses = []
    test_losses = []
    no_noise_losses = []
    lr_scheduler_steps = []

    for epoch in range(1, epochs + 1):

        normalizing_flow.modules.train()
        train_epoch_loss = 0
        for k, batch in enumerate(train_dataloader):
            # Add regularization noise to the training batch
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            # Condition the flow on the sampled covariate and calculate -log_prob = loss
            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss.item()
        full_train_losses.append(train_epoch_loss / n_train)

        # calculate test loss
        normalizing_flow.modules.eval()
        with torch.no_grad():
            test_epoch_loss = 0
            for j, batch in enumerate(test_dataloader):
                # Split the test batch into observations and context
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the sampled covariate and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                test_loss = -conditioned_flow_dist.log_prob(x).sum()

                test_epoch_loss += test_loss.item()

            # Log and evaluate every 10 epochs (and near the end of training)
            if epoch % 10 == 0 or epoch == epochs - 1:
                normalizing_flow.modules.eval()
                train_losses.append(train_epoch_loss / n_train)
                test_losses.append(test_epoch_loss / n_test)

                no_noise_epoch_loss = 0
                for k, batch in enumerate(train_dataloader):
                    # No noise added here: evaluate the training loss on the clean data
                    x = batch[:, :problem_dim]
                    context = batch[:, problem_dim:]

                    # Condition the flow on the sampled covariate and calculate -log_prob = loss
                    conditioned_flow_dist = normalizing_flow.condition(context)
                    loss = -conditioned_flow_dist.log_prob(x).sum()

                    no_noise_epoch_loss += loss.item()
                no_noise_losses.append(no_noise_epoch_loss / n_train)

            if epoch % 100 == 0:
                print(
                    f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss:{no_noise_losses[-1]} test_loss: {test_losses[-1]}"
                )

        # Take scheduler step if needed
        if lr_factor is not None:
            scheduler.step(test_epoch_loss / n_test)
            lr_scheduler_steps.append(epoch)

        # Print the final logged losses at the end of training
        if epoch == epochs - 1:
            normalizing_flow.modules.eval()
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss:{no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )
    experiment_dict = {
        'train': train_losses,
        'test': test_losses,
        'no_noise_losses': no_noise_losses,
        'lr_steps': lr_scheduler_steps
    }

    results_dict = {
        'model': normalizing_flow,
        'settings': settings_dict,
        'logs': experiment_dict
    }

    file_name = f"{experiment_name}.pickle"
    file_path = os.path.join(results_path, file_name)

    print(f"Saving: {file_name}")
    with open(file_path, 'wb') as f:
        pickle.dump(results_dict, f)
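The script above pickles the trained flow together with its settings and training logs. A minimal sketch of how such a run could be reloaded and evaluated afterwards is shown below; the file path is hypothetical, the dictionary keys simply mirror the results_dict written above, and it assumes the flow's classes are importable when unpickling and that the save/load devices match.

import pickle

import torch


def load_experiment(pickle_path):
    # The file was written with pickle.dump(results_dict, f) in the script above.
    with open(pickle_path, 'rb') as f:
        results = pickle.load(f)
    return results['model'], results['settings'], results['logs']


# Hypothetical usage, assuming an already-scaled observation/context pair:
# flow, settings, logs = load_experiment("../results/my_experiment.pickle")
# flow.modules.eval()
# with torch.no_grad():
#     x = torch.zeros(1, settings['problem_dim'])
#     context = torch.zeros(1, settings['context_dim'])
#     print(-flow.condition(context).log_prob(x).item())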
Example #2
def main(args):
    # cuda
    cuda_exp = args.cuda_exp == "true"

    print(cuda_exp)

    # Notebook experiment settings
    experiment_name = args.experiment_name
    experiment_results_folder = args.results_folder
    results_path = os.path.join("../", experiment_results_folder)
    data_folder = args.data_folder
    data_file = args.data_file

    # Regularization settings
    if args.noise_reg_scheduler == "constant":
        noise_reg_schedule = constant_regularization_schedule
    elif args.noise_reg_scheduler == "sqrt":
        noise_reg_schedule = square_root_noise_schedule
    elif args.noise_reg_scheduler == "rot":
        noise_reg_schedule = rule_of_thumb_noise_schedule
    else:
        noise_reg_schedule = constant_regularization_schedule

    noise_reg_sigma = args.noise_reg_sigma  # Sigma for the rule-of-thumb schedule; fixed noise level for the constant schedule

    # Data settings
    obs_cols = args.obs_cols
    context_cols = args.context_cols

    # Training settings
    epochs = args.epochs
    batch_size = args.batch_size

    # Dimensions of problem
    problem_dim = len(args.obs_cols)
    context_dim = len(args.context_cols)

    # Flow settings
    flow_depth = args.flow_depth
    c_net_depth = args.c_net_depth
    c_net_h_dim = args.c_net_h_dim

    # Define context conditioner
    context_n_depth = args.context_n_depth
    context_n_h_dim = args.context_n_h_dim
    rich_context_dim = args.rich_context_dim

    settings_dict = {
        "epochs": epochs,
        "batch_size": batch_size,
        "problem_dim": problem_dim,
        "context_dim": context_dim,
        "flow_depth": flow_depth,
        "c_net_depth": c_net_depth,
        "c_net_h_dim": c_net_h_dim,
        "context_n_depth": context_n_depth,
        "context_n_h_dim": context_n_h_dim,
        "rich_context_dim": rich_context_dim,
        "obs_cols": obs_cols,
        "context_cols": context_cols
    }

    print(f"Settings:\n{settings_dict}")

    # Load data
    csv_path = os.path.join(data_folder, data_file)
    donkey_df = pd.read_csv(csv_path, parse_dates=[4, 11])

    train_dataloader, test_dataloader, _, _ = searchlog_day_split(
        donkey_df, obs_cols, context_cols, batch_size, cuda_exp)
    train_idx, test_idx = get_split_idx_on_day(donkey_df)

    run_idxs = {'train': train_idx, 'test': test_idx}

    # Define quantities used by the noise regularization schedule
    data_size = len(train_dataloader)
    data_dim = problem_dim + context_dim

    # Define normalizing flow
    normalizing_flow = conditional_normalizing_flow_factory2(
        flow_depth=flow_depth,
        problem_dim=problem_dim,
        c_net_depth=c_net_depth,
        c_net_h_dim=c_net_h_dim,
        context_dim=context_dim,
        context_n_h_dim=context_n_h_dim,
        context_n_depth=context_n_depth,
        rich_context_dim=rich_context_dim,
        cuda=cuda_exp)

    # Setup Optimizer
    optimizer = optim.Adam(normalizing_flow.modules.parameters(), lr=1e-4)
    print("number of params: ",
          sum(p.numel() for p in normalizing_flow.modules.parameters()))

    # Setup regularization
    h = noise_reg_schedule(data_size, data_dim, noise_reg_sigma)
    noise_reg = NoiseRegularizer(discrete_dims=None, h=h, cuda=cuda_exp)
    print(f"Data size: {train_dataloader.dataset.shape}")
    print(f"Noise scale: {h}")

    # Train and test sizes
    n_train = train_dataloader.dataset.shape[0]
    n_test = test_dataloader.dataset.shape[0]
    print(f"n_train {n_train}")
    print(f"n_test {n_test}")

    # Training loop
    full_train_losses = []
    train_losses = []
    test_losses = []
    no_noise_losses = []

    for epoch in range(1, epochs + 1):

        normalizing_flow.modules.train()
        train_epoch_loss = 0
        for k, batch in enumerate(train_dataloader):
            # Add regularization noise to the training batch
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            # Condition the flow on the sampled covariate and calculate -log_prob = loss
            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss.item()
        full_train_losses.append(train_epoch_loss / n_train)

        # Log and evaluate every 10 epochs (and near the end of training)
        if epoch % 10 == 0 or epoch == epochs - 1:
            normalizing_flow.modules.eval()
            train_losses.append(train_epoch_loss / n_train)

            no_noise_epoch_loss = 0
            for k, batch in enumerate(train_dataloader):
                # No noise added here: evaluate the training loss on the clean data
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the sampled covariate and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                loss = -conditioned_flow_dist.log_prob(x).sum()

                no_noise_epoch_loss += loss.item()
            no_noise_losses.append(no_noise_epoch_loss / n_train)

            test_epoch_loss = 0
            for j, batch in enumerate(test_dataloader):
                # Split the test batch into observations and context
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the sampled covariate and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                test_loss = -conditioned_flow_dist.log_prob(x).sum()

                test_epoch_loss += test_loss.item()
            test_losses.append(test_epoch_loss / n_test)

        if epoch % 100 == 0:
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss:{no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )

        # Print the final logged losses at the end of training
        if epoch == epochs - 1:
            normalizing_flow.modules.eval()
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss:{no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )
    experiment_dict = {
        'train': train_losses,
        'test': test_losses,
        'no_noise_losses': no_noise_losses
    }

    results_dict = {
        'model': normalizing_flow,
        'settings': settings_dict,
        'logs': experiment_dict,
        'data_split': run_idxs
    }

    file_name = f"{experiment_name}.pickle"
    file_path = os.path.join(results_path, file_name)

    print(f"Saving: {file_name}")
    with open(file_path, 'wb') as f:
        pickle.dump(results_dict, f)
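Both scripts read their configuration from an `args` namespace, but the command-line interface itself is not shown here. A minimal argparse sketch that would supply the attributes used above could look like the following; the argument names are taken from the code, while the types, defaults, and `nargs` settings are assumptions.

import argparse


def build_parser():
    parser = argparse.ArgumentParser()
    # Experiment bookkeeping
    parser.add_argument("--cuda_exp", type=str, default="false")  # compared against the string "true"
    parser.add_argument("--experiment_name", type=str, required=True)
    parser.add_argument("--results_folder", type=str, default="results")
    parser.add_argument("--data_folder", type=str, default="data")
    parser.add_argument("--data_file", type=str, required=True)
    # Data columns
    parser.add_argument("--obs_cols", nargs="+", type=str)
    parser.add_argument("--context_cols", nargs="+", type=str)
    # Training and regularization
    parser.add_argument("--epochs", type=int, default=100)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--noise_reg_scheduler", type=str, default="constant")
    parser.add_argument("--noise_reg_sigma", type=float, default=0.1)
    # Flow architecture
    parser.add_argument("--flow_depth", type=int, default=4)
    parser.add_argument("--c_net_depth", type=int, default=2)
    parser.add_argument("--c_net_h_dim", type=int, default=64)
    parser.add_argument("--context_n_depth", type=int, default=2)
    parser.add_argument("--context_n_h_dim", type=int, default=64)
    parser.add_argument("--rich_context_dim", type=int, default=8)
    return parser


# if __name__ == "__main__":
#     main(build_parser().parse_args())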
Example #3
def main(args):
    # cuda
    cuda_exp = args.cuda_exp == "true"

    # Notebook experiment settings
    experiment_name = args.experiment_name
    experiment_results_folder = args.results_folder
    results_path = os.path.join("../", experiment_results_folder)
    data_folder = args.data_folder
    data_file = args.data_file
    extra_data_file = args.extra_data_file

    # Regularization settings
    if args.noise_reg_scheduler == "constant":
        noise_reg_schedule = constant_regularization_schedule
    elif args.noise_reg_scheduler == "sqrt":
        noise_reg_schedule = square_root_noise_schedule
    elif args.noise_reg_scheduler == "rot":
        noise_reg_schedule = rule_of_thumb_noise_schedule
    else:
        noise_reg_schedule = constant_regularization_schedule

    noise_reg_sigma = args.noise_reg_sigma  # Sigma for the rule-of-thumb schedule; fixed noise level for the constant schedule

    l2_reg = args.l2_reg
    initial_lr = args.initial_lr
    lr_factor = args.lr_factor
    lr_patience = args.lr_patience
    min_lr = args.min_lr

    # Data settings
    obs_cols = args.obs_cols
    semisup_context_cols = args.semisup_context_cols
    sup_context_cols = args.sup_context_cols

    if sup_context_cols is None:
        context_cols = semisup_context_cols
    else:
        context_cols = semisup_context_cols + sup_context_cols

    # Training settings
    epochs = args.epochs
    batch_size = args.batch_size
    clipped_adam = args.clipped_adam

    # Dimensions of problem
    problem_dim = len(args.obs_cols)
    context_dim = len(context_cols)

    # Flow settings
    flow_depth = args.flow_depth
    c_net_depth = args.c_net_depth
    c_net_h_dim = args.c_net_h_dim
    batchnorm_momentum = args.batchnorm_momentum

    # Define context conditioner
    context_n_depth = args.context_n_depth
    context_n_h_dim = args.context_n_h_dim
    rich_context_dim = args.rich_context_dim

    settings_dict = {
        "epochs": epochs,
        "batch_size": batch_size,
        "problem_dim": problem_dim,
        "context_dim": context_dim,
        "flow_depth": flow_depth,
        "c_net_depth": c_net_depth,
        "c_net_h_dim": c_net_h_dim,
        "context_n_depth": context_n_depth,
        "context_n_h_dim": context_n_h_dim,
        "rich_context_dim": rich_context_dim,
        "obs_cols": obs_cols,
        "context_cols": context_cols,
        "semisup_context_cols": semisup_context_cols,
        "sup_context_context_cols": sup_context_cols,
        "batchnorm_momentum": batchnorm_momentum,
        "l2_reg": l2_reg,
        "clipped_adam": clipped_adam,
        "initial_lr": initial_lr,
        "lr_factor": lr_factor,
        "lr_patience": lr_patience,
        "min_lr": min_lr,
        "noise_reg_sigma": noise_reg_sigma
    }

    print(f"Settings:\n{settings_dict}")

    # Load data
    csv_path = os.path.join(data_folder, data_file)
    donkey_df = pd.read_csv(csv_path, parse_dates=[4, 11])

    csv_path = os.path.join(data_folder, extra_data_file)
    extra_df = pd.read_csv(csv_path, parse_dates=[4, 12])

    # Save the train/test split indices. A fixed seed is used, but storing the indices keeps the split explicit.
    train_idx, test_idx = get_split_idx_on_day(donkey_df)
    run_idxs = {'train': train_idx, 'test': test_idx}

    train_dataloader, test_dataloader, extra_dataloader, obs_scaler, semisup_context_scaler, sup_context_scaler = searchlog_semisup_day_split(
        sup_df=donkey_df,
        unsup_df=extra_df,
        obs_cols=obs_cols,
        semisup_context_cols=semisup_context_cols,
        sup_context_cols=sup_context_cols,
        batch_size=batch_size,
        cuda_exp=cuda_exp)

    # Define quantities used by the noise regularization schedule
    data_size = len(train_dataloader)
    data_dim = problem_dim + context_dim

    # Define normalizing flow
    normalizing_flow = conditional_normalizing_flow_factory3(
        flow_depth=flow_depth,
        problem_dim=problem_dim,
        c_net_depth=c_net_depth,
        c_net_h_dim=c_net_h_dim,
        context_dim=context_dim,
        context_n_h_dim=context_n_h_dim,
        context_n_depth=context_n_depth,
        rich_context_dim=rich_context_dim,
        cuda=cuda_exp,
        batchnorm_momentum=batchnorm_momentum)

    # Setup Optimizer
    if clipped_adam is None:
        if l2_reg is None:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr)
        else:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr,
                                   weight_decay=l2_reg)
    else:
        if l2_reg is None:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    clip_norm=clipped_adam)
        else:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    weight_decay=l2_reg,
                                    clip_norm=clipped_adam)

    if lr_factor is not None:
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                         factor=lr_factor,
                                                         patience=lr_patience,
                                                         min_lr=min_lr,
                                                         verbose=True)

    # Setup regularization
    h = noise_reg_schedule(data_size, data_dim, noise_reg_sigma)
    noise_reg = NoiseRegularizer(discrete_dims=None, h=h, cuda=cuda_exp)

    # Train and test sizes
    n_train = train_dataloader.dataset.shape[0]
    n_test = test_dataloader.dataset.shape[0]

    # Define the possible supervised contexts to marginalize out during unsupervised training
    context_val_dict = {}

    if "wind_dir_sin" in sup_context_cols and "wind_dir_cos" in sup_context_cols:
        wind_dir_arr = np.unique(donkey_df[['wind_dir_sin',
                                            'wind_dir_cos']].values.tolist(),
                                 axis=0)
        context_val_dict['wind_dir_sin'] = wind_dir_arr
        context_val_dict['wind_dir_cos'] = None

    if "windy" in sup_context_cols:
        windy_arr = donkey_df['windy'].unique()
        context_val_dict['windy'] = windy_arr

    if "air_temp" in sup_context_cols:
        air_temp_arr = donkey_df['air_temp'].unique()
        context_val_dict['air_temp'] = air_temp_arr

    if "rain" in sup_context_cols:
        rain_arr = donkey_df['rain'].unique()
        context_val_dict['rain'] = rain_arr

    context_val_arr = [
        context_val_dict[col] for col in sup_context_cols
        if context_val_dict[col] is not None
    ]
    temp_contexts = np.array(list(itertools.product(*context_val_arr)))

    contexts_arr = []
    for row in temp_contexts:
        cleaned_row = []
        for elem in row:
            if isinstance(elem, np.ndarray):
                for value in elem:
                    cleaned_row.append(value)
            else:
                cleaned_row.append(elem)
        contexts_arr.append(cleaned_row)

    possible_contexts = np.array(contexts_arr)

    # Empirical prior over the rain indicator (tensors are built directly on the GPU, as elsewhere in this script)
    prior_dict = {
        True: torch.tensor(donkey_df.rain.sum() / len(donkey_df)).float().cuda(),
        False: torch.tensor(1 - donkey_df.rain.sum() / len(donkey_df)).float().cuda()
    }

    print(len(possible_contexts))
    print(possible_contexts)
    print(prior_dict)

    # Training loop
    train_losses = []
    test_losses = []
    no_noise_losses = []
    lr_scheduler_steps = []

    for epoch in range(1, epochs + 1):

        normalizing_flow.modules.train()
        train_epoch_loss = 0
        for k, batch in enumerate(train_dataloader):
            # Add regularization noise to the training batch
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            # Condition the flow on the sampled covariate and calculate -log_prob = loss
            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss.item()

        # Unsupervised step over the extra data; its loss is not logged
        for k, batch in enumerate(extra_dataloader):
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            semisup_context = batch[:, problem_dim:]
            loss = 0
            for unscaled_sup_context in possible_contexts:
                sup_context = sup_context_scaler.transform(
                    [unscaled_sup_context])
                sup_context = torch.tensor(sup_context).float().expand(
                    (semisup_context.shape[0], len(sup_context[0]))).cuda()
                context = torch.cat((semisup_context, sup_context), dim=1)
                conditioned_flow_dist = normalizing_flow.condition(context)
                loss += -(conditioned_flow_dist.log_prob(x) *
                          prior_dict[unscaled_sup_context[0]]).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        normalizing_flow.modules.eval()
        with torch.no_grad():
            test_epoch_loss = 0
            for j, batch in enumerate(test_dataloader):
                # Split the test batch into observations and context
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the sampled covariate and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                test_loss = -conditioned_flow_dist.log_prob(x).sum()

                test_epoch_loss += test_loss.item()

            # Log and evaluate every 10 epochs (and near the end of training)
            if epoch % 10 == 0 or epoch == epochs - 1:
                normalizing_flow.modules.eval()
                train_losses.append(train_epoch_loss / n_train)
                test_losses.append(test_epoch_loss / n_test)

                no_noise_epoch_loss = 0
                for k, batch in enumerate(train_dataloader):
                    # No noise added here: evaluate the training loss on the clean data
                    x = batch[:, :problem_dim]
                    context = batch[:, problem_dim:]

                    # Condition the flow on the sampled covariate and calculate -log_prob = loss
                    conditioned_flow_dist = normalizing_flow.condition(context)
                    loss = -conditioned_flow_dist.log_prob(x).sum()

                    no_noise_epoch_loss += loss.item()
                no_noise_losses.append(no_noise_epoch_loss / n_train)

        # Take scheduler step if needed
        if lr_factor is not None:
            scheduler.step(test_epoch_loss / n_test)
            lr_scheduler_steps.append(epoch)

        # Print the final logged losses at the end of training
        if epoch == epochs - 1:
            normalizing_flow.modules.eval()
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss:{no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )
    experiment_dict = {
        'train': train_losses,
        'test': test_losses,
        'no_noise_losses': no_noise_losses
    }

    results_dict = {
        'model': normalizing_flow,
        'settings': settings_dict,
        'logs': experiment_dict,
        'data_split': run_idxs,
        'lr_steps': lr_scheduler_steps
    }

    file_name = f"{experiment_name}.pickle"
    file_path = os.path.join(results_path, file_name)

    print(f"Saving: {file_name}")
    with open(file_path, 'wb') as f:
        pickle.dump(results_dict, f)
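The unsupervised loop in the script above enumerates every possible value of the supervised context and weights each conditional log-density by an empirical prior over that value. A compact, self-contained restatement of that step is sketched below; the function name, argument shapes, and the (key, tensor) pairing are hypothetical and only mirror the structure of the loop above.

import torch


def weighted_unsup_loss(flow, x, semisup_context, possible_sup_contexts, prior):
    """Sketch of the marginalization step: sum over candidate supervised
    contexts, weighting each conditional log-prob by its prior probability.

    possible_sup_contexts: iterable of (key, already-scaled 1D tensor) pairs.
    prior: dict mapping key -> empirical probability of that context value.
    """
    loss = x.new_zeros(())
    for key, sup_context in possible_sup_contexts:
        # Broadcast the candidate supervised context across the batch
        sup = sup_context.expand(semisup_context.shape[0], -1)
        context = torch.cat((semisup_context, sup), dim=1)
        loss = loss - (flow.condition(context).log_prob(x) * prior[key]).sum()
    return loss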
Example #4
def main(args):
    # cuda
    cuda_exp = args.cuda_exp == "true"

    # Notebook experiment settings
    experiment_name = args.experiment_name
    experiment_results_folder = args.results_folder
    results_path = os.path.join("../", experiment_results_folder)
    data_folder = args.data_folder
    data_file = args.data_file
    extra_data_file = args.extra_data_file

    # Regularization settings
    if args.noise_reg_scheduler == "constant":
        noise_reg_schedule = constant_regularization_schedule
    elif args.noise_reg_scheduler == "sqrt":
        noise_reg_schedule = square_root_noise_schedule
    elif args.noise_reg_scheduler == "rot":
        noise_reg_schedule = rule_of_thumb_noise_schedule
    else:
        noise_reg_schedule = constant_regularization_schedule

    noise_reg_sigma = args.noise_reg_sigma  # Sigma for the rule-of-thumb schedule; fixed noise level for the constant schedule

    l2_reg = args.l2_reg
    initial_lr = args.initial_lr
    lr_decay = args.lr_decay

    # Data settings
    obs_cols = args.obs_cols
    semisup_context_cols = args.semisup_context_cols

    context_cols = semisup_context_cols

    # Training settings
    epochs = args.epochs
    batch_size = args.batch_size
    clipped_adam = args.clipped_adam

    # Dimensions of problem
    problem_dim = len(args.obs_cols)
    context_dim = len(context_cols)

    # Flow settings
    flow_depth = args.flow_depth
    c_net_depth = args.c_net_depth
    c_net_h_dim = args.c_net_h_dim
    batchnorm_momentum = args.batchnorm_momentum

    # Define context conditioner
    context_n_depth = args.context_n_depth
    context_n_h_dim = args.context_n_h_dim
    rich_context_dim = args.rich_context_dim

    settings_dict = {
        "epochs": epochs,
        "batch_size": batch_size,
        "problem_dim": problem_dim,
        "context_dim": context_dim,
        "flow_depth": flow_depth,
        "c_net_depth": c_net_depth,
        "c_net_h_dim": c_net_h_dim,
        "context_n_depth": context_n_depth,
        "context_n_h_dim": context_n_h_dim,
        "rich_context_dim": rich_context_dim,
        "obs_cols": obs_cols,
        "context_cols": context_cols,
        "semisup_context_cols": semisup_context_cols,
        "batchnorm_momentum": batchnorm_momentum,
        "l2_reg": l2_reg,
        "clipped_adam": clipped_adam,
        "initial_lr": initial_lr,
        "lr_decay": lr_decay
    }

    print(f"Settings:\n{settings_dict}")

    # Load data
    csv_path = os.path.join(data_folder, data_file)
    donkey_df = pd.read_csv(csv_path, parse_dates=[4, 11])

    csv_path = os.path.join(data_folder, extra_data_file)
    extra_df = pd.read_csv(csv_path, parse_dates=[4, 12])

    # Save the train/test split indices. A fixed seed is used, but storing the indices keeps the split explicit.
    train_idx, test_idx = get_split_idx_on_day(donkey_df)
    run_idxs = {'train': train_idx, 'test': test_idx}

    train_dataloader, test_dataloader, extra_dataloader, obs_scaler, semisup_context_scaler = searchlog_no_weather_day_split(
        sup_df=donkey_df,
        unsup_df=extra_df,
        obs_cols=obs_cols,
        semisup_context_cols=semisup_context_cols,
        batch_size=batch_size,
        cuda_exp=cuda_exp)

    # Define quantities used by the noise regularization schedule
    data_size = len(train_dataloader)
    data_dim = problem_dim + context_dim

    # Define normalizing flow
    normalizing_flow = conditional_normalizing_flow_factory3(
        flow_depth=flow_depth,
        problem_dim=problem_dim,
        c_net_depth=c_net_depth,
        c_net_h_dim=c_net_h_dim,
        context_dim=context_dim,
        context_n_h_dim=context_n_h_dim,
        context_n_depth=context_n_depth,
        rich_context_dim=rich_context_dim,
        cuda=cuda_exp,
        batchnorm_momentum=batchnorm_momentum)

    # Setup Optimizer
    if clipped_adam is None:
        if l2_reg is None:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr)
        else:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr,
                                   weight_decay=l2_reg)
    else:
        if l2_reg is None:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    clip_norm=clipped_adam)
        else:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    weight_decay=l2_reg,
                                    clip_norm=clipped_adam)

    if lr_decay is not None:
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer,
                                                     gamma=lr_decay,
                                                     last_epoch=-1)

    # Setup regularization
    h = noise_reg_schedule(data_size, data_dim, noise_reg_sigma)
    noise_reg = NoiseRegularizer(discrete_dims=None, h=h, cuda=cuda_exp)

    # Train and test sizes
    n_train = train_dataloader.dataset.shape[0]
    n_test = test_dataloader.dataset.shape[0]

    # Training loop
    full_train_losses = []
    train_losses = []
    test_losses = []
    no_noise_losses = []

    for epoch in range(1, epochs + 1):

        normalizing_flow.modules.train()
        train_epoch_loss = 0
        for k, batch in enumerate(train_dataloader):
            # Add regularization noise to the training batch
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            # Condition the flow on the sampled covariate and calculate -log_prob = loss
            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss.item()
        full_train_losses.append(train_epoch_loss / n_train)

        # Loop over the "unsupervised" extra data. The loss here is not logged; only the test loss matters for evaluation.
        for k, batch in enumerate(extra_dataloader):
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Log and evaluate every 10 epochs (and near the end of training)
        if epoch % 10 == 0 or epoch == epochs - 1:
            normalizing_flow.modules.eval()
            train_losses.append(train_epoch_loss / n_train)

            no_noise_epoch_loss = 0
            for k, batch in enumerate(train_dataloader):
                # No noise added here: evaluate the training loss on the clean data
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the sampled covariate and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                loss = -conditioned_flow_dist.log_prob(x).sum()

                no_noise_epoch_loss += loss.item()
            no_noise_losses.append(no_noise_epoch_loss / n_train)

            test_epoch_loss = 0
            for j, batch in enumerate(test_dataloader):
                # Split the test batch into observations and context
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the sampled covariate and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                test_loss = -conditioned_flow_dist.log_prob(x).sum()

                test_epoch_loss += test_loss.item()
            test_losses.append(test_epoch_loss / n_test)

        # Take scheduler step if needed
        if lr_decay is not None:
            scheduler.step()

        # Print the final logged losses at the end of training
        if epoch == epochs - 1:
            normalizing_flow.modules.eval()
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss:{no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )
    experiment_dict = {
        'train': train_losses,
        'test': test_losses,
        'no_noise_losses': no_noise_losses
    }

    results_dict = {
        'model': normalizing_flow,
        'settings': settings_dict,
        'logs': experiment_dict,
        'data_split': run_idxs
    }

    file_name = f"{experiment_name}.pickle"
    file_path = os.path.join(results_path, file_name)

    print(f"Saving: {file_name}")
    with open(file_path, 'wb') as f:
        pickle.dump(results_dict, f)
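All four scripts select one of three noise schedules by name, but the schedule functions themselves are not part of these examples. A plausible sketch is given below, assuming common kernel-density conventions: the constant schedule returns the configured sigma unchanged, the square-root schedule shrinks it with the data size, and the rule-of-thumb schedule follows a Silverman-style bandwidth. The real implementations may differ.

def constant_regularization_schedule(data_size, data_dim, sigma):
    # Fixed noise level, independent of the amount of data.
    return sigma


def square_root_noise_schedule(data_size, data_dim, sigma):
    # Noise shrinks with the square root of the data size.
    return sigma / data_size ** 0.5


def rule_of_thumb_noise_schedule(data_size, data_dim, sigma):
    # Silverman-style rule of thumb: sigma * n^(-1 / (d + 4)).
    return sigma * data_size ** (-1.0 / (data_dim + 4))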