def train_network(X, p, corr_g, data_type):
    """Configure and train a DeepKnockoffs machine on the data in ``X``.

    NOTE(review): relies on module-level globals not defined in this
    function — ``ncat``, ``num_cuts``, ``n``, ``training_params``,
    ``model``, ``np`` and ``KnockoffMachine`` must be in scope at the
    module level for this to run.

    Args:
        X: DataFrame of covariates; ``X.values`` is passed to the machine.
        p: Dimensionality (number of columns) of the data.
        corr_g: Target pairwise correlations between variables and knockoffs.
        data_type: Tag appended to the saved parameter/checkpoint filenames.
    """
    # Set the parameters for training deep knockoffs
    pars = dict()
    # Number of epochs
    pars['epochs'] = 50
    # Number of iterations over the full data per epoch
    pars['epoch_length'] = 100
    # Data type, either "continuous" or "binary"
    # NOTE(review): hardcoded to "continuous" even though `data_type`
    # varies — confirm this is intentional.
    pars['family'] = "continuous"
    # Dimensions of the data
    pars['p'] = p
    # Indices of the categorical (one-hot expanded) columns; assumes they
    # occupy the first ncat * num_cuts positions — TODO confirm layout.
    pars['cat_var_idx'] = np.arange(0, (ncat * (num_cuts)))
    # Number of discrete variables
    # (fix: this key was previously assigned twice with the same value)
    pars['ncat'] = ncat
    # Number of categories
    pars['num_cuts'] = num_cuts
    # Size of regularizer
    # pars['regularizer'] = grid_results[0]
    # Boolean for using different weighting structure for decorr
    pars['use_weighting'] = False
    # Multiplier for weighting discrete variables
    pars['kappa'] = 50
    # Size of the test set
    pars['test_size'] = 0
    # Batch size: half of the (module-level) sample size n
    pars['batch_size'] = int(0.5 * n)
    # Learning rate
    pars['lr'] = 0.01
    # When to decrease learning rate (unused when equal to number of epochs)
    pars['lr_milestones'] = [pars['epochs']]
    # Width of the network (number of layers is fixed to 6)
    pars['dim_h'] = int(10 * p)
    # Penalty for the MMD distance
    pars['GAMMA'] = training_params['GAMMA']
    # Penalty encouraging second-order knockoffs
    pars['LAMBDA'] = training_params['LAMBDA']
    # Decorrelation penalty hyperparameter
    pars['DELTA'] = training_params['DELTA']
    # Target pairwise correlations between variables and knockoffs
    pars['target_corr'] = corr_g
    # Kernel widths for the MMD measure (uniform weights)
    pars['alphas'] = [1., 2., 4., 8., 16., 32., 64., 128.]
    # Save parameters
    np.save('/artifacts/pars_' + data_type + '.npy', pars)
    # Where to store the machine
    checkpoint_name = "/artifacts/" + model + "_" + data_type
    # Where to print progress information
    logs_name = "/artifacts/" + model + "_progress.txt"
    # Initialize the machine
    machine = KnockoffMachine(pars,
                              checkpoint_name=checkpoint_name,
                              logs_name=logs_name)
    # Train the machine
    machine.train(X.values)
# Ejemplo n.º 2
# Remaining hyper-parameters: network width plus the penalty weights,
# which come from the module-level `training_params` dict.
pars.update({
    'dim_h': int(10 * p),                 # width of the network (6 layers fixed)
    'GAMMA': training_params['GAMMA'],    # penalty for the MMD distance
    'LAMBDA': training_params['LAMBDA'],  # penalty encouraging second-order knockoffs
    'DELTA': training_params['DELTA'],    # decorrelation penalty hyperparameter
    'target_corr': corr_g,                # target variable/knockoff correlations
    'alphas': [1., 2., 4., 8., 16., 32., 64., 128.],  # MMD kernel widths (uniform weights)
})

# Persist the training configuration.
np.save('/artifacts/pars.npy', pars)

# Checkpoint and progress-log locations for this model.
checkpoint_name = "/artifacts/" + model
logs_name = "/artifacts/" + model + "_progress.txt"

# Build the knockoff machine and fit it on the training data.
machine = KnockoffMachine(pars, checkpoint_name=checkpoint_name, logs_name=logs_name)
machine.train(X_train)
# Ejemplo n.º 3
# The MMD (GAMMA), second-order (LAMBDA) and decorrelation (DELTA)
# penalties are all set to unit weight.
for penalty in ('GAMMA', 'LAMBDA', 'DELTA'):
    pars[penalty] = 1
# Target pairwise correlations between variables and knockoffs.
pars['target_corr'] = corr_g
# Kernel widths for the MMD measure (uniform weights).
pars['alphas'] = [1., 2., 4., 8., 16., 32., 64., 128.]

# Location where the trained machine is checkpointed.
checkpoint_name = "../models/deepmodel"

# test to exclude 51
x_train = x_train.to_numpy()

# Build the machine, fit it, and sample deep knockoffs for the training set.
machine = KnockoffMachine(pars, checkpoint_name)
machine.train(x_train)
xk_train = machine.generate(x_train)

# Persist the knockoffs as an RDS file for downstream analysis in R.
pyreadr.write_rds("../data/derived_data/knockoffs.rds", pd.DataFrame(xk_train))