import numpy as np
from DeepKnockoffs import KnockoffMachine


def train_network(X, p, corr_g, data_type):
    # Note: relies on module-level variables n, ncat, num_cuts, training_params and model
    # Set the parameters for training deep knockoffs
    pars = dict()
    # Number of epochs
    pars['epochs'] = 50
    # Number of iterations over the full data per epoch
    pars['epoch_length'] = 100
    # Data type, either "continuous" or "binary"
    pars['family'] = "continuous"
    # Dimensions of the data
    pars['p'] = p
    # Number of discrete variables
    pars['ncat'] = ncat
    # Indices of the categorical variables
    pars['cat_var_idx'] = np.arange(0, ncat * num_cuts)
    # Number of categories
    pars['num_cuts'] = num_cuts
    # Size of regularizer
    # pars['regularizer'] = grid_results[0]
    # Boolean for using a different weighting structure for decorrelation
    pars['use_weighting'] = False
    # Multiplier for weighting discrete variables
    pars['kappa'] = 50
    # Size of the test set
    pars['test_size'] = 0
    # Batch size
    pars['batch_size'] = int(0.5 * n)
    # Learning rate
    pars['lr'] = 0.01
    # When to decrease the learning rate (unused when equal to the number of epochs)
    pars['lr_milestones'] = [pars['epochs']]
    # Width of the network (number of layers is fixed to 6)
    pars['dim_h'] = int(10 * p)
    # Penalty for the MMD distance
    pars['GAMMA'] = training_params['GAMMA']
    # Penalty encouraging second-order knockoffs
    pars['LAMBDA'] = training_params['LAMBDA']
    # Decorrelation penalty hyperparameter
    pars['DELTA'] = training_params['DELTA']
    # Target pairwise correlations between variables and knockoffs
    pars['target_corr'] = corr_g
    # Kernel widths for the MMD measure (uniform weights)
    pars['alphas'] = [1., 2., 4., 8., 16., 32., 64., 128.]
    # Save parameters
    np.save('/artifacts/pars_' + data_type + '.npy', pars)
    # Where to store the machine
    checkpoint_name = "/artifacts/" + model + "_" + data_type
    # Where to print progress information
    logs_name = "/artifacts/" + model + "_progress.txt"
    # Initialize the machine
    machine = KnockoffMachine(pars, checkpoint_name=checkpoint_name, logs_name=logs_name)
    # Train the machine
    machine.train(X.values)
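# A minimal usage sketch for train_network, assuming X is a pandas DataFrame and that
# n, ncat, num_cuts, training_params and model are defined at module level as the
# function expects; the input file path, the placeholder corr_g and the data_type label
# are illustrative assumptions, not part of the original script.
import pandas as pd

X = pd.read_csv("/artifacts/design_matrix.csv")   # hypothetical input file
n, p = X.shape
corr_g = np.zeros(p)                              # placeholder target correlations
train_network(X, p, corr_g, data_type="continuous")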
pars['dim_h'] = int(10 * p)
# Penalty for the MMD distance
pars['GAMMA'] = training_params['GAMMA']
# Penalty encouraging second-order knockoffs
pars['LAMBDA'] = training_params['LAMBDA']
# Decorrelation penalty hyperparameter
pars['DELTA'] = training_params['DELTA']
# Target pairwise correlations between variables and knockoffs
pars['target_corr'] = corr_g
# Kernel widths for the MMD measure (uniform weights)
pars['alphas'] = [1., 2., 4., 8., 16., 32., 64., 128.]
# machine = KnockoffMachine(pars)
# machine.train(X_train.values)
# Save parameters
np.save('/artifacts/pars.npy', pars)
# Where to store the machine
checkpoint_name = "/artifacts/" + model
# Where to print progress information
logs_name = "/artifacts/" + model + "_progress.txt"
# Initialize the machine
machine = KnockoffMachine(pars, checkpoint_name=checkpoint_name, logs_name=logs_name)
# Train the machine
machine.train(X_train)
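# A sketch of what typically follows training, assuming X_train is already a NumPy
# array (as the call above suggests); machine.generate and the output path are modeled
# on the other snippets in this section and are assumptions here, not part of this
# fragment.
Xk_train = machine.generate(X_train)
np.save('/artifacts/' + model + '_knockoffs.npy', Xk_train)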
# Penalty encouraging second-order knockoffs
pars['LAMBDA'] = combo[0]
# Decorrelation penalty hyperparameter
pars['DELTA'] = combo[1]
# Target pairwise correlations between variables and knockoffs
pars['target_corr'] = corr_g
# Kernel widths for the MMD measure (uniform weights)
pars['alphas'] = [1., 2., 4., 8., 16., 32., 64., 128.]
par_lambda = str(np.round(combo[0], 4))
par_delta = str(np.round(combo[1], 4))
# Save parameters
np.save('/artifacts/pars' + "_" + par_lambda + "_" + par_delta + '.npy', pars)
model = model + "_" + par_lambda + "_" + par_delta
# Where to store the machine
checkpoint_name = "/artifacts/" + model
# Where to print progress information
logs_name = "/artifacts/" + model + "_progress.txt"
# Initialize the machine
machine = KnockoffMachine(pars, checkpoint_name=checkpoint_name, logs_name=logs_name)
# Train the machine
machine.train(X_train.values)
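# A minimal sketch of the hyperparameter grid search this fragment appears to sit inside:
# the (LAMBDA, DELTA) grid values and the base model name are assumptions for
# illustration, not values from the original script.
import itertools

lambda_grid = [0.01, 0.1, 1.0]            # hypothetical LAMBDA values
delta_grid = [0.01, 0.1, 1.0]             # hypothetical DELTA values
base_model = "deepknockoff"               # hypothetical base name

for combo in itertools.product(lambda_grid, delta_grid):
    model = base_model                    # reset the name so suffixes do not accumulate
    # ... set the remaining pars entries and run the fragment above ...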
# Penalty for the MMD distance
pars['GAMMA'] = 1
# Penalty encouraging second-order knockoffs
pars['LAMBDA'] = 1
# Decorrelation penalty hyperparameter
pars['DELTA'] = 1
# Target pairwise correlations between variables and knockoffs
pars['target_corr'] = corr_g
# Kernel widths for the MMD measure (uniform weights)
pars['alphas'] = [1., 2., 4., 8., 16., 32., 64., 128.]
# Where the machine is stored
checkpoint_name = "../models/deepmodel"
# test to exclude 51
x_train = x_train.to_numpy()
# Initialize the machine
machine = KnockoffMachine(pars, checkpoint_name)
# Train the machine
# pdb.set_trace()
machine.train(x_train)
# Generate deep knockoffs
xk_train = machine.generate(x_train)
# Save knockoffs
pyreadr.write_rds("../data/derived_data/knockoffs.rds", pd.DataFrame(xk_train))
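# A short sketch of reading the saved knockoffs back into Python with pyreadr, assuming
# the RDS file written above; pyreadr.read_r returns a dictionary of data frames, and an
# RDS file is stored under the None key. This read-back step is an assumption, not part
# of the original script.
import pyreadr

result = pyreadr.read_r("../data/derived_data/knockoffs.rds")
xk_loaded = result[None]   # pandas DataFrame with the generated knockoffs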