# Parse the Neptune config file, if one was given, and initialize Neptune
# from it.
if args.neptune_config is not None:
    with open(args.neptune_config) as neptune_file:
        neptune_config = yaml.safe_load(neptune_file)
    # The YAML is fully parsed at this point, so the file handle can be closed
    # before Neptune is initialized.
    utils.initialize_neptune(neptune_config)

all_data, labeled_data, unlabeled_data = datasets.load_binary_data(
    args.dataset_config, args.label, args.negative_class)

# load_binary_data subsets the data to two classes. Update the label encoder
# to treat this data as binary so the F1 score doesn't break.
labeled_data.recode()
label_encoder = labeled_data.get_label_encoder()

# Correct for batch effects
if args.batch_correction_method is not None:
    all_data = datasets.correct_batch_effects(all_data,
                                              args.batch_correction_method)
    # Re-derive the labeled and unlabeled subsets from the corrected data and
    # restrict the labeled subset to the two classes of interest again.
    labeled_data = all_data.get_labeled()
    labeled_data.subset_samples_to_labels([args.label, args.negative_class])
    unlabeled_data = all_data.get_unlabeled()
    # NOTE(review): label_encoder above was obtained from the pre-correction
    # labeled_data, and recode() is not repeated on the rebuilt object.
    # Confirm the rebuilt labeled_data keeps the same binary encoding.

# Get cross-validation splits; the number of folds comes from args.num_splits
# and the split is seeded with args.seed.
labeled_splits = labeled_data.get_cv_splits(num_splits=args.num_splits,
                                            seed=args.seed)

# Train the model on each fold.
# These lists start empty; presumably the code that follows appends one entry
# per fold -- verify against the rest of the script.
accuracies = []
balanced_accuracies = []
f1_scores = []
supervised_train_studies = []
supervised_train_sample_names = []
# Parse the dataset and simulation YAML config files.
with open(args.dataset_config) as data_file:
    dataset_config = yaml.safe_load(data_file)

with open(args.simulation_config) as simulation_file:
    simulation_config = yaml.safe_load(simulation_file)

# NOTE(review): load_binary_data is given the config *path*
# (args.dataset_config), not the dataset_config parsed above -- this matches
# the original call; confirm that the loader expects a path.
all_data, labeled_data, unlabeled_data = datasets.load_binary_data(
    args.dataset_config, args.label, args.negative_class)

# Load healthy + disease data
expression_data = labeled_data.subset_samples_to_labels(
    [args.label, args.negative_class])

# Correct for batch effects
if args.batch_correction_method is not None:
    expression_data = datasets.correct_batch_effects(
        expression_data, args.batch_correction_method)

# Scale data: fit a min-max scaler on the first element returned by
# expression_data.get_all_data(); the second element is discarded.
labeled_scaler = preprocessing.MinMaxScaler()
train_data, _ = expression_data.get_all_data()
labeled_scaler.fit(train_data)

# Pull the hyperparameters out of the simulation config. Every key below is
# required; a missing key raises KeyError.
learning_rate = simulation_config['lr']
batch_size = simulation_config['batch_size']
epochs = simulation_config['epochs']
kappa = simulation_config['kappa']
intermediate_dim = simulation_config['intermediate_dim']
latent_dim = simulation_config['latent_dim']
epsilon_std = simulation_config['epsilon_std']
val_frac = simulation_config['val_frac']