예제 #1
0
    # Optional Neptune setup: when a Neptune config path was supplied, parse the
    # YAML file and pass the parsed settings to utils.initialize_neptune.
    if args.neptune_config is not None:
        with open(args.neptune_config) as neptune_file:
            neptune_config = yaml.safe_load(neptune_file)
            utils.initialize_neptune(neptune_config)

    # Load the full, labeled, and unlabeled datasets for the requested label and
    # negative class. Note that the dataset config is passed as a path here.
    all_data, labeled_data, unlabeled_data = datasets.load_binary_data(
        args.dataset_config, args.label, args.negative_class)
    # load_binary_data subsets the data to two classes. Update the label encoder
    # to treat this data as binary so the F1 score doesn't break.
    labeled_data.recode()
    label_encoder = labeled_data.get_label_encoder()

    # Correct for batch effects (only when a correction method was requested)
    if args.batch_correction_method is not None:
        all_data = datasets.correct_batch_effects(all_data,
                                                  args.batch_correction_method)
        # all_data has been replaced by the corrected data, so the labeled and
        # unlabeled subsets are rebuilt from it and the labeled subset is
        # restricted to the two classes of interest again.
        labeled_data = all_data.get_labeled()
        labeled_data.subset_samples_to_labels(
            [args.label, args.negative_class])
        unlabeled_data = all_data.get_unlabeled()
        # NOTE(review): label_encoder was obtained before this branch and
        # recode() is not called on the rebuilt labeled_data -- confirm the
        # encoder and the binary recoding still apply after batch correction.

    # Get cross-validation splits. The number of folds comes from
    # args.num_splits (not necessarily five); args.seed is forwarded as the
    # seed, presumably to make the splits reproducible.
    labeled_splits = labeled_data.get_cv_splits(num_splits=args.num_splits,
                                                seed=args.seed)

    # Train the model on each fold.
    # The lists below start empty; presumably the per-fold training loop (not
    # shown here) appends one entry per fold to each of them.
    accuracies = []
    balanced_accuracies = []
    f1_scores = []
    supervised_train_studies = []
    supervised_train_sample_names = []
예제 #2
0
    # Parse the dataset and simulation settings from their YAML files.
    with open(args.dataset_config) as data_file:
        dataset_config = yaml.safe_load(data_file)

    with open(args.simulation_config) as data_file:
        simulation_config = yaml.safe_load(data_file)

    # NOTE: load_binary_data receives the config *path* (args.dataset_config),
    # not the dataset_config object parsed above.
    all_data, labeled_data, unlabeled_data = datasets.load_binary_data(
        args.dataset_config, args.label, args.negative_class)

    # Load healthy + disease data: keep only the samples labeled with
    # args.label or args.negative_class.
    # NOTE(review): this relies on subset_samples_to_labels returning the
    # subsetted dataset -- confirm it does not return None.
    expression_data = labeled_data.subset_samples_to_labels(
        [args.label, args.negative_class])

    # Correct for batch effects (only when a correction method was requested)
    if args.batch_correction_method is not None:
        expression_data = datasets.correct_batch_effects(
            expression_data, args.batch_correction_method)

    # Scale data: fit a min-max scaler on the first element returned by
    # get_all_data(); the second element is discarded. Only fit() is called
    # here, so nothing is transformed at this point.
    labeled_scaler = preprocessing.MinMaxScaler()
    train_data, _ = expression_data.get_all_data()
    labeled_scaler.fit(train_data)

    # Pull the hyperparameters out of the simulation config. Every key below is
    # read with plain indexing, so each one is required (no defaults).
    learning_rate = simulation_config['lr']
    batch_size = simulation_config['batch_size']
    epochs = simulation_config['epochs']
    kappa = simulation_config['kappa']
    intermediate_dim = simulation_config['intermediate_dim']
    latent_dim = simulation_config['latent_dim']
    epsilon_std = simulation_config['epsilon_std']
    val_frac = simulation_config['val_frac']