return io_args, model_options

if __name__ == '__main__':

    # parse the command line arguments, then load sample info for the
    # selected training data type
    io_args, model_options = process_args()
    sample_info_df = du.load_sample_info(
        model_options.training_data,
        verbose=io_args.verbose,
    )

    # this experiment writes to the 'purity' subdirectory of the results
    # directory; create it (and any missing parents) if it isn't there yet
    experiment_dir = (Path(io_args.results_dir) / 'purity').resolve()
    experiment_dir.mkdir(parents=True, exist_ok=True)

    # save model options for this experiment
    # (hyperparameters, preprocessing info, etc)
    fu.save_model_options(
        experiment_dir,
        model_options,
        classify=model_options.classify,
    )

    # column names for the experiment log; the log dataframe itself starts
    # out unset
    log_columns = ['training_data', 'shuffle_labels', 'skip_reason']
    log_df = None

    tcga_data = TCGADataModel(seed=model_options.seed,
                              subset_mad_genes=model_options.subset_mad_genes,
                              training_data=model_options.training_data,
                              load_compressed_data=model_options.use_compressed,
                              n_dim=model_options.n_dim,
                              sample_info_df=sample_info_df,
Exemple #2
0

if __name__ == '__main__':

    # parse the command line arguments, then load sample info for the
    # selected training data type
    io_args, model_options = process_args()
    sample_info_df = du.load_sample_info(
        model_options.training_data,
        verbose=io_args.verbose,
    )

    # this experiment writes to the 'gene' subdirectory of the results
    # directory; create it (and any missing parents) if it isn't there yet
    experiment_dir = (Path(io_args.results_dir) / 'gene').resolve()
    experiment_dir.mkdir(parents=True, exist_ok=True)

    # save model options for this experiment
    # (hyperparameters, preprocessing info, etc)
    fu.save_model_options(experiment_dir, model_options)

    # column names for the experiment log
    log_columns = [
        'gene',
        'titration_ratio',
        'shuffle_labels',
        'skip_reason',
    ]

    # build the data model from the parsed options, then load the gene set
    # named on the command line
    tcga_data = TCGADataModel(
        seed=model_options.seed,
        subset_mad_genes=model_options.subset_mad_genes,
        training_data=model_options.training_data,
        overlap_data_types=model_options.overlap_data_types,
        sample_info_df=sample_info_df,
        verbose=io_args.verbose,
        debug=model_options.debug,
    )
    genes_df = tcga_data.load_gene_set(io_args.gene_set)

    # we want to run mutation prediction experiments:
    return io_args, model_options, sample_info_df


if __name__ == '__main__':

    # parse the command line arguments; in this script process_args() also
    # hands back the sample info dataframe
    io_args, model_options, sample_info_df = process_args()

    # results are written directly to the results directory; create it
    # (and any missing parents) if it isn't there yet
    experiment_dir = Path(io_args.results_dir).resolve()
    experiment_dir.mkdir(parents=True, exist_ok=True)

    # save model options for this experiment
    # (hyperparameters, preprocessing info, etc)
    fu.save_model_options(experiment_dir, model_options, 'survival')

    # column names for the experiment log
    log_columns = [
        'cancer_type',
        'training_data',
        'shuffle_labels',
        'skip_reason',
    ]

    tcga_data = TCGADataModel(
        seed=model_options.seed,
        subset_mad_genes=model_options.subset_mad_genes,
        training_data=model_options.training_data,
        overlap_data_types=model_options.overlap_data_types,
        load_compressed_data=(model_options.n_dim is not None),
        standardize_input=(model_options.n_dim is not None
                           and model_options.training_data
                           in cfg.standardize_data_types),