def test_config_parser_env_interpolation_missing(
    conf_schema_basic, fixtures_dir
):
    """Smoke test: load a config referencing an env var that is not set."""
    conf_path = os.path.join(fixtures_dir, "env_interpolation_conf.toml")
    GPFConfigParser.load_config(conf_path, conf_schema_basic)
def read_and_parse_file_configuration(cls, options, config_file):
    """Load an annotation configuration file and parse its sections.

    Loads ``config_file`` against the annotation schema, seeds the
    bookkeeping columns, applies class defaults, and parses every
    section that declares an annotator.

    :param options: options dict merged into every parsed section
    :param config_file: path to the annotation configuration file
    :returns: an immutable FrozenBox with parsed ``sections``
    """
    config = GPFConfigParser.load_config(
        config_file, annotation_conf_schema
    ).to_dict()
    config["options"] = options
    config["columns"] = {}
    config["native_columns"] = []
    config["virtual_columns"] = []
    config["output_columns"] = []
    config = cls._setup_defaults(DefaultBox(config))
    # Sections without an annotator are placeholders and are skipped.
    config["sections"] = [
        cls.parse_section(
            recursive_dict_update({"options": options}, section.to_dict())
        )
        for section in config.sections
        if section.annotator is not None
    ]
    return FrozenBox(config)
def test_pheno_regressions_from_conf_path(regressions_conf):
    """Regression definitions loaded from file match the expected values."""
    regs = GPFConfigParser.load_config(
        regressions_conf, regression_conf_schema
    )
    expected = {
        "reg1": {
            "instrument_name": "i1",
            "measure_name": "regressor1",
            "jitter": 0.1,
        },
        "reg2": {
            "instrument_name": "i1",
            "measure_name": "regressor2",
            "jitter": 0.2,
        },
        "reg3": {
            "instrument_name": "",
            "measure_name": "common_regressor",
            "jitter": 0.3,
        },
        "reg4": {
            "instrument_name": "i2",
            "measure_name": "regressor1",
            "jitter": 0.4,
        },
    }
    assert len(regs.regression) == len(expected)
    for name, definition in expected.items():
        assert regs.regression[name] == definition
def __init__(
        self, dae_config=None, config_file="DAE.conf",
        work_dir=None, defaults=None, load_eagerly=False):
    """Initialize the GPF instance.

    :param dae_config: pre-loaded DAE configuration; when None it is
        loaded from ``config_file`` inside ``work_dir`` (which itself
        defaults to the ``DAE_DB_DIR`` environment variable)
    :param config_file: DAE configuration file name
    :param work_dir: DAE data directory
    :param defaults: reserved; must be empty when dae_config is None
    :param load_eagerly: when True, touch every lazy property up front
    """
    if dae_config is None:
        # FIXME Merge defaults with newly-loaded config
        assert not defaults, defaults
        if work_dir is None:
            work_dir = os.environ["DAE_DB_DIR"]
        config_file = os.path.join(work_dir, config_file)
        dae_config = GPFConfigParser.load_config(
            config_file, dae_conf_schema
        )
    self.dae_config = dae_config
    self.dae_db_dir = work_dir
    self.__autism_gene_profile_config = None
    self.load_eagerly = load_eagerly
    if load_eagerly:
        # Touch every lazy property so all resources load up front.
        # (The original touched _gene_info_config twice; the duplicate
        # access was redundant and has been removed.)
        self.genomes_db
        self.gene_sets_db
        self._gene_info_config
        self._pheno_db
        self._variants_db
        self.denovo_gene_sets_db
        self._score_config
        self._scores_factory
        self.genotype_storage_db
        self._common_report_facade
        self._background_facade
def test_handle_regressions_default_jitter(
        mocker, fake_phenotype_data, output_dir,
        fake_phenotype_data_desc_conf):
    """Configured jitter values are forwarded to build_regression."""
    def stub_regression(*args):
        return {"pvalue_regression_male": 0, "pvalue_regression_female": 0}

    mocked = mocker.patch(
        "dae.pheno_browser.prepare_data."
        "PreparePhenoBrowserBase.build_regression",
        side_effect=stub_regression,
    )
    reg = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg
    )
    regressand = fake_phenotype_data.get_measure("i1.m1")
    # Drain the generator; only the recorded mock calls matter here.
    for _ in prep.handle_regressions(regressand):
        pass
    mocked.assert_called()
    _, _, first_jitter = mocked.call_args_list[0][0]
    assert first_jitter == 0.12
    _, _, second_jitter = mocked.call_args_list[1][0]
    assert second_jitter == 0.13
def _gene_info_config(self):
    """Load and return the gene info configuration.

    The configuration file path is taken from the DAE config's
    ``gene_info_db.conf_file`` entry.
    """
    # Lazy %-style args: the message is only built when DEBUG is enabled.
    logger.debug(
        "loading gene info config file: %s",
        self.dae_config.gene_info_db.conf_file)
    return GPFConfigParser.load_config(
        self.dae_config.gene_info_db.conf_file, gene_info_conf
    )
def _autism_gene_profile_config(self):
    """Load the autism gene profile (AGP) configuration, if present.

    Returns None when no AGP tool is configured in the DAE config or
    when its configuration file does not exist on disk.
    """
    agp_config = self.dae_config.autism_gene_tool_config
    if agp_config is None or not os.path.exists(agp_config.conf_file):
        return None
    # Reuse the local reference instead of re-reading dae_config.
    return GPFConfigParser.load_config(
        agp_config.conf_file, autism_gene_tool_config
    )
def test_config_parser_load_single(conf_schema_basic, fixtures_dir):
    """A basic TOML config is loaded and validated against its schema."""
    conf_path = os.path.join(fixtures_dir, "basic_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_basic)
    print(config)
    assert config.id == "152135"
    assert config.name == "Basic test config"
    section = config.section1
    assert section.someval1 == "beep"
    assert section.someval2 == 1.23
    assert section.someval3 == 52345
def test_handle_regressions_regressand_is_regressor(
        fake_phenotype_data, output_dir, fake_phenotype_data_desc_conf):
    """A measure that is itself a regressor yields no regressions."""
    reg = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg
    )
    measure = fake_phenotype_data.get_measure("i1.age")
    # The generator must be empty, so the first next() raises.
    with pytest.raises(StopIteration):
        next(prep.handle_regressions(measure))
def test_config_parser_set_config(conf_schema_set, fixtures_dir):
    """Set-typed config values are parsed into real Python sets."""
    conf_path = os.path.join(fixtures_dir, "set_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_set)
    print(config)
    assert config.id == "152135"
    assert config.name == "Set test config"
    section = config.section1
    assert section.someval1 == "ala"
    assert isinstance(section.someval2, set)
    # An empty symmetric difference means the sets are equal.
    assert not (section.someval2 ^ {"a", "b", "c", "d"})
    assert section.someval3 == 123
def test_config_parser_string_interpolation(conf_schema_strings, fixtures_dir):
    """Variables declared in the config are interpolated into other values."""
    conf_path = os.path.join(fixtures_dir, "vars_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_strings)
    print(config)
    assert config.id == "152135"
    assert config.name == "Vars test config"
    # The vars section is consumed during interpolation.
    assert config.vars is None
    section = config.section1
    assert section.someval1 == "asdf"
    assert section.someval2 == "ghjkl"
    assert section.someval3 == "qwertyasdfghjk"
def test_config_parser_load_paths(conf_schema_path, fixtures_dir, mocker):
    """Relative paths in a config resolve against the config's directory."""
    # Pretend every path exists so path validation passes.
    mocker.patch("os.path.exists", return_value=True)
    conf_path = os.path.join(fixtures_dir, "path_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_path)
    print(config)
    assert config.id == "152135"
    assert config.name == "Path test config"
    assert config.some_abs_path == "/tmp/maybesomeconf.toml"
    expected_rel = os.path.join(fixtures_dir, "environ_conf.toml")
    assert config.some_rel_path == expected_rel
def test_handle_regressions_non_continuous_or_ordinal_measure(
        fake_phenotype_data, output_dir, fake_phenotype_data_desc_conf):
    """Categorical and raw measures produce no regressions."""
    reg = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg
    )
    # i1.m5 is categorical, i1.m6 is raw; neither should be regressed.
    for measure_name in ("i1.m5", "i1.m6"):
        measure = fake_phenotype_data.get_measure(measure_name)
        with pytest.raises(StopIteration):
            next(prep.handle_regressions(measure))
def test_config_parser_env_interpolation(
    conf_schema_basic, fixtures_dir, mocker
):
    """Environment variables referenced in the config are interpolated."""
    mocker.patch.dict(os.environ, {"test_env_var": "bop"})
    conf_path = os.path.join(fixtures_dir, "env_interpolation_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_basic)
    print(config)
    assert config.id == "152135"
    assert config.name == "Environment interpolation test config"
    section = config.section1
    # someval1 picks up the patched environment variable's value.
    assert section.someval1 == "bop"
    assert section.someval2 == 1.23
    assert section.someval3 == 52345
def __init__(self, dae_dir, conf_file=None):
    """Load the genomes DB configuration and register all genomes.

    :param dae_dir: DAE data directory
    :param conf_file: genomes DB config path; defaults to
        ``<dae_dir>/genomesDB.conf``
    """
    self.dae_dir = dae_dir
    if not conf_file:
        conf_file = f"{dae_dir}/genomesDB.conf"
    self.config = GPFConfigParser.load_config(conf_file, genomes_db_conf)

    self._genomes = {}
    for section_id, genome_config in self.config.genome.items():
        genome = Genome.load_config(genome_config, section_id)
        assert genome is not None
        self._genomes[genome.genome_id] = genome

    # The configured default genome must be one of the loaded genomes.
    default_id = self.config.genomes.default_genome
    assert default_id in self._genomes
    self.default_genome = self._genomes[default_id]
def test_has_regression_measure(
        fake_phenotype_data, output_dir, regressions_conf):
    """Every configured regression measure is recognized."""
    reg = GPFConfigParser.load_config(
        regressions_conf, regression_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg
    )
    cases = (
        ("regressor1", "i1"),
        ("regressor2", "i1"),
        ("common_regressor", ""),
        ("common_regressor", "i1"),
        ("common_regressor", "i2"),
        ("regressor1", "i2"),
    )
    for measure_name, instrument_name in cases:
        assert prep._has_regression_measure(measure_name, instrument_name)
def __init__(self, score_filename, config_filename=None):
    """Open a score file together with its configuration.

    :param score_filename: path to the score file; must exist
    :param config_filename: path to its config; defaults to
        ``<score_filename>.conf`` next to the score file
    :raises AssertionError: when a file is missing or the config lacks
        a header / score columns, or a score column is not in the schema
    """
    self.score_filename = score_filename
    assert os.path.exists(self.score_filename), self.score_filename

    if config_filename is None:
        # By convention the config lives next to the score file.
        config_filename = f"{self.score_filename}.conf"
    self.config = GPFConfigParser.load_config(
        config_filename, score_file_conf_schema)

    assert self.config.general.header is not None
    assert self.config.columns.score is not None

    self.header = self.config.general.header
    # Lazy %-style args avoid building messages when DEBUG is off.
    logger.debug(
        "score file %s header %s",
        os.path.basename(self.score_filename), self.header)

    self.score_names = self.config.columns.score
    self.schema = Schema.from_dict(self.config.score_schema).order_as(
        self.header)
    logger.debug(
        "score file %s schema %s",
        os.path.basename(self.score_filename), self.schema.col_names)

    # Every configured score column must be present in the schema.
    assert all(sn in self.schema for sn in self.score_names), [
        self.score_filename,
        self.score_names,
        self.schema.col_names,
    ]

    self.chr_index = self.schema.col_names.index(self.chr_name)
    self.pos_begin_index = self.schema.col_names.index(self.pos_begin_name)
    self.pos_end_index = self.schema.col_names.index(self.pos_end_name)

    self.chr_prefix = getattr(self.config.general, "chr_prefix", False)

    # Normalize the configured "no score" marker; "na"/"none" mean None.
    self.no_score_value = self.config.general.no_score_value or "na"
    if self.no_score_value.lower() in ("na", "none"):
        self.no_score_value = None

    self._init_access()
def test_handle_regressions(
        mocker, fake_phenotype_data, output_dir,
        fake_phenotype_data_desc_conf):
    """Each configured regression runs once with the right measures."""
    def stub_regression(dependent_measure, independent_measure, jitter):
        return {
            "regressand": dependent_measure,
            "regressor": independent_measure,
            "jitter": jitter,
            "pvalue_regression_male": 0,
            "pvalue_regression_female": 0,
        }

    mocked = mocker.patch(
        "dae.pheno_browser.prepare_data."
        "PreparePhenoBrowserBase.build_regression",
        side_effect=stub_regression,
    )
    reg = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, reg
    )
    regressand = fake_phenotype_data.get_measure("i1.m1")

    results = [
        r for r in prep.handle_regressions(regressand) if r is not None
    ]
    assert len(results) == 2
    assert sorted(r["regression_id"] for r in results) == ["age", "nviq"]

    mocked.assert_called()
    expected_calls = [
        ("i1.m1", "i1.age", 0.12),
        ("i1.m1", "i1.iq", 0.13),
    ]
    for call, expectation in zip(mocked.call_args_list, expected_calls):
        measure, reg_measure, jitter = call[0]
        assert (measure.measure_id, reg_measure.measure_id, jitter) == \
            expectation
def main(argv):
    """Import a phenotype database and build its browser data.

    Parses command-line options, prepares the pheno DB directory,
    imports pedigree and instruments, builds the browser cache and
    writes the resulting phenotype data configuration.

    :param argv: command-line arguments (without the program name)
    :returns: 0 on success or interrupt, 2 on error
    """
    try:
        # Setup argument parser
        gpf_instance = GPFInstance()
        dae_conf = gpf_instance.dae_config

        parser = pheno_cli_parser()
        args = parser.parse_args(argv)
        # BUG FIX: these messages passed sys.stderr as a positional
        # argument to print() (it was printed as text to stdout);
        # the file= keyword sends them to stderr as intended.
        if args.instruments is None:
            print("missing instruments directory parameter", file=sys.stderr)
            raise ValueError()
        if args.pedigree is None:
            print("missing pedigree filename", file=sys.stderr)
            raise ValueError()
        if args.pheno_name is None:
            print("missing pheno db name", file=sys.stderr)
            raise ValueError()

        args.pheno_name = verify_phenotype_data_name(args.pheno_name)

        pheno_db_dir = os.path.join(
            dae_conf.phenotype_data.dir, args.pheno_name)
        if not os.path.exists(pheno_db_dir):
            os.makedirs(pheno_db_dir)

        args.pheno_db_filename = os.path.join(
            pheno_db_dir, "{}.db".format(args.pheno_name))
        # Refuse to clobber an existing DB unless --force was given.
        if os.path.exists(args.pheno_db_filename):
            if not args.force:
                print("pheno db filename already exists:",
                      args.pheno_db_filename)
                raise ValueError()
            else:
                os.remove(args.pheno_db_filename)

        args.browser_dir = os.path.join(pheno_db_dir, "browser")
        if not os.path.exists(args.browser_dir):
            os.makedirs(args.browser_dir)

        config = parse_phenotype_data_config(args)
        if args.regression:
            regressions = GPFConfigParser.load_config(
                args.regression, regression_conf_schema)
        else:
            regressions = None

        prep = PrepareVariables(config)
        prep.build_pedigree(args.pedigree)
        prep.build_variables(args.instruments, args.data_dictionary)

        build_pheno_browser(
            args.pheno_db_filename,
            args.pheno_name,
            args.browser_dir,
            regressions,
        )

        pheno_conf_path = os.path.join(
            pheno_db_dir, "{}.conf".format(args.pheno_name))
        with open(pheno_conf_path, "w") as pheno_conf_file:
            pheno_conf_file.write(
                toml.dumps(generate_phenotype_data_config(args, regressions)))
        return 0
    except KeyboardInterrupt:
        return 0
    except Exception as e:
        traceback.print_exc()
        program_name = "simple_pheno_import.py"
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + " for help use --help")
        return 2
def get_person_set_collections_config(config_path):
    """Load a config file and return its person set collections section."""
    schema = {"person_set_collections": person_set_collections_schema}
    config = GPFConfigParser.load_config(config_path, schema)
    return config.person_set_collections
def main():
    """Generate score histograms for one or more genomic score files.

    Reads the score configuration, builds a ScoreHistogramInfo per
    configured score, validates the input files and runs the histogram
    generation, reporting the elapsed time on stderr.
    """
    start_time = time.time()
    opts = get_argument_parser().parse_args()
    config = GPFConfigParser.load_config(opts.config, score_file_conf_schema)
    score_histograms_info = []
    score_columns = opts.scores
    if score_columns is not None:
        # Comma-separated list; numeric entries are treated as column
        # indices, everything else as column names.
        score_columns = [
            int(el) if el.isdigit() else el
            for el in score_columns.split(",")
        ]
    else:
        # Default: one column per configured genomic score, in order.
        score_columns = list(config.genomic_scores.keys())
    # Pair each configured score with its resolved column; zip stops at
    # the shorter sequence if fewer columns were given than scores.
    for score, score_column in zip(
            config.genomic_scores.keys(), score_columns):
        histogram_info = config.genomic_scores[score]
        bin_range = None
        if histogram_info.range:
            bin_range = list(map(float, histogram_info.range))
        score_histograms_info.append(
            ScoreHistogramInfo(
                score, score_column, histogram_info.file,
                histogram_info.xscale, histogram_info.yscale,
                histogram_info.bins, bin_range,
            ))
    if opts.infile == "-":
        sys.stderr.write("You must provide input file!\n")
        # NOTE(review): negative exit codes wrap on POSIX (-78 -> 178);
        # confirm the intended exit status.
        sys.exit(-78)
    input_files = opts.infile.split(",")
    start = opts.s
    end = opts.e
    # Both bounds must be given for either to be converted; a lone
    # bound stays a string — presumably handled downstream; verify.
    if start is not None and end is not None:
        if start.isdigit():
            start = int(opts.s)
        if end.isdigit():
            end = int(opts.e)
    round_pos = opts.r
    if round_pos is not None:
        round_pos = int(round_pos)
    chunk_size = opts.chunk_size
    if chunk_size:
        chunk_size = int(chunk_size)
    # Validate every input file before doing any work.
    for input_file in input_files:
        if not exists(input_file):
            sys.stderr.write("The given input file does not exist!\n")
            sys.exit(-78)
    gsh = GenerateScoresHistograms(
        input_files, score_histograms_info, round_pos, chunk_size,
        start, end)
    gsh.generate_scores_histograms()
    sys.stderr.write(
        "The program was running for [h:m:s]: "
        + str(datetime.timedelta(
            seconds=round(time.time() - start_time, 0)))
        + "\n")
def dae_config_fixture():
    """Fixture: the DAE configuration loaded from the local fixtures dir."""
    conf_path = relative_to_this_folder("fixtures/DAE.conf")
    return GPFConfigParser.load_config(conf_path, dae_conf_schema)
def main(argv=None):  # IGNORE:C0111
    """Command line options."""
    # Build the pheno browser from an existing pheno DB file.
    # Returns 0 on success, 1 on interrupt, 2 on error.
    if argv is None:
        argv = sys.argv
    else:
        # NOTE(review): extends the global sys.argv with the extra args.
        sys.argv.extend(argv)
    program_name = os.path.basename(sys.argv[0])
    # Second line of the entry module's docstring is the short description.
    program_shortdesc = __import__("__main__").__doc__.split("\n")[1]
    program_license = """%s USAGE """ % (program_shortdesc, )
    try:
        # Setup argument parser
        parser = ArgumentParser(
            description=program_license,
            formatter_class=RawDescriptionHelpFormatter,
        )
        parser.add_argument(
            "-v", "--verbose", dest="verbose", action="count",
            help="set verbosity level [default: %(default)s]",
        )
        # NOTE(review): "anme" typo in the help text below — fix separately
        # (runtime string, intentionally left untouched here).
        parser.add_argument(
            "-d", "--dbfile", dest="dbfile",
            help="pheno db file anme", metavar="path",
        )
        parser.add_argument(
            "-p", "--pheno", dest="pheno_name", help="pheno name")
        parser.add_argument(
            "-o", "--output", dest="output",
            help="output base dir", metavar="path",
        )
        parser.add_argument(
            "--regression",
            help=("path to a regression configuration file"),
            type=str,
        )
        # Process arguments
        args = parser.parse_args()
        # All three inputs are required; dbfile/output must exist on disk.
        if not args.output or not os.path.exists(args.output):
            raise CLIError("output directory should be specified and empty")
        if not args.pheno_name:
            raise CLIError("pheno name must be specified")
        if not args.dbfile or not os.path.exists(args.dbfile):
            raise CLIError("pheno db file name must be specified")
        # Regressions are optional; omit them when no config was given.
        regressions = (GPFConfigParser.load_config(
            args.regression, regression_conf_schema)
            if args.regression else None)
        build_pheno_browser(
            args.dbfile, args.pheno_name, args.output, regressions)
        return 0
    except KeyboardInterrupt:
        return 1
    except Exception as e:
        traceback.print_exc()
        print()
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + " for help use --help")
        return 2
def test_config_parser_load_incorrect_paths(conf_schema_path, fixtures_dir):
    """Smoke test: load a config whose configured paths do not exist."""
    conf_path = os.path.join(fixtures_dir, "wrong_path_conf.toml")
    GPFConfigParser.load_config(conf_path, conf_schema_path)
def _score_config(self):
    """Load the genomic scores configuration declared in the DAE config."""
    conf_file = self.dae_config.genomic_scores_db.conf_file
    return GPFConfigParser.load_config(conf_file, genomic_scores_schema)