Ejemplo n.º 1
0
def test_config_parser_env_interpolation_missing(
    conf_schema_basic, fixtures_dir
):
    """Load ``env_interpolation_conf.toml`` against the basic schema.

    No environment variable is patched in this test and the loaded result is
    not inspected.
    NOTE(review): the test name suggests a failure is expected when the
    variable is missing; no ``pytest.raises``/decorator is visible here, so
    confirm the expectation is declared outside this function.
    """
    conf_path = os.path.join(fixtures_dir, "env_interpolation_conf.toml")
    GPFConfigParser.load_config(conf_path, conf_schema_basic)
Ejemplo n.º 2
0
    def read_and_parse_file_configuration(cls, options, config_file):
        """Assemble the frozen configuration for a configuration file.

        The file is loaded with ``annotation_conf_schema`` and converted to a
        dict, the top-level bookkeeping keys are (re)initialised, defaults are
        applied through ``cls._setup_defaults`` and every section that names
        an annotator is parsed with ``cls.parse_section``.

        :param options: stored under the top-level ``"options"`` key and also
            passed, as ``{"options": options}``, into the dict update done
            for every section.
        :param config_file: path handed to ``GPFConfigParser.load_config``.
        :return: ``FrozenBox`` wrapping the assembled configuration.
        """
        raw_config = GPFConfigParser.load_config(
            config_file, annotation_conf_schema
        ).to_dict()

        raw_config.update({
            "options": options,
            "columns": {},
            "native_columns": [],
            "virtual_columns": [],
            "output_columns": [],
        })

        config = cls._setup_defaults(DefaultBox(raw_config))

        # Sections without an annotator are skipped entirely.
        config["sections"] = [
            cls.parse_section(
                recursive_dict_update({"options": options}, section.to_dict())
            )
            for section in config.sections
            if section.annotator is not None
        ]

        return FrozenBox(config)
Ejemplo n.º 3
0
def test_pheno_regressions_from_conf_path(regressions_conf):
    """The regressions loaded from ``regressions_conf`` match the table below.

    Both the number of regressions and the content of each named entry are
    compared.
    """
    fields = ("instrument_name", "measure_name", "jitter")
    rows = {
        "reg1": ("i1", "regressor1", 0.1),
        "reg2": ("i1", "regressor2", 0.2),
        "reg3": ("", "common_regressor", 0.3),
        "reg4": ("i2", "regressor1", 0.4),
    }
    expected_regs = {
        name: dict(zip(fields, row)) for name, row in rows.items()
    }

    regs = GPFConfigParser.load_config(
        regressions_conf, regression_conf_schema
    )

    assert len(regs.regression) == len(expected_regs)
    for reg_name, expected_reg in expected_regs.items():
        assert regs.regression[reg_name] == expected_reg
Ejemplo n.º 4
0
    def __init__(
            self,
            dae_config=None,
            config_file="DAE.conf",
            work_dir=None,
            defaults=None,
            load_eagerly=False):
        """Set up the instance from a ready config or from a config file.

        :param dae_config: already-loaded configuration; when ``None`` the
            configuration is read from ``config_file`` inside ``work_dir``.
        :param config_file: file name joined onto ``work_dir``; only used
            when ``dae_config`` is ``None``.
        :param work_dir: directory of the config file; when a file has to be
            loaded and this is ``None``, the ``DAE_DB_DIR`` environment
            variable is used. Stored as ``dae_db_dir`` in every case.
        :param defaults: must be falsy when the config is loaded from file
            (merging is not implemented, see the FIXME below).
        :param load_eagerly: when true, every attribute listed at the end of
            this method is accessed once during construction.
        """
        if dae_config is None:
            # FIXME Merge defaults with newly-loaded config
            assert not defaults, defaults
            if work_dir is None:
                work_dir = os.environ["DAE_DB_DIR"]
            dae_config = GPFConfigParser.load_config(
                os.path.join(work_dir, config_file), dae_conf_schema
            )

        self.dae_config = dae_config
        self.dae_db_dir = work_dir
        self.__autism_gene_profile_config = None
        self.load_eagerly = load_eagerly

        if not load_eagerly:
            return

        # Accessing the attribute is the whole point (presumably these are
        # lazily built properties - confirm). The order and the repeated
        # ``_gene_info_config`` entry are kept exactly as they were.
        eager_attributes = (
            "genomes_db",
            "gene_sets_db",
            "_gene_info_config",
            "_pheno_db",
            "_variants_db",
            "_gene_info_config",
            "denovo_gene_sets_db",
            "_score_config",
            "_scores_factory",
            "genotype_storage_db",
            "_common_report_facade",
            "_background_facade",
        )
        for attribute_name in eager_attributes:
            getattr(self, attribute_name)
Ejemplo n.º 5
0
def test_handle_regressions_default_jitter(mocker, fake_phenotype_data,
                                           output_dir,
                                           fake_phenotype_data_desc_conf):
    """Check the jitter passed to ``build_regression`` for measure ``i1.m1``.

    ``build_regression`` is replaced by a stub, the regressions for the
    measure are fully iterated, and the third positional argument of the
    first two recorded calls is compared with 0.12 and 0.13.
    """
    def fake_build_regression(*args):
        return dict.fromkeys(
            ("pvalue_regression_male", "pvalue_regression_female"), 0
        )

    patch_target = (
        "dae.pheno_browser.prepare_data."
        "PreparePhenoBrowserBase.build_regression"
    )
    mocked = mocker.patch(patch_target, side_effect=fake_build_regression)

    pheno_conf = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, pheno_conf
    )
    regressand = fake_phenotype_data.get_measure("i1.m1")

    # Drain the iterator so that every regression is actually handled.
    for _ in prep.handle_regressions(regressand):
        pass

    mocked.assert_called()
    for call_index, expected_jitter in enumerate((0.12, 0.13)):
        _measure, _reg_measure, jitter = mocked.call_args_list[call_index][0]
        assert jitter == expected_jitter
Ejemplo n.º 6
0
    def _gene_info_config(self):
        """Load the gene info configuration referenced by the DAE config.

        The path is taken from ``dae_config.gene_info_db.conf_file`` and is
        logged at debug level before loading.

        :return: result of ``GPFConfigParser.load_config`` for that file,
            using ``gene_info_conf``.
        """
        conf_file = self.dae_config.gene_info_db.conf_file
        logger.debug(f"loading gene info config file: {conf_file}")
        return GPFConfigParser.load_config(conf_file, gene_info_conf)
Ejemplo n.º 7
0
    def _autism_gene_profile_config(self):
        """Load the autism gene tool configuration, if one is available.

        :return: the loaded configuration, or ``None`` when the DAE config
            has no ``autism_gene_tool_config`` entry or the ``conf_file`` it
            names does not exist.
        """
        agp_config = self.dae_config.autism_gene_tool_config
        if agp_config is None:
            return None
        if not os.path.exists(agp_config.conf_file):
            return None

        return GPFConfigParser.load_config(
            agp_config.conf_file, autism_gene_tool_config
        )
Ejemplo n.º 8
0
def test_config_parser_load_single(conf_schema_basic, fixtures_dir):
    """``basic_conf.toml`` loads with the expected top-level and section1 values."""
    conf_path = os.path.join(fixtures_dir, "basic_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_basic)
    print(config)

    assert config.id == "152135"
    assert config.name == "Basic test config"

    section = config.section1
    assert section.someval1 == "beep"
    assert section.someval2 == 1.23
    assert section.someval3 == 52345
Ejemplo n.º 9
0
def test_handle_regressions_regressand_is_regressor(
        fake_phenotype_data, output_dir, fake_phenotype_data_desc_conf):
    """``handle_regressions`` produces nothing for the ``i1.age`` measure.

    Asking for the first item must raise ``StopIteration``.
    """
    pheno_conf = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, pheno_conf
    )
    age_measure = fake_phenotype_data.get_measure("i1.age")

    with pytest.raises(StopIteration):
        next(prep.handle_regressions(age_measure))
Ejemplo n.º 10
0
def test_config_parser_set_config(conf_schema_set, fixtures_dir):
    """``set_conf.toml`` loads and ``section1.someval2`` comes back as a set."""
    conf_path = os.path.join(fixtures_dir, "set_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_set)
    print(config)

    assert config.id == "152135"
    assert config.name == "Set test config"

    section = config.section1
    assert section.someval1 == "ala"
    assert isinstance(section.someval2, set)
    # Set equality is the same check as an empty symmetric difference.
    assert section.someval2 == {"a", "b", "c", "d"}
    assert section.someval3 == 123
Ejemplo n.º 11
0
def test_config_parser_string_interpolation(conf_schema_strings, fixtures_dir):
    """``vars_conf.toml`` loads with the expected string values.

    ``config.vars`` is expected to be ``None`` in the loaded result.
    """
    conf_path = os.path.join(fixtures_dir, "vars_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_strings)
    print(config)

    assert config.id == "152135"
    assert config.name == "Vars test config"
    assert config.vars is None

    expected_section1 = {
        "someval1": "asdf",
        "someval2": "ghjkl",
        "someval3": "qwertyasdfghjk",
    }
    for key, expected_value in expected_section1.items():
        assert getattr(config.section1, key) == expected_value
Ejemplo n.º 12
0
def test_config_parser_load_paths(conf_schema_path, fixtures_dir, mocker):
    """``path_conf.toml`` loads with ``os.path.exists`` patched to return True.

    The absolute path is expected unchanged; the relative one is expected to
    equal ``environ_conf.toml`` joined onto the fixtures directory.
    """
    mocker.patch("os.path.exists", return_value=True)

    conf_path = os.path.join(fixtures_dir, "path_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_path)
    print(config)

    assert config.id == "152135"
    assert config.name == "Path test config"
    assert config.some_abs_path == "/tmp/maybesomeconf.toml"

    expected_rel_path = os.path.join(fixtures_dir, "environ_conf.toml")
    assert config.some_rel_path == expected_rel_path
Ejemplo n.º 13
0
def test_handle_regressions_non_continuous_or_ordinal_measure(
        fake_phenotype_data, output_dir, fake_phenotype_data_desc_conf):
    """``handle_regressions`` produces nothing for ``i1.m5`` and ``i1.m6``.

    Presumably these are the categorical and raw measures of the fake data
    (per the test name) - confirm against the fixture.
    """
    pheno_conf = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, pheno_conf
    )

    # Fetch both measures up front, then check each one in turn.
    regressands = [
        fake_phenotype_data.get_measure(measure_id)
        for measure_id in ("i1.m5", "i1.m6")
    ]
    for regressand in regressands:
        with pytest.raises(StopIteration):
            next(prep.handle_regressions(regressand))
Ejemplo n.º 14
0
def test_config_parser_env_interpolation(
    conf_schema_basic, fixtures_dir, mocker
):
    """``env_interpolation_conf.toml`` loads with ``test_env_var`` set.

    The environment is patched so that ``test_env_var`` is ``"bop"``, and
    ``section1.someval1`` is expected to come back as that value.
    """
    mocker.patch.dict(os.environ, {"test_env_var": "bop"})

    conf_path = os.path.join(fixtures_dir, "env_interpolation_conf.toml")
    config = GPFConfigParser.load_config(conf_path, conf_schema_basic)
    print(config)

    assert config.id == "152135"
    assert config.name == "Environment interpolation test config"

    section = config.section1
    assert section.someval1 == "bop"
    assert section.someval2 == 1.23
    assert section.someval3 == 52345
Ejemplo n.º 15
0
    def __init__(self, dae_dir, conf_file=None):
        """Load the genomes configuration and every genome listed in it.

        :param dae_dir: stored as ``dae_dir``; ``genomesDB.conf`` inside it
            is used when ``conf_file`` is not given.
        :param conf_file: path of the configuration file (optional).
        :raises AssertionError: if ``Genome.load_config`` returns ``None`` or
            the configured default genome is not among the loaded genomes.
        """
        self.dae_dir = dae_dir
        conf_file = conf_file or f"{dae_dir}/genomesDB.conf"
        self.config = GPFConfigParser.load_config(conf_file, genomes_db_conf)

        # Loaded genomes, keyed by their ``genome_id``.
        self._genomes = {}
        for section_id, genome_config in self.config.genome.items():
            loaded_genome = Genome.load_config(genome_config, section_id)
            assert loaded_genome is not None
            self._genomes[loaded_genome.genome_id] = loaded_genome

        default_genome_id = self.config.genomes.default_genome
        assert default_genome_id in self._genomes
        self.default_genome = self._genomes[default_genome_id]
Ejemplo n.º 16
0
def test_has_regression_measure(fake_phenotype_data, output_dir,
                                regressions_conf):
    """Each listed pair is accepted by ``_has_regression_measure``.

    The pairs are passed positionally; they are presumably
    (measure name, instrument name) - confirm against the method signature.
    """
    regressions = GPFConfigParser.load_config(
        regressions_conf, regression_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, regressions
    )

    expected_reg_measures = (
        ("regressor1", "i1"),
        ("regressor2", "i1"),
        ("common_regressor", ""),
        ("common_regressor", "i1"),
        ("common_regressor", "i2"),
        ("regressor1", "i2"),
    )

    for first, second in expected_reg_measures:
        assert prep._has_regression_measure(first, second)
Ejemplo n.º 17
0
    def __init__(self, score_filename, config_filename=None):
        """Read the score file configuration and prepare column lookups.

        :param score_filename: path of the score file; must exist.
        :param config_filename: path of its configuration; defaults to
            ``<score_filename>.conf``.
        :raises AssertionError: if the score file is missing, the config has
            no header or no score columns, or a score name is not part of
            the schema.
        """
        self.score_filename = score_filename
        assert os.path.exists(self.score_filename), self.score_filename

        if config_filename is None:
            config_filename = f"{self.score_filename}.conf"

        self.config = GPFConfigParser.load_config(
            config_filename, score_file_conf_schema
        )

        assert self.config.general.header is not None
        assert self.config.columns.score is not None

        basename = os.path.basename(self.score_filename)

        self.header = self.config.general.header
        logger.debug(f"score file {basename} header {self.header}")
        self.score_names = self.config.columns.score

        schema = Schema.from_dict(self.config.score_schema)
        self.schema = schema.order_as(self.header)
        logger.debug(f"score file {basename} schema {self.schema.col_names}")

        assert all(name in self.schema for name in self.score_names), [
            self.score_filename,
            self.score_names,
            self.schema.col_names,
        ]

        # Positions of the chromosome / begin / end columns in the schema.
        col_names = self.schema.col_names
        self.chr_index = col_names.index(self.chr_name)
        self.pos_begin_index = col_names.index(self.pos_begin_name)
        self.pos_end_index = col_names.index(self.pos_end_name)

        self.chr_prefix = getattr(self.config.general, "chr_prefix", False)

        # "na" / "none" (any case), or a missing value, end up as None.
        no_score_value = self.config.general.no_score_value or "na"
        if no_score_value.lower() in ("na", "none"):
            no_score_value = None
        self.no_score_value = no_score_value

        self._init_access()
Ejemplo n.º 18
0
def test_handle_regressions(mocker, fake_phenotype_data, output_dir,
                            fake_phenotype_data_desc_conf):
    """Check the regressions handled for the ``i1.m1`` measure.

    ``build_regression`` is replaced by a stub that echoes its arguments.
    Two non-``None`` results with ids ``age`` and ``nviq`` are expected, and
    the first two recorded calls are checked for their measures and jitter.
    """
    def fake_build_regression(dependent_measure, independent_measure, jitter):
        result = dict.fromkeys(
            ("pvalue_regression_male", "pvalue_regression_female"), 0
        )
        result.update(
            regressand=dependent_measure,
            regressor=independent_measure,
            jitter=jitter,
        )
        return result

    patch_target = (
        "dae.pheno_browser.prepare_data."
        "PreparePhenoBrowserBase.build_regression"
    )
    mocked = mocker.patch(patch_target, side_effect=fake_build_regression)

    pheno_conf = GPFConfigParser.load_config(
        fake_phenotype_data_desc_conf, pheno_conf_schema
    )
    prep = PreparePhenoBrowserBase(
        "fake", fake_phenotype_data, output_dir, pheno_conf
    )
    regressand = fake_phenotype_data.get_measure("i1.m1")

    results = [
        item for item in prep.handle_regressions(regressand)
        if item is not None
    ]
    assert len(results) == 2
    assert sorted(item["regression_id"] for item in results) == ["age", "nviq"]

    mocked.assert_called()
    expected_calls = (("i1.age", 0.12), ("i1.iq", 0.13))
    for call_index, (regressor_id, expected_jitter) in enumerate(
            expected_calls):
        measure, reg_measure, jitter = mocked.call_args_list[call_index][0]
        assert measure.measure_id == "i1.m1"
        assert reg_measure.measure_id == regressor_id
        assert jitter == expected_jitter
Ejemplo n.º 19
0
def main(argv):
    """Import phenotype data and build its browser from the command line.

    The phenotype DB directory ``<phenotype_data.dir>/<pheno_name>`` and its
    ``browser`` subdirectory are created when missing, the pedigree and the
    variables are built, the pheno browser is generated and finally the
    ``<pheno_name>.conf`` file is written.

    :param argv: argument list passed to the ``pheno_cli_parser()`` parser.
    :return: ``0`` on success or on keyboard interrupt, ``2`` when any other
        exception is raised (the traceback and a short hint go to stderr).
    """
    try:
        gpf_instance = GPFInstance()
        dae_conf = gpf_instance.dae_config

        # Setup argument parser
        parser = pheno_cli_parser()
        args = parser.parse_args(argv)

        # ``file=`` matters here: a bare second positional argument would be
        # printed as text on stdout instead of selecting the output stream.
        if args.instruments is None:
            print("missing instruments directory parameter", file=sys.stderr)
            raise ValueError()
        if args.pedigree is None:
            print("missing pedigree filename", file=sys.stderr)
            raise ValueError()
        if args.pheno_name is None:
            print("missing pheno db name", file=sys.stderr)
            raise ValueError()

        args.pheno_name = verify_phenotype_data_name(args.pheno_name)

        pheno_db_dir = os.path.join(dae_conf.phenotype_data.dir,
                                    args.pheno_name)
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(pheno_db_dir, exist_ok=True)

        args.pheno_db_filename = os.path.join(pheno_db_dir,
                                              "{}.db".format(args.pheno_name))
        if os.path.exists(args.pheno_db_filename):
            # An existing DB is only replaced when --force was given.
            if not args.force:
                print("pheno db filename already exists:",
                      args.pheno_db_filename, file=sys.stderr)
                raise ValueError()
            os.remove(args.pheno_db_filename)

        args.browser_dir = os.path.join(pheno_db_dir, "browser")
        os.makedirs(args.browser_dir, exist_ok=True)

        config = parse_phenotype_data_config(args)
        if args.regression:
            regressions = GPFConfigParser.load_config(args.regression,
                                                      regression_conf_schema)
        else:
            regressions = None

        prep = PrepareVariables(config)
        prep.build_pedigree(args.pedigree)
        prep.build_variables(args.instruments, args.data_dictionary)

        build_pheno_browser(
            args.pheno_db_filename,
            args.pheno_name,
            args.browser_dir,
            regressions,
        )

        pheno_conf_path = os.path.join(pheno_db_dir,
                                       "{}.conf".format(args.pheno_name))

        with open(pheno_conf_path, "w") as pheno_conf_file:
            pheno_conf_file.write(
                toml.dumps(generate_phenotype_data_config(args, regressions)))

        return 0
    except KeyboardInterrupt:
        return 0
    except Exception as e:
        # Top-level boundary: report the failure and turn it into an exit code.
        traceback.print_exc()

        program_name = "simple_pheno_import.py"
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2
Ejemplo n.º 20
0
def get_person_set_collections_config(config_path):
    """Return the ``person_set_collections`` section of ``config_path``.

    The file is loaded with a schema that contains only that section,
    validated by ``person_set_collections_schema``.
    """
    schema = {"person_set_collections": person_set_collections_schema}
    loaded = GPFConfigParser.load_config(config_path, schema)
    return loaded.person_set_collections
Ejemplo n.º 21
0
def main():
    """Generate score histograms for the files given on the command line.

    Loads the score configuration from ``opts.config``, builds one
    ``ScoreHistogramInfo`` per (configured score, score column) pair, checks
    the input files and runs ``GenerateScoresHistograms``. The process exits
    with status ``-78`` when the input is ``"-"`` or an input file does not
    exist. The elapsed time is written to stderr at the end.
    """
    start_time = time.time()

    opts = get_argument_parser().parse_args()
    config = GPFConfigParser.load_config(opts.config, score_file_conf_schema)

    # Columns come from --scores (digit-only tokens become ints) or, failing
    # that, from the names of the configured genomic scores.
    if opts.scores is None:
        score_columns = list(config.genomic_scores.keys())
    else:
        score_columns = [
            int(token) if token.isdigit() else token
            for token in opts.scores.split(",")
        ]

    score_histograms_info = []
    for score, score_column in zip(config.genomic_scores.keys(),
                                   score_columns):
        histogram_info = config.genomic_scores[score]

        if histogram_info.range:
            bin_range = [float(bound) for bound in histogram_info.range]
        else:
            bin_range = None

        info = ScoreHistogramInfo(
            score,
            score_column,
            histogram_info.file,
            histogram_info.xscale,
            histogram_info.yscale,
            histogram_info.bins,
            bin_range,
        )
        score_histograms_info.append(info)

    if opts.infile == "-":
        sys.stderr.write("You must provide input file!\n")
        sys.exit(-78)

    input_files = opts.infile.split(",")

    # Bounds are converted only when both are given; non-digit values are
    # passed through unchanged.
    start, end = opts.s, opts.e
    if start is not None and end is not None:
        start = int(start) if start.isdigit() else start
        end = int(end) if end.isdigit() else end

    round_pos = None if opts.r is None else int(opts.r)
    chunk_size = int(opts.chunk_size) if opts.chunk_size else opts.chunk_size

    for input_file in input_files:
        if exists(input_file):
            continue
        sys.stderr.write("The given input file does not exist!\n")
        sys.exit(-78)

    generator = GenerateScoresHistograms(
        input_files, score_histograms_info, round_pos, chunk_size, start, end
    )
    generator.generate_scores_histograms()

    elapsed = datetime.timedelta(seconds=round(time.time() - start_time, 0))
    sys.stderr.write(
        "The program was running for [h:m:s]: " + str(elapsed) + "\n")
Ejemplo n.º 22
0
def dae_config_fixture():
    """Return ``fixtures/DAE.conf`` loaded with ``dae_conf_schema``.

    The path is resolved through ``relative_to_this_folder``.
    """
    conf_path = relative_to_this_folder("fixtures/DAE.conf")
    return GPFConfigParser.load_config(conf_path, dae_conf_schema)
Ejemplo n.º 23
0
def main(argv=None):  # IGNORE:C0111
    """Command line entry point that builds a pheno browser.

    :param argv: extra arguments appended after the ones already present in
        ``sys.argv``; ``None`` means "use ``sys.argv`` as is". ``sys.argv``
        itself is never modified.
    :return: ``0`` on success, ``1`` on keyboard interrupt, ``2`` on any
        other error (the traceback and a short hint are written out).
    """
    # Work on a private copy: extending sys.argv in place would leak the
    # extra arguments into every later call made in the same process.
    full_argv = list(sys.argv)
    if argv is not None:
        full_argv.extend(argv)

    program_name = os.path.basename(full_argv[0])
    # The launching module may have no docstring, or only a one-line one.
    doc_lines = (__import__("__main__").__doc__ or "").split("\n")
    program_shortdesc = doc_lines[1] if len(doc_lines) > 1 else ""
    program_license = """%s

USAGE
""" % (program_shortdesc, )

    try:
        # Setup argument parser
        parser = ArgumentParser(
            description=program_license,
            formatter_class=RawDescriptionHelpFormatter,
        )
        parser.add_argument(
            "-v",
            "--verbose",
            dest="verbose",
            action="count",
            help="set verbosity level [default: %(default)s]",
        )
        parser.add_argument(
            "-d",
            "--dbfile",
            dest="dbfile",
            help="pheno db file name",
            metavar="path",
        )
        parser.add_argument("-p",
                            "--pheno",
                            dest="pheno_name",
                            help="pheno name")
        parser.add_argument(
            "-o",
            "--output",
            dest="output",
            help="output base dir",
            metavar="path",
        )

        parser.add_argument(
            "--regression",
            help=("path to a regression configuration file"),
            type=str,
        )

        # Process arguments (everything after the program name).
        args = parser.parse_args(full_argv[1:])

        if not args.output or not os.path.exists(args.output):
            raise CLIError("output directory should be specified and empty")

        if not args.pheno_name:
            raise CLIError("pheno name must be specified")
        if not args.dbfile or not os.path.exists(args.dbfile):
            raise CLIError("pheno db file name must be specified")

        regressions = (GPFConfigParser.load_config(args.regression,
                                                   regression_conf_schema)
                       if args.regression else None)

        build_pheno_browser(args.dbfile, args.pheno_name, args.output,
                            regressions)

        return 0
    except KeyboardInterrupt:
        return 1
    except Exception as e:
        # Top-level boundary: report the failure and turn it into an exit code.
        traceback.print_exc()
        print()
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        return 2
Ejemplo n.º 24
0
def test_config_parser_load_incorrect_paths(conf_schema_path, fixtures_dir):
    """Load ``wrong_path_conf.toml`` against the path schema.

    The loaded result is not inspected.
    NOTE(review): the test name suggests loading is expected to fail; no
    ``pytest.raises``/decorator is visible here, so confirm the expectation
    is declared outside this function.
    """
    conf_path = os.path.join(fixtures_dir, "wrong_path_conf.toml")
    GPFConfigParser.load_config(conf_path, conf_schema_path)
Ejemplo n.º 25
0
 def _score_config(self):
     """Load the genomic scores configuration named in the DAE config.

     The path is read from ``dae_config.genomic_scores_db.conf_file`` and
     loaded with ``genomic_scores_schema``.
     """
     conf_file = self.dae_config.genomic_scores_db.conf_file
     return GPFConfigParser.load_config(conf_file, genomic_scores_schema)