Esempio n. 1
0
def test_position_score_annotator_indels_test_score(chrom, pos, ref, alt, t1,
                                                    t2, t3, genomes_db_2013):
    """Annotate one line and compare the three TEST score columns.

    A ``PositionScoreAnnotator`` is built over the TESTphyloP100way bedGraph
    fixture, a single ``CHROM``/``POS``/``REF``/``ALT`` line is annotated in
    place, and the resulting ``TEST``, ``TEST2`` and ``TEST3`` values are
    compared with ``t1``, ``t2`` and ``t3`` using a relative tolerance of
    1e-4.
    """
    section = {
        "options": {
            "mode": "replace",
            "vcf": True,
            "direct": False,
            "region": None,
            "scores_file": relative_to_this_test_folder(
                "fixtures/TESTphyloP100way/TESTphyloP100way.bedGraph.gz"),
        },
        "columns": OrderedDict(
            [("TEST", "TEST"), ("TEST2", "TEST2"), ("TEST3", "TEST3")]),
        "annotator": "score_annotator.PositionScoreAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    score_annotator = PositionScoreAnnotator(config, genomes_db_2013)
    assert score_annotator is not None

    # line_annotation() fills the score columns into the same dict.
    record = {"CHROM": chrom, "POS": pos, "REF": ref, "ALT": alt}
    score_annotator.line_annotation(record)

    for column, wanted in (("TEST", t1), ("TEST2", t2), ("TEST3", t3)):
        assert float(record[column]) == pytest.approx(wanted, 1e-4)
def effect_annotator(genomes_db_2013):
    """Create a ``VariantEffectAnnotator`` whose outputs carry a ``_1`` suffix.

    Every effect column is mapped to an output column of the same name with
    ``_1`` appended. The annotator is configured with ``vcf`` enabled, a
    promoter length of 0 and the ``chrom``/``position``/``reference``/
    ``alternative`` column names.

    Returns:
        The constructed ``VariantEffectAnnotator``.
    """
    effect_columns = (
        "effect_type",
        "effect_genes",
        "effect_gene_genes",
        "effect_gene_types",
        "effect_details",
        "effect_details_transcript_ids",
        "effect_details_details",
    )

    section = {
        "options": {
            "vcf": True,
            "direct": False,
            "r": "reference",
            "a": "alternative",
            "c": "chrom",
            "p": "position",
            "prom_len": 0,
        },
        "columns": {name: f"{name}_1" for name in effect_columns},
        "annotator": "effect_annotator.VariantEffectAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    built = VariantEffectAnnotator(config, genomes_db_2013)
    assert built is not None
    return built
Esempio n. 3
0
    def build(options, config_file, genomes_db):
        """Assemble a ``PipelineAnnotator`` from a configuration file.

        The configuration is parsed into sections, one annotator is created
        per section and added to the pipeline, and the pipeline's
        ``output_columns`` is replaced by the pipeline's own columns followed
        by every annotator column not collected so far.

        Args:
            options: Options forwarded to the configuration parser.
            config_file: Pipeline configuration file to read.
            genomes_db: Genomes database handed to the pipeline and to each
                annotator.

        Returns:
            The populated ``PipelineAnnotator``.
        """
        parsed = AnnotationConfigParser.read_and_parse_file_configuration(
            options, config_file
        )
        assert parsed.sections

        pipeline = PipelineAnnotator(parsed, genomes_db)
        merged_columns = list(pipeline.config.output_columns)

        for section in parsed.sections:
            annotator = AnnotatorFactory.make_annotator(section, genomes_db)
            pipeline.add_annotator(annotator)

            # Filter against what has been collected up to this annotator,
            # then append the survivors in one go.
            unseen = [
                name for name in annotator.config.output_columns
                if name not in merged_columns
            ]
            merged_columns += unseen

        # FIXME
        # Workaround: the config is a frozen Box, so "output_columns" cannot
        # be assigned directly. Round-trip through a plain dict and freeze
        # the result again. To be fixed properly when the annotation
        # pipeline module is refactored.
        thawed = pipeline.config.to_dict()
        thawed["output_columns"] = merged_columns
        pipeline.config = FrozenBox(thawed)

        return pipeline
Esempio n. 4
0
def test_lift_over(mocker, chrom, pos, lift_over, expected, genomes_db_2013):
    """Check the location produced by ``LiftOverAnnotator.do_annotate``.

    The chain-file and target-genome loaders are patched out, and the
    annotator's ``liftover`` and ``target_genome`` are replaced by mocks
    driven by the ``lift_over`` argument and ``mock_get_sequence``. The
    ``cshl_location`` of the variant stored under ``"lo1"`` (or ``None`` when
    no variant was produced) must equal ``expected``.
    """
    section = {
        "options": {
            "mode": "replace",
            "vcf": True,
            "direct": True,
            "region": None,
            "chain_file": "fake_chain_file",
            "c": "chrom",
            "p": "pos",
            "liftover": "lo1",
        },
        "columns": {"new_x": "hg19_location"},
        "annotator": "lift_over_annotator.LiftOverAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    # Keep the constructor from loading a real chain file / genome.
    for loader_name in ("load_liftover_chain", "load_target_genome"):
        mocker.patch(
            "dae.annotation.tools.lift_over_annotator."
            f"LiftOverAnnotator.{loader_name}")

    annotator = LiftOverAnnotator(config, genomes_db_2013)
    assert annotator is not None

    annotator.liftover = mocker.Mock()
    annotator.liftover.convert_coordinate = lift_over
    annotator.target_genome = mocker.Mock()
    annotator.target_genome.get_sequence = mock_get_sequence

    liftover_variants = {}
    annotator.do_annotate(
        {"chrom": chrom, "pos": pos},
        SummaryAllele(chrom, pos, "A", "T"),
        liftover_variants,
    )

    lo_variant = liftover_variants.get("lo1")
    print(f"liftover variant: {lo_variant}")

    if lo_variant:
        lo_location = lo_variant.details.cshl_location
    else:
        lo_location = None

    assert expected == lo_location
Esempio n. 5
0
def test_annotator_base_simple(genomes_db_2013):
    """Smoke test: an ``AnnotatorBase`` can be built from a minimal section."""
    section = {
        "options": {},
        "columns": {"CSHL_chr": "chr", "CSHL_position": "pos"},
        "virtual_columns": [],
        "annotator": "annotator_base.AnnotatorBase",
    }
    section_config = AnnotationConfigParser.parse_section(section)

    assert AnnotatorBase(section_config, genomes_db_2013) is not None
Esempio n. 6
0
def test_vcf_info_annotator(fixture_dirname, genomes_db_2013):
    """Annotate trio VCF variants with gnomAD INFO fields.

    A ``VcfInfoAnnotator`` is configured over the gnomAD sites fixture, every
    summary variant loaded from the trio VCF is annotated, and each alt
    allele must end up with a non-``None`` ``genome_gnomad_af_percent``
    attribute.
    """
    score_filename = fixture_dirname(
        "vcf_scores/gnomad.genomes.r2.1.1.sites.21.1_622.vcf.gz")

    section = {
        "options": {
            "vcf": True,
            "c": "chrom",
            "p": "position",
            "r": "reference",
            "a": "alternative",
            "scores_file": score_filename,
        },
        "columns": {
            "AC": "genome_gnomad_ac",
            "AF": "genome_gnomad_af",
            "AF_percent": "genome_gnomad_af_percent",
        },
        "annotator": "vcf_info_annotator.VcfInfoAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    annotator = VcfInfoAnnotator(config, genomes_db_2013)
    assert annotator is not None

    vcf_filename = fixture_dirname(
        "vcf_scores/gnomad.genomes.r2.1.1.sites.21.trio.vcf.gz")
    pedigree_filename = fixture_dirname("vcf_scores/trio.ped")
    for required_path in (vcf_filename, pedigree_filename):
        assert os.path.exists(required_path)

    families = FamiliesLoader(pedigree_filename).load()

    loader = VcfLoader(families, [vcf_filename], genomes_db_2013.get_genome())
    assert loader is not None

    for summary_variant, _ in loader.full_variants_iterator():
        # A fresh liftover mapping is handed over for every variant.
        annotator.annotate_summary_variant(summary_variant, {})

        for aa in summary_variant.alt_alleles:
            af = aa.get_attribute("genome_gnomad_af_percent")
            logger.debug(f"summary variant: {aa}; gnomad AF {af}%")
            assert af is not None
Esempio n. 7
0
def test_variant_score_annotator_cadd(
    expected_df, variants_io, direct, capsys, genomes_db_2013
):
    """Annotate ``fixtures/input2.tsv`` with the TESTCADD scores.

    The annotation result is read from the captured stdout and compared, as
    a data frame, with ``input2_cadd_expected`` (relative tolerance 10e-3).
    """
    section = {
        "options": {
            "vcf": True,
            "direct": direct,
            "mode": "overwrite",
            "scores_file": relative_to_this_test_folder(
                "fixtures/TESTCADD/TESTwhole_genome_SNVs.tsv.gz"
            ),
        },
        "columns": OrderedDict(
            [("RawScore", "RESULT_RawScore"), ("PHRED", "RESULT_PHRED")]
        ),
        "annotator": "score_annotator.VariantScoreAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)
    print(config.options)
    print(type(config.options))

    with variants_io("fixtures/input2.tsv") as io_manager:
        score_annotator = NPScoreAnnotator(config, genomes_db_2013)
        assert score_annotator is not None

        # Discard everything captured so far so that only the output of
        # annotate_file() is left for the comparison below.
        capsys.readouterr()

        score_annotator.annotate_file(io_manager)

    captured = capsys.readouterr()
    print(captured.err)
    print(captured.out)

    produced = expected_df(captured.out)
    reference = expected_df(input2_cadd_expected)
    pd.testing.assert_frame_equal(produced, reference, rtol=10e-3)
Esempio n. 8
0
def test_variant_multi_score_annotator_multi(
    expected_df, variants_io, direct, capsys, genomes_db_2013
):
    """Annotate ``fixtures/input2.tsv`` with two scores from one directory.

    A ``PositionMultiScoreAnnotator`` is pointed at the ``fixtures/`` scores
    directory; the captured stdout is compared, as a data frame, with
    ``input2_phast_pylo_expected`` (relative tolerance 10e-3).
    """
    section = {
        "options": {
            "vcf": True,
            "direct": direct,
            "mode": "overwrite",
            "scores_directory": relative_to_this_test_folder("fixtures/"),
        },
        "columns": OrderedDict([
            ("TESTphastCons100way", "RESULT_phastCons100way"),
            ("TESTphyloP100way", "RESULT_phyloP100way"),
        ]),
        "annotator": "score_annotator.VariantScoreAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)
    print(config.options)
    print(type(config.options))

    with variants_io("fixtures/input2.tsv") as io_manager:
        score_annotator = PositionMultiScoreAnnotator(config, genomes_db_2013)
        assert score_annotator is not None

        # Discard everything captured so far so that only the output of
        # annotate_file() is left for the comparison below.
        capsys.readouterr()

        score_annotator.annotate_file(io_manager)

    captured = capsys.readouterr()
    print(captured.err)
    print(captured.out)

    produced = expected_df(captured.out)
    reference = expected_df(input2_phast_pylo_expected)
    pd.testing.assert_frame_equal(produced, reference, rtol=10e-3)
Esempio n. 9
0
def test_bigwig_access_indels(
    expected_df, capsys, variants_io, genomes_db_2013
):
    """Annotate ``fixtures/bigwig_indels.tsv`` from a bigWig score file.

    Besides comparing the captured stdout with ``expected_bw_output``, the
    test checks that the score file of the annotator is accessed through
    ``BigWigAccess``.
    """
    score_path = relative_to_this_test_folder(
        "fixtures/TESTbigwig/TEST_bigwig_score.bw"
    )
    options = Box(
        {"mode": "overwrite", "scores_file": score_path},
        default_box=True,
        default_box_attr=None,
    )

    section = Box(
        {
            "options": options,
            "columns": {"TEST_bigwig_score": "RESULT_bigwig_score"},
            "annotator": "score_annotator.PositionScoreAnnotator",
        }
    )
    config = AnnotationConfigParser.parse_section(section, genomes_db_2013)
    print(config.options)
    print(type(config.options))

    with variants_io("fixtures/bigwig_indels.tsv") as io_manager:
        score_annotator = PositionScoreAnnotator(config)
        assert score_annotator is not None
        assert isinstance(score_annotator.score_file.accessor, BigWigAccess)

        # Discard everything captured so far so that only the output of
        # annotate_file() is left for the comparison below.
        capsys.readouterr()
        score_annotator.annotate_file(io_manager)

    captured = capsys.readouterr()
    print(captured.err)
    print(captured.out)

    produced = expected_df(captured.out)
    reference = expected_df(expected_bw_output)
    # NOTE(review): `check_less_precise` was deprecated in pandas 1.1 in
    # favour of `rtol`/`atol` - confirm the pinned pandas version still
    # accepts it.
    pd.testing.assert_frame_equal(
        produced,
        reference,
        check_less_precise=3,
    )
Esempio n. 10
0
def test_position_score_annotator_indels(expected_df, variants_io, capsys,
                                         genomes_db_2013, direct, infile,
                                         expected):
    """Annotate ``infile`` with TEST3phyloP100way position scores.

    The captured stdout is compared, as a data frame, with ``expected``
    (relative tolerance 10e-3, names not checked). The same options dict is
    used for the annotator section and for opening ``infile``.
    """
    options = {
        "mode": "replace",
        "vcf": True,
        "direct": direct,
        "region": None,
        "scores_file": relative_to_this_test_folder(
            "fixtures/TEST3phyloP100way/TEST3phyloP100way.bedGraph.gz"),
    }

    section = {
        "options": options,
        "columns": {"TESTphyloP100way": "RESULT_phyloP100way"},
        "annotator": "score_annotator.NPScoreAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    with variants_io(infile, options) as io_manager:
        score_annotator = PositionScoreAnnotator(config, genomes_db_2013)
        assert score_annotator is not None

        # Discard everything captured so far so that only the output of
        # annotate_file() is left for the comparison below.
        capsys.readouterr()

        score_annotator.annotate_file(io_manager)

    captured = capsys.readouterr()
    print(captured.err)
    print(captured.out)

    produced = expected_df(captured.out)
    reference = expected_df(expected)
    pd.testing.assert_frame_equal(
        produced, reference, rtol=10e-3, check_names=False)
Esempio n. 11
0
def test_frequency_annotator(
        variants_io, expected_df, capsys, genomes_db_2013):
    """Annotate ``fixtures/freq_test_1.tsv`` with two frequency columns.

    The captured stdout is compared, as a data frame, with
    ``expected_result_freq`` (relative tolerance 10e-3).
    """
    section = {
        "options": {
            "vcf": True,
            "direct": False,
            "mode": "overwrite",
            "scores_file": relative_to_this_test_folder(
                "fixtures/TESTFreq/test_freq.tsv.gz"
            ),
        },
        "columns": {
            "all_altFreq": "RESULT_FREQ",
            "all_altFreq2": "RESULT_FREQ_2",
        },
        "annotator": "frequency_annotator.FrequencyAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    with variants_io("fixtures/freq_test_1.tsv") as io_manager:
        freq_annotator = FrequencyAnnotator(config, genomes_db_2013)
        assert freq_annotator is not None

        # Discard everything captured so far so that only the output of
        # annotate_file() is left for the comparison below.
        capsys.readouterr()

        freq_annotator.annotate_file(io_manager)

    captured = capsys.readouterr()
    print(captured.err)
    print(captured.out)

    produced = expected_df(captured.out)
    reference = expected_df(expected_result_freq)
    pd.testing.assert_frame_equal(produced, reference, rtol=10e-3)

    print(captured.err)
Esempio n. 12
0
def test_np_score_annotator_indels(expected_df, variants_io, capsys,
                                   genomes_db_2013, direct, infile, expected):
    """Annotate ``infile`` with TEST3CADD scores via ``NPScoreAnnotator``.

    The captured stdout is compared, as a data frame, with ``expected``
    (relative tolerance 10e-3, names not checked). The same options dict is
    used for the annotator section and for opening ``infile``.
    """
    options = {
        "mode": "replace",
        "vcf": True,
        "direct": direct,
        "region": None,
        "scores_file": relative_to_this_test_folder(
            "fixtures/TEST3CADD/TEST3whole_genome_SNVs.tsv.gz"),
    }

    section = {
        "options": options,
        "columns": OrderedDict(
            [("RawScore", "RESULT_RawScore"), ("PHRED", "RESULT_PHRED")]),
        "annotator": "score_annotator.NPScoreAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    with variants_io(infile, options) as io_manager:
        score_annotator = NPScoreAnnotator(config, genomes_db_2013)
        assert score_annotator is not None

        # Discard everything captured so far so that only the output of
        # annotate_file() is left for the comparison below.
        capsys.readouterr()

        score_annotator.annotate_file(io_manager)

    captured = capsys.readouterr()
    print(captured.err)
    print(captured.out)

    produced = expected_df(captured.out)
    reference = expected_df(expected)
    pd.testing.assert_frame_equal(
        produced, reference, rtol=10e-3, check_names=False)
Esempio n. 13
0
def test_cleanup_annotator(capsys, variants_io, genomes_db_2013):
    """Run ``CleanupAnnotator`` on ``fixtures/input.tsv`` and compare stdout.

    The section asks for the ``id`` and ``variant`` columns to be cleaned
    up; the captured stdout must equal ``expected_cleanup_output`` exactly.
    """
    section = {
        "options": {},
        "columns": {"cleanup": "id, variant"},
        "annotator": "cleanup_annotator.CleanupAnnotator",
        "virtual_columns": [],
    }
    section_config = AnnotationConfigParser.parse_section(section)

    with variants_io("fixtures/input.tsv") as io_manager:
        annotator = CleanupAnnotator(section_config, genomes_db_2013)
        assert annotator is not None
        # Reset the capture so only annotate_file() output is compared.
        capsys.readouterr()
        annotator.annotate_file(io_manager)

    outcome = capsys.readouterr()

    print(outcome.out)
    print(outcome.err)
    assert outcome.out == expected_cleanup_output
Esempio n. 14
0
    def _build_annotator_for(self, score_name):
        """Create the ``PositionScoreAnnotator`` for a single score.

        The new annotator reuses this annotator's options with
        ``scores_file`` set to the file resolved for ``score_name``, and maps
        only the ``score_name`` column.

        Args:
            score_name: Name of the score; must be an attribute of
                ``self.config.columns``.

        Returns:
            A ``PositionScoreAnnotator`` for ``score_name``.
        """
        scores_dir = self.config.options.scores_directory
        assert os.path.exists(scores_dir), scores_dir

        score_filename = self._get_score_file(score_name)

        section = {
            "options": GPFConfigParser.modify_tuple(
                self.config.options, {"scores_file": score_filename}
            ),
            "columns": {
                score_name: getattr(self.config.columns, score_name),
            },
            "annotator": "score_annotator.VariantScoreAnnotator",
            "virtual_columns": [],
        }
        variant_config = AnnotationConfigParser.parse_section(section)

        return PositionScoreAnnotator(variant_config, self.genomes_db)
Esempio n. 15
0
def test_variant_score_annotator_simple(
        expected_df, variants_io, direct, capsys, genomes_db_2013):
    """Annotate ``fixtures/input2.tsv`` with TESTphastCons100way scores.

    The captured stdout is compared, as a data frame, with
    ``input2_phast_exptected`` (relative tolerance 10e-3).
    """
    section = {
        "options": {
            "vcf": True,
            "direct": direct,
            "mode": "overwrite",
            "scores_file": relative_to_this_test_folder(
                "fixtures/TESTphastCons100way/TESTphastCons100way.bedGraph.gz"
            ),
        },
        "columns": {"TESTphastCons100way": "RESULT_phastCons100way"},
        "annotator": "score_annotator.VariantScoreAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    with variants_io("fixtures/input2.tsv") as io_manager:
        score_annotator = PositionScoreAnnotator(config, genomes_db_2013)
        assert score_annotator is not None

        # Discard everything captured so far so that only the output of
        # annotate_file() is left for the comparison below.
        capsys.readouterr()

        score_annotator.annotate_file(io_manager)

    captured = capsys.readouterr()
    print(captured.err)
    print(captured.out)

    produced = expected_df(captured.out)
    reference = expected_df(input2_phast_exptected)
    pd.testing.assert_frame_equal(produced, reference, rtol=10e-3)
Esempio n. 16
0
def test_vcf_info_extractor(capsys, vcf_io, genomes_db_2013):
    """Extract four INFO keys with ``VCFInfoExtractor`` and compare stdout.

    ``AC``, ``AB``, ``AT`` and ``AZ`` are mapped to ``extracted-*`` columns
    in ``overwrite`` mode; the captured stdout must match the expected
    tab-separated table exactly.
    """
    expected_output = (
        "extracted-AC\textracted-AB\textracted-AT\textracted-AZ\n"
        "0\t7.324234\t4.453e-10\ttest1\n"
        "2\t\t6.453e+10\ttest2\n"
        "4\t11.324234\t\ttest3\n"
        "\t13.324234\t10.453e+10\t\n")

    section = {
        "options": {"mode": "overwrite"},
        "columns": {
            "AC": "extracted-AC",
            "AB": "extracted-AB",
            "AT": "extracted-AT",
            "AZ": "extracted-AZ",
        },
        "annotator": "vcf_info_extractor.VCFInfoExtractor",
        "virtual_columns": [],
    }
    section_config = AnnotationConfigParser.parse_section(section)

    with vcf_io as io_manager:
        annotator = VCFInfoExtractor(section_config, genomes_db_2013)
        assert annotator is not None
        # Reset the capture so only annotate_file() output is compared.
        capsys.readouterr()
        annotator.annotate_file(io_manager)

    outcome = capsys.readouterr()

    print(outcome.out)
    print(outcome.err)
    print(expected_output)
    assert outcome.out == expected_output
Esempio n. 17
0
def test_copy_annotator_simple(capsys, variants_io1, genomes_db_2013):
    """Smoke test: ``CopyAnnotator`` annotates ``variants_io1`` without error.

    The captured output is only printed; nothing is asserted about it.
    """
    section = {
        "options": {},
        "columns": {"location": "loc1", "variant": "var1"},
        "virtual_columns": [],
        "annotator": "annotator_base.CopyAnnotator",
    }
    section_config = AnnotationConfigParser.parse_section(section)

    with variants_io1 as io_manager:
        annotator = CopyAnnotator(section_config, genomes_db_2013)
        assert annotator is not None
        # Reset the capture before producing the annotation output.
        capsys.readouterr()
        annotator.annotate_file(io_manager)

    outcome = capsys.readouterr()

    print(outcome.out)
    print(outcome.err)
Esempio n. 18
0
def test_copy_annotator_multi(capsys, variants_io_m, expected_df,
                              genomes_db_2013):
    """Check that ``CopyAnnotator`` leaves ``test1``/``test2`` unchanged.

    The ``test1`` and ``test2`` columns parsed from the captured stdout must
    equal the same columns read directly from ``fixtures/input_multi.tsv``.
    """
    checked_columns = ["test1", "test2"]

    section = {
        "options": {},
        "columns": {"location": "loc1", "variant": "var1"},
        "virtual_columns": [],
        "annotator": "annotator_base.CopyAnnotator",
    }
    section_config = AnnotationConfigParser.parse_section(section)

    input_path = relative_to_this_test_folder("fixtures/input_multi.tsv")
    df = pd.read_csv(input_path, sep="\t")
    print(df)

    print(df[checked_columns])

    with variants_io_m as io_manager:
        annotator = CopyAnnotator(section_config, genomes_db_2013)
        assert annotator is not None
        # Reset the capture so only annotate_file() output is parsed below.
        capsys.readouterr()
        annotator.annotate_file(io_manager)

    outcome = capsys.readouterr()

    print(outcome.out)
    print(outcome.err)
    res_df = expected_df(outcome.out)
    print(res_df)
    print(res_df[checked_columns])

    pd.testing.assert_frame_equal(df[checked_columns],
                                  res_df[checked_columns])