Esempio n. 1
0
    def test_fusion(self):
        """Check ``Assigner.get_best`` on a prediction spanning two reference genes.

        Two reference transcripts (``foo.1`` at 101-1000 and ``bar.1`` at
        2001-3000, both on ``Chr1``, strand ``+``) are written to a temporary
        GTF file which is then indexed. The prediction ``faz.1`` (651-2703)
        has exons overlapping both references.

        Expected outcome:

        * ``report_fusions=True``: two results, each with class codes
          ``("f", "j")``;
        * ``report_fusions=False``: a single result with class codes
          ``("j",)``.
        """

        # First reference gene.
        t = Transcript()
        t.chrom, t.strand, t.start, t.end, t.id, t.parent = "Chr1", "+", 101, 1000, "foo.1", "foo"
        t.add_exons([(101, 500), (601, 800), (901, 1000)])
        t.finalize()

        # Second reference gene, downstream of the first one.
        t2 = Transcript()
        t2.chrom, t2.strand, t2.start, t2.end, t2.id, t2.parent = "Chr1", "+", 2001, 3000, "bar.1", "bar"
        t2.add_exons([(2001, 2500), (2601, 2800), (2901, 3000)])
        t2.finalize()

        # Prediction whose exons overlap both reference transcripts.
        t3 = Transcript()
        t3.chrom, t3.strand, t3.start, t3.end, t3.id, t3.parent = "Chr1", "+", 651, 2703, "faz.1", "faz"
        t3.add_exons([(651, 800), (901, 1300), (2230, 2500), (2601, 2703)])
        t3.finalize()

        logger = create_default_logger("test_fusion")
        with tempfile.TemporaryDirectory() as folder:
            reference_path = os.path.join(folder, "reference.gtf")
            index_path = "{}.midx".format(reference_path)

            with open(reference_path, "wt") as reference:
                print(t.format("gtf"), file=reference)
                print(t2.format("gtf"), file=reference)
            self.assertTrue(os.path.exists(reference_path))

            # Iterate over the whole file once before indexing, so that any
            # error raised while parsing surfaces here.
            for _ in parser_factory(reference_path):
                pass

            try:
                indexing.create_index(parser_factory(reference_path), logger,
                                      index_path)
            except InvalidParsingFormat:
                # Show the offending file in the failure message; the handle
                # is closed deterministically before failing.
                with open(reference_path) as handle:
                    content = "\n".join(line.rstrip() for line in handle)
                self.fail(content)

            namespace = Namespace(default=False)
            namespace.out = os.path.join(folder, "out")
            for report in (False, True):
                with self.subTest(report=report):
                    namespace.report_fusions = report
                    assigner = Assigner(index_path,
                                        args=namespace,
                                        printout_tmap=False)
                    result = assigner.get_best(t3)
                    if report:
                        # assertEqual, not assertTrue: with assertTrue the
                        # "2" would be taken as the failure message and the
                        # length would never actually be checked.
                        self.assertEqual(len(result), 2)
                        self.assertEqual(result[0].ccode, ("f", "j"),
                                         str(result[0]))
                        self.assertEqual(result[1].ccode, ("f", "j"),
                                         str(result[1]))
                    else:
                        self.assertEqual(result.ccode, ("j", ), str(result))
Esempio n. 2
0
    def test_get_external(self):
        """Check which external scores ``Superlocus.get_external`` returns.

        Six external sources and one score per source are stored in an
        in-memory SQLite database for the transcript ``ENST00000560636``.
        The test then verifies three situations:

        1. ``report_all_external_metrics = True``: all six scores are returned;
        2. ``report_all_external_metrics = False`` and no ``external.*`` key in
           the scoring configuration: nothing is returned;
        3. ``report_all_external_metrics = False`` with five ``external.*``
           keys spread over the scoring/requirements sections: only those five
           are returned (``float`` is absent).
        """
        from collections import namedtuple

        checked_conf = load_and_validate_config(None).copy()
        checked_conf.pick.output_format.report_all_external_metrics = True

        transcript = Transcript()
        transcript.chrom = "15"
        transcript.source = "protein_coding"
        transcript.start = 47631264
        transcript.end = 48051999

        exons = [(47631264, 47631416), (47704590, 47704669),
                 (47762671, 47762742), (47893062, 47893093),
                 (47895572, 47895655), (48051942, 48051999)]

        transcript.strand = "+"
        transcript.add_exons(exons)
        transcript.id = "ENST00000560636"
        transcript.parent = "ENSG00000137872"
        transcript2 = transcript.copy()
        transcript2.id = "ENST00000560637"
        checked_conf.scoring.scoring["attributes.tpm"] = MinMaxScore.Schema(
        ).load({
            "rescaling": "max",
            "default": 0,
            "rtype": "float",
            'multiplier': 4,
            'use_raw': True,
            'percentage': True
        })
        transcript.attributes["tpm"] = 10

        # NOTE(review): the positional arguments look like (name, rtype,
        # valid_raw) -- the last one matches the boolean in the expected
        # result tuples below; confirm against ExternalSource.
        int_source = ExternalSource('int', 'int', 0)
        float_source = ExternalSource('float', 'float', 0)
        bool_source = ExternalSource('bool', 'bool', 0)

        raw_int_source = ExternalSource('raw_int', 'int', 1)
        raw_float_source = ExternalSource('raw_float', 'float', 1)
        raw_bool_source = ExternalSource('raw_bool', 'bool', 1)

        # NOTE(review): the positional arguments look like (query_id,
        # source_id, score); the ids presumably rely on the insertion order
        # of the sources/queries below -- confirm against External.
        int_score = External(1, 1, 10)
        float_score = External(1, 2, 10.0)
        bool_score = External(
            1, 3, int(False)
        )  # We cast as int here following external.py serialize function

        raw_int_score = External(1, 4, 8)
        raw_float_score = External(1, 5, 8.0)
        raw_bool_score = External(
            1, 6, int(True)
        )  # We cast as int here following external.py serialize function

        query = Query(transcript.id, transcript.cdna_length)
        query2 = Query(transcript2.id, transcript2.cdna_length)

        engine = create_engine("sqlite:///:memory:")
        # Cleanups run in reverse order: the session is closed first, then
        # the engine is disposed, even if an assertion fails.
        self.addCleanup(engine.dispose)
        db.metadata.create_all(engine)
        SessionMaker = sessionmaker(bind=engine)
        session = SessionMaker()
        self.addCleanup(session.close)

        # Keep this insertion order: sources, then queries, then scores.
        session.add_all([
            int_source, float_source, bool_source, raw_int_source,
            raw_float_source, raw_bool_source
        ])
        session.add_all([query, query2])
        session.add_all([
            int_score, float_score, bool_score, raw_int_score, raw_float_score,
            raw_bool_score
        ])
        session.commit()

        sup = Superlocus(transcript, configuration=checked_conf)
        sup.session = session
        tid = transcript.id
        self.assertIn(tid, sup.transcripts)

        # Minimal stand-in for a query row: an object exposing "query_name",
        # keyed by query id.
        query_stub = namedtuple('t', ['query_name'])
        qobj = {1: query_stub(query_name=tid)}

        # 1. Every external metric is reported.
        external = asyncio.run(sup.get_external(qobj, [1]))
        self.assertEqual(
            external, {
                'ENST00000560636': {
                    'int': (10, False),
                    'float': (10.0, False),
                    'bool': (False, False),
                    'raw_int': (8, True),
                    'raw_float': (8.0, True),
                    'raw_bool': (True, True)
                }
            })

        # 2. Nothing requested by the configuration: nothing is loaded.
        sup.configuration.pick.output_format.report_all_external_metrics = False
        external = asyncio.run(sup.get_external(qobj, [1]))
        self.assertEqual(len(external), 0)

        # 3. The values below are meaningless; they only verify that we load
        # *only* the metrics mentioned in some section of the configuration.
        # We should *NOT* get 'float', as it is not present in any section.
        sup.configuration.scoring.scoring["external.int"] = MinMaxScore(
            rescaling="max", filter=None)
        sup.configuration.scoring.requirements.parameters[
            "external.raw_float"] = SizeFilter(operator="gt", value=100)
        sup.configuration.scoring.cds_requirements.parameters[
            "external.raw_int"] = SizeFilter(operator="lt", value=1)
        sup.configuration.scoring.as_requirements.parameters[
            "external.raw_bool"] = SizeFilter(operator="lt", value=1)
        sup.configuration.scoring.not_fragmentary.parameters[
            "external.bool"] = SizeFilter(operator="ne", value=False)
        external = asyncio.run(sup.get_external(qobj, [1]))
        self.assertEqual(
            external, {
                'ENST00000560636': {
                    'int': (10, False),
                    'raw_float': (8.0, True),
                    'bool': (False, False),
                    'raw_int': (8, True),
                    'raw_bool': (True, True)
                }
            })
Esempio n. 3
0
    def test_retrieval(self):
        """Check which junctions ``Superlocus._load_introns`` marks as verified.

        Four junctions are stored in an in-memory SQLite database: one on a
        different chromosome, one on chromosome 15 but one megabase upstream,
        one at the right position on the ``-`` strand and one at the right
        position on the ``+`` strand. For every combination of transcript
        strand (``+``, ``-``, ``None``) and ``stranded`` flag, the test checks
        ``locus_verified_introns``:

        * ``stranded=False``: both correctly positioned junctions, whatever
          their strand;
        * ``stranded=True`` with a defined strand: a single entry made of the
          junction coordinates and the transcript strand;
        * ``stranded=True`` with an undefined strand: nothing.
        """
        engine = create_engine("sqlite:///:memory:")
        # Cleanups run in reverse order: the session is closed first, then
        # the engine is disposed, even if an assertion fails.
        self.addCleanup(engine.dispose)
        db.metadata.create_all(engine)
        SessionMaker = sessionmaker(bind=engine)
        session = SessionMaker()
        self.addCleanup(session.close)

        transcript = Transcript(accept_undefined_multi=True)
        transcript.chrom = "15"
        transcript.source = "protein_coding"
        transcript.start = 47631264
        transcript.end = 48051999

        exons = [(47631264, 47631416), (47704590, 47704669),
                 (47762671, 47762742), (47893062, 47893093),
                 (47895572, 47895655), (48051942, 48051999)]

        transcript.strand = "+"
        transcript.add_exons(exons)
        transcript.id = "ENST00000560636"
        transcript.parent = "ENSG00000137872"
        # NOTE(review): transcript2 is never used afterwards in this test;
        # kept in case copy() has side effects -- confirm and remove.
        transcript2 = transcript.copy()
        transcript2.id = "ENST00000560637"

        chrom_one = Chrom("1", 10**8)
        chrom_fifteen = Chrom("15", 5 * 10**8)
        session.add_all([chrom_one, chrom_fifteen])
        # Commit before creating the junctions, which read chrom_id below.
        session.commit()

        # Junction arguments, as listed by the original author:
        # (junction_start, junction_end, name, strand, score, chrom_id)
        # This junction is on a different chrom
        junction_chrom_one = Junction(47704669 + 1, 47762671 - 1, "chrom_one",
                                      "+", 10, chrom_one.chrom_id)
        # This junction is too far away
        outside_chrom_15 = Junction(47704669 - 10**6 + 1, 47762671 - 10**6 - 1,
                                    "chrom_15_outside", "+", 10,
                                    chrom_fifteen.chrom_id)
        # This junction is in the right place but wrong strand
        wrong_strand_chrom_15 = Junction(47704669 + 1, 47762671 - 1,
                                         "chrom_15_wrong_strand", "-", 10,
                                         chrom_fifteen.chrom_id)
        # This one is correct
        chrom_15_junction = Junction(47704669 + 1, 47762671 - 1, "chrom_15",
                                     "+", 10, chrom_fifteen.chrom_id)
        session.add_all([
            junction_chrom_one, outside_chrom_15, wrong_strand_chrom_15,
            chrom_15_junction
        ])
        session.commit()

        self.assertEqual(junction_chrom_one.chrom, "1")
        for junc in (outside_chrom_15, wrong_strand_chrom_15,
                     chrom_15_junction):
            self.assertEqual(junc.chrom, "15")

        for strand, stranded in itertools.product(("+", "-", None),
                                                  (True, False)):
            # The strand can only be changed on a non-finalized transcript,
            # hence the unfinalize/finalize pair around the assignment.
            transcript.unfinalize()
            transcript.strand = strand
            transcript.finalize()
            sup = Superlocus(transcript, stranded=stranded)
            # NOTE(review): this uses ".end" while every other assertion uses
            # ".junction_end" -- confirm the two are equivalent for Junction.
            self.assertIn(
                (chrom_15_junction.junction_start, chrom_15_junction.end),
                sup.introns, (chrom_15_junction, sup.introns))
            sup.session = session
            asyncio.run(sup._load_introns())

            if stranded is False:
                # Strand is ignored: both well-placed junctions are verified.
                expected = {(chrom_15_junction.junction_start,
                             chrom_15_junction.junction_end,
                             chrom_15_junction.strand),
                            (wrong_strand_chrom_15.junction_start,
                             wrong_strand_chrom_15.junction_end,
                             wrong_strand_chrom_15.strand)}
                self.assertEqual(sup.locus_verified_introns, expected,
                                 (stranded, strand))
            elif strand is None:
                # Stranded locus without a strand: nothing can be verified.
                self.assertEqual(sup.locus_verified_introns, set())
            else:
                # Stranded locus with a defined strand.
                expected = {(chrom_15_junction.junction_start,
                             chrom_15_junction.junction_end, strand)}
                self.assertEqual(sup.locus_verified_introns, expected,
                                 (stranded, strand))