Example #1
0
 def test_index_various(self):
     """Indexes must build, validate, load, and rebuild for every format."""
     logger = create_default_logger("test_index_gff3")
     filenames = ("trinity.gff3", "trinity.gtf", "trinity.bed12",
                  "trinity.cDNA_match.gff3",
                  "trinity.match_matchpart.gff3")
     for fname in filenames:
         is_gff = fname.endswith("gff3")
         with self.subTest(
                 fname=fname), tempfile.TemporaryDirectory() as tmpdir:
             source = pkg_resources.resource_filename("Mikado.tests", fname)
             shutil.copy(source, tmpdir)
             annot_path = os.path.join(tmpdir, fname)
             midx_path = f"{annot_path}.midx"
             # First pass: build the index from scratch.
             indexing.create_index(parser_factory(annot_path),
                                   logger,
                                   midx_path,
                                   ref_gff=is_gff)
             self.assertTrue(os.path.exists(midx_path))
             self.assertGreater(os.stat(midx_path).st_size, 0)
             # The freshly built index must be valid and loadable.
             indexing.check_index(midx_path, logger)
             namespace = Namespace(default=False)
             namespace.reference = parser_factory(annot_path)
             _ = indexing.load_index(namespace, logger)
             # Second pass: rebuilding must log the removal of the old index.
             with self.assertLogs(logger, level="INFO") as log_capture:
                 indexing.create_index(parser_factory(annot_path),
                                       logger,
                                       midx_path,
                                       ref_gff=is_gff)
             removal_logged = [record for record in log_capture.records
                               if re.search(r"Removing the old index",
                                            record.msg)]
             self.assertTrue(removal_logged, log_capture.records)
Example #2
0
    def test_asn(self):
        """Serialise gzipped BLAST ASN archives against both target sets.

        For each of the two folders and two ASN flavours, load the archive
        into a fresh database inside a temporary directory.
        """
        # Currently DISABLED because the ASN specifications requires the database to be where indicated by the
        # relative path within the ASN. So for the time being this test is *not active*.

        asns = [
            os.path.join("blast", "asn", "blast.asn.gz"),
            os.path.join("blast_parse_seqids", "asn", "blast.asn.gz")
        ]

        for folder in ["sanitised", "uniprot"]:
            targets = os.path.join(self.master, folder, "uniprot.fasta")
            self.assertTrue(os.path.exists(targets))
            for asn in asns:
                asn = os.path.join(self.master, folder, asn)
                self.assertTrue(os.path.exists(asn))
                with tempfile.TemporaryDirectory() as out_folder:
                    # BUGFIX: build a fresh configuration for every ASN file
                    # (as test_daa/test_xml/test_tsv do). Previously a single
                    # conf was shared across the inner loop, so from the
                    # second iteration onwards conf.db_settings.db already
                    # held an *absolute* path inside a deleted temporary
                    # directory, and os.path.join(out_folder, <absolute>)
                    # silently returned that stale path.
                    conf = MikadoConfiguration()
                    conf.serialise.files.output_dir = out_folder
                    conf.serialise.files.blast_targets = [targets]
                    conf.serialise.files.transcripts = self.queries
                    conf.serialise.files.xml = [asn]
                    logger = create_default_logger(f"test_asn_{folder}")
                    out_db = os.path.join(out_folder, conf.db_settings.db)
                    conf.db_settings.db = out_db
                    self.run_loading(asn, out_db, logger, conf)
Example #3
0
 def test_not_existent(self):
     """An empty file is not a valid index: both entry points must raise."""
     logger = create_default_logger("test_not_existent")
     # check_index on an empty temporary file must flag it as corrupt.
     with tempfile.NamedTemporaryFile() as handle:
         with self.assertRaises(CorruptIndex):
             indexing.check_index(handle.name, logger)
     # load_index must fail in the same way.
     with tempfile.NamedTemporaryFile() as handle:
         with self.assertRaises(CorruptIndex):
             namespace = Namespace(default=False)
             namespace.reference = open(handle.name)
             indexing.load_index(namespace, logger)
Example #4
0
    def test_fusion(self):
        """A transcript bridging two reference genes must be called a fusion.

        Two non-overlapping reference transcripts (foo.1 and bar.1) are
        written to a GTF and indexed; t3 overlaps the tail of the first and
        the head of the second. With report_fusions=True the assigner must
        return one ("f", "j") result per matched gene; with
        report_fusions=False, a single ("j",) call.
        """

        t = Transcript()
        t.chrom, t.strand, t.start, t.end, t.id, t.parent = "Chr1", "+", 101, 1000, "foo.1", "foo"
        t.add_exons([(101, 500), (601, 800), (901, 1000)])
        t.finalize()
        t2 = Transcript()
        t2.chrom, t2.strand, t2.start, t2.end, t2.id, t2.parent = "Chr1", "+", 2001, 3000, "bar.1", "bar"
        t2.add_exons([(2001, 2500), (2601, 2800), (2901, 3000)])
        t2.finalize()

        # t3 spans both reference loci: the putative fusion transcript.
        t3 = Transcript()
        t3.chrom, t3.strand, t3.start, t3.end, t3.id, t3.parent = "Chr1", "+", 651, 2703, "faz.1", "faz"
        t3.add_exons([(651, 800), (901, 1300), (2230, 2500), (2601, 2703)])
        t3.finalize()

        logger = create_default_logger("test_fusion")
        with tempfile.TemporaryDirectory() as folder:
            with open(os.path.join(folder, "reference.gtf"),
                      "wt") as reference:
                print(t.format("gtf"), file=reference)
                print(t2.format("gtf"), file=reference)
            self.assertTrue(os.path.exists(reference.name))
            _ = [_ for _ in parser_factory(reference.name)]
            try:
                indexing.create_index(parser_factory(reference.name), logger,
                                      "{}.midx".format(reference.name))
            except InvalidParsingFormat:
                # FIX: was `self.assertFalse(True, msg)`; self.fail() is the
                # idiomatic way to report an unexpected exception.
                self.fail(
                    "\n".join([line.rstrip()
                               for line in open(reference.name)]))
            namespace = Namespace(default=False)
            namespace.out = os.path.join(folder, "out")
            for report in (False, True):
                with self.subTest(report=report):
                    namespace.report_fusions = report
                    assigner = Assigner("{}.midx".format(reference.name),
                                        args=namespace,
                                        printout_tmap=False)
                    result = assigner.get_best(t3)
                    if report:
                        # BUGFIX: was `assertTrue(len(result), 2)`, which can
                        # never fail for a non-empty result because 2 was
                        # silently treated as the assertion *message*.
                        self.assertEqual(len(result), 2)
                        # assertEqual gives a useful diff on failure, unlike
                        # assertTrue(a == b, msg).
                        self.assertEqual(result[0].ccode, ("f", "j"),
                                         str(result[0]))
                        self.assertEqual(result[1].ccode, ("f", "j"),
                                         str(result[1]))
                    else:
                        self.assertEqual(result.ccode, ("j", ), str(result))
Example #5
0
 def test_daa(self):
     """Serialise DIAMOND .daa output against both target sets."""
     daa_relative = os.path.join("diamond", "daa", "blast.daa")
     for folder in ("sanitised", "uniprot"):
         targets = os.path.join(self.master, folder, "uniprot.fasta")
         self.assertTrue(os.path.exists(targets))
         daa = os.path.join(self.master, folder, daa_relative)
         self.assertTrue(os.path.exists(daa))
         with tempfile.TemporaryDirectory() as out_folder:
             # Fresh configuration per run, with the database relocated
             # inside the temporary output folder.
             conf = MikadoConfiguration()
             out_db = os.path.join(out_folder, conf.db_settings.db)
             conf.db_settings.db = out_db
             conf.serialise.files.output_dir = out_folder
             conf.serialise.files.transcripts = self.queries
             conf.serialise.files.blast_targets = [targets]
             conf.serialise.files.xml = [daa]
             logger = create_default_logger(f"test_daa_{folder}")
             # Sanity check: the database lives under the system temp dir.
             self.assertTrue(tempfile.gettempdir() in out_db)
             self.run_loading(daa, out_db, logger, conf)
Example #6
0
 def test_xml(self):
     """Serialise gzipped BLAST XML from every aligner sub-folder."""
     xml_relative = os.path.join("xml", "blast.xml.gz")
     for folder in ("sanitised", "uniprot"):
         for subfolder in ("blast", "blast_parse_seqids", "diamond"):
             targets = os.path.join(self.master, folder, "uniprot.fasta")
             self.assertTrue(os.path.exists(targets))
             xml = os.path.join(self.master, folder, subfolder,
                                xml_relative)
             self.assertTrue(os.path.exists(xml))
             with tempfile.TemporaryDirectory() as out_folder:
                 # Fresh configuration per run; the database is relocated
                 # inside the temporary output folder.
                 conf = MikadoConfiguration()
                 out_db = os.path.join(out_folder, conf.db_settings.db)
                 conf.db_settings.db = out_db
                 conf.serialise.files.output_dir = out_folder
                 conf.serialise.files.transcripts = self.queries
                 conf.serialise.files.blast_targets = [targets]
                 conf.serialise.files.xml = [xml]
                 logger = create_default_logger(
                     f"test_xml_{folder}_{subfolder}")
                 # Sanity check: the database lives under the temp dir.
                 self.assertTrue(tempfile.gettempdir() in out_db)
                 self.run_loading(xml, out_db, logger, conf)
Example #7
0
 def test_tsv(self):
     """Serialise gzipped tabular BLAST output from every sub-folder."""
     tsv_relative = os.path.join("tsv", "blast.tsv.gz")
     for folder in ("uniprot", "sanitised"):
         for subfolder in ("blast", "blast_parse_seqids", "diamond"):
             targets = os.path.join(self.master, folder, "uniprot.fasta")
             self.assertTrue(os.path.exists(targets))
             tsv = os.path.join(self.master, folder, subfolder,
                                tsv_relative)
             self.assertTrue(os.path.exists(tsv))
             with tempfile.TemporaryDirectory() as out_folder:
                 # Fresh configuration per run; the database is relocated
                 # inside the temporary output folder.
                 conf = MikadoConfiguration()
                 conf.serialise.files.output_dir = out_folder
                 conf.serialise.files.transcripts = self.queries
                 conf.serialise.files.blast_targets = [targets]
                 conf.serialise.files.xml = [tsv]
                 out_db = os.path.join(out_folder, conf.db_settings.db)
                 conf.db_settings.db = out_db
                 # Sanity check: the database lives under the temp dir.
                 self.assertTrue(tempfile.gettempdir() in out_db)
                 logger = create_default_logger(
                     f"test_tsv_{folder}_{subfolder}", level="DEBUG")
                 self.run_loading(tsv, out_db, logger, conf)
Example #8
0
    def test_subprocess_multi_empty_orfs(self):
        """Serialising ORFs that do not match the prepared transcripts must
        abort with InvalidSerialization and log the failure, both with
        multiple processes (3) and with a single process (1).
        """
        print("Started light test")
        self.fai = pysam.FastaFile(
            pkg_resources.resource_filename("Mikado.tests", "chr5.fas.gz"))
        self.configuration = configurator.load_and_validate_config(None)
        self.configuration.reference.genome = self.fai.filename.decode()

        xml = pkg_resources.resource_filename("Mikado.tests",
                                              "chunk-001-proteins.xml.gz")
        transcripts = pkg_resources.resource_filename("Mikado.tests",
                                                      "mikado_prepared.fasta")
        junctions = pkg_resources.resource_filename("Mikado.tests",
                                                    "junctions.bed")
        # Single hand-crafted ORF line; it is expected to be inconsistent
        # with the transcripts already present in the database, which is
        # what triggers the failure under test.
        tmp_orf = tempfile.NamedTemporaryFile(suffix=".bed12")
        tmp_orf.write(b"#track\n")
        tmp_orf.write(
            b"cufflinks_star_at.23553.1\t0\t1733\tID=1_1;partial=01;start_type=ATG\t0\t+\t312\t1733\t0,0,0\t1\t1733\t0\n"
        )
        tmp_orf.flush()
        uniprot = pkg_resources.resource_filename(
            "Mikado.tests", "uniprot_sprot_plants.fasta.gz")
        mobjects = 300  # Let's test properly the serialisation for BLAST

        # Set up the command arguments
        with tempfile.TemporaryDirectory(prefix="has_to_fail") as folder_one, \
                tempfile.TemporaryDirectory(prefix="has_to_fail") as folder_two:
            # One pass multi-process (3 workers), one single-process.
            for procs, folder in [(3, folder_one), (1, folder_two)]:
                # Reload a pristine configuration for every pass.
                self.configuration = configurator.load_and_validate_config(
                    None)
                self.configuration.reference.genome = self.fai.filename.decode(
                )
                # NOTE(review): json_file is computed but never used below.
                json_file = os.path.join(folder, "mikado.yaml")
                db = os.path.join(folder, "mikado.db")
                log = "failed_serialise.log"
                uni_out = os.path.join(folder, "uniprot_sprot_plants.fasta")
                # NOTE(review): files.log is assigned here and again further
                # down with the same value; one assignment looks redundant.
                self.configuration.serialise.files.log = os.path.basename(log)
                self.configuration.multiprocessing_method = "fork"
                # Decompress the UniProt targets into the working folder.
                with gzip.open(uniprot,
                               "rb") as uni, open(uni_out,
                                                  "wb") as uni_out_handle:
                    uni_out_handle.write(uni.read())
                self.configuration.serialise.files.transcripts = transcripts
                self.configuration.serialise.files.blast_targets = uni_out
                self.configuration.serialise.files.log = log
                self.configuration.serialise.files.output_dir = folder
                self.configuration.serialise.force = True
                self.configuration.serialise.files.orfs = [tmp_orf.name]
                self.configuration.serialise.files.junctions = [junctions]
                self.configuration.serialise.files.xml = [xml]
                self.configuration.threads = procs
                self.configuration.serialise.max_objects = mobjects
                self.configuration.db_settings.db = db
                # Fixed seed keeps the two passes deterministic.
                self.configuration.seed = 1078

                self.assertFalse(os.path.exists(db))
                logger = create_default_logger(f"test_light_serialise_{procs}",
                                               level="INFO")

                # The mismatched ORFs must make serialisation fail loudly.
                with self.assertRaises(InvalidSerialization), self.assertLogs(
                        logger.name) as cmo:
                    load_orfs(self.configuration, logger)

                # Both the generic failure message and the specific ORF
                # mismatch explanation must appear in the captured log.
                self.assertTrue(
                    any("Mikado serialise failed due to problems with the input data. Please check the logs."
                        in line for line in cmo.output), cmo.output)
                self.assertTrue(
                    any("The provided ORFs do not match the transcripts provided and "
                        "already present in the database." in line
                        for line in cmo.output), print("\n".join(cmo.output)))
        print("Finished light test")