def test_stat(self):
    """Check the statistics report produced for each Trinity annotation.

    The expected report is the ``trinity_stats.txt`` test resource. For each
    of the four annotation files, ``Calculator`` is run with a temporary
    text file as ``namespace.out``; that file must be non-empty and its
    right-stripped lines must equal the right-stripped expected lines.
    """
    annotations = [
        pkg_resources.resource_filename("Mikado.tests", basename)
        for basename in ("trinity.gtf",
                         "trinity.gff3",
                         "trinity.cDNA_match.gff3",
                         "trinity.match_matchpart.gff3")
    ]

    # The resource stream yields bytes, hence the decode before stripping.
    with pkg_resources.resource_stream("Mikado.tests", "trinity_stats.txt") as t_stats:
        expected = [raw.decode().rstrip() for raw in t_stats]

    namespace = Namespace(default=False)
    namespace.tab_stats = None
    for filename in annotations:
        with self.subTest(filename=filename):
            namespace.gff = to_gff(filename)
            report_name = "{}.txt".format(os.path.basename(filename))
            report_path = os.path.join(tempfile.gettempdir(), report_name)
            with open(report_path, "w") as out:
                namespace.out = out
                Calculator(namespace)()
            self.assertGreater(os.stat(out.name).st_size, 0)
            with open(out.name) as out_handle:
                observed = [row.rstrip() for row in out_handle]
            self.assertEqual(expected, observed)
            os.remove(out.name)
def test_prepare_trinity_gff(self):
    """Run ``prepare`` on each Trinity annotation and check the FASTA output.

    For every input file, the names and lengths of the sequences found in
    ``mikado_prepared.fasta`` (inside the configured output directory) must
    equal ``self.trinity_res``. The ``.fai`` index is removed afterwards.
    """
    self.conf["prepare"]["files"]["labels"].append("tr")
    self.conf["prepare"]["files"]["output_dir"] = tempfile.gettempdir()
    args = Namespace()
    args.json_conf = self.conf

    trinity_inputs = ("trinity.gff3",
                      "trinity.match_matchpart.gff3",
                      "trinity.cDNA_match.gff3",
                      "trinity.gtf")
    for test_file in trinity_inputs:
        with self.subTest(test_file=test_file):
            annotation = pkg_resources.resource_filename("Mikado.tests", test_file)
            self.conf["prepare"]["files"]["gff"] = [annotation]

            prepare.prepare(args, self.logger)

            # Now that the program has run, let's check the output
            prepared_fasta = os.path.join(
                self.conf["prepare"]["files"]["output_dir"],
                "mikado_prepared.fasta")
            fa = pyfaidx.Fasta(prepared_fasta)
            res = {name: len(fa[name]) for name in fa.keys()}
            fa.close()
            self.assertEqual(res, self.trinity_res)
            # Only the index sitting next to the FASTA is removed here.
            os.remove(prepared_fasta + ".fai")
def test_stat(self):
    """Check the statistics report produced for each Trinity annotation.

    The expected report is the ``trinity_stats.txt`` test resource. For each
    of the four annotation files, ``Calculator`` is run with a temporary
    text file as ``namespace.out``; that file must be non-empty and its
    right-stripped lines must equal the right-stripped expected lines.
    """
    annotations = [
        pkg_resources.resource_filename("Mikado.tests", basename)
        for basename in ("trinity.gtf",
                         "trinity.gff3",
                         "trinity.cDNA_match.gff3",
                         "trinity.match_matchpart.gff3")
    ]

    # The resource stream yields bytes, hence the decode before stripping.
    with pkg_resources.resource_stream("Mikado.tests", "trinity_stats.txt") as t_stats:
        expected = [raw.decode().rstrip() for raw in t_stats]

    namespace = Namespace(default=False)
    namespace.tab_stats = None
    for filename in annotations:
        with self.subTest(filename=filename):
            namespace.gff = to_gff(filename)
            report_name = "{}.txt".format(os.path.basename(filename))
            report_path = os.path.join(tempfile.gettempdir(), report_name)
            with open(report_path, "w") as out:
                namespace.out = out
                Calculator(namespace)()
            self.assertGreater(os.stat(out.name).st_size, 0)
            with open(out.name) as out_handle:
                observed = [row.rstrip() for row in out_handle]
            self.assertEqual(expected, observed)
            os.remove(out.name)
def test_prepare_trinity_gff(self):
    """Run ``prepare`` on each Trinity annotation and check the FASTA output.

    For every input file, the names and lengths of the sequences found in
    ``mikado_prepared.fasta`` (inside the configured output directory) must
    equal ``self.trinity_res``. The ``.fai`` index is removed afterwards.
    """
    self.conf["prepare"]["files"]["labels"].append("tr")
    self.conf["prepare"]["files"]["output_dir"] = tempfile.gettempdir()
    args = Namespace()
    args.json_conf = self.conf

    trinity_inputs = ("trinity.gff3",
                      "trinity.match_matchpart.gff3",
                      "trinity.cDNA_match.gff3",
                      "trinity.gtf")
    for test_file in trinity_inputs:
        with self.subTest(test_file=test_file):
            annotation = pkg_resources.resource_filename("Mikado.tests", test_file)
            self.conf["prepare"]["files"]["gff"] = [annotation]

            prepare.prepare(args, self.logger)

            # Now that the program has run, let's check the output
            prepared_fasta = os.path.join(
                self.conf["prepare"]["files"]["output_dir"],
                "mikado_prepared.fasta")
            fa = pyfaidx.Fasta(prepared_fasta)
            res = {name: len(fa[name]) for name in fa.keys()}
            fa.close()
            self.assertEqual(res, self.trinity_res)
            # Only the index sitting next to the FASTA is removed here.
            os.remove(prepared_fasta + ".fai")
def test_compare_trinity(self):
    """Compare every ordered pair of Trinity annotations against each other.

    For each (reference, prediction) permutation of the four annotation
    files, ``compare`` must leave a log file plus non-empty ``refmap``,
    ``stats`` and ``tmap`` files in the temporary directory. The refmap
    must contain exactly 38 rows, each with a ``ccode`` among
    ``"_"``, ``"="``, ``"f,_"`` and ``"f,="``.
    """
    # Create the list of files
    files = [
        "trinity.gtf",
        "trinity.gff3",
        "trinity.cDNA_match.gff3",
        "trinity.match_matchpart.gff3",
    ]
    files = [
        pkg_resources.resource_filename("Mikado.tests", filename)
        for filename in files
    ]

    namespace = Namespace(default=False)
    namespace.distance = 2000
    namespace.no_save_index = True

    # Enumerating once yields the same pairs, in the same order, as
    # permuting the bare paths, without repeated ``files.index`` lookups.
    indexed_pairs = itertools.permutations(enumerate(files), 2)
    for (ref_idx, ref), (pred_idx, pred) in indexed_pairs:
        with self.subTest(ref=ref, pred=pred):
            namespace.reference = to_gff(ref)
            namespace.prediction = to_gff(pred)
            prefix = os.path.join(
                tempfile.gettempdir(),
                "compare_{}_{}".format(ref_idx, pred_idx))
            namespace.log = "{}.log".format(prefix)
            namespace.out = prefix
            compare(namespace)
            refmap = "{}.refmap".format(namespace.out)
            tmap = "{}.tmap".format(namespace.out)
            stats = "{}.stats".format(namespace.out)

            self.assertTrue(os.path.exists(namespace.log))
            for fname in [refmap, stats, tmap]:
                self.assertTrue(os.path.exists(fname))
                self.assertGreater(os.stat(fname).st_size, 0)

            with open(refmap) as refmap_handle:
                reader = csv.DictReader(refmap_handle, delimiter="\t")
                # Stays at 0 when the refmap has a header but no rows.
                counter = 0
                for counter, line in enumerate(reader, start=1):
                    self.assertIn(line["ccode"],
                                  ("_", "=", "f,_", "f,="),
                                  (ref, pred, line))
            self.assertEqual(counter, 38)

    # Plain loops: ``os.remove`` is called for its side effect only, so
    # there is no list worth building.
    for suff in ["log", "refmap", "tmap", "stats"]:
        pattern = os.path.join(tempfile.gettempdir(),
                               "compare_*.{}".format(suff))
        for leftover in glob.glob(pattern):
            os.remove(leftover)
def test_prepare_trinity_and_cufflinks(self):
    """Run ``prepare`` on the Cufflinks file together with each Trinity file.

    After every run ``mikado_prepared.fasta`` must exist in the output
    directory, be non-empty, and hold exactly the sequence names and
    lengths of ``self.trinity_res`` updated with ``self.cuff_results``.
    """
    self.conf["prepare"]["files"]["labels"] = ["cl", "tr"]
    cufflinks = pkg_resources.resource_filename("Mikado.tests", "cufflinks.gtf")
    self.conf["prepare"]["files"]["gff"].append(cufflinks)
    # Slot overwritten through index 1 in every sub-test below; presumably
    # the "gff" list is empty on entry -- TODO confirm against setUp.
    self.conf["prepare"]["files"]["gff"].append("")
    self.conf["prepare"]["files"]["output_dir"] = tempfile.gettempdir()
    self.conf["prepare"]["files"]["out_fasta"] = "mikado_prepared.fasta"
    self.conf["prepare"]["files"]["out"] = "mikado_prepared.gtf"
    args = Namespace()
    args.json_conf = self.conf

    trinity_inputs = ("trinity.gff3",
                      "trinity.match_matchpart.gff3",
                      "trinity.cDNA_match.gff3",
                      "trinity.gtf")
    for test_file in trinity_inputs:
        with self.subTest(test_file=test_file):
            trinity = pkg_resources.resource_filename("Mikado.tests", test_file)
            self.conf["prepare"]["files"]["gff"][1] = trinity
            self.conf["prepare"]["files"]["out_fasta"] = "mikado_prepared.fasta"
            self.conf["prepare"]["files"]["out"] = "mikado_prepared.gtf"
            prepare.prepare(args, self.logger)

            # Now that the program has run, let's check the output
            prepared_fasta = os.path.join(
                self.conf["prepare"]["files"]["output_dir"],
                "mikado_prepared.fasta")
            self.assertTrue(os.path.exists(prepared_fasta))
            self.assertGreater(os.stat(prepared_fasta).st_size, 0)

            fa = pyfaidx.Fasta(prepared_fasta)
            res = {name: len(fa[name]) for name in fa.keys()}
            fa.close()
            precal = self.trinity_res.copy()
            precal.update(self.cuff_results)
            self.assertEqual(res, precal)
            os.remove(prepared_fasta + ".fai")
def test_mikado_config(self):
    """Create a Mikado configuration file and check that it validates.

    ``create_config`` writes to ``configuration.yaml`` in the temporary
    directory. The file must be non-empty and must go through ``to_json``
    and ``check_json`` without raising. The file is removed even when a
    check fails, so a stale copy cannot leak into later tests.
    """
    namespace = Namespace(default=False)
    namespace.scoring = None
    namespace.intron_range = None
    namespace.reference = ""
    namespace.external = None
    namespace.mode = ["permissive"]
    namespace.threads = 1
    namespace.blast_targets = []
    namespace.junctions = []

    out = os.path.join(tempfile.gettempdir(), "configuration.yaml")
    try:
        with open(out, "w") as out_handle:
            namespace.out = out_handle
            Mikado.subprograms.configure.create_config(namespace)
        # Checked after the handle is closed so buffered data is on disk.
        self.assertGreater(os.stat(out).st_size, 0)
        conf = Mikado.configuration.configurator.to_json(out)
        conf = Mikado.configuration.configurator.check_json(conf)
        # NOTE(review): the validated result is validated a second time;
        # presumably to confirm that an already-checked configuration is
        # still accepted -- confirm the intent.
        Mikado.configuration.configurator.check_json(conf)
    finally:
        if os.path.exists(out):
            os.remove(out)
def test_index(self):
    """Index each Trinity annotation with ``compare`` and reload the index.

    Every resource file is copied into the temporary directory and passed
    to ``compare`` with ``namespace.index = True``. The log and the
    ``<reference>.midx`` file must then exist, the index must be non-empty,
    and ``load_index`` must return two dictionaries, the first of which
    holds 38 entries.

    The copy, the log and the index are removed in a ``finally`` clause:
    a stale ``.midx`` or log left behind by a failed run would otherwise
    satisfy the existence checks of the next run.
    """
    # Create the list of files
    files = ["trinity.gtf",
             "trinity.gff3",
             "trinity.cDNA_match.gff3",
             "trinity.match_matchpart.gff3"]

    namespace = Namespace(default=False)
    namespace.distance = 2000
    namespace.index = True
    namespace.prediction = None
    namespace.log = os.path.join(tempfile.gettempdir(), "index.log")
    logger = create_null_logger("null")

    for ref in files:
        with self.subTest(ref=ref):
            temp_ref = os.path.join(tempfile.gettempdir(), ref)
            midx = None
            try:
                with pkg_resources.resource_stream("Mikado.tests", ref) as ref_handle, \
                        open(temp_ref, "wb") as out_handle:
                    out_handle.write(ref_handle.read())
                namespace.reference = to_gff(temp_ref)
                # The index name is derived from the name of the opened
                # reference handle.
                midx = "{}.midx".format(namespace.reference.name)
                compare(namespace)

                self.assertTrue(os.path.exists(namespace.log))
                self.assertTrue(os.path.exists(midx))
                self.assertGreater(os.stat(midx).st_size, 0)
                genes, positions = load_index(namespace, logger)
                self.assertIsInstance(genes, dict)
                self.assertIsInstance(positions, dict)
                self.assertEqual(len(genes), 38)
            finally:
                # ``midx`` is still None when opening the reference failed.
                for leftover in (temp_ref, namespace.log, midx):
                    if leftover is not None and os.path.exists(leftover):
                        os.remove(leftover)
def test_prepare_trinity_and_cufflinks(self):
    """Run ``prepare`` on the Cufflinks file together with each Trinity file.

    After every run ``mikado_prepared.fasta`` must exist in the output
    directory, be non-empty, and hold exactly the sequence names and
    lengths of ``self.trinity_res`` updated with ``self.cuff_results``.
    """
    self.conf["prepare"]["files"]["labels"] = ["cl", "tr"]
    cufflinks = pkg_resources.resource_filename("Mikado.tests", "cufflinks.gtf")
    self.conf["prepare"]["files"]["gff"].append(cufflinks)
    # Slot overwritten through index 1 in every sub-test below; presumably
    # the "gff" list is empty on entry -- TODO confirm against setUp.
    self.conf["prepare"]["files"]["gff"].append("")
    self.conf["prepare"]["files"]["output_dir"] = tempfile.gettempdir()
    self.conf["prepare"]["files"]["out_fasta"] = "mikado_prepared.fasta"
    self.conf["prepare"]["files"]["out"] = "mikado_prepared.gtf"
    args = Namespace()
    args.json_conf = self.conf

    trinity_inputs = ("trinity.gff3",
                      "trinity.match_matchpart.gff3",
                      "trinity.cDNA_match.gff3",
                      "trinity.gtf")
    for test_file in trinity_inputs:
        with self.subTest(test_file=test_file):
            trinity = pkg_resources.resource_filename("Mikado.tests", test_file)
            self.conf["prepare"]["files"]["gff"][1] = trinity
            self.conf["prepare"]["files"]["out_fasta"] = "mikado_prepared.fasta"
            self.conf["prepare"]["files"]["out"] = "mikado_prepared.gtf"
            prepare.prepare(args, self.logger)

            # Now that the program has run, let's check the output
            prepared_fasta = os.path.join(
                self.conf["prepare"]["files"]["output_dir"],
                "mikado_prepared.fasta")
            self.assertTrue(os.path.exists(prepared_fasta))
            self.assertGreater(os.stat(prepared_fasta).st_size, 0)

            fa = pyfaidx.Fasta(prepared_fasta)
            res = {name: len(fa[name]) for name in fa.keys()}
            fa.close()
            precal = self.trinity_res.copy()
            precal.update(self.cuff_results)
            self.assertEqual(res, precal)
            os.remove(prepared_fasta + ".fai")
def test_compare_trinity(self):
    """Compare every ordered pair of Trinity annotations against each other.

    For each (reference, prediction) permutation of the four annotation
    files, ``compare`` must leave a log file plus non-empty ``refmap``,
    ``stats`` and ``tmap`` files in the temporary directory. The refmap
    must contain exactly 38 rows, each with a ``ccode`` among
    ``"_"``, ``"="``, ``"f,_"`` and ``"f,="``.
    """
    # Create the list of files
    files = [
        "trinity.gtf",
        "trinity.gff3",
        "trinity.cDNA_match.gff3",
        "trinity.match_matchpart.gff3",
    ]
    files = [
        pkg_resources.resource_filename("Mikado.tests", filename)
        for filename in files
    ]

    namespace = Namespace(default=False)
    namespace.distance = 2000
    namespace.no_save_index = True

    # Enumerating once yields the same pairs, in the same order, as
    # permuting the bare paths, without repeated ``files.index`` lookups.
    indexed_pairs = itertools.permutations(enumerate(files), 2)
    for (ref_idx, ref), (pred_idx, pred) in indexed_pairs:
        with self.subTest(ref=ref, pred=pred):
            namespace.reference = to_gff(ref)
            namespace.prediction = to_gff(pred)
            prefix = os.path.join(
                tempfile.gettempdir(),
                "compare_{}_{}".format(ref_idx, pred_idx))
            namespace.log = "{}.log".format(prefix)
            namespace.out = prefix
            compare(namespace)
            refmap = "{}.refmap".format(namespace.out)
            tmap = "{}.tmap".format(namespace.out)
            stats = "{}.stats".format(namespace.out)

            self.assertTrue(os.path.exists(namespace.log))
            for fname in [refmap, stats, tmap]:
                self.assertTrue(os.path.exists(fname))
                self.assertGreater(os.stat(fname).st_size, 0)

            with open(refmap) as refmap_handle:
                reader = csv.DictReader(refmap_handle, delimiter="\t")
                # Stays at 0 when the refmap has a header but no rows.
                counter = 0
                for counter, line in enumerate(reader, start=1):
                    self.assertIn(line["ccode"],
                                  ("_", "=", "f,_", "f,="),
                                  (ref, pred, line))
            self.assertEqual(counter, 38)

    # Plain loops: ``os.remove`` is called for its side effect only, so
    # there is no list worth building.
    for suff in ["log", "refmap", "tmap", "stats"]:
        pattern = os.path.join(tempfile.gettempdir(),
                               "compare_*.{}".format(suff))
        for leftover in glob.glob(pattern):
            os.remove(leftover)
def test_index(self):
    """Index each Trinity annotation with ``compare`` and reload the index.

    Every resource file is copied into the temporary directory and passed
    to ``compare`` with ``namespace.index = True``. The log and the
    ``<reference>.midx`` file must then exist, the index must be non-empty,
    and ``load_index`` must return two dictionaries, the first of which
    holds 38 entries.

    The copy, the log and the index are removed in a ``finally`` clause:
    a stale ``.midx`` or log left behind by a failed run would otherwise
    satisfy the existence checks of the next run.
    """
    # Create the list of files
    files = ["trinity.gtf",
             "trinity.gff3",
             "trinity.cDNA_match.gff3",
             "trinity.match_matchpart.gff3"]

    namespace = Namespace(default=False)
    namespace.distance = 2000
    namespace.index = True
    namespace.prediction = None
    namespace.log = os.path.join(tempfile.gettempdir(), "index.log")
    logger = create_null_logger("null")

    for ref in files:
        with self.subTest(ref=ref):
            temp_ref = os.path.join(tempfile.gettempdir(), ref)
            midx = None
            try:
                with pkg_resources.resource_stream("Mikado.tests", ref) as ref_handle, \
                        open(temp_ref, "wb") as out_handle:
                    out_handle.write(ref_handle.read())
                namespace.reference = to_gff(temp_ref)
                # The index name is derived from the name of the opened
                # reference handle.
                midx = "{}.midx".format(namespace.reference.name)
                compare(namespace)

                self.assertTrue(os.path.exists(namespace.log))
                self.assertTrue(os.path.exists(midx))
                self.assertGreater(os.stat(midx).st_size, 0)
                genes, positions = load_index(namespace, logger)
                self.assertIsInstance(genes, dict)
                self.assertIsInstance(positions, dict)
                self.assertEqual(len(genes), 38)
            finally:
                # ``midx`` is still None when opening the reference failed.
                for leftover in (temp_ref, namespace.log, midx):
                    if leftover is not None and os.path.exists(leftover):
                        os.remove(leftover)
def test_daijin_config(self):
    """Create a Daijin configuration file and validate it.

    ``create_daijin_config`` writes to ``configuration.yaml`` in the
    temporary directory, using a scoring file picked at random from the
    ``scoring_files`` resources. The file must be non-empty and, once
    parsed as YAML, must be accepted by ``check_config``. The file is
    removed afterwards, whether or not the checks pass.
    """
    # Check the basic function actually functions
    _ = daijin_configurator.create_daijin_base_config()

    namespace = Namespace(default=False)
    namespace.r1 = []
    namespace.r2 = []
    namespace.samples = []
    namespace.strandedness = []
    namespace.asm_methods = []
    namespace.aligners = []
    namespace.modes = ["nosplit"]
    namespace.cluster_config = None
    namespace.scheduler = ""
    namespace.flank = None
    namespace.intron_range = None
    namespace.prot_db = []
    namespace.genome = self.__genomefile__.name
    namespace.transcriptome = ""
    namespace.name = "Daijin"
    namespace.out_dir = tempfile.gettempdir()
    namespace.threads = 1
    # NOTE(review): the scoring file differs between runs, so a failure
    # tied to one specific file will not reproduce deterministically.
    namespace.scoring = random.choice(
        pkg_resources.resource_listdir("Mikado.configuration", "scoring_files"))

    out = os.path.join(tempfile.gettempdir(), "configuration.yaml")
    try:
        with open(out, "wt") as out_handle:
            namespace.out = out_handle
            daijin_configurator.create_daijin_config(namespace, level="ERROR")
        # Checked after the handle is closed so buffered data is on disk.
        self.assertGreater(os.stat(out).st_size, 0)

        with open(out) as out_handle:
            # ``yaml.load`` without an explicit Loader is deprecated and
            # fails on recent PyYAML releases; ``safe_load`` works across
            # versions. Assumes the generated file holds only plain YAML
            # types -- TODO confirm.
            config = yaml.safe_load(out_handle)

        daijin_configurator.check_config(config)
    finally:
        if os.path.exists(out):
            os.remove(out)