def test_save_fasta(self):
    # Round-trip check for saveFASTA(): fetch two sequences from the bound
    # genome, write them out as FASTA, and read the file back line by line.
    #
    # The output goes into a private temporary directory instead of a fixed
    # "/tmp/..." path, so the test is portable, cannot collide with another
    # test run, and leaves nothing behind.
    import os
    import tempfile

    genome_mm10 = glbase3.genome()
    genome_mm10.bindSequence("test_data/seq")

    newl = [
        {"name": "A", "loc": glbase3.location(loc="chr1:100-150")},
        {"name": "X", "loc": glbase3.location(loc="chrA:100-150")},
    ]
    newgl = glbase3.genelist()
    newgl.load_list(newl)

    fasta = genome_mm10.getSequences(newgl)

    seq_chr1 = 'ATCAGACAGGTAGATCATCTCGCTCCGAGCTTGCCACCAGCAAACCATTGC'
    seq_chrA = 'GTAAAAACCCGATGGAATACTCATCCAGTAAGTCCGAACCACTTCAACATC'

    with tempfile.TemporaryDirectory() as scratch_dir:
        fasta_path = os.path.join(scratch_dir, "test_fasta.fa")

        def assert_file_starts_with(expected_lines):
            # Compare the leading lines of the saved file, in order,
            # ignoring surrounding whitespace / line endings.
            with open(fasta_path) as oh:
                for expected in expected_lines:
                    self.assertEqual(oh.readline().strip(), expected)

        # Explicit name keys: the expected headers combine both values.
        fasta.saveFASTA(filename=fasta_path, name=["loc", "name"])
        assert_file_starts_with([
            '>chr1:100-150_A',
            seq_chr1,
            '>chrA:100-150_X',
            seq_chrA,
        ])

        # No name argument: the expected headers carry only the location.
        fasta.saveFASTA(filename=fasta_path)
        assert_file_starts_with([
            '>chr1:100-150',
            seq_chr1,
            '>chrA:100-150',
            seq_chrA,
        ])
def test_get_sequences(self):
    # A genome bound to the "test_data/seq" sequence data must hand back the
    # expected bases when asked for a location given as a plain string.
    mm10 = glbase3.genome()
    mm10.bindSequence("test_data/seq")

    # (location string, expected sequence), checked in this order.
    expectations = (
        ("chr1:100-150",
         'ATCAGACAGGTAGATCATCTCGCTCCGAGCTTGCCACCAGCAAACCATTGC'),
        ("chrA:100-150",
         'GTAAAAACCCGATGGAATACTCATCCAGTAAGTCCGAACCACTTCAACATC'),
    )
    for loc_string, expected_seq in expectations:
        fetched = mm10.getSequence(loc_string)
        self.assertEqual(fetched, expected_seq)
def test_tsv_sniffer_force(self):
    # The input file is tab-separated. With force_tsv=True and the sniffer
    # format, the loaders should still split the columns so that "tss_loc"
    # is available as its own key on the first entry.
    load_kwargs = {
        "filename": "test_data/mm9_refGene.tsv",
        "force_tsv": True,
        "format": gl.format.sniffer,
    }
    sniffed_genelist = gl.genelist(**load_kwargs)
    sniffed_genome = gl.genome(**load_kwargs)
    # Microarrays and delayedlists can't be sniffed, so they are not
    # exercised here.

    # Guard against everything being lumped into a single key.
    expected_tss = "chr1:134212701-134212701"
    self.assertEqual(expected_tss, sniffed_genelist[0]["tss_loc"])
    self.assertEqual(expected_tss, sniffed_genome[0]["tss_loc"])
def test_force_tsvarg(self):
    # force_tsv passed as a keyword argument: every loader should split the
    # tab-separated file so "tss_loc" ends up in its own key.
    tsv_path = "test_data/mm9_refGene.tsv"
    expected_tss = "chr1:134212701-134212701"

    # This format loads tss_loc as strings.
    form = {"tss_loc": 1, "skiplines": 0}
    # delayedlists must have skiplines.
    form_delayed = {"tss_loc": 1, "skiplines": 0}

    from_genelist = gl.genelist(filename=tsv_path, force_tsv=True, format=form)
    from_delayedlist = gl.delayedlist(filename=tsv_path, format=form,
                                      force_tsv=True)
    from_genome = gl.genome(filename=tsv_path, format=form_delayed,
                            force_tsv=True)
    # Fake array data. Must be constructed last as it modifies format.
    from_expression = gl.expression(filename=tsv_path, format=form,
                                    force_tsv=True, expn="column[5:]")

    # Guard against everything being lumped into a single key. Indexing the
    # delayedlist should work as __getitem__() will return the zeroth entry.
    loaded_in_order = (
        from_genelist,
        from_delayedlist,
        from_genome,
        from_expression,
    )
    for loaded in loaded_in_order:
        self.assertEqual(expected_tss, loaded[0]["tss_loc"])
def test_force_tsv_format(self):
    # force_tsv supplied inside the format dictionary rather than as a
    # keyword argument: every loader should still split the tab-separated
    # file so "tss_loc" ends up in its own key.
    tsv_path = "test_data/mm9_refGene.tsv"
    expected_tss = "chr1:134212701-134212701"

    form = {"tss_loc": 1, "force_tsv": True, "chr": 1}
    # delayedlists must have skiplines.
    form_delayed = {"tss_loc": 1, "force_tsv": True, "skiplines": 0}

    from_genelist = gl.genelist(filename=tsv_path, format=form)
    from_delayedlist = gl.delayedlist(filename=tsv_path, format=form_delayed)
    from_genome = gl.genome(filename=tsv_path, format=form)
    # Must be constructed last as it modifies format.
    from_expression = gl.expression(filename=tsv_path, format=form,
                                    expn="column[5:]")

    # Guard against everything being lumped into a single key.
    loaded_in_order = (
        from_genelist,
        from_delayedlist,
        from_genome,
        from_expression,
    )
    for loaded in loaded_in_order:
        self.assertEqual(expected_tss, loaded[0]["tss_loc"])
def setUp(self):
    # Per-test fixtures: a genelist (self.a) and a genome (self.g), both
    # loaded from CSV test data using the sniffer format.
    formats = glbase3.format
    self.a = glbase3.genelist(
        filename="test_data/testA.csv",
        format=formats.sniffer,
    )
    self.g = glbase3.genome(
        filename="test_data/test-genome.csv",
        format=formats.sniffer,
    )