Exemple #1
0
class _ScreedSeqInfoDict_ByName(object, UserDict.DictMixin):

    """Name-keyed seqInfoDict backed by a ScreedDB.

    Every record handed back is wrapped in a ``_ScreedSequenceInfo``.
    """

    def __init__(self, filepath):
        # Open the screed database found at ``filepath``.
        self.sdb = ScreedDB(filepath)

    def __getitem__(self, k):
        """Look up ``k`` in the database and wrap the result under ``k``."""
        record = self.sdb[k]
        return _ScreedSequenceInfo(k, record)

    def keys(self):
        """Return whatever the underlying database reports as its keys."""
        return self.sdb.keys()

    def itervalues(self):
        """Yield one wrapped record per database index, in index order.

        Records are loaded lazily, one ``loadRecordByIndex`` call per item,
        and each is wrapped under its own ``name`` attribute.
        """
        for index in xrange(len(self.sdb)):
            record = self.sdb.loadRecordByIndex(index)
            yield _ScreedSequenceInfo(record.name, record)

    def iteritems(self):
        """Yield ``(record name, wrapped record)`` pairs in itervalues order."""
        for info in self.itervalues():
            yield info.record.name, info
	def __init__(self):
		"""Fill ``self.ktable`` with the k-mers of every listed sequence file.

		Each file is opened with ScreedDB and every record's ``'sequence'``
		field is fed to ``ktable.consume``. Progress is printed to stdout.
		"""
		# K comes from ``self.theK``. The remaining two arguments are passed
		# to khmer.new_hashbits as-is (presumably table size and number of
		# tables -- TODO confirm against the khmer API).
		self.ktable = khmer.new_hashbits(self.theK, 1e9, 4)
		# Files to load; each one must be openable by ScreedDB.
		names = ('chr01.fsa', 'chr02.fsa', 'chr03.fsa', 'chr04.fsa',
			'chr05.fsa', 'chr06.fsa', 'chr07.fsa', 'chr08.fsa',
			'chr09.fsa', 'chr10.fsa', 'chr11.fsa', 'chr12.fsa',
			'chr13.fsa', 'chr14.fsa', 'chr15.fsa', 'chr16.fsa')

		for name in names:
			# The handle stays on the instance; after the loop ``self.fadb``
			# refers to the last file opened.
			self.fadb = ScreedDB(name)
			# Single-argument call form prints the same text under the
			# print statement and the print function.
			print(name)
			for key in self.fadb.keys():
				s = self.fadb[key]['sequence']
				self.ktable.consume(str(s))
		print("done consuming")
Exemple #3
0
class _ScreedSeqInfoDict_ByIndex(object, UserDict.DictMixin):
    """Index-keyed seqInfoDict backed by a ScreedDB."""
    def __init__(self, filepath):
        # Open the screed database found at ``filepath``.
        self.sdb = ScreedDB(filepath)

    def __getitem__(self, k):
        """Load the record at position ``int(k)``.

        The wrapper is created with ``k`` exactly as the caller passed it,
        not with the converted integer.
        """
        record = self.sdb.loadRecordByIndex(int(k))
        return _ScreedSequenceInfo(k, record)

    def keys(self):
        """Return the indices ``0 .. len(db) - 1`` as an xrange."""
        return xrange(0, len(self.sdb))

    def iterkeys(self):
        """Yield every index in ascending order."""
        for index in xrange(len(self.sdb)):
            yield index
Exemple #4
0
class Test_tri_fasta:
    """
    Test screed methods on the tri fasta file
    """
    def setup(self):
        """Open the ScreedDB stored at ``tri + '_screed'`` as ``self.db``."""
        self.db = ScreedDB(tri + '_screed')

    def tearDown(self):
        """Drop the database handle and run a garbage-collection pass."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration is a member; an unknown name is not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and item access both return the record named by the key; an
        unknown name gives None from get() and KeyError from item access
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "the previous line should raise a KeyError"

    def test_missing(self):
        """
        Make sure the mutating dict methods are unsupported: calling each of
        them must raise AttributeError.
        """
        db = self.db

        # (method name, positional arguments); each method is checked once.
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )
        for method, args in unsupported:
            try:
                getattr(db, method)(*args)
            except AttributeError:
                continue
            # Any other exception propagates; a clean return is a failure.
            assert 0, method + '() should raise AttributeError'

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities

        Only 'name', 'description' and the leading part of 'sequence' are
        compared; the 'id' values are not checked.
        """
        testcases = {}
        testcases['singleUn_100'] = {
            'id': 0,
            'description': '',
            'name': 'singleUn_100',
            'sequence': 'TTTAAACACGTGTCCGCGCCATTTTTTTATTTATTTACCGATCAAGTGCA'}
        testcases['singleUn_9'] = {
            'id': 2210,
            'description': '',
            'name': 'singleUn_9',
            'sequence': 'TTTAATTTTTTTACAACTCAAAATTTTGAGTAGTGTTTTAAATAGTACAC'}
        testcases['ChLG6'] = {
            'id': 2016,
            'description': '',
            'name': 'ChLG6',
            'sequence': 'CAAAAAAATTCATAACTCAAAAACTAAAAGTCGTAGAGCAATGCGGTTTG'}
        testcases['singleUn_286'] = {
            'id': 186,
            'description': '',
            'name': 'singleUn_286',
            'sequence': 'AAACTAAAACATCCTTTTCAGCATATTATTTGTTATATTTAAAAAAAAAC'}
        testcases['ChLG9'] = {
            'id': 2019,
            'description': '',
            'name': 'ChLG9',
            'sequence': 'CTGCCGATAATATTTCCTACCAGAAATAACCAATTTATTTTACGTATTAC'}

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #5
0
class Test_s22_fastq:
    """
    Test screed methods on the s22 fastq file
    """
    def setup(self):
        """Open the ScreedDB stored at ``tests22 + '_screed'`` as ``self.db``."""
        self.db = ScreedDB(tests22 + '_screed')

    def tearDown(self):
        """Drop the database handle and run a garbage-collection pass."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration is a member; an unknown name is not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and item access both return the record named by the key; an
        unknown name gives None from get() and KeyError from item access
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "the previous line should raise a KeyError"

    def test_missing(self):
        """
        Make sure the mutating dict methods are unsupported: calling each of
        them must raise AttributeError.
        """
        db = self.db

        # (method name, positional arguments); each method is checked once.
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )
        for method, args in unsupported:
            try:
                getattr(db, method)(*args)
            except AttributeError:
                continue
            # Any other exception propagates; a clean return is a failure.
            assert 0, method + '() should raise AttributeError'

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities

        Each expected dict is compared for equality against the whole record.
        """
        testcases = {}
        testcases['HWI-EAS_4_PE-FC20GCB:2:1:492:573/2'] = {
            'id': 0,
            'annotations': '',
            'quality': 'AA7AAA3+AAAAAA.AAA.;7;AA;;;;*;<1;<<<',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:1:492:573/2',
            'sequence': 'ACAGCAAAATTGTGATTGAGGATGAAGAACTGCTGT'}

        testcases['HWI-EAS_4_PE-FC20GCB:2:162:131:826/2'] = {
            'id': 1895228,
            'annotations': '',
            'quality': 'AAAAAAAAAAAAAAAAAAAAAA+@6=7A<05<*15:',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:162:131:826/2',
            'sequence': 'ATGAATACAAACAATGCGGCAGTCATAATGCCCCTC'}

        testcases['HWI-EAS_4_PE-FC20GCB:2:330:88:628/2'] = {
            'id': 3790455,
            'annotations': '',
            'quality': 'AA;AA??A5A;;+AA?AAAA;AA;9AA.AA?????9',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:330:88:628/2',
            'sequence': 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAA'}

        testcases['HWI-EAS_4_PE-FC20GCB:2:4:707:391/2'] = {
            'id': 29999,
            'annotations': '',
            'quality': 'AAAAAAAAAA@<)A*AAA6A::<@AA<>A>-8?>4<',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:4:707:391/2',
            'sequence': 'ATTAATCTCCAGTTTCTGGCAAACATTCAGGCCATT'}

        testcases['HWI-EAS_4_PE-FC20GCB:2:36:158:208/2'] = {
            'id': 342842,
            'annotations': '',
            'quality': 'AA5?AAAAA?AAAA5?AAA5A???5A>AAA4?;.;;',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:36:158:208/2',
            'sequence': 'TTTCCCTACAGAAGTGTCTGTACCGGTAATAAAGAA'}

        for case, expected in testcases.items():
            assert expected == self.db[case]
Exemple #6
0
class Test_mus_fasta:
    """
    Test screed methods on the mus_musculus fasta file
    """
    def setup(self):
        """Open the ScreedDB stored at ``mus + '_screed'`` as ``self.db``."""
        self.db = ScreedDB(mus + '_screed')

    def tearDown(self):
        """Drop the database handle and run a garbage-collection pass."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration is a member; an unknown name is not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and item access both return the record named by the key; an
        unknown name gives None from get() and KeyError from item access
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "the previous line should raise a KeyError"

    def test_missing(self):
        """
        Make sure the mutating dict methods are unsupported: calling each of
        them must raise AttributeError.
        """
        db = self.db

        # (method name, positional arguments); each method is checked once.
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )
        for method, args in unsupported:
            try:
                getattr(db, method)(*args)
            except AttributeError:
                continue
            # Any other exception propagates; a clean return is a failure.
            assert 0, method + '() should raise AttributeError'

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities

        Only 'name', 'description' and the leading part of 'sequence' are
        compared; the 'id' values are not checked.
        """
        testcases = {}
        testcases['9'] = {
            'id': 0,
            'description': 'dna_rm:chromosome chromosome:NCBIM37:9:1:124076'
            '172:1',
            'name': '9',
            'sequence': 'NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN'
            'NNNNNNNNNN'}

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #7
0
class Test_po_fasta:
    """
    Test screed methods on the pongo fasta file
    """
    def setup(self):
        """Open the ScreedDB stored at ``pongo + '_screed'`` as ``self.db``."""
        self.db = ScreedDB(pongo + '_screed')

    def tearDown(self):
        """Drop the database handle and run a garbage-collection pass."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration is a member; an unknown name is not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and item access both return the record named by the key; an
        unknown name gives None from get() and KeyError from item access
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "the previous line should raise a KeyError"

    def test_missing(self):
        """
        Make sure the mutating dict methods are unsupported: calling each of
        them must raise AttributeError.
        """
        db = self.db

        # (method name, positional arguments); each method is checked once.
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )
        for method, args in unsupported:
            try:
                getattr(db, method)(*args)
            except AttributeError:
                continue
            # Any other exception propagates; a clean return is a failure.
            assert 0, method + '() should raise AttributeError'

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities

        Only 'name', 'description' and the leading part of 'sequence' are
        compared; the 'id' values are not checked.
        """
        testcases = {}
        testcases['GENSCAN00000032971'] = {
            'id': 0,
            'description': 'cdna:Genscan chromosome:PPYG2:6_qbl_hap2_random'
            ':95622:98297:1',
            'name': 'GENSCAN00000032971',
            'sequence': 'ATGGCGCCCCGAACCCTCCTCCTGCTGCTCTCGGCGGCCCTGGCCCCGAC'
            'CGAGACCTGG'}
        testcases['GENSCAN00000042282'] = {
            'id': 53997,
            'description': 'cdna:Genscan chromosome:PPYG2:1:229892060:22989'
            '2800:1',
            'name': 'GENSCAN00000042282',
            'sequence': 'ATGATGCCATTGCAAGGACCCTCTGCAGGGCCTCAGTCCCGAGGATGGCA'
            'CACAGCCTTC'}
        testcases['GENSCAN00000051311'] = {
            'id': 30780,
            'description': 'cdna:Genscan chromosome:PPYG2:10:132962172:132'
            '962871:1',
            'name': 'GENSCAN00000051311',
            'sequence': 'ATGACCCAGCCACCTACCAGGCCGCTCTGCAGACCCCCCACGGGAGCAGC'
            'CTCTGCCCCC'}
        testcases['GENSCAN00000006030'] = {
            'id': 1469,
            'description': 'cdna:Genscan chromosome:PPYG2:14_random:1765749'
            ':1766075:-1',
            'name': 'GENSCAN00000006030',
            'sequence': 'ATGTGTGGCAACAAGGGCATTTCTGCCTTCCCTGAATCAGACCACCTTTT'
            'CACATGGGTA'}
        testcases['GENSCAN00000048263'] = {
            'id': 43029,
            'description': 'cdna:Genscan chromosome:PPYG2:6:100388173:10048'
            '5454:-1',
            'name': 'GENSCAN00000048263',
            'sequence': 'ATGTGTCCCTTTGAATATGCCGGAGAACAACAGTTGCCATGGATGTGTTC'
            'TGGGGAGCCC'}

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #8
0
class Test_s31_fastq:
    """
    Test screed methods on the s31 fastq file
    """
    def setup(self):
        """Open the ScreedDB stored at ``tests31 + '_screed'`` as ``self.db``."""
        self.db = ScreedDB(tests31 + '_screed')

    def tearDown(self):
        """Drop the database handle and run a garbage-collection pass."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration is a member; an unknown name is not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and item access both return the record named by the key; an
        unknown name gives None from get() and KeyError from item access
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "the previous line should raise a KeyError"

    def test_missing(self):
        """
        Make sure the mutating dict methods are unsupported: calling each of
        them must raise AttributeError.
        """
        db = self.db

        # (method name, positional arguments); each method is checked once.
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )
        for method, args in unsupported:
            try:
                getattr(db, method)(*args)
            except AttributeError:
                continue
            # Any other exception propagates; a clean return is a failure.
            assert 0, method + '() should raise AttributeError'

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities

        Each expected dict is compared for equality against the whole record.
        """
        testcases = {}
        testcases['HWI-EAS_4_PE-FC20GCB:3:1:71:840/1'] = {
            'id': 0,
            'annotations': '',
            'quality': 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC',
            'name': 'HWI-EAS_4_PE-FC20GCB:3:1:71:840/1',
            'sequence': 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'}

        testcases['HWI-EAS_4_PE-FC20GCB:3:330:957:433/1'] = {
            'id': 4439695,
            'annotations': '',
            'quality': 'AAAAAAAAAAA<A?<AA<AAAAA?AAA?<:*??&::',
            'name': 'HWI-EAS_4_PE-FC20GCB:3:330:957:433/1',
            'sequence': 'CTTTGTGGAGAAGAGGGCGTGGGCAAGGCACTGATA'}

        testcases['HWI-EAS_4_PE-FC20GCB:3:166:443:410/1'] = {
            'id': 2219847,
            'annotations': '',
            'quality': 'AAAAAAAAAAAAAAAAAAAAAAAA6<@AA959???%',
            'name': 'HWI-EAS_4_PE-FC20GCB:3:166:443:410/1',
            'sequence': 'TGGCATTCGCACACATCATGATGGTGCTGACCGTAA'}

        testcases['HWI-EAS_4_PE-FC20GCB:3:1:803:878/1'] = {
            'id': 2999,
            'annotations': '',
            'quality': '?6AAA6A<A6AA<<AA?A&A066/6:/&?&1191+0',
            'name': 'HWI-EAS_4_PE-FC20GCB:3:1:803:878/1',
            'sequence': 'AAGATGCTGTAGTGGCCGCATGTGTAATAGGCTTTA'}

        testcases['HWI-EAS_4_PE-FC20GCB:3:245:54:506/1'] = {
            'id': 3329772,
            'annotations': '',
            'quality': "AAAAAAAAAAAAAAAA>A+AAA+@AA+A>A%8*?'%",
            'name': 'HWI-EAS_4_PE-FC20GCB:3:245:54:506/1',
            'sequence': 'CTTCGTTGCTGTTTATCAGTAACTTTTTCTGGCTAG'}

        for case, expected in testcases.items():
            assert expected == self.db[case]
Exemple #9
0
class Test_sorex_fasta:
    """
    Test screed methods on the sorex fasta file
    """
    def setup(self):
        """Open the ScreedDB stored at ``sorex + '_screed'`` as ``self.db``."""
        self.db = ScreedDB(sorex + '_screed')

    def tearDown(self):
        """Drop the database handle and run a garbage-collection pass."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration is a member; an unknown name is not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and item access both return the record named by the key; an
        unknown name gives None from get() and KeyError from item access
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "the previous line should raise a KeyError"

    def test_missing(self):
        """
        Make sure the mutating dict methods are unsupported: calling each of
        them must raise AttributeError.
        """
        db = self.db

        # (method name, positional arguments); each method is checked once.
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )
        for method, args in unsupported:
            try:
                getattr(db, method)(*args)
            except AttributeError:
                continue
            # Any other exception propagates; a clean return is a failure.
            assert 0, method + '() should raise AttributeError'

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities

        Only 'name', 'description' and the leading part of 'sequence' are
        compared; the 'id' values are not checked.
        """
        testcases = {}
        testcases['scaffold_93039'] = {
            'id': 0,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_93'
            '039:1:203:1',
            'name': 'scaffold_93039',
            'sequence': 'GCTGAGCCTTGTAGTTCTGCTCCCTTTGACTGACGGCCCACTATGGACCG'
            'GAAAAACTAC'}

        testcases['scaffold_107701'] = {
            'id': 1,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_10'
            '7701:1:203:1',
            'name': 'scaffold_107701',
            'sequence': 'TAAACCCAAAATAAACATTCCCCAAATTATATTTCTTCCTTTCCTTCTGA'
            'ATAAAAGAAA'}

        testcases['GeneScaffold_6994'] = {
            'id': 243135,
            'description': 'dna:genescaffold genescaffold:COMMON_SHREW1:Gen'
            'eScaffold_6994:1:2349312:1',
            'name': 'GeneScaffold_6994',
            'sequence': 'TATTGAGAGAAGTGGGAACTTCTCTAGTGGTGGGGTATGGTGATGGAATG'
            'ATGTATGAAT'}

        testcases['scaffold_118324'] = {
            'id': 13823,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_11'
            '8324:1:884:1',
            'name': 'scaffold_118324',
            'sequence': 'CAGCCCCCTGCAACAAATTTTATACTCTAGAAACAGTTTAATGGCTGTTG'
            'GAATATTTCC'}

        testcases['scaffold_92895'] = {
            'id': 14573,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_92'
            '895:1:890:1',
            'name': 'scaffold_92895',
            'sequence': 'GGGAAGCTTGCAAGGCTGTCCCATGTGGGCAGGAAGCTCTCAGTAGCTTG'
            'CCAGTTTCTC'}

        testcases['scaffold_62271'] = {
            'id': 37101,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_62'
            '271:1:1064:1',
            'name': 'scaffold_62271',
            'sequence': 'AGAGTATCTCCCCCACATGGCAGAGCCTGGCAAGCTACCCATGGCGTATT'
            'CAATATGCCA'}

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #10
0
 def setup(self):
     """Open the ScreedDB stored at ``xeno + '_screed'`` as ``self.db``."""
     self.db = ScreedDB(xeno + '_screed')
Exemple #11
0
class Test_xeno_fasta:
    """
    Test screed methods on the xeno fasta file
    """
    def setup(self):
        """Open the ScreedDB stored at ``xeno + '_screed'`` as ``self.db``."""
        self.db = ScreedDB(xeno + '_screed')

    def tearDown(self):
        """Drop the database handle and run a garbage-collection pass."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration is a member; an unknown name is not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and item access both return the record named by the key; an
        unknown name gives None from get() and KeyError from item access
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "the previous line should raise a KeyError"

    def test_missing(self):
        """
        Make sure the mutating dict methods are unsupported: calling each of
        them must raise AttributeError.
        """
        db = self.db

        # (method name, positional arguments); each method is checked once.
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )
        for method, args in unsupported:
            try:
                getattr(db, method)(*args)
            except AttributeError:
                continue
            # Any other exception propagates; a clean return is a failure.
            assert 0, method + '() should raise AttributeError'

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities

        Only 'name', 'description' and the leading part of 'sequence' are
        compared; the 'id' values are not checked.
        """
        testcases = {}
        testcases['scaffold_20095'] = {
            'id': 0,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_20095:1:2'
            '001:1',
            'name': 'scaffold_20095',
            'sequence': 'GATGAGATCACCTTTCATGCTTTTTGTATCCCTATTATCTAGAGACAACAA'
            'ATCAGTTGC'}
        testcases['scaffold_1'] = {
            'id': 19500,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_1:1:781781'
            '4:1',
            'name': 'scaffold_1',
            'sequence': 'CCTCCCTTTTTGGCTGTCTTTTCACTGTATCATAGCCTGGCGTGAACCAAG'
            'CCTCAAAAA'}
        testcases['scaffold_271'] = {
            'id': 19230,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_271:1:156'
            '7461:1',
            'name': 'scaffold_271',
            'sequence': 'CGATTTTTGCGGAAAAACGCGAGTTTTTGGTAGCCATTCCGAAAGTTGCGA'
            'TTTTTTGTA'}
        testcases['scaffold_19901'] = {
            'id': 329,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_19901:1:22'
            '56:1',
            'name': 'scaffold_19901',
            'sequence': 'ATACCGCAAAGGTTTCTTTCTTCTCAGTGCTCCATGCTGCCTCTCTTGTTT'
            'TGCCTCCCT'}
        testcases['scaffold_95'] = {
            'id': 19408,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_95:1:28996'
            '70:1',
            'name': 'scaffold_95',
            'sequence': 'CCCTCCTGGTGATCCCACTTCAATCTCCCCATAGGCACACATCACTTCTAG'
            'CAGTTCACA'}

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #12
0
 def setup(self):
     """Open the ScreedDB stored at ``tri + '_screed'`` as ``self.db``."""
     self.db = ScreedDB(tri + '_screed')
Exemple #13
0
class Test_tri_fasta:
    """
    Test screed methods on the tri fasta file
    """
    def setup(self):
        """Open the ScreedDB stored at ``tri + '_screed'`` as ``self.db``."""
        self.db = ScreedDB(tri + '_screed')

    def tearDown(self):
        """Drop the database handle and run a garbage-collection pass."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration is a member; an unknown name is not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and item access both return the record named by the key; an
        unknown name gives None from get() and KeyError from item access
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "the previous line should raise a KeyError"

    def test_missing(self):
        """
        Make sure the mutating dict methods are unsupported: calling each of
        them must raise AttributeError.
        """
        db = self.db

        # (method name, positional arguments); each method is checked once.
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )
        for method, args in unsupported:
            try:
                getattr(db, method)(*args)
            except AttributeError:
                continue
            # Any other exception propagates; a clean return is a failure.
            assert 0, method + '() should raise AttributeError'

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities

        Only 'name', 'description' and the leading part of 'sequence' are
        compared; the 'id' values are not checked.
        """
        testcases = {}
        testcases['singleUn_100'] = {
            'id': 0,
            'description': '',
            'name': 'singleUn_100',
            'sequence': 'TTTAAACACGTGTCCGCGCCATTTTTTTATTTATTTACCGATCAAGTGCA'
        }
        testcases['singleUn_9'] = {
            'id': 2210,
            'description': '',
            'name': 'singleUn_9',
            'sequence': 'TTTAATTTTTTTACAACTCAAAATTTTGAGTAGTGTTTTAAATAGTACAC'
        }
        testcases['ChLG6'] = {
            'id': 2016,
            'description': '',
            'name': 'ChLG6',
            'sequence': 'CAAAAAAATTCATAACTCAAAAACTAAAAGTCGTAGAGCAATGCGGTTTG'
        }
        testcases['singleUn_286'] = {
            'id': 186,
            'description': '',
            'name': 'singleUn_286',
            'sequence': 'AAACTAAAACATCCTTTTCAGCATATTATTTGTTATATTTAAAAAAAAAC'
        }
        testcases['ChLG9'] = {
            'id': 2019,
            'description': '',
            'name': 'ChLG9',
            'sequence': 'CTGCCGATAATATTTCCTACCAGAAATAACCAATTTATTTTACGTATTAC'
        }

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            assert str(record['sequence']).startswith(expected['sequence'])
class myFirstUI(Directory):
    """Directory exposing a small k-mer browser backed by a khmer table.

    On construction every sequence of the listed screed databases is fed
    into a khmer hashbits table; the page methods then query that table.
    """
    _q_exports = ['', 'firstkMer', 'kmerNeighborhood']
    # k-mer length used for the table and for all neighbour computations
    theK = 17

    def __init__(self):
        """Build the k-mer table from the hard-coded chromosome databases."""
        # K is taken from theK; a plain ktable could be used instead:
        # khmer.new_ktable(12)
        self.ktable = khmer.new_hashbits(self.theK, 1e9, 4)
        # The files to load ('chr01.fsa' .. 'chr16.fsa'); they have to be
        # screed files.
        names = tuple('chr%02d.fsa' % number for number in range(1, 17))

        for name in names:
            # self.fadb is rebound on every pass, so afterwards it refers
            # to the last database only.
            self.fadb = ScreedDB(name)
            print(name)
            for key in self.fadb.keys():
                s = self.fadb[key]['sequence']
                self.ktable.consume(str(s))
        print("done consuming")

    def _q_index(self):
        """Return the text served for the directory index ('')."""
        return "kmer browser database"

    def firstkMer(self):
        """Return reverse_hash() of the first index whose get() is non-zero."""
        # NOTE(review): the scan has no upper bound; presumably it relies on
        # the table holding at least one k-mer -- confirm.
        i = 0
        while self.ktable.get(i) == 0:
            i += 1
        return self.ktable.reverse_hash(i)

    def addAllKmers(self, currentKmer, depth, maxDepth):
        """Recursively collect the neighbours of ``currentKmer``.

        For each base, the successor (k-mer shifted left with the base
        appended) and the predecessor (base prepended, last character
        dropped) are looked up in the table. Every hit is recorded as an
        edge in ``self.lines``; a k-mer not yet in ``self.liste`` is stored
        there with its depth and expanded in turn, until ``maxDepth``.

        ``self.liste`` and ``self.lines`` must already exist; they are
        created by kmerNeighborhood().
        """
        if depth >= maxDepth:
            return

        lead = currentKmer[0:(self.theK - 1)]
        trail = currentKmer[1:self.theK]
        for base in ('A', 'C', 'G', 'T'):
            # Successor first, then predecessor: the visiting order decides
            # which depth a k-mer is first recorded with.
            for s in (trail + base, base + lead):
                if self.ktable.get(s) != 0:
                    self.lines[currentKmer + '\t' + s] = 1
                    if s not in self.liste:
                        self.liste[s] = depth + 1
                        self.addAllKmers(s, depth + 1, maxDepth)

    def kmerNeighborhood(self):
        """Return the neighbourhood of the requested k-mer as plain text.

        Reads the form fields ``n`` (maximum depth) and ``kmer`` from the
        current request. The result starts with the number of k-mers found,
        followed by one ``kmer<TAB>depth`` line per k-mer and one
        ``kmer<TAB>neighbour`` line per edge.
        """
        request = quixote.get_request()
        form = request.form
        n = int(form['n'])
        print(n)
        # Fresh containers for every call; addAllKmers() fills them in.
        self.liste = {}
        self.lines = {}
        kmer = str(form['kmer'])
        self.liste[kmer] = 0
        self.addAllKmers(kmer, 0, n)

        parts = [str(len(self.liste)) + '\n']
        parts.extend(
            found + '\t' + str(found_depth) + '\n'
            for found, found_depth in self.liste.items()
        )
        parts.extend(edge + '\n' for edge in self.lines)
        return ''.join(parts)

    def interface(self):
        """Render kMerBrowserInterface.html with this method's locals."""
        # myVariable is not used directly here; it reaches the template
        # through locals(), together with self and template.
        myVariable = templatesdir
        template = env.get_template('kMerBrowserInterface.html')
        return template.render(locals())
Exemple #15
0
class Test_xeno_fasta:
    """
    Test screed methods on the xeno fasta file
    """
    def setup(self):
        """
        Creates the ScreedDB for the xeno path with the '_screed' suffix
        """
        self.db = ScreedDB(xeno + '_screed')

    def tearDown(self):
        """
        Drops the database reference and forces a garbage collection
        """
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration must pass a membership test and an
        unknown key must not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and indexing must both return the record named by the key; an
        unknown key gives None from get() and a KeyError from indexing
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
            assert False, "the previous line should raise a KeyError"
        except KeyError:
            pass

    def test_missing(self):
        """
        Make sure that unsupported dict attributes are actually missing.
        """
        db = self.db
        # (method name, call arguments) for every dict method that must
        # raise AttributeError on the database
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )

        for method_name, args in unsupported:
            try:
                getattr(db, method_name)(*args)
                assert 0, method_name + '() should not be available'
            except AttributeError:
                pass

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities
        """
        testcases = {}
        testcases['scaffold_20095'] = {
            'id': 0,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_20095:1:2'
                           '001:1',
            'name': 'scaffold_20095',
            'sequence': 'GATGAGATCACCTTTCATGCTTTTTGTATCCCTATTATCTAGAGACAACAA'
                        'ATCAGTTGC',
        }
        testcases['scaffold_1'] = {
            'id': 19500,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_1:1:781781'
                           '4:1',
            'name': 'scaffold_1',
            'sequence': 'CCTCCCTTTTTGGCTGTCTTTTCACTGTATCATAGCCTGGCGTGAACCAAG'
                        'CCTCAAAAA',
        }
        testcases['scaffold_271'] = {
            'id': 19230,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_271:1:156'
                           '7461:1',
            'name': 'scaffold_271',
            'sequence': 'CGATTTTTGCGGAAAAACGCGAGTTTTTGGTAGCCATTCCGAAAGTTGCGA'
                        'TTTTTTGTA',
        }
        testcases['scaffold_19901'] = {
            'id': 329,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_19901:1:22'
                           '56:1',
            'name': 'scaffold_19901',
            'sequence': 'ATACCGCAAAGGTTTCTTTCTTCTCAGTGCTCCATGCTGCCTCTCTTGTTT'
                        'TGCCTCCCT',
        }
        testcases['scaffold_95'] = {
            'id': 19408,
            'description': 'dna:scaffold scaffold:JGI4.1:scaffold_95:1:28996'
                           '70:1',
            'name': 'scaffold_95',
            'sequence': 'CCCTCCTGGTGATCCCACTTCAATCTCCCCATAGGCACACATCACTTCTAG'
                        'CAGTTCACA',
        }

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            # Only the start of the stored sequence is compared
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #16
0
 def __init__(self, filepath):
     """Keep the ScreedDB built from ``filepath`` as ``self.sdb``."""
     database = ScreedDB(filepath)
     self.sdb = database
Exemple #17
0
class Test_sorex_fasta:
    """
    Test screed methods on the sorex fasta file
    """
    def setup(self):
        """
        Creates the ScreedDB for the sorex path with the '_screed' suffix
        """
        self.db = ScreedDB(sorex + '_screed')

    def tearDown(self):
        """
        Drops the database reference and forces a garbage collection
        """
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration must pass a membership test and an
        unknown key must not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and indexing must both return the record named by the key; an
        unknown key gives None from get() and a KeyError from indexing
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
            assert False, "the previous line should raise a KeyError"
        except KeyError:
            pass

    def test_missing(self):
        """
        Make sure that unsupported dict attributes are actually missing.
        """
        db = self.db
        # (method name, call arguments) for every dict method that must
        # raise AttributeError on the database
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )

        for method_name, args in unsupported:
            try:
                getattr(db, method_name)(*args)
                assert 0, method_name + '() should not be available'
            except AttributeError:
                pass

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities
        """
        testcases = {}
        testcases['scaffold_93039'] = {
            'id': 0,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_93'
                           '039:1:203:1',
            'name': 'scaffold_93039',
            'sequence': 'GCTGAGCCTTGTAGTTCTGCTCCCTTTGACTGACGGCCCACTATGGACCG'
                        'GAAAAACTAC',
        }

        testcases['scaffold_107701'] = {
            'id': 1,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_10'
                           '7701:1:203:1',
            'name': 'scaffold_107701',
            'sequence': 'TAAACCCAAAATAAACATTCCCCAAATTATATTTCTTCCTTTCCTTCTGA'
                        'ATAAAAGAAA',
        }

        testcases['GeneScaffold_6994'] = {
            'id': 243135,
            'description': 'dna:genescaffold genescaffold:COMMON_SHREW1:Gen'
                           'eScaffold_6994:1:2349312:1',
            'name': 'GeneScaffold_6994',
            'sequence': 'TATTGAGAGAAGTGGGAACTTCTCTAGTGGTGGGGTATGGTGATGGAATG'
                        'ATGTATGAAT',
        }

        testcases['scaffold_118324'] = {
            'id': 13823,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_11'
                           '8324:1:884:1',
            'name': 'scaffold_118324',
            'sequence': 'CAGCCCCCTGCAACAAATTTTATACTCTAGAAACAGTTTAATGGCTGTTG'
                        'GAATATTTCC',
        }

        testcases['scaffold_92895'] = {
            'id': 14573,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_92'
                           '895:1:890:1',
            'name': 'scaffold_92895',
            'sequence': 'GGGAAGCTTGCAAGGCTGTCCCATGTGGGCAGGAAGCTCTCAGTAGCTTG'
                        'CCAGTTTCTC',
        }

        testcases['scaffold_62271'] = {
            'id': 37101,
            'description': 'dna:scaffold scaffold:COMMON_SHREW1:scaffold_62'
                           '271:1:1064:1',
            'name': 'scaffold_62271',
            'sequence': 'AGAGTATCTCCCCCACATGGCAGAGCCTGGCAAGCTACCCATGGCGTATT'
                        'CAATATGCCA',
        }

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            # Only the start of the stored sequence is compared
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #18
0
class Test_s22_fastq:
    """
    Test screed methods on the s22 fastq file
    """
    def setup(self):
        """
        Creates the ScreedDB for the tests22 path with the '_screed' suffix
        """
        self.db = ScreedDB(tests22 + '_screed')

    def tearDown(self):
        """
        Drops the database reference and forces a garbage collection
        """
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration must pass a membership test and an
        unknown key must not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and indexing must both return the record named by the key; an
        unknown key gives None from get() and a KeyError from indexing
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
            assert False, "the previous line should raise a KeyError"
        except KeyError:
            pass

    def test_missing(self):
        """
        Make sure that unsupported dict attributes are actually missing.
        """
        db = self.db
        # (method name, call arguments) for every dict method that must
        # raise AttributeError on the database
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )

        for method_name, args in unsupported:
            try:
                getattr(db, method_name)(*args)
                assert 0, method_name + '() should not be available'
            except AttributeError:
                pass

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities
        """
        testcases = {}
        testcases['HWI-EAS_4_PE-FC20GCB:2:1:492:573/2'] = {
            'id': 0,
            'annotations': '',
            'quality': 'AA7AAA3+AAAAAA.AAA.;7;AA;;;;*;<1;<<<',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:1:492:573/2',
            'sequence': 'ACAGCAAAATTGTGATTGAGGATGAAGAACTGCTGT',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:2:162:131:826/2'] = {
            'id': 1895228,
            'annotations': '',
            'quality': 'AAAAAAAAAAAAAAAAAAAAAA+@6=7A<05<*15:',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:162:131:826/2',
            'sequence': 'ATGAATACAAACAATGCGGCAGTCATAATGCCCCTC',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:2:330:88:628/2'] = {
            'id': 3790455,
            'annotations': '',
            'quality': 'AA;AA??A5A;;+AA?AAAA;AA;9AA.AA?????9',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:330:88:628/2',
            'sequence': 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAA',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:2:4:707:391/2'] = {
            'id': 29999,
            'annotations': '',
            'quality': 'AAAAAAAAAA@<)A*AAA6A::<@AA<>A>-8?>4<',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:4:707:391/2',
            'sequence': 'ATTAATCTCCAGTTTCTGGCAAACATTCAGGCCATT',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:2:36:158:208/2'] = {
            'id': 342842,
            'annotations': '',
            'quality': 'AA5?AAAAA?AAAA5?AAA5A???5A>AAA4?;.;;',
            'name': 'HWI-EAS_4_PE-FC20GCB:2:36:158:208/2',
            'sequence': 'TTTCCCTACAGAAGTGTCTGTACCGGTAATAAAGAA',
        }

        # Whole-record comparison, so every field above must match
        for case, expected in testcases.items():
            assert expected == self.db[case]
Exemple #19
0
 def setup(self):
     """Bind ``self.db`` to the ScreedDB at ``sorex + '_screed'``."""
     db_path = sorex + '_screed'
     self.db = ScreedDB(db_path)
Exemple #20
0
 def setup(self):
     """Bind ``self.db`` to the ScreedDB at ``sorex + '_screed'``."""
     db_path = sorex + '_screed'
     self.db = ScreedDB(db_path)
Exemple #21
0
class Test_s31_fastq:
    """
    Test screed methods on the s31 fastq file
    """
    def setup(self):
        """
        Creates the ScreedDB for the tests31 path with the '_screed' suffix
        """
        self.db = ScreedDB(tests31 + '_screed')

    def tearDown(self):
        """
        Drops the database reference and forces a garbage collection
        """
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration must pass a membership test and an
        unknown key must not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and indexing must both return the record named by the key; an
        unknown key gives None from get() and a KeyError from indexing
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
            assert False, "the previous line should raise a KeyError"
        except KeyError:
            pass

    def test_missing(self):
        """
        Make sure that unsupported dict attributes are actually missing.
        """
        db = self.db
        # (method name, call arguments) for every dict method that must
        # raise AttributeError on the database
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )

        for method_name, args in unsupported:
            try:
                getattr(db, method_name)(*args)
                assert 0, method_name + '() should not be available'
            except AttributeError:
                pass

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities
        """
        testcases = {}
        testcases['HWI-EAS_4_PE-FC20GCB:3:1:71:840/1'] = {
            'id': 0,
            'annotations': '',
            'quality': 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC',
            'name': 'HWI-EAS_4_PE-FC20GCB:3:1:71:840/1',
            'sequence': 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:3:330:957:433/1'] = {
            'id': 4439695,
            'annotations': '',
            'quality': 'AAAAAAAAAAA<A?<AA<AAAAA?AAA?<:*??&::',
            'name': 'HWI-EAS_4_PE-FC20GCB:3:330:957:433/1',
            'sequence': 'CTTTGTGGAGAAGAGGGCGTGGGCAAGGCACTGATA',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:3:166:443:410/1'] = {
            'id': 2219847,
            'annotations': '',
            'quality': 'AAAAAAAAAAAAAAAAAAAAAAAA6<@AA959???%',
            'name': 'HWI-EAS_4_PE-FC20GCB:3:166:443:410/1',
            'sequence': 'TGGCATTCGCACACATCATGATGGTGCTGACCGTAA',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:3:1:803:878/1'] = {
            'id': 2999,
            'annotations': '',
            'quality': '?6AAA6A<A6AA<<AA?A&A066/6:/&?&1191+0',
            'name': 'HWI-EAS_4_PE-FC20GCB:3:1:803:878/1',
            'sequence': 'AAGATGCTGTAGTGGCCGCATGTGTAATAGGCTTTA',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:3:245:54:506/1'] = {
            'id': 3329772,
            'annotations': '',
            'quality': "AAAAAAAAAAAAAAAA>A+AAA+@AA+A>A%8*?'%",
            'name': 'HWI-EAS_4_PE-FC20GCB:3:245:54:506/1',
            'sequence': 'CTTCGTTGCTGTTTATCAGTAACTTTTTCTGGCTAG',
        }

        # Whole-record comparison, so every field above must match
        for case, expected in testcases.items():
            assert expected == self.db[case]
Exemple #22
0
class Test_s42_fastq:
    """
    Test screed methods on the s42 fastq file
    """
    def setup(self):
        """
        Creates the ScreedDB for the tests42 path with the '_screed' suffix
        """
        self.db = ScreedDB(tests42 + '_screed')

    def tearDown(self):
        """
        Drops the database reference and forces a garbage collection
        """
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration must pass a membership test and an
        unknown key must not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and indexing must both return the record named by the key; an
        unknown key gives None from get() and a KeyError from indexing
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
            assert False, "the previous line should raise a KeyError"
        except KeyError:
            pass

    def test_missing(self):
        """
        Make sure that unsupported dict attributes are actually missing.
        """
        db = self.db
        # (method name, call arguments) for every dict method that must
        # raise AttributeError on the database
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )

        for method_name, args in unsupported:
            try:
                getattr(db, method_name)(*args)
                assert 0, method_name + '() should not be available'
            except AttributeError:
                pass

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities
        """
        testcases = {}
        testcases['HWI-EAS_4_PE-FC20GCB:4:1:257:604/2'] = {
            'id': 0,
            'annotations': '',
            'quality': 'AAAAAAAA:4>>AAA:44>>->-&4;8+8826;66.',
            'name': 'HWI-EAS_4_PE-FC20GCB:4:1:257:604/2',
            'sequence': 'TGTGGATAGTCGCCCGTGATGGCGTCGAAGTTCCGG',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:4:330:96:902/2'] = {
            'id': 4148632,
            'annotations': '',
            'quality': 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA??????',
            'name': 'HWI-EAS_4_PE-FC20GCB:4:330:96:902/2',
            'sequence': 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:4:166:158:532/2'] = {
            'id': 2074316,
            'annotations': '',
            'quality': 'AAAAAAA?A?AAAAAAA?A>A?A?AAAAAA?.<?-?',
            'name': 'HWI-EAS_4_PE-FC20GCB:4:166:158:532/2',
            'sequence': 'ATCGCCAATGCCCAGGCCTGGTTCTCTTTAACCTAT',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:4:1:332:634/2'] = {
            'id': 3000,
            'annotations': '',
            'quality': '?A?AAAAAAAAA8>AAAAAA*AA?A?AA.?)<9)9?',
            'name': 'HWI-EAS_4_PE-FC20GCB:4:1:332:634/2',
            'sequence': 'ACCGTGCCAGATCAGAACCTAGTGGCGATTCCAATT',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:4:242:843:13/2'] = {
            'id': 3111474,
            'annotations': '',
            'quality': "ABAAACA?CAAA??%A;2A;/5/&:?-*1-'11%71",
            'name': 'HWI-EAS_4_PE-FC20GCB:4:242:843:13/2',
            'sequence': 'GTTTCTATATTCTGGCGTTAGTCGTCGCCGATAATT',
        }

        # Whole-record comparison, so every field above must match
        for case, expected in testcases.items():
            assert expected == self.db[case]
Exemple #23
0
class Test_s42_fastq:
    """
    Test screed methods on the s42 fastq file
    """
    def setup(self):
        """
        Creates the ScreedDB for the tests42 path with the '_screed' suffix
        """
        self.db = ScreedDB(tests42 + '_screed')

    def tearDown(self):
        """
        Drops the database reference and forces a garbage collection
        """
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration must pass a membership test and an
        unknown key must not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and indexing must both return the record named by the key; an
        unknown key gives None from get() and a KeyError from indexing
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
            assert False, "the previous line should raise a KeyError"
        except KeyError:
            pass

    def test_missing(self):
        """
        Make sure that unsupported dict attributes are actually missing.
        """
        db = self.db
        # (method name, call arguments) for every dict method that must
        # raise AttributeError on the database
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )

        for method_name, args in unsupported:
            try:
                getattr(db, method_name)(*args)
                assert 0, method_name + '() should not be available'
            except AttributeError:
                pass

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities
        """
        testcases = {}
        testcases['HWI-EAS_4_PE-FC20GCB:4:1:257:604/2'] = {
            'id': 0,
            'annotations': '',
            'quality': 'AAAAAAAA:4>>AAA:44>>->-&4;8+8826;66.',
            'name': 'HWI-EAS_4_PE-FC20GCB:4:1:257:604/2',
            'sequence': 'TGTGGATAGTCGCCCGTGATGGCGTCGAAGTTCCGG',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:4:330:96:902/2'] = {
            'id': 4148632,
            'annotations': '',
            'quality': 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA??????',
            'name': 'HWI-EAS_4_PE-FC20GCB:4:330:96:902/2',
            'sequence': 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:4:166:158:532/2'] = {
            'id': 2074316,
            'annotations': '',
            'quality': 'AAAAAAA?A?AAAAAAA?A>A?A?AAAAAA?.<?-?',
            'name': 'HWI-EAS_4_PE-FC20GCB:4:166:158:532/2',
            'sequence': 'ATCGCCAATGCCCAGGCCTGGTTCTCTTTAACCTAT',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:4:1:332:634/2'] = {
            'id': 3000,
            'annotations': '',
            'quality': '?A?AAAAAAAAA8>AAAAAA*AA?A?AA.?)<9)9?',
            'name': 'HWI-EAS_4_PE-FC20GCB:4:1:332:634/2',
            'sequence': 'ACCGTGCCAGATCAGAACCTAGTGGCGATTCCAATT',
        }

        testcases['HWI-EAS_4_PE-FC20GCB:4:242:843:13/2'] = {
            'id': 3111474,
            'annotations': '',
            'quality': "ABAAACA?CAAA??%A;2A;/5/&:?-*1-'11%71",
            'name': 'HWI-EAS_4_PE-FC20GCB:4:242:843:13/2',
            'sequence': 'GTTTCTATATTCTGGCGTTAGTCGTCGCCGATAATT',
        }

        # Whole-record comparison, so every field above must match
        for case, expected in testcases.items():
            assert expected == self.db[case]
Exemple #24
0
 def setup(self):
     """Bind ``self.db`` to the ScreedDB at ``pongo + '_screed'``."""
     db_path = pongo + '_screed'
     self.db = ScreedDB(db_path)
Exemple #25
0
 def setup(self):
     """Bind ``self.db`` to the ScreedDB at ``tests42 + '_screed'``."""
     db_path = tests42 + '_screed'
     self.db = ScreedDB(db_path)
Exemple #26
0
 def setup(self):
     """Bind ``self.db`` to the ScreedDB at ``mus + '_screed'``."""
     db_path = mus + '_screed'
     self.db = ScreedDB(db_path)
Exemple #27
0
class Test_po_fasta:
    """
    Test screed methods on the pongo fasta file
    """
    def setup(self):
        """
        Creates the ScreedDB for the pongo path with the '_screed' suffix
        """
        self.db = ScreedDB(pongo + '_screed')

    def tearDown(self):
        """
        Drops the database reference and forces a garbage collection
        """
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key produced by iteration must pass a membership test and an
        unknown key must not
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        get() and indexing must both return the record named by the key; an
        unknown key gives None from get() and a KeyError from indexing
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
            assert False, "the previous line should raise a KeyError"
        except KeyError:
            pass

    def test_missing(self):
        """
        Make sure that unsupported dict attributes are actually missing.
        """
        db = self.db
        # (method name, call arguments) for every dict method that must
        # raise AttributeError on the database
        unsupported = (
            ('clear', ()),
            ('update', ({},)),
            ('setdefault', (None,)),
            ('pop', ()),
            ('popitem', ()),
        )

        for method_name, args in unsupported:
            try:
                getattr(db, method_name)(*args)
                assert 0, method_name + '() should not be available'
            except AttributeError:
                pass

    def test_certain_records(self):
        """
        Pulls first, last, middle and few other records out of database and
        compares them to known quantities
        """
        testcases = {}
        testcases['GENSCAN00000032971'] = {
            'id': 0,
            'description': 'cdna:Genscan chromosome:PPYG2:6_qbl_hap2_random'
                           ':95622:98297:1',
            'name': 'GENSCAN00000032971',
            'sequence': 'ATGGCGCCCCGAACCCTCCTCCTGCTGCTCTCGGCGGCCCTGGCCCCGAC'
                        'CGAGACCTGG',
        }
        testcases['GENSCAN00000042282'] = {
            'id': 53997,
            'description': 'cdna:Genscan chromosome:PPYG2:1:229892060:22989'
                           '2800:1',
            'name': 'GENSCAN00000042282',
            'sequence': 'ATGATGCCATTGCAAGGACCCTCTGCAGGGCCTCAGTCCCGAGGATGGCA'
                        'CACAGCCTTC',
        }
        testcases['GENSCAN00000051311'] = {
            'id': 30780,
            'description': 'cdna:Genscan chromosome:PPYG2:10:132962172:132'
                           '962871:1',
            'name': 'GENSCAN00000051311',
            'sequence': 'ATGACCCAGCCACCTACCAGGCCGCTCTGCAGACCCCCCACGGGAGCAGC'
                        'CTCTGCCCCC',
        }
        testcases['GENSCAN00000006030'] = {
            'id': 1469,
            'description': 'cdna:Genscan chromosome:PPYG2:14_random:1765749'
                           ':1766075:-1',
            'name': 'GENSCAN00000006030',
            'sequence': 'ATGTGTGGCAACAAGGGCATTTCTGCCTTCCCTGAATCAGACCACCTTTT'
                        'CACATGGGTA',
        }
        testcases['GENSCAN00000048263'] = {
            'id': 43029,
            'description': 'cdna:Genscan chromosome:PPYG2:6:100388173:10048'
                           '5454:-1',
            'name': 'GENSCAN00000048263',
            'sequence': 'ATGTGTCCCTTTGAATATGCCGGAGAACAACAGTTGCCATGGATGTGTTC'
                        'TGGGGAGCCC',
        }

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            # Only the start of the stored sequence is compared
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #28
0
 def setup(self):
     """Bind ``self.db`` to the ScreedDB at ``tests22 + '_screed'``."""
     db_path = tests22 + '_screed'
     self.db = ScreedDB(db_path)
Exemple #29
0
 def setup(self):
     """Bind ``self.db`` to the ScreedDB at ``pongo + '_screed'``."""
     db_path = pongo + '_screed'
     self.db = ScreedDB(db_path)
Exemple #30
0
 def setup(self):
     """Bind ``self.db`` to the ScreedDB at ``tri + '_screed'``."""
     db_path = tri + '_screed'
     self.db = ScreedDB(db_path)
Exemple #31
0
class Test_mus_fasta:
    """
    Test screed methods on the mus_musculus fasta file
    """
    def setup(self):
        """Open the screed database stored at ``mus + '_screed'``."""
        self.db = ScreedDB(mus + '_screed')

    def tearDown(self):
        """Drop the database reference and force a garbage collection."""
        del self.db
        gc.collect()

    def test_iteration(self):
        """
        Runs through the database, accessing each element by index and then by
        name
        """
        for idx in xrange(0, len(self.db)):
            rcrd = self.db.loadRecordByIndex(idx)
            nameRcrd = self.db[rcrd.name]
            assert rcrd == nameRcrd

    def test_dict_stuff(self):
        """
        Tests some dictionary methods on the database
        """
        keys = self.db.keys()
        ikeys = list(self.db.iterkeys())
        assert sorted(keys) == sorted(ikeys)
        # Release the key lists explicitly before collecting.
        del keys
        del ikeys
        gc.collect()

    def test_contains(self):
        """
        Every key yielded by iteration must be reported as contained, and an
        unknown key must not be
        """
        for k in self.db:
            assert k in self.db

        assert 'FOO' not in self.db

    def test_get(self):
        """
        Checks that get() and item access return the record for a known key,
        that get() returns None for an unknown key and that item access
        raises KeyError for it
        """
        for k in self.db:
            record = self.db.get(k)
            assert record.name == k

            record = self.db[k]
            assert record.name == k

        assert self.db.get('FOO') is None
        try:
            self.db['FOO']
        except KeyError:
            pass
        else:
            assert False, "looking up a missing key should raise a KeyError"

    def test_missing(self):
        """
        Make sure that unsupported dict attributes are actually missing.
        """
        db = self.db

        # Each call is expected to fail with AttributeError; the assertion
        # lives in the ``else`` branch so the ``try`` only guards the call.
        try:
            db.clear()
        except AttributeError:
            pass
        else:
            assert 0, "clear() should not be available"

        try:
            db.update({})
        except AttributeError:
            pass
        else:
            assert 0, "update() should not be available"

        try:
            db.setdefault(None)
        except AttributeError:
            pass
        else:
            assert 0, "setdefault() should not be available"

        try:
            db.pop()
        except AttributeError:
            pass
        else:
            assert 0, "pop() should not be available"

        try:
            db.popitem()
        except AttributeError:
            pass
        else:
            assert 0, "popitem() should not be available"

    def test_certain_records(self):
        """
        Pulls the record named '9' out of the database and compares its name,
        description and the start of its sequence to known quantities
        """
        testcases = {}
        # NOTE: 'id' is recorded for reference but is not compared below.
        testcases['9'] = {
            'id': 0,
            'description': 'dna_rm:chromosome chromosome:NCBIM37:9:1:124076'
            '172:1',
            'name': '9',
            'sequence': 'NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN'
            'NNNNNNNNNN'}

        for case, expected in testcases.items():
            record = self.db[case]
            assert expected['name'] == record['name']
            assert expected['description'] == record['description']
            # Only the leading part of the stored sequence is listed above.
            assert str(record['sequence']).startswith(expected['sequence'])
Exemple #32
0
 def setup(self):
     """Open the database at ``xeno + '_screed'`` and keep it on ``self.db``."""
     # presumably ``xeno`` is a module-level path prefix -- verify
     db_path = xeno + '_screed'
     self.db = ScreedDB(db_path)
Exemple #33
0
 def setup(self):
     """Open the database at ``mus + '_screed'`` and keep it on ``self.db``."""
     # presumably ``mus`` is a module-level path prefix -- verify
     db_path = mus + '_screed'
     self.db = ScreedDB(db_path)
Exemple #34
0
 def __init__(self, filepath):
     """Open the screed database at ``filepath`` and keep it as ``self.sdb``."""
     screed_db = ScreedDB(filepath)
     self.sdb = screed_db
import time as t
import os, os.path
import glob
import platform
global array
import screed
import sys
from screed import ScreedDB
import string

#corriendo = "N"
# Directory holding the result files; the viral genome path is built from it.
bioinfo_path = "/Users/ivanjimenez/Desktop/CLASES/INTERNSHIPS/BIOINFO INTERNSHIP FILES/RESULTS/newresults/"
viralgenome_path = bioinfo_path + "copy_birna_x_virus.fa"

# Process the viral genome FASTA with screed, then open the database found at
# the same path with "_screed" appended. Reusing ``viralgenome_path`` for both
# calls keeps the two locations from drifting apart.
screed.read_fasta_sequences(viralgenome_path)
birna_x_virusdb = ScreedDB(viralgenome_path + "_screed")

# Number of mismatches that are allowed.
k = 6

def getpath():
    """Return the directory containing this script, with separators doubled.

    The directory of ``__file__`` is made absolute, every occurrence of the
    platform-specific separator pattern is replaced by its doubled form, and
    the doubled form is appended as a trailing separator.

    Returns:
        On Windows, the directory with each backslash doubled and two
        backslashes appended; otherwise the directory with each ``//``
        replaced by ``////`` and ``////`` appended.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # NOTE(review): the non-Windows pattern is a double slash, which an
    # absolute path rarely contains, so that branch usually only appends the
    # trailing '////' -- confirm this is intended.
    if platform.system() == 'Windows':
        pattern, doubled = '\\', '\\\\'
    else:
        pattern, doubled = '//', '////'
    return script_dir.replace(pattern, doubled) + doubled