def test_build_index():
    """Build each index type in IndexesDNA5 and display its tree.

    For every index class the dm01r FASTA sequences are read afresh, an
    index is constructed over them and a top-down iterator positioned on
    that index is handed to show_shallow_tree.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_class in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _, seqs, _ = seqan.readFastaDNA5(fasta_path)
        built_index = index_class(seqs)
        root = built_index.TopDownIterator(built_index)
        show_shallow_tree(root)
def test_index_text():
    """Check the text exposed by each index type against the input sequences.

    The index text must hold one entry per FASTA id and its first entry
    must be as long as the first input sequence.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_class in IndexesDNA5:
        _, seqs, seq_ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
        # Keep the index bound to a name for as long as its text is inspected.
        built_index = index_class(seqs)
        num_texts = len(built_index.text)
        assert num_texts == len(seq_ids)
        first_text_length = len(built_index.text[0])
        assert first_text_length == len(seqs[0])
def test_property_map():
    """Store a property per suffix-tree node in a list and read it back.

    An ESA index is built over the dm01r sequences.  One traversal records
    (representative, numOccurrences) for every visited node in a list
    indexed by the node's ``value.id``; a second traversal logs the stored
    entries for nodes whose representative is shorter than 3.
    """
    _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
    logging.info('Building index')
    index = seqan.IndexStringDNA5SetESA(sequences)
    logging.info('Creating property map of length %d', len(index))
    # One slot per possible node id.
    # NOTE(review): presumably 2 * len(index) is an upper bound on the node
    # ids the tree can produce - confirm against the binding.
    property_map = [None] * (2 * len(index))

    def set_properties(i):
        """Record the properties of the node at ``i`` and of all its descendants."""
        property_map[i.value.id] = i.representative, i.numOccurrences
        if i.goDown():
            while True:
                # Recurse on a copy so that ``i`` stays on the current child
                # and can still be moved to its right sibling afterwards.
                set_properties(copy(i))
                if not i.goRight():
                    break

    def show_properties(i):
        """Log the stored properties for nodes with a short representative."""
        # NOTE(review): the original layout was lost; the descent below is
        # assumed to sit inside this length guard so that only shallow nodes
        # are visited - confirm.
        if len(i.representative) < 3:
            logging.info('%-2s : %5d', *property_map[i.value.id])
            if i.goDown():
                while True:
                    # Same copy-then-goRight pattern as in set_properties.
                    show_properties(copy(i))
                    if not i.goRight():
                        break

    logging.info('Setting properties')
    set_properties(index.TopDownIterator(index))
    logging.info('Showing properties')
    show_properties(index.TopDownIterator(index))
def test_index_text():
    """Verify that every index type exposes text matching its input.

    For each class in IndexesDNA5 the number of texts must equal the number
    of ids read from the FASTA file, and the first text must have the same
    length as the first sequence.
    """
    logging.info(sys._getframe().f_code.co_name)
    for make_index in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _, dna_seqs, fasta_ids = seqan.readFastaDNA5(fasta_path)
        # The index stays referenced while its text property is read.
        idx = make_index(dna_seqs)
        text_count = len(idx.text)
        assert text_count == len(fasta_ids)
        leading_length = len(idx.text[0])
        assert leading_length == len(dna_seqs[0])
def test_strings():
    """Exercise the string types: access, infix/slice lifetime, iteration, comparison.

    Steps:
      * log alphabet sizes and a few values taken from the dm01r sequences;
      * take an infix and a slice of a StringDNA, drop every reference to the
        parent string and use the infix and slice again;
      * iterate over a sequence and check the index never runs past its end;
      * compare single characters of a StringDNA5 with Python strings, with
        the character on either side of ``==`` and ``!=``.
    """
    logging.info(sys._getframe().f_code.co_name)
    _num_bases, sequences, ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
    logging.info('Size of DNA5 alphabet %d', seqan.DNA5.valueSize)
    logging.info('Size of DNA alphabet %d', seqan.DNA.valueSize)
    logging.info('Length of sequence %s %d', ids[3], len(sequences[3]))
    logging.info('Tenth base of sequence %s %s', ids[2], sequences[2].value(9))
    logging.info('Infix of sequence %s %s', ids[2], sequences[2].infix(9, 14))

    # Constructed only to check that a StringDNA5 can be built from a str.
    _s5 = seqan.StringDNA5('ACGTACGTACGTACGT')
    s4 = seqan.StringDNA('ACGTACGTACGTACGT')
    s = s4
    infix = s.infix(3, 9)
    slice_ = s[3:9]
    logging.info('Infix %s', infix)
    logging.info(type(infix))
    logging.info('Slice %s', slice_)
    logging.info(type(slice_))

    # Check object lifetimes are respected.  Both names must be deleted:
    # ``s`` and ``s4`` refer to the same string, so deleting ``s`` alone
    # would leave the parent alive and the check would prove nothing.
    del s, s4
    logging.info('Infix %s', infix)
    logging.info('Slice %s', slice_)

    #
    # Check iteration using __getitem__ works correctly
    #
    s = sequences[0]
    chars = set()
    for i, c in enumerate(s):
        # Iteration must stop at the end of the sequence.
        assert i < len(s)
        chars.add(c)
    assert len(chars) <= 4
    assert i == len(s) - 1

    # Comparisons between a sequence character and a Python string, in both
    # operand orders.  Single-argument print(...) prints the same value under
    # the Python 2 print statement and the Python 3 print function.
    s5 = seqan.StringDNA5('NACGTNNACGTNACGTNACGT')
    print(s5[0] == 'N')
    assert s5[0] == 'N'
    print(s5[0] != 'G')
    assert s5[0] != 'G'
    print(s5[1] != 'N')
    assert s5[1] != 'N'
    print(s5[1] == 'A')
    assert s5[1] == 'A'
    print('N' == s5[0])
    assert 'N' == s5[0]
    print('G' != s5[0])
    assert 'G' != s5[0]
    print('N' != s5[1])
    assert 'N' != s5[1]
    print('A' == s5[1])
    assert 'A' == s5[1]
def test_read_fasta():
    """Read the dm01r FASTA file and check its expected shape.

    The file must yield 4 sequences of 1500 bases each.  The sequence set is
    then walked once more, taking the length of every element.
    """
    fasta_path = fasta_file('dm01r.fasta')
    logging.info('Reading %s', fasta_path)
    total_bases, seqs, seq_ids = seqan.readFastaDNA5(fasta_path)
    logging.info('Read %d bases in %d sequences', total_bases, len(seqs))
    assert 4 == len(seqs), len(seqs)
    for seq_id, seq in zip(seq_ids, seqs):
        assert 1500 == len(seq)
        logging.info('%5d bases in %s', len(seq), seq_id)
    # Iterate over the whole set, asking each sequence for its length; the
    # results are deliberately discarded.
    for seq in seqs:
        len(seq)
def test_infix_comparison():
    """Descend A, T, C from the root and compare the representative to 'ATC'.

    Run for every index type in IndexesDNA5; each descent step must succeed
    and the iterator's representative must then equal the Python string.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_class in IndexesDNA5:
        _, seqs, _ = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
        built_index = index_class(seqs)
        cursor = built_index.TopDownIterator(built_index)
        # One descent per base, in order.
        for base in ('A', 'T', 'C'):
            assert cursor.goDown(base)
        assert 'ATC' == cursor.representative, cursor.representative
def test_infix_comparison():
    """Check that an iterator's representative compares equal to a str.

    For each index type the iterator is moved down along A, then T, then C;
    every move must report success and the resulting representative must
    equal 'ATC'.
    """
    logging.info(sys._getframe().f_code.co_name)
    for make_index in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _, dna_seqs, _ = seqan.readFastaDNA5(fasta_path)
        idx = make_index(dna_seqs)
        node = idx.TopDownIterator(idx)
        went_down_a = node.goDown('A')
        assert went_down_a
        went_down_t = node.goDown('T')
        assert went_down_t
        went_down_c = node.goDown('C')
        assert went_down_c
        assert 'ATC' == node.representative, node.representative
def test_save_load_index():
    """Save each index type to disk, load it back and use the loaded copy.

    For every class in IndexesDNA5 an index is built over the dm01r
    sequences and saved under a fresh temporary directory.  The file is
    loaded again through the class's ``load``; the loaded index must allow a
    descent along 'A' and must expose its text.

    The temporary directory is removed once the loaded index has been
    checked, whether or not the checks passed.
    """
    # Local import: shutil is needed only for the cleanup below.
    import shutil

    logging.info(sys._getframe().f_code.co_name)
    for IndexDNA5 in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
        index = IndexDNA5(sequences)
        tmpdir = tempfile.mkdtemp(suffix='test-index-save')
        try:
            filename = os.path.join(tmpdir, 'index.saved')
            index.save(filename)
            index2 = IndexDNA5.load(filename)
            i = index2.topdown()
            assert i.goDown('A')
            text = index2.text
            # Single-argument print(...) prints the same value under the
            # Python 2 print statement and the Python 3 print function.
            print(text[0][:4])
        finally:
            # mkdtemp leaves deletion to the caller.  Cleanup is best-effort
            # so that a failure to delete cannot mask a test failure.
            shutil.rmtree(tmpdir, ignore_errors=True)
def read_sequences(fasta):
    """Read sequences from ``fasta`` and split them into a StringDNASet.

    The file is read with ``reverse=True``.  Every sequence is passed through
    split_sequence and each resulting piece is appended, as a StringDNA, to
    a new StringDNASet.  Per-sequence sizes and overall totals are logged.

    NOTE(review): the code visible here does not return the collected set -
    confirm whether a return statement is missing.
    """
    # Load the sequences, asking the reader for them reversed.
    num_bases, seqs_dna5, _ids = seqan.readFastaDNA5(fasta, reverse=True)
    for seq_id, dna5_seq in zip(_ids, seqs_dna5):
        logger.info('%s: %d bases', seq_id, len(dna5_seq))
    logger.info('Read %d bases in total', num_bases)

    # Break every sequence into pieces and gather all pieces into one set.
    seqs_dna4 = seqan.StringDNASet()
    pieces = chain.from_iterable(
        split_sequence(dna5_seq) for dna5_seq in seqs_dna5)
    for piece in pieces:
        logger.info(piece)
        seqs_dna4.appendValue(seqan.StringDNA(piece))

    total_split_bases = sum(len(dna4_seq) for dna4_seq in seqs_dna4)
    logger.info(
        'Split %d sequences with %d possibly ambiguous bases into %d sections totalling %d unambiguous bases',
        len(seqs_dna5), num_bases, len(seqs_dna4), total_split_bases)
def test_save_load_index():
    """Round-trip every index type through save/load and use the result.

    Each class in IndexesDNA5 builds an index over the dm01r sequences and
    saves it to 'index.saved' inside a new temporary directory.  The index
    loaded back from that file must support a descent along 'A', and the
    first four characters of its first text are printed.

    The temporary directory is deleted after each round trip, including
    when an assertion or the save/load itself fails.
    """
    # Local import: shutil is needed only for the cleanup below.
    import shutil

    logging.info(sys._getframe().f_code.co_name)
    for IndexDNA5 in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta'))
        index = IndexDNA5(sequences)
        tmpdir = tempfile.mkdtemp(suffix='test-index-save')
        try:
            filename = os.path.join(tmpdir, 'index.saved')
            index.save(filename)
            index2 = IndexDNA5.load(filename)
            i = index2.topdown()
            assert i.goDown('A')
            text = index2.text
            # Single-argument print(...) prints the same value under the
            # Python 2 print statement and the Python 3 print function.
            print(text[0][:4])
        finally:
            # mkdtemp never deletes what it creates; remove it here.  Errors
            # are ignored so cleanup cannot hide the real test outcome.
            shutil.rmtree(tmpdir, ignore_errors=True)
def read_sequences(fasta):
    """Load ``fasta`` and collect the split pieces of its sequences.

    Sequences are read with ``reverse=True`` and their sizes logged.  Each
    one is handed to split_sequence; every piece produced is logged and
    appended to a StringDNASet as a StringDNA.  A summary line with the
    counts and base totals is logged at the end.

    NOTE(review): nothing is returned by the code shown here - confirm
    whether the caller is expected to receive the set.
    """
    # Read the input, requesting reversed sequences from the reader.
    num_bases, seqs_dna5, _ids = seqan.readFastaDNA5(fasta, reverse=True)
    for name, sequence in zip(_ids, seqs_dna5):
        logger.info('%s: %d bases', name, len(sequence))
    logger.info('Read %d bases in total', num_bases)

    # Gather the pieces of every sequence, in order, into a single set.
    seqs_dna4 = seqan.StringDNASet()
    for sequence in seqs_dna5:
        for section in split_sequence(sequence):
            logger.info(section)
            seqs_dna4.appendValue(seqan.StringDNA(section))

    section_lengths = (len(section) for section in seqs_dna4)
    logger.info(
        'Split %d sequences with %d possibly ambiguous bases into %d sections totalling %d unambiguous bases',
        len(seqs_dna5), num_bases, len(seqs_dna4), sum(section_lengths))
def test_build_index():
    """Construct every index type and pass its root iterator to show_shallow_tree.

    The dm01r FASTA file is re-read for each class in IndexesDNA5 before the
    index is built over its sequences.
    """
    logging.info(sys._getframe().f_code.co_name)
    for make_index in IndexesDNA5:
        _, dna_seqs, _ = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
        idx = make_index(dna_seqs)
        top_down = idx.TopDownIterator(idx)
        show_shallow_tree(top_down)