def test_graphtool():
    """Draw a suffix tree built from dm01r.fasta with graph-tool.

    An ESA index over the DNA sequences is handed to the graphtool
    ``Builder`` together with a suffix predicate for ``'ACGTATGC'``; the
    resulting graph is laid out with ``sfdp_layout`` and written to
    ``graphtool.png``.

    Returns the ``Builder`` instance.
    """
    fasta_path = fasta_file('dm01r.fasta')
    _num_bases, seqs, _ids = seqan.readFastaDNA(fasta_path)
    esa_index = seqan.IndexStringDNASetESA(seqs)

    highlighted = 'ACGTATGC'
    # Alternative predicate: seqan.traverse.depthpredicate(4)
    keep = seqan.traverse.suffixpredicate(highlighted)
    tree_builder = seqan.io.graphtool.Builder(esa_index, predicate=keep)

    # Alternative layout:
    #   GT.radial_tree_layout(tree_builder.graph, tree_builder.graph.vertex(0))
    layout = GT.sfdp_layout(tree_builder.graph)

    # Property maps are computed in the same order as the drawing arguments.
    draw_options = dict(
        pos=layout,
        vertex_size=2,
        vertex_fill_color="lightgrey",
        vertex_text=tree_builder.occurrences,
        vertex_pen_width=seqan.io.graphtool.root_vertex_property(tree_builder),
        edge_text=seqan.io.graphtool.edge_labels_for_output(tree_builder),
        edge_color=seqan.io.graphtool.color_edges_by_first_symbol(tree_builder),
        edge_end_marker="none",
        edge_pen_width=2,
        edge_dash_style=seqan.io.graphtool.dash_non_suffix_edges(
            tree_builder, highlighted),
        output="graphtool.png",
    )
    GT.graph_draw(tree_builder.graph, **draw_options)
    return tree_builder
def test_graphtool():
    """Draw a suffix tree built from dm01r.fasta with graph-tool.

    An ESA index over the DNA sequences is handed to the graphtool
    ``Builder`` together with a suffix predicate for ``'ACGTATGC'``; the
    resulting graph is laid out with ``sfdp_layout`` and written to
    ``graphtool.png``.

    Returns the ``Builder`` instance.
    """
    fasta_path = fasta_file('dm01r.fasta')
    _num_bases, seqs, _ids = seqan.readFastaDNA(fasta_path)
    esa_index = seqan.IndexStringDNASetESA(seqs)

    highlighted = 'ACGTATGC'
    # Alternative predicate: seqan.traverse.depthpredicate(4)
    keep = seqan.traverse.suffixpredicate(highlighted)
    tree_builder = seqan.io.graphtool.Builder(esa_index, predicate=keep)

    # Alternative layout:
    #   GT.radial_tree_layout(tree_builder.graph, tree_builder.graph.vertex(0))
    layout = GT.sfdp_layout(tree_builder.graph)

    # Property maps are computed in the same order as the drawing arguments.
    draw_options = dict(
        pos=layout,
        vertex_size=2,
        vertex_fill_color="lightgrey",
        vertex_text=tree_builder.occurrences,
        vertex_pen_width=seqan.io.graphtool.root_vertex_property(tree_builder),
        edge_text=seqan.io.graphtool.edge_labels_for_output(tree_builder),
        edge_color=seqan.io.graphtool.color_edges_by_first_symbol(tree_builder),
        edge_end_marker="none",
        edge_pen_width=2,
        edge_dash_style=seqan.io.graphtool.dash_non_suffix_edges(
            tree_builder, highlighted),
        output="graphtool.png",
    )
    GT.graph_draw(tree_builder.graph, **draw_options)
    return tree_builder
def test_property_map():
    """Fill and then log a per-vertex property list for an ESA index.

    A list indexed by ``value.id`` is filled with
    ``(representative, numOccurrences)`` pairs during one top-down
    traversal. A second traversal logs the stored pair of every vertex
    whose representative has fewer than three symbols.
    """
    _num_bases, seqs, _ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
    logging.info('Building index')
    esa = seqan.IndexStringDNA5SetESA(seqs)
    logging.info('Creating property map of length %d', len(esa))
    vertex_props = [None] * (2 * len(esa))

    def walk(it, visit):
        # Pre-order: visit this vertex, then recurse into each child using a
        # copy so the sibling cursor `it` is not disturbed by the descent.
        visit(it)
        if not it.goDown():
            return
        while True:
            walk(copy(it), visit)
            if not it.goRight():
                break

    def store(it):
        vertex_props[it.value.id] = it.representative, it.numOccurrences

    def report(it):
        if len(it.representative) < 3:
            logging.info('%-2s : %5d', *vertex_props[it.value.id])

    logging.info('Setting properties')
    walk(esa.TopDownIterator(esa), store)
    logging.info('Showing properties')
    walk(esa.TopDownIterator(esa), report)
def test_build_index():
    """Build each index type in ``IndexesDNA5`` and show its shallow tree.

    For every index type the FASTA file is read afresh, an index is built
    from the sequences and a ``TopDownIterator`` over it is passed to
    ``show_shallow_tree``.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for index_type in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, seqs, _ids = seqan.readFastaDNA5(fasta_path)
        built = index_type(seqs)
        top_down = built.TopDownIterator(built)
        show_shallow_tree(top_down)
def test_index_text():
    """Check that each index's ``text`` matches the sequences it was built from.

    For every index type in ``IndexesDNA5``: the text must hold as many
    entries as there are FASTA ids, and its first entry must be as long as
    the first sequence.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for index_type in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, seqs, names = seqan.readFastaDNA5(fasta_path)
        built = index_type(seqs)
        num_texts = len(built.text)
        assert num_texts == len(names)
        first_text_len = len(built.text[0])
        assert first_text_len == len(seqs[0])
def test_index_text():
    """Check that each index's ``text`` matches the sequences it was built from.

    For every index type in ``IndexesDNA5``: the text must hold as many
    entries as there are FASTA ids, and its first entry must be as long as
    the first sequence.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for index_type in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, seqs, names = seqan.readFastaDNA5(fasta_path)
        built = index_type(seqs)
        num_texts = len(built.text)
        assert num_texts == len(names)
        first_text_len = len(built.text[0])
        assert first_text_len == len(seqs[0])
def test_strings():
    """Exercise the SeqAn string bindings.

    Covers, in order:

    * logging alphabet sizes, a sequence length, a single base and an infix
      of sequences read from dm01r.fasta;
    * taking an infix and a slice of a ``StringDNA`` and using them after
      every name bound to the parent string has been deleted;
    * iterating over a sequence and checking that iteration stops after
      exactly ``len(s)`` items with at most four distinct symbols;
    * ``==`` / ``!=`` between string elements and one-character Python
      strings, with the element on either side of the operator.
    """
    logging.info(sys._getframe().f_code.co_name)
    _num_bases, sequences, ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
    logging.info('Size of DNA5 alphabet %d', seqan.DNA5.valueSize)
    logging.info('Size of DNA alphabet %d', seqan.DNA.valueSize)
    logging.info('Length of sequence %s %d', ids[3], len(sequences[3]))
    logging.info('Tenth base of sequence %s %s', ids[2], sequences[2].value(9))
    logging.info('Infix of sequence %s %s', ids[2], sequences[2].infix(9, 14))

    # Both alphabets are constructed; `s` selects the one under test.
    _s5 = seqan.StringDNA5('ACGTACGTACGTACGT')
    s4 = seqan.StringDNA('ACGTACGTACGTACGT')
    s = s4
    infix = s.infix(3, 9)
    slice_ = s[3:9]
    logging.info('Infix %s', infix)
    logging.info(type(infix))
    logging.info('Slice %s', slice_)
    logging.info(type(slice_))

    # Check object lifetimes are respected.  Every name bound to the parent
    # string must go: deleting only `s` would leave it alive through `s4`
    # and the check below would prove nothing.
    del s, s4
    logging.info('Infix %s', infix)
    logging.info('Slice %s', slice_)

    #
    # Check iteration using __getitem__ works correctly
    #
    s = sequences[0]
    chars = set()
    num_seen = 0
    for c in s:
        # Fail instead of looping forever if iteration runs past the end.
        assert num_seen < len(s)
        chars.add(c)
        num_seen += 1
    assert len(chars) <= 4
    # An explicit counter keeps this well defined for an empty sequence too.
    assert num_seen == len(s)

    # Element comparisons, with the element on the left...
    # (single-argument print(...) behaves the same on Python 2 and 3)
    s5 = seqan.StringDNA5('NACGTNNACGTNACGTNACGT')
    print(s5[0] == 'N')
    assert s5[0] == 'N'
    print(s5[0] != 'G')
    assert s5[0] != 'G'
    print(s5[1] != 'N')
    assert s5[1] != 'N'
    print(s5[1] == 'A')
    assert s5[1] == 'A'
    # ...and on the right, which exercises the reflected comparison.
    print('N' == s5[0])
    assert 'N' == s5[0]
    print('G' != s5[0])
    assert 'G' != s5[0]
    print('N' != s5[1])
    assert 'N' != s5[1]
    print('A' == s5[1])
    assert 'A' == s5[1]
def test_read_fasta():
    """Read dm01r.fasta and check its shape.

    The file must yield four sequences of 1500 bases each.  The length of
    every sequence is logged next to its id.
    """
    filename = fasta_file('dm01r.fasta')
    logging.info('Reading %s', filename)
    num_bases, sequences, ids = seqan.readFastaDNA5(filename)
    logging.info('Read %d bases in %d sequences', num_bases, len(sequences))
    assert 4 == len(sequences), len(sequences)
    for _id, seq in zip(ids, sequences):
        assert 1500 == len(seq)
        logging.info('%5d bases in %s', len(seq), _id)
    # Apply len across the sequence container itself.  list() forces the
    # calls: a bare map() is lazy on Python 3 and would never invoke len.
    list(map(len, sequences))
def test_infix_comparison():
    """Compare an iterator's representative with a Python string.

    For every index type in ``IndexesDNA5`` a top-down iterator is moved
    down by 'A', 'T' and 'C' in turn (each step must succeed) and its
    representative must then compare equal to ``'ATC'``.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for index_type in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, seqs, _ids = seqan.readFastaDNA5(fasta_path)
        built = index_type(seqs)
        cursor = built.TopDownIterator(built)
        for base in 'ATC':
            assert cursor.goDown(base)
        assert 'ATC' == cursor.representative, cursor.representative
def test_infix_comparison():
    """Compare an iterator's representative with a Python string.

    For every index type in ``IndexesDNA5`` a top-down iterator is moved
    down by 'A', 'T' and 'C' in turn (each step must succeed) and its
    representative must then compare equal to ``'ATC'``.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for index_type in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, seqs, _ids = seqan.readFastaDNA5(fasta_path)
        built = index_type(seqs)
        cursor = built.TopDownIterator(built)
        for base in 'ATC':
            assert cursor.goDown(base)
        assert 'ATC' == cursor.representative, cursor.representative
def test_write_pgf():
    """Test writing a PGF representing a suffix tree.

    Builds an ESA index over the DNA sequences in dm01r.fasta and writes it
    with ``seqan.io.pgf.write_pgf`` to a fresh temporary ``.tex`` file.  The
    file is left on disk and its path is logged.
    """
    _num_bases, sequences, _ids = seqan.readFastaDNA(fasta_file('dm01r.fasta'))
    #for s in sequences:
    #    s.remove('T')
    index = seqan.IndexStringDNASetESA(sequences)
    tikz, filename = tempfile.mkstemp('.tex', prefix='seqan-test-pgf')
    logger.info('Writing PGF to: %s', filename)
    # Close the stream explicitly so the output is flushed and the
    # descriptor returned by mkstemp is released even if writing fails.
    with os.fdopen(tikz, 'w') as pgf_file:
        seqan.io.pgf.write_pgf(
            pgf_file,
            index,
            highlightsuffix='GCCGAA',
            maxdepth=None)
def test_save_load_index():
    """Save each DNA5 index type to disk, load it back and query the copy.

    For every index type in ``IndexesDNA5`` an index is built from
    dm01r.fasta, saved under a fresh temporary directory and reloaded with
    ``load``.  The reloaded index must allow a descent by 'A'; the first
    four symbols of its first text are printed.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    logging.info(sys._getframe().f_code.co_name)
    for IndexDNA5 in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta'))
        index = IndexDNA5(sequences)
        tmpdir = tempfile.mkdtemp(suffix='test-index-save')
        try:
            filename = os.path.join(tmpdir, 'index.saved')
            index.save(filename)
            index2 = IndexDNA5.load(filename)
            i = index2.topdown()
            assert i.goDown('A')
            text = index2.text
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(text[0][:4])
        finally:
            # Do not leave one temp directory behind per index type.
            # Cleanup is best-effort so it cannot mask a test failure.
            shutil.rmtree(tmpdir, ignore_errors=True)
def test_write_pgf():
    """Test writing a PGF representing a suffix tree.

    Builds an ESA index over the DNA sequences in dm01r.fasta and writes it
    with ``seqan.io.pgf.write_pgf`` to a fresh temporary ``.tex`` file.  The
    file is left on disk and its path is logged.
    """
    _num_bases, sequences, _ids = seqan.readFastaDNA(fasta_file('dm01r.fasta'))
    #for s in sequences:
    #    s.remove('T')
    index = seqan.IndexStringDNASetESA(sequences)
    tikz, filename = tempfile.mkstemp('.tex', prefix='seqan-test-pgf')
    logger.info('Writing PGF to: %s', filename)
    # Close the stream explicitly so the output is flushed and the
    # descriptor returned by mkstemp is released even if writing fails.
    with os.fdopen(tikz, 'w') as pgf_file:
        seqan.io.pgf.write_pgf(
            pgf_file,
            index,
            highlightsuffix='GCCGAA',
            maxdepth=None)
def test_save_load_index():
    """Save each DNA5 index type to disk, load it back and query the copy.

    For every index type in ``IndexesDNA5`` an index is built from
    dm01r.fasta, saved under a fresh temporary directory and reloaded with
    ``load``.  The reloaded index must allow a descent by 'A'; the first
    four symbols of its first text are printed.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    logging.info(sys._getframe().f_code.co_name)
    for IndexDNA5 in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta'))
        index = IndexDNA5(sequences)
        tmpdir = tempfile.mkdtemp(suffix='test-index-save')
        try:
            filename = os.path.join(tmpdir, 'index.saved')
            index.save(filename)
            index2 = IndexDNA5.load(filename)
            i = index2.topdown()
            assert i.goDown('A')
            text = index2.text
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(text[0][:4])
        finally:
            # Do not leave one temp directory behind per index type.
            # Cleanup is best-effort so it cannot mask a test failure.
            shutil.rmtree(tmpdir, ignore_errors=True)
def _build_index():
    """Yield one index per type in ``Indexes``, all built from dm01r.fasta.

    The sequences are read once and shared by every index.  This is a
    generator, so nothing is read or logged until it is first advanced.
    """
    fasta_path = fasta_file('dm01r.fasta')
    _num_bases, seqs, _ids = seqan.readFastaDNA5(fasta_path)
    logging.info('Building index')
    for index_type in Indexes:
        yield index_type(seqs)
def test_build_index():
    """Run both top-down traversals over an ESA index with ``visitvertex``.

    The history traversal is run first on ``index.topdownhistory()``, then
    the copy traversal on ``index.topdown()``.
    """
    fasta_path = fasta_file('dm01r.fasta')
    _num_bases, seqs, _ids = seqan.readFastaDNA5(fasta_path)
    esa = seqan.IndexStringDNA5SetESA(seqs)
    traversals = (
        (seqan.traverse.topdownhistorytraversal, esa.topdownhistory),
        (seqan.traverse.topdowncopytraversal, esa.topdown),
    )
    for run_traversal, make_iterator in traversals:
        run_traversal(make_iterator(), visitvertex)
def _build_index():
    """Yield one index per type in ``Indexes``, all built from dm01r.fasta.

    The sequences are read once and shared by every index.  This is a
    generator, so nothing is read or logged until it is first advanced.
    """
    fasta_path = fasta_file('dm01r.fasta')
    _num_bases, seqs, _ids = seqan.readFastaDNA5(fasta_path)
    logging.info('Building index')
    for index_type in Indexes:
        yield index_type(seqs)
def test_build_index():
    """Build each index type in ``IndexesDNA5`` and show its shallow tree.

    For every index type the FASTA file is read afresh, an index is built
    from the sequences and a ``TopDownIterator`` over it is passed to
    ``show_shallow_tree``.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for index_type in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, seqs, _ids = seqan.readFastaDNA5(fasta_path)
        built = index_type(seqs)
        top_down = built.TopDownIterator(built)
        show_shallow_tree(top_down)