Example #1
0
def test_graphtool():
    """Render a suffix-tree-derived graph with graph-tool.

    Reads ``dm01r.fasta``, builds an ESA index over its sequences, feeds the
    index to ``seqan.io.graphtool.Builder`` together with a suffix predicate
    for ``'ACGTATGC'``, lays the graph out with ``sfdp_layout`` and draws it
    to ``graphtool.png``.

    Returns the ``Builder`` instance.
    """
    suffix = 'ACGTATGC'
    _num_bases, sequences, _ids = seqan.readFastaDNA(fasta_file('dm01r.fasta'))
    esa_index = seqan.IndexStringDNASetESA(sequences)
    # seqan.traverse.depthpredicate(4) is an alternative predicate.
    builder = seqan.io.graphtool.Builder(
        esa_index, predicate=seqan.traverse.suffixpredicate(suffix))
    # GT.radial_tree_layout(builder.graph, builder.graph.vertex(0)) is an
    # alternative layout.
    layout = GT.sfdp_layout(builder.graph)
    # Gather the drawing options first; the computed properties are evaluated
    # in the same order as they appear below.
    draw_options = dict(
        pos=layout,
        vertex_size=2,
        vertex_fill_color="lightgrey",
        vertex_text=builder.occurrences,
        vertex_pen_width=seqan.io.graphtool.root_vertex_property(builder),
        edge_text=seqan.io.graphtool.edge_labels_for_output(builder),
        edge_color=seqan.io.graphtool.color_edges_by_first_symbol(builder),
        edge_end_marker="none",
        # builder.edge_lengths is an alternative value for edge_pen_width.
        edge_pen_width=2,
        edge_dash_style=seqan.io.graphtool.dash_non_suffix_edges(
            builder, suffix),
        output="graphtool.png",
    )
    GT.graph_draw(builder.graph, **draw_options)
    return builder
Example #2
0
def test_graphtool():
    """Draw the graph built from a suffix tree to ``graphtool.png``.

    The sequences of ``dm01r.fasta`` are indexed with an ESA index, a
    ``Builder`` is created with a suffix predicate for ``'ACGTATGC'``, and the
    resulting graph is laid out with ``sfdp_layout`` and drawn.

    Returns the ``Builder`` instance.
    """
    _num_bases, sequences, _ids = seqan.readFastaDNA(fasta_file('dm01r.fasta'))
    index = seqan.IndexStringDNASetESA(sequences)
    suffix = 'ACGTATGC'
    # seqan.traverse.depthpredicate(4) could be used here instead.
    suffix_predicate = seqan.traverse.suffixpredicate(suffix)
    builder = seqan.io.graphtool.Builder(index, predicate=suffix_predicate)
    # GT.radial_tree_layout(builder.graph, builder.graph.vertex(0)) could be
    # used here instead.
    positions = GT.sfdp_layout(builder.graph)

    # Compute the per-vertex and per-edge properties up front, one per line.
    gt_helpers = seqan.io.graphtool
    root_pen_widths = gt_helpers.root_vertex_property(builder)
    edge_labels = gt_helpers.edge_labels_for_output(builder)
    edge_colors = gt_helpers.color_edges_by_first_symbol(builder)
    edge_dashes = gt_helpers.dash_non_suffix_edges(builder, suffix)

    GT.graph_draw(
        builder.graph,
        pos=positions,
        vertex_size=2,
        vertex_fill_color="lightgrey",
        vertex_text=builder.occurrences,
        vertex_pen_width=root_pen_widths,
        edge_text=edge_labels,
        edge_color=edge_colors,
        edge_end_marker="none",
        # builder.edge_lengths could be used for edge_pen_width instead.
        edge_pen_width=2,
        edge_dash_style=edge_dashes,
        output="graphtool.png",
    )
    return builder
Example #3
0
def test_property_map():
    """Fill and then report a list of properties indexed by vertex id.

    An ESA index is built over ``dm01r.fasta``. One top-down traversal stores
    ``(representative, numOccurrences)`` for every visited vertex at position
    ``value.id`` of a list of length ``2 * len(index)``. A second traversal
    logs the stored pair for vertices whose representative is shorter than 3.
    """
    _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
    logging.info('Building index')
    index = seqan.IndexStringDNA5SetESA(sequences)
    index_length = len(index)
    logging.info('Creating property map of length %d', index_length)
    property_map = [None] * (2 * index_length)

    def set_properties(cursor):
        # Record this vertex, then recurse into each child using a copy of
        # the iterator so that sibling navigation on `cursor` is unaffected.
        property_map[cursor.value.id] = (
            cursor.representative, cursor.numOccurrences)
        if not cursor.goDown():
            return
        set_properties(copy(cursor))
        while cursor.goRight():
            set_properties(copy(cursor))

    def show_properties(cursor):
        # Only vertices with a representative shorter than 3 are reported,
        # and the traversal does not descend below them.
        if len(cursor.representative) >= 3:
            return
        logging.info('%-2s : %5d', *property_map[cursor.value.id])
        if not cursor.goDown():
            return
        show_properties(copy(cursor))
        while cursor.goRight():
            show_properties(copy(cursor))

    logging.info('Setting properties')
    set_properties(index.TopDownIterator(index))
    logging.info('Showing properties')
    show_properties(index.TopDownIterator(index))
Example #4
0
def test_build_index():
    """Build every index type in ``IndexesDNA5`` and show its shallow tree.

    The FASTA file is re-read for each index type; the index is then handed
    to ``show_shallow_tree`` through a fresh top-down iterator.
    """
    # Log the name of this test function.
    logging.info(sys._getframe().f_code.co_name)
    for index_class in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_path)
        index = index_class(sequences)
        root = index.TopDownIterator(index)
        show_shallow_tree(root)
Example #5
0
def test_index_text():
    """Check that each index's ``text`` mirrors the sequences it was built on.

    For every index type in ``IndexesDNA5`` the number of texts must equal
    the number of FASTA ids, and the first text must be as long as the first
    input sequence.
    """
    # Log the name of this test function.
    logging.info(sys._getframe().f_code.co_name)
    for index_class in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, sequences, seq_ids = seqan.readFastaDNA5(fasta_path)
        # Keep the index bound to a name while its text is inspected.
        index = index_class(sequences)
        text = index.text
        assert len(text) == len(seq_ids)
        assert len(text[0]) == len(sequences[0])
Example #6
0
def test_index_text():
    """Verify the ``text`` attribute of every DNA5 index type.

    The index text must contain one entry per FASTA id, and its first entry
    must have the same length as the first sequence read from the file.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for make_index in IndexesDNA5:
        _num_bases, sequences, fasta_ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta'))
        # The index stays referenced for as long as its text is in use.
        index = make_index(sequences)
        indexed_text = index.text
        expected_count = len(fasta_ids)
        assert len(indexed_text) == expected_count
        expected_first_length = len(sequences[0])
        assert len(indexed_text[0]) == expected_first_length
Example #7
0
def test_strings():
    """Exercise the seqan string wrappers.

    Covers, in order: alphabet sizes, length/value/infix access on sequences
    read from ``dm01r.fasta``, infix and slice objects outliving a name that
    referred to their source string, iteration over a sequence, and
    ``==``/``!=`` between string elements and plain characters with the
    element on either side of the operator.
    """
    logging.info(sys._getframe().f_code.co_name)
    _num_bases, sequences, ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))

    logging.info('Size of DNA5 alphabet %d', seqan.DNA5.valueSize)
    logging.info('Size of DNA alphabet %d', seqan.DNA.valueSize)
    logging.info('Length of sequence %s %d', ids[3], len(sequences[3]))
    logging.info('Tenth base of sequence %s %s', ids[2], sequences[2].value(9))
    logging.info('Infix of sequence %s %s', ids[2], sequences[2].infix(9, 14))

    # Constructed only to check that construction from a str works.
    _s5 = seqan.StringDNA5('ACGTACGTACGTACGT')
    s4 = seqan.StringDNA('ACGTACGTACGTACGT')
    s = s4
    infix = s.infix(3, 9)
    slice_ = s[3:9]
    logging.info('Infix %s', infix)
    logging.info(type(infix))
    logging.info('Slice %s', slice_)
    logging.info(type(slice_))

    # check object lifetimes are respected
    # NOTE(review): `s4` still refers to the same string, so `del s` only
    # removes one name and does not release the underlying object -- confirm
    # whether `s4` should be deleted as well for this check to be meaningful.
    del s
    logging.info('Infix %s', infix)
    logging.info('Slice %s', slice_)

    #
    # Check iteration using __getitem__ works correctly
    #
    s = sequences[0]
    chars = set()
    # Count the items explicitly instead of relying on the loop variable
    # after the loop, which would be undefined for an empty sequence.
    num_iterated = 0
    for i, c in enumerate(s):
        # Iteration must stop once the end of the sequence is reached.
        assert i < len(s)
        chars.add(c)
        num_iterated += 1
    assert len(chars) <= 4
    assert num_iterated == len(s)

    # Element/character comparisons in both operand orders. The
    # single-argument parenthesised print is valid on Python 2 and 3 and
    # produces the same output on both.
    s5 = seqan.StringDNA5('NACGTNNACGTNACGTNACGT')
    print(s5[0] == 'N')
    assert s5[0] == 'N'
    print(s5[0] != 'G')
    assert s5[0] != 'G'
    print(s5[1] != 'N')
    assert s5[1] != 'N'
    print(s5[1] == 'A')
    assert s5[1] == 'A'
    print('N' == s5[0])
    assert 'N' == s5[0]
    print('G' != s5[0])
    assert 'G' != s5[0]
    print('N' != s5[1])
    assert 'N' != s5[1]
    print('A' == s5[1])
    assert 'A' == s5[1]
Example #8
0
def test_read_fasta():
    """Read ``dm01r.fasta`` and check the number and length of its sequences.

    The file must yield exactly 4 sequences, each 1500 bases long. The
    length of every sequence is logged together with its id.
    """
    path = fasta_file('dm01r.fasta')
    logging.info('Reading %s', path)
    total_bases, sequences, names = seqan.readFastaDNA5(path)
    sequence_count = len(sequences)
    logging.info('Read %d bases in %d sequences', total_bases, sequence_count)
    assert 4 == sequence_count, sequence_count
    for name, sequence in zip(names, sequences):
        length = len(sequence)
        assert 1500 == length
        logging.info('%5d bases in %s', length, name)
    # Apply len across the sequence container via map; the result is
    # discarded.
    map(len, sequences)
Example #9
0
def test_infix_comparison():
    """Descend A, T, C from the root of each index and compare the path.

    After the three descents the iterator's ``representative`` must compare
    equal to the plain string ``'ATC'``.
    """
    # Log the name of this test function.
    logging.info(sys._getframe().f_code.co_name)
    for index_class in IndexesDNA5:
        fasta_path = fasta_file('dm01r.fasta')
        _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_path)
        index = index_class(sequences)
        cursor = index.TopDownIterator(index)
        # Each descent must succeed before the next one is attempted.
        for base in ('A', 'T', 'C'):
            assert cursor.goDown(base)
        path = cursor.representative
        assert 'ATC' == path, path
Example #10
0
def test_read_fasta():
    """Check that reading ``dm01r.fasta`` gives 4 sequences of 1500 bases.

    Logs the file name, the totals reported by the reader, and one line per
    sequence with its length and id.
    """
    fasta_path = fasta_file('dm01r.fasta')
    logging.info('Reading %s', fasta_path)
    fasta_contents = seqan.readFastaDNA5(fasta_path)
    num_bases, sequences, ids = fasta_contents
    logging.info('Read %d bases in %d sequences', num_bases, len(sequences))
    num_sequences = len(sequences)
    assert 4 == num_sequences, num_sequences
    for seq_id, seq in zip(ids, sequences):
        seq_length = len(seq)
        assert 1500 == seq_length
        logging.info('%5d bases in %s', seq_length, seq_id)
    # Run len over the whole container through map; the result is unused.
    map(len, sequences)
Example #11
0
def test_infix_comparison():
    """Check that a top-down iterator's representative compares to a str.

    For every index type in ``IndexesDNA5`` the iterator is moved down along
    'A', 'T' and 'C' in turn; its ``representative`` must then equal
    ``'ATC'``.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for make_index in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta'))
        index = make_index(sequences)
        iterator = index.TopDownIterator(index)
        # Walk the expected path one symbol at a time; every step must
        # succeed.
        for symbol in 'ATC':
            assert iterator.goDown(symbol)
        representative = iterator.representative
        assert 'ATC' == representative, representative
Example #12
0
def test_write_pgf():
    """Test writing a PGF representing a suffix tree.

    An ESA index over the sequences of ``dm01r.fasta`` is written with
    ``seqan.io.pgf.write_pgf`` to a temporary ``.tex`` file. The file's path
    is logged and the file is left on disk.
    """
    _num_bases, sequences, _ids = seqan.readFastaDNA(fasta_file('dm01r.fasta'))
    index = seqan.IndexStringDNASetESA(sequences)
    tikz, filename = tempfile.mkstemp('.tex', prefix='seqan-test-pgf')
    logger.info('Writing PGF to: %s', filename)
    # Wrap the descriptor in a context manager so the handle is closed (and
    # the output flushed) when writing finishes, instead of whenever the
    # file object happens to be garbage collected.
    with os.fdopen(tikz, 'w') as output:
        seqan.io.pgf.write_pgf(output,
                               index,
                               highlightsuffix='GCCGAA',
                               maxdepth=None)
Example #13
0
def test_save_load_index():
    """Save each DNA5 index type to disk, load it back and query the copy.

    For every index type in ``IndexesDNA5`` an index is built, saved under a
    fresh temporary directory, reloaded with ``load`` and checked by
    descending along 'A' from the root. The first four symbols of the
    reloaded text are printed. The temporary directory is removed afterwards.
    """
    # Imported here because it is needed only for the cleanup below.
    import shutil

    logging.info(sys._getframe().f_code.co_name)
    for IndexDNA5 in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
        index = IndexDNA5(sequences)
        tmpdir = tempfile.mkdtemp(suffix='test-index-save')
        try:
            filename = os.path.join(tmpdir, 'index.saved')
            index.save(filename)
            index2 = IndexDNA5.load(filename)
            i = index2.topdown()
            assert i.goDown('A')
            text = index2.text
            # Single-argument parenthesised print: valid on Python 2 and 3.
            print(text[0][:4])
        finally:
            # mkdtemp leaves deletion to the caller; without this every run
            # leaves a directory behind. Cleanup failures are ignored so
            # they cannot mask the outcome of the test itself.
            shutil.rmtree(tmpdir, ignore_errors=True)
Example #14
0
def test_write_pgf():
    """Test writing a PGF representing a suffix tree.

    Builds an ESA index from ``dm01r.fasta`` and passes it to
    ``seqan.io.pgf.write_pgf`` together with a file opened on a temporary
    ``.tex`` path. The path is logged; the file itself is not deleted.
    """
    _num_bases, sequences, _ids = seqan.readFastaDNA(fasta_file('dm01r.fasta'))
    index = seqan.IndexStringDNASetESA(sequences)
    tikz, filename = tempfile.mkstemp('.tex', prefix='seqan-test-pgf')
    logger.info('Writing PGF to: %s', filename)
    # Close the file deterministically once writing is done; previously the
    # object returned by os.fdopen was never closed explicitly.
    with os.fdopen(tikz, 'w') as output:
        seqan.io.pgf.write_pgf(
            output,
            index,
            highlightsuffix='GCCGAA',
            maxdepth=None)
Example #15
0
def test_save_load_index():
    """Round-trip each DNA5 index type through ``save`` and ``load``.

    Each index built from ``dm01r.fasta`` is saved inside a new temporary
    directory and loaded again. The loaded index must allow a descent along
    'A' from the root, and the first four symbols of its first text are
    printed. The temporary directory is deleted when the iteration finishes.
    """
    # Imported here because it is needed only for the cleanup below.
    import shutil

    logging.info(sys._getframe().f_code.co_name)
    for IndexDNA5 in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta'))
        index = IndexDNA5(sequences)
        tmpdir = tempfile.mkdtemp(suffix='test-index-save')
        try:
            filename = os.path.join(tmpdir, 'index.saved')
            index.save(filename)
            index2 = IndexDNA5.load(filename)
            i = index2.topdown()
            assert i.goDown('A')
            text = index2.text
            # Single-argument parenthesised print: valid on Python 2 and 3.
            print(text[0][:4])
        finally:
            # The caller of mkdtemp is responsible for removing the
            # directory. Errors during removal are ignored so they do not
            # hide a test failure.
            shutil.rmtree(tmpdir, ignore_errors=True)
Example #16
0
def _build_index():
    """Yield one index per type in ``Indexes``, built from ``dm01r.fasta``.

    This is a generator: the FASTA file is read and 'Building index' is
    logged once, when iteration starts, and each index is constructed only
    when it is requested.
    """
    fasta_path = fasta_file('dm01r.fasta')
    _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_path)
    logging.info('Building index')
    for index_type in Indexes:
        built_index = index_type(sequences)
        yield built_index
Example #17
0
def test_build_index():
    """Run both top-down traversals over an ESA index of ``dm01r.fasta``.

    ``visitvertex`` is passed as the visitor to the history-based traversal
    first and to the copy-based traversal second.
    """
    fasta_path = fasta_file('dm01r.fasta')
    _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_path)
    index = seqan.IndexStringDNA5SetESA(sequences)

    history_iterator = index.topdownhistory()
    seqan.traverse.topdownhistorytraversal(history_iterator, visitvertex)

    copy_iterator = index.topdown()
    seqan.traverse.topdowncopytraversal(copy_iterator, visitvertex)
Example #18
0
def _build_index():
    """Generate an index of every type listed in ``Indexes``.

    The sequences are read from ``dm01r.fasta`` once, on the first
    advancement of the generator; after logging 'Building index', one index
    is constructed from them for each requested item.
    """
    fasta_contents = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
    _num_bases, sequences, _ids = fasta_contents
    logging.info('Building index')
    for make_index in Indexes:
        yield make_index(sequences)
Example #19
0
def test_build_index():
    """Construct each DNA5 index type and display its shallow tree.

    For every type in ``IndexesDNA5`` the FASTA file is read, an index is
    built over its sequences, and a top-down iterator on that index is passed
    to ``show_shallow_tree``.
    """
    this_test = sys._getframe().f_code.co_name
    logging.info(this_test)
    for make_index in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta'))
        index = make_index(sequences)
        top_down = index.TopDownIterator(index)
        show_shallow_tree(top_down)