Esempio n. 1
0
def test_string_reversecomplement():
    """Check the reverse complement of a DNA5 string, before and after
    the source string is deleted.

    The reverse complement of 'AANNGG' is compared against 'CCNNTT' twice:
    once while the source string is still bound, and once after ``del s1``.
    """
    logging.info(sys._getframe().f_code.co_name)
    s1 = seqan.StringDNA5('AANNGG')
    rc = s1.reversecomplement()
    # Parenthesised single-argument print is valid on both Python 2 and 3;
    # the bare statement form is a SyntaxError on Python 3.
    print(rc)
    print(str(rc))
    assert seqan.StringDNA5('CCNNTT') == rc
    # Drop the source string, then verify the result is still intact.
    del s1
    assert seqan.StringDNA5('CCNNTT') == rc
Esempio n. 2
0
def test_strings():
    """Exercise basic string operations of the seqan bindings.

    Covers, in order: reading a FASTA file, alphabet sizes, ``value`` and
    ``infix`` access, slicing, use of an infix/slice after the source string
    has been released, iteration over a string, and comparison of string
    elements with plain characters in both operand orders.
    """
    logging.info(sys._getframe().f_code.co_name)
    _num_bases, sequences, ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))

    logging.info('Size of DNA5 alphabet %d', seqan.DNA5.valueSize)
    logging.info('Size of DNA alphabet %d', seqan.DNA.valueSize)
    logging.info('Length of sequence %s %d', ids[3], len(sequences[3]))
    logging.info('Tenth base of sequence %s %s', ids[2], sequences[2].value(9))
    logging.info('Infix of sequence %s %s', ids[2], sequences[2].infix(9, 14))

    _s5 = seqan.StringDNA5('ACGTACGTACGTACGT')
    s4 = seqan.StringDNA('ACGTACGTACGTACGT')
    s = s4
    infix = s.infix(3, 9)
    slice_ = s[3:9]
    logging.info('Infix %s', infix)
    logging.info(type(infix))
    logging.info('Slice %s', slice_)
    logging.info(type(slice_))

    # Check object lifetimes are respected.  Both names bound to the source
    # string must go: deleting only ``s`` would leave ``s4`` holding it, and
    # the infix/slice would never be used without their source.
    del s, s4
    logging.info('Infix %s', infix)
    logging.info('Slice %s', slice_)

    #
    # Check iteration using __getitem__ works correctly
    #
    s = sequences[0]
    chars = set()
    i = -1  # keeps the final index check meaningful for an empty sequence
    for i, c in enumerate(s):
        if i >= len(s):
            # Raised explicitly (not ``assert False``) so the guard against
            # running past the end is not stripped under ``python -O``.
            raise AssertionError('iteration ran past the end of the sequence')
        chars.add(c)
    assert len(chars) <= 4
    assert i == len(s) - 1

    # Element/character comparisons, with the element on either side of the
    # operator.  Parenthesised print works on both Python 2 and Python 3.
    s5 = seqan.StringDNA5('NACGTNNACGTNACGTNACGT')
    print(s5[0] == 'N')
    assert s5[0] == 'N'
    print(s5[0] != 'G')
    assert s5[0] != 'G'
    print(s5[1] != 'N')
    assert s5[1] != 'N'
    print(s5[1] == 'A')
    assert s5[1] == 'A'
    print('N' == s5[0])
    assert 'N' == s5[0]
    print('G' != s5[0])
    assert 'G' != s5[0]
    print('N' != s5[1])
    assert 'N' != s5[1]
    print('A' == s5[1])
    assert 'A' == s5[1]
Esempio n. 3
0
def test_small_index():
    """Build every index type over a single one-base string and open a
    top-down iterator on it.

    The string set is deleted before the iterator is created, so the index
    is used without any remaining Python name for its source.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        string_set.appendValue(seqan.StringDNA5('A'))
        built = index_cls(string_set)
        # Release the only Python name for the string set before iterating.
        del string_set
        _cursor = built.TopDownIterator(built)
Esempio n. 4
0
def test_index_as_strings_custodian():
    """Check that an index is still searchable after its string set is deleted.

    For each index type, an index is built over the single string 'ACG', the
    string set is deleted, and descending along 'A' must still succeed.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        string_set.appendValue(seqan.StringDNA5('ACG'))
        built = index_cls(string_set)
        # From here on the index is the only object built from the strings.
        del string_set
        cursor = built.TopDownIterator(built)
        assert cursor.goDown('A')
Esempio n. 5
0
def test_string_set_iteration():
    """Iterate over a DNA5 string set, both manually and via ``map``.

    The set holds a single string.  It is first walked with an explicit
    iterator until ``StopIteration``, then traversed again by mapping
    ``len`` over it.
    """
    logging.info(sys._getframe().f_code.co_name)
    stringset = seqan.StringDNA5Set()
    stringset.appendValue(seqan.StringDNA5('ACGTACGTACGTNNN'))
    i = iter(stringset)
    try:
        while True:
            # The built-in next() works on Python 2.6+ and Python 3, unlike
            # the Python-2-only ``i.next()`` method call.
            logging.info(len(next(i)))
    except StopIteration:
        pass
    # map() is lazy on Python 3; list() forces the traversal so the set is
    # really iterated on both major versions.
    list(map(len, stringset))
Esempio n. 6
0
def _test_empty_index():
    """Build each index type over a set holding one empty string.

    NOTE(review): the leading underscore presumably keeps this out of test
    collection - confirm against the test runner in use.  The ``1 / 0``
    aborts the function on purpose, because the ``goDown`` call below it is
    reported to crash with a SEGV; the statements after it are therefore
    never reached.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        string_set.appendValue(seqan.StringDNA5())
        built = index_cls(string_set)
        logging.info('Creating top-down')
        cursor = built.topdown()
        1 / 0  # going down causes SEGV
        logging.info('Going down "A"')
        assert not cursor.goDown('A')
Esempio n. 7
0
def test_split_sequence():
    """Check the pieces produced by ``split_sequence`` for two inputs.

    A DNA5 string 'NNACNGANGGN' must yield 'AC', 'GA', 'GG'; a plain Python
    string 'ACGTNNNNAAGG' must yield 'ACGT', 'AAGG'.  In both cases the
    number of pieces is checked after the individual pieces.
    """
    logger.info(sys._getframe().f_code.co_name)

    pieces = list(split_sequence(seqan.StringDNA5('NNACNGANGGN')))
    for position, wanted in enumerate(('AC', 'GA', 'GG')):
        # The offending piece is used as the assertion message.
        assert pieces[position] == wanted, pieces[position]
    assert 3 == len(pieces)

    pieces = list(split_sequence('ACGTNNNNAAGG'))
    for position, wanted in enumerate(('ACGT', 'AAGG')):
        assert pieces[position] == wanted, pieces[position]
    assert 2 == len(pieces)
Esempio n. 8
0
def test_go_down():
    """Check ``goDown`` and occurrence counts for several query types.

    Each index type is built over 'ACGT', 'AAAA', 'GGGG' and 'AC'.  Every
    query is tried in each of the forms listed for it (DNA5 string, DNA5
    value, plain Python string) from a fresh top-down iterator, and the
    number of occurrences is compared with the expected count:
    'AC' -> 2, 'T' -> 1, 'A' -> 6.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        for bases in ('ACGT', 'AAAA', 'GGGG', 'AC'):
            string_set.appendValue(seqan.StringDNA5(bases))
        built = index_cls(string_set)
        for wanted_count, queries in (
            (2, (seqan.StringDNA5('AC'), 'AC')),
            (1, (seqan.DNA5('T'), 'T', seqan.StringDNA5('T'))),
            (6, (seqan.DNA5('A'), 'A', seqan.StringDNA5('A'))),
        ):
            for query in queries:
                logging.info('Trying %s: %s', type(query).__name__, query)
                # Start from the root again for every query form.
                cursor = built.TopDownIterator(built)
                assert cursor.goDown(query)
                for hit in cursor.occurrences:
                    logging.info('Occurrence for %s: %s', query, hit)
                assert wanted_count == len(cursor.occurrences)
Esempio n. 9
0
def test_long_edges():
    """Check the iterator's representative after descending towards
    'ATACCGGTT'.

    Each index type is built over 'AA', 'AC', 'AG' and 'ATACCGGTT'.  Going
    down 'A' must give the representative 'A'; going further down 'T' must
    give the whole of 'ATACCGGTT'.  Going down 'AT' in one step from a fresh
    iterator must give the same representative.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        for bases in ('AA', 'AC', 'AG', 'ATACCGGTT'):
            string_set.appendValue(seqan.StringDNA5(bases))
        built = index_cls(string_set)

        # Descend one character at a time.
        cursor = built.topdown()
        assert cursor.goDown('A')
        assert 'A' == cursor.representative
        assert cursor.goDown('T')
        logging.debug('%s: %s', index_cls.__name__, cursor.representative)
        assert 'ATACCGGTT' == cursor.representative

        # Descend both characters in a single call from a fresh iterator.
        cursor = built.topdown()
        assert cursor.goDown('AT')
        logging.debug('%s: %s', index_cls.__name__, cursor.representative)
        assert 'ATACCGGTT' == cursor.representative
Esempio n. 10
0
def test_string_set_append():
    """Create an empty DNA5 string set and append one DNA5 string to it."""
    logging.info(sys._getframe().f_code.co_name)
    stringset = seqan.StringDNA5Set()
    stringset.appendValue(seqan.StringDNA5('ACGTACGTACGTNNN'))