Ejemplo n.º 1
0
def test_build_index():
    """Build each index type over the dm01r FASTA fixture and display its tree.

    For every class in ``IndexesDNA5`` the fixture is read again, an index is
    constructed from the sequences, and a top-down iterator created from that
    index is passed to ``show_shallow_tree``.
    """
    test_name = sys._getframe().f_code.co_name
    logging.info(test_name)
    for index_type in IndexesDNA5:
        fixture_path = fasta_file('dm01r.fasta')
        _num_bases, sequences, _ids = seqan.readFastaDNA5(fixture_path)
        built = index_type(sequences)
        root = built.TopDownIterator(built)
        show_shallow_tree(root)
Ejemplo n.º 2
0
def test_index_text():
    """Check that each index's ``text`` matches the sequences it was built from.

    For every class in ``IndexesDNA5``: the number of texts must equal the
    number of FASTA ids, and the first text must be as long as the first
    input sequence.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_type in IndexesDNA5:
        fixture_path = fasta_file('dm01r.fasta')
        _num_bases, sequences, _ids = seqan.readFastaDNA5(fixture_path)
        built = index_type(sequences)

        text_count = len(built.text)
        assert text_count == len(_ids)

        first_text_length = len(built.text[0])
        assert first_text_length == len(sequences[0])
Ejemplo n.º 3
0
def test_property_map():
    """Fill a list-backed property map while walking the index, then print it.

    A list with ``2 * len(index)`` slots is indexed by ``value.id`` of each
    vertex visited by a top-down iterator.  A first traversal stores the
    ``(representative, numOccurrences)`` pair of every vertex; a second
    traversal logs the stored pair for vertices whose representative is
    shorter than 3 and only descends below those vertices.
    """
    _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
    logging.info('Building index')
    index = seqan.IndexStringDNA5SetESA(sequences)
    logging.info('Creating property map of length %d', len(index))
    vertex_properties = [None] * (2 * len(index))

    def record(cursor):
        # Store this vertex's pair, then recurse into each child in turn.
        vertex_properties[cursor.value.id] = (
            cursor.representative, cursor.numOccurrences)
        has_vertex = cursor.goDown()
        while has_vertex:
            # Recurse on a copy so this cursor can still move to the sibling.
            record(copy(cursor))
            has_vertex = cursor.goRight()

    def report(cursor):
        # Only vertices with a representative shorter than 3 are shown, and
        # nothing below a longer one is visited.
        if len(cursor.representative) >= 3:
            return
        logging.info('%-2s : %5d', *vertex_properties[cursor.value.id])
        has_vertex = cursor.goDown()
        while has_vertex:
            report(copy(cursor))
            has_vertex = cursor.goRight()

    logging.info('Setting properties')
    record(index.TopDownIterator(index))
    logging.info('Showing properties')
    report(index.TopDownIterator(index))
Ejemplo n.º 4
0
def test_index_text():
    """Verify the ``text`` attribute of every index type in ``IndexesDNA5``.

    The index text must hold one entry per FASTA id, and its first entry must
    have the same length as the first sequence that was read.
    """
    current_test = sys._getframe().f_code.co_name
    logging.info(current_test)
    for index_cls in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta')
        )
        idx = index_cls(sequences)
        n_texts = len(idx.text)
        assert n_texts == len(_ids)
        n_first = len(idx.text[0])
        assert n_first == len(sequences[0])
Ejemplo n.º 5
0
def test_strings():
    """Exercise the string types: sizes, infixes/slices, iteration, comparison.

    Steps, in order:

    * read the dm01r FASTA fixture and log alphabet sizes, a sequence length,
      a single base and an infix;
    * take an infix and a slice of a ``StringDNA``, drop every name bound to
      the parent string, and log the infix and slice again;
    * iterate over the first fixture sequence and check the iteration stops
      at the sequence length;
    * check ``==`` / ``!=`` between string elements and one-character Python
      strings with the element on either side of the operator.
    """
    logging.info(sys._getframe().f_code.co_name)
    _num_bases, sequences, ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))

    logging.info('Size of DNA5 alphabet %d', seqan.DNA5.valueSize)
    logging.info('Size of DNA alphabet %d', seqan.DNA.valueSize)
    logging.info('Length of sequence %s %d', ids[3], len(sequences[3]))
    logging.info('Tenth base of sequence %s %s', ids[2], sequences[2].value(9))
    logging.info('Infix of sequence %s %s', ids[2], sequences[2].infix(9, 14))

    # Constructed only to exercise the StringDNA5 constructor.
    _s5 = seqan.StringDNA5('ACGTACGTACGTACGT')
    s4 = seqan.StringDNA('ACGTACGTACGTACGT')
    s = s4
    infix = s.infix(3, 9)
    slice_ = s[3:9]
    logging.info('Infix %s', infix)
    logging.info(type(infix))
    logging.info('Slice %s', slice_)
    logging.info(type(slice_))

    # check object lifetimes are respected
    # Both names must go: `s` is only an alias of `s4`, so deleting `s` alone
    # leaves the parent string referenced and the check would prove nothing.
    del s, s4
    logging.info('Infix %s', infix)
    logging.info('Slice %s', slice_)

    #
    # Check iteration using __getitem__ works correctly
    #
    s = sequences[0]
    chars = set()
    # Start at -1 so the final assertion is well defined (and holds) even if
    # the sequence is empty and the loop body never runs.
    i = -1
    for i, c in enumerate(s):
        # Iteration must stop at the end of the sequence.
        assert i < len(s)
        chars.add(c)
    assert len(chars) <= 4
    assert i == len(s) - 1

    # Element on the left-hand side of the comparison.
    s5 = seqan.StringDNA5('NACGTNNACGTNACGTNACGT')
    print(s5[0] == 'N')
    assert s5[0] == 'N'
    print(s5[0] != 'G')
    assert s5[0] != 'G'
    print(s5[1] != 'N')
    assert s5[1] != 'N'
    print(s5[1] == 'A')
    assert s5[1] == 'A'
    # Element on the right-hand side of the comparison.
    print('N' == s5[0])
    assert 'N' == s5[0]
    print('G' != s5[0])
    assert 'G' != s5[0]
    print('N' != s5[1])
    assert 'N' != s5[1]
    print('A' == s5[1])
    assert 'A' == s5[1]
Ejemplo n.º 6
0
def test_read_fasta():
    """Read the dm01r FASTA fixture and check its shape.

    The fixture must contain 4 sequences and every sequence paired with an id
    must be 1500 bases long.  Finally ``len`` is mapped over the sequence
    container; the result is discarded.
    """
    fasta_path = fasta_file('dm01r.fasta')
    logging.info('Reading %s', fasta_path)
    num_bases, sequences, ids = seqan.readFastaDNA5(fasta_path)
    logging.info('Read %d bases in %d sequences', num_bases, len(sequences))
    assert 4 == len(sequences), len(sequences)
    for _id, seq in zip(ids, sequences):
        seq_length = len(seq)
        assert 1500 == seq_length
        logging.info('%5d bases in %s', seq_length, _id)
    map(len, sequences)
Ejemplo n.º 7
0
def test_infix_comparison():
    """Descend 'A', 'T', 'C' from the root and compare the representative.

    For every class in ``IndexesDNA5`` each descent must succeed, and the
    iterator's representative must then compare equal to the Python string
    ``'ATC'``.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_type in IndexesDNA5:
        fixture_path = fasta_file('dm01r.fasta')
        _num_bases, sequences, _ids = seqan.readFastaDNA5(fixture_path)
        built = index_type(sequences)
        cursor = built.TopDownIterator(built)
        for base in ('A', 'T', 'C'):
            assert cursor.goDown(base)
        assert 'ATC' == cursor.representative, cursor.representative
Ejemplo n.º 8
0
def test_read_fasta():
    """Load the dm01r FASTA fixture and assert its expected dimensions.

    Expects 4 sequences; each (id, sequence) pair must have a sequence of
    1500 bases.  ``len`` is then mapped over the sequences and the result is
    thrown away.
    """
    source = fasta_file('dm01r.fasta')
    logging.info('Reading %s', source)
    total_bases, sequences, ids = seqan.readFastaDNA5(source)
    logging.info('Read %d bases in %d sequences', total_bases, len(sequences))
    assert 4 == len(sequences), len(sequences)
    for name, bases in zip(ids, sequences):
        assert 1500 == len(bases)
        logging.info('%5d bases in %s', len(bases), name)
    map(len, sequences)
Ejemplo n.º 9
0
def test_infix_comparison():
    """Walk down the path A -> T -> C and check the resulting representative.

    Runs once per class in ``IndexesDNA5``.  Every ``goDown`` step must report
    success, after which ``representative`` is compared with ``'ATC'`` (the
    Python string is the left-hand operand).
    """
    current_test = sys._getframe().f_code.co_name
    logging.info(current_test)
    for index_cls in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta')
        )
        idx = index_cls(sequences)
        it = idx.TopDownIterator(idx)
        for step in 'ATC':
            assert it.goDown(step)
        assert 'ATC' == it.representative, it.representative
Ejemplo n.º 10
0
def test_save_load_index():
    """Save each index type to disk, load it back and use the loaded copy.

    For every class in ``IndexesDNA5`` an index is built from the dm01r
    fixture, saved under a fresh temporary directory and re-loaded with the
    class's ``load``.  The loaded index must allow a descent on 'A' from its
    top-down iterator, and the first four items of its first text are printed.

    The temporary directory is removed afterwards, whether or not the checks
    pass, so repeated runs do not accumulate saved indexes.
    """
    # Function-scope import: only needed for the temp-directory cleanup.
    import shutil

    logging.info(sys._getframe().f_code.co_name)
    for IndexDNA5 in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
        index = IndexDNA5(sequences)
        tmpdir = tempfile.mkdtemp(suffix='test-index-save')
        try:
            filename = os.path.join(tmpdir, 'index.saved')
            index.save(filename)
            index2 = IndexDNA5.load(filename)
            i = index2.topdown()
            assert i.goDown('A')
            text = index2.text
            # Parenthesised single-argument form prints identically on
            # Python 2 and is also valid Python 3.
            print(text[0][:4])
        finally:
            # mkdtemp leaves deletion to the caller.  Cleanup is best-effort
            # so a failure here cannot mask an assertion error from above.
            shutil.rmtree(tmpdir, ignore_errors=True)
Ejemplo n.º 11
0
def read_sequences(fasta):
    """Read a FASTA file with ``reverse=True`` and split it into DNA sections.

    Each sequence read is logged with its id and length.  Every sequence is
    then passed to ``split_sequence`` and each piece it yields is logged and
    appended, converted to ``seqan.StringDNA``, to a ``seqan.StringDNASet``.
    A final summary line is logged.

    :param fasta: passed unchanged to ``seqan.readFastaDNA5``.

    As written, the function has no ``return`` statement.
    """
    # Read and reverse the sequences
    num_bases, seqs_dna5, _ids = seqan.readFastaDNA5(fasta, reverse=True)
    for _id, seq in zip(_ids, seqs_dna5):
        logger.info('%s: %d bases', _id, len(seq))
    logger.info('Read %d bases in total', num_bases)

    # Split the sequences into their known portions
    seqs_dna4 = seqan.StringDNASet()
    for whole_sequence in seqs_dna5:
        for section in split_sequence(whole_sequence):
            logger.info(section)
            seqs_dna4.appendValue(seqan.StringDNA(section))
    unambiguous_total = sum(len(section) for section in seqs_dna4)
    logger.info('Split %d sequences with %d possibly ambiguous bases into %d sections totalling %d unambiguous bases',
                len(seqs_dna5), num_bases, len(seqs_dna4), unambiguous_total)
Ejemplo n.º 12
0
def test_save_load_index():
    """Round-trip every index type through ``save`` / ``load``.

    Runs once per class in ``IndexesDNA5``: builds an index from the dm01r
    fixture, saves it into a newly created temporary directory, loads it back
    via the class's ``load`` and checks the loaded index supports a descent
    on 'A'.  The first four items of the loaded index's first text are
    printed.

    The temporary directory is deleted in a ``finally`` clause so a failing
    assertion does not leave saved index files behind.
    """
    # Function-scope import: only needed for the temp-directory cleanup.
    import shutil

    logging.info(sys._getframe().f_code.co_name)
    for IndexDNA5 in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta'))
        index = IndexDNA5(sequences)
        tmpdir = tempfile.mkdtemp(suffix='test-index-save')
        try:
            filename = os.path.join(tmpdir, 'index.saved')
            index.save(filename)
            index2 = IndexDNA5.load(filename)
            i = index2.topdown()
            assert i.goDown('A')
            text = index2.text
            # Parenthesised single-argument form prints identically on
            # Python 2 and is also valid Python 3.
            print(text[0][:4])
        finally:
            # mkdtemp leaves deletion to the caller.  Cleanup is best-effort
            # so a failure here cannot mask an assertion error from above.
            shutil.rmtree(tmpdir, ignore_errors=True)
Ejemplo n.º 13
0
def read_sequences(fasta):
    """Load sequences from ``fasta`` (reader called with ``reverse=True``).

    Logs the id and length of each sequence and the total base count.  The
    pieces produced by ``split_sequence`` for each sequence are logged and
    stored as ``seqan.StringDNA`` values in a ``seqan.StringDNASet``, and a
    summary of the split is logged.

    :param fasta: forwarded as the first argument of ``seqan.readFastaDNA5``.

    As written, the function has no ``return`` statement.
    """
    # Read and reverse the sequences
    num_bases, seqs_dna5, _ids = seqan.readFastaDNA5(fasta, reverse=True)
    for name, bases in zip(_ids, seqs_dna5):
        logger.info('%s: %d bases', name, len(bases))
    logger.info('Read %d bases in total', num_bases)

    # Split the sequences into their known portions
    seqs_dna4 = seqan.StringDNASet()
    # The generator keeps the splitting lazy: each sequence is split only
    # once the pieces of the previous one have been consumed.
    pieces = chain.from_iterable(
        split_sequence(bases) for bases in seqs_dna5)
    for piece in pieces:
        logger.info(piece)
        seqs_dna4.appendValue(seqan.StringDNA(piece))
    section_lengths = (len(piece) for piece in seqs_dna4)
    logger.info(
        'Split %d sequences with %d possibly ambiguous bases into %d sections totalling %d unambiguous bases',
        len(seqs_dna5), num_bases, len(seqs_dna4), sum(section_lengths))
Ejemplo n.º 14
0
def test_build_index():
    """Construct every index in ``IndexesDNA5`` and show a shallow view of it.

    Each iteration re-reads the dm01r FASTA fixture, builds the index from the
    sequences and hands a new top-down iterator to ``show_shallow_tree``.
    """
    current_test = sys._getframe().f_code.co_name
    logging.info(current_test)
    for index_cls in IndexesDNA5:
        _num_bases, sequences, _ids = seqan.readFastaDNA5(
            fasta_file('dm01r.fasta')
        )
        idx = index_cls(sequences)
        top_down = idx.TopDownIterator(idx)
        show_shallow_tree(top_down)