Python isgap Beispiele

Programmiersprache: Python

Namespace / Paketname: bioutils

Methode / Funktion: isgap

Beispiele auf hotexamples.com: 6

Python isgap - 6 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für bioutils.isgap, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

def prune_aln(aln, what, fh_out=sys.stdout):
    """Remove columns selected by *what* from an alignment and write the
    remaining columns as concatenated FASTA.

    aln    -- alignment; must offer get_alignment_length() and iterate
              over records that have .seq and .id
    what   -- pruning mode, one of:
              'any_gap'   drop columns where bioutils.isgap() is true for
                          at least one symbol
              'all_gap'   drop columns where bioutils.isgap() is true for
                          every symbol
              'identical' drop columns with exactly one distinct symbol
              Symbols are uppercased before they are compared.
    fh_out -- object with a write() method receiving the output

    The 1-based numbers of the kept columns are logged via LOG.info().
    """

    assert what in ['any_gap', 'all_gap', 'identical']

    kept = []
    for col_idx in xrange(aln.get_alignment_length()):
        # distinct uppercased symbols found in this column
        symbols = {rec.seq[col_idx].upper() for rec in aln}

        drop = False
        if what == 'any_gap':
            drop = any(bioutils.isgap(sym) for sym in symbols)
        elif what == 'all_gap':
            drop = all(bioutils.isgap(sym) for sym in symbols)
        elif what == 'identical':
            drop = len(symbols) == 1

        if not drop:
            kept.append(col_idx)

    # FIXME add support for proper alignment output, not just
    # concatenated fasta
    col_numbers = ', '.join([str(col_idx + 1) for col_idx in kept])
    LOG.info("Keeping the following columns: %s" % col_numbers)
    for rec in aln:
        fh_out.write(">%s\n" % rec.id)
        # residues are written in their original case
        pruned_seq = ''.join([rec.seq[col_idx] for col_idx in kept])
        fh_out.write('%s\n' % pruned_seq)

Beispiel #2

Datei anzeigen

Datei: prune_aln_cols.py Projekt: aersoares81/compbio-utils

def prune_aln(aln, what, fh_out=sys.stdout):
    """Filter alignment columns by the criterion *what* and print the
    surviving columns as concatenated FASTA to fh_out.

    what must be 'any_gap', 'all_gap' or 'identical': a column is
    removed if, respectively, any of its symbols is a gap, all of its
    symbols are gaps (as judged by bioutils.isgap), or it contains only
    one distinct symbol. Symbols are compared after uppercasing; the
    output keeps the original case. The kept column numbers (1-based)
    are reported through LOG.info().
    """

    assert what in ['any_gap', 'all_gap', 'identical']

    # One predicate per pruning mode. Each takes the set of distinct
    # uppercased symbols of a column and tells whether to drop it.
    drop_tests = {
        'any_gap': lambda syms: any(bioutils.isgap(c) for c in syms),
        'all_gap': lambda syms: all(bioutils.isgap(c) for c in syms),
        'identical': lambda syms: len(syms) == 1,
    }
    # An unknown mode (only reachable with asserts disabled) drops nothing.
    should_drop = drop_tests.get(what, lambda syms: False)

    keep_cols = []
    for col in xrange(aln.get_alignment_length()):
        distinct = set([rec.seq[col].upper() for rec in aln])
        if should_drop(distinct):
            continue
        keep_cols.append(col)

    # FIXME add support for proper alignment output, not just
    # concatenated fasta
    kept_1based = [str(col + 1) for col in keep_cols]
    LOG.info("Keeping the following columns: %s" % ', '.join(kept_1based))
    for rec in aln:
        fh_out.write(">%s\n" % rec.id)
        residues = [rec.seq[col] for col in keep_cols]
        fh_out.write('%s\n' % ''.join(residues))

Beispiel #3

Datei anzeigen

Datei: seqstat.py Projekt: aersoares81/compbio-utils

def comp_pairwise_ident_matrix(seqrecs):
    """Returns a fake matrix (lower-triangular 2d list) of pairwise
    identities. Valid index range is [i][j], where i>=j, j>=0 and
    i<nseqs. Values for i=j are None!

    Row i holds pairwise_identity() of sequence i against each of the
    sequences 0..i-1, followed by None for the self comparison.
    Sequences are converted with str() and uppercased before they are
    compared.
    """
    # Uppercase every sequence once up front instead of once per pair.
    seqs = [str(rec.seq).upper() for rec in seqrecs]

    # intentionally a list, not a matrix, because numpy doesn't know
    # about symmetric arrays
    mx = []
    for i, s1 in enumerate(seqs):
        jdists = [pairwise_identity(s1, s2) for s2 in seqs[:i]]
        jdists.append(None)  # self comparison not defined
        mx.append(jdists)
    return mx

Beispiel #4

Datei anzeigen

def comp_pairwise_ident_matrix(seqrecs):
    """Build a lower-triangular "matrix" (list of lists) of pairwise
    identities between the given sequence records.

    The result has one row per record. Row i contains
    pairwise_identity(seq_i, seq_j) for every j < i and ends with None,
    because the self comparison is not defined. Valid indices are
    therefore [i][j] with 0 <= j <= i < nseqs. Sequences are converted
    with str() and uppercased before comparison.
    """
    # intentionally a list, not a matrix, because numpy doesn't know
    # about symmetric arrays
    mx = []

    # Uppercased sequences of all records handled so far. Keeping them
    # means each sequence is converted once, not once per pair.
    earlier = []
    for rec in seqrecs:
        s1 = str(rec.seq).upper()
        jdists = [pairwise_identity(s1, s2) for s2 in earlier]
        jdists.append(None)  # self comparison not defined
        mx.append(jdists)
        earlier.append(s1)
    return mx

Beispiel #5

Datei anzeigen

def pairwise_identity(s1, s2):
    """Return fractional pairwise identity between two aligned
    strings, which is defined here as the number of identical residues
    (case sensitive), divided by the length of the shorter of the two
    ungapped sequences.

    Positions where either string has a gap (as judged by
    bioutils.isgap) are not counted as identical. If either sequence
    has no residues left after bioutils.ungap(), 0.0 is returned
    instead of dividing by zero.

    Uppercase your sequence for case insensitivity. For mixed RNA/DNA
    you might want to replace T's with U's or vice versa.

    Based on ideas from
    http://code.activestate.com/recipes/499304-hamming-distance/
    """

    assert len(s1) == len(s2)

    min_ungapped_len = min(len(bioutils.ungap(s1)), len(bioutils.ungap(s2)))
    if min_ungapped_len == 0:
        # empty or all-gap sequence: nothing to compare against
        return 0.0

    idents = sum(c1 == c2 for c1, c2 in izip(s1, s2)
                 if not bioutils.isgap(c1) and not bioutils.isgap(c2))
    return idents / float(min_ungapped_len)

Beispiel #6

Datei anzeigen

Datei: seqstat.py Projekt: aersoares81/compbio-utils

def pairwise_identity(s1, s2):
    """Return the fractional pairwise identity of two aligned strings.

    The identity is the number of alignment positions at which both
    strings carry the same character (case sensitive) and neither
    character is a gap according to bioutils.isgap, divided by the
    length of the shorter of the two sequences after bioutils.ungap().

    Returns 0.0 if that shorter ungapped length is zero (an empty or
    gap-only sequence), so that no ZeroDivisionError is raised.

    Uppercase your sequences for case insensitivity. For mixed RNA/DNA
    you might want to replace T's with U's or vice versa.

    Based on ideas from
    http://code.activestate.com/recipes/499304-hamming-distance/
    """

    assert len(s1) == len(s2)

    ungapped_len1 = len(bioutils.ungap(s1))
    ungapped_len2 = len(bioutils.ungap(s2))
    denominator = min(ungapped_len1, ungapped_len2)
    if not denominator:
        # guard against division by zero for residue-free input
        return 0.0

    idents = sum(c1 == c2
                 for c1, c2 in izip(s1, s2)
                 if not (bioutils.isgap(c1) or bioutils.isgap(c2)))
    return idents / float(denominator)