コード例 #1
0
ファイル: classify.py プロジェクト: jpassaro/seq-align-prep
def make_entry(hit,padded=True):
    '''Build a fasta entry from a copy of the record *hit*.

    All changes are made on ``hit.copy()``, after calling ``open()`` on that
    copy (presumably this makes the copy writable -- confirm against the
    record class).  'SSEQ' is always removed from the copy.  If the copy has
    no 'SEQ' field yet, 'SEQ' is set to that subject sequence, prefixed with
    ``QSTART - 1`` copies of the module-level fill character so that it
    lines up with the query location; pass ``padded=False`` to omit the
    prefix.  The copy is then passed to ``fasta.seq_entry`` and the result
    of that call is returned.
    '''
    record = hit.copy()
    record.open()
    # Multiplying by *padded* (True -> 1, False -> 0) switches the prefix
    # on or off without a branch.
    left_pad = _fillchar * (record.QSTART - 1) * padded
    # 'SSEQ' is popped unconditionally, even when 'SEQ' already exists.
    default_seq = left_pad + record.pop('SSEQ')
    record.setdefault('SEQ', default_seq)
    return fasta.seq_entry(record)
コード例 #2
0
ファイル: classify.py プロジェクト: jpassaro/seq-align-prep
def resolve_query_overlap(standalones,nests,overlap):
    '''Turn standalone fragments and nested fragments into fasta entries.

    *standalones* is an iterable of hit records and *nests* an iterable of
    iterables of hit records.  Nests are expected to have undergone the
    subject overlap truncation scheme (see *stratify*).  *overlap* is the
    threshold that ``q_overlap`` of two query-adjacent records is compared
    against.

    Every record is first named with ``setname`` ('standalone[j]', or
    'nest<i>[j]' with nests counted from 1), then all records are sorted by
    QSTART.

    Returns a list of fasta entries:

    * if any two records adjacent in query order overlap by at least
      *overlap*, or if there is only one record, one entry per record as
      built by ``make_entry``;
    * otherwise a one-element list holding the fragments "assembled"
      (non-technical term) in query order into a single entry, with '-'
      filling the query gaps between consecutive fragments.

    Raises Error when there are no records at all.
    '''
    standalones = list(standalones)
    # Materialise every nest: *nests* is traversed three times below, so it
    # must not be a one-shot iterator (which map() is on Python 3).
    nests = [list(nest) for nest in nests]
    if not (standalones or any(nests)):
        raise Error('Tried to resolve query overlap on an empty set of records!')

    # assign names before reordering
    for j, hit in enumerate(standalones):
        setname(hit, 'standalone[{}]'.format(j))
    for i, nest in enumerate(nests, 1):
        for j, hit in enumerate(nest):
            setname(hit, 'nest{}[{}]'.format(i, j))

    # and then reorder by query ordinate
    recs = sorted(_it.chain(standalones, *nests), key=_attrget('QSTART'))

    # NOTE(review): only neighbours in query order are compared; confirm
    # that this is sufficient for the intended definition of overlap.
    has_overlap = any(q_overlap(x, y) >= overlap
                      for x, y in zip(recs, recs[1:]))
    if has_overlap or len(recs) == 1:
        # Always a list, as documented, rather than a lazy iterator.
        return [make_entry(hit) for hit in recs]

    # Assemble: each fragment is preceded by as many '-' as there are query
    # positions between it and the previous fragment (none if the count is
    # zero or negative, since str * n is '' for n <= 0).
    parts = []
    prev_end = 0
    for hit in recs:
        parts.append('-' * (hit.QSTART - 1 - prev_end))
        parts.append(hit.SSEQ)
        prev_end = hit.QEND or 0

    result = fasta.seq_entry({
        'SEQ': ''.join(parts),
        'NAME': _name_fmt.format(
            _GRP='all',
            SSEQID=recs[0].SSEQID,
            SSTART=min(h._SSTART for h in recs),
            SEND=max(h._SEND for h in recs)),
    })
    return [result]