def make_entry(hit, padded=True):
    '''Build a fasta entry from a single hit record.

    The function works on ``hit.copy()`` (on which ``open()`` is called
    before any modification), so changes are made to the copy rather than
    to the record passed in.

    The copy's 'SSEQ' field is always removed.  If the copy has no 'SEQ'
    field yet, 'SEQ' is set to the removed 'SSEQ' value, prefixed with
    ``QSTART - 1`` repetitions of the module-level fill character so that
    the sequence lines up with its query location.  Passing a false
    *padded* suppresses that prefix.  An already present 'SEQ' field is
    left as it is.

    The resulting record is handed to ``fasta.seq_entry`` and its return
    value is returned.
    '''
    entry = hit.copy()
    entry.open()

    # Multiplying by *padded* (True -> 1, False -> 0) switches the
    # left padding on or off without a branch.
    left_pad = _fillchar * (entry.QSTART - 1) * padded

    # 'SSEQ' is popped unconditionally, even when 'SEQ' already exists.
    subject_seq = entry.pop('SSEQ')
    entry.setdefault('SEQ', left_pad + subject_seq)

    return fasta.seq_entry(entry)
def resolve_query_overlap(standalones, nests, overlap):
    '''Turn standalone fragments and nests into a list of fasta entries.

    Expects an iterable of standalone fragments and an iterable of nests
    (each nest being an iterable of fragments).  Nests are expected to
    have undergone the subject overlap truncation scheme (see *stratify*).

    Every fragment is first given a name reflecting where it came from
    ('standalone[j]' or 'nest<i>[j]', nests counted from 1), then all
    fragments are pooled and sorted by their QSTART attribute.

    * If there is only one fragment, or if any two fragments adjacent in
      that order have a ``q_overlap`` of at least *overlap*, each fragment
      is converted separately with ``make_entry`` and the list of those
      entries is returned.
    * Otherwise the fragments are "assembled" (non-technical term) in
      order of query ordinates into a single fasta entry, with gaps
      between consecutive fragments filled with '-'.  That entry is the
      only element of the returned list.

    The return value is always a list.

    Raises *Error* if there are no fragments at all.
    '''
    standalones = list(standalones)
    nests = [list(nest) for nest in nests]
    if not (standalones or any(nests)):
        raise Error('Tried to resolve query overlap on an empty set of records!')

    # Assign names before reordering, while each fragment's position in
    # its original container is still known.
    for j, hit in enumerate(standalones):
        setname(hit, 'standalone[{}]'.format(j))
    for i, nest in enumerate(nests, 1):
        for j, hit in enumerate(nest):
            setname(hit, 'nest{}[{}]'.format(i, j))

    # And then reorder by query ordinate.
    recs = sorted(_it.chain(standalones, *nests), key=_attrget('QSTART'))

    # A lone fragment needs no assembly; overlapping neighbours cannot be
    # assembled.  In both cases every fragment becomes its own entry.
    has_overlap = any(q_overlap(x, y) >= overlap
                      for x, y in zip(recs, recs[1:]))
    if has_overlap or len(recs) == 1:
        return [make_entry(rec) for rec in recs]

    # Assemble: pad each fragment on the left up to its query start,
    # measured from the end of the previous fragment (0 for the first).
    # A zero or negative count makes the repetition an empty string, so
    # such fragments are simply concatenated.
    # NOTE(review): the gap character is a literal '-' here, whereas
    # make_entry pads with _fillchar -- confirm whether they should agree.
    pieces = []
    prev_end = 0
    for hit in recs:
        pieces.append('-' * (hit.QSTART - 1 - prev_end))
        pieces.append(hit.SSEQ)
        prev_end = hit.QEND

    name = _name_fmt.format(
        _GRP='all',
        SSEQID=recs[0].SSEQID,
        SSTART=min(h._SSTART for h in recs),
        SEND=max(h._SEND for h in recs))
    result = fasta.seq_entry({'SEQ': ''.join(pieces), 'NAME': name})
    return [result]