return (end - start + 1) / b.length def overlap(db, feature): overlaps = db.overlaps(feature) if len(overlaps) >= args.min_overlap_count and \ len(overlaps) <= args.max_overlap_count: valid = [] for o in overlaps: amt = calc_overlap(o, feature) if amt >= args.min_overlap and amt <= args.max_overlap: valid.append(o.ID) if len(valid) > 0: feature.attributes['overlaps'] = ','.join(valid) if __name__ == '__main__': args = parser.parse_args() db = PositionDatabase(Feature.from_file(args.reference)) chromosomes, genes, transcripts = build_tree(Feature.from_file(args.gff)) for t in transcripts.values(): overlap(db, t) flat = flatten_tree(chromosomes) print '\n'.join([str(f) for f in flat])
# NOTE(review): `x`, `rx`, `args`, `transcripts` and `chromosomes` are bound
# before the visible part of this script. `x` is presumably one BLAST XML
# <Iteration> element supplied by an enclosing loop; if so, everything up to
# the final print loop belongs at that loop's indentation -- confirm against
# the full file.
ID = x.findtext('.//Iteration_query-def')
query_len = x.findtext('.//Iteration_query-len')
hits = x.findall('.//Hit')
for hit in hits:
    hit_def = hit.findtext('.//Hit_def')
    hit_len = hit.findtext('.//Hit_len')
    # findtext() returns the text of the first matching subelement only, so
    # these values describe the first Hsp found under this hit.
    identity = hit.findtext('.//Hsp_identity')
    align_len = hit.findtext('.//Hsp_align-len')

    # The values are compared as strings. Only when they differ is the
    # midline inspected; a match of `rx` on it discards the hit.
    if identity != align_len:
        midline = hit.findtext('.//Hsp_midline')
        if rx.search(midline):
            continue

    # Score = (identity / hit length) * (identity / query length).
    p = (float(identity) / float(hit_len)) * \
        (float(identity) / float(query_len))
    if p >= args.min_match:
        try:
            t = transcripts[ID]
        except KeyError:
            # The query ID is not a key of `transcripts`; ignore this hit.
            continue

        # Append to an existing comma-separated list, or start a new one.
        try:
            t.attributes['blasts_to'] = ','.join(
                [t.attributes['blasts_to'], hit_def])
        except KeyError:
            t.attributes['blasts_to'] = hit_def

# Parenthesised single-argument print: same output on Python 2, and also
# valid syntax on Python 3.
for f in flatten_tree(chromosomes):
    print(f)