def main(input, truth_set, aaf_cutoff, chunk_size):
    """Window the records read from *input* and hand them to ``upton``.

    *input* is wrapped in a ``JimFile``; ``windower`` groups what it yields
    using the predicate built by ``chunker(chunk_size)``; the resulting
    iterable is passed to ``upton`` together with *truth_set* and
    *aaf_cutoff*.  Nothing is returned.
    """
    windows = windower(JimFile(input), chunker(chunk_size))
    upton(windows, truth_set, aaf_cutoff)
""" group by transcript and split at gaps > 50 bases""" return inext.transcript != grp[0].transcript \ or inext.start - grp[-1].end > 50 def smallchunk(grp, inext): return len(grp) > 50 or inext.transcript != grp[0].transcript \ or inext.start - grp[-1].end > 30 def rescale(vals): minv, maxv = min(vals), max(vals) return [float(v - minv) / ((maxv - minv) or 1) for v in vals] # allow a gap of at most 50 bases. from collections import defaultdict saved = defaultdict(list) for chunk in windower(iterator, smallchunk): frv = FRV(chunk) iafi = IAFI(chunk) values = dnds_ratio(chunk) saved["chrom"].append(chunk[0].chrom) saved["start"].append(chunk[0].start) saved["end"].append(chunk[-1].end) saved["frv"].append(frv) saved["iafi"].append(iafi) saved["dnds"].append(values["dnds"]) saved["dn"].append(values["dn"]) saved["ds"].append(values["ds"]) saved["na"].append(values["na"]) saved["trans"].append(chunk[0].transcript)