def plot_data(x, y, tour, M):
    """Draw the points and the tour connecting them, then save ``demo.pdf``.

    Args:
        x: Sequence of x coordinates, indexed by the entries of ``tour``.
        y: Sequence of y coordinates, indexed by the entries of ``tour``.
        tour: Ordered sequence of point indices.
        M: Passed through to ``evaluate`` together with ``tour`` to obtain
            the score shown in the figure title.
    """
    from jcvi.graphics.base import plt, savefig

    # All points as red circles
    plt.plot(x, y, "ro")

    # One red segment per pair produced by pairwise(tour)
    # (presumably consecutive stops -- confirm against pairwise's definition)
    for start, stop in pairwise(tour):
        xs = (x[start], x[stop])
        ys = (y[start], y[stop])
        plt.plot(xs, ys, "r-")

    title = "Score={0:.2f}".format(evaluate(tour, M))
    plt.title(title)
    savefig("demo.pdf")
def variation(args):
    """
    %prog variation P1.bed P2.bed F1.bed

    Associate IES in parents and progeny.
    """
    # The docstring above is handed to OptionParser as the usage text, so it
    # is kept as-is and the implementation notes live in comments.
    #
    # Steps:
    #   1. Prefix each feature name with its sample tag and pool the three
    #      BED files into one file (rebuilt only when need_update() says so).
    #   2. Merge the pooled file; merged sites whose names do not carry
    #      exactly three distinct sample tags are written to novel.bed and
    #      skipped.
    #   3. For the remaining sites, tally per-sample feature counts and, when
    #      P1 contributes exactly one feature, the start/end offsets of every
    #      F1 feature relative to it.
    #   4. Plot either the per-site F1 count histogram ("variant") or the
    #      absolute breakpoint offset histogram ("breakpoint").
    #
    # NOTE(review): another definition of `variation` is visible in this
    # file; confirm which one is meant to stay.
    p = OptionParser(variation.__doc__)
    p.add_option("--diversity", choices=("breakpoint", "variant"),
                 default="variant", help="Plot diversity")
    opts, args, iopts = p.set_image_options(args, figsize="6x6")

    if len(args) != 3:
        sys.exit(not p.print_help())

    # Sample tag = leading part of the file name, up to the first "-"
    pfs = [op.basename(x).split("-")[0] for x in args]
    P1, _, F1 = pfs
    newbedfile = "-".join(pfs) + ".bed"
    if need_update(args, newbedfile):
        newbed = Bed()
        for pf, filename in zip(pfs, args):
            for b in Bed(filename):
                b.accn = "-".join((pf, b.accn))
                b.score = None
                newbed.append(b)
        newbed.print_to_file(newbedfile, sorted=True)

    neworder = Bed(newbedfile).order
    mergedbedfile = mergeBed(newbedfile, nms=True)
    bed = Bed(mergedbedfile)
    valid = 0
    total_counts = Counter()
    F1_counts = []
    bp_diff = []
    novelbedfile = "novel.bed"
    # Context manager guarantees novel.bed is flushed and closed
    with open(novelbedfile, "w") as fw:
        for b in bed:
            accns = b.accn.split(",")
            pfs_counts = Counter(x.split("-")[0] for x in accns)
            if len(pfs_counts) != 3:
                print(b, file=fw)
                continue

            valid += 1
            total_counts += pfs_counts
            F1_counts.append(pfs_counts[F1])

            # Collect breakpoint positions between P1 and F1
            P1_accns = [x for x in accns if x.split("-")[0] == P1]
            if len(P1_accns) != 1:
                continue

            F1_accns = [x for x in accns if x.split("-")[0] == F1]
            # Each order entry unpacks to a pair; the second item carries
            # the .start/.end coordinates used below
            _, ref = neworder[P1_accns[0]]
            F1_features = [neworder[x][-1] for x in F1_accns]
            bp_diff.extend(f.start - ref.start for f in F1_features)
            bp_diff.extend(f.end - ref.end for f in F1_features)

    print("A total of {0} sites show consistent deletions across samples.".
          format(percentage(valid, len(bed))), file=sys.stderr)
    if not valid:
        # Nothing to tally or plot; unpacking an empty histogram would fail
        print("No consistent sites found, nothing to plot.", file=sys.stderr)
        return

    for pf, count in total_counts.items():
        print("{0:>9}: {1:.2f} deletions/site".
              format(pf, count * 1. / valid), file=sys.stderr)

    F1_counts = Counter(F1_counts)

    from jcvi.graphics.base import plt, savefig, set_ticklabels_helvetica

    plt.figure(1, (iopts.w, iopts.h))
    # Plot the IES variant number diversity
    if opts.diversity == "variant":
        left, height = zip(*sorted(F1_counts.items()))
        for x, h in zip(left, height):
            print("{0:>9} variants: {1}".format(x, h), file=sys.stderr)
            plt.text(x, h + 5, str(h), color="darkslategray", size=8,
                     ha="center", va="bottom", rotation=90)

        plt.bar(left, height, align="center")
        plt.xlabel("Identified number of IES per site")
        plt.ylabel("Counts")
        plt.title("IES variation in progeny pool")
        ax = plt.gca()
        set_ticklabels_helvetica(ax)
        savefig(F1 + ".counts.pdf")

    # Plot the IES breakpoint position diversity
    else:
        if not bp_diff:
            print("No breakpoint offsets collected, nothing to plot.",
                  file=sys.stderr)
            return

        bp_diff = Counter(bp_diff)
        # Fold negative and positive offsets together for the histogram
        bp_diff_abs = Counter()
        for k, v in bp_diff.items():
            bp_diff_abs[abs(k)] += v

        left, height = zip(*sorted(bp_diff_abs.items()))
        # zip() is an iterator on Python 3, so materialize before slicing;
        # only the first 21 bars get a text label
        for x, h in list(zip(left, height))[:21]:
            plt.text(x, h + 50, str(h), color="darkslategray", size=8,
                     ha="center", va="bottom", rotation=90)

        plt.bar(left, height, align="center")
        plt.xlabel("Progeny breakpoint relative to SB210")
        plt.ylabel("Counts")
        plt.xlim(-.5, 20.5)
        ax = plt.gca()
        set_ticklabels_helvetica(ax)
        savefig(F1 + ".breaks.pdf")

        # Serialize the signed offsets to a file
        with open("Breakpoint-offset-histogram.csv", "w") as fw:
            for k, v in sorted(bp_diff.items()):
                print("{0},{1}".format(k, v), file=fw)

        total = sum(height)
        zeros = bp_diff[0]
        within_20 = sum(v for i, v in bp_diff.items() if -20 <= i <= 20)
        print("No deviation: {0}".format(percentage(zeros, total)),
              file=sys.stderr)
        print(" Within 20bp: {0}".format(percentage(within_20, total)),
              file=sys.stderr)
def variation(args):
    """
    %prog variation P1.bed P2.bed F1.bed

    Associate IES in parents and progeny.
    """
    # The docstring is reused verbatim as the command-line usage text (it is
    # passed to OptionParser), so explanatory notes are kept in comments.
    #
    # Overview:
    #   * The three input BED files are pooled into one file, with every
    #     feature name prefixed by a per-file sample tag.
    #   * The pooled file is merged. A merged site is "valid" when its names
    #     carry exactly three distinct tags; other sites go to novel.bed.
    #   * Valid sites feed two tallies: number of F1 features per site, and
    #     (when P1 has exactly one feature at the site) start/end offsets of
    #     each F1 feature relative to that P1 feature.
    #   * --diversity selects which of the two tallies is plotted.
    #
    # Output goes through print(..., file=...): the old `print >> stream`
    # form raises TypeError when run on Python 3.
    #
    # NOTE(review): `variation` appears to be defined more than once in this
    # file; if both are at module level the later one wins -- confirm which
    # should remain.
    p = OptionParser(variation.__doc__)
    p.add_option("--diversity", choices=("breakpoint", "variant"),
                 default="variant", help="Plot diversity")
    opts, args, iopts = p.set_image_options(args, figsize="6x6")

    if len(args) != 3:
        sys.exit(not p.print_help())

    # Tag for each sample: file basename up to the first "-"
    pfs = [op.basename(x).split("-")[0] for x in args]
    P1, _, F1 = pfs
    newbedfile = "-".join(pfs) + ".bed"
    if need_update(args, newbedfile):
        newbed = Bed()
        for pf, filename in zip(pfs, args):
            for b in Bed(filename):
                b.accn = "-".join((pf, b.accn))
                b.score = None
                newbed.append(b)
        newbed.print_to_file(newbedfile, sorted=True)

    neworder = Bed(newbedfile).order
    mergedbedfile = mergeBed(newbedfile, nms=True)
    bed = Bed(mergedbedfile)
    valid = 0
    total_counts = Counter()
    F1_counts = []
    bp_diff = []
    novelbedfile = "novel.bed"
    # `with` closes novel.bed even if a record fails to process
    with open(novelbedfile, "w") as fw:
        for b in bed:
            accns = b.accn.split(",")
            tags = [x.split("-")[0] for x in accns]
            pfs_counts = Counter(tags)
            if len(pfs_counts) != 3:
                print(b, file=fw)
                continue

            valid += 1
            total_counts += pfs_counts
            F1_counts.append(pfs_counts[F1])

            # Collect breakpoint positions between P1 and F1
            P1_accns = [x for x, tag in zip(accns, tags) if tag == P1]
            if len(P1_accns) != 1:
                continue

            F1_accns = [x for x, tag in zip(accns, tags) if tag == F1]
            # Order entries unpack to a pair whose second item exposes
            # .start and .end
            _, ref = neworder[P1_accns[0]]
            F1_features = [neworder[x][-1] for x in F1_accns]
            bp_diff.extend(f.start - ref.start for f in F1_features)
            bp_diff.extend(f.end - ref.end for f in F1_features)

    print("A total of {0} sites show consistent deletions across samples.".
          format(percentage(valid, len(bed))), file=sys.stderr)
    if not valid:
        # With no valid site both histograms are empty and cannot be plotted
        print("No consistent sites found, nothing to plot.", file=sys.stderr)
        return

    for pf, count in total_counts.items():
        print("{0:>9}: {1:.2f} deletions/site".
              format(pf, count * 1. / valid), file=sys.stderr)

    F1_counts = Counter(F1_counts)

    from jcvi.graphics.base import plt, savefig, set_ticklabels_helvetica

    plt.figure(1, (iopts.w, iopts.h))
    # Plot the IES variant number diversity
    if opts.diversity == "variant":
        left, height = zip(*sorted(F1_counts.items()))
        for x, h in zip(left, height):
            print("{0:>9} variants: {1}".format(x, h), file=sys.stderr)
            plt.text(x, h + 5, str(h), color="darkslategray", size=8,
                     ha="center", va="bottom", rotation=90)

        plt.bar(left, height, align="center")
        plt.xlabel("Identified number of IES per site")
        plt.ylabel("Counts")
        plt.title("IES variation in progeny pool")
        ax = plt.gca()
        set_ticklabels_helvetica(ax)
        savefig(F1 + ".counts.pdf")

    # Plot the IES breakpoint position diversity
    else:
        if not bp_diff:
            print("No breakpoint offsets collected, nothing to plot.",
                  file=sys.stderr)
            return

        bp_diff = Counter(bp_diff)
        # Histogram is drawn on absolute offsets; signed counts go to the CSV
        bp_diff_abs = Counter()
        for k, v in bp_diff.items():
            bp_diff_abs[abs(k)] += v

        left, height = zip(*sorted(bp_diff_abs.items()))
        # Slice a list, not the zip iterator, to label the first 21 bars
        for x, h in list(zip(left, height))[:21]:
            plt.text(x, h + 50, str(h), color="darkslategray", size=8,
                     ha="center", va="bottom", rotation=90)

        plt.bar(left, height, align="center")
        plt.xlabel("Progeny breakpoint relative to SB210")
        plt.ylabel("Counts")
        plt.xlim(-.5, 20.5)
        ax = plt.gca()
        set_ticklabels_helvetica(ax)
        savefig(F1 + ".breaks.pdf")

        # Serialize the data to a file
        with open("Breakpoint-offset-histogram.csv", "w") as fw:
            for k, v in sorted(bp_diff.items()):
                print("{0},{1}".format(k, v), file=fw)

        total = sum(height)
        zeros = bp_diff[0]
        within_20 = sum(v for i, v in bp_diff.items() if -20 <= i <= 20)
        print("No deviation: {0}".format(percentage(zeros, total)),
              file=sys.stderr)
        print(" Within 20bp: {0}".format(percentage(within_20, total)),
              file=sys.stderr)