def profile(args): interval = args.interval datafiles = [x.strip() for x in args.datafiles] annotation = args.annotation outfile = args.outfile colors = parse_colors(args.colors) for x in args.datafiles: if '.bam' in x and not os.path.isfile("{0}.bai".format(x)): print( "Data file '{0}' does not have an index file. Creating an index file for {0}." .format(x)) pysam.index(x) trackgroups = process_groups(args.trackgroups) if not trackgroups: trackgroups = [[x] for x in range(1, len(datafiles) + 1)] scalegroups = process_groups(args.scalegroups) scale = args.scale if scale: try: scale = [float(x) for x in scale.split(",")] except Exception: print("Error in scale argument") sys.exit(1) if trackgroups and scalegroups: if len(trackgroups) != sum([len(x) for x in scalegroups]): sys.stderr.write("Track groups and scales do not match!\n") sys.exit() # Group the tracks according to track_groups tracks = [] for group in trackgroups: tracks.append([datafiles[i - 1] for i in group]) # Create the image profile_screenshot(outfile, interval, tracks, annotation=annotation, scalegroups=scalegroups, fontsize=args.textfontsize, colors=colors, bgmode=args.background, fragmentsize=args.fragmentsize, scale=scale, show_scale=args.show_scale, rmdup=args.rmdup, rmrepeats=args.rmrepeats, reverse=args.reverse, adjscale=args.adjscale)
def profile(args): interval = args.interval datafiles = [x.strip() for x in args.datafiles] annotation = args.annotation outfile = args.outfile colors = parse_colors(args.colors) for x in args.datafiles: if '.bam' in x and not os.path.isfile("{0}.bai".format(x)): print("Data file '{0}' does not have an index file. Creating an index file for {0}.".format(x)) pysam.index(x) trackgroups = process_groups(args.trackgroups) if not trackgroups: trackgroups = [[x] for x in range(1, len(datafiles) + 1)] scalegroups = process_groups(args.scalegroups) scale = args.scale if scale: try: scale = [int(x) for x in scale.split(",")] except Exception: print("Error in scale argument") sys.exit(1) if trackgroups and scalegroups: if len(trackgroups) != sum([len(x) for x in scalegroups]): sys.stderr.write("Track groups and scales do not match!\n") sys.exit() # Group the tracks according to track_groups tracks = [] for group in trackgroups: tracks.append([datafiles[i - 1] for i in group]) # Create the image profile_screenshot(outfile, interval, tracks, annotation=annotation, scalegroups=scalegroups, fontsize=args.textfontsize, colors=colors, bgmode=args.background, fragmentsize=args.fragmentsize, scale=scale, show_scale=args.show_scale, rmdup=args.rmdup, rmrepeats=args.rmrepeats, reverse=args.reverse, adjscale=args.adjscale )
def bandplot(args): if (0 > args.scalar) or (args.scalar > 100): print "ERROR: -P value has to be between 0 and 100" sys.exit(1) else: scalar = args.scalar if not args.datafiles and not args.readCount: print 'You should provide data file(s) or the read counts file.' sys.exit() if args.datafiles and args.readCount: print 'You should choose only ONE option. Either data file(s) or the read counts file.' sys.exit() clust_file = args.clust_file if args.datafiles: for x in args.datafiles: if '.bam' in x and not os.path.isfile("{0}.bai".format(x)): print "Data file '{0}' does not have an index file. Creating an index file for {0}.".format(x) pysam.index(x) datafiles = [x.strip() for x in args.datafiles] fragmentsize = args.fragmentsize colors = parse_colors(args.colors) scalegroups = process_groups(args.scalegroups) percs = [int(x) for x in args.percs.split(",")] rmdup = args.rmdup rpkm = args.rpkm rmrepeats = args.rmrepeats bins = args.bins summary = args.summary fontsize = args.textfontsize font = FontProperties(size=fontsize / 1.25, family=["Nimbus Sans L", "Helvetica", "sans-serif"]) # Calculate the profile data if args.datafiles: data = load_cluster_data(clust_file, datafiles, bins, rpkm, rmdup, rmrepeats, fragmentsize=fragmentsize) tracks = [os.path.basename(x) for x in datafiles] titles = [os.path.splitext(x)[0] for x in tracks] else: titles, data = load_read_counts(args.readCount) tracks = titles for x in data: for i in data[x]: bins = len(data[x][i]) break break # Get cluster information cluster_data = load_bed_clusters(clust_file) clusters = cluster_data.keys() #Init x-axis t = np.arange(bins) rows = len(tracks) cols = len(clusters) if summary: rows += 1 cols += 1 # Get a figure with a lot of subplots fig, axes = create_grid_figure(rows, cols, plotwidth=cfg.PLOTWIDTH, plotheight=cfg.PLOTHEIGHT, padleft=cfg.PADLEFT, padtop=cfg.PADTOP, pad=cfg.PAD, padright=cfg.PADRIGHT, padbottom=cfg.PADBOTTOM) track_max = [] for track_num, track in enumerate(tracks): percentiles = [scoreatpercentile([data[track][x] for x in cluster_data[cluster]], scalar) for cluster in clusters] track_max.append(max(percentiles)) for track_num, track in enumerate(tracks): for i,cluster in enumerate(clusters): # Retrieve axes ax = axes[track_num][i] # Get the data vals = np.array([data[track][x] for x in cluster_data[cluster]]) # Make the plot coverage_plot(ax, t, vals, colors[track_num % len(colors)], percs) # Get scale max maxscale = track_max[track_num] if scalegroups and len(scalegroups) > 0: for group in scalegroups: if (track_num + 1) in group: maxscale = max([track_max[j - 1] for j in group]) break # Set scale ax.set_ylim(0, maxscale) ax.set_xlim(0, bins - 1) # Cluster titles if track_num == 0: ax.set_title("%s\nn=%s" % (cluster, len(cluster_data[cluster])), font_properties=font) # Track title and scale if i == 0: pos = axes[track_num][0].get_position().get_points() text_y = (pos[1][1] + pos[0][1]) / 2 text_x = pos[0][0] - (cfg.PAD / fig.get_figwidth()) plt.figtext(text_x, text_y, titles[track_num], clip_on=False, horizontalalignment="right", verticalalignment="center", font_properties=font) plt.figtext(text_x, pos[1][1], "%.4g" % maxscale, clip_on=False, horizontalalignment="right", verticalalignment="top", font_properties=font) plt.figtext(text_x, pos[0][1], 0, clip_on=False, horizontalalignment="right", verticalalignment="bottom", font_properties=font) if summary: for i,track in enumerate(tracks): ax = axes[i][cols - 1] l = len(clusters) min_alpha = 0.3 max_alpha = 0.9 if l > 1: step = (max_alpha - min_alpha) / (l - 1) alphas = np.arange(min_alpha, max_alpha + step, step) else: alphas = [max_alpha] for j,cluster in enumerate(clusters): vals = np.array([data[track][x] for x in cluster_data[cluster]]) m = np.median(vals, axis=0) ax.plot(np.arange(len(m)), m, color=colors[i % len(colors)], alpha=alphas[j]) ax.set_ylim(0, track_max[i]) for i,cluster in enumerate(clusters): ax = axes[rows - 1][i] max_max = 0 for j,track in enumerate(tracks): vals = np.array([data[track][x] for x in cluster_data[cluster]]) m = np.median(vals, axis=0) ax.plot(np.arange(len(m)), m, color=colors[j % len(colors)], alpha=0.8) if track_max[j] > max_max: max_max = track_max[j] ax.set_ylim(0, max_max) ax = axes[rows - 1][cols - 1] ax.set_frame_on(False) ax.axes.get_yaxis().set_visible(False) ax.axes.get_xaxis().set_visible(False) print "Saving figure" plt.savefig(args.outfile, dpi=600)