def CF_plot(args):
    """Plot SVD-ZRPKM values for one genomic region and save the figure.

    Attributes read from ``args``:
        region: location string, parsed with ``cf.parseLocString`` into
            (chromosome, start, stop).
        input: path handed to ``cf.rpkm_reader``.
        sample: the string ``'none'`` or an iterable of sample specifiers,
            each either ``"<color>:<sampleID>"`` or a bare ``"<sampleID>"``.
            Bare IDs are coloured from a repeating r/b/g/y cycle.
        output: path passed to ``plt.savefig``.

    ``data.rpkm`` is drawn as thin black lines and each requested sample is
    overlaid in its colour. Raw data (``cf.plotRawData``) is drawn for the
    first requested sample only.

    This function never returns: it calls ``sys.exit(0)`` after saving the
    figure, or ``sys.exit(1)`` when the plotting modules cannot be imported.
    """
    try:
        import matplotlib
        # The non-interactive backend must be selected before pyplot loads.
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        import pylab  # noqa: F401 -- imported only to verify it is installed
    except ImportError:
        print("[ERROR] One or more of the required modules for plotting cannot be loaded! Are matplotlib and pylab installed?")
        sys.exit(1)

    import itertools
    import locale

    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # An unusable environment locale only costs the thousands separators
        # in the title; it is not a reason to abort the plot.
        pass

    chrom, start, stop = cf.parseLocString(args.region)
    r = cf.rpkm_reader(args.input)

    data = r.getExonValuesByRegion(chrom, start, stop)
    _ = data.smooth()

    fig = plt.figure(figsize=(10, 5))
    ax = fig.add_subplot(111)

    ax.plot(data.rpkm, linewidth=0.3, c='k')

    if args.sample != 'none':
        # Cycle so that more than four uncoloured samples do not exhaust
        # the fallback palette.
        fallback_colors = itertools.cycle(['r', 'b', 'g', 'y'])
        for index, sample in enumerate(args.sample):
            try:
                color, sampleID = sample.split(":")
            except ValueError:
                # Not exactly one ':' -> treat the whole token as the ID.
                color = next(fallback_colors)
                sampleID = sample

            ax.plot(data.getSample([sampleID]), linewidth=1, c=color,
                    label=sampleID)

            if index == 0:
                raw = r.getExonValuesByRegion(chrom, start, stop,
                                              sampleList=[sampleID])
                cf.plotRawData(ax, raw.getSample([sampleID]), color=color)

        plt.legend(prop={'size': 10}, frameon=False)

    cf.plotGenes(ax, data)
    cf.plotGenomicCoords(plt, data)
    plt.xlim(0, data.shape[1])
    plt.ylim(-3, 3)

    plt.title("%s: %s - %s" % (
        cf.chrInt2Str(chrom),
        locale.format_string("%d", start, grouping=True),
        locale.format_string("%d", stop, grouping=True)))
    plt.xlabel("Position")
    plt.ylabel("SVD-ZRPKM Values")

    plt.savefig(args.output)

    sys.exit(0)
def CF_plotcalls(args):
    """Render one PNG per call listed in a tab/whitespace-delimited call file.

    NOTE(review): a second ``def CF_plotcalls`` appears later in this file;
    at import time the later definition replaces this one.

    Attributes read from ``args``:
        calls: path of the call file. The first line is skipped as a header;
            every other non-blank line must hold five whitespace-separated
            fields: sampleID, chromosome, start, stop, state.
        input: path handed to ``cf.rpkm_reader``.
        window: number of exons added on each side of the call when
            selecting the plotted range (the lower end is clamped at 0).
        outputdir: directory receiving ``<chr>_<start>_<stop>_<sampleID>.png``.

    Exits with status 1 if the plotting modules cannot be imported or the
    call file cannot be opened.
    """
    try:
        import matplotlib
        # The non-interactive backend must be selected before pyplot loads.
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        import pylab  # noqa: F401 -- imported only to verify it is installed
        from matplotlib.lines import Line2D
    except ImportError:
        print("[ERROR] One or more of the required modules for plotting cannot be loaded! Are matplotlib and pylab installed?")
        sys.exit(1)

    import locale
    import os

    try:
        locale.setlocale(locale.LC_ALL, 'en_US')
    except locale.Error:
        try:
            locale.setlocale(locale.LC_ALL, '')
        except locale.Error:
            # Only affects digit grouping in the plot title.
            pass

    callfile_fn = str(args.calls)
    try:
        callfile_f = open(callfile_fn, mode='r')
    except IOError:
        print('%s %s' % ('[ERROR] Cannot open call file for reading: ', callfile_fn))
        sys.exit(1)

    all_calls = []
    with callfile_f:
        callfile_f.readline()  # discard the header row
        for line in callfile_f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing empty lines
            sampleID, chrom, start, stop, _state = fields
            all_calls.append(
                (sampleID, int(cf.chrStr2Int(chrom)), int(start), int(stop)))

    r = cf.rpkm_reader(args.input)

    for sampleID, chrom, start, stop in all_calls:
        exons = r.getExonIDs(chrom, start, stop)

        window_start = max(exons[0] - args.window, 0)
        window_stop = exons[-1] + args.window

        data = r.getExonValuesByExons(chrom, window_start, window_stop)
        _ = data.smooth()

        fig = plt.figure(figsize=(10, 5))
        try:
            ax = fig.add_subplot(111)

            ax.plot(data.rpkm, linewidth=0.3, c='k')
            ax.plot(data.getSample([sampleID]), linewidth=1, c='r',
                    label=sampleID)

            raw = r.getExonValuesByExons(chrom, window_start, window_stop,
                                         sampleList=[sampleID])
            cf.plotRawData(ax, raw.getSample([sampleID]), color='r')

            plt.legend(prop={'size': 10}, frameon=False)

            cf.plotGenes(ax, data)
            cf.plotGenomicCoords(plt, data)

            # Index (within the plotted window) of the exons whose
            # coordinates match the call boundaries exactly.
            exon_start = np.where(data.exons["start"] == start)[0][0]
            exon_stop = np.where(data.exons["stop"] == stop)[0][0]
            # Thick black bar at y=2 marking the extent of the call.
            ax.add_line(Line2D([exon_start, exon_stop], [2, 2], color='k',
                               lw=6, linestyle='-', alpha=1,
                               solid_capstyle='butt'))

            plt.xlim(0, data.shape[1])
            plt.ylim(-3, 3)

            plt.title("%s: %s - %s" % (
                cf.chrInt2Str(chrom),
                locale.format_string("%d", start, grouping=True),
                locale.format_string("%d", stop, grouping=True)))
            plt.xlabel("Position")
            plt.ylabel("SVD-ZRPKM Values")

            outfile = "%s_%d_%d_%s.png" % (cf.chrInt2Str(chrom), start, stop,
                                           sampleID)
            plt.savefig(os.path.join(args.outputdir, outfile))
        finally:
            # Without this, one figure per call stays alive in pyplot.
            plt.close(fig)
def _extend_breakpoints(bkpoints, baseline, last_index):
    """Widen each (start, stop) pair to the nearest flanking baseline exon.

    ``baseline`` is a 1-D boolean array that is True where the sample is
    considered back inside the normal range. For each pair, the new start is
    the last True index before ``start`` (0 if there is none) and the new
    stop is the first True index at or after ``stop`` (``last_index`` if
    there is none). Returns a list of (new_start, new_stop) tuples.
    """
    extended = []
    for start, stop in bkpoints:
        left = np.flatnonzero(baseline[:start])
        right = np.flatnonzero(baseline[stop:])
        new_start = left[-1] if left.size else 0
        new_stop = (stop + right[0]) if right.size else last_index
        extended.append((new_start, new_stop))
    return extended


def CF_call(args):
    """Call duplications/deletions per sample and write them to a TSV file.

    NOTE(review): a second ``def CF_call`` appears later in this file; at
    import time the later definition replaces this one.

    Attributes read from ``args``:
        input: path of the input file, opened once with ``openFile`` to list
            the chromosome nodes and then again through ``cf.rpkm_reader``.
        output: path of the call file to write (tab-separated, with a
            ``sampleID/chromosome/start/stop/state`` header).
        threshold: smoothed values >= threshold seed "dup" calls and values
            <= -threshold seed "del" calls.

    Every call is also echoed to stdout. This function never returns: it
    exits with status 0 on success and 1 if either file cannot be opened.
    """
    h5file_in_fn = str(args.input)
    try:
        h5file_in = openFile(h5file_in_fn, mode='r')
    except IOError:
        print('%s %s' % ('[ERROR] Cannot open CoNIFER input file for reading: ', h5file_in_fn))
        sys.exit(1)

    try:
        # Every root node except the 'probes' and 'samples' tables is
        # treated as a chromosome.
        chrs_to_process = [
            node._v_title.replace("chr", "")
            for node in h5file_in.root
            if node._v_title not in ('probes', 'samples')
        ]
    finally:
        h5file_in.close()

    callfile_fn = str(args.output)
    try:
        callfile_f = open(callfile_fn, mode='w')
    except IOError:
        print('%s %s' % ('[ERROR] Cannot open output file for writing: ', callfile_fn))
        sys.exit(1)

    try:
        print('[INIT] Initializing caller at threshold = %f' % (args.threshold))

        r = cf.rpkm_reader(h5file_in_fn)

        all_calls = []

        for chrom in chrs_to_process:
            print('[RUNNING] Now processing chr%s' % chrom)
            data = r.getExonValuesByRegion(chrom)
            _ = data.smooth()

            mean = np.mean(data.rpkm, axis=1)
            sd = np.std(data.rpkm, axis=1)
            dup_limit = mean + 3 * sd
            # NOTE(review): this is -(mean + 3*sd), not mean - 3*sd. Kept
            # exactly as the original computed it -- confirm the intent.
            del_limit = -1 * mean - 3 * sd
            last_index = data.shape[1] - 1

            for sample in r.getSampleList():
                sample_data = data.getSample([sample]).flatten()

                dup_mask = sample_data >= args.threshold
                del_mask = sample_data <= -1 * args.threshold

                # Breakpoints are index pairs into this chromosome's arrays.
                dup_bkpoints = cf.getbkpoints(dup_mask)
                del_bkpoints = cf.getbkpoints(del_mask)

                dups = [
                    {"sampleID": sample,
                     "chromosome": cf.chrInt2Str(chrom),
                     "start": data.exons[new_start]["start"],
                     "stop": data.exons[new_stop]["stop"],
                     "state": "dup"}
                    for new_start, new_stop in _extend_breakpoints(
                        dup_bkpoints, sample_data < dup_limit, last_index)
                ]

                dels = [
                    {"sampleID": sample,
                     "chromosome": cf.chrInt2Str(chrom),
                     "start": data.exons[new_start]["start"],
                     "stop": data.exons[new_stop]["stop"],
                     "state": "del"}
                    for new_start, new_stop in _extend_breakpoints(
                        del_bkpoints, sample_data > del_limit, last_index)
                ]

                dels = cf.mergeCalls(dels)  # merges overlapping calls
                dups = cf.mergeCalls(dups)

                all_calls.extend(list(dels))
                all_calls.extend(list(dups))

        # Write the calls to the output file, echoing each one to stdout.
        header = ['sampleID', 'chromosome', 'start', 'stop', 'state']
        callfile_f.write('\t'.join(header) + "\n")
        for call in all_calls:
            row = "%s\t%s\t%d\t%d\t%s" % (call["sampleID"],
                                          call["chromosome"], call["start"],
                                          call["stop"], call["state"])
            print(row)
            callfile_f.write(row + "\n")
    finally:
        callfile_f.close()

    sys.exit(0)
def CF_plotcalls(args):
    """Render one PNG per call listed in a tab/whitespace-delimited call file.

    NOTE(review): an earlier ``def CF_plotcalls`` exists in this file; this
    later definition is the one that remains bound after import.

    Attributes read from ``args``:
        calls: path of the call file. The first line is skipped as a header;
            every other non-blank line must hold five whitespace-separated
            fields: sampleID, chromosome, start, stop, state.
        input: path handed to ``cf.rpkm_reader``.
        window: number of exons added on each side of the call when
            selecting the plotted range (the lower end is clamped at 0).
        outputdir: directory receiving ``<chr>_<start>_<stop>_<sampleID>.png``.

    Exits with status 1 if the plotting modules cannot be imported or the
    call file cannot be opened.
    """
    try:
        import matplotlib
        # The non-interactive backend must be selected before pyplot loads.
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        import pylab  # noqa: F401 -- imported only to verify it is installed
        from matplotlib.lines import Line2D
    except ImportError:
        print("[ERROR] One or more of the required modules for plotting cannot be loaded! Are matplotlib and pylab installed?")
        sys.exit(1)

    import locale
    import os

    try:
        locale.setlocale(locale.LC_ALL, 'en_US')
    except locale.Error:
        try:
            locale.setlocale(locale.LC_ALL, '')
        except locale.Error:
            # Only affects digit grouping in the plot title.
            pass

    callfile_fn = str(args.calls)
    try:
        callfile_f = open(callfile_fn, mode='r')
    except IOError:
        print('%s %s' % ('[ERROR] Cannot open call file for reading: ', callfile_fn))
        sys.exit(1)

    parsed_calls = []
    with callfile_f:
        callfile_f.readline()  # discard the header row
        for line in callfile_f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing empty lines
            sampleID, chrom, start, stop, _state = fields
            parsed_calls.append(
                (sampleID, int(cf.chrStr2Int(chrom)), int(start), int(stop)))

    r = cf.rpkm_reader(args.input)

    for sampleID, chrom, start, stop in parsed_calls:
        exons = r.getExonIDs(chrom, start, stop)

        window_start = max(exons[0] - args.window, 0)
        window_stop = exons[-1] + args.window

        data = r.getExonValuesByExons(chrom, window_start, window_stop)
        _ = data.smooth()

        fig = plt.figure(figsize=(10, 5))
        try:
            ax = fig.add_subplot(111)

            ax.plot(data.rpkm, linewidth=0.3, c='k')
            ax.plot(data.getSample([sampleID]), linewidth=1, c='r',
                    label=sampleID)

            raw = r.getExonValuesByExons(chrom, window_start, window_stop,
                                         sampleList=[sampleID])
            cf.plotRawData(ax, raw.getSample([sampleID]), color='r')

            plt.legend(prop={'size': 10}, frameon=False)

            cf.plotGenes(ax, data)
            cf.plotGenomicCoords(plt, data)

            # Index (within the plotted window) of the exons whose
            # coordinates match the call boundaries exactly.
            exon_start = np.where(data.exons["start"] == start)[0][0]
            exon_stop = np.where(data.exons["stop"] == stop)[0][0]
            # Thick black bar at y=2 marking the extent of the call.
            ax.add_line(Line2D([exon_start, exon_stop], [2, 2], color='k',
                               lw=6, linestyle='-', alpha=1,
                               solid_capstyle='butt'))

            plt.xlim(0, data.shape[1])
            plt.ylim(-3, 3)

            plt.title("%s: %s - %s" % (
                cf.chrInt2Str(chrom),
                locale.format_string("%d", start, grouping=True),
                locale.format_string("%d", stop, grouping=True)))
            plt.xlabel("Position")
            plt.ylabel("SVD-ZRPKM Values")

            outfile = "%s_%d_%d_%s.png" % (cf.chrInt2Str(chrom), start, stop,
                                           sampleID)
            plt.savefig(os.path.join(args.outputdir, outfile))
        finally:
            # Without this, one figure per call stays alive in pyplot.
            plt.close(fig)
def _extend_breakpoints(bkpoints, baseline, last_index):
    """Widen each (start, stop) pair to the nearest flanking baseline exon.

    ``baseline`` is a 1-D boolean array that is True where the sample is
    considered back inside the normal range. For each pair, the new start is
    the last True index before ``start`` (0 if there is none) and the new
    stop is the first True index at or after ``stop`` (``last_index`` if
    there is none). Returns a list of (new_start, new_stop) tuples.
    """
    extended = []
    for start, stop in bkpoints:
        left = np.flatnonzero(baseline[:start])
        right = np.flatnonzero(baseline[stop:])
        new_start = left[-1] if left.size else 0
        new_stop = (stop + right[0]) if right.size else last_index
        extended.append((new_start, new_stop))
    return extended


def CF_call(args):
    """Call duplications/deletions per sample and write them to a TSV file.

    NOTE(review): an earlier ``def CF_call`` exists in this file; this later
    definition is the one that remains bound after import.

    Attributes read from ``args``:
        input: path of the input file, opened once with ``openFile`` to list
            the chromosome nodes and then again through ``cf.rpkm_reader``.
        output: path of the call file to write (tab-separated, with a
            ``sampleID/chromosome/start/stop/state`` header).
        threshold: smoothed values >= threshold seed "dup" calls and values
            <= -threshold seed "del" calls.

    Every call is also echoed to stdout. This function never returns: it
    exits with status 0 on success and 1 if either file cannot be opened.
    """
    h5file_in_fn = str(args.input)
    try:
        h5file_in = openFile(h5file_in_fn, mode='r')
    except IOError:
        print('%s %s' % ('[ERROR] Cannot open CoNIFER input file for reading: ', h5file_in_fn))
        sys.exit(1)

    try:
        # Every root node except the 'probes' and 'samples' tables is
        # treated as a chromosome.
        chrs_to_process = [
            node._v_title.replace("chr", "")
            for node in h5file_in.root
            if node._v_title not in ('probes', 'samples')
        ]
    finally:
        h5file_in.close()

    callfile_fn = str(args.output)
    try:
        callfile_f = open(callfile_fn, mode='w')
    except IOError:
        print('%s %s' % ('[ERROR] Cannot open output file for writing: ', callfile_fn))
        sys.exit(1)

    try:
        print('[INIT] Initializing caller at threshold = %f' % (args.threshold))

        r = cf.rpkm_reader(h5file_in_fn)

        collected = []

        for chrom in chrs_to_process:
            print('[RUNNING] Now processing chr%s' % chrom)
            data = r.getExonValuesByRegion(chrom)
            _ = data.smooth()

            mean = np.mean(data.rpkm, axis=1)
            sd = np.std(data.rpkm, axis=1)
            dup_limit = mean + 3 * sd
            # NOTE(review): this is -(mean + 3*sd), not mean - 3*sd. Kept
            # exactly as the original computed it -- confirm the intent.
            del_limit = -1 * mean - 3 * sd
            last_index = data.shape[1] - 1

            for sample in r.getSampleList():
                sample_data = data.getSample([sample]).flatten()

                dup_mask = sample_data >= args.threshold
                del_mask = sample_data <= -1 * args.threshold

                # Breakpoints are index pairs into this chromosome's arrays.
                dup_bkpoints = cf.getbkpoints(dup_mask)
                del_bkpoints = cf.getbkpoints(del_mask)

                dups = [
                    {"sampleID": sample,
                     "chromosome": cf.chrInt2Str(chrom),
                     "start": data.exons[new_start]["start"],
                     "stop": data.exons[new_stop]["stop"],
                     "state": "dup"}
                    for new_start, new_stop in _extend_breakpoints(
                        dup_bkpoints, sample_data < dup_limit, last_index)
                ]

                dels = [
                    {"sampleID": sample,
                     "chromosome": cf.chrInt2Str(chrom),
                     "start": data.exons[new_start]["start"],
                     "stop": data.exons[new_stop]["stop"],
                     "state": "del"}
                    for new_start, new_stop in _extend_breakpoints(
                        del_bkpoints, sample_data > del_limit, last_index)
                ]

                dels = cf.mergeCalls(dels)  # merges overlapping calls
                dups = cf.mergeCalls(dups)

                collected.extend(list(dels))
                collected.extend(list(dups))

        # Write the calls to the output file, echoing each one to stdout.
        header = ['sampleID', 'chromosome', 'start', 'stop', 'state']
        callfile_f.write('\t'.join(header) + "\n")
        for call in collected:
            row = "%s\t%s\t%d\t%d\t%s" % (call["sampleID"],
                                          call["chromosome"], call["start"],
                                          call["stop"], call["state"])
            print(row)
            callfile_f.write(row + "\n")
    finally:
        callfile_f.close()

    sys.exit(0)