def generateContactPlots(links_file,
                         bin_length,
                         chrom_file=wsn_chroms,
                         dir='.',
                         prefix='',
                         make_binned_file=True,
                         use_log_scale=False,
                         cross_segment_intensities=False):
    '''
    Bin the links of a link file and draw contact plots from the bins.

    links_file is handed to getBinnedLinkMatrix together with the
    per-segment bin starts.
    bin_length is the bin size passed to getChromBins for every segment.
    chrom_file is read with jpk_util.getChromSizes to get segment lengths.
    dir is the base output directory; plots go to dir/contact_plots and
    the optional binned file goes to dir/links.
    prefix is forwarded to makeBinnedFile and drawContactPlots.
    make_binned_file controls whether makeBinnedFile is called.
    use_log_scale and cross_segment_intensities are forwarded unchanged
    to drawContactPlots.

    Returns the link matrix produced by getBinnedLinkMatrix.
    '''
    plot_dir = jpk_util.getDir(dir + '/contact_plots')
    link_dir = jpk_util.getDir(dir + '/links')

    # Map every segment to the start coordinate of each of its bins
    seg_lens = jpk_util.getChromSizes(chrom_file)
    bin_dict = {
        segment: getChromBins(seg_lens[segment], bin_length)
        for segment in seg_lens
    }

    # Count the links that fall into every pair of bins
    link_matrix = getBinnedLinkMatrix(links_file, bin_dict, seg_lens)

    if make_binned_file:
        makeBinnedFile(link_matrix, bin_length, dir=link_dir, prefix=prefix)

    drawContactPlots(
        link_matrix,
        chrom_file,
        dir=plot_dir,
        prefix=prefix,
        use_log_scale=use_log_scale,
        cross_segment_intensities=cross_segment_intensities,
    )
    return link_matrix
def makeInteractionFiles(link_file,
                         dir='.',
                         prefix='',
                         chrom_size_file=wsn_chroms,
                         fromJuncs=True):
    '''
    Generates the interaction files used for Circos.

    link_file is the input link file.
    dir is the output directory (passed through jpk_util.getDir).
    prefix is forwarded to the file-generating helpers.
    chrom_size_file is forwarded to makeBedgraphFromLinks.
    fromJuncs selects the bedgraph source: when True a junction-site
    file is first made with makeJunctionSiteFile and the bedgraph is
    built from it; when False the bedgraph is built directly from
    link_file.

    Returns a tuple (junc_file, bedgraph). junc_file is None when
    fromJuncs is False. Any exception raised by the helpers propagates
    to the caller unchanged.
    '''
    dir = jpk_util.getDir(dir)

    junc_file = None
    bedgraph_source = link_file
    if fromJuncs:
        junc_file = makeJunctionSiteFile(link_file, dir=dir, prefix=prefix)
        bedgraph_source = junc_file

    bg = makeBedgraphFromLinks(bedgraph_source,
                               dir=dir,
                               prefix=prefix,
                               chrom_size_file=chrom_size_file)
    return (junc_file, bg)
def makeJunctionSiteFile(link_file,
                         dir='.',
                         prefix='',
                         out_name='',
                         num_nt_from_junc=10):
    '''
    Creates a link file of positions num_nt_from_junc nucleotides
    (10 by default) into the read from the junction sites.

    So if the link file says:
        PA  10  50  NA  7  30
    Then the junction file will say:
        PA  40  50  NA  7  17

    Only intersegmental links (first segment differs from the second)
    are written. Lines with fewer than 6 whitespace-separated fields
    are skipped. A 7th field (Circos color), when present, is carried
    over to the output line.

    dir is the output directory (passed through jpk_util.getDir).
    The output is dir/<prefix>_inter_junctions.links unless out_name is
    given, in which case it is dir/<out_name>.

    Returns the path of the junction file.
    '''
    dir = jpk_util.getDir(dir)
    junc_file = dir + '/' + prefix + '_inter_junctions.links'
    if out_name != '':
        junc_file = dir + '/' + out_name

    # Context managers guarantee both handles are closed even when a
    # malformed coordinate makes int() raise part-way through the file.
    with open(link_file, 'r') as f_in, open(junc_file, 'w') as f_out:
        for line in f_in:
            fields = line.split()
            if len(fields) < 6:
                continue
            seg1, st1, end1, seg2, st2, end2 = fields[0:6]
            if seg1 == seg2:
                # only look at intersegmental junctions
                continue
            st1_upd = str(int(end1) - num_nt_from_junc)
            end2_upd = str(int(st2) + num_nt_from_junc)
            out_fields = [seg1, st1_upd, end1, seg2, st2, end2_upd]
            if len(fields) > 6:
                # add circos color if it's there
                out_fields.append(fields[6])
            print('\t'.join(out_fields), file=f_out)
    return junc_file
def makeBedgraphFromLinks(link_file,
                          dir='.',
                          prefix='',
                          out_name='',
                          chrom_size_file=wsn_chroms):
    '''
    Generate a bedgraph from the links. This can act as a histogram for
    the interactions that can be added to the border of the Circos plot.

    link_file is the input link file.
    dir is the output directory (passed through jpk_util.getDir).
    The output is dir/<prefix>_inter.bedgraph unless out_name is given,
    in which case it is dir/<out_name>.
    chrom_size_file is given to genomeCoverageBed via -g.

    Returns the bedgraph path. The exit status of the shell pipeline is
    not checked, so the path is returned even if a command failed.
    '''
    # Local import keeps this block self-contained.
    import shlex

    dir = jpk_util.getDir(dir)
    bedgraph = dir + '/' + prefix + '_inter.bedgraph'
    if out_name != '':
        bedgraph = dir + '/' + out_name

    # This awk cmd will split the links into two separate lines, so:
    #   PA  10  100  NA  20  30
    # Will become:
    #   PA  10  100
    #   NA  20  30
    awk_cmd = '\'{print $1\"\\t\"$2\"\\t\"$3\"\\n\"$4\"\\t\"$5\"\\t\"$6}\''

    # To leverage genomeCoverageBed, need to use system commands.
    # Paths are shell-quoted so that spaces or shell metacharacters in
    # a file name cannot split or alter the pipeline.
    pipeline = ' '.join([
        'awk', awk_cmd, shlex.quote(link_file),
        '|', 'sort', '-k1,1', '-k2,2n',
        '|', 'genomeCoverageBed', '-bga', '-i', 'stdin',
        '-g', shlex.quote(chrom_size_file),
        '>', shlex.quote(bedgraph),
    ])
    os.system(pipeline)
    return bedgraph
def generateThresholdLinks(inter_links_file,
                           threshold=3,
                           window=20,
                           dir='.',
                           prefix='',
                           chroms_size_file=wsn_chroms,
                           out_file=''):
    '''
    Bin the links of inter_links_file and write out, as a link file, the
    bin pairs whose count is at least threshold.

    threshold is the minimum count a bin pair needs to be written.
    window is the bin length passed to cp.getChromBins.
    dir is the output directory (passed through jpk_util.getDir). When
    dir is exactly '.', a sub-path
    'binned_<window>_junc_sites_t<threshold>' is appended to it.
    prefix is used in the default output file name.
    chroms_size_file is read with jpk_util.getChromSizes.
    out_file, when non-empty, replaces the default output path.

    Each output line is: seg1, start1, end1, seg2, start2, end2
    (tab-separated), where start/end are the coordinates of the bin.

    Returns the path of the file written.
    '''
    seg_lens = jpk_util.getChromSizes(chroms_size_file)
    bin_dict = {
        segment: cp.getChromBins(seg_lens[segment], window)
        for segment in seg_lens
    }

    new_dir = jpk_util.getDir(dir)
    if dir == '.':
        # NOTE(review): this sub-path is not passed through
        # jpk_util.getDir; confirm the directory exists before open().
        new_dir = (new_dir + '/binned_' + str(window) + '_junc_sites_t' +
                   str(threshold))

    link_matrix = cp.getBinnedLinkMatrix(inter_links_file,
                                         bin_dict,
                                         seg_lens,
                                         just_inter=True,
                                         include_nojuncs=False)

    binned_thresh_file = (new_dir + '/' + prefix + '_binned_' + str(window) +
                          '_ge' + str(threshold) + '_links.txt')
    if out_file != '':
        binned_thresh_file = out_file

    def bin_span(seg, bin_idx):
        # Return (start, end) of a bin: it ends one before the next
        # bin's start, or at the segment length for the last bin.
        starts = bin_dict[seg]
        next_bin = bin_idx + 1
        if len(starts) > next_bin:
            end = starts[next_bin] - 1
        else:
            end = seg_lens[seg]
        return starts[bin_idx], end

    # The context manager closes the file even if a key fails to parse.
    with open(binned_thresh_file, 'w') as f_out:
        for link in link_matrix:
            if link_matrix[link] < threshold:
                continue
            # Keys are parsed as '<seg1>:<bin1>-<seg2>:<bin2>'
            tup1, tup2 = link.split('-')
            seg1, bin1 = tup1.split(':')
            seg2, bin2 = tup2.split(':')
            st1, end1 = bin_span(seg1, int(bin1))
            st2, end2 = bin_span(seg2, int(bin2))
            out_line = '\t'.join(
                [seg1, str(st1), str(end1), seg2, str(st2), str(end2)])
            print(out_line, file=f_out)
    return binned_thresh_file
def makeBinnedFile(link_matrix, bin_len, dir='.', prefix=''):
    '''
    Write every bin pair of link_matrix that has a count above zero to
    dir/<prefix>_binned_<bin_len>_links.txt, one
    '<bin key><TAB><count>' line per pair.

    link_matrix maps a bin-pair key (string) to its number of links.
    bin_len is only used in the output file name.
    dir is the output directory (passed through jpk_util.getDir).
    '''
    bins_with_vals = [(key, link_matrix[key]) for key in link_matrix
                      if link_matrix[key] > 0]

    dir = jpk_util.getDir(dir)
    binned_links_file = (dir + '/' + prefix + '_binned_' + str(bin_len) +
                         '_links.txt')

    # The context manager closes the file even if a write fails.
    with open(binned_links_file, 'w') as out_f:
        for bin_key, num_links in bins_with_vals:
            print(bin_key + "\t" + str(num_links), file=out_f)
def makeIntersegFile(link_file, dir='.', prefix='', out_name=''):
    '''
    Creates a link file of just intersegmental interactions, i.e. the
    lines of link_file whose 1st and 4th columns differ.

    dir is the output directory (passed through jpk_util.getDir).
    The output is dir/<prefix>_inter.links unless out_name is given, in
    which case it is dir/<out_name>.

    Returns the output path. The exit status of the awk command is not
    checked.
    '''
    # Local import keeps this block self-contained.
    import shlex

    dir = jpk_util.getDir(dir)
    interseg_file = dir + '/' + prefix + '_inter.links'
    if out_name != '':
        interseg_file = dir + '/' + out_name

    # Paths are shell-quoted so that spaces or shell metacharacters in
    # a file name cannot split or alter the command.
    awk_cmd = ('awk \'$1!=$4\' ' + shlex.quote(link_file) + ' > ' +
               shlex.quote(interseg_file))
    os.system(awk_cmd)
    return interseg_file
def generateSashimi(links_file, dir='.', prefix=''):
    '''
    Write a junction BED file (dir/<prefix>_sashimi.bed) from the
    intrasegmental links of links_file.

    links_file should be the output from makeLinkFile
    dir is the output directory (passed through jpk_util.getDir)
    prefix is the prefix for the sashimi bed file

    A link is written only when both ends are on the same segment, the
    two start coordinates are more than 3 nt apart, and neither start
    falls inside the other end's interval. Lines with fewer than 6
    fields are skipped. Each record is a two-block BED line spanning
    from the smaller start to the larger end.
    '''
    dir = jpk_util.getDir(dir)
    sashimi_bed = dir + '/' + prefix + '_sashimi.bed'
    bed_line = ('{}\t{}\t{}\t.\t1\t-\t{}\t{}\t150,50,50,30'
                '\t2\t{},{}\t0,{}\n')

    # Context managers close both files even if a coordinate fails to
    # parse as an integer part-way through.
    with open(links_file, 'r') as f1, open(sashimi_bed, 'w') as f2:
        # This will allow IGV to interpret the file as junctions
        # NOTE(review): the literal ' or ' inside the track line looks
        # unintended; confirm against the IGV track-line format.
        f2.write('track name=%s_junctions or graphType=junctions\n' % prefix)

        for line in f1:
            ln = line.split()
            if len(ln) < 6:
                continue

            seg1 = ln[0]
            start1 = int(ln[1])
            end1 = int(ln[2])
            seg2 = ln[3]
            start2 = int(ln[4])
            end2 = int(ln[5])

            intra_segmental = seg1 == seg2
            more_than_3_nt = abs(start1 - start2) > 3
            non_overlapping = ((start1 < start2 or start1 > end2)
                               and (start2 < start1 or start2 > end1))
            if not (intra_segmental and more_than_3_nt
                    and non_overlapping):
                continue

            s1 = min(start1, start2)
            s2 = max(start1, start2)
            e1 = min(end1, end2)
            e2 = max(end1, end2)
            # Block sizes are e1 - s1 and e2 - s2; the second block
            # starts s2 - s1 after the record start.
            f2.write(bed_line.format(seg1, s1, e2, s1, e2,
                                     e1 - s1, e2 - s2, s2 - s1))
def runCircosFromLinks(links_file,
                       dir='.',
                       prefix='',
                       chrom_size_file=wsn_chroms,
                       colored=False,
                       hitsclip_bg_path=None,
                       include_hist=True,
                       ribbon=False,
                       fromJuncs=True):
    '''
    Build the Circos input files and configuration for links_file and
    run circos on them.

    dir is the base output directory (passed through jpk_util.getDir);
    interaction files and the image go to dir/circos and the
    configuration to dir/circos/conf.
    prefix names the generated files; the image is '<prefix>_circos'.
    chrom_size_file and fromJuncs are forwarded to makeInteractionFiles.
    colored, hitsclip_bg_path and ribbon are forwarded to makeCircosConf.
    include_hist: when False no histogram path is given to
    makeCircosConf (the bedgraph is still generated).

    The exit status of the circos command is not checked.
    '''
    # Local import keeps this block self-contained.
    import shlex

    dir = jpk_util.getDir(dir)
    circos_dir = dir + '/circos'
    conf_dir = dir + '/circos/conf'

    # The junction file path is not needed here, only the bedgraph.
    _juncs, bg = makeInteractionFiles(links_file,
                                      dir=circos_dir,
                                      prefix=prefix,
                                      chrom_size_file=chrom_size_file,
                                      fromJuncs=fromJuncs)
    if not include_hist:
        bg = None

    conf_file = makeCircosConf(links_file,
                               prefix=prefix,
                               dir=conf_dir,
                               hitsclip_bg_path=hitsclip_bg_path,
                               hist_path=bg,
                               ribbon=ribbon,
                               colored=colored)

    # Arguments are shell-quoted so that spaces or shell metacharacters
    # in a path or prefix cannot split or alter the command.
    pipeline = ' '.join([
        'circos',
        '-conf', shlex.quote(conf_file),
        '-outputdir', shlex.quote(circos_dir),
        '-outputfile', shlex.quote(prefix + '_circos'),
    ])
    os.system(pipeline)
def main():
    '''
    Command-line entry point: parse the options, make the link file
    from the junction file and run the requested plots.

    Options handled:
        -h       show usage and exit
        -i FILE  junction file (required)
        -o DIR   output directory (default '.')
        -p STR   prefix (default: input basename up to its first '.')
        -s FILE  chromosome size file
        -g FILE  HITS-CLIP file forwarded to the Circos step
        -b INT   bin size for the contact plots
        -c       colored Circos links
        -H       do not include the histogram in the Circos plot
        -l       log scale for the contact plots
        -x       cross-segment intensities for the contact plots
        -C / -S / -U   run the Circos / sashimi / contact-plot step
        -v       accepted; currently has no effect in this function
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hCSUxclvi:o:p:s:Hg:b:')
    except getopt.GetoptError:
        displayUsage()
        sys.exit(2)

    doCircos = False
    doSashimi = False
    doContact = False
    junction_file = ''
    prefix = ''
    output = '.'
    chrom_size_file = wsn_chroms
    hits_clip_file = None
    bin_size = DEFAULT_BIN_SIZE
    use_log_scale = False
    colored = False
    include_hist = True
    cross_segment_intensities = False

    for opt, arg in opts:
        if opt == '-h':
            displayUsage()
            sys.exit()
        elif opt == "-i":
            junction_file = arg
        elif opt == "-o":
            output = arg
        elif opt == "-p":
            prefix = arg
        elif opt == "-v":
            verbose = True  # parsed but not used anywhere below
        elif opt == "-c":
            colored = True
        elif opt == "-s":
            chrom_size_file = arg
        elif opt == '-H':
            include_hist = False
        elif opt == '-g':
            hits_clip_file = arg
        elif opt == '-b':
            bin_size = int(arg)
        elif opt == '-l':
            use_log_scale = True
        elif opt == '-x':
            cross_segment_intensities = True
        elif opt == '-C':
            doCircos = True
        elif opt == '-S':
            doSashimi = True
        elif opt == '-U':
            doContact = True

    chrom_size_file = jpk_util.getDir(chrom_size_file)

    if junction_file == '':
        displayUsage()
        sys.exit(2)

    if prefix == '':
        # Everything before the first '.' of the input file name. Using
        # split() instead of index() keeps the whole name when it has no
        # '.' rather than raising ValueError.
        prefix = os.path.basename(junction_file).split('.')[0]

    links_dir = output + '/links'
    link_file = jpk_util.makeLinkFile(junction_file,
                                      dir=links_dir,
                                      prefix=prefix)

    if doCircos:
        inter_links = circos.makeIntersegFile(link_file,
                                              dir=output + '/circos',
                                              prefix=prefix)
        circos.runCircosFromLinks(inter_links,
                                  dir=output,
                                  prefix=prefix,
                                  chrom_size_file=chrom_size_file,
                                  colored=colored,
                                  hitsclip_bg_path=hits_clip_file,
                                  include_hist=include_hist)

    if doSashimi:
        sashimi.generateSashimi(link_file,
                                dir=output + '/sashimi',
                                prefix=prefix)

    if doContact:
        contactPlots.generateContactPlots(
            link_file,
            bin_size,
            chrom_file=chrom_size_file,
            dir=output,
            prefix=prefix,
            use_log_scale=use_log_scale,
            cross_segment_intensities=cross_segment_intensities)
import sys
import time
import re
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
import math
import numpy as np
import scipy
import jpk_util
from matplotlib import figure
from PIL import Image
from PIL import ImageChops
from subprocess import call

wsn_chroms = jpk_util.getDir('../genome/WSN/WSN.chrom.sizes')

# Path used for annotation of the upper triangle plots
SEGMENT_AXES = ('/Users/jpk90/Desktop/scripts/python/img/segment_axes')


def getChromBins(chrom_len, bin_len):
    '''
    Return the list of bin start coordinates for a segment.

    chrom_len is the length of the segment.
    bin_len is the requested bin length.

    The first bin always starts at 0. When bin_len is larger than
    chrom_len the whole segment is a single bin and [0] is returned.
    Otherwise chrom_len // bin_len bins are made; the part of the
    segment that does not fill a whole bin is split in two, with half
    of it (rounded down) shifting every bin start after the first, so
    the first and last bins absorb the leftover.

    Example: getChromBins(1000, 300) returns [0, 350, 650].
    '''
    if bin_len > chrom_len:
        return [0]

    num_windows = chrom_len // bin_len
    # Leftover is what remains after fitting num_windows whole bins.
    # The remainder must be taken against bin_len; taking it against
    # num_windows (as before) gave a value unrelated to the leftover.
    leftover_chrom = (chrom_len % bin_len) // 2

    bins = [0]
    bins.extend(leftover_chrom + bin_len * i for i in range(1, num_windows))
    return bins
def makeCircosConf(inter_links,
                   prefix='',
                   dir='.',
                   colored=False,
                   hitsclip_bg_path=None,
                   hist_path=None,
                   use_redux=True,
                   out_name='',
                   ribbon=False):
    '''
    Write a Circos configuration file, dir/<prefix>.conf, by copying
    the template configuration and filling in the per-run values.

    inter_links is the link file to plot.
    colored defines whether the junctions will be colored or not. When
    it is False and the link file has lines with more than 6 fields, a
    copy holding only the first 6 fields of each line is written next
    to it with '_nocolor' inserted before the extension, and that copy
    is used instead.
    hitsclip_bg_path is an optional parameter to include a bedgraph of
    HITS-CLIP data which will be seen around the plot.
    hist_path is an optional histogram bedgraph path.
    ribbon selects 'ribbon = yes' or 'ribbon = no'.
    use_redux and out_name are accepted but not used by this function.

    Template lines whose first token is links_file, hist_file,
    hits_clip_file, ribbon or karyotype are replaced; hist_file and
    hits_clip_file lines are only replaced when the matching path was
    given. Every other line is copied unchanged.

    Returns the path of the configuration file.
    '''
    dir = jpk_util.getDir(dir)

    if not colored:
        with open(inter_links, 'r') as f_in:
            need_new_link_file = any(
                len(line.split()) > 6 for line in f_in)

        if need_new_link_file:
            # Insert the suffix before the extension of the file name
            # only. A plain str.replace on the whole path would also
            # rewrite directory components that contain the base name.
            root, ext = os.path.splitext(inter_links)
            new_link_file = root + '_nocolor' + ext
            with open(inter_links, 'r') as f_in, \
                    open(new_link_file, 'w') as f_out:
                for line in f_in:
                    fields = line.split()
                    if len(fields) >= 6:
                        print('\t'.join(fields[0:6]), file=f_out)
            inter_links = new_link_file

    out_circos_name = dir + '/' + prefix + '.conf'

    with open(conf_template, 'r') as f_in, \
            open(out_circos_name, 'w') as f_out:
        for line in f_in:
            ln = line.split()
            key = ln[0] if ln else None
            if key == 'links_file':
                f_out.write('links_file = ' + inter_links + '\n')
            elif key == 'hist_file' and hist_path:
                f_out.write('hist_file = ' + hist_path + '\n')
            elif key == 'hits_clip_file' and hitsclip_bg_path:
                f_out.write('hits_clip_file = ' + hitsclip_bg_path + '\n')
            elif key == 'ribbon':
                if ribbon:
                    f_out.write('ribbon = yes\n')
                else:
                    f_out.write('ribbon = no\n')
            elif key == 'karyotype':
                f_out.write('karyotype = ' + influenza_karyotype + '\n')
            else:
                f_out.write(line)

    return out_circos_name
import os import sys import subprocess import jpk_util import contactPlots as cp from subprocess import call # Circos template configuration path conf_template = jpk_util.getDir('../plots_lib/circos/template.conf') influenza_karyotype = jpk_util.getDir( '../plots_lib/circos/influenza_karyotype.txt') wsn_chroms = jpk_util.getDir('../genome/WSN/WSN.chrom.sizes') def makeIntersegFile(link_file, dir='.', prefix='', out_name=''): ''' Creates a link file of just intersegmental ineractions ''' dir = jpk_util.getDir(dir) interseg_file = dir + '/' + prefix + '_inter.links' if (out_name != ''): interseg_file = dir + '/' + out_name awk_cmd = 'awk \'$1!=$4\' ' + link_file + ' > ' + interseg_file os.system(awk_cmd) return interseg_file def makeJunctionSiteFile(link_file, dir='.', prefix='',