Ejemplo n.º 1
0
def generateContactPlots(links_file,
                         bin_length,
                         chrom_file=wsn_chroms,
                         dir='.',
                         prefix='',
                         make_binned_file=True,
                         use_log_scale=False,
                         cross_segment_intensities=False):
    '''
    Bin the links of links_file and draw contact plots from the counts.

    links_file is handed to getBinnedLinkMatrix together with the bin
        layout of every segment
    bin_length is the bin size passed to getChromBins for each segment
    chrom_file is read with jpk_util.getChromSizes to get segment lengths
    dir is the base output directory; plots go to dir/contact_plots and
        the optional binned link file goes to dir/links
    prefix is forwarded to the file-writing and plotting helpers
    make_binned_file controls whether makeBinnedFile is called
    use_log_scale and cross_segment_intensities are forwarded unchanged
        to drawContactPlots

    Returns the link matrix produced by getBinnedLinkMatrix.
    '''
    plot_dir = jpk_util.getDir(dir + '/contact_plots')
    link_dir = jpk_util.getDir(dir + '/links')

    # Map every segment to the start coordinates of its bins
    seg_lens = jpk_util.getChromSizes(chrom_file)
    bin_dict = {
        seg_name: getChromBins(seg_lens[seg_name], bin_length)
        for seg_name in seg_lens
    }

    link_matrix = getBinnedLinkMatrix(links_file, bin_dict, seg_lens)

    if make_binned_file:
        makeBinnedFile(link_matrix, bin_length, dir=link_dir, prefix=prefix)

    plot_options = dict(dir=plot_dir,
                        prefix=prefix,
                        use_log_scale=use_log_scale,
                        cross_segment_intensities=cross_segment_intensities)
    drawContactPlots(link_matrix, chrom_file, **plot_options)

    return link_matrix
Ejemplo n.º 2
0
def makeInteractionFiles(link_file,
                         dir='.',
                         prefix='',
                         chrom_size_file=wsn_chroms,
                         fromJuncs=True):
    '''
    Generates the link-derived files used for Circos.

    link_file is the link file to start from
    dir is the output directory (resolved through jpk_util.getDir)
    prefix is forwarded to the helpers that name the output files
    chrom_size_file is forwarded to makeBedgraphFromLinks
    fromJuncs selects the bedgraph input: when true, a junction site file
        is first made with makeJunctionSiteFile and the bedgraph is built
        from it; otherwise the bedgraph is built directly from link_file

    Returns a tuple (junc_file, bedgraph); junc_file is None when
    fromJuncs is false. Exceptions raised by the helpers propagate to the
    caller unchanged.
    '''
    dir = jpk_util.getDir(dir)

    junc_file = None
    bedgraph_source = link_file
    if fromJuncs:
        junc_file = makeJunctionSiteFile(link_file, dir=dir, prefix=prefix)
        bedgraph_source = junc_file

    bg = makeBedgraphFromLinks(bedgraph_source,
                               dir=dir,
                               prefix=prefix,
                               chrom_size_file=chrom_size_file)
    return (junc_file, bg)
Ejemplo n.º 3
0
def makeJunctionSiteFile(link_file,
                         dir='.',
                         prefix='',
                         out_name='',
                         num_nt_from_junc=10):
    '''
    Creates a link file of positions num_nt_from_junc nucleotides (10 by
    default) into the read from the junction sites. So if the link file
    says:
        PA  10  50  NA  7  30
    Then the junction file will say:
        PA  40  50  NA  7  17

    Only intersegmental links (first and fourth fields differ) are kept,
    and lines with fewer than six whitespace-separated fields are
    skipped. A seventh field, when present, is copied through as the
    circos color.

    link_file is the input link file
    dir is the output directory (resolved through jpk_util.getDir)
    prefix names the default output, <dir>/<prefix>_inter_junctions.links
    out_name, when not empty, replaces the default output file name

    Returns the path of the junction file that was written.
    '''
    dir = jpk_util.getDir(dir)
    junc_file = dir + '/' + prefix + '_inter_junctions.links'
    if out_name != '':
        junc_file = dir + '/' + out_name

    # Context managers close both handles even if a line fails to parse
    with open(link_file, 'r') as f_in, open(junc_file, 'w') as f_out:
        for line in f_in:
            fields = line.split()
            if len(fields) < 6:
                continue
            seg1, st1, end1, seg2, st2, end2 = fields[0:6]

            if seg1 == seg2:  # only look at intersegmental junctions
                continue

            st1_upd = str(int(end1) - num_nt_from_junc)
            end2_upd = str(int(st2) + num_nt_from_junc)
            out_fields = [seg1, st1_upd, end1, seg2, st2, end2_upd]
            if len(fields) > 6:  # add circos color if it's there
                out_fields.append(fields[6])
            print('\t'.join(out_fields), file=f_out)

    return junc_file
Ejemplo n.º 4
0
def makeBedgraphFromLinks(link_file,
                          dir='.',
                          prefix='',
                          out_name='',
                          chrom_size_file=wsn_chroms):
    '''
    Generate a bedgraph from the links. This can act as a histogram for
    the interactions that can be added to the border of the Circos plot

    link_file is the input link file
    dir is the output directory (resolved through jpk_util.getDir)
    prefix names the default output, <dir>/<prefix>_inter.bedgraph
    out_name, when not empty, replaces the default output file name
    chrom_size_file is passed to genomeCoverageBed through -g

    Returns the bedgraph path. The exit status of the shell pipeline is
    not checked, so the path is returned even if a command failed.
    '''
    import shlex  # local import: only needed to quote shell arguments

    dir = jpk_util.getDir(dir)
    bedgraph = dir + '/' + prefix + '_inter.bedgraph'
    if out_name != '':
        bedgraph = dir + '/' + out_name

    # This awk cmd will split the links into two separate lines, so:
    #   PA  10  100  NA  20  30
    # Will become:
    #   PA  10  100
    #   NA  20  30
    awk_cmd = '\'{print $1\"\\t\"$2\"\\t\"$3\"\\n\"$4\"\\t\"$5\"\\t\"$6}\''

    # To leverage genomeCoverageBed, need to use system commands.
    # Paths are quoted so that spaces or shell metacharacters in them
    # cannot split or alter the pipeline.
    pipeline = ' '.join([
        'awk', awk_cmd, shlex.quote(link_file), '|',
        'sort', '-k1,1', '-k2,2n', '|',
        'genomeCoverageBed', '-bga', '-i', 'stdin',
        '-g', shlex.quote(chrom_size_file), '>',
        shlex.quote(bedgraph)
    ])
    os.system(pipeline)
    return bedgraph
Ejemplo n.º 5
0
def _binSpan(segment, bin_index, bin_dict, seg_lens):
    '''
    Return the (start, end) coordinates of bin number bin_index on segment.

    The start is the bin's entry in bin_dict[segment]. The end is one less
    than the start of the following bin, or seg_lens[segment] when the bin
    is the last one of the segment.
    '''
    starts = bin_dict[segment]
    next_index = bin_index + 1
    if len(starts) > next_index:
        end = starts[next_index] - 1
    else:
        end = seg_lens[segment]
    return starts[bin_index], end


def generateThresholdLinks(inter_links_file,
                           threshold=3,
                           window=20,
                           dir='.',
                           prefix='',
                           chroms_size_file=wsn_chroms,
                           out_file=''):
    '''
    Write a link file of the binned links whose count reaches threshold.

    inter_links_file is binned with cp.getBinnedLinkMatrix (just_inter
        is True, include_nojuncs is False)
    threshold is the minimum count (inclusive) for a bin pair to be kept
    window is the bin size passed to cp.getChromBins
    dir is the output directory; when it is left as '.', the output goes
        under <dir>/binned_<window>_junc_sites_t<threshold>
    prefix is used in the default output file name
    chroms_size_file is read with jpk_util.getChromSizes
    out_file, when not empty, is used as the full output path

    Each kept bin pair is written as one tab-separated line:
        seg1  start1  end1  seg2  start2  end2

    Returns the path of the file that was written.
    '''
    seg_lens = jpk_util.getChromSizes(chroms_size_file)
    bin_dict = {
        segment: cp.getChromBins(seg_lens[segment], window)
        for segment in seg_lens
    }

    new_dir = jpk_util.getDir(dir)
    if dir == '.':
        # NOTE(review): this sub-directory is not passed through
        # jpk_util.getDir -- confirm that it exists before this is called
        new_dir = (new_dir + '/binned_' + str(window) + '_junc_sites_t' +
                   str(threshold))

    link_matrix = cp.getBinnedLinkMatrix(inter_links_file,
                                         bin_dict,
                                         seg_lens,
                                         just_inter=True,
                                         include_nojuncs=False)

    binned_thresh_file = (new_dir + '/' + prefix + '_binned_' + str(window) +
                          '_ge' + str(threshold) + '_links.txt')
    if out_file != '':
        binned_thresh_file = out_file

    # The context manager closes the file even if a matrix key is malformed
    with open(binned_thresh_file, 'w') as f_out:
        for link in link_matrix:
            if link_matrix[link] < threshold:
                continue

            # Keys are split as '<seg1>:<bin1>-<seg2>:<bin2>'
            tup1, tup2 = link.split('-')
            seg1, bin1 = tup1.split(':')
            seg2, bin2 = tup2.split(':')

            st1, end1 = _binSpan(seg1, int(bin1), bin_dict, seg_lens)
            st2, end2 = _binSpan(seg2, int(bin2), bin_dict, seg_lens)

            out_line = '\t'.join(
                [seg1, str(st1), str(end1), seg2, str(st2), str(end2)])
            print(out_line, file=f_out)

    return binned_thresh_file
Ejemplo n.º 6
0
def makeBinnedFile(link_matrix, bin_len, dir='.', prefix=''):
    '''
    Write every bin pair of link_matrix with a positive count to a file.

    link_matrix maps a bin-pair key (a string) to its number of links
    bin_len is only used to name the output file
    dir is the output directory (resolved through jpk_util.getDir)
    prefix is the prefix of the output file name

    The file is <dir>/<prefix>_binned_<bin_len>_links.txt and holds one
    "<key><TAB><count>" line per bin pair whose count is above zero.
    Nothing is returned.
    '''
    bins_with_vals = [(key, link_matrix[key]) for key in link_matrix
                      if link_matrix[key] > 0]
    dir = jpk_util.getDir(dir)

    binned_links_file = (dir + '/' + prefix + '_binned_' + str(bin_len) +
                         '_links.txt')
    # The context manager closes the file even if a write fails
    with open(binned_links_file, 'w') as out_f:
        for bin_key, num_links in bins_with_vals:
            print(bin_key + "\t" + str(num_links), file=out_f)
Ejemplo n.º 7
0
def makeIntersegFile(link_file, dir='.', prefix='', out_name=''):
    '''
    Creates a link file of just intersegmental interactions

    link_file is the input link file
    dir is the output directory (resolved through jpk_util.getDir)
    prefix names the default output, <dir>/<prefix>_inter.links
    out_name, when not empty, replaces the default output file name

    The filtering is done by awk, which keeps the lines whose first and
    fourth fields differ. The exit status of the command is not checked.

    Returns the path of the intersegmental link file.
    '''
    import shlex  # local import: only needed to quote shell arguments

    dir = jpk_util.getDir(dir)
    interseg_file = dir + '/' + prefix + '_inter.links'
    if out_name != '':
        interseg_file = dir + '/' + out_name

    # Quote both paths so spaces or shell metacharacters cannot break
    # (or alter) the command line
    awk_cmd = ('awk \'$1!=$4\' ' + shlex.quote(link_file) + ' > ' +
               shlex.quote(interseg_file))
    os.system(awk_cmd)
    return interseg_file
Ejemplo n.º 8
0
def generateSashimi(links_file, dir='.', prefix=''):
    '''
    Write a junction bed file for sashimi plots from a link file.

    links_file should be the output from makeLinkFile
    dir is the output directory
    prefix is the prefix for the sashimi bed file

    The output is <dir>/<prefix>_sashimi.bed. A link is written only when
    both ends are on the same segment, the two start positions are more
    than 3 nt apart and the two intervals do not overlap. Lines with
    fewer than six whitespace-separated fields are skipped.
    '''
    dir = jpk_util.getDir(dir)

    sashimi_bed = dir + '/' + prefix + '_sashimi.bed'

    # Context managers close both files even if a coordinate fails to
    # parse as an integer part-way through
    with open(links_file, 'r') as f_in, open(sashimi_bed, 'w') as f_out:
        # This will allow IGV to interpret the file as junctions
        f_out.write('track name=%s_junctions or graphType=junctions\n' %
                    prefix)

        for line in f_in:
            ln = line.strip().split()
            if len(ln) < 6:
                continue

            seg1 = ln[0]
            start1 = int(ln[1])
            end1 = int(ln[2])
            seg2 = ln[3]
            start2 = int(ln[4])
            end2 = int(ln[5])

            intra_segmental = seg1 == seg2
            more_than_3_nt = abs(start1 - start2) > 3
            non_overlapping = ((start1 < start2 or start1 > end2)
                               and (start2 < start1 or start2 > end1))
            if not (intra_segmental and more_than_3_nt and non_overlapping):
                continue

            # Order the two intervals so that block 1 is the leftmost one
            s1 = min(start1, start2)
            s2 = max(start1, start2)
            e1 = min(end1, end2)
            e2 = max(end1, end2)
            f_out.write(
                ('{}\t{}\t{}\t.\t1\t-\t{}\t{}\t150,50' +
                 ',50,30\t2\t{},{}\t0,{}\n').format(seg1, s1, e2, s1, e2,
                                                    e1 - s1, e2 - s2, s2 - s1))
Ejemplo n.º 9
0
def runCircosFromLinks(links_file,
                       dir='.',
                       prefix='',
                       chrom_size_file=wsn_chroms,
                       colored=False,
                       hitsclip_bg_path=None,
                       include_hist=True,
                       ribbon=False,
                       fromJuncs=True):
    '''
    Build the Circos input files and configuration, then run circos.

    links_file is the link file to plot
    dir is the base output directory; everything is written under
        <dir>/circos and the configuration under <dir>/circos/conf
    prefix names the outputs; the plot is written as <prefix>_circos
    chrom_size_file and fromJuncs are forwarded to makeInteractionFiles
    colored, hitsclip_bg_path and ribbon are forwarded to makeCircosConf
    include_hist controls whether the bedgraph from makeInteractionFiles
        is passed to makeCircosConf as hist_path (None is passed otherwise)

    The configuration is built from links_file itself; the junction file
    returned by makeInteractionFiles is not used here. The exit status of
    circos is not checked and nothing is returned.
    '''
    import shlex  # local import: only needed to quote shell arguments

    dir = jpk_util.getDir(dir)
    circos_dir = dir + '/circos'
    conf_dir = dir + '/circos/conf'

    _, bg = makeInteractionFiles(links_file,
                                 dir=circos_dir,
                                 prefix=prefix,
                                 chrom_size_file=chrom_size_file,
                                 fromJuncs=fromJuncs)

    hist_path = bg if include_hist else None

    conf_file = makeCircosConf(links_file,
                               prefix=prefix,
                               dir=conf_dir,
                               hitsclip_bg_path=hitsclip_bg_path,
                               hist_path=hist_path,
                               ribbon=ribbon,
                               colored=colored)

    # Quote every user-derived argument so spaces or shell metacharacters
    # in the paths or prefix cannot split the command
    pipeline = ' '.join([
        'circos', '-conf', shlex.quote(conf_file),
        '-outputdir', shlex.quote(circos_dir),
        '-outputfile', shlex.quote(prefix + '_circos')
    ])
    os.system(pipeline)
Ejemplo n.º 10
0
def main():
    '''
    Command-line entry point: build the link file and the requested plots.

    Options (parsed with getopt):
        -h        show usage and exit
        -i FILE   input junction file (required)
        -o DIR    output directory (default '.')
        -p STR    output prefix (default: input file name up to its
                  first '.')
        -s FILE   chromosome sizes file (default wsn_chroms)
        -g FILE   HITS-CLIP bedgraph forwarded to the circos plot
        -b INT    bin size for the contact plots
        -c        colored circos links
        -H        leave the histogram out of the circos plot
        -l        log scale for the contact plots
        -x        cross segment intensities for the contact plots
        -C        make the circos plot
        -S        make the sashimi bed file
        -U        make the contact plots
        -v        accepted, currently has no effect

    Exits with status 2 after showing the usage when the options cannot
    be parsed, when -b is not an integer or when -i is missing.
    '''
    try:
        opts, _ = getopt.getopt(sys.argv[1:], 'hCSUxclvi:o:p:s:Hg:b:')
    except getopt.GetoptError:
        displayUsage()
        sys.exit(2)

    doCircos = False
    doSashimi = False
    doContact = False

    junction_file = ''
    prefix = ''
    output = '.'
    chrom_size_file = wsn_chroms
    hits_clip_file = None
    bin_size = DEFAULT_BIN_SIZE
    use_log_scale = False

    colored = False
    include_hist = True
    cross_segment_intensities = False

    for opt, arg in opts:
        if opt == '-h':
            displayUsage()
            sys.exit()
        elif opt == '-i':
            junction_file = arg
        elif opt == '-o':
            output = arg
        elif opt == '-p':
            prefix = arg
        elif opt == '-v':
            # Kept so existing command lines still parse; nothing in this
            # function reads a verbosity setting
            pass
        elif opt == '-c':
            colored = True
        elif opt == '-s':
            chrom_size_file = arg
        elif opt == '-H':
            include_hist = False
        elif opt == '-g':
            hits_clip_file = arg
        elif opt == '-b':
            try:
                bin_size = int(arg)
            except ValueError:
                # A non-numeric bin size is a usage error, not a crash
                displayUsage()
                sys.exit(2)
        elif opt == '-l':
            use_log_scale = True
        elif opt == '-x':
            cross_segment_intensities = True
        elif opt == '-C':
            doCircos = True
        elif opt == '-S':
            doSashimi = True
        elif opt == '-U':
            doContact = True

    chrom_size_file = jpk_util.getDir(chrom_size_file)

    if junction_file == '':
        displayUsage()
        sys.exit(2)

    if prefix == '':
        # Everything before the first '.' of the file name; a name with
        # no '.' is used whole instead of raising ValueError
        prefix = os.path.basename(junction_file).split('.', 1)[0]

    links_dir = output + '/links'
    link_file = jpk_util.makeLinkFile(junction_file,
                                      dir=links_dir,
                                      prefix=prefix)

    if doCircos:
        inter_links = circos.makeIntersegFile(link_file,
                                              dir=output + '/circos',
                                              prefix=prefix)
        circos.runCircosFromLinks(inter_links,
                                  dir=output,
                                  prefix=prefix,
                                  chrom_size_file=chrom_size_file,
                                  colored=colored,
                                  hitsclip_bg_path=hits_clip_file,
                                  include_hist=include_hist)

    if doSashimi:
        sashimi.generateSashimi(link_file,
                                dir=output + '/sashimi',
                                prefix=prefix)

    if doContact:
        contactPlots.generateContactPlots(
            link_file,
            bin_size,
            chrom_file=chrom_size_file,
            dir=output,
            prefix=prefix,
            use_log_scale=use_log_scale,
            cross_segment_intensities=cross_segment_intensities)
Ejemplo n.º 11
0
import sys
import time
import re
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
import math
import numpy as np
import scipy
import jpk_util
from matplotlib import figure
from PIL import Image
from PIL import ImageChops
from subprocess import call

# Default chromosome sizes file (WSN genome), resolved through
# jpk_util.getDir; used as the default chrom_file of the plotting functions
wsn_chroms = jpk_util.getDir('../genome/WSN/WSN.chrom.sizes')

# Path used for annotation of the upper triangle plots
# NOTE(review): hard-coded absolute path under one user's home directory;
# it will not resolve on other machines
SEGMENT_AXES = ('/Users/jpk90/Desktop/scripts/python/img/segment_axes')


def getChromBins(chrom_len, bin_len):
    '''
    Return the start coordinate of every bin of a chromosome/segment.

    chrom_len is the length of the chromosome
    bin_len is the requested bin length

    The chromosome is cut into chrom_len // bin_len bins. The first bin
    always starts at 0; the starts of the following bins are shifted right
    by half of the leftover length (chrom_len % bin_len), so the
    nucleotides that do not fill a whole bin are shared between the first
    and the last bin. When bin_len is larger than chrom_len a single bin
    starting at 0 is returned.

    Returns a list of ints in increasing order, starting with 0.
    '''
    if bin_len > chrom_len:
        return [0]

    num_windows = int(chrom_len // bin_len)
    # The leftover is what remains after filling num_windows whole bins,
    # i.e. the remainder modulo bin_len (not modulo the number of bins)
    leftover_chrom = int((chrom_len % bin_len) // 2)

    return [0] + [leftover_chrom + bin_len * i
                  for i in range(1, num_windows)]

Ejemplo n.º 12
0
def _stripLinkColors(inter_links):
    '''
    Copy inter_links next to itself keeping only the first six fields.

    The copy is named like the original with '_nocolor' added before the
    last extension (or at the end when the name has no extension). Lines
    with fewer than six fields are dropped. Returns the path of the copy.
    '''
    directory, base = os.path.split(inter_links)
    stem, dot, ext = base.rpartition('.')
    if dot:
        new_base = stem + '_nocolor.' + ext
    else:
        new_base = base + '_nocolor'
    # Only the file name is changed; directory components that happen to
    # contain the same text are left alone
    new_link_file = os.path.join(directory, new_base)

    with open(inter_links, 'r') as f_in, open(new_link_file, 'w') as f_out:
        for line in f_in:
            fields = line.split()
            if len(fields) >= 6:
                print('\t'.join(fields[0:6]), file=f_out)
    return new_link_file


def makeCircosConf(inter_links,
                   prefix='',
                   dir='.',
                   colored=False,
                   hitsclip_bg_path=None,
                   hist_path=None,
                   use_redux=True,
                   out_name='',
                   ribbon=False):
    '''
    Write a Circos configuration file from the template conf_template.

    inter_links is the link file written into the 'links_file' setting
    prefix names the output, <dir>/<prefix>.conf
    dir is the output directory (resolved through jpk_util.getDir)
    colored defines whether the junctions will be colored or not. When it
        is false and the first line of inter_links has more than six
        fields, a '_nocolor' copy holding only the first six fields is
        written next to inter_links and used instead
    hitsclip_bg_path is an optional parameter to include a bedgraph of
        HITS-CLIP data which will be seen around the plot; when given it
        replaces the 'hits_clip_file' setting
    hist_path, when given, replaces the 'hist_file' setting
    ribbon sets the 'ribbon' setting to yes or no
    use_redux and out_name are accepted but not used by this function

    The 'karyotype' setting is always set to influenza_karyotype; every
    other template line is copied unchanged.

    Returns the path of the configuration file that was written.
    '''
    dir = jpk_util.getDir(dir)

    if not colored:
        # Only the first line is inspected to decide whether the link
        # file carries a color column
        with open(inter_links, 'r') as f_links:
            first_line = f_links.readline()
        if len(first_line.split()) > 6:
            inter_links = _stripLinkColors(inter_links)

    out_circos_name = dir + '/' + prefix + '.conf'

    with open(conf_template, 'r') as f_in, \
            open(out_circos_name, 'w') as f_out:
        for line in f_in:
            ln = line.strip().split()
            key = ln[0] if ln else None
            if key == 'links_file':
                f_out.write('links_file = ' + inter_links + '\n')
            elif key == 'hist_file' and hist_path:
                f_out.write('hist_file = ' + hist_path + '\n')
            elif key == 'hits_clip_file' and hitsclip_bg_path:
                f_out.write('hits_clip_file = ' + hitsclip_bg_path + '\n')
            elif key == 'ribbon':
                f_out.write('ribbon = yes\n' if ribbon else 'ribbon = no\n')
            elif key == 'karyotype':
                f_out.write('karyotype = ' + influenza_karyotype + '\n')
            else:
                f_out.write(line)
    return out_circos_name
Ejemplo n.º 13
0
import os
import sys
import subprocess
import jpk_util
import contactPlots as cp
from subprocess import call

# Circos template configuration path; makeCircosConf copies this file line
# by line and substitutes the settings it knows about
conf_template = jpk_util.getDir('../plots_lib/circos/template.conf')
# Karyotype file that makeCircosConf writes into the 'karyotype' setting
influenza_karyotype = jpk_util.getDir(
    '../plots_lib/circos/influenza_karyotype.txt')
# Default chromosome sizes file (WSN genome) used by the functions below
wsn_chroms = jpk_util.getDir('../genome/WSN/WSN.chrom.sizes')


def makeIntersegFile(link_file, dir='.', prefix='', out_name=''):
    '''
    Creates a link file of just intersegmental interactions

    link_file is the input link file
    dir is the output directory (resolved through jpk_util.getDir)
    prefix names the default output, <dir>/<prefix>_inter.links
    out_name, when not empty, replaces the default output file name

    The filtering is done by awk, which keeps the lines whose first and
    fourth fields differ. The exit status of the command is not checked.

    Returns the path of the intersegmental link file.
    '''
    import shlex  # local import: only needed to quote shell arguments

    dir = jpk_util.getDir(dir)
    interseg_file = dir + '/' + prefix + '_inter.links'
    if out_name != '':
        interseg_file = dir + '/' + out_name

    # Quote both paths so spaces or shell metacharacters cannot break
    # (or alter) the command line
    awk_cmd = ('awk \'$1!=$4\' ' + shlex.quote(link_file) + ' > ' +
               shlex.quote(interseg_file))
    os.system(awk_cmd)
    return interseg_file


def makeJunctionSiteFile(link_file,
                         dir='.',
                         prefix='',