def panel4x4(fn0, fn1, fn2, fn3, fn_out):
    """Tile four SVG files on one canvas, label them A-D, and save the result.

    Despite the name, the layout is a 2x2 grid: ``fn0`` top-left, ``fn1``
    top-right, ``fn2`` bottom-left and ``fn3`` bottom-right.  The canvas is
    ``plot_config.single_figure_size`` scaled by 150 px per unit, and each
    cell starts at a multiple of half of that canvas size.

    Args:
        fn0, fn1, fn2, fn3: paths of the SVG files to place.
        fn_out: path the composed SVG is written to.
    """
    single_size = plot_config.single_figure_size
    canvas_w = single_size[0] * 150
    canvas_h = single_size[1] * 150
    half_w = canvas_w / 2.
    half_h = canvas_h / 2.

    # (label, source file, x offset, y offset) for each cell, in draw order.
    cells = (
        ("A", fn0, 0, 0),
        ("B", fn1, half_w, 0),
        ("C", fn2, 0, half_h),
        ("D", fn3, half_w, half_h),
    )

    elements = []
    for letter, svg_file, dx, dy in cells:
        elements.append(sc.Panel(sc.SVG(svg_file).scale(1.0).move(dx, dy)))
        # The label sits just inside the top-left corner of its cell.
        elements.append(
            sc.Text(letter,
                    dx + 5,
                    dy + 25,
                    size=plot_config.fontsize_xhuge,
                    weight='bold'))

    figure = sc.Figure(str(canvas_w) + "px", str(canvas_h) + "px", *elements)
    figure.save(fn_out)
Ejemplo n.º 2
0
    def plot_analysis(self, display=True):
        """Combine the three analysis plots into one side-by-side SVG.

        The singular-value, eigenvalue and power-spectrum plots are each
        saved by their own method; the last element of each return value is
        used as the saved file path without the ``.svg`` suffix.  The
        individual files are loaded, deleted, and composed left to right
        with the captions "(a)", "(b)" and "(c)".

        Args:
            display: when true, show the composed SVG through IPython.
        """
        figdir = set_figdir(verbose=False)
        figure_name = f"{figdir}/{timestamp()}-analysis.svg"

        plotters = (self.plot_singular_vals, self.plot_eigs,
                    self.plot_power_spectrum)
        stems = [
            plotter(savefig=True, verbose=False, display=False)[-1]
            for plotter in plotters
        ]

        svgs = []
        for stem in stems:
            svg_path = stem + '.svg'
            svgs.append(sc.SVG(svg_path, fix_mpl=True))
            # The per-plot file is only an intermediate; drop it once loaded.
            os.remove(svg_path)

        total_width = sum(svg.width for svg in svgs)
        tallest = max([svg.height for svg in svgs])

        panels = []
        x_offset = 0
        for tag, svg in zip("abc", svgs):
            caption = sc.Text(f"({tag})", 6, 16, size=11)
            panels.append(sc.Panel(svg, caption).move(x_offset, 0))
            x_offset += svg.width

        sc.Figure(total_width, tallest, *panels).save(figure_name)

        if display:
            IPython.display.display(IPython.display.SVG(figure_name))
Ejemplo n.º 3
0
    def plot(self, gr1=None, gr2=None):
        """
        Build one figure from the center plot and its surrounding frames.

        Parameters
        ----------
        gr1 : {str, GenomeRange}
            First genome range

        gr2 : {str, GenomeRange}, optional
            Second genome range

        Returns
        -------
        fig : sc.Figure
            Figure holding one panel for the center plot followed by one
            panel per frame.
        """
        frame2grange = self.frame_granges(gr1, gr2)
        # NOTE(review): presumably frame_granges() refreshes
        # self.current_range, so this must be read after it -- confirm.
        gr1, gr2 = self.current_range
        sub_frames = self.properties['sub_frames']

        frame_svgs = self.plot_frames(frame2grange)
        center_svg = self.plot_center(gr1, gr2)

        # Shift the center plot by its offsets, then transform the frames.
        center_offsets = self.__get_center_offsets(sub_frames)
        offsets_px = self.cm2px(center_offsets)
        center_svg.move(*offsets_px)
        self.__transform_sub_svgs(frame_svgs, sub_frames, center_offsets)

        size_px = self.cm2px(self.__get_figsize(sub_frames))
        width_px, height_px = size_px[0], size_px[1]

        panels = [sc.Panel(center_svg)]
        for svg in frame_svgs.values():
            panels.append(sc.Panel(svg))

        return sc.Figure(f"{width_px}px", f"{height_px}px", *panels)
Ejemplo n.º 4
0
    def layout(codes, filename):
        """Export A4 PDF pages filled with these barcodes for sticker printing.

        Each code is drawn to a temporary ``<code.id>.svg`` file, placed on a
        grid whose cell size is ``EAN13Data.full_size``, and removed again.
        Every sheet is saved as SVG, converted to PDF, and its SVG deleted.
        With a single sheet the result is ``<filename>.pdf``.  With several
        sheets they are written as ``<filename>-<n>.pdf``, merged into
        ``<filename>.pdf``, and the per-sheet PDFs are then deleted.

        codes: sized collection of barcode objects exposing ``id`` and
            ``draw(path)``.
        filename: prefix for the output and for the temporary files produced
            and removed during the process; it shouldn't have any extension.

        TODO: I guess it's possible achieving this without creating so
        many intermediate files. Try to get rid of them ;)
        """
        # A4
        sheet_size = WH(210., 297.) * XY.mm
        sticker_size = EAN13Data.full_size
        # so how many codes fit on one sheet?
        n_stickers = sheet_size / sticker_size
        n_stickers = np.floor(n_stickers).astype(int)
        stickers_per_sheet = n_stickers.w * n_stickers.h
        # so how many sheets do we need?
        n_sheets = ceil(len(codes) / stickers_per_sheet)
        sheets = []  # store produced .pdf sheets filenames here
        # iterate until they are all consumed
        stickers = iter(codes)
        for n in range(n_sheets):
            panels = []  # according to sc logic
            try:
                # fill the sheet column by column
                for i in range(n_stickers.w):
                    for j in range(n_stickers.h):
                        # export this code as svg temp file
                        code = next(stickers)
                        tpfile = code.id + '.svg'
                        code.draw(tpfile)
                        panels.append(
                            sc.Panel(
                                sc.SVG(tpfile).scale(1.).move(
                                    i * sticker_size.w, j * sticker_size.h)))
                        # cleanup
                        os.remove(tpfile)
            except StopIteration:
                pass  # no more stickers to print: last sheet is partly filled
            sheetname = filename + ('-' + str(n + 1) if n_sheets > 1 else '')
            ssvg = sheetname + '.svg'
            spdf = sheetname + '.pdf'
            sc.Figure(sheet_size.w, sheet_size.h, *panels).save(ssvg)
            # convert to .pdf
            renderPDF.drawToFile(svg2rlg(ssvg), spdf)
            # remove .svg file
            os.remove(ssvg)
            # remember this other temp file
            sheets.append(spdf)

        # bring all sheets together into pdf pages
        if n_sheets > 1:
            final = PdfFileWriter()
            for sheet in sheets:
                append_pdf(PdfFileReader(sheet), final)
            # Close the output explicitly so the merged PDF is flushed to
            # disk before the per-sheet files are deleted.
            with open(filename + '.pdf', 'wb') as merged:
                final.write(merged)
            # so we can now suppress them
            while sheets:
                os.remove(sheets.pop())
Ejemplo n.º 5
0
def test_embedded_svg():
    """Embedding an SVG in a Figure keeps its polygon; its size stays None."""
    logo = sc.SVG("examples/files/svg_logo.svg")
    figure = sc.Figure("5cm", "5cm", logo)

    polygon_query = ".//{}polygon".format(SVG)
    polygon = figure.root.find(polygon_query)
    ok_(polygon.get("id") == "V")

    ok_(logo.height is None)
    ok_(logo.width is None)
def panel2x2(fn0, fn1, fn_out, single_size=plot_config.single_figure_size):
    """Place two SVG files side by side, label them A and B, and save.

    Despite the name, the layout is a single row of two cells.  The canvas
    is ``single_size[0] * 150`` px wide and ``single_size[1] * 80`` px tall;
    both SVGs are pushed 15 px down and the labels sit above them.

    Args:
        fn0: path of the left SVG.
        fn1: path of the right SVG.
        fn_out: path the composed SVG is written to.
        single_size: indexable (width, height) pair; defaults to the value
            of ``plot_config.single_figure_size`` at definition time.
    """
    canvas_w = single_size[0] * 150
    canvas_h = single_size[1] * 80
    half_w = canvas_w / 2.

    def caption(letter, x):
        # Both captions share the same baseline and font settings.
        return sc.Text(letter,
                       x,
                       12,
                       size=plot_config.fontsize_large,
                       weight='bold',
                       font='serif')

    left = sc.Panel(sc.SVG(fn0).scale(1.0).move(0, 15))
    right = sc.Panel(sc.SVG(fn1).scale(1.0).move(half_w, 15))

    figure = sc.Figure(
        str(canvas_w) + "px",
        str(canvas_h) + "px",
        left,
        caption("A", 4),
        right,
        caption("B", half_w + 4.),
    )
    figure.save(fn_out)
Ejemplo n.º 7
0
def compose_svg(svg_board, svg_plot, svg_combined):
    """
    Write a combined SVG with the board image drawn underneath the plot
    image, scaled and shifted so that it covers the plot's axes area.

    The board is scaled by ``TOP_RIGHT_MARGIN - LEFT_BOTTOM_MARGIN`` and
    moved by the left margin horizontally and by the top margin vertically,
    both expressed as fractions of ``SIZE``.

    :param svg_board: filename of existing board image
    :type svg_board: str

    :param svg_plot: filename of existing plot image
    :type svg_plot: str

    :param svg_combined: filename of combined image, to be written
    :type svg_combined: str
    """
    axes_fraction = TOP_RIGHT_MARGIN - LEFT_BOTTOM_MARGIN
    offset_x = LEFT_BOTTOM_MARGIN * SIZE
    offset_y = (1.0 - TOP_RIGHT_MARGIN) * SIZE

    board = compose.SVG(svg_board).scale(axes_fraction).move(offset_x,
                                                             offset_y)
    # Panels are listed back to front: the board first, the plot on top.
    background = compose.Panel(board)
    foreground = compose.Panel(compose.SVG(svg_plot))

    combined = compose.Figure(SIZE, SIZE, background, foreground)
    combined.save(svg_combined)
Ejemplo n.º 8
0
def make_multipanel_fig(FIGS, CAP_SIZE=14,
                        fig_name="fig.svg",
                        transparent=True, correc_factor=70., DPI=100.):
    """Take a list of figures and make a multi panel plot.

    Each figure is saved to ``/tmp/<index>.svg`` and placed on a grid of up
    to three columns.  Cells are sized from the largest figure width and
    height, and each panel is labelled with consecutive uppercase letters.

    Args:
        FIGS: non-empty list of figures exposing ``get_size_inches()`` and
            ``savefig()``.
        CAP_SIZE: unused; kept for backward compatibility.
        fig_name: path the composed SVG is written to.
        transparent: forwarded to ``savefig``.
        correc_factor: multiplier turning figure inches into panel offsets.
        DPI: unused; kept for backward compatibility.
    """
    n_cols = 3
    n_figs = len(FIGS)
    labels = list(string.ascii_uppercase)[:n_figs]

    sizes = [fig.get_size_inches() for fig in FIGS]
    width = np.max([s[0] for s in sizes])
    height = np.max([s[1] for s in sizes])

    PANELS = []
    for i, fig in enumerate(FIGS):
        svg_path = '/tmp/' + str(i) + '.svg'
        fig.savefig(svg_path, format='svg', transparent=transparent)
        # Figures that had to be rasterized are drawn at a reduced scale.
        scale = .7 if translate_to_bitmap_if_too_big(fig, svg_path) else 1.
        x = (i % n_cols) * width * correc_factor
        y = (i // n_cols) * height * correc_factor
        PANELS.append(sg.Panel(
            sg.SVG(svg_path).move(x, y).scale(scale),
            sg.Text(labels[i], 25, 20, size=22, weight='bold').move(x - 15,
                                                                    y)))

    # The canvas spans every occupied column and row.  The previous
    # ``len(FIGS) % 3`` produced a zero-width canvas for 3, 6, 9... panels
    # and a single column for 4 panels.
    used_cols = min(n_figs, n_cols)
    used_rows = (n_figs + n_cols - 1) // n_cols
    sg.Figure(str(used_cols * inch_to_cm(width)) + "cm",
              str(inch_to_cm(height) * used_rows) + "cm",
              *PANELS).save(fig_name)
Ejemplo n.º 9
0
def draw_plot(data, output):
    """Draw pie charts for the given data on top of the lab012 background.

    The charts are rendered with matplotlib into a temporary ``out.svg``,
    which is overlaid on ``./assets/012-base.svg`` and then deleted.

    Args:
        data - list of lists of three points with experiment results; the
            entries are paired in order with the twelve fixed positions
        output - name of generated svg file
    """
    # Four columns of three measurement points each.
    positions = [[x, y] for x in (1.6, 5, 8.7, 12) for y in (1, 3, 5)]
    pie_colors = ('blue', 'orange', 'green')

    # prepare a transparent canvas with fixed extent and no axes
    fig, ax = plt.subplots()
    transparent_pixels = [[[0, 0, 0, 0]], [[0, 0, 0, 0]]]
    ax.imshow(transparent_pixels, extent=[-1, 13.25, -1, 7.3])
    ax.patch.set_alpha(0.0)
    ax.axis('off')

    # robot blob: an all-black pie with a text label next to it
    draw_single_pie([1.3, 5.7],
                    0.3, [1, 1, 1],
                    ax, ('k', 'k', 'k'),
                    plot_labels=False)
    ax.text(1.6, 5.9, "Mikrofon")

    # one numbered pie per (position, result) pair
    for number, (position, values) in enumerate(zip(positions, data),
                                                start=1):
        draw_single_pie(position, 0.5, values, ax, pie_colors, str(number))

    # save the chart layer
    fig.tight_layout()
    fig.savefig("out.svg", dpi=200, transparent=True)

    # merge with proper background
    background = sc.Panel(sc.SVG("./assets/012-base.svg").scale(0.352))
    charts = sc.Panel(sc.SVG("out.svg").scale(0.6).move(-25, -40))
    sc.Figure("247.59521mm", "129.31232mm", background, charts).save(output)
    os.remove("./out.svg")
Ejemplo n.º 10
0
def multipanel_figure(graph_env,
                      FIGS,
                      X = None, Y = None, Labels=None,
                      LABELS = None, X_LABELS = None, Y_LABELS = None,
                      width=85.,# mm
                      height=None, # mm
                      grid=False,
                      autoposition=False,
                      SCALING_FACTOR = 1.34, fontsize=None, fontweight='bold',
                      export_to_png=False, bg='white',
                      fig_name='fig.svg'):
    """
    Assemble figures and/or existing SVG files into one multi-panel SVG.

    Parameters
    ----------
    graph_env :
        Object providing `fontsize` (default label size is `fontsize+1`)
        and `transparency` (forwarded to `savefig`).
    FIGS :
        A single matplotlib Figure, a flat list of Figures, a flat list of
        SVG file paths (str), or a list of rows (list of lists) mixing both.
        Flat inputs are wrapped into a single row.
    X, Y :
        Nested lists, indexed [row][column], of panel offsets passed to
        `.move()`. Default to 0 for every panel. Overwritten when
        `autoposition` is set.
    Labels :
        Not used by this function.
    LABELS :
        Nested list of label strings, indexed [row][column]; empty strings
        are skipped. Defaults to all-empty.
    X_LABELS, Y_LABELS :
        Label offsets, indexed like LABELS; default to X and Y.
    width :
        Figure width in mm, or one of 'single-column' (85),
        'one-and-a-half-column' / 'one-column-and-a-half' (114),
        'double-column' (174).
    height :
        Figure height in mm. If None, it is max(50, Y[-1][-1])*0.27, or 50
        when Y cannot be indexed that way.
    grid :
        If True, a `sg.Grid(40,40)` overlay is added after the panels.
    autoposition :
        If True, X and Y are computed from the figure sizes (72 units per
        inch; str entries count as 120 wide and 80 tall) and printed.
    SCALING_FACTOR :
        Scale applied to the assembled figure before saving.
    fontsize, fontweight :
        Size and weight of the label text.
    export_to_png, bg :
        If `fig_name` does not end with '.png' and `export_to_png` is true,
        `export_as_png(fig_name, ...)` is called with background `bg`.
    fig_name :
        Output path. If it ends with '.png', the SVG is written under the
        same name with '.png' replaced by '.svg', exported to PNG, and the
        SVG is then removed.
    """
    # Normalize FIGS to a list of rows when a bare figure or a flat list
    # (of figures or of SVG paths) is given.
    if type(FIGS) is mpl.figure.Figure:
        FIGS = [[FIGS]]
    elif type(FIGS) is list:
        if (len(FIGS)>0) and (type(FIGS[0]) is mpl.figure.Figure):
            FIGS = [FIGS]
        elif (len(FIGS)>0) and (type(FIGS[0]) is str):
            FIGS = [FIGS]
    # else should be list of list

    if autoposition:
        X, Y = [], []
        y = [0]
        for i, lfig in enumerate(FIGS):
            # Every panel of this row gets the same y offset: the largest
            # value accumulated in `y` so far.
            Y.append([np.max(y) for fig in lfig])
            x = []
            for fig in lfig:
                if type(fig) is not str:
                    x.append(72.*fig.get_size_inches()[0])
                    y.append(72.*fig.get_size_inches()[1])
                else:
                    # SVG paths have no size to query; fixed values are used.
                    x.append(120)
                    y.append(80)
            # x offsets are the cumulative widths, starting at 0 (this gives
            # one more entry than there are panels in the row).
            X.append([0]+list(np.cumsum(x)))
            # NOTE(review): `y` is not reset between rows; all collected
            # values are shifted by this row's offset -- confirm intended.
            y = [dy+Y[-1][0] for dy in y]
        # Extra trailing row; Y[-1][-1] is read below for the default height.
        Y.append([np.max(y)])
        print('X = ', X)
        print('Y = ', Y)
    
    if X is None:
        X = [[0 for fig in lfig] for lfig in FIGS]
    if Y is None:
        Y = [[0 for fig in lfig] for lfig in FIGS]
    if LABELS is None:
        LABELS = [['' for fig in lfig] for lfig in FIGS]
    if X_LABELS is None:
       X_LABELS = X 
    if Y_LABELS is None:
       Y_LABELS = Y
    
    if height is None:
        try:
            height = np.max([50, Y[-1][-1]])*0.27 # TO BE SET UP
        except IndexError:
            height = 50
    
    # size: translate named column widths into mm
    if width=='single-column':
        width = 85.
    elif width=='one-and-a-half-column':
        width = 114.
    elif width=='one-column-and-a-half':
        width = 114.
    elif width=='double-column':
        width = 174.

    if fontsize is None:
        fontsize = graph_env.fontsize+1
        
    # LOCATIONS[i][j] is the SVG path of panel (i, j); PANELS collects the
    # positioned panels in row-major order.
    LOCATIONS, PANELS = [], []
    for i, lfig in enumerate(FIGS):
        LOCATIONS.append([])
        for j, fig in enumerate(lfig):
            if type(FIGS[i][j]) is str:
                # Already an SVG file on disk: use it as is.
                LOCATIONS[i].append(FIGS[i][j])
                # 1.26625 -- NEW SCALING FACTOR
            else:
                # Save the figure to '<tempdir>/<i>_<j>.svg' first.
                LOCATIONS[i].append(os.path.join(gettempdir(), '%i_%i.svg' % (i,j)))
                FIGS[i][j].savefig(LOCATIONS[i][j], format='svg',
                                   transparent=graph_env.transparency)
            PANELS.append(sg.Panel(sg.SVG(LOCATIONS[i][j]).move(X[i][j], Y[i][j])))

    # Labels are added as their own panels, after all figure panels.
    for i, labels in enumerate(LABELS):
        for j, label in enumerate(labels):
            if label!='':
                PANELS.append(sg.Panel(sg.Text(label, 3, 10, 
                                               size=fontsize, weight=fontweight).move(\
                                                        X_LABELS[i][j],Y_LABELS[i][j])))

    # width/height are in mm; the figure size is given in cm. The SVG is
    # always written under a '.svg' name, even when a '.png' was requested.
    if grid:
        sg.Figure("%.1fcm" % (width/10.), "%.1fcm" % (height/10.),
                  *PANELS, sg.Grid(40,40)).scale(SCALING_FACTOR).save(fig_name.replace('.png', '.svg'))
    else:
        sg.Figure("%.1fcm" % (width/10.), "%.1fcm" % (height/10.),
                  *PANELS).scale(SCALING_FACTOR).save(fig_name.replace('.png', '.svg'))

    if fig_name.endswith('.png'):
        # PNG requested: convert the intermediate SVG, then delete it.
        export_as_png(fig_name.replace('.png', '.svg'), dpi=300, background=bg)
        os.remove(fig_name.replace('.png', '.svg'))
        print('[ok] removed %s' % fig_name.replace('.png', '.svg'))
    elif export_to_png:
        export_as_png(fig_name, dpi=300, background=bg)
Ejemplo n.º 11
0
def calculate_matrix_svg(snplst, pop, request, r2_d="r2"):

    # Set data directories using config.yml
    with open('config.yml', 'r') as f:
        config = yaml.load(f)
    gene_dir=config['data']['gene_dir']
    snp_dir=config['data']['snp_dir']
    pop_dir=config['data']['pop_dir']
    vcf_dir=config['data']['vcf_dir']

    tmp_dir = "./tmp/"

    # Ensure tmp directory exists
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    # Open SNP list file
    snps_raw = open(snplst).readlines()

    # Remove duplicate RS numbers
    snps = []
    for snp_raw in snps_raw:
        snp = snp_raw.strip().split()
        if snp not in snps:
            snps.append(snp)

    # Select desired ancestral populations
    pops = pop.split("+")
    pop_dirs = []
    for pop_i in pops:
        if pop_i in ["ALL", "AFR", "AMR", "EAS", "EUR", "SAS", "ACB", "ASW", "BEB", "CDX", "CEU", "CHB", "CHS", "CLM", "ESN", "FIN", "GBR", "GIH", "GWD", "IBS", "ITU", "JPT", "KHV", "LWK", "MSL", "MXL", "PEL", "PJL", "PUR", "STU", "TSI", "YRI"]:
            pop_dirs.append(pop_dir + pop_i + ".txt")

    get_pops = "cat " + " ".join(pop_dirs)
    proc = subprocess.Popen(get_pops, shell=True, stdout=subprocess.PIPE)
    pop_list = proc.stdout.readlines()

    ids = [i.strip() for i in pop_list]
    pop_ids = list(set(ids))

    # Connect to snp database
    conn = sqlite3.connect(snp_dir)
    conn.text_factory = str
    cur = conn.cursor()

    def get_coords(rs):
        """Return the first database row matching an RS number, if any.

        Leading and trailing "r"/"s" characters are stripped from ``rs``;
        the last character of what remains selects the table
        (``tbl_<char>``) and the whole remainder is matched against the
        ``id`` column.  The result is whatever ``cur.fetchone()`` yields.
        """
        rs_digits = rs.strip("rs")
        table_suffix = rs_digits[-1]
        query = "SELECT * FROM tbl_" + table_suffix + " WHERE id=?"
        cur.execute(query, (rs_digits,))
        return cur.fetchone()

    # Find RS numbers in snp database
    rs_nums = []
    snp_pos = []
    snp_coords = []
    tabix_coords = ""
    for snp_i in snps:
        if len(snp_i) > 0:
            if len(snp_i[0]) > 2:
                if snp_i[0][0:2] == "rs" and snp_i[0][-1].isdigit():
                    snp_coord = get_coords(snp_i[0])
                    if snp_coord != None:
                        rs_nums.append(snp_i[0])
                        snp_pos.append(snp_coord[2])
                        temp = [snp_i[0], snp_coord[1], snp_coord[2]]
                        snp_coords.append(temp)

    # Close snp connection
    cur.close()
    conn.close()


    # Check max distance between SNPs
    distance_bp = []
    for i in range(len(snp_coords)):
        distance_bp.append(int(snp_coords[i][2]))

    # Sort coordinates and make tabix formatted coordinates
    snp_pos_int = [int(i) for i in snp_pos]
    snp_pos_int.sort()
    snp_coord_str = [snp_coords[0][1] + ":" +
                     str(i) + "-" + str(i) for i in snp_pos_int]
    tabix_coords = " " + " ".join(snp_coord_str)

    # Extract 1000 Genomes phased genotypes
    vcf_file = vcf_dir + \
        snp_coords[0][
            1] + ".phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"
    tabix_snps = "tabix -h {0}{1} | grep -v -e END".format(
        vcf_file, tabix_coords)
    proc = subprocess.Popen(tabix_snps, shell=True, stdout=subprocess.PIPE)

    # Define function to correct indel alleles
    def set_alleles(a1, a2):
        """Return the allele pair with indel alleles rewritten.

        Two single-character alleles are returned unchanged.  Otherwise the
        first character of every multi-character allele is dropped, and a
        single-character allele paired with a longer one becomes "-".
        An empty allele matches no case and raises ``UnboundLocalError``.
        """
        len1, len2 = len(a1), len(a2)
        if len1 == 1 and len2 == 1:
            corrected = (a1, a2)
        elif len1 == 1 and len2 > 1:
            corrected = ("-", a2[1:])
        elif len1 > 1 and len2 == 1:
            corrected = (a1[1:], "-")
        elif len1 > 1 and len2 > 1:
            corrected = (a1[1:], a2[1:])
        return corrected

    # Import SNP VCF files
    vcf = proc.stdout.readlines()

    h = 0
    while vcf[h][0:2] == "##":
        h += 1

    head = vcf[h].strip().split()

    # Extract haplotypes
    index = []
    for i in range(9, len(head)):
        if head[i] in pop_ids:
            index.append(i)

    hap1 = [[]]
    for i in range(len(index) - 1):
        hap1.append([])
    hap2 = [[]]
    for i in range(len(index) - 1):
        hap2.append([])

    rsnum_lst = []
    allele_lst = []
    pos_lst = []

    for g in range(h + 1, len(vcf)):
        geno = vcf[g].strip().split()
        if geno[1] not in snp_pos:
            continue

        if snp_pos.count(geno[1]) == 1:
            rs_query = rs_nums[snp_pos.index(geno[1])]

        else:
            pos_index = []
            for p in range(len(snp_pos)):
                if snp_pos[p] == geno[1]:
                    pos_index.append(p)
            for p in pos_index:
                if rs_nums[p] not in rsnum_lst:
                    rs_query = rs_nums[p]
                    break

        if rs_query in rsnum_lst:
            continue

        rs_1000g = geno[2]

        if rs_query == rs_1000g:
            rsnum = rs_1000g
        else:
            count = -2
            found = "false"
            while count <= 2 and count + g < len(vcf):
                geno_next = vcf[g + count].strip().split()
                if rs_query == geno_next[2]:
                    found = "true"
                    break
                count += 1

            if found == "false":
                indx = [i[0] for i in snps].index(rs_query)
                # snps[indx][0] = geno[2]
                # rsnum = geno[2]
                snps[indx][0]=rs_query
                rsnum=rs_query
            else:
                continue

        if "," not in geno[3] and "," not in geno[4]:
            a1, a2 = set_alleles(geno[3], geno[4])
            for i in range(len(index)):
                if geno[index[i]] == "0|0":
                    hap1[i].append(a1)
                    hap2[i].append(a1)
                elif geno[index[i]] == "0|1":
                    hap1[i].append(a1)
                    hap2[i].append(a2)
                elif geno[index[i]] == "1|0":
                    hap1[i].append(a2)
                    hap2[i].append(a1)
                elif geno[index[i]] == "1|1":
                    hap1[i].append(a2)
                    hap2[i].append(a2)
                elif geno[index[i]] == "0":
                    hap1[i].append(a1)
                    hap2[i].append(".")
                elif geno[index[i]] == "1":
                    hap1[i].append(a2)
                    hap2[i].append(".")
                else:
                    hap1[i].append(".")
                    hap2[i].append(".")

            rsnum_lst.append(rsnum)

            position = "chr" + geno[0] + ":" + geno[1] + "-" + geno[1]
            pos_lst.append(position)
            alleles = a1 + "/" + a2
            allele_lst.append(alleles)

    # Calculate Pairwise LD Statistics
    all_haps = hap1 + hap2
    ld_matrix = [[[None for v in range(2)] for i in range(
        len(all_haps[0]))] for j in range(len(all_haps[0]))]

    for i in range(len(all_haps[0])):
        for j in range(i, len(all_haps[0])):
            hap = {}
            for k in range(len(all_haps)):
                # Extract haplotypes
                hap_k = all_haps[k][i] + all_haps[k][j]
                if hap_k in hap:
                    hap[hap_k] += 1
                else:
                    hap[hap_k] = 1

            # Remove Missing Haplotypes
            keys = hap.keys()
            for key in keys:
                if "." in key:
                    hap.pop(key, None)

            # Check all haplotypes are present
            if len(hap) != 4:
                snp_i_a = allele_lst[i].split("/")
                snp_j_a = allele_lst[j].split("/")
                haps = [snp_i_a[0] + snp_j_a[0], snp_i_a[0] + snp_j_a[1],
                        snp_i_a[1] + snp_j_a[0], snp_i_a[1] + snp_j_a[1]]
                for h in haps:
                    if h not in hap:
                        hap[h] = 0

            # Perform LD calculations
            A = hap[sorted(hap)[0]]
            B = hap[sorted(hap)[1]]
            C = hap[sorted(hap)[2]]
            D = hap[sorted(hap)[3]]
            tmax = max(A, B, C, D)
            delta = float(A * D - B * C)
            Ms = float((A + C) * (B + D) * (A + B) * (C + D))
            if Ms != 0:
                # D prime
                if delta < 0:
                    D_prime = round(
                        abs(delta / min((A + C) * (A + B), (B + D) * (C + D))), 3)
                else:
                    D_prime = round(
                        abs(delta / min((A + C) * (C + D), (A + B) * (B + D))), 3)

                # R2
                r2 = round((delta**2) / Ms, 3)

                # Find Correlated Alleles
                if r2 > 0.1:
                    N = A + B + C + D
                    # Expected Cell Counts
                    eA = (A + B) * (A + C) / N
                    eB = (B + A) * (B + D) / N
                    eC = (C + A) * (C + D) / N
                    eD = (D + C) * (D + B) / N

                    # Calculate Deltas
                    dA = (A - eA)**2
                    dB = (B - eB)**2
                    dC = (C - eC)**2
                    dD = (D - eD)**2
                    dmax = max(dA, dB, dC, dD)

                    if dA == dB == dC == dD:
                        if tmax == dA or tmax == dD:
                            match = sorted(hap)[0][
                                0] + "=" + sorted(hap)[0][1] + "," + sorted(hap)[2][0] + "=" + sorted(hap)[1][1]
                        else:
                            match = sorted(hap)[0][
                                0] + "=" + sorted(hap)[1][1] + "," + sorted(hap)[2][0] + "=" + sorted(hap)[0][1]
                    elif dmax == dA or dmax == dD:
                        match = sorted(hap)[0][
                            0] + "=" + sorted(hap)[0][1] + "," + sorted(hap)[2][0] + "=" + sorted(hap)[1][1]
                    else:
                        match = sorted(hap)[0][
                            0] + "=" + sorted(hap)[1][1] + "," + sorted(hap)[2][0] + "=" + sorted(hap)[0][1]
                else:
                    match = "  =  ,  =  "
            else:
                D_prime = "NA"
                r2 = "NA"
                match = "  =  ,  =  "

            snp1 = rsnum_lst[i]
            snp2 = rsnum_lst[j]
            pos1 = pos_lst[i].split("-")[0]
            pos2 = pos_lst[j].split("-")[0]
            allele1 = allele_lst[i]
            allele2 = allele_lst[j]
            corr = match.split(",")[0].split("=")[1] + "=" + match.split(",")[0].split("=")[
                0] + "," + match.split(",")[1].split("=")[1] + "=" + match.split(",")[1].split("=")[0]
            corr_f = match

            ld_matrix[i][j] = [snp1, snp2, allele1,
                               allele2, corr, pos1, pos2, D_prime, r2]
            ld_matrix[j][i] = [snp2, snp1, allele2,
                               allele1, corr_f, pos2, pos1, D_prime, r2]

    # Generate Plot Variables
    out = [j for i in ld_matrix for j in i]
    xnames = []
    ynames = []
    xA = []
    yA = []
    corA = []
    xpos = []
    ypos = []
    D = []
    R = []
    box_color = []
    box_trans = []

    if r2_d not in ["r2", "d"]:
        r2_d = "r2"

    for i in range(len(out)):
        snp1, snp2, allele1, allele2, corr, pos1, pos2, D_prime, r2 = out[i]
        xnames.append(snp1)
        ynames.append(snp2)
        xA.append(allele1)
        yA.append(allele2)
        corA.append(corr)
        xpos.append(pos1)
        ypos.append(pos2)
        if r2_d == "r2" and r2 != "NA":
            D.append(str(round(float(D_prime), 4)))
            R.append(str(round(float(r2), 4)))
            box_color.append("red")
            box_trans.append(r2)
        elif r2_d == "d" and D_prime != "NA":
            D.append(str(round(float(D_prime), 4)))
            R.append(str(round(float(r2), 4)))
            box_color.append("red")
            box_trans.append(abs(D_prime))
        else:
            D.append("NA")
            R.append("NA")
            box_color.append("blue")
            box_trans.append(0.1)

    # Import plotting modules
    from collections import OrderedDict
    from bokeh.embed import components, file_html
    from bokeh.layouts import gridplot
    from bokeh.models import HoverTool, LinearAxis, Range1d
    from bokeh.plotting import ColumnDataSource, curdoc, figure, output_file, reset_output, save
    from bokeh.resources import CDN
    from bokeh.io import export_svgs
    import svgutils.compose as sg
    from math import pi

    reset_output()

    # Aggregate Plotting Data
    x = []
    y = []
    w = []
    h = []
    coord_snps_plot = []
    snp_id_plot = []
    alleles_snp_plot = []
    for i in range(0, len(xpos), int(len(xpos)**0.5)):
        x.append(int(xpos[i].split(":")[1]) / 1000000.0)
        y.append(0.5)
        w.append(0.00003)
        h.append(1.06)
        coord_snps_plot.append(xpos[i])
        snp_id_plot.append(xnames[i])
        alleles_snp_plot.append(xA[i])
    

    buffer = (x[-1] - x[0]) * 0.025
    xr = Range1d(start=x[0] - buffer, end=x[-1] + buffer)
    yr = Range1d(start=-0.03, end=1.03)
    y2_ll = [-0.03] * len(x)
    y2_ul = [1.03] * len(x)

    yr_pos = Range1d(start=(x[-1] + buffer) * -1, end=(x[0] - buffer) * -1)
    yr0 = Range1d(start=0, end=1)
    yr2 = Range1d(start=0, end=3.8)
    yr3 = Range1d(start=0, end=1)

    spacing = (x[-1] - x[0] + buffer + buffer) / (len(x) * 1.0)
    x2 = []
    y0 = []
    y1 = []
    y2 = []
    y3 = []
    y4 = []
    for i in range(len(x)):
        x2.append(x[0] - buffer + spacing * (i + 0.5))
        y0.append(0)
        y1.append(0.20)
        y2.append(0.80)
        y3.append(1)
        y4.append(1.15)

    xname_pos = []
    for i in x2:
        for j in range(len(x2)):
            xname_pos.append(i)

    data = {
            'xname': xnames,
            'xname_pos': xname_pos,
            'yname': ynames,
            'xA': xA,
            'yA': yA,
            'xpos': xpos,
            'ypos': ypos,
            'R2': R,
            'Dp': D,
            'corA': corA,
            'box_color': box_color,
            'box_trans': box_trans
    }

    source = ColumnDataSource(data)

    threshold = 70
    if len(snps) < threshold:
        matrix_plot = figure(outline_line_color="white", min_border_top=0, min_border_bottom=2, min_border_left=100, min_border_right=5,
                             x_range=xr, y_range=list(reversed(rsnum_lst)),
                             h_symmetry=False, v_symmetry=False, border_fill_color='white', x_axis_type=None, logo=None,
                             tools="hover,undo,redo,reset,pan,box_zoom,previewsave", title=" ", plot_width=800, plot_height=700)

    else:
        matrix_plot = figure(outline_line_color="white", min_border_top=0, min_border_bottom=2, min_border_left=100, min_border_right=5,
                             x_range=xr, y_range=list(reversed(rsnum_lst)),
                             h_symmetry=False, v_symmetry=False, border_fill_color='white', x_axis_type=None, y_axis_type=None, logo=None,
                             tools="hover,undo,redo,reset,pan,box_zoom,previewsave", title=" ", plot_width=800, plot_height=700)
    

    matrix_plot.rect(x='xname_pos', y='yname', width=0.95 * spacing, height=0.95, source=source,
                    color="box_color", alpha="box_trans", line_color=None)
    
    matrix_plot.grid.grid_line_color = None
    matrix_plot.axis.axis_line_color = None
    matrix_plot.axis.major_tick_line_color = None
    if len(snps) < threshold:
        matrix_plot.axis.major_label_text_font_size = "8pt"
        matrix_plot.xaxis.major_label_orientation = "vertical"

    matrix_plot.axis.major_label_text_font_style = "normal"
    matrix_plot.xaxis.major_label_standoff = 0

    sup_2 = u"\u00B2"

    hover = matrix_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("Variant 1", " " + "@yname (@yA)"),
        ("Variant 2", " " + "@xname (@xA)"),
        ("D\'", " " + "@Dp"),
        ("R" + sup_2, " " + "@R2"),
        ("Correlated Alleles", " " + "@corA"),
    ])

    # Connecting and Rug Plots
    # Connector Plot
    if len(snps) < threshold:
        connector = figure(outline_line_color="white", y_axis_type=None, x_axis_type=None,
                           x_range=xr, y_range=yr2, border_fill_color='white',
                           title="", min_border_left=100, min_border_right=5, min_border_top=0, min_border_bottom=0, h_symmetry=False, v_symmetry=False,
                           plot_width=800, plot_height=90, tools="xpan,tap")
        connector.segment(x, y0, x, y1, color="black")
        connector.segment(x, y1, x2, y2, color="black")
        connector.segment(x2, y2, x2, y3, color="black")
        connector.text(x2, y4, text=snp_id_plot, alpha=1, angle=pi / 2,
                       text_font_size="8pt", text_baseline="middle", text_align="left")
    else:
        connector = figure(outline_line_color="white", y_axis_type=None, x_axis_type=None,
                           x_range=xr, y_range=yr3, border_fill_color='white',
                           title="", min_border_left=100, min_border_right=5, min_border_top=0, min_border_bottom=0, h_symmetry=False, v_symmetry=False,
                           plot_width=800, plot_height=30, tools="xpan,tap")
        connector.segment(x, y0, x, y1, color="black")
        connector.segment(x, y1, x2, y2, color="black")
        connector.segment(x2, y2, x2, y3, color="black")

    connector.yaxis.major_label_text_color = None
    connector.yaxis.minor_tick_line_alpha = 0  # Option does not work
    connector.yaxis.axis_label = " "
    connector.grid.grid_line_color = None
    connector.axis.axis_line_color = None
    connector.axis.major_tick_line_color = None
    connector.axis.minor_tick_line_color = None

    connector.toolbar_location = None

    data_rug = {
        'x': x,
        'y': y,
        'w': w,
        'h': h,
        'coord_snps_plot': coord_snps_plot,
        'snp_id_plot': snp_id_plot,
        'alleles_snp_plot': alleles_snp_plot
    }

    source_rug = ColumnDataSource(data_rug)

    # Rug Plot
    rug = figure(x_range=xr, y_range=yr, y_axis_type=None,
                 title="", min_border_top=1, min_border_bottom=0, min_border_left=100, min_border_right=5, h_symmetry=False, v_symmetry=False,
                 plot_width=800, plot_height=50, tools="hover,xpan,tap")
    rug.rect(x='x', y='y', width='w', height='h', fill_color='red', dilate=True, line_color=None, fill_alpha=0.6, source=source_rug)

    hover = rug.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("SNP", "@snp_id_plot (@alleles_snp_plot)"),
        ("Coord", "@coord_snps_plot"),
    ])

    rug.toolbar_location = None

    # Gene Plot
    tabix_gene = "tabix -fh {0} {1}:{2}-{3} > {4}".format(gene_dir, snp_coords[1][1], int(
        (x[0] - buffer) * 1000000), int((x[-1] + buffer) * 1000000), tmp_dir + "genes_" + request + ".txt")
    subprocess.call(tabix_gene, shell=True)
    filename = tmp_dir + "genes_" + request + ".txt"
    genes_raw = open(filename).readlines()

    genes_plot_start = []
    genes_plot_end = []
    genes_plot_y = []
    genes_plot_name = []
    exons_plot_x = []
    exons_plot_y = []
    exons_plot_w = []
    exons_plot_h = []
    exons_plot_name = []
    exons_plot_id = []
    exons_plot_exon = []
    message = ["Too many genes to plot."]
    lines = [0]
    gap = 80000
    tall = 0.75
    if genes_raw != None:
        for i in range(len(genes_raw)):
            bin, name_id, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, exonCount, exonStarts, exonEnds, score, name2, cdsStartStat, cdsEndStat, exonFrames = genes_raw[
                i].strip().split()
            name = name2
            id = name_id
            e_start = exonStarts.split(",")
            e_end = exonEnds.split(",")

            # Determine Y Coordinate
            i = 0
            y_coord = None
            while y_coord == None:
                if i > len(lines) - 1:
                    y_coord = i + 1
                    lines.append(int(txEnd))
                elif int(txStart) > (gap + lines[i]):
                    y_coord = i + 1
                    lines[i] = int(txEnd)
                else:
                    i += 1

            genes_plot_start.append(int(txStart) / 1000000.0)
            genes_plot_end.append(int(txEnd) / 1000000.0)
            genes_plot_y.append(y_coord)
            genes_plot_name.append(name + "  ")

            for i in range(len(e_start) - 1):
                if strand == "+":
                    exon = i + 1
                else:
                    exon = len(e_start) - 1 - i

                width = (int(e_end[i]) - int(e_start[i])) / 1000000.0
                x_coord = int(e_start[i]) / 1000000.0 + (width / 2)

                exons_plot_x.append(x_coord)
                exons_plot_y.append(y_coord)
                exons_plot_w.append(width)
                exons_plot_h.append(tall)
                exons_plot_name.append(name)
                exons_plot_id.append(id)
                exons_plot_exon.append(exon)

    n_rows = len(lines)
    genes_plot_yn = [n_rows - w + 0.5 for w in genes_plot_y]
    exons_plot_yn = [n_rows - w + 0.5 for w in exons_plot_y]
    yr2 = Range1d(start=0, end=n_rows)

    data_gene_plot = {
        'exons_plot_x': exons_plot_x,
        'exons_plot_yn': exons_plot_yn,
        'exons_plot_w': exons_plot_w,
        'exons_plot_h': exons_plot_h,
        'exons_plot_name': exons_plot_name,
        'exons_plot_id': exons_plot_id,
        'exons_plot_exon': exons_plot_exon,
        'coord_snps_plot': coord_snps_plot,
        'snp_id_plot': snp_id_plot,
        'alleles_snp_plot': alleles_snp_plot
    }

    source_gene_plot = ColumnDataSource(data_gene_plot)

    max_genes = 40
    if len(lines) < 3 or len(genes_raw) > max_genes:
        plot_h_pix = 150
    else:
        plot_h_pix = 150 + (len(lines) - 2) * 50

    gene_plot = figure(min_border_top=2, min_border_bottom=0, min_border_left=100, min_border_right=5,
                       x_range=xr, y_range=yr2, border_fill_color='white',
                       title="", h_symmetry=False, v_symmetry=False, logo=None,
                       plot_width=800, plot_height=plot_h_pix, tools="hover,xpan,box_zoom,wheel_zoom,tap,undo,redo,reset,previewsave")

    if len(genes_raw) <= max_genes:
        gene_plot.segment(genes_plot_start, genes_plot_yn, genes_plot_end,
                          genes_plot_yn, color="black", alpha=1, line_width=2)
        gene_plot.rect(x='exons_plot_x', y='exons_plot_yn', width='exons_plot_w', height='exons_plot_h',
                        source=source_gene_plot, fill_color='grey', line_color="grey")
        gene_plot.text(genes_plot_start, genes_plot_yn, text=genes_plot_name, alpha=1, text_font_size="7pt",
                       text_font_style="bold", text_baseline="middle", text_align="right", angle=0)
        hover = gene_plot.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ("Gene", "@exons_plot_name"),
            ("ID", "@exons_plot_id"),
            ("Exon", "@exons_plot_exon"),
        ])

    else:
        x_coord_text = x[0] + (x[-1] - x[0]) / 2.0
        gene_plot.text(x_coord_text, n_rows / 2.0, text=message, alpha=1,
                       text_font_size="12pt", text_font_style="bold", text_baseline="middle", text_align="center", angle=0)

    gene_plot.xaxis.axis_label = "Chromosome " + \
        snp_coords[1][1] + " Coordinate (Mb)(GRCh37)"
    gene_plot.yaxis.axis_label = "Genes"
    gene_plot.ygrid.grid_line_color = None
    gene_plot.yaxis.axis_line_color = None
    gene_plot.yaxis.minor_tick_line_color = None
    gene_plot.yaxis.major_tick_line_color = None
    gene_plot.yaxis.major_label_text_color = None

    gene_plot.toolbar_location = "below"

    # Change output backend to SVG temporarily for headless export
    # Will be changed back to canvas in LDlink.js
    matrix_plot.output_backend = "svg"
    rug.output_backend = "svg"
    gene_plot.output_backend = "svg"
    export_svgs(matrix_plot, filename=tmp_dir + "matrix_plot_1_" + request + ".svg")
    export_svgs(gene_plot, filename=tmp_dir + "gene_plot_1_" + request + ".svg")

    # Concatenate svgs
    sg.Figure("21.59cm", "27.94cm",
        sg.SVG(tmp_dir + "matrix_plot_1_" + request + ".svg"),
        sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").move(0, 720)
        ).save(tmp_dir + "matrix_plot_" + request + ".svg")

    sg.Figure("107.95cm", "139.70cm",
        sg.SVG(tmp_dir + "matrix_plot_1_" + request + ".svg").scale(5),
        sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").scale(5).move(0, 3600)
        ).save(tmp_dir + "matrix_plot_scaled_" + request + ".svg")

    # Export to PDF
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "matrix_plot_" + request + ".svg " + tmp_dir + "matrix_plot_" + request + ".pdf", shell=True)
    # Export to PNG
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "matrix_plot_scaled_" + request + ".svg " + tmp_dir + "matrix_plot_" + request + ".png", shell=True)
    # Export to JPEG
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "matrix_plot_scaled_" + request + ".svg " + tmp_dir + "matrix_plot_" + request + ".jpeg", shell=True)    
    # Remove individual SVG files after they are combined
    subprocess.call("rm " + tmp_dir + "matrix_plot_1_" + request + ".svg", shell=True)
    subprocess.call("rm " + tmp_dir + "gene_plot_1_" + request + ".svg", shell=True)
    # Remove scaled SVG file after it is converted to png and jpeg
    subprocess.call("rm " + tmp_dir + "matrix_plot_scaled_" + request + ".svg", shell=True)

    reset_output()

    return None
Ejemplo n.º 12
0
import svgutils.compose as cg
from tqdm import tqdm


# For every grid size c, lay out c*c copies of the CPU icon in a c-by-c
# tiling and write the result to its own SVG file.
for c in tqdm([1, 2, 4, 9, 18]):
    # The same length string is used for both figure width and height.
    wh = "{}cm".format(16 * c / 12)
    # A fresh SVG element is loaded for every cell of the grid.
    icons = [cg.SVG('img/cpu.svg').scale(3) for __ in range(c * c)]
    grid = cg.Figure(wh, wh, *icons).tile(c, c)
    grid.save("img/cpugrids/cpu1-" + str(c) + ".svg")
Ejemplo n.º 13
0
def calculate_matrix_svg(snplst,
                         pop,
                         request,
                         genome_build,
                         r2_d="r2",
                         collapseTranscript=True):

    # Set data directories using config.yml
    with open('config.yml', 'r') as yml_file:
        config = yaml.load(yml_file)
    env = config['env']
    api_mongo_addr = config['api']['api_mongo_addr']
    population_samples_dir = config['data']['population_samples_dir']
    data_dir = config['data']['data_dir']
    tmp_dir = config['data']['tmp_dir']
    genotypes_dir = config['data']['genotypes_dir']
    aws_info = config['aws']
    mongo_username = config['database']['mongo_user_readonly']
    mongo_password = config['database']['mongo_password']
    mongo_port = config['database']['mongo_port']

    export_s3_keys = retrieveAWSCredentials()

    # Ensure tmp directory exists
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    # Open SNP list file
    snps_raw = open(snplst).readlines()

    # Remove duplicate RS numbers
    snps = []
    for snp_raw in snps_raw:
        snp = snp_raw.strip().split()
        if snp not in snps:
            snps.append(snp)

    # Select desired ancestral populations
    pops = pop.split("+")
    pop_dirs = []
    for pop_i in pops:
        if pop_i in [
                "ALL", "AFR", "AMR", "EAS", "EUR", "SAS", "ACB", "ASW", "BEB",
                "CDX", "CEU", "CHB", "CHS", "CLM", "ESN", "FIN", "GBR", "GIH",
                "GWD", "IBS", "ITU", "JPT", "KHV", "LWK", "MSL", "MXL", "PEL",
                "PJL", "PUR", "STU", "TSI", "YRI"
        ]:
            pop_dirs.append(data_dir + population_samples_dir + pop_i + ".txt")

    get_pops = "cat " + " ".join(pop_dirs)
    pop_list = [
        x.decode('utf-8') for x in subprocess.Popen(
            get_pops, shell=True, stdout=subprocess.PIPE).stdout.readlines()
    ]

    ids = [i.strip() for i in pop_list]
    pop_ids = list(set(ids))

    # Connect to Mongo snp database
    if env == 'local':
        mongo_host = api_mongo_addr
    else:
        mongo_host = 'localhost'
    client = MongoClient(
        'mongodb://' + mongo_username + ':' + mongo_password + '@' +
        mongo_host + '/admin', mongo_port)
    db = client["LDLink"]

    def get_coords(db, rsid):
        """Fetch one dbsnp document by variant id and return it as plain JSON data.

        Any leading/trailing "r" and "s" characters are stripped from
        ``rsid`` and the remainder is used as the ``id`` value of the
        ``find_one`` filter on ``db.dbsnp``. The query result is serialised
        with ``json_util.dumps`` and parsed back with ``json.loads`` before
        being returned.
        """
        bare_id = rsid.strip("rs")
        document = db.dbsnp.find_one({"id": bare_id})
        return json.loads(json_util.dumps(document))

    # Query genomic coordinates
    def get_rsnum(db, coord):
        """Query ``db.dbsnp`` for the records located at a ``chrom:pos`` coordinate.

        ``coord`` has any leading/trailing "c", "h" and "r" characters
        stripped and is then split on ":" into chromosome and position.
        The position field name is taken from
        ``genome_build_vars[genome_build]``. The query result is serialised
        with ``json_util.dumps`` and parsed back with ``json.loads`` before
        being returned.
        """
        pieces = coord.strip("chr").split(":")
        chromosome = pieces[0]
        position = pieces[1]

        # Lower-case sex chromosomes are upper-cased for the lookup;
        # every other chromosome label is used as given.
        if chromosome in ('x', 'y'):
            chromosome_value = chromosome.upper()
        else:
            chromosome_value = str(chromosome)

        position_field = genome_build_vars[genome_build]['position']
        cursor = db.dbsnp.find({
            "chromosome": chromosome_value,
            position_field: str(position),
        })
        return json.loads(json_util.dumps(cursor))

    # Replace input genomic coordinates with variant ids (rsids)
    def replace_coords_rsid(db, snp_lst):
        """Return a copy of ``snp_lst`` with coordinate entries replaced by rsids.

        Each entry of ``snp_lst`` is a list of whitespace-split tokens from
        one input line; only the first token is inspected.

        * Entries whose first token starts with "rs" are kept unchanged.
        * Empty entries (e.g. from blank input lines) are kept unchanged so
          that they no longer raise ``IndexError`` here.
        * Any other entry is looked up with ``get_rsnum``. When nothing is
          found the entry is kept unchanged. When exactly one record is
          found its ``id`` is used. When several are found, the first
          record whose ``id`` equals its ``ref_id`` is preferred, falling
          back to the first record. The entry is replaced by ``["rs" + id]``.

        Args:
            db: database handle forwarded to ``get_rsnum``.
            snp_lst: list of token lists.

        Returns:
            A new list with one element per input entry, in the same order.
        """
        new_snp_lst = []
        for snp_raw_i in snp_lst:
            # Nothing to resolve: blank entry or already an rsid.
            if not snp_raw_i or snp_raw_i[0][0:2] == "rs":
                new_snp_lst.append(snp_raw_i)
                continue

            snp_info_lst = get_rsnum(db, snp_raw_i[0])
            if not snp_info_lst:
                # No lookup result (None or empty): leave the input untouched.
                new_snp_lst.append(snp_raw_i)
                continue

            if len(snp_info_lst) > 1:
                # Several records share this position: prefer a record that
                # is its own reference id, otherwise take the first record.
                ref_variants = [
                    snp_info['id'] for snp_info in snp_info_lst
                    if snp_info['id'] == snp_info['ref_id']
                ]
                if ref_variants:
                    chosen_id = ref_variants[0]
                else:
                    chosen_id = snp_info_lst[0]['id']
            else:
                chosen_id = snp_info_lst[0]['id']

            new_snp_lst.append(["rs" + chosen_id])
        return new_snp_lst

    snps = replace_coords_rsid(db, snps)

    # Find RS numbers in snp database
    rs_nums = []
    snp_pos = []
    snp_coords = []
    tabix_coords = ""
    for snp_i in snps:
        if len(snp_i) > 0:
            if len(snp_i[0]) > 2:
                if (snp_i[0][0:2] == "rs"
                        or snp_i[0][0:3] == "chr") and snp_i[0][-1].isdigit():
                    snp_coord = get_coords(db, snp_i[0])
                    if snp_coord != None and snp_coord[genome_build_vars[
                            genome_build]['position']] != "NA":
                        # check if variant is on chrY for genome build = GRCh38
                        if not (snp_coord['chromosome'] == "Y" and
                                (genome_build == "grch38"
                                 or genome_build == "grch38_high_coverage")):
                            rs_nums.append(snp_i[0])
                            snp_pos.append(snp_coord[
                                genome_build_vars[genome_build]['position']])
                            temp = [
                                snp_i[0], snp_coord['chromosome'],
                                snp_coord[genome_build_vars[genome_build]
                                          ['position']]
                            ]
                            snp_coords.append(temp)

    # Check max distance between SNPs
    distance_bp = []
    for i in range(len(snp_coords)):
        distance_bp.append(int(snp_coords[i][2]))

    # Sort coordinates and make tabix formatted coordinates
    snp_pos_int = [int(i) for i in snp_pos]
    snp_pos_int.sort()
    snp_coord_str = [
        genome_build_vars[genome_build]['1000G_chr_prefix'] +
        snp_coords[0][1] + ":" + str(i) + "-" + str(i) for i in snp_pos_int
    ]
    tabix_coords = " " + " ".join(snp_coord_str)

    # Extract 1000 Genomes phased genotypes
    vcf_filePath = "%s/%s%s/%s" % (
        config['aws']['data_subfolder'], genotypes_dir,
        genome_build_vars[genome_build]['1000G_dir'],
        genome_build_vars[genome_build]['1000G_file'] % (snp_coords[0][1]))
    vcf_query_snp_file = "s3://%s/%s" % (config['aws']['bucket'], vcf_filePath)

    checkS3File(aws_info, config['aws']['bucket'], vcf_filePath)

    # Define function to correct indel alleles
    def set_alleles(a1, a2):
        """Normalise a pair of allele strings for display.

        When both alleles are a single character they are returned as-is.
        Otherwise the first character of every multi-character allele is
        dropped, and a single-character allele is replaced by "-".

        Args:
            a1: first allele string.
            a2: second allele string.

        Returns:
            Tuple ``(a1_n, a2_n)`` of the normalised alleles.

        Raises:
            ValueError: if either allele is empty. Previously this fell
                through every branch and failed with ``UnboundLocalError``.
        """
        if not a1 or not a2:
            raise ValueError(
                "alleles must be non-empty, got %r and %r" % (a1, a2))

        if len(a1) == 1 and len(a2) == 1:
            return (a1, a2)

        # At least one allele is longer than one character: trim the first
        # character from the long one(s) and mark a short one as "-".
        a1_n = a1[1:] if len(a1) > 1 else "-"
        a2_n = a2[1:] if len(a2) > 1 else "-"
        return (a1_n, a2_n)

    # Import SNP VCF files
    tabix_snps = export_s3_keys + " cd {2}; tabix -fhD {0}{1} | grep -v -e END".format(
        vcf_query_snp_file, tabix_coords, data_dir + genotypes_dir +
        genome_build_vars[genome_build]['1000G_dir'])
    vcf = [
        x.decode('utf-8') for x in subprocess.Popen(
            tabix_snps, shell=True, stdout=subprocess.PIPE).stdout.readlines()
    ]

    h = 0
    while vcf[h][0:2] == "##":
        h += 1

    head = vcf[h].strip().split()

    # Extract haplotypes
    index = []
    for i in range(9, len(head)):
        if head[i] in pop_ids:
            index.append(i)

    hap1 = [[]]
    for i in range(len(index) - 1):
        hap1.append([])
    hap2 = [[]]
    for i in range(len(index) - 1):
        hap2.append([])

    rsnum_lst = []
    allele_lst = []
    pos_lst = []

    for g in range(h + 1, len(vcf)):
        geno = vcf[g].strip().split()
        geno[0] = geno[0].lstrip('chr')
        if geno[1] not in snp_pos:
            continue

        if snp_pos.count(geno[1]) == 1:
            rs_query = rs_nums[snp_pos.index(geno[1])]

        else:
            pos_index = []
            for p in range(len(snp_pos)):
                if snp_pos[p] == geno[1]:
                    pos_index.append(p)
            for p in pos_index:
                if rs_nums[p] not in rsnum_lst:
                    rs_query = rs_nums[p]
                    break

        if rs_query in rsnum_lst:
            continue

        rs_1000g = geno[2]

        if rs_query == rs_1000g:
            rsnum = rs_1000g
        else:
            count = -2
            found = "false"
            while count <= 2 and count + g < len(vcf):
                geno_next = vcf[g + count].strip().split()
                geno_next[0] = geno_next[0].lstrip('chr')
                if len(geno_next) >= 3 and rs_query == geno_next[2]:
                    found = "true"
                    break
                count += 1

            if found == "false":
                indx = [i[0] for i in snps].index(rs_query)
                # snps[indx][0] = geno[2]
                # rsnum = geno[2]
                snps[indx][0] = rs_query
                rsnum = rs_query
            else:
                continue

        if "," not in geno[3] and "," not in geno[4]:
            a1, a2 = set_alleles(geno[3], geno[4])
            for i in range(len(index)):
                if geno[index[i]] == "0|0":
                    hap1[i].append(a1)
                    hap2[i].append(a1)
                elif geno[index[i]] == "0|1":
                    hap1[i].append(a1)
                    hap2[i].append(a2)
                elif geno[index[i]] == "1|0":
                    hap1[i].append(a2)
                    hap2[i].append(a1)
                elif geno[index[i]] == "1|1":
                    hap1[i].append(a2)
                    hap2[i].append(a2)
                elif geno[index[i]] == "0":
                    hap1[i].append(a1)
                    hap2[i].append(".")
                elif geno[index[i]] == "1":
                    hap1[i].append(a2)
                    hap2[i].append(".")
                else:
                    hap1[i].append(".")
                    hap2[i].append(".")

            rsnum_lst.append(rsnum)

            position = "chr" + geno[0] + ":" + geno[1] + "-" + geno[1]
            pos_lst.append(position)
            alleles = a1 + "/" + a2
            allele_lst.append(alleles)

    # Calculate Pairwise LD Statistics
    all_haps = hap1 + hap2
    ld_matrix = [[[None for v in range(2)] for i in range(len(all_haps[0]))]
                 for j in range(len(all_haps[0]))]

    for i in range(len(all_haps[0])):
        for j in range(i, len(all_haps[0])):
            hap = {}
            for k in range(len(all_haps)):
                # Extract haplotypes
                hap_k = all_haps[k][i] + all_haps[k][j]
                if hap_k in hap:
                    hap[hap_k] += 1
                else:
                    hap[hap_k] = 1

            # Remove Missing Haplotypes
            keys = list(hap.keys())
            for key in keys:
                if "." in key:
                    hap.pop(key, None)

            # Check all haplotypes are present
            if len(hap) != 4:
                snp_i_a = allele_lst[i].split("/")
                snp_j_a = allele_lst[j].split("/")
                haps = [
                    snp_i_a[0] + snp_j_a[0], snp_i_a[0] + snp_j_a[1],
                    snp_i_a[1] + snp_j_a[0], snp_i_a[1] + snp_j_a[1]
                ]
                for h in haps:
                    if h not in hap:
                        hap[h] = 0

            # Perform LD calculations
            A = hap[sorted(hap)[0]]
            B = hap[sorted(hap)[1]]
            C = hap[sorted(hap)[2]]
            D = hap[sorted(hap)[3]]
            tmax = max(A, B, C, D)
            delta = float(A * D - B * C)
            Ms = float((A + C) * (B + D) * (A + B) * (C + D))
            if Ms != 0:
                # D prime
                if delta < 0:
                    D_prime = round(
                        abs(delta / min((A + C) * (A + B), (B + D) * (C + D))),
                        3)
                else:
                    D_prime = round(
                        abs(delta / min((A + C) * (C + D), (A + B) * (B + D))),
                        3)

                # R2
                r2 = round((delta**2) / Ms, 3)

                # Find Correlated Alleles
                if str(r2) != "NA" and float(r2) > 0.1:
                    Ac = hap[sorted(hap)[0]]
                    Bc = hap[sorted(hap)[1]]
                    Cc = hap[sorted(hap)[2]]
                    Dc = hap[sorted(hap)[3]]

                    if ((Ac * Dc) / max((Bc * Cc), 0.01) > 1):
                        match = sorted(hap)[0][0] + "=" + sorted(
                            hap)[0][1] + "," + sorted(
                                hap)[3][0] + "=" + sorted(hap)[3][1]
                    else:
                        match = sorted(hap)[1][0] + "=" + sorted(
                            hap)[1][1] + "," + sorted(
                                hap)[2][0] + "=" + sorted(hap)[2][1]
                else:
                    match = "  =  ,  =  "
            else:
                D_prime = "NA"
                r2 = "NA"
                match = "  =  ,  =  "

            snp1 = rsnum_lst[i]
            snp2 = rsnum_lst[j]
            pos1 = pos_lst[i].split("-")[0]
            pos2 = pos_lst[j].split("-")[0]
            allele1 = allele_lst[i]
            allele2 = allele_lst[j]
            corr = match.split(",")[0].split("=")[1] + "=" + match.split(
                ",")[0].split("=")[0] + "," + match.split(",")[1].split(
                    "=")[1] + "=" + match.split(",")[1].split("=")[0]
            corr_f = match

            ld_matrix[i][j] = [
                snp1, snp2, allele1, allele2, corr, pos1, pos2, D_prime, r2
            ]
            ld_matrix[j][i] = [
                snp2, snp1, allele2, allele1, corr_f, pos2, pos1, D_prime, r2
            ]

    # Generate Plot Variables
    out = [j for i in ld_matrix for j in i]
    xnames = []
    ynames = []
    xA = []
    yA = []
    corA = []
    xpos = []
    ypos = []
    D = []
    R = []
    box_color = []
    box_trans = []

    if r2_d not in ["r2", "d"]:
        r2_d = "r2"

    for i in range(len(out)):
        snp1, snp2, allele1, allele2, corr, pos1, pos2, D_prime, r2 = out[i]
        xnames.append(snp1)
        ynames.append(snp2)
        xA.append(allele1)
        yA.append(allele2)
        corA.append(corr)
        xpos.append(pos1)
        ypos.append(pos2)
        sqrti = math.floor(math.sqrt(len(out)))
        if sqrti == 0:
            D.append(str(round(float(D_prime), 4)))
            R.append(str(round(float(r2), 4)))
            box_color.append("red")
            box_trans.append(r2)
        elif i % sqrti < i // sqrti and r2 != "NA":
            D.append(str(round(float(D_prime), 4)))
            R.append(str(round(float(r2), 4)))
            box_color.append("blue")
            box_trans.append(abs(D_prime))
        elif i % sqrti > i // sqrti and D_prime != "NA":
            D.append(str(round(float(D_prime), 4)))
            R.append(str(round(float(r2), 4)))
            box_color.append("red")
            box_trans.append(r2)
        elif i % sqrti == i // sqrti and D_prime != "NA":
            D.append(str(round(float(D_prime), 4)))
            R.append(str(round(float(r2), 4)))
            box_color.append("purple")
            box_trans.append(r2)
        else:
            D.append("NA")
            R.append("NA")
            box_color.append("gray")
            box_trans.append(0.1)
    # Import plotting modules
    from collections import OrderedDict
    from bokeh.embed import components, file_html
    from bokeh.layouts import gridplot
    from bokeh.models import HoverTool, LinearAxis, Range1d
    from bokeh.plotting import ColumnDataSource, curdoc, figure, output_file, reset_output, save
    from bokeh.resources import CDN
    from bokeh.io import export_svgs
    import svgutils.compose as sg
    from math import pi

    reset_output()

    # Aggregate Plotting Data
    x = []
    y = []
    w = []
    h = []
    coord_snps_plot = []
    snp_id_plot = []
    alleles_snp_plot = []
    for i in range(0, len(xpos), int(len(xpos)**0.5)):
        x.append(int(xpos[i].split(":")[1]) / 1000000.0)
        y.append(0.5)
        w.append(0.00003)
        h.append(1.06)
        coord_snps_plot.append(xpos[i])
        snp_id_plot.append(xnames[i])
        alleles_snp_plot.append(xA[i])

    buffer = (x[-1] - x[0]) * 0.025
    xr = Range1d(start=x[0] - buffer, end=x[-1] + buffer)
    yr = Range1d(start=-0.03, end=1.03)
    y2_ll = [-0.03] * len(x)
    y2_ul = [1.03] * len(x)

    yr_pos = Range1d(start=(x[-1] + buffer) * -1, end=(x[0] - buffer) * -1)
    yr0 = Range1d(start=0, end=1)
    yr2 = Range1d(start=0, end=3.8)
    yr3 = Range1d(start=0, end=1)

    spacing = (x[-1] - x[0] + buffer + buffer) / (len(x) * 1.0)
    x2 = []
    y0 = []
    y1 = []
    y2 = []
    y3 = []
    y4 = []
    for i in range(len(x)):
        x2.append(x[0] - buffer + spacing * (i + 0.5))
        y0.append(0)
        y1.append(0.20)
        y2.append(0.80)
        y3.append(1)
        y4.append(1.15)

    xname_pos = []
    for i in x2:
        for j in range(len(x2)):
            xname_pos.append(i)

    data = {
        'xname': xnames,
        'xname_pos': xname_pos,
        'yname': ynames,
        'xA': xA,
        'yA': yA,
        'xpos': xpos,
        'ypos': ypos,
        'R2': R,
        'Dp': D,
        'corA': corA,
        'box_color': box_color,
        'box_trans': box_trans
    }

    source = ColumnDataSource(data)

    threshold = 70
    if len(snps) < threshold:
        matrix_plot = figure(
            outline_line_color="white",
            min_border_top=0,
            min_border_bottom=2,
            min_border_left=100,
            min_border_right=5,
            x_range=xr,
            y_range=list(reversed(rsnum_lst)),
            h_symmetry=False,
            v_symmetry=False,
            border_fill_color='white',
            x_axis_type=None,
            logo=None,
            tools="hover,undo,redo,reset,pan,box_zoom,previewsave",
            title=" ",
            plot_width=800,
            plot_height=700)

    else:
        matrix_plot = figure(
            outline_line_color="white",
            min_border_top=0,
            min_border_bottom=2,
            min_border_left=100,
            min_border_right=5,
            x_range=xr,
            y_range=list(reversed(rsnum_lst)),
            h_symmetry=False,
            v_symmetry=False,
            border_fill_color='white',
            x_axis_type=None,
            y_axis_type=None,
            logo=None,
            tools="hover,undo,redo,reset,pan,box_zoom,previewsave",
            title=" ",
            plot_width=800,
            plot_height=700)

    matrix_plot.rect(x='xname_pos',
                     y='yname',
                     width=0.95 * spacing,
                     height=0.95,
                     source=source,
                     color="box_color",
                     alpha="box_trans",
                     line_color=None)

    matrix_plot.grid.grid_line_color = None
    matrix_plot.axis.axis_line_color = None
    matrix_plot.axis.major_tick_line_color = None
    if len(snps) < threshold:
        matrix_plot.axis.major_label_text_font_size = "8pt"
        matrix_plot.xaxis.major_label_orientation = "vertical"

    matrix_plot.axis.major_label_text_font_style = "normal"
    matrix_plot.xaxis.major_label_standoff = 0

    sup_2 = "\u00B2"

    hover = matrix_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("Variant 1", " " + "@yname (@yA)"),
        ("Variant 2", " " + "@xname (@xA)"),
        ("D\'", " " + "@Dp"),
        ("R" + sup_2, " " + "@R2"),
        ("Correlated Alleles", " " + "@corA"),
    ])

    # Connecting and Rug Plots
    # Connector Plot
    if len(snps) < threshold:
        connector = figure(outline_line_color="white",
                           y_axis_type=None,
                           x_axis_type=None,
                           x_range=xr,
                           y_range=yr2,
                           border_fill_color='white',
                           title="",
                           min_border_left=100,
                           min_border_right=5,
                           min_border_top=0,
                           min_border_bottom=0,
                           h_symmetry=False,
                           v_symmetry=False,
                           plot_width=800,
                           plot_height=90,
                           tools="xpan,tap")
        connector.segment(x, y0, x, y1, color="black")
        connector.segment(x, y1, x2, y2, color="black")
        connector.segment(x2, y2, x2, y3, color="black")
        connector.text(x2,
                       y4,
                       text=snp_id_plot,
                       alpha=1,
                       angle=pi / 2,
                       text_font_size="8pt",
                       text_baseline="middle",
                       text_align="left")
    else:
        connector = figure(outline_line_color="white",
                           y_axis_type=None,
                           x_axis_type=None,
                           x_range=xr,
                           y_range=yr3,
                           border_fill_color='white',
                           title="",
                           min_border_left=100,
                           min_border_right=5,
                           min_border_top=0,
                           min_border_bottom=0,
                           h_symmetry=False,
                           v_symmetry=False,
                           plot_width=800,
                           plot_height=30,
                           tools="xpan,tap")
        connector.segment(x, y0, x, y1, color="black")
        connector.segment(x, y1, x2, y2, color="black")
        connector.segment(x2, y2, x2, y3, color="black")

    connector.yaxis.major_label_text_color = None
    connector.yaxis.minor_tick_line_alpha = 0  # Option does not work
    connector.yaxis.axis_label = " "
    connector.grid.grid_line_color = None
    connector.axis.axis_line_color = None
    connector.axis.major_tick_line_color = None
    connector.axis.minor_tick_line_color = None

    connector.toolbar_location = None

    data_rug = {
        'x': x,
        'y': y,
        'w': w,
        'h': h,
        'coord_snps_plot': coord_snps_plot,
        'snp_id_plot': snp_id_plot,
        'alleles_snp_plot': alleles_snp_plot
    }

    source_rug = ColumnDataSource(data_rug)

    # Rug Plot
    rug = figure(x_range=xr,
                 y_range=yr,
                 y_axis_type=None,
                 title="",
                 min_border_top=1,
                 min_border_bottom=0,
                 min_border_left=100,
                 min_border_right=5,
                 h_symmetry=False,
                 v_symmetry=False,
                 plot_width=800,
                 plot_height=50,
                 tools="hover,xpan,tap")
    rug.rect(x='x',
             y='y',
             width='w',
             height='h',
             fill_color='red',
             dilate=True,
             line_color=None,
             fill_alpha=0.6,
             source=source_rug)

    hover = rug.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("SNP", "@snp_id_plot (@alleles_snp_plot)"),
        ("Coord", "@coord_snps_plot"),
    ])

    rug.toolbar_location = None

    if collapseTranscript == "false":
        # Gene Plot (All Transcripts)
        genes_file = tmp_dir + "genes_" + request + ".json"
        genes_raw = open(genes_file).readlines()

        genes_plot_start = []
        genes_plot_end = []
        genes_plot_y = []
        genes_plot_name = []
        exons_plot_x = []
        exons_plot_y = []
        exons_plot_w = []
        exons_plot_h = []
        exons_plot_name = []
        exons_plot_id = []
        exons_plot_exon = []
        message = ["Too many genes to plot."]
        lines = [0]
        gap = 80000
        tall = 0.75
        if genes_raw != None and len(genes_raw) > 0:
            for gene_raw_obj in genes_raw:
                gene_obj = json.loads(gene_raw_obj)
                bin = gene_obj["bin"]
                name_id = gene_obj["name"]
                chrom = gene_obj["chrom"]
                strand = gene_obj["strand"]
                txStart = gene_obj["txStart"]
                txEnd = gene_obj["txEnd"]
                cdsStart = gene_obj["cdsStart"]
                cdsEnd = gene_obj["cdsEnd"]
                exonCount = gene_obj["exonCount"]
                exonStarts = gene_obj["exonStarts"]
                exonEnds = gene_obj["exonEnds"]
                score = gene_obj["score"]
                name2 = gene_obj["name2"]
                cdsStartStat = gene_obj["cdsStartStat"]
                cdsEndStat = gene_obj["cdsEndStat"]
                exonFrames = gene_obj["exonFrames"]
                name = name2
                id = name_id
                e_start = exonStarts.split(",")
                e_end = exonEnds.split(",")

                # Determine Y Coordinate
                i = 0
                y_coord = None
                while y_coord == None:
                    if i > len(lines) - 1:
                        y_coord = i + 1
                        lines.append(int(txEnd))
                    elif int(txStart) > (gap + lines[i]):
                        y_coord = i + 1
                        lines[i] = int(txEnd)
                    else:
                        i += 1

                genes_plot_start.append(int(txStart) / 1000000.0)
                genes_plot_end.append(int(txEnd) / 1000000.0)
                genes_plot_y.append(y_coord)
                genes_plot_name.append(name + "  ")

                for i in range(len(e_start) - 1):
                    if strand == "+":
                        exon = i + 1
                    else:
                        exon = len(e_start) - 1 - i

                    width = (int(e_end[i]) - int(e_start[i])) / 1000000.0
                    x_coord = int(e_start[i]) / 1000000.0 + (width / 2)

                    exons_plot_x.append(x_coord)
                    exons_plot_y.append(y_coord)
                    exons_plot_w.append(width)
                    exons_plot_h.append(tall)
                    exons_plot_name.append(name)
                    exons_plot_id.append(id)
                    exons_plot_exon.append(exon)

        n_rows = len(lines)
        genes_plot_yn = [n_rows - w + 0.5 for w in genes_plot_y]
        exons_plot_yn = [n_rows - w + 0.5 for w in exons_plot_y]
        yr2 = Range1d(start=0, end=n_rows)

        data_gene_plot = {
            'exons_plot_x': exons_plot_x,
            'exons_plot_yn': exons_plot_yn,
            'exons_plot_w': exons_plot_w,
            'exons_plot_h': exons_plot_h,
            'exons_plot_name': exons_plot_name,
            'exons_plot_id': exons_plot_id,
            'exons_plot_exon': exons_plot_exon,
            'coord_snps_plot': coord_snps_plot,
            'snp_id_plot': snp_id_plot,
            'alleles_snp_plot': alleles_snp_plot
        }

        source_gene_plot = ColumnDataSource(data_gene_plot)

        max_genes = 40
        # if len(lines) < 3 or len(genes_raw) > max_genes:
        if len(lines) < 3:
            plot_h_pix = 250
        else:
            plot_h_pix = 250 + (len(lines) - 2) * 50

        gene_plot = figure(
            min_border_top=2,
            min_border_bottom=0,
            min_border_left=100,
            min_border_right=5,
            x_range=xr,
            y_range=yr2,
            border_fill_color='white',
            title="",
            h_symmetry=False,
            v_symmetry=False,
            logo=None,
            plot_width=800,
            plot_height=plot_h_pix,
            tools=
            "hover,xpan,box_zoom,wheel_zoom,tap,undo,redo,reset,previewsave")

        # if len(genes_raw) <= max_genes:
        gene_plot.segment(genes_plot_start,
                          genes_plot_yn,
                          genes_plot_end,
                          genes_plot_yn,
                          color="black",
                          alpha=1,
                          line_width=2)
        gene_plot.rect(x='exons_plot_x',
                       y='exons_plot_yn',
                       width='exons_plot_w',
                       height='exons_plot_h',
                       source=source_gene_plot,
                       fill_color='grey',
                       line_color="grey")
        gene_plot.text(genes_plot_start,
                       genes_plot_yn,
                       text=genes_plot_name,
                       alpha=1,
                       text_font_size="7pt",
                       text_font_style="bold",
                       text_baseline="middle",
                       text_align="right",
                       angle=0)
        hover = gene_plot.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ("Gene", "@exons_plot_name"),
            ("ID", "@exons_plot_id"),
            ("Exon", "@exons_plot_exon"),
        ])

        # else:
        #     x_coord_text = x[0] + (x[-1] - x[0]) / 2.0
        #     gene_plot.text(x_coord_text, n_rows / 2.0, text=message, alpha=1,
        #                    text_font_size="12pt", text_font_style="bold", text_baseline="middle", text_align="center", angle=0)

        gene_plot.xaxis.axis_label = "Chromosome " + \
            snp_coords[1][1] + " Coordinate (Mb)(" + genome_build_vars[genome_build]['title'] + ")"
        gene_plot.yaxis.axis_label = "Genes (All Transcripts)"
        gene_plot.ygrid.grid_line_color = None
        gene_plot.yaxis.axis_line_color = None
        gene_plot.yaxis.minor_tick_line_color = None
        gene_plot.yaxis.major_tick_line_color = None
        gene_plot.yaxis.major_label_text_color = None

        gene_plot.toolbar_location = "below"

    # Gene Plot (Collapsed)
    else:
        genes_c_file = tmp_dir + "genes_c_" + request + ".json"
        genes_c_raw = open(genes_c_file).readlines()

        genes_c_plot_start = []
        genes_c_plot_end = []
        genes_c_plot_y = []
        genes_c_plot_name = []
        exons_c_plot_x = []
        exons_c_plot_y = []
        exons_c_plot_w = []
        exons_c_plot_h = []
        exons_c_plot_name = []
        exons_c_plot_id = []
        message_c = ["Too many genes to plot."]
        lines_c = [0]
        gap = 80000
        tall = 0.75
        if genes_c_raw != None and len(genes_c_raw) > 0:
            for gene_c_raw_obj in genes_c_raw:
                gene_c_obj = json.loads(gene_c_raw_obj)
                chrom = gene_c_obj["chrom"]
                txStart = gene_c_obj["txStart"]
                txEnd = gene_c_obj["txEnd"]
                exonStarts = gene_c_obj["exonStarts"]
                exonEnds = gene_c_obj["exonEnds"]
                name2 = gene_c_obj["name2"]
                transcripts = gene_c_obj["transcripts"]
                name = name2
                e_start = exonStarts.split(",")
                e_end = exonEnds.split(",")
                e_transcripts = transcripts.split(",")

                # Determine Y Coordinate
                i = 0
                y_coord = None
                while y_coord == None:
                    if i > len(lines_c) - 1:
                        y_coord = i + 1
                        lines_c.append(int(txEnd))
                    elif int(txStart) > (gap + lines_c[i]):
                        y_coord = i + 1
                        lines_c[i] = int(txEnd)
                    else:
                        i += 1

                genes_c_plot_start.append(int(txStart) / 1000000.0)
                genes_c_plot_end.append(int(txEnd) / 1000000.0)
                genes_c_plot_y.append(y_coord)
                genes_c_plot_name.append(name + "  ")

                # for i in range(len(e_start)):
                for i in range(len(e_start) - 1):
                    width = (int(e_end[i]) - int(e_start[i])) / 1000000.0
                    x_coord = int(e_start[i]) / 1000000.0 + (width / 2)

                    exons_c_plot_x.append(x_coord)
                    exons_c_plot_y.append(y_coord)
                    exons_c_plot_w.append(width)
                    exons_c_plot_h.append(tall)
                    exons_c_plot_name.append(name)
                    exons_c_plot_id.append(e_transcripts[i].replace("-", ","))

        n_rows_c = len(lines_c)
        genes_c_plot_yn = [n_rows_c - x + 0.5 for x in genes_c_plot_y]
        exons_c_plot_yn = [n_rows_c - x + 0.5 for x in exons_c_plot_y]
        yr2_c = Range1d(start=0, end=n_rows_c)

        data_gene_c_plot = {
            'exons_c_plot_x': exons_c_plot_x,
            'exons_c_plot_yn': exons_c_plot_yn,
            'exons_c_plot_w': exons_c_plot_w,
            'exons_c_plot_h': exons_c_plot_h,
            'exons_c_plot_name': exons_c_plot_name,
            'exons_c_plot_id': exons_c_plot_id
        }
        source_gene_c_plot = ColumnDataSource(data_gene_c_plot)
        max_genes_c = 40
        # if len(lines_c) < 3 or len(genes_c_raw) > max_genes_c:
        if len(lines_c) < 3:
            plot_h_pix = 250
        else:
            plot_h_pix = 250 + (len(lines_c) - 2) * 50

        gene_plot = figure(
            min_border_top=2,
            min_border_bottom=0,
            min_border_left=100,
            min_border_right=5,
            x_range=xr,
            y_range=yr2_c,
            border_fill_color='white',
            title="",
            h_symmetry=False,
            v_symmetry=False,
            logo=None,
            plot_width=900,
            plot_height=plot_h_pix,
            tools=
            "hover,xpan,box_zoom,wheel_zoom,tap,undo,redo,reset,previewsave")

        # if len(genes_c_raw) <= max_genes_c:
        gene_plot.segment(genes_c_plot_start,
                          genes_c_plot_yn,
                          genes_c_plot_end,
                          genes_c_plot_yn,
                          color="black",
                          alpha=1,
                          line_width=2)
        gene_plot.rect(x='exons_c_plot_x',
                       y='exons_c_plot_yn',
                       width='exons_c_plot_w',
                       height='exons_c_plot_h',
                       source=source_gene_c_plot,
                       fill_color="grey",
                       line_color="grey")
        gene_plot.text(genes_c_plot_start,
                       genes_c_plot_yn,
                       text=genes_c_plot_name,
                       alpha=1,
                       text_font_size="7pt",
                       text_font_style="bold",
                       text_baseline="middle",
                       text_align="right",
                       angle=0)
        hover = gene_plot.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ("Gene", "@exons_c_plot_name"),
            ("Transcript IDs", "@exons_c_plot_id"),
        ])

        # else:
        # 	x_coord_text = coord1/1000000.0 + (coord2/1000000.0 - coord1/1000000.0) / 2.0
        # 	gene_c_plot.text(x_coord_text, n_rows_c / 2.0, text=message_c, alpha=1,
        # 				   text_font_size="12pt", text_font_style="bold", text_baseline="middle", text_align="center", angle=0)

        gene_plot.xaxis.axis_label = "Chromosome " + snp_coords[1][
            1] + " Coordinate (Mb)(" + genome_build_vars[genome_build][
                'title'] + ")"
        gene_plot.yaxis.axis_label = "Genes (Transcripts Collapsed)"
        gene_plot.ygrid.grid_line_color = None
        gene_plot.yaxis.axis_line_color = None
        gene_plot.yaxis.minor_tick_line_color = None
        gene_plot.yaxis.major_tick_line_color = None
        gene_plot.yaxis.major_label_text_color = None

        gene_plot.toolbar_location = "below"

    # Change output backend to SVG temporarily for headless export
    # Will be changed back to canvas in LDlink.js
    matrix_plot.output_backend = "svg"
    connector.output_backend = "svg"
    rug.output_backend = "svg"
    gene_plot.output_backend = "svg"
    export_svgs(matrix_plot,
                filename=tmp_dir + "matrix_plot_1_" + request + ".svg")
    export_svgs(connector,
                filename=tmp_dir + "connector_1_" + request + ".svg")
    export_svgs(rug, filename=tmp_dir + "rug_1_" + request + ".svg")
    export_svgs(gene_plot,
                filename=tmp_dir + "gene_plot_1_" + request + ".svg")

    # 1 pixel = 0.0264583333 cm
    svg_height = str(25.00 + (0.0264583333 * plot_h_pix)) + "cm"
    svg_height_scaled = str(110.00 + (0.1322916665 * plot_h_pix)) + "cm"

    # Concatenate svgs
    sg.Figure(
        "21.59cm", svg_height,
        sg.SVG(tmp_dir + "matrix_plot_1_" + request + ".svg"),
        sg.SVG(tmp_dir + "connector_1_" + request + ".svg").scale(.97).move(
            0, 700),
        sg.SVG(tmp_dir + "rug_1_" + request + ".svg").scale(.97).move(0, 790),
        sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").scale(.97).move(
            0, 840)).save(tmp_dir + "matrix_plot_" + request + ".svg")

    sg.Figure(
        "107.95cm", svg_height_scaled,
        sg.SVG(tmp_dir + "matrix_plot_1_" + request + ".svg").scale(5),
        sg.SVG(tmp_dir + "connector_1_" + request + ".svg").scale(4.85).move(
            0, 3500),
        sg.SVG(tmp_dir + "rug_1_" + request + ".svg").scale(4.85).move(
            0, 3930),
        sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").scale(4.85).move(
            0, 4160)).save(tmp_dir + "matrix_plot_scaled_" + request + ".svg")

    # Export to PDF
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "matrix_plot_" +
                    request + ".svg " + tmp_dir + "matrix_plot_" + request +
                    ".pdf",
                    shell=True)
    # Export to PNG
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir +
                    "matrix_plot_scaled_" + request + ".svg " + tmp_dir +
                    "matrix_plot_" + request + ".png",
                    shell=True)
    # Export to JPEG
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir +
                    "matrix_plot_scaled_" + request + ".svg " + tmp_dir +
                    "matrix_plot_" + request + ".jpeg",
                    shell=True)
    # Remove individual SVG files after they are combined
    subprocess.call("rm " + tmp_dir + "matrix_plot_1_" + request + ".svg",
                    shell=True)
    subprocess.call("rm " + tmp_dir + "gene_plot_1_" + request + ".svg",
                    shell=True)
    subprocess.call("rm " + tmp_dir + "rug_1_" + request + ".svg", shell=True)
    subprocess.call("rm " + tmp_dir + "connector_1_" + request + ".svg",
                    shell=True)
    # Remove scaled SVG file after it is converted to png and jpeg
    subprocess.call("rm " + tmp_dir + "matrix_plot_scaled_" + request + ".svg",
                    shell=True)
    # Remove temporary file(s)
    subprocess.call("rm " + tmp_dir + "genes_*" + request + "*.json",
                    shell=True)

    reset_output()

    return None
Ejemplo n.º 14
0
def calculate_assoc_svg(file, region, pop, request, myargs, myargsName, myargsOrigin):

    # Set data directories using config.yml
    with open('config.yml', 'r') as f:
        config = yaml.load(f)
    gene_dir2 = config['data']['gene_dir2']
    vcf_dir = config['data']['vcf_dir']

    tmp_dir = "./tmp/"


    # Ensure tmp directory exists
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)


    chrs=["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","X","Y"]

    # Define parameters for --variant option
    if region=="variant":
        if myargsOrigin=="None":
            return None
            

    if myargsOrigin!="None":
        # Find coordinates (GRCh37/hg19) for SNP RS number
        if myargsOrigin[0:2]=="rs":
            snp=myargsOrigin

            # Connect to Mongo snp database
            client = MongoClient('mongodb://'+username+':'+password+'@localhost/admin', port)
            db = client["LDLink"]


            def get_coords_var(db, rsid):
                """Look up the dbSNP 151 record for an RS number.

                Args:
                    db: Database handle exposing a ``dbsnp151`` collection.
                    rsid: RS identifier, with or without the leading "rs".

                Returns:
                    The ``find_one`` result round-tripped through
                    ``json_util.dumps`` / ``json.loads`` so that only plain JSON
                    types remain; None when the lookup returns nothing.
                """
                # Remove only the literal "rs" prefix. str.strip("rs") treats its
                # argument as a character set and would also eat any trailing
                # 'r'/'s' characters from the identifier.
                if rsid.startswith("rs"):
                    rsid = rsid[2:]
                query_results = db.dbsnp151.find_one({"id": rsid})
                return json.loads(json_util.dumps(query_results))

            # Find RS number in snp database
            var_coord=get_coords_var(db, snp)

            if var_coord==None:
                return None
                

        elif myargsOrigin.split(":")[0].strip("chr") in chrs and len(myargsOrigin.split(":"))==2:
            snp=myargsOrigin
            var_coord=[None,myargsOrigin.split(":")[0].strip("chr"),myargsOrigin.split(":")[1]]

        else:
            return None
            

        chromosome = var_coord['chromosome']
        org_coord = var_coord['position']


    # Open Association Data
    header_list=[]
    header_list.append(myargs['chr'])
    header_list.append(myargs['bp'])
    header_list.append(myargs['pval'])

    # Load input file
    with open(file) as fp:
        header = fp.readline().strip().split()
        first = fp.readline().strip().split()

    if len(header)!=len(first):
        return None
        

    # Check header
    for item in header_list:
        if item not in header:
            return None
            

    len_head=len(header)

    chr_index=header.index(myargs['chr'])
    pos_index=header.index(myargs['bp'])
    p_index=header.index(myargs['pval'])


    # Define window of interest around query SNP
    if myargs['window']==None:
        if region=="variant":
            window=500000
        elif region=="gene":
            window=100000
        else:
            window=0
    else:
        window=myargs['window']

    if region=="variant":
        coord1=int(org_coord)-window
        if coord1<0:
            coord1=0
        coord2=int(org_coord)+window

    elif region=="gene":
        if myargsName=="None":
            return None
            

        # Connect to gene database
        conn=sqlite3.connect(gene_dir2)
        conn.text_factory=str
        cur=conn.cursor()

        def get_coords_gene(gene_raw):
            """Return the first ``genes`` row whose name matches ``gene_raw``.

            The name is upper-cased before the lookup and bound as a query
            parameter. Uses the cursor ``cur`` from the enclosing scope and
            returns whatever ``cur.fetchone()`` yields (None when no row matches).
            """
            cur.execute("SELECT * FROM genes WHERE name=?", (gene_raw.upper(),))
            return cur.fetchone()

        # Find gene coordinates in gene database
        gene_coord=get_coords_gene(myargsName)

        # Close gene database connection
        cur.close()
        conn.close()

        if gene_coord==None:
            return None
            

        # Define search coordinates
        coord1=int(gene_coord[2])-window
        if coord1<0:
            coord1=0
        coord2=int(gene_coord[3])+window

        # Run with --origin option
        if myargsOrigin!="None":
            if gene_coord[1]!=chromosome:
                return None
                
            if coord1>int(org_coord) or int(org_coord)>coord2:
                return None
                
        else:
            chromosome=gene_coord[1]

    elif region=="region":
        if myargs['start']==None:
            return None
            
        if myargs['end']==None:
            return None
            

        # Parse out chr and positions for --region option
        if len(myargs['start'].split(":"))!=2:
            return None
            
        if len(myargs['end'].split(":"))!=2:
            return None
            

        chr_s=myargs['start'].strip("chr").split(":")[0]
        coord_s=myargs['start'].split(":")[1]
        chr_e=myargs['end'].strip("chr").split(":")[0]
        coord_e=myargs['end'].split(":")[1]

        if chr_s not in chrs:
            return None
            
        if chr_e not in chrs:
            return None
            
        if chr_s!=chr_e:
            return None
            
        if coord_s>=coord_e:
            return None
            

        coord1=int(coord_s)-window
        if coord1<0:
            coord1=0
        coord2=int(coord_e)+window

        # Run with --origin option
        if myargsOrigin!="None":
            if chr_s!=chromosome:
                return None
                
            if coord1>int(org_coord) or int(org_coord)>coord2:
                return None
                
        else:
            chromosome=chr_s

    # Generate coordinate list and P-value dictionary
    max_window=3000000
    if coord2-coord1>max_window:
            return None
            

    assoc_coords=[]
    a_pos=[]
    assoc_dict={}
    assoc_list=[]
    with open(file) as fp:
        for line in fp:
            col=line.strip().split()
            if len(col)==len_head:
                if col[chr_index].strip("chr")==chromosome:
                    try:
                        int(col[pos_index])
                    except ValueError:
                        continue
                    else:
                        if coord1<=int(col[pos_index])<=coord2:
                            try:
                                float(col[p_index])
                            except ValueError:
                                continue
                            else:
                                coord_i=col[chr_index].strip("chr")+":"+col[pos_index]+"-"+col[pos_index]
                                assoc_coords.append(coord_i)
                                a_pos.append(col[pos_index])
                                assoc_dict[coord_i]=[col[p_index]]
                                assoc_list.append([coord_i,float(col[p_index])])


    # Coordinate list checks
    if len(assoc_coords)==0:
        return None


    # Get population ids from population output file from LDassoc.py
    pop_list=open(tmp_dir+"pops_"+request+".txt").readlines()
    ids=[]
    for i in range(len(pop_list)):
        ids.append(pop_list[i].strip())

    pop_ids=list(set(ids))


    # Define LD origin coordinate
    try:
        org_coord
    except NameError:
        for var_p in sorted(assoc_list, key=operator.itemgetter(1)):
            snp="chr"+var_p[0].split("-")[0]

            # Extract lowest P SNP phased genotypes
            vcf_file=vcf_dir+chromosome+".phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"

            tabix_snp_h="tabix -H {0} | grep CHROM".format(vcf_file)
            proc_h=subprocess.Popen(tabix_snp_h, shell=True, stdout=subprocess.PIPE)
            head=[x.decode('utf-8') for x in proc_h.stdout.readlines()][0].strip().split()

            # Check lowest P SNP is in the 1000G population and not monoallelic from LDassoc.py output file
            vcf=open(tmp_dir+"snp_no_dups_"+request+".vcf").readlines()

            if len(vcf)==0:
                continue
            elif len(vcf)>1:
                geno=vcf[0].strip().split()

            else:
                geno=vcf[0].strip().split()

            if "," in geno[3] or "," in geno[4]:
                continue

            index=[]
            for i in range(9,len(head)):
                if head[i] in pop_ids:
                    index.append(i)

            genotypes={"0":0, "1":0}
            for i in index:
                sub_geno=geno[i].split("|")
                for j in sub_geno:
                    if j in genotypes:
                        genotypes[j]+=1
                    else:
                        genotypes[j]=1

            if genotypes["0"]==0 or genotypes["1"]==0:
                continue

            org_coord=var_p[0].split("-")[1]
            break


    else:
        if chromosome+":"+org_coord+"-"+org_coord not in assoc_coords:
            return None
            

        # Extract query SNP phased genotypes
        vcf_file=vcf_dir+chromosome+".phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"

        tabix_snp_h="tabix -H {0} | grep CHROM".format(vcf_file)
        proc_h=subprocess.Popen(tabix_snp_h, shell=True, stdout=subprocess.PIPE)
        head=[x.decode('utf-8') for x in proc_h.stdout.readlines()][0].strip().split()

        tabix_snp="tabix {0} {1}:{2}-{2} | grep -v -e END > {3}".format(vcf_file, chromosome, org_coord, tmp_dir+"snp_no_dups_"+request+".vcf")
        subprocess.call(tabix_snp, shell=True)


        # Check query SNP is in the 1000G population, has the correct RS number, and not monoallelic
        vcf=open(tmp_dir+"snp_no_dups_"+request+".vcf").readlines()

        if len(vcf)==0:
            subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
            subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
            return None
            
        elif len(vcf)>1:
            geno=[]
            for i in range(len(vcf)):
                if vcf[i].strip().split()[2]==snp:
                    geno=vcf[i].strip().split()
            if geno==[]:
                subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
                subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
                return None
                
        else:
            geno=vcf[0].strip().split()

        if geno[2]!=snp and snp[0:2]=="rs":
            snp=geno[2]

        if "," in geno[3] or "," in geno[4]:
            subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
            subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
            return None
            


        index=[]
        for i in range(9,len(head)):
            if head[i] in pop_ids:
                index.append(i)

        genotypes={"0":0, "1":0}
        for i in index:
            sub_geno=geno[i].split("|")
            for j in sub_geno:
                if j in genotypes:
                    genotypes[j]+=1
                else:
                    genotypes[j]=1

        if genotypes["0"]==0 or genotypes["1"]==0:
            subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
            subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
            return None
            


    # Calculate proxy LD statistics in parallel
    if len(assoc_coords)<60:
        threads=1
    else:
        threads=4

    block=len(assoc_coords)/threads
    commands=[]
    for i in range(threads):
        if i==min(range(threads)) and i==max(range(threads)):
            command="python LDassoc_sub.py "+snp+" "+chromosome+" "+"_".join(assoc_coords)+" "+request+" "+str(i)
        elif i==min(range(threads)):
            command="python LDassoc_sub.py "+snp+" "+chromosome+" "+"_".join(assoc_coords[:block])+" "+request+" "+str(i)
        elif i==max(range(threads)):
            command="python LDassoc_sub.py "+snp+" "+chromosome+" "+"_".join(assoc_coords[(block*i)+1:])+" "+request+" "+str(i)
        else:
            command="python LDassoc_sub.py "+snp+" "+chromosome+" "+"_".join(assoc_coords[(block*i)+1:block*(i+1)])+" "+request+" "+str(i)
        commands.append(command)


    processes=[subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) for command in commands]

    # collect output in parallel
    def get_output(process):
        """Wait for ``process`` to finish and return its captured stdout as a list of lines."""
        captured = process.communicate()
        stdout_data = captured[0]
        return stdout_data.splitlines()

    pool = Pool(len(processes))
    out_raw=pool.map(get_output, processes)
    pool.close()
    pool.join()


    # Aggregate output
    out_prox=[]
    for i in range(len(out_raw)):
        for j in range(len(out_raw[i])):
            col=out_raw[i][j].decode('utf-8').strip().split("\t")
            col[6]=int(col[6])
            col[7]=float(col[7])
            col[8]=float(col[8])
            col.append(abs(int(col[6])))
            pos_i_j=col[5].split(":")[1]
            coord_i_j=chromosome+":"+pos_i_j+"-"+pos_i_j
            if coord_i_j in assoc_dict:
                col.append(float(assoc_dict[coord_i_j][0]))
                out_prox.append(col)


    out_dist_sort=sorted(out_prox, key=operator.itemgetter(14))
    out_p_sort=sorted(out_dist_sort, key=operator.itemgetter(15), reverse=False)

    # Organize scatter plot data
    q_rs=[]
    q_allele=[]
    q_coord=[]
    q_maf=[]
    p_rs=[]
    p_allele=[]
    p_coord=[]
    p_pos=[]
    p_maf=[]
    dist=[]
    d_prime=[]
    d_prime_round=[]
    r2=[]
    r2_round=[]
    corr_alleles=[]
    regdb=[]
    funct=[]
    color=[]
    alpha=[]
    size=[]
    p_val=[]
    neg_log_p=[]
    for i in range(len(out_p_sort)):
        q_rs_i,q_allele_i,q_coord_i,p_rs_i,p_allele_i,p_coord_i,dist_i,d_prime_i,r2_i,corr_alleles_i,regdb_i,q_maf_i,p_maf_i,funct_i,dist_abs,p_val_i=out_p_sort[i]

        q_rs.append(q_rs_i)
        q_allele.append(q_allele_i)
        q_coord.append(float(q_coord_i.split(":")[1])/1000000)
        q_maf.append(str(round(float(q_maf_i),4)))
        if p_rs_i==".":
            p_rs_i=p_coord_i
        p_rs.append(p_rs_i)
        p_allele.append(p_allele_i)
        p_coord.append(float(p_coord_i.split(":")[1])/1000000)
        p_pos.append(p_coord_i.split(":")[1])
        p_maf.append(str(round(float(p_maf_i),4)))
        dist.append(str(round(dist_i/1000000.0,4)))
        d_prime.append(float(d_prime_i))
        d_prime_round.append(str(round(float(d_prime_i),4)))
        r2.append(float(r2_i))
        r2_round.append(str(round(float(r2_i),4)))
        corr_alleles.append(corr_alleles_i)

        # P-value
        p_val.append(p_val_i)
        neg_log_p.append(-log10(p_val_i))

        # Correct Missing Annotations
        if regdb_i==".":
            regdb_i=""
        regdb.append(regdb_i)
        if funct_i==".":
            funct_i=""
        if funct_i=="NA":
            funct_i="none"
        funct.append(funct_i)

        # Set Color
        reds=["#FFCCCC","#FFCACA","#FFC8C8","#FFC6C6","#FFC4C4","#FFC2C2","#FFC0C0","#FFBEBE","#FFBCBC","#FFBABA","#FFB8B8","#FFB6B6","#FFB4B4","#FFB1B1","#FFAFAF","#FFADAD","#FFABAB","#FFA9A9","#FFA7A7","#FFA5A5","#FFA3A3","#FFA1A1","#FF9F9F","#FF9D9D","#FF9B9B","#FF9999","#FF9797","#FF9595","#FF9393","#FF9191","#FF8F8F","#FF8D8D","#FF8B8B","#FF8989","#FF8787","#FF8585","#FF8383","#FF8181","#FF7E7E","#FF7C7C","#FF7A7A","#FF7878","#FF7676","#FF7474","#FF7272","#FF7070","#FF6E6E","#FF6C6C","#FF6A6A","#FF6868","#FF6666","#FF6464","#FF6262","#FF6060","#FF5E5E","#FF5C5C","#FF5A5A","#FF5858","#FF5656","#FF5454","#FF5252","#FF5050","#FF4E4E","#FF4B4B","#FF4949","#FF4747","#FF4545","#FF4343","#FF4141","#FF3F3F","#FF3D3D","#FF3B3B","#FF3939","#FF3737","#FF3535","#FF3333","#FF3131","#FF2F2F","#FF2D2D","#FF2B2B","#FF2929","#FF2727","#FF2525","#FF2323","#FF2121","#FF1F1F","#FF1D1D","#FF1B1B","#FF1818","#FF1616","#FF1414","#FF1212","#FF1010","#FF0E0E","#FF0C0C","#FF0A0A","#FF0808","#FF0606","#FF0404","#FF0202","#FF0000"]
        if q_coord_i==p_coord_i:
            color_i="#0000FF"
            alpha_i=0.7
        else:
            if myargs['dprime']==True:
                color_i=reds[int(d_prime_i*100.0)]
                alpha_i=0.7
            elif myargs['dprime']==False:
                color_i=reds[int(r2_i*100.0)]
                alpha_i=0.7
        color.append(color_i)
        alpha.append(alpha_i)

        # Set Size
        size_i=9+float(p_maf_i)*14.0
        size.append(size_i)


    # Pull out SNPs from association file not found in 1000G
    p_plot_pos=[]
    p_plot_pval=[]
    p_plot_pos2=[]
    p_plot_pval2=[]
    p_plot_dist=[]
    index_var_pos=float(q_coord_i.split(":")[1])/1000000
    for input_pos in a_pos:
        if input_pos not in p_pos:
            p_plot_pos.append(float(input_pos)/1000000)
            p_plot_pval.append(-log10(float(assoc_dict[chromosome+":"+input_pos+"-"+input_pos][0])))
            p_plot_pos2.append("chr"+chromosome+":"+input_pos)
            p_plot_pval2.append(float(assoc_dict[chromosome+":"+input_pos+"-"+input_pos][0]))
            p_plot_dist.append(str(round(float(input_pos)/1000000-index_var_pos,4)))


    # Begin Bokeh Plotting
    from collections import OrderedDict
    from bokeh.embed import components,file_html
    from bokeh.layouts import gridplot
    from bokeh.models import HoverTool,LinearAxis,Range1d
    from bokeh.plotting import ColumnDataSource,curdoc,figure,output_file,reset_output,save
    from bokeh.resources import CDN
    from bokeh.io import export_svgs
    import svgutils.compose as sg

    reset_output()

    data_p = {'p_plot_posX': p_plot_pos, 'p_plot_pvalY': p_plot_pval, 'p_plot_pos2': p_plot_pos2, 'p_plot_pval2': p_plot_pval2, 'p_plot_dist': p_plot_dist}
    source_p = ColumnDataSource(data_p)

    # Assoc Plot
    x=p_coord
    y=neg_log_p

    data = {'x': x, 'y': y, 'qrs': q_rs, 'q_alle': q_allele, 'q_maf': q_maf, 'prs': p_rs, 'p_alle': p_allele, 'p_maf': p_maf, 'dist': dist, 'r': r2_round, 'd': d_prime_round, 'alleles': corr_alleles, 'regdb': regdb, 'funct': funct, 'p_val': p_val, 'size': size, 'color': color, 'alpha': alpha}
    source = ColumnDataSource(data)

    whitespace=0.01
    xr=Range1d(start=coord1/1000000.0-whitespace, end=coord2/1000000.0+whitespace)
    yr=Range1d(start=-0.03, end=max(y)*1.03)
    sup_2="\u00B2"

    assoc_plot=figure(
                title="P-values and Regional LD for "+snp+" in "+pop,
                min_border_top=2, min_border_bottom=2, min_border_left=60, min_border_right=60, h_symmetry=False, v_symmetry=False,
                plot_width=900,
                plot_height=600,
                x_range=xr, y_range=yr,
                tools="tap,pan,box_zoom,wheel_zoom,box_select,undo,redo,reset,previewsave", logo=None,
                toolbar_location="above")

    assoc_plot.title.align="center"

    # Add recombination rate from LDassoc.py output file
    filename=tmp_dir+"recomb_"+request+".txt"
    recomb_raw=open(filename).readlines()
    recomb_x=[]
    recomb_y=[]
    for i in range(len(recomb_raw)):
        chr,pos,rate=recomb_raw[i].strip().split()
        recomb_x.append(int(pos)/1000000.0)
        recomb_y.append(float(rate)/100*max(y))

    assoc_plot.line(recomb_x, recomb_y, line_width=1, color="black", alpha=0.5)

    # Add genome-wide significance
    a = [coord1/1000000.0-whitespace,coord2/1000000.0+whitespace]
    b = [-log10(0.00000005),-log10(0.00000005)]
    assoc_plot.line(a, b, color="blue", alpha=0.5)

    assoc_points_not1000G=assoc_plot.circle(x='p_plot_posX', y='p_plot_pvalY', size=9+float("0.25")*14.0, source=source_p, line_color="gray", fill_color="white")
    assoc_points=assoc_plot.circle(x='x', y='y', size='size', color='color', alpha='alpha', source=source)
    assoc_plot.add_tools(HoverTool(renderers=[assoc_points_not1000G], tooltips=OrderedDict([("Variant", "@p_plot_pos2"), ("P-value", "@p_plot_pval2"), ("Distance (Mb)", "@p_plot_dist")])))

    hover=HoverTool(renderers=[assoc_points])
    hover.tooltips=OrderedDict([
        ("Variant", "@prs @p_alle"),
        ("P-value", "@p_val"),
        ("Distance (Mb)", "@dist"),
        ("MAF", "@p_maf"),
        ("R"+sup_2+" ("+q_rs[0]+")", "@r"),
        ("D\' ("+q_rs[0]+")", "@d"),
        ("Correlated Alleles", "@alleles"),
        ("RegulomeDB", "@regdb"),
        ("Functional Class", "@funct"),
    ])

    assoc_plot.add_tools(hover)

    # Annotate RegulomeDB scores
    if myargs['annotate']==True:
        assoc_plot.text(x, y, text=regdb, alpha=1, text_font_size="7pt", text_baseline="middle", text_align="center", angle=0)

    assoc_plot.yaxis.axis_label="-log10 P-value"

    assoc_plot.extra_y_ranges = {"y2_axis": Range1d(start=-3, end=103)}
    assoc_plot.add_layout(LinearAxis(y_range_name="y2_axis", axis_label="Combined Recombination Rate (cM/Mb)"), "right")  ## Need to confirm units


    # Rug Plot
    y2_ll=[-0.03]*len(x)
    y2_ul=[1.03]*len(x)
    yr_rug=Range1d(start=-0.03, end=1.03)

    data_rug = {'x': x, 'y': y, 'y2_ll': y2_ll, 'y2_ul': y2_ul,'qrs': q_rs, 'q_alle': q_allele, 'q_maf': q_maf, 'prs': p_rs, 'p_alle': p_allele, 'p_maf': p_maf, 'dist': dist, 'r': r2_round, 'd': d_prime_round, 'alleles': corr_alleles, 'regdb': regdb, 'funct': funct, 'p_val': p_val, 'size': size, 'color': color, 'alpha': alpha}
    source_rug = ColumnDataSource(data_rug)

    rug=figure(
            x_range=xr, y_range=yr_rug, border_fill_color='white', y_axis_type=None,
            title="", min_border_top=2, min_border_bottom=2, min_border_left=60, min_border_right=60, h_symmetry=False, v_symmetry=False,
            plot_width=900, plot_height=50, tools="xpan,tap,wheel_zoom", logo=None)

    rug.segment(x0='x', y0='y2_ll', x1='x', y1='y2_ul', source=source_rug, color='color', alpha='alpha', line_width=1)
    rug.toolbar_location=None


    # Gene Plot (All Transcripts)
    if myargs['transcript']==True:
        # Get genes from LDassoc.py output file
        filename=tmp_dir+"genes_"+request+".txt"
        genes_raw=open(filename).readlines()

        genes_plot_start=[]
        genes_plot_end=[]
        genes_plot_y=[]
        genes_plot_name=[]
        exons_plot_x=[]
        exons_plot_y=[]
        exons_plot_w=[]
        exons_plot_h=[]
        exons_plot_name=[]
        exons_plot_id=[]
        exons_plot_exon=[]
        message = ["Too many genes to plot."]
        lines=[0]
        gap=80000
        tall=0.75
        if genes_raw!=None:
            for i in range(len(genes_raw)):
                bin,name_id,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,score,name2,cdsStartStat,cdsEndStat,exonFrames=genes_raw[i].strip().split()
                name=name2
                id=name_id
                e_start=exonStarts.split(",")
                e_end=exonEnds.split(",")

                # Determine Y Coordinate
                i=0
                y_coord=None
                while y_coord==None:
                    if i>len(lines)-1:
                        y_coord=i+1
                        lines.append(int(txEnd))
                    elif int(txStart)>(gap+lines[i]):
                        y_coord=i+1
                        lines[i]=int(txEnd)
                    else:
                        i+=1

                genes_plot_start.append(int(txStart)/1000000.0)
                genes_plot_end.append(int(txEnd)/1000000.0)
                genes_plot_y.append(y_coord)
                genes_plot_name.append(name+"  ")

                for i in range(len(e_start)-1):
                    if strand=="+":
                        exon=i+1
                    else:
                        exon=len(e_start)-1-i

                    width=(int(e_end[i])-int(e_start[i]))/1000000.0
                    x_coord=int(e_start[i])/1000000.0+(width/2)

                    exons_plot_x.append(x_coord)
                    exons_plot_y.append(y_coord)
                    exons_plot_w.append(width)
                    exons_plot_h.append(tall)
                    exons_plot_name.append(name)
                    exons_plot_id.append(id)
                    exons_plot_exon.append(exon)


        n_rows=len(lines)
        genes_plot_yn=[n_rows-x+0.5 for x in genes_plot_y]
        exons_plot_yn=[n_rows-x+0.5 for x in exons_plot_y]
        yr2=Range1d(start=0, end=n_rows)

        data_gene_plot = {'exons_plot_x': exons_plot_x, 'exons_plot_yn': exons_plot_yn, 'exons_plot_w': exons_plot_w, 'exons_plot_h': exons_plot_h,'exons_plot_name': exons_plot_name, 'exons_plot_id': exons_plot_id, 'exons_plot_exon': exons_plot_exon}
        source_gene_plot=ColumnDataSource(data_gene_plot)

        max_genes = 40
        # if len(lines) < 3 or len(genes_raw) > max_genes:
        if len(lines) < 3:
            plot_h_pix = 150
        else:
            plot_h_pix = 150 + (len(lines) - 2) * 50

        gene_plot = figure(min_border_top=2, min_border_bottom=0, min_border_left=100, min_border_right=5,
                            x_range=xr, y_range=yr2, border_fill_color='white',
                            title="", h_symmetry=False, v_symmetry=False, logo=None,
                            plot_width=900, plot_height=plot_h_pix, tools="hover,xpan,box_zoom,wheel_zoom,tap,undo,redo,reset,previewsave")

        # if len(genes_raw) <= max_genes:
        gene_plot.segment(genes_plot_start, genes_plot_yn, genes_plot_end,
                            genes_plot_yn, color="black", alpha=1, line_width=2)
        gene_plot.rect(x='exons_plot_x', y='exons_plot_yn', width='exons_plot_w', height='exons_plot_h',
                        source=source_gene_plot, fill_color="grey", line_color="grey")
        gene_plot.text(genes_plot_start, genes_plot_yn, text=genes_plot_name, alpha=1, text_font_size="7pt",
                        text_font_style="bold", text_baseline="middle", text_align="right", angle=0)
        hover = gene_plot.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ("Gene", "@exons_plot_name"),
            ("Transcript ID", "@exons_plot_id"),
            ("Exon", "@exons_plot_exon"),
        ])

        # else:
        #     x_coord_text = coord1/1000000.0 + (coord2/1000000.0 - coord1/1000000.0) / 2.0
        #     gene_plot.text(x_coord_text, n_rows / 2.0, text=message, alpha=1,
        #                     text_font_size="12pt", text_font_style="bold", text_baseline="middle", text_align="center", angle=0)

        gene_plot.xaxis.axis_label = "Chromosome " + chromosome + " Coordinate (Mb)(GRCh37)"
        gene_plot.yaxis.axis_label = "Genes (All Transcripts)"
        gene_plot.ygrid.grid_line_color = None
        gene_plot.yaxis.axis_line_color = None
        gene_plot.yaxis.minor_tick_line_color = None
        gene_plot.yaxis.major_tick_line_color = None
        gene_plot.yaxis.major_label_text_color = None

        gene_plot.toolbar_location = "below"

        # Change output backend to SVG temporarily for headless export
        assoc_plot.output_backend = "svg"
        rug.output_backend = "svg"
        gene_plot.output_backend = "svg"
        export_svgs(assoc_plot, filename=tmp_dir + "assoc_plot_1_" + request + ".svg")
        export_svgs(gene_plot, filename=tmp_dir + "gene_plot_1_" + request + ".svg")

        # 1 pixel = 0.0264583333 cm
        svg_height = str(20.00 + (0.0264583333 * plot_h_pix)) + "cm"
        svg_height_scaled = str(100.00 + (0.1322916665 * plot_h_pix)) + "cm"
        
        # Concatenate svgs
        sg.Figure("24.59cm", svg_height,
            sg.SVG(tmp_dir + "assoc_plot_1_" + request + ".svg"),
            sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").move(-40, 630)
            ).save(tmp_dir + "assoc_plot_" + request + ".svg")

        sg.Figure("122.95cm", svg_height_scaled,
            sg.SVG(tmp_dir + "assoc_plot_1_" + request + ".svg").scale(5),
            sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").scale(5).move(-200, 3150)
            ).save(tmp_dir + "assoc_plot_scaled_" + request + ".svg")

        # Export to PDF
        subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "assoc_plot_" + request + ".svg " + tmp_dir + "assoc_plot_" + request + ".pdf", shell=True)
        # Export to PNG
        subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "assoc_plot_scaled_" + request + ".svg " + tmp_dir + "assoc_plot_" + request + ".png", shell=True)
        # Export to JPEG
        subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "assoc_plot_scaled_" + request + ".svg " + tmp_dir + "assoc_plot_" + request + ".jpeg", shell=True)    
        # Remove individual SVG files after they are combined
        subprocess.call("rm " + tmp_dir + "assoc_plot_1_" + request + ".svg", shell=True)
        subprocess.call("rm " + tmp_dir + "gene_plot_1_" + request + ".svg", shell=True)
        # Remove scaled SVG file after it is converted to png and jpeg
        subprocess.call("rm " + tmp_dir + "assoc_plot_scaled_" + request + ".svg", shell=True)



    # Gene Plot (Collapsed)
    else:
        # Get genes from LDassoc.py output file
        filename_c=tmp_dir+"genes_c_"+request+".txt"
        genes_c_raw=open(filename_c).readlines()

        genes_c_plot_start=[]
        genes_c_plot_end=[]
        genes_c_plot_y=[]
        genes_c_plot_name=[]
        exons_c_plot_x=[]
        exons_c_plot_y=[]
        exons_c_plot_w=[]
        exons_c_plot_h=[]
        exons_c_plot_name=[]
        exons_c_plot_id=[]
        message_c = ["Too many genes to plot."]
        lines_c=[0]
        gap=80000
        tall=0.75
        if genes_c_raw!=None:
            for i in range(len(genes_c_raw)):
                chrom,txStart,txEnd,name,exonStarts,exonEnds,transcripts=genes_c_raw[i].strip().split()
                e_start=exonStarts.split(",")
                e_end=exonEnds.split(",")
                e_transcripts=transcripts.split(",")

                # Determine Y Coordinate
                i=0
                y_coord=None
                while y_coord==None:
                    if i>len(lines_c)-1:
                        y_coord=i+1
                        lines_c.append(int(txEnd))
                    elif int(txStart)>(gap+lines_c[i]):
                        y_coord=i+1
                        lines_c[i]=int(txEnd)
                    else:
                        i+=1

                genes_c_plot_start.append(int(txStart)/1000000.0)
                genes_c_plot_end.append(int(txEnd)/1000000.0)
                genes_c_plot_y.append(y_coord)
                genes_c_plot_name.append(name+"  ")

                for i in range(len(e_start)):

                    width=(int(e_end[i])-int(e_start[i]))/1000000.0
                    x_coord=int(e_start[i])/1000000.0+(width/2)

                    exons_c_plot_x.append(x_coord)
                    exons_c_plot_y.append(y_coord)
                    exons_c_plot_w.append(width)
                    exons_c_plot_h.append(tall)
                    exons_c_plot_name.append(name)
                    exons_c_plot_id.append(e_transcripts[i].replace("-",","))


        n_rows_c=len(lines_c)
        genes_c_plot_yn=[n_rows_c-x+0.5 for x in genes_c_plot_y]
        exons_c_plot_yn=[n_rows_c-x+0.5 for x in exons_c_plot_y]
        yr2_c=Range1d(start=0, end=n_rows_c)

        data_gene_c_plot = {'exons_c_plot_x': exons_c_plot_x, 'exons_c_plot_yn': exons_c_plot_yn, 'exons_c_plot_w': exons_c_plot_w, 'exons_c_plot_h': exons_c_plot_h, 'exons_c_plot_name': exons_c_plot_name, 'exons_c_plot_id': exons_c_plot_id}
        source_gene_c_plot=ColumnDataSource(data_gene_c_plot)

        max_genes_c = 40
        # if len(lines_c) < 3 or len(genes_c_raw) > max_genes_c:
        if len(lines_c) < 3:
            plot_c_h_pix = 150
        else:
            plot_c_h_pix = 150 + (len(lines_c) - 2) * 50

        gene_c_plot = figure(min_border_top=2, min_border_bottom=0, min_border_left=100, min_border_right=5,
                            x_range=xr, y_range=yr2_c, border_fill_color='white',
                            title="", h_symmetry=False, v_symmetry=False, logo=None,
                            plot_width=900, plot_height=plot_c_h_pix, tools="hover,xpan,box_zoom,wheel_zoom,tap,undo,redo,reset,previewsave")

        # if len(genes_c_raw) <= max_genes_c:
        gene_c_plot.segment(genes_c_plot_start, genes_c_plot_yn, genes_c_plot_end,
                            genes_c_plot_yn, color="black", alpha=1, line_width=2)
        gene_c_plot.rect(x='exons_c_plot_x', y='exons_c_plot_yn', width='exons_c_plot_w', height='exons_c_plot_h',
                        source=source_gene_c_plot, fill_color="grey", line_color="grey")
        gene_c_plot.text(genes_c_plot_start, genes_c_plot_yn, text=genes_c_plot_name, alpha=1, text_font_size="7pt",
                        text_font_style="bold", text_baseline="middle", text_align="right", angle=0)
        hover = gene_c_plot.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ("Gene", "@exons_c_plot_name"),
            ("Transcript IDs", "@exons_c_plot_id"),
        ])

        # else:
        #     x_coord_text = coord1/1000000.0 + (coord2/1000000.0 - coord1/1000000.0) / 2.0
        #     gene_c_plot.text(x_coord_text, n_rows_c / 2.0, text=message_c, alpha=1,
        #                     text_font_size="12pt", text_font_style="bold", text_baseline="middle", text_align="center", angle=0)

        gene_c_plot.xaxis.axis_label = "Chromosome " + chromosome + " Coordinate (Mb)(GRCh37)"
        gene_c_plot.yaxis.axis_label = "Genes (Transcripts Collapsed)"
        gene_c_plot.ygrid.grid_line_color = None
        gene_c_plot.yaxis.axis_line_color = None
        gene_c_plot.yaxis.minor_tick_line_color = None
        gene_c_plot.yaxis.major_tick_line_color = None
        gene_c_plot.yaxis.major_label_text_color = None

        gene_c_plot.toolbar_location = "below"
        
        # Change output backend to SVG temporarily for headless export
        assoc_plot.output_backend = "svg"
        rug.output_backend = "svg"
        gene_c_plot.output_backend = "svg"
        export_svgs(assoc_plot, filename=tmp_dir + "assoc_plot_1_" + request + ".svg")
        export_svgs(gene_c_plot, filename=tmp_dir + "gene_plot_1_" + request + ".svg")
        
        # 1 pixel = 0.0264583333 cm
        svg_height = str(20.00 + (0.0264583333 * plot_c_h_pix)) + "cm"
        svg_height_scaled = str(100.00 + (0.1322916665 * plot_c_h_pix)) + "cm"

        # Concatenate svgs
        sg.Figure("24.59cm", svg_height,
            sg.SVG(tmp_dir + "assoc_plot_1_" + request + ".svg"),
            sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").move(-40, 630)
            ).save(tmp_dir + "assoc_plot_" + request + ".svg")

        sg.Figure("122.95cm", svg_height_scaled,
            sg.SVG(tmp_dir + "assoc_plot_1_" + request + ".svg").scale(5),
            sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").scale(5).move(-200, 3150)
            ).save(tmp_dir + "assoc_plot_scaled_" + request + ".svg")

        # Export to PDF
        subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "assoc_plot_" + request + ".svg " + tmp_dir + "assoc_plot_" + request + ".pdf", shell=True)
        # Export to PNG
        subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "assoc_plot_scaled_" + request + ".svg " + tmp_dir + "assoc_plot_" + request + ".png", shell=True)
        # Export to JPEG
        subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "assoc_plot_scaled_" + request + ".svg " + tmp_dir + "assoc_plot_" + request + ".jpeg", shell=True)    
        # Remove individual SVG files after they are combined
        subprocess.call("rm " + tmp_dir + "assoc_plot_1_" + request + ".svg", shell=True)
        subprocess.call("rm " + tmp_dir + "gene_plot_1_" + request + ".svg", shell=True)
        # Remove scaled SVG file after it is converted to png and jpeg
        subprocess.call("rm " + tmp_dir + "assoc_plot_scaled_" + request + ".svg", shell=True)

    reset_output()

    # Remove temporary files
    subprocess.call("rm "+tmp_dir+"pops_"+request+".txt", shell=True)
    subprocess.call("rm "+tmp_dir+"*"+request+"*.vcf", shell=True)
    subprocess.call("rm "+tmp_dir+"genes_*"+request+"*.txt", shell=True)
    subprocess.call("rm "+tmp_dir+"recomb_"+request+".txt", shell=True)
    subprocess.call("rm "+tmp_dir+"assoc_args"+request+".json", shell=True)

    print("Bokeh high quality image export complete!")

    # Return plot output
    return None
Ejemplo n.º 15
0
def calculate_proxy_svg(snp, pop, request, r2_d="r2"):

    start_time = time.time()

    # Set data directories using config.yml
    with open('config.yml', 'r') as f:
        config = yaml.load(f)
    gene_dir = config['data']['gene_dir']
    recomb_dir = config['data']['recomb_dir']
    snp_dir = config['data']['snp_dir']
    pop_dir = config['data']['pop_dir']
    vcf_dir = config['data']['vcf_dir']

    tmp_dir = "./tmp/"

    # Ensure tmp directory exists
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    if request is False:
        request = str(time.strftime("%I%M%S"))

    # Create JSON output

    # Find coordinates (GRCh37/hg19) for SNP RS number
    # Connect to snp database
    conn = sqlite3.connect(snp_dir)
    conn.text_factory = str
    cur = conn.cursor()

    def get_coords(rs):
        """Look up the snp-database row for an RS number.

        Any leading/trailing "r" and "s" characters are stripped from *rs*;
        what remains is used as the id to match, and its final character
        selects which table (``tbl_<char>``) is queried.  The lookup runs on
        the enclosing function's sqlite cursor ``cur``.

        Returns whatever ``cur.fetchone()`` yields for the query (the first
        matching row, or None when nothing matches).
        """
        rs_number = rs.strip("rs")
        # Table name cannot be bound as a parameter, so it is spliced into the
        # statement; the id itself is still passed as a bound value.
        table_name = "tbl_" + rs_number[-1]
        query = "SELECT * FROM " + table_name + " WHERE id=?"
        cur.execute(query, (rs_number, ))
        return cur.fetchone()

    # Find RS number in snp database
    snp_coord = get_coords(snp)

    # Close snp connection
    cur.close()
    conn.close()

    # Select desired ancestral populations
    pops = pop.split("+")
    pop_dirs = []
    for pop_i in pops:
        if pop_i in [
                "ALL", "AFR", "AMR", "EAS", "EUR", "SAS", "ACB", "ASW", "BEB",
                "CDX", "CEU", "CHB", "CHS", "CLM", "ESN", "FIN", "GBR", "GIH",
                "GWD", "IBS", "ITU", "JPT", "KHV", "LWK", "MSL", "MXL", "PEL",
                "PJL", "PUR", "STU", "TSI", "YRI"
        ]:
            pop_dirs.append(pop_dir + pop_i + ".txt")

    get_pops = "cat " + " ".join(pop_dirs) + " > " + \
        tmp_dir + "pops_" + request + ".txt"
    subprocess.call(get_pops, shell=True)

    # Get population ids
    pop_list = open(tmp_dir + "pops_" + request + ".txt").readlines()
    ids = []
    for i in range(len(pop_list)):
        ids.append(pop_list[i].strip())

    pop_ids = list(set(ids))

    # Extract query SNP phased genotypes
    vcf_file = vcf_dir + \
        snp_coord[
            1] + ".phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"

    tabix_snp_h = "tabix -H {0} | grep CHROM".format(vcf_file)
    proc_h = subprocess.Popen(tabix_snp_h, shell=True, stdout=subprocess.PIPE)
    head = proc_h.stdout.readlines()[0].strip().split()

    tabix_snp = "tabix {0} {1}:{2}-{2} | grep -v -e END > {3}".format(
        vcf_file, snp_coord[1], snp_coord[2],
        tmp_dir + "snp_no_dups_" + request + ".vcf")
    subprocess.call(tabix_snp, shell=True)

    # Check SNP is in the 1000G population, has the correct RS number, and not
    # monoallelic
    vcf = open(tmp_dir + "snp_no_dups_" + request + ".vcf").readlines()

    if len(vcf) == 0:
        subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt",
                        shell=True)
        subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
        return None
    elif len(vcf) > 1:
        geno = []
        for i in range(len(vcf)):
            if vcf[i].strip().split()[2] == snp:
                geno = vcf[i].strip().split()
        if geno == []:
            subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt",
                            shell=True)
            subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf",
                            shell=True)
            return None
    else:
        geno = vcf[0].strip().split()

    if geno[2] != snp:
        snp = geno[2]

    if "," in geno[3] or "," in geno[4]:
        subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt",
                        shell=True)
        subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
        return None

    index = []
    for i in range(9, len(head)):
        if head[i] in pop_ids:
            index.append(i)

    genotypes = {"0": 0, "1": 0}
    for i in index:
        sub_geno = geno[i].split("|")
        for j in sub_geno:
            if j in genotypes:
                genotypes[j] += 1
            else:
                genotypes[j] = 1

    if genotypes["0"] == 0 or genotypes["1"] == 0:
        subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt",
                        shell=True)
        subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
        return None

    # Define window of interest around query SNP
    window = 500000
    coord1 = int(snp_coord[2]) - window
    if coord1 < 0:
        coord1 = 0
    coord2 = int(snp_coord[2]) + window

    # Calculate proxy LD statistics in parallel
    threads = 4
    block = (2 * window) / 4
    commands = []
    for i in range(threads):
        if i == min(range(threads)) and i == max(range(threads)):
            command = "python LDproxy_sub.py " + snp + " " + \
                snp_coord[1] + " " + str(coord1) + " " + \
                str(coord2) + " " + request + " " + str(i)
        elif i == min(range(threads)):
            command = "python LDproxy_sub.py " + snp + " " + \
                snp_coord[1] + " " + str(coord1) + " " + \
                str(coord1 + block) + " " + request + " " + str(i)
        elif i == max(range(threads)):
            command = "python LDproxy_sub.py " + snp + " " + snp_coord[
                1] + " " + str(coord1 + (block * i) + 1) + " " + str(
                    coord2) + " " + request + " " + str(i)
        else:
            command = "python LDproxy_sub.py " + snp + " " + snp_coord[
                1] + " " + str(coord1 + (block * i) + 1) + " " + str(
                    coord1 + (block * (i + 1))) + " " + request + " " + str(i)
        commands.append(command)

    processes = [
        subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
        for command in commands
    ]

    # collect output in parallel
    def get_output(process):
        """Wait for *process* to finish and return its stdout split into lines.

        ``process.communicate()`` blocks until the child exits; the first item
        of its result (the captured stdout) is split with ``splitlines()``.
        """
        captured_stdout = process.communicate()[0]
        return captured_stdout.splitlines()

    if not hasattr(threading.current_thread(), "_children"):
        threading.current_thread()._children = weakref.WeakKeyDictionary()

    pool = Pool(len(processes))
    out_raw = pool.map(get_output, processes)
    pool.close()
    pool.join()

    # Aggregate output
    out_prox = []
    for i in range(len(out_raw)):
        for j in range(len(out_raw[i])):
            col = out_raw[i][j].strip().split("\t")
            col[6] = int(col[6])
            col[7] = float(col[7])
            col[8] = float(col[8])
            col.append(abs(int(col[6])))
            out_prox.append(col)

    # Sort output
    if r2_d not in ["r2", "d"]:
        r2_d = "r2"

    out_dist_sort = sorted(out_prox, key=operator.itemgetter(14))
    if r2_d == "r2":
        out_ld_sort = sorted(out_dist_sort,
                             key=operator.itemgetter(8),
                             reverse=True)
    else:
        out_ld_sort = sorted(out_dist_sort,
                             key=operator.itemgetter(7),
                             reverse=True)

    # Organize scatter plot data
    q_rs = []
    q_allele = []
    q_coord = []
    q_maf = []
    p_rs = []
    p_allele = []
    p_coord = []
    p_maf = []
    dist = []
    d_prime = []
    d_prime_round = []
    r2 = []
    r2_round = []
    corr_alleles = []
    regdb = []
    funct = []
    color = []
    size = []
    for i in range(len(out_ld_sort)):
        q_rs_i, q_allele_i, q_coord_i, p_rs_i, p_allele_i, p_coord_i, dist_i, d_prime_i, r2_i, corr_alleles_i, regdb_i, q_maf_i, p_maf_i, funct_i, dist_abs = out_ld_sort[
            i]

        if float(r2_i) > 0.01:
            q_rs.append(q_rs_i)
            q_allele.append(q_allele_i)
            q_coord.append(float(q_coord_i.split(":")[1]) / 1000000)
            q_maf.append(str(round(float(q_maf_i), 4)))
            if p_rs_i == ".":
                p_rs_i = p_coord_i
            p_rs.append(p_rs_i)
            p_allele.append(p_allele_i)
            p_coord.append(float(p_coord_i.split(":")[1]) / 1000000)
            p_maf.append(str(round(float(p_maf_i), 4)))
            dist.append(str(round(dist_i / 1000000.0, 4)))
            d_prime.append(float(d_prime_i))
            d_prime_round.append(str(round(float(d_prime_i), 4)))
            r2.append(float(r2_i))
            r2_round.append(str(round(float(r2_i), 4)))
            corr_alleles.append(corr_alleles_i)

            # Correct Missing Annotations
            if regdb_i == ".":
                regdb_i = ""
            regdb.append(regdb_i)
            if funct_i == ".":
                funct_i = ""
            if funct_i == "NA":
                funct_i = "none"
            funct.append(funct_i)

            # Set Color
            if i == 0:
                color_i = "blue"
            elif funct_i != "none" and funct_i != "":
                color_i = "red"
            else:
                color_i = "orange"
            color.append(color_i)

            # Set Size
            size_i = 9 + float(p_maf_i) * 14.0
            size.append(size_i)

    # Begin Bokeh Plotting
    from collections import OrderedDict
    from bokeh.embed import components, file_html
    from bokeh.layouts import gridplot
    from bokeh.models import HoverTool, LinearAxis, Range1d
    from bokeh.plotting import ColumnDataSource, curdoc, figure, output_file, reset_output, save
    from bokeh.resources import CDN
    from bokeh.io import export_svgs
    import svgutils.compose as sg

    reset_output()

    # Proxy Plot
    x = p_coord
    if r2_d == "r2":
        y = r2
    else:
        y = d_prime
    whitespace = 0.01
    xr = Range1d(start=coord1 / 1000000.0 - whitespace,
                 end=coord2 / 1000000.0 + whitespace)
    yr = Range1d(start=-0.03, end=1.03)
    sup_2 = u"\u00B2"

    proxy_plot = figure(
        title="Proxies for " + snp + " in " + pop,
        min_border_top=2,
        min_border_bottom=2,
        min_border_left=60,
        min_border_right=60,
        h_symmetry=False,
        v_symmetry=False,
        plot_width=900,
        plot_height=600,
        x_range=xr,
        y_range=yr,
        tools="hover,tap,pan,box_zoom,box_select,undo,redo,reset,previewsave",
        logo=None,
        toolbar_location="above")

    proxy_plot.title.align = "center"

    tabix_recomb = "tabix -fh {0} {1}:{2}-{3} > {4}".format(
        recomb_dir, snp_coord[1], coord1 - whitespace, coord2 + whitespace,
        tmp_dir + "recomb_" + request + ".txt")
    subprocess.call(tabix_recomb, shell=True)
    filename = tmp_dir + "recomb_" + request + ".txt"
    recomb_raw = open(filename).readlines()
    recomb_x = []
    recomb_y = []
    for i in range(len(recomb_raw)):
        chr, pos, rate = recomb_raw[i].strip().split()
        recomb_x.append(int(pos) / 1000000.0)
        recomb_y.append(float(rate) / 100.0)

    data = {
        'x': x,
        'y': y,
        'qrs': q_rs,
        'q_alle': q_allele,
        'q_maf': q_maf,
        'prs': p_rs,
        'p_alle': p_allele,
        'p_maf': p_maf,
        'dist': dist,
        'r': r2_round,
        'd': d_prime_round,
        'alleles': corr_alleles,
        'regdb': regdb,
        'funct': funct,
        'size': size,
        'color': color
    }
    source = ColumnDataSource(data)

    proxy_plot.line(recomb_x, recomb_y, line_width=1, color="black", alpha=0.5)

    proxy_plot.circle(x='x',
                      y='y',
                      size='size',
                      color='color',
                      alpha=0.5,
                      source=source)

    hover = proxy_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("Query Variant", "@qrs @q_alle"),
        ("Proxy Variant", "@prs @p_alle"),
        ("Distance (Mb)", "@dist"),
        ("MAF (Query,Proxy)", "@q_maf,@p_maf"),
        ("R" + sup_2, "@r"),
        ("D\'", "@d"),
        ("Correlated Alleles", "@alleles"),
        ("RegulomeDB", "@regdb"),
        ("Functional Class", "@funct"),
    ])

    proxy_plot.text(x,
                    y,
                    text=regdb,
                    alpha=1,
                    text_font_size="7pt",
                    text_baseline="middle",
                    text_align="center",
                    angle=0)

    if r2_d == "r2":
        proxy_plot.yaxis.axis_label = "R" + sup_2
    else:
        proxy_plot.yaxis.axis_label = "D\'"

    proxy_plot.extra_y_ranges = {"y2_axis": Range1d(start=-3, end=103)}
    proxy_plot.add_layout(
        LinearAxis(y_range_name="y2_axis",
                   axis_label="Combined Recombination Rate (cM/Mb)"), "right")

    # Rug Plot
    y2_ll = [-0.03] * len(x)
    y2_ul = [1.03] * len(x)
    yr_rug = Range1d(start=-0.03, end=1.03)

    data_rug = {
        'x': x,
        'y': y,
        'y2_ll': y2_ll,
        'y2_ul': y2_ul,
        'qrs': q_rs,
        'q_alle': q_allele,
        'q_maf': q_maf,
        'prs': p_rs,
        'p_alle': p_allele,
        'p_maf': p_maf,
        'dist': dist,
        'r': r2_round,
        'd': d_prime_round,
        'alleles': corr_alleles,
        'regdb': regdb,
        'funct': funct,
        'size': size,
        'color': color
    }
    source_rug = ColumnDataSource(data_rug)

    rug = figure(x_range=xr,
                 y_range=yr_rug,
                 border_fill_color='white',
                 y_axis_type=None,
                 title="",
                 min_border_top=2,
                 min_border_bottom=2,
                 min_border_left=60,
                 min_border_right=60,
                 h_symmetry=False,
                 v_symmetry=False,
                 plot_width=900,
                 plot_height=50,
                 tools="xpan,tap",
                 logo=None)

    rug.segment(x0='x',
                y0='y2_ll',
                x1='x',
                y1='y2_ul',
                source=source_rug,
                color='color',
                alpha=0.5,
                line_width=1)
    rug.toolbar_location = None

    # Gene Plot
    tabix_gene = "tabix -fh {0} {1}:{2}-{3} > {4}".format(
        gene_dir, snp_coord[1], coord1, coord2,
        tmp_dir + "genes_" + request + ".txt")
    subprocess.call(tabix_gene, shell=True)
    filename = tmp_dir + "genes_" + request + ".txt"
    genes_raw = open(filename).readlines()

    genes_plot_start = []
    genes_plot_end = []
    genes_plot_y = []
    genes_plot_name = []
    exons_plot_x = []
    exons_plot_y = []
    exons_plot_w = []
    exons_plot_h = []
    exons_plot_name = []
    exons_plot_id = []
    exons_plot_exon = []
    lines = [0]
    gap = 80000
    tall = 0.75
    if genes_raw != None:
        for i in range(len(genes_raw)):
            bin, name_id, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, exonCount, exonStarts, exonEnds, score, name2, cdsStartStat, cdsEndStat, exonFrames = genes_raw[
                i].strip().split()
            name = name2
            id = name_id
            e_start = exonStarts.split(",")
            e_end = exonEnds.split(",")

            # Determine Y Coordinate
            i = 0
            y_coord = None
            while y_coord == None:
                if i > len(lines) - 1:
                    y_coord = i + 1
                    lines.append(int(txEnd))
                elif int(txStart) > (gap + lines[i]):
                    y_coord = i + 1
                    lines[i] = int(txEnd)
                else:
                    i += 1

            genes_plot_start.append(int(txStart) / 1000000.0)
            genes_plot_end.append(int(txEnd) / 1000000.0)
            genes_plot_y.append(y_coord)
            genes_plot_name.append(name + "  ")

            for i in range(len(e_start) - 1):
                if strand == "+":
                    exon = i + 1
                else:
                    exon = len(e_start) - 1 - i

                width = (int(e_end[i]) - int(e_start[i])) / 1000000.0
                x_coord = int(e_start[i]) / 1000000.0 + (width / 2)

                exons_plot_x.append(x_coord)
                exons_plot_y.append(y_coord)
                exons_plot_w.append(width)
                exons_plot_h.append(tall)
                exons_plot_name.append(name)
                exons_plot_id.append(id)
                exons_plot_exon.append(exon)

    n_rows = len(lines)
    genes_plot_yn = [n_rows - x + 0.5 for x in genes_plot_y]
    exons_plot_yn = [n_rows - x + 0.5 for x in exons_plot_y]
    yr2 = Range1d(start=0, end=n_rows)

    data_gene_plot = {
        'exons_plot_x': exons_plot_x,
        'exons_plot_yn': exons_plot_yn,
        'exons_plot_w': exons_plot_w,
        'exons_plot_h': exons_plot_h,
        'exons_plot_name': exons_plot_name,
        'exons_plot_id': exons_plot_id,
        'exons_plot_exon': exons_plot_exon
    }

    source_gene_plot = ColumnDataSource(data_gene_plot)

    if len(lines) < 3:
        plot_h_pix = 150
    else:
        plot_h_pix = 150 + (len(lines) - 2) * 50

    gene_plot = figure(
        x_range=xr,
        y_range=yr2,
        border_fill_color='white',
        title="",
        min_border_top=2,
        min_border_bottom=2,
        min_border_left=60,
        min_border_right=60,
        h_symmetry=False,
        v_symmetry=False,
        plot_width=900,
        plot_height=plot_h_pix,
        tools="hover,tap,xpan,box_zoom,undo,redo,reset,previewsave",
        logo=None)

    gene_plot.segment(genes_plot_start,
                      genes_plot_yn,
                      genes_plot_end,
                      genes_plot_yn,
                      color="black",
                      alpha=1,
                      line_width=2)

    gene_plot.rect(x='exons_plot_x',
                   y='exons_plot_yn',
                   width='exons_plot_w',
                   height='exons_plot_h',
                   source=source_gene_plot,
                   fill_color="grey",
                   line_color="grey")
    gene_plot.xaxis.axis_label = "Chromosome " + \
        snp_coord[1] + " Coordinate (Mb)(GRCh37)"
    gene_plot.yaxis.axis_label = "Genes"
    gene_plot.ygrid.grid_line_color = None
    gene_plot.yaxis.axis_line_color = None
    gene_plot.yaxis.minor_tick_line_color = None
    gene_plot.yaxis.major_tick_line_color = None
    gene_plot.yaxis.major_label_text_color = None

    hover = gene_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("Gene", "@exons_plot_name"),
        ("ID", "@exons_plot_id"),
        ("Exon", "@exons_plot_exon"),
    ])

    gene_plot.text(genes_plot_start,
                   genes_plot_yn,
                   text=genes_plot_name,
                   alpha=1,
                   text_font_size="7pt",
                   text_font_style="bold",
                   text_baseline="middle",
                   text_align="right",
                   angle=0)

    gene_plot.toolbar_location = "below"

    # Change output backend to SVG temporarily for headless export
    # Will be changed back to canvas in LDlink.js
    proxy_plot.output_backend = "svg"
    rug.output_backend = "svg"
    gene_plot.output_backend = "svg"
    export_svgs(proxy_plot,
                filename=tmp_dir + "proxy_plot_1_" + request + ".svg")
    export_svgs(gene_plot,
                filename=tmp_dir + "gene_plot_1_" + request + ".svg")

    # Concatenate svgs
    sg.Figure("24.59cm", "27.94cm",
              sg.SVG(tmp_dir + "proxy_plot_1_" + request + ".svg"),
              sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").move(
                  0, 630)).save(tmp_dir + "proxy_plot_" + request + ".svg")

    sg.Figure(
        "122.95cm", "139.70cm",
        sg.SVG(tmp_dir + "proxy_plot_1_" + request + ".svg").scale(5),
        sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").scale(5).move(
            0, 3150)).save(tmp_dir + "proxy_plot_scaled_" + request + ".svg")

    # Export to PDF
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "proxy_plot_" +
                    request + ".svg " + tmp_dir + "proxy_plot_" + request +
                    ".pdf",
                    shell=True)
    # Export to PNG
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir +
                    "proxy_plot_scaled_" + request + ".svg " + tmp_dir +
                    "proxy_plot_" + request + ".png",
                    shell=True)
    # Export to JPEG
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir +
                    "proxy_plot_scaled_" + request + ".svg " + tmp_dir +
                    "proxy_plot_" + request + ".jpeg",
                    shell=True)
    # Remove individual SVG files after they are combined
    subprocess.call("rm " + tmp_dir + "proxy_plot_1_" + request + ".svg",
                    shell=True)
    subprocess.call("rm " + tmp_dir + "gene_plot_1_" + request + ".svg",
                    shell=True)
    # Remove scaled SVG file after it is converted to png and jpeg
    subprocess.call("rm " + tmp_dir + "proxy_plot_scaled_" + request + ".svg",
                    shell=True)

    reset_output()

    # Remove temporary files
    subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt", shell=True)
    subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)
    subprocess.call("rm " + tmp_dir + "genes_" + request + ".txt", shell=True)
    subprocess.call("rm " + tmp_dir + "recomb_" + request + ".txt", shell=True)

    # Return plot output
    return None
Ejemplo n.º 16
0
def composite(
        fig_spec: FigureSpec,
        memoize_panels: bool = False,
        recompute_panels: bool = True,
        delete_png: bool = True,
) -> None:
    """
    Compose the panels of a FigureSpec into one SVG and optionally rasterize it.

    The output path comes from ``fig_spec.output_file``; its suffix is forced
    to ``.svg`` and missing parent directories are created.  Each panel is
    wrapped in an ``sc.Panel`` holding its content, its text annotations and,
    when ``panel.auto_label`` is set, the next automatic label.  Unless
    ``fig_spec.generate_image`` is ``ImageType.none`` the saved SVG is then
    exported to PNG with ``inkscape``, optionally converted to TIFF with
    ``convert``/``mogrify``, and ``eog`` is run on the resulting image.

    Parameters
    ----------
    fig_spec : FigureSpec
        Description of the figure: output file, size, panels, labelling and
        image-export options.
    memoize_panels : bool
        If true, a ``.panels`` directory next to the output file is used as
        the panel directory; otherwise the system temporary directory is used.
        The flag is also forwarded to ``_get_panel_content``.
    recompute_panels : bool
        Forwarded to ``_get_panel_content``.
    delete_png :  bool
        When a TIFF is generated, remove the intermediate PNG afterwards.
        See the pubfig.compositor decorator for a description of the parameters.

    Returns
    -------
        None
    """
    import tempfile

    svg_path = fig_spec.output_file
    if isinstance(svg_path, str):
        svg_path = Path(svg_path)

    assert not svg_path.is_dir(), "The output file name you provided is a directory"

    # The composed figure is always written as SVG, whatever suffix was given.
    if svg_path.suffix != ".svg":
        svg_path = svg_path.with_suffix(".svg")
    svg_path = svg_path.expanduser()

    if not svg_path.parent.exists():
        svg_path.parent.mkdir(parents=True, exist_ok=True)

    # Directory handed to _get_panel_content for the individual panel files.
    if memoize_panels:
        panel_dir = svg_path.parent / ".panels"
        if not panel_dir.exists():
            panel_dir.mkdir()
    else:
        panel_dir = Path(tempfile.gettempdir())

    fig_size = fig_spec.figure_size

    composed = []
    if fig_spec.plot_grid_every > 0:
        composed.append(_generate_grid(fig_size, fig_spec.plot_grid_every, font_size=8))

    auto_label = fig_spec.auto_label_options
    label_generator = auto_label.label_generator(auto_label.first_char.text)

    for name in fig_spec.panels._fields:
        panel = getattr(fig_spec.panels, name)
        assert isinstance(panel, Panel)
        assert isinstance(panel.fig, (plt.Figure, VectorImage, RasterImage))

        # Panel units take precedence; fall back to the figure units.
        content_offset = _location_to_str(
            panel.location.units or fig_size.units, panel.content_offset
        )

        # --- panel content -------------------------------------------------
        if isinstance(panel, PanelFig):
            content = _get_panel_content(
                panel_dir, panel, name, memoize_panels, recompute_panels
            ).move(*content_offset)
        elif isinstance(panel.fig, VectorImage):
            scale = panel.scale or panel.fig.scale
            print(f"Scaling vector image {panel.fig.file.absolute()} by {scale:.3f}")
            content = panel.fig.svg.scale(scale).move(*content_offset)
        elif isinstance(panel.fig, RasterImage):
            img_size = panel.fig.img_size
            scale = panel.scale or 1.
            raster = sc.Image(
                img_size.units.to_px(img_size.width),
                img_size.units.to_px(img_size.height),
                f"{panel.fig.file}",
            )
            content = raster.scale(scale).move(*content_offset)
        else:
            raise TypeError(f"Unknown type of panel content {type(panel.fig)} for panel {name}")

        elements = [content]

        # --- free text annotations -----------------------------------------
        if panel.text is not None:
            text_elements = []
            for spec in panel.text:
                text_xy = _location_to_str(
                    fig_size.units, Location(spec.x, spec.y, panel.location.units)
                )
                text_elements.append(sc.Text(spec.text, *text_xy, **spec.kwargs))

            # Positioned in a second pass: rotate() does not return the Text
            # element, so the call chain cannot be used while building the list.
            for element, spec in zip(text_elements, panel.text):
                element.move(*content_offset).rotate(spec.angle)

            elements += text_elements

        # --- automatic panel label -----------------------------------------
        if panel.auto_label:
            first_char = auto_label.first_char
            label_xy = _location_to_str(
                fig_size.units, Location(first_char.x, first_char.y)
            )
            elements.append(
                sc.Text(next(label_generator), *label_xy, **first_char.kwargs)
            )

        panel_xy = _location_to_str(fig_size.units, panel.location)
        composed.append(sc.Panel(*elements).move(*panel_xy))

    width_px = fig_size.units.to_px(fig_size.width)
    height_px = fig_size.units.to_px(fig_size.height)
    sc.Figure(f"{width_px:.2f}px", f"{height_px:.2f}px", *composed).save(svg_path)

    if fig_spec.generate_image != ImageType.none:
        """ Taken from this shell script:
        #!/bin/sh

        # Convert all arguments (assumed SVG) to a TIFF acceptable to PLOS
        # Requires Inkscape and ImageMagick 6.8 (doesn't work with 6.6.9)

        for i in $@; do
          BN=$(basename $i .svg)
          inkscape --without-gui --export-png="$BN.png" --export-dpi 400 $i
          convert -compress LZW -alpha remove $BN.png $BN.tiff
          mogrify -alpha off $BN.tiff
          rm $BN.png
        done
        """
        # svg_path always ends in ".svg" here, so dropping four characters
        # leaves the path without its suffix.
        basename = f"{svg_path}"[:-4]
        image_name = f"{basename}.png"
        _run(f"inkscape --without-gui --export-png='{image_name}' --export-dpi {fig_spec.image_dpi} {svg_path}")
        if fig_spec.generate_image == ImageType.tiff:
            tiff_name = f"{basename}.tiff"
            _run(f"convert -compress LZW -alpha remove {image_name} {tiff_name}")
            _run(f"mogrify -alpha off {tiff_name}")
            if delete_png:
                _run(f"rm {image_name}")
            image_name = tiff_name
        _run(f"eog {image_name}")
Ejemplo n.º 17
0
    def main(self):
        """Render the substrate-scope figure and write it to disk.

        For every dict entry in ``self.plots`` a wedge plot and colorbar are
        produced with ``self.plot_figure_and_colorbar`` and positioned at the
        drawing coordinates of the atom given by ``attach_atom_id``.  The
        molecule drawing from ``self.draw_smiles``, the colorbar of the first
        plot and all wedge plots are composed into ``substrate_scope.svg``.
        Group labels starting with ``~`` are swapped for numbered placeholders
        and later handed to ``self.replace_label_with_smiles``; the final SVG
        text is written to ``substrate_scope_replaced.svg`` in the current
        working directory.

        Note that the ``value_groups`` lists inside ``self.plots`` are modified
        in place (``~...`` entries become ``~<n>`` placeholders).

        Raises:
            IndexError: if ``self.plots`` contains no dict entry, because the
                colorbar is taken from the first resulting plot.
        """
        settings = self.settings
        # Unused below; the lookup is kept so that a missing key still raises
        # KeyError before any drawing starts.
        SMILESSTRING = settings['SMILESSTRING']
        resulting_plots = []
        pRList = []
        mol_svg, d2d, dm = self.draw_smiles()
        replace_index = []
        for scope_plot in self.plots:
            # Entries that are not dicts are skipped.
            if not isinstance(scope_plot, dict):
                continue

            no_wedges = scope_plot['no_wedges']
            cover_angle = scope_plot['coverangle_wedges']
            # The first wedge fills the uncovered angle; it is the transparent
            # one and is followed by `no_wedges` equally sized wedges.
            sizes = [360 - cover_angle] + [cover_angle / no_wedges] * no_wedges

            value_inner_circle = scope_plot['value_inner_circle']
            value_outer_circle = scope_plot['value_outer_circle']
            if (len(value_inner_circle) != no_wedges
                    or len(value_outer_circle) != no_wedges):
                print('not equal')
            rounding = scope_plot['rounding']
            rounding_boundary = scope_plot['rounding_boundary']
            value_groups = scope_plot['value_groups']

            # Labels carry an empty first entry for the transparent wedge.
            # With rounding enabled, values at or above the boundary are
            # shown with a leading ">".
            label_inner_circle = [''] + [''] * no_wedges
            label_outer_circle = [''] + [''] * no_wedges
            for i in range(no_wedges):
                for labels, values in ((label_inner_circle, value_inner_circle),
                                       (label_outer_circle, value_outer_circle)):
                    capped = rounding and values[i] >= rounding_boundary
                    labels[i + 1] = (">" if capped else "") + str(values[i])

            # Groups written as "~<smiles>" are replaced by a short placeholder
            # now; the placeholder/SMILES pair is remembered for the
            # replacement pass after the figure has been saved.
            j = 0
            for i, item in enumerate(value_groups):
                if item.startswith('~'):
                    placeholder = '~' + str(j)
                    replace_index.append((placeholder, item[1:]))
                    value_groups[i] = placeholder
                    j += 1

            vals = [
                sizes,  # wedge sizes; the first wedge is transparent and not shown
                [0] + value_inner_circle,  # colormap values, inner circle (first stays 0)
                [0] + value_outer_circle,  # colormap values, outer circle (first stays 0)
                label_inner_circle,  # labels for the inner circle
                label_outer_circle,  # labels for the outer circle
                [""] + value_groups,  # groups
            ]
            resulting_plots.append(
                self.plot_figure_and_colorbar(scope_plot, vals))

            # Get the atom id from the settings and save its drawing position.
            rIdx = scope_plot['attach_atom_id']
            pRList.append(
                d2d.GetDrawCoords(
                    Geometry.Point2D(dm.GetConformer().GetAtomPosition(rIdx))))

        # Take the colorbar from the first plot.
        # TODO: extension to multiple colorbars.
        colorbar = compose.Panel(
            strSVG(resulting_plots[0][1]).scale(0.8).move(-350, 400))
        panels = [
            strSVG(plot[0]).move(-369, -358).scale(1).move(anchor.x, anchor.y)
            for plot, anchor in zip(resulting_plots, pRList)
        ]

        compose.Figure(
            "600",
            "600",  # 720 default
            compose.Panel(strSVG(mol_svg).scale(1).move(0, 0)),
            colorbar,
            *panels
        ).move(350, 100).scale(
            self.settings['scalefactor']).save("substrate_scope.svg")

        new_svg = SVG('substrate_scope.svg')._data
        for search_index, smiles in replace_index:
            new_svg = self.replace_label_with_smiles(svg_file=new_svg,
                                                     smiles=smiles,
                                                     search_index=search_index)

        if settings['use_bold_font']:
            # str.replace returns a new string, so the result has to be
            # re-bound for the bold setting to take effect.
            new_svg = new_svg.replace('font-weight:normal', 'font-weight:bold')
        with open("substrate_scope_replaced.svg", "w") as f:
            f.write(new_svg)
        print('File written to:',
              os.getcwd() + '/substrate_scope_replaced.svg')
Ejemplo n.º 18
0
def put_list_of_figs_to_svg_fig(
        FIGS,
        fig_name="fig.svg",
        initial_guess=True,
        visualize=False,
        export_as_png=False,
        Props=None,
        figsize=None,
        fontsize=9,
        SCALING_FACTOR=1.34,  # needed to get the right cm size ...
        with_top_left_letter=False,
        transparent=True):
    """Assemble a list of figures into one multi-panel SVG file.

    Each entry of ``FIGS`` is either a string, which is passed to ``sg.SVG``
    as is, or an object providing ``get_size_inches()`` and ``savefig()``
    (presumably a matplotlib figure), which is first saved as
    ``<index>.svg`` in the system temporary directory.

    Without ``Props`` the panels are laid out three per row, spaced by the
    largest figure width/height times 100, and labelled ``A``, ``B``, ...
    With ``Props`` the keys ``'XCOORD'`` and ``'YCOORD'`` give the panel
    positions; ``'LABELS'`` (default: empty strings) and
    ``'XCOORD_LABELS'``/``'YCOORD_LABELS'`` (default: the panel positions)
    are optional.

    The figure is created with size ``"21cm"`` x ``"29.7cm"``, scaled by
    ``SCALING_FACTOR`` and saved to ``fig_name``.  If ``visualize`` is true
    the file is opened with the ``open`` shell command.

    ``initial_guess``, ``export_as_png``, ``figsize`` and
    ``with_top_left_letter`` are accepted but not used by this function.
    """
    n_figs = len(FIGS)
    letters = list(string.ascii_uppercase)[:n_figs]

    # String entries get a nominal 1x1 size.
    sizes = [[1., 1.] if type(fig) is str else fig.get_size_inches()
             for fig in FIGS]
    width = np.max([wh[0] for wh in sizes])
    height = np.max([wh[1] for wh in sizes])

    if Props is None:
        # Default layout: rows of three panels.
        LABELS, XCOORD, YCOORD = [], [], []
        for i in range(n_figs):
            row, col = divmod(i, 3)
            LABELS.append(letters[i])
            XCOORD.append(col * width * 100)
            YCOORD.append(row * height * 100)
        XCOORD_LABELS, YCOORD_LABELS = XCOORD, YCOORD
    else:
        XCOORD, YCOORD = Props['XCOORD'], Props['YCOORD']
        LABELS = Props['LABELS'] if 'LABELS' in Props else ['' for x in XCOORD]
        if 'XCOORD_LABELS' in Props:
            XCOORD_LABELS = Props['XCOORD_LABELS']
            YCOORD_LABELS = Props['YCOORD_LABELS']
        else:
            XCOORD_LABELS, YCOORD_LABELS = XCOORD, YCOORD

    # Resolve every entry to an SVG file, saving figure objects on the way.
    svg_files = []
    for idx, fig in enumerate(FIGS):
        if type(fig) is str:
            svg_files.append(fig)
            continue
        tmp_svg = os.path.join(gettempdir(), str(idx) + '.svg')
        svg_files.append(tmp_svg)
        fig.savefig(tmp_svg, format='svg', transparent=transparent)

    # Figure panels first, then one text panel per label.
    PANELS = [
        sg.Panel(sg.SVG(svg_file).move(XCOORD[idx], YCOORD[idx]))
        for idx, svg_file in enumerate(svg_files)
    ]
    PANELS.extend(
        sg.Panel(
            sg.Text(text, 15, 10, size=fontsize,
                    weight='bold').move(XCOORD_LABELS[idx], YCOORD_LABELS[idx]))
        for idx, text in enumerate(LABELS))

    # The page size is fixed; `figsize` is currently ignored.
    sg.Figure("21cm", "29.7cm", *PANELS).scale(SCALING_FACTOR).save(fig_name)

    if visualize:
        os.system('open ' + fig_name)  # works well with 'Gapplin' on OS-X
Ejemplo n.º 19
0
def _relabel_fragment_columns(mean_df, std_df):
    """Rename the fragment columns of both matrices and order them numerically.

    Column labels are split on ``-``: two-part labels are joined without the
    dash and ordered by their last part; three-part labels drop their first
    part and are ordered by the mean of their last two parts; ``'Total'`` is
    kept and placed last.  Columns with any other label format are left out.

    New label, old label and sort key are kept together while sorting so that
    every renamed column is filled from the column it was derived from, even
    when two columns share a sort key or a column is left out.

    Returns:
        Tuple ``(new_mean_df, new_std_df)``.
    """
    ordering = []
    for old_label in mean_df.columns:
        if old_label == 'Total':
            ordering.append((100000, 'Total', old_label))
            continue
        parts = old_label.split('-')
        if len(parts) == 2:
            ordering.append((float(parts[-1]), ''.join(parts), old_label))
        elif len(parts) == 3:
            midpoint = 0.5 * (float(parts[-1]) + float(parts[-2]))
            ordering.append((midpoint, '-'.join(parts[1:]), old_label))
    ordering.sort()

    new_mean_df = pd.DataFrame()
    new_std_df = pd.DataFrame()
    for _, new_label, old_label in ordering:
        new_mean_df[new_label] = mean_df[old_label]
        new_std_df[new_label] = std_df[old_label]
    return new_mean_df, new_std_df


def fsapt_analyze(lig_dir, mode, ene_type):
    """Summarize the F-SAPT energy matrices of one ligand directory.

    The matrices returned by ``_get_ene_matrix`` for every ``FSAPT*out`` file
    in ``lig_dir`` are averaged element-wise (mean and standard deviation),
    plotted with ``plot_matrix``, and written as CSV files.  A 2D drawing of
    the ligand with its fragment numbers is produced and combined with the
    matrix plot into ``<lig_dir>/<lig_name>_<mode>_<ene_type>.svg`` (also
    converted to PDF with ImageMagick ``convert``).  Finally
    ``write_pymol_pdb`` is called on ``<lig_dir>/frame0/fsapt.pdb``.

    Args:
        lig_dir: Directory holding the ``FSAPT*out`` files; its base name is
            used as the ligand name.
        mode: For ``'prolig'`` and ``'proliglig'`` the matrix columns are
            relabelled and sorted; ``'prolig'`` also selects the
            "Protein-Ligand" title, any other value the "Ligand-Ligand" one.
        ene_type: Forwarded to ``_get_ene_matrix`` and ``plot_matrix`` and
            used in titles and file names.

    Raises:
        ValueError: if no energy matrix could be read from ``lig_dir``.

    The ligand structure is read from ``MD/<lig_name>/cmp_sybyl.mol2``
    relative to the current working directory.
    """
    import shlex

    lig_name = os.path.basename(os.path.abspath(lig_dir))
    matrix_dfs = []
    for of in glob('%s/FSAPT*out' % lig_dir):
        df = _get_ene_matrix(of, ene_type)
        if df is not None:
            matrix_dfs.append(df)
    if not matrix_dfs:
        # pd.concat would fail with the same exception type but a message
        # that does not say which directory was empty.
        raise ValueError(
            'No usable FSAPT*out energy matrices found in %s' % lig_dir)

    all_df = pd.concat(matrix_dfs, axis=1)
    mean_df = all_df.stack().groupby(level=[0, 1]).mean().unstack()
    std_df = all_df.stack().groupby(level=[0, 1]).std().unstack()

    if mode in ['prolig', 'proliglig']:
        mean_df, std_df = _relabel_fragment_columns(mean_df, std_df)

    # Cell annotation: signed mean on the first line, +/- std on the second.
    mean_anno = mean_df.applymap(lambda x: '%+.2f\n' % x)
    std_anno = std_df.applymap(lambda x: r'+/-%.2f' % x)
    all_anno = mean_anno + std_anno

    matrix_svg = '%s/ene_matrix_%s.svg' % (lig_dir, ene_type)
    plot_matrix(mean_df, all_anno, matrix_svg, mode, ene_type)

    mean_df.to_csv('%s/ene_mean_%s_%s_%s.csv' %
                   (lig_dir, lig_name, mode, ene_type))
    std_df.to_csv('%s/ene_std_%s_%s_%s.csv' %
                  (lig_dir, lig_name, mode, ene_type))

    # Plot the ligand
    dpi = 96
    width = len(mean_df.columns) + 2
    height = 4

    ligmol = cs._RdkitMolBase.from_file('MD/%s/cmp_sybyl.mol2' % lig_name)
    ligmol._init_atominfo(reset=False)
    ligmol.charged_mol2file = 'MD/%s/cmp_sybyl.mol2' % lig_name
    ligmol.get_noh_mol()
    AllChem.Compute2DCoords(ligmol.noh_mol, canonOrient=True, bondLength=1.5)
    drawer = rdMolDraw2D.MolDraw2DSVG(width * dpi, height * dpi)
    opts = drawer.drawOptions()
    opts.additionalAtomLabelPadding = 0.1

    frag_dict, _ = fragment_mol(ligmol, 'L1')

    # Label every heavy atom whose fragment has a row in the matrix: carbons
    # with the fragment number only, other atoms as "<element>/<number>".
    for noha in ligmol.noh_mol.GetAtoms():
        noh_idx = noha.GetIdx()
        h_idx = ligmol.noh_to_h_atom_mapping[noh_idx]
        frag_label = str(frag_dict[h_idx]['resid'])
        if 'L1-%02d' % int(frag_label) not in mean_df.index:
            continue
        if noha.GetAtomicNum() == 6:
            opts.atomLabels[noh_idx] = '%02d' % int(frag_label)
        else:
            elem = ligmol.GetAtomWithIdx(h_idx).GetProp(
                '_TriposAtomType').split('.')[0]
            opts.atomLabels[noh_idx] = '%s/%02d' % (elem, int(frag_label))
    drawer.DrawMolecule(ligmol.noh_mol)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText().replace('svg:', '')
    struct_svg = '%s/lig_frag_%s.svg' % (lig_dir, ene_type)
    with open(struct_svg, 'w') as fh:
        fh.write(svg)

    # Consolidate the panels
    if mode == 'prolig':
        mat_title = 'Protein-Ligand %s Interaction' % ene_type.capitalize()
    else:
        mat_title = 'Ligand-Ligand %s Interaction' % ene_type.capitalize()

    # The structure panel starts below the matrix: one `dpi` per matrix row.
    struct_top = dpi * len(mean_df) + 20
    mat_title = sc.Panel(sc.Text(mat_title, size=24)).move(20, 20)
    mat_panel = sc.Panel(sc.SVG(matrix_svg).scale(1.4)).move(0, 20)
    struct_title = sc.Panel(sc.Text('Ligand %s' % lig_name,
                                    size=24)).move(20, struct_top)
    struct_panel = sc.Panel(sc.SVG(struct_svg)).move(0, struct_top)
    final_figure = sc.Figure(dpi * width,
                             dpi * (len(mean_df) + height) + 40, mat_panel,
                             mat_title, struct_panel, struct_title)
    final_name = '%s/%s_%s_%s' % (lig_dir, lig_name, mode, ene_type)
    final_figure.save('%s.svg' % final_name)
    # Quote the paths so directories with spaces or shell metacharacters work.
    os.system('convert -density 100 %s %s' %
              (shlex.quote('%s.svg' % final_name),
               shlex.quote('%s.pdf' % final_name)))
    # Best-effort removal of the intermediate panels (mirrors `rm -f`).
    for tmp_svg in (matrix_svg, struct_svg):
        try:
            os.remove(tmp_svg)
        except OSError:
            pass

    # Write pdb for pymol
    inpdb = '%s/frame0/fsapt.pdb' % lig_dir
    outpdb = '%s_pymol.pdb' % final_name
    write_pymol_pdb(inpdb, outpdb, mean_df)
Ejemplo n.º 20
0
def calculate_proxy_svg(snp,
                        pop,
                        request,
                        genome_build,
                        r2_d="r2",
                        window=500000,
                        collapseTranscript=True):
    """Build the static LDproxy figure and export it as PDF, PNG and JPEG.

    The function reads ``config.yml``, resolves the query variant through the
    ``dbsnp`` Mongo collection, pulls its phased genotypes with ``tabix``,
    runs ``LDproxy_sub.py`` once per window chunk, and draws a Bokeh proxy
    plot plus a gene track. Both plots are exported as SVG, stacked with
    svgutils and rasterised through ``phantomjs ./rasterize.js``.

    Files consumed from ``tmp_dir`` (written by an earlier step):
    ``pops_<request>.txt``, ``recomb_<request>.json`` and either
    ``genes_<request>.json`` or ``genes_c_<request>.json``.

    Files left in ``tmp_dir`` on success: ``proxy_plot_<request>.svg``,
    ``.pdf``, ``.png`` and ``.jpeg``. The consumed inputs and the
    intermediate SVG/VCF files are removed.

    Args:
        snp: Query variant, either an rsID (``rs...``) or a genomic
            coordinate of the form ``chr<chrom>:<position>``.
        pop: Population label; only used in the plot title.
        request: Identifier embedded in every temporary file name. When it
            is ``False`` a time-based string is generated instead.
        genome_build: Key into the module-level ``genome_build_vars`` table.
        r2_d: ``"r2"`` or ``"d"``; selects the LD statistic used for sorting
            and for the y axis. Any other value falls back to ``"r2"``.
        window: Distance in base pairs plotted on each side of the variant.
        collapseTranscript: The all-transcripts gene track is drawn only
            when this equals the string ``"false"``; every other value
            (including the default ``True``) draws the collapsed track.

    Returns:
        Always ``None``. The function exits early, after removing the
        population and VCF temp files, when the variant has no genotype
        record, is multi-allelic, or is monoallelic in the chosen samples.
    """
    # Set data directories using config.yml
    with open('config.yml', 'r') as yml_file:
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # PyYAML 6+ and can build arbitrary Python objects on older versions.
        config = yaml.safe_load(yml_file)
    env = config['env']
    connect_external = config['database']['connect_external']
    api_mongo_addr = config['database']['api_mongo_addr']
    data_dir = config['data']['data_dir']
    tmp_dir = config['data']['tmp_dir']
    genotypes_dir = config['data']['genotypes_dir']
    mongo_username = config['database']['mongo_user_readonly']
    mongo_password = config['database']['mongo_password']
    mongo_port = config['database']['mongo_port']
    aws_info = config['aws']
    num_subprocesses = config['performance']['num_subprocesses']

    export_s3_keys = retrieveAWSCredentials()

    # Ensure tmp directory exists
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    if request is False:
        request = str(time.strftime("%I%M%S"))

    def remove_query_inputs():
        # Cleanup shared by every early exit and by the final teardown.
        subprocess.call("rm " + tmp_dir + "pops_" + request + ".txt",
                        shell=True)
        subprocess.call("rm " + tmp_dir + "*" + request + "*.vcf", shell=True)

    # Connect to Mongo snp database
    if env == 'local' or connect_external:
        mongo_host = api_mongo_addr
    else:
        mongo_host = 'localhost'
    client = MongoClient(
        'mongodb://' + mongo_username + ':' + mongo_password + '@' +
        mongo_host + '/admin', mongo_port)
    db = client["LDLink"]

    def get_coords(db, rsid):
        # Look up one dbsnp document by its numeric id (the "rs" is stripped).
        rsid = rsid.strip("rs")
        query_results = db.dbsnp.find_one({"id": rsid})
        query_results_sanitized = json.loads(json_util.dumps(query_results))
        return query_results_sanitized

    # Query genomic coordinates
    def get_rsnum(db, coord):
        # Return every dbsnp document located at "chr<chrom>:<position>".
        temp_coord = coord.strip("chr").split(":")
        chro = temp_coord[0]
        pos = temp_coord[1]
        query_results = db.dbsnp.find({
            "chromosome":
            chro.upper() if chro == 'x' or chro == 'y' else str(chro),
            genome_build_vars[genome_build]['position']:
            str(pos)
        })
        query_results_sanitized = json.loads(json_util.dumps(query_results))
        return query_results_sanitized

    # Replace input genomic coordinates with variant ids (rsids)
    def replace_coord_rsid(db, snp):
        if snp[0:2] == "rs":
            return snp
        snp_info_lst = get_rsnum(db, snp)
        print("snp_info_lst")
        print(snp_info_lst)
        if snp_info_lst is not None:
            if len(snp_info_lst) > 1:
                # Prefer a record that is its own reference id; otherwise
                # fall back to the first record at the position.
                ref_variants = [
                    snp_info['id'] for snp_info in snp_info_lst
                    if snp_info['id'] == snp_info['ref_id']
                ]
                if ref_variants:
                    return "rs" + ref_variants[0]
                return "rs" + snp_info_lst[0]['id']
            elif len(snp_info_lst) == 1:
                return "rs" + snp_info_lst[0]['id']
        return snp

    snp = replace_coord_rsid(db, snp)

    # Find RS number in snp database
    snp_coord = get_coords(db, snp)

    # Get population ids from LDproxy.py tmp output files
    with open(tmp_dir + "pops_" + request + ".txt") as pops_file:
        # A set gives O(1) membership tests against the VCF header below.
        pop_ids = {line.strip() for line in pops_file}

    # Extract query SNP phased genotypes
    vcf_filePath = "%s/%s%s/%s" % (
        config['aws']['data_subfolder'], genotypes_dir,
        genome_build_vars[genome_build]['1000G_dir'],
        genome_build_vars[genome_build]['1000G_file'] %
        (snp_coord['chromosome']))
    vcf_query_snp_file = "s3://%s/%s" % (config['aws']['bucket'], vcf_filePath)

    checkS3File(aws_info, config['aws']['bucket'], vcf_filePath)

    tabix_snp_h = export_s3_keys + " cd {1}; tabix -HD {0} | grep CHROM".format(
        vcf_query_snp_file, data_dir + genotypes_dir +
        genome_build_vars[genome_build]['1000G_dir'])
    # First "CHROM" line of the VCF header, split into column names.
    head = [
        x.decode('utf-8')
        for x in subprocess.Popen(tabix_snp_h,
                                  shell=True,
                                  stdout=subprocess.PIPE).stdout.readlines()
    ][0].strip().split()

    tabix_snp = export_s3_keys + " cd {4}; tabix -D {0} {1}:{2}-{2} | grep -v -e END > {3}".format(
        vcf_query_snp_file, genome_build_vars[genome_build]['1000G_chr_prefix']
        + snp_coord['chromosome'],
        snp_coord[genome_build_vars[genome_build]['position']],
        tmp_dir + "snp_no_dups_" + request + ".vcf", data_dir + genotypes_dir +
        genome_build_vars[genome_build]['1000G_dir'])
    subprocess.call(tabix_snp, shell=True)

    # Check SNP is in the 1000G population, has the correct RS number, and not
    # monoallelic
    with open(tmp_dir + "snp_no_dups_" + request + ".vcf") as vcf_file:
        vcf = vcf_file.readlines()

    if len(vcf) == 0:
        remove_query_inputs()
        return None
    elif len(vcf) > 1:
        geno = []
        # No rsID filter is applied here, so the last record at the position
        # is the one that is kept.
        for record in vcf:
            geno = record.strip().split()
            geno[0] = geno[0].lstrip('chr')
        if geno == []:
            remove_query_inputs()
            return None
    else:
        geno = vcf[0].strip().split()
        geno[0] = geno[0].lstrip('chr')

    # Adopt the rsID stored in the VCF when it differs from the query id.
    if geno[2] != snp and snp[0:2] == "rs" and "rs" in geno[2]:
        snp = geno[2]

    # A comma in REF or ALT means more than two alleles: not plotted.
    if "," in geno[3] or "," in geno[4]:
        remove_query_inputs()
        return None

    # Sample columns start at index 9 of the header line.
    index = [i for i in range(9, len(head)) if head[i] in pop_ids]

    genotypes = {"0": 0, "1": 0}
    for i in index:
        sub_geno = geno[i].split("|")
        for j in sub_geno:
            if j in genotypes:
                genotypes[j] += 1
            else:
                genotypes[j] = 1

    # Monoallelic in the selected samples: nothing to plot.
    if genotypes["0"] == 0 or genotypes["1"] == 0:
        remove_query_inputs()
        return None

    # Define window of interest around query SNP
    coord1 = int(
        snp_coord[genome_build_vars[genome_build]['position']]) - window
    if coord1 < 0:
        coord1 = 0
    coord2 = int(
        snp_coord[genome_build_vars[genome_build]['position']]) + window

    # Calculate proxy LD statistics in parallel, one subprocess per chunk
    windowChunkRanges = chunkWindow(
        int(snp_coord[genome_build_vars[genome_build]['position']]), window,
        num_subprocesses)

    commands = []
    for subprocess_id in range(num_subprocesses):
        getWindowVariantsArgs = " ".join([
            "True",
            str(snp),
            str(snp_coord['chromosome']),
            str(windowChunkRanges[subprocess_id][0]),
            str(windowChunkRanges[subprocess_id][1]),
            str(request), genome_build,
            str(subprocess_id)
        ])
        commands.append("python3 LDproxy_sub.py " + getWindowVariantsArgs)

    processes = [
        subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
        for command in commands
    ]

    # collect output in parallel
    def get_output(process):
        return process.communicate()[0].splitlines()

    # NOTE(review): presumably a workaround that lets the pool be created
    # from a thread lacking the `_children` attribute -- confirm.
    if not hasattr(threading.current_thread(), "_children"):
        threading.current_thread()._children = weakref.WeakKeyDictionary()

    pool = Pool(len(processes))
    out_raw = pool.map(get_output, processes)
    pool.close()
    pool.join()

    # Aggregate output: one tab-separated record per line, 14 fields each;
    # the absolute distance is appended as a 15th field for sorting.
    out_prox = []
    for chunk_lines in out_raw:
        for raw_line in chunk_lines:
            col = raw_line.decode('utf-8').strip().split("\t")
            col[6] = int(col[6])
            col[7] = float(col[7])
            col[8] = float(col[8])
            col.append(abs(int(col[6])))
            out_prox.append(col)

    # Sort output
    if r2_d not in ["r2", "d"]:
        r2_d = "r2"

    # Stable sort: distance first, then the LD statistic (descending), so
    # ties in LD stay ordered by distance.
    out_dist_sort = sorted(out_prox, key=operator.itemgetter(14))
    if r2_d == "r2":
        out_ld_sort = sorted(out_dist_sort,
                             key=operator.itemgetter(8),
                             reverse=True)
    else:
        out_ld_sort = sorted(out_dist_sort,
                             key=operator.itemgetter(7),
                             reverse=True)

    # Organize scatter plot data
    q_rs = []
    q_allele = []
    q_coord = []
    q_maf = []
    p_rs = []
    p_allele = []
    p_coord = []
    p_maf = []
    dist = []
    d_prime = []
    d_prime_round = []
    r2 = []
    r2_round = []
    corr_alleles = []
    regdb = []
    funct = []
    color = []
    size = []
    for i, row in enumerate(out_ld_sort):
        (q_rs_i, q_allele_i, q_coord_i, p_rs_i, p_allele_i, p_coord_i, dist_i,
         d_prime_i, r2_i, corr_alleles_i, regdb_i, q_maf_i, p_maf_i, funct_i,
         dist_abs) = row

        # Only variants with r2 above 0.01 are plotted.
        if float(r2_i) > 0.01:
            q_rs.append(q_rs_i)
            q_allele.append(q_allele_i)
            q_coord.append(float(q_coord_i.split(":")[1]) / 1000000)
            q_maf.append(str(round(float(q_maf_i), 4)))
            if p_rs_i == ".":
                p_rs_i = p_coord_i
            p_rs.append(p_rs_i)
            p_allele.append(p_allele_i)
            p_coord.append(float(p_coord_i.split(":")[1]) / 1000000)
            p_maf.append(str(round(float(p_maf_i), 4)))
            dist.append(str(round(dist_i / 1000000.0, 4)))
            d_prime.append(float(d_prime_i))
            d_prime_round.append(str(round(float(d_prime_i), 4)))
            r2.append(float(r2_i))
            r2_round.append(str(round(float(r2_i), 4)))
            corr_alleles.append(corr_alleles_i)

            # Correct Missing Annotations
            if regdb_i == ".":
                regdb_i = ""
            regdb.append(regdb_i)
            if funct_i == ".":
                funct_i = ""
            if funct_i == "NA":
                funct_i = "none"
            funct.append(funct_i)

            # Set Color: the top-ranked row is blue, annotated variants are
            # red, everything else orange.
            if i == 0:
                color_i = "blue"
            elif funct_i != "none" and funct_i != "":
                color_i = "red"
            else:
                color_i = "orange"
            color.append(color_i)

            # Set Size: marker grows with the proxy allele frequency.
            size_i = 9 + float(p_maf_i) * 14.0
            size.append(size_i)

    # Begin Bokeh Plotting
    from collections import OrderedDict
    from bokeh.embed import components, file_html
    from bokeh.layouts import gridplot
    from bokeh.models import HoverTool, LinearAxis, Range1d
    from bokeh.plotting import ColumnDataSource, curdoc, figure, output_file, reset_output, save
    from bokeh.resources import CDN
    from bokeh.io import export_svgs
    import svgutils.compose as sg

    reset_output()

    # Proxy Plot
    x = p_coord
    if r2_d == "r2":
        y = r2
    else:
        y = d_prime
    whitespace = 0.01
    xr = Range1d(start=coord1 / 1000000.0 - whitespace,
                 end=coord2 / 1000000.0 + whitespace)
    yr = Range1d(start=-0.03, end=1.03)
    sup_2 = "\u00B2"

    proxy_plot = figure(
        title="Proxies for " + snp + " in " + pop,
        min_border_top=2,
        min_border_bottom=2,
        min_border_left=60,
        min_border_right=60,
        h_symmetry=False,
        v_symmetry=False,
        plot_width=900,
        plot_height=600,
        x_range=xr,
        y_range=yr,
        tools="hover,tap,pan,box_zoom,box_select,undo,redo,reset,previewsave",
        logo=None,
        toolbar_location="above")

    proxy_plot.title.align = "center"

    # Add recombination rate from LDproxy.py output file
    recomb_file = tmp_dir + "recomb_" + request + ".json"
    with open(recomb_file) as recomb_fh:
        recomb_raw = recomb_fh.readlines()

    recomb_x = []
    recomb_y = []

    # One JSON document per line.
    for recomb_raw_obj in recomb_raw:
        recomb_obj = json.loads(recomb_raw_obj)
        recomb_x.append(
            int(recomb_obj[genome_build_vars[genome_build]['position']]) /
            1000000.0)
        # Divided by 100 so the rate shares the 0-1 LD axis; the right-hand
        # axis added below is labelled on a -3..103 range.
        recomb_y.append(float(recomb_obj['rate']) / 100.0)

    data = {
        'x': x,
        'y': y,
        'qrs': q_rs,
        'q_alle': q_allele,
        'q_maf': q_maf,
        'prs': p_rs,
        'p_alle': p_allele,
        'p_maf': p_maf,
        'dist': dist,
        'r': r2_round,
        'd': d_prime_round,
        'alleles': corr_alleles,
        'regdb': regdb,
        'funct': funct,
        'size': size,
        'color': color
    }
    source = ColumnDataSource(data)

    proxy_plot.line(recomb_x, recomb_y, line_width=1, color="black", alpha=0.5)

    proxy_plot.circle(x='x',
                      y='y',
                      size='size',
                      color='color',
                      alpha=0.5,
                      source=source)

    hover = proxy_plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("Query Variant", "@qrs @q_alle"),
        ("Proxy Variant", "@prs @p_alle"),
        ("Distance (Mb)", "@dist"),
        ("MAF (Query,Proxy)", "@q_maf,@p_maf"),
        ("R" + sup_2, "@r"),
        ("D\'", "@d"),
        ("Correlated Alleles", "@alleles"),
        ("RegulomeDB", "@regdb"),
        ("Functional Class", "@funct"),
    ])

    proxy_plot.text(x,
                    y,
                    text=regdb,
                    alpha=1,
                    text_font_size="7pt",
                    text_baseline="middle",
                    text_align="center",
                    angle=0)

    if r2_d == "r2":
        proxy_plot.yaxis.axis_label = "R" + sup_2
    else:
        proxy_plot.yaxis.axis_label = "D\'"

    proxy_plot.extra_y_ranges = {"y2_axis": Range1d(start=-3, end=103)}
    proxy_plot.add_layout(
        LinearAxis(y_range_name="y2_axis",
                   axis_label="Combined Recombination Rate (cM/Mb)"), "right")

    # Rug Plot
    # NOTE(review): the rug figure is built and switched to the SVG backend
    # below, but it is never exported or composed by this function.
    y2_ll = [-0.03] * len(x)
    y2_ul = [1.03] * len(x)
    yr_rug = Range1d(start=-0.03, end=1.03)

    data_rug = {
        'x': x,
        'y': y,
        'y2_ll': y2_ll,
        'y2_ul': y2_ul,
        'qrs': q_rs,
        'q_alle': q_allele,
        'q_maf': q_maf,
        'prs': p_rs,
        'p_alle': p_allele,
        'p_maf': p_maf,
        'dist': dist,
        'r': r2_round,
        'd': d_prime_round,
        'alleles': corr_alleles,
        'regdb': regdb,
        'funct': funct,
        'size': size,
        'color': color
    }
    source_rug = ColumnDataSource(data_rug)

    rug = figure(x_range=xr,
                 y_range=yr_rug,
                 border_fill_color='white',
                 y_axis_type=None,
                 title="",
                 min_border_top=2,
                 min_border_bottom=2,
                 min_border_left=60,
                 min_border_right=60,
                 h_symmetry=False,
                 v_symmetry=False,
                 plot_width=900,
                 plot_height=50,
                 tools="xpan,tap",
                 logo=None)

    rug.segment(x0='x',
                y0='y2_ll',
                x1='x',
                y1='y2_ul',
                source=source_rug,
                color='color',
                alpha=0.5,
                line_width=1)
    rug.toolbar_location = None

    if collapseTranscript == "false":
        # Gene Plot (All Transcripts)
        genes_file = tmp_dir + "genes_" + request + ".json"
        with open(genes_file) as genes_fh:
            genes_raw = genes_fh.readlines()

        genes_plot_start = []
        genes_plot_end = []
        genes_plot_y = []
        genes_plot_name = []
        exons_plot_x = []
        exons_plot_y = []
        exons_plot_w = []
        exons_plot_h = []
        exons_plot_name = []
        exons_plot_id = []
        exons_plot_exon = []
        # lines[k] holds the txEnd of the last gene placed on row k + 1.
        lines = [0]
        gap = 80000
        tall = 0.75
        if genes_raw:
            for gene_raw_obj in genes_raw:
                gene_obj = json.loads(gene_raw_obj)
                transcript_id = gene_obj["name"]
                strand = gene_obj["strand"]
                txStart = gene_obj["txStart"]
                txEnd = gene_obj["txEnd"]
                exonStarts = gene_obj["exonStarts"]
                exonEnds = gene_obj["exonEnds"]
                name = gene_obj["name2"]
                e_start = exonStarts.split(",")
                e_end = exonEnds.split(",")

                # Determine Y Coordinate: first row whose last gene ends more
                # than `gap` bp before this one starts, else a new row.
                i = 0
                y_coord = None
                while y_coord is None:
                    if i > len(lines) - 1:
                        y_coord = i + 1
                        lines.append(int(txEnd))
                    elif int(txStart) > (gap + lines[i]):
                        y_coord = i + 1
                        lines[i] = int(txEnd)
                    else:
                        i += 1

                genes_plot_start.append(int(txStart) / 1000000.0)
                genes_plot_end.append(int(txEnd) / 1000000.0)
                genes_plot_y.append(y_coord)
                genes_plot_name.append(name + "  ")

                # The last split element is skipped -- presumably the exon
                # lists end with a trailing comma; verify against the data.
                for i in range(len(e_start) - 1):
                    if strand == "+":
                        exon = i + 1
                    else:
                        exon = len(e_start) - 1 - i

                    width = (int(e_end[i]) - int(e_start[i])) / 1000000.0
                    x_coord = int(e_start[i]) / 1000000.0 + (width / 2)

                    exons_plot_x.append(x_coord)
                    exons_plot_y.append(y_coord)
                    exons_plot_w.append(width)
                    exons_plot_h.append(tall)
                    exons_plot_name.append(name)
                    exons_plot_id.append(transcript_id)
                    exons_plot_exon.append(exon)

        n_rows = len(lines)
        # Flip the row order so row 1 is drawn at the top of the track.
        genes_plot_yn = [n_rows - row + 0.5 for row in genes_plot_y]
        exons_plot_yn = [n_rows - row + 0.5 for row in exons_plot_y]
        yr2 = Range1d(start=0, end=n_rows)

        data_gene_plot = {
            'exons_plot_x': exons_plot_x,
            'exons_plot_yn': exons_plot_yn,
            'exons_plot_w': exons_plot_w,
            'exons_plot_h': exons_plot_h,
            'exons_plot_name': exons_plot_name,
            'exons_plot_id': exons_plot_id,
            'exons_plot_exon': exons_plot_exon
        }

        source_gene_plot = ColumnDataSource(data_gene_plot)

        # 250 px for up to two rows, plus 50 px per additional row.
        if len(lines) < 3:
            plot_h_pix = 250
        else:
            plot_h_pix = 250 + (len(lines) - 2) * 50

        gene_plot = figure(
            x_range=xr,
            y_range=yr2,
            border_fill_color='white',
            title="",
            min_border_top=2,
            min_border_bottom=2,
            min_border_left=60,
            min_border_right=60,
            h_symmetry=False,
            v_symmetry=False,
            plot_width=900,
            plot_height=plot_h_pix,
            tools="hover,tap,xpan,box_zoom,undo,redo,reset,previewsave",
            logo=None)

        gene_plot.segment(genes_plot_start,
                          genes_plot_yn,
                          genes_plot_end,
                          genes_plot_yn,
                          color="black",
                          alpha=1,
                          line_width=2)

        gene_plot.rect(x='exons_plot_x',
                       y='exons_plot_yn',
                       width='exons_plot_w',
                       height='exons_plot_h',
                       source=source_gene_plot,
                       fill_color="grey",
                       line_color="grey")
        gene_plot.xaxis.axis_label = "Chromosome " + snp_coord[
            'chromosome'] + " Coordinate (Mb)(" + genome_build_vars[
                genome_build]['title'] + ")"
        gene_plot.yaxis.axis_label = "Genes (All Transcripts)"
        gene_plot.ygrid.grid_line_color = None
        gene_plot.yaxis.axis_line_color = None
        gene_plot.yaxis.minor_tick_line_color = None
        gene_plot.yaxis.major_tick_line_color = None
        gene_plot.yaxis.major_label_text_color = None

        hover = gene_plot.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ("Gene", "@exons_plot_name"),
            ("ID", "@exons_plot_id"),
            ("Exon", "@exons_plot_exon"),
        ])

        gene_plot.text(genes_plot_start,
                       genes_plot_yn,
                       text=genes_plot_name,
                       alpha=1,
                       text_font_size="7pt",
                       text_font_style="bold",
                       text_baseline="middle",
                       text_align="right",
                       angle=0)

        gene_plot.toolbar_location = "below"

    # Gene Plot (Collapsed)
    else:
        genes_c_file = tmp_dir + "genes_c_" + request + ".json"
        with open(genes_c_file) as genes_c_fh:
            genes_c_raw = genes_c_fh.readlines()

        genes_c_plot_start = []
        genes_c_plot_end = []
        genes_c_plot_y = []
        genes_c_plot_name = []
        exons_c_plot_x = []
        exons_c_plot_y = []
        exons_c_plot_w = []
        exons_c_plot_h = []
        exons_c_plot_name = []
        exons_c_plot_id = []
        # lines_c[k] holds the txEnd of the last gene placed on row k + 1.
        lines_c = [0]
        gap = 80000
        tall = 0.75
        if genes_c_raw:
            for gene_c_raw_obj in genes_c_raw:
                gene_c_obj = json.loads(gene_c_raw_obj)
                txStart = gene_c_obj["txStart"]
                txEnd = gene_c_obj["txEnd"]
                exonStarts = gene_c_obj["exonStarts"]
                exonEnds = gene_c_obj["exonEnds"]
                name = gene_c_obj["name2"]
                transcripts = gene_c_obj["transcripts"]
                e_start = exonStarts.split(",")
                e_end = exonEnds.split(",")
                e_transcripts = transcripts.split(",")

                # Determine Y Coordinate: first row whose last gene ends more
                # than `gap` bp before this one starts, else a new row.
                i = 0
                y_coord = None
                while y_coord is None:
                    if i > len(lines_c) - 1:
                        y_coord = i + 1
                        lines_c.append(int(txEnd))
                    elif int(txStart) > (gap + lines_c[i]):
                        y_coord = i + 1
                        lines_c[i] = int(txEnd)
                    else:
                        i += 1

                genes_c_plot_start.append(int(txStart) / 1000000.0)
                genes_c_plot_end.append(int(txEnd) / 1000000.0)
                genes_c_plot_y.append(y_coord)
                genes_c_plot_name.append(name + "  ")

                # The last split element is skipped -- presumably the exon
                # lists end with a trailing comma; verify against the data.
                for i in range(len(e_start) - 1):
                    width = (int(e_end[i]) - int(e_start[i])) / 1000000.0
                    x_coord = int(e_start[i]) / 1000000.0 + (width / 2)

                    exons_c_plot_x.append(x_coord)
                    exons_c_plot_y.append(y_coord)
                    exons_c_plot_w.append(width)
                    exons_c_plot_h.append(tall)
                    exons_c_plot_name.append(name)
                    exons_c_plot_id.append(e_transcripts[i].replace("-", ","))

        n_rows_c = len(lines_c)
        # Flip the row order so row 1 is drawn at the top of the track.
        genes_c_plot_yn = [n_rows_c - row + 0.5 for row in genes_c_plot_y]
        exons_c_plot_yn = [n_rows_c - row + 0.5 for row in exons_c_plot_y]
        yr2_c = Range1d(start=0, end=n_rows_c)

        data_gene_c_plot = {
            'exons_c_plot_x': exons_c_plot_x,
            'exons_c_plot_yn': exons_c_plot_yn,
            'exons_c_plot_w': exons_c_plot_w,
            'exons_c_plot_h': exons_c_plot_h,
            'exons_c_plot_name': exons_c_plot_name,
            'exons_c_plot_id': exons_c_plot_id
        }
        source_gene_c_plot = ColumnDataSource(data_gene_c_plot)
        # 250 px for up to two rows, plus 50 px per additional row.
        if len(lines_c) < 3:
            plot_h_pix = 250
        else:
            plot_h_pix = 250 + (len(lines_c) - 2) * 50

        gene_plot = figure(
            min_border_top=2,
            min_border_bottom=0,
            min_border_left=100,
            min_border_right=5,
            x_range=xr,
            y_range=yr2_c,
            border_fill_color='white',
            title="",
            h_symmetry=False,
            v_symmetry=False,
            logo=None,
            plot_width=900,
            plot_height=plot_h_pix,
            tools=
            "hover,xpan,box_zoom,wheel_zoom,tap,undo,redo,reset,previewsave")

        gene_plot.segment(genes_c_plot_start,
                          genes_c_plot_yn,
                          genes_c_plot_end,
                          genes_c_plot_yn,
                          color="black",
                          alpha=1,
                          line_width=2)
        gene_plot.rect(x='exons_c_plot_x',
                       y='exons_c_plot_yn',
                       width='exons_c_plot_w',
                       height='exons_c_plot_h',
                       source=source_gene_c_plot,
                       fill_color="grey",
                       line_color="grey")
        gene_plot.text(genes_c_plot_start,
                       genes_c_plot_yn,
                       text=genes_c_plot_name,
                       alpha=1,
                       text_font_size="7pt",
                       text_font_style="bold",
                       text_baseline="middle",
                       text_align="right",
                       angle=0)
        hover = gene_plot.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ("Gene", "@exons_c_plot_name"),
            ("Transcript IDs", "@exons_c_plot_id"),
        ])

        gene_plot.xaxis.axis_label = "Chromosome " + snp_coord[
            'chromosome'] + " Coordinate (Mb)(" + genome_build_vars[
                genome_build]['title'] + ")"
        gene_plot.yaxis.axis_label = "Genes (Transcripts Collapsed)"
        gene_plot.ygrid.grid_line_color = None
        gene_plot.yaxis.axis_line_color = None
        gene_plot.yaxis.minor_tick_line_color = None
        gene_plot.yaxis.major_tick_line_color = None
        gene_plot.yaxis.major_label_text_color = None

        gene_plot.toolbar_location = "below"

    # Change output backend to SVG temporarily for headless export
    # Will be changed back to canvas in LDlink.js
    proxy_plot.output_backend = "svg"
    rug.output_backend = "svg"
    gene_plot.output_backend = "svg"
    export_svgs(proxy_plot,
                filename=tmp_dir + "proxy_plot_1_" + request + ".svg")
    export_svgs(gene_plot,
                filename=tmp_dir + "gene_plot_1_" + request + ".svg")

    # 1 pixel = 0.0264583333 cm
    svg_height = str(20.00 + (0.0264583333 * plot_h_pix)) + "cm"
    # Same layout at 5x scale, used for the raster (PNG/JPEG) exports.
    svg_height_scaled = str(100.00 + (0.1322916665 * plot_h_pix)) + "cm"

    # Concatenate svgs: proxy plot on top, gene track shifted below it
    sg.Figure("24.59cm", svg_height,
              sg.SVG(tmp_dir + "proxy_plot_1_" + request + ".svg"),
              sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").move(
                  0, 630)).save(tmp_dir + "proxy_plot_" + request + ".svg")

    sg.Figure(
        "122.95cm", svg_height_scaled,
        sg.SVG(tmp_dir + "proxy_plot_1_" + request + ".svg").scale(5),
        sg.SVG(tmp_dir + "gene_plot_1_" + request + ".svg").scale(5).move(
            0, 3150)).save(tmp_dir + "proxy_plot_scaled_" + request + ".svg")

    # Export to PDF
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir + "proxy_plot_" +
                    request + ".svg " + tmp_dir + "proxy_plot_" + request +
                    ".pdf",
                    shell=True)
    # Export to PNG
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir +
                    "proxy_plot_scaled_" + request + ".svg " + tmp_dir +
                    "proxy_plot_" + request + ".png",
                    shell=True)
    # Export to JPEG
    subprocess.call("phantomjs ./rasterize.js " + tmp_dir +
                    "proxy_plot_scaled_" + request + ".svg " + tmp_dir +
                    "proxy_plot_" + request + ".jpeg",
                    shell=True)
    # Remove individual SVG files after they are combined
    subprocess.call("rm " + tmp_dir + "proxy_plot_1_" + request + ".svg",
                    shell=True)
    subprocess.call("rm " + tmp_dir + "gene_plot_1_" + request + ".svg",
                    shell=True)
    # Remove scaled SVG file after it is converted to png and jpeg
    subprocess.call("rm " + tmp_dir + "proxy_plot_scaled_" + request + ".svg",
                    shell=True)

    reset_output()

    # Remove temporary files
    remove_query_inputs()
    subprocess.call("rm " + tmp_dir + "genes_*" + request + "*.json",
                    shell=True)
    # The recombination data is read from a .json file above; the previous
    # cleanup targeted a .txt name that is never created, leaving it behind.
    subprocess.call("rm " + tmp_dir + "recomb_" + request + ".json",
                    shell=True)

    # Return plot output
    return None
Ejemplo n.º 21
0
    def add_title(self, fig, title, y_pos=-0.2, w=None):
        """
        Add a numbered title to a figure and advance the figure counter.

        The title text is built as '<self.prefix><self.current_num>  <title>'.

        `fig`: a plotly/svgutils Figure object, or the path of a png file.
            A png path is first wrapped into an svgutils Figure scaled to the
            target width.
        `title`: figure title.
        `y_pos`: vertical position of the title. For a plotly figure it is
            used as the annotation's paper-coordinate `y`. For an svgutils
            figure the default (-0.2) places the title 5px above the bottom
            edge; any other value must lie in [0, 1] and is read as a
            fraction of the output height measured from the bottom.
        `w`: output image width in px; `self.output_width` is used when `w`
            is omitted (or falsy).

        Returns the plotly Figure after its layout has been updated, or a new
        svgutils Figure wrapping the scaled input. Returns None (after
        printing a message) when `fig` is a path that does not exist or does
        not end in '.png'; the counter is not advanced in that case.
        """
        if isinstance(fig, str):
            # Best-effort input validation: report and bail out, do not raise.
            if not os.path.exists(fig):
                print('File %s does not exist!' % (fig))
                return
            if os.path.splitext(os.path.abspath(fig))[-1] != '.png':
                print('Only png image supported.')
                return

        # Target width, shared by every input kind below.
        _w = w if w else self.output_width

        if isinstance(fig, str):
            # Only the pixel size is needed from PIL, so release the file
            # handle immediately instead of leaving it open.
            with Image.open(fig) as im:
                im_w, im_h = im.width, im.height
            _scalar = _w * 1.0 / im_w

            # Embed the png in an svgutils Figure scaled to the target width.
            fig = sc.Figure(_w, ceil(_scalar * im_h),
                            sc.Image(im_w, im_h, fig).scale(_scalar))

        assert isinstance(fig, go.Figure) or isinstance(fig, sc.Figure)

        # The number shown in the title is the counter value *before* it is
        # incremented at the end of each branch.
        title_txt = '%s%s  ' % (self.prefix, self.current_num) + title

        if isinstance(fig, go.Figure):
            # compute image size; default plotly size is 700px x 450px
            if fig.layout.width is not None:
                src_w = fig.layout.width
            else:
                src_w = 700
            if fig.layout.height is not None:
                src_h = fig.layout.height
            else:
                src_h = 450.0
            _scalar = _w * 1.0 / src_w
            _h = src_h * _scalar

            title_annotation = go.layout.Annotation(
                xref='paper',
                yref='paper',
                x=0.5,
                y=y_pos,
                xanchor='center',
                yanchor='top',
                text=title_txt,
                font=dict(
                    family=self.font_family,
                    size=self.font_size,
                    color="#000000",
                ),
                showarrow=False,
            )

            # Keep the annotations already on the figure and append the title.
            fig.update_layout(
                width=_w,
                height=_h,
                annotations=list(fig.layout['annotations']) + [title_annotation],
            )

            # increasing counter number
            self.current_num += 1

            return fig

        # svgutils Figure: scale to the target width and reserve an extra
        # 25px of height, which is where the default title is drawn.
        _scalar = _w * 1.0 / fig.width.value
        _h = fig.height.value * _scalar + 25

        if y_pos == -0.2:
            # default position: 5px above the bottom edge
            text_y = _h - 5
        else:
            assert y_pos <= 1 and y_pos >= 0
            text_y = int(_h * (1 - y_pos))

        # if w < default output width, widen the canvas to the default width
        # and centre the figure horizontally; otherwise no margin is added.
        _outw = max(_w, self.output_width)
        _move_x = int((_outw - _w) / 2)

        # add title
        new_figure = sc.Figure(_outw, _h,
                               fig.scale(_scalar).move(_move_x, 0),
                               sc.Text(title_txt,
                                       _outw / 2,
                                       text_y,
                                       anchor='middle',
                                       size=self.font_size,
                                       font=self.font_family,
                                       )
                               )

        # increasing counter number
        self.current_num += 1

        return new_figure