Ejemplo n.º 1
0
def QQPlot(arguments,pv,unique_pv,fname):
    """Save a quantile-quantile plot of observed vs. expected -log10 p-values.

    Parameters
    ----------
    arguments : object
        Options object. Attributes read here: ``ignore`` (substring of
        ``fname`` that disables the plot, or None), ``distinct`` (plot
        ``unique_pv`` instead of ``pv``), ``estpv`` (draw the beta-quantile
        band), ``title``, ``gc`` (annotate the genomic-control lambda),
        ``out`` (output directory) and ``iformat`` (image file extension).
    pv : array
        Observed p-values.
    unique_pv : array
        P-values used instead of ``pv`` when ``arguments.distinct`` is set.
    fname : str
        Phenotype name; used in the title and in the output file name.

    Returns
    -------
    None
        The figure is written to ``<out>/qqplot_<fname>.<iformat>``.

    Notes
    -----
    The matplotlib ``rcParams`` are modified globally, also when the plot is
    skipped because of ``arguments.ignore``.
    """
    font_size = 18
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.size'] = font_size
    mpl.rcParams['font.weight'] = 'medium'
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'white'
    mpl.rcParams['grid.linestyle'] = '-'
    mpl.rcParams['grid.color'] = 'LightGray'

    if arguments.ignore is not None and arguments.ignore in fname:
        return

    if arguments.distinct:
        pv = unique_pv

    nr_tests = pv.shape[0]

    pl.figure(figsize=(5, 5))
    # Expected p-values: 1/n, 1/n + 1/(n+1), ... Built from an integer range
    # so that the grid always has exactly one point per observed p-value; a
    # float-step arange does not guarantee its length.
    pv_uni = 1.0 / float(nr_tests) + sp.arange(nr_tests) * (1.0 / float(nr_tests + 1))
    expected = -sp.log10(pv_uni)
    # Diagonal: where the observations would lie under the null.
    pl.plot(expected, -sp.log10(sp.sort(pv_uni)), 'b--')
    pl.ylim(0, (-sp.log10(pv[:])).max() + 1)
    pl.plot(expected,
            -sp.log10(sp.sort(pv[:], axis=0)),
            '.',
            color="#F68E55",
            markersize=12,
            markeredgewidth=0,
            alpha=1)

    # Band derived from the 5%, 50% and 95% quantiles of Beta(m, n - m),
    # evaluated on a log-spaced grid of ranks m.
    if arguments.estpv:
        datapoints = 10**(sp.arange(sp.log10(0.5),
                                    sp.log10(nr_tests - 0.5) + 0.1, 0.1))
        remaining = nr_tests - datapoints
        # beta.ppf broadcasts over its shape parameters, so no loop is needed.
        beta_tmp = stats.beta.ppf(0.5, datapoints, remaining)
        beta_alpha = stats.beta.ppf(0.05, datapoints, remaining)
        beta_nalpha = stats.beta.ppf(1 - 0.05, datapoints, remaining)
        estimated_pvals = datapoints / nr_tests
        lower_bound = -sp.log10(estimated_pvals - (beta_tmp - beta_alpha))
        upper_bound = -sp.log10(estimated_pvals + (beta_nalpha - beta_tmp))
        pl.fill_between(-sp.log10(estimated_pvals),
                        lower_bound,
                        upper_bound,
                        color='#00BFF3',
                        alpha=0.4,
                        linewidth=0)

    if arguments.title:
        pl.title("Phenotype: %s" % (fname))
    pl.xlabel('Expected $-log10(p-value)$')
    pl.ylabel('Observed $-log10(p-value)$')
    if arguments.gc:
        # Median of the 1-d.o.f. chi-square statistics divided by 0.456.
        gc = sp.median(stats.chi2.isf(pv, 1)) / 0.456
        # Raw string: "\h" and "\l" are TeX commands, not Python escapes.
        pl.text(4, 1, r"$\hat \lambda=%.2f$" % (gc))
    remove_border()
    pl.subplots_adjust(left=0.14, bottom=0.13, right=0.97, top=0.95,
                       wspace=0.45)
    pl.savefig(os.path.join(arguments.out,
                            'qqplot_' + fname + '.' + arguments.iformat))
    pl.close()
Ejemplo n.º 2
0
def LDPlot(arguments, identifiers, encoded, maf, pv, unique_pv, positions,
           chromosomes, fname, pathogenicity_map):
    """Save one regional LD plot per focal SNP.

    Focal SNPs are either the single SNP named by ``arguments.selected_snp``
    or, when that is ``'-1'``, every SNP whose p-value is at or below the
    Bonferroni threshold. Each figure shows the -log10 p-values around the
    focal SNP coloured by r^2 with it, a minor-allele-frequency track and,
    if a gene database is configured, a gene-annotation track.

    Parameters
    ----------
    arguments : object
        Options object. Attributes read: ``ignore``, ``sql_gene`` (path of a
        sqlite database with a ``geneannotation`` table, or None),
        ``nr_hypothesis`` (-1 = use the number of p-values), ``distinct``,
        ``distance`` (half-width of the plotted window), ``r2_measure``
        (``"excoffier_slatkin"``, ``"roger_huff"`` or ``"pearson_r2"``),
        ``selected_snp`` (``"<chrom>_<pos>"`` or ``'-1'``), ``title``,
        ``out`` and ``iformat``.
    identifiers : array of str
        SNP identifiers of the form ``"<chrom>_<pos>"``.
    encoded : 2-D array
        Genotype matrix, indexed as ``encoded[:, snp_index]``.
    maf : array
        Per-SNP minor allele frequencies, indexed like ``identifiers``.
    pv, unique_pv : array
        P-values; ``unique_pv`` only determines the threshold when
        ``arguments.distinct`` is set.
    positions, chromosomes : array
        Per-SNP position and chromosome, aligned with ``pv``.
    fname : str
        Phenotype name used in titles and file names.
    pathogenicity_map : mapping
        Maps ``"<chrom>_<pos>"`` to objects with a ``prediction`` attribute.

    Raises
    ------
    SystemExit
        If ``arguments.selected_snp`` is not contained in ``identifiers``.
    ValueError
        If ``arguments.r2_measure`` is not one of the supported names.
    """
    font_size = 16
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.size'] = font_size
    mpl.rcParams['font.weight'] = 'medium'
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'white'
    mpl.rcParams['grid.linestyle'] = '-'
    mpl.rcParams['grid.color'] = 'LightGray'

    if arguments.ignore is not None and arguments.ignore in fname:
        return

    if arguments.nr_hypothesis == -1:
        nr_tests = unique_pv.shape[0] if arguments.distinct else pv.shape[0]
    else:
        nr_tests = arguments.nr_hypothesis
    bf_threshold = 0.05 / nr_tests

    single_snp = arguments.selected_snp != '-1'
    if single_snp:
        if sp.where(identifiers == arguments.selected_snp)[0].shape[0] == 0:
            print("\nSNP " + arguments.selected_snp + " not found in dataset!")
            print("Please select a different SNP identifier!\n")
            raise SystemExit()
        snp_fields = arguments.selected_snp.split("_")
        chrom_list = sp.array([snp_fields[0]])
        selected_pos = sp.array([int(snp_fields[1])])
    else:
        chrom_list = sp.unique(chromosomes)

    pl.ion()

    # Maps r^2 in [0, 1] onto the "jet" colour map; shared by all figures.
    cpick = cm.ScalarMappable(norm=mcol.Normalize(vmin=0.0, vmax=1),
                              cmap=pl.get_cmap("jet"))
    cpick.set_array([])

    sqlite = None
    sqlite_cursor = None
    if arguments.sql_gene is not None:
        sqlite = sqlite3.connect(arguments.sql_gene)
        sqlite_cursor = sqlite.cursor()

    try:
        for chrom in chrom_list:
            on_chrom = chromosomes == chrom
            chrom_pos = positions[on_chrom]
            chrom_pv = pv[on_chrom]

            # Focal SNPs and their -log10 p-values are taken before sorting,
            # so both arrays share the same (unsorted) order.
            if single_snp:
                focal_pos = selected_pos
                focal_idx = sp.where(selected_pos[0] == chrom_pos)[0]
            else:
                focal_idx = sp.where(chrom_pv <= bf_threshold)[0]
                focal_pos = chrom_pos[focal_idx]
            focal_logp = -sp.log10(chrom_pv[focal_idx])

            order = sp.argsort(chrom_pos)
            chrom_pos = chrom_pos[order]
            chrom_pv = chrom_pv[order]

            for k, pp in enumerate(focal_pos):
                _ld_plot_snp(arguments, identifiers, encoded, maf, fname,
                             pathogenicity_map, chrom, chrom_pos, chrom_pv,
                             pp, focal_logp[k], bf_threshold, cpick,
                             sqlite_cursor)
    finally:
        # Release the database handle even if plotting fails.
        if sqlite is not None:
            sqlite.close()


def _ld_marker(pathogenicity_map, snp_id):
    """Return '^' for a DELETERIOUS entry, 'v' for any other entry, else '.'."""
    if snp_id not in pathogenicity_map:
        return '.'
    if pathogenicity_map[snp_id].prediction == "DELETERIOUS":
        return '^'
    return 'v'


def _ld_r2(r2_measure, genotypes, focal_genotypes):
    """Return the squared correlation of two flattened genotype vectors."""
    if r2_measure == "excoffier_slatkin":
        return ld.esem_r(sp.array(genotypes, dtype="int"),
                         sp.array(focal_genotypes, dtype="int"))**2
    if r2_measure == "roger_huff":
        return ld.get_r(sp.array(genotypes, dtype="int"),
                        sp.array(focal_genotypes, dtype="int"))**2
    if r2_measure == "pearson_r2":
        return stats.pearsonr(genotypes, focal_genotypes)[0]**2
    raise ValueError("Unknown r2 measure: %s" % (r2_measure,))


def _ld_draw_gene_track(ax, cursor, chrom, first_pos, last_pos):
    """Draw every gene overlapping [first_pos, last_pos] as an arrow on ax."""
    cursor.execute(
        "SELECT * FROM geneannotation WHERE chromosome_id=? AND annotation_end >=? AND annotation_start<=?",
        (str(chrom), int(first_pos), int(last_pos)))
    width = 0.1
    for g, annotation in enumerate(cursor.fetchall()):
        # Columns used: 3 = start, 4 = end, 5 = strand, 7 = name.
        start = int(annotation[3])
        length = int(annotation[4]) - start
        name = annotation[7].replace("Contig20-", "")
        # Alternate between two heights per strand to reduce label overlap.
        if annotation[5] == "+":
            y_pos = 0.6 if g % 2 == 0 else 0.5
            y_text = y_pos + 2 * width
            x_start, dx, shape, colour = start, length, "right", '#F26C4F'
        else:
            y_pos = 0.15 if g % 2 == 0 else 0.35
            y_text = y_pos - 2 * width
            x_start, dx, shape, colour = start + length, -length, "left", '#00BFF3'
        ax.arrow(x_start,
                 y_pos,
                 dx,
                 0,
                 head_width=0.25,
                 head_length=length * 0.15,
                 fc=colour,
                 ec=colour,
                 alpha=0.6,
                 width=width,
                 length_includes_head=True,
                 shape=shape,
                 head_starts_at_zero=True)
        ax.text(start + length / 2.0,
                y_text,
                name,
                size=7,
                ha='center',
                va='center',
                color="k")


def _ld_plot_snp(arguments, identifiers, encoded, maf, fname,
                 pathogenicity_map, chrom, chrom_pos, chrom_pv, pp, pp_logp,
                 bf_threshold, cpick, sqlite_cursor):
    """Draw and save the regional plot around focal position pp.

    chrom_pos / chrom_pv must be sorted by position; pp_logp is the
    -log10 p-value of the focal SNP.
    """
    snp_distance = arguments.distance
    with_genes = sqlite_cursor is not None
    grid = (6, 5) if with_genes else (5, 5)
    focal_id = str(chrom) + "_" + str(pp)

    fig = pl.figure(figsize=(12, 4))
    ax1 = pl.subplot2grid(grid, (0, 0), colspan=5, rowspan=4)
    ax1.axhline(-sp.log10(bf_threshold), color='#F68E55', linestyle='--')
    ax1.plot(pp,
             pp_logp,
             _ld_marker(pathogenicity_map, focal_id),
             color="#b000ff",
             alpha=0.9,
             markersize=10)

    # One mask covers every case, including a window that touches position 0
    # or extends past both ends of the chromosome.
    ranges = sp.where((chrom_pos >= pp - snp_distance)
                      & (chrom_pos <= pp + snp_distance))[0]
    window_pos = chrom_pos[ranges]
    x_limits = (window_pos[0] - snp_distance * 0.01,
                window_pos[-1] + snp_distance * 0.01)
    # Tick at the focal SNP of *this* figure, plus the window borders.
    xlabels = [window_pos[0], pp, window_pos[-1]]

    pp_idx = sp.where(pp == chrom_pos)[0][0]
    focal_cols = sp.where(identifiers == focal_id)[0]
    focal_genotypes = encoded[:, focal_cols].flatten()

    vline_map = {}
    rr_color_list = []
    maf_list = []
    for sra in ranges:
        snp_id = str(chrom) + "_" + str(chrom_pos[sra])
        sind = sp.where(identifiers == snp_id)[0]
        maf_list.append(maf[sind])

        rr = _ld_r2(arguments.r2_measure, encoded[:, sind].flatten(),
                    focal_genotypes)
        rr_colour = cpick.to_rgba(rr)
        rr_color_list.append(rr_colour)

        if chrom_pos[sra] == pp:
            # The focal SNP itself was already plotted; only add its guide.
            ax1.vlines(pp,
                       0,
                       pp_logp,
                       color='#b000ff',
                       linestyle='--',
                       alpha=0.8,
                       linewidth=0.5)
            vline_map[pp] = "#b000ff"
            continue

        # Stronger LD with the focal SNP -> larger, more opaque marker.
        if rr >= 0.7:
            alpha, markersize = 0.9, 10
        elif rr >= 0.5:
            alpha, markersize = 0.9, 9
        elif rr >= 0.3:
            alpha, markersize = 0.9, 8
        else:
            alpha, markersize = 0.4, 7
        snp_logp = -sp.log10(chrom_pv[sra])
        ax1.plot(chrom_pos[sra],
                 snp_logp,
                 _ld_marker(pathogenicity_map, snp_id),
                 color=rr_colour,
                 alpha=alpha,
                 markeredgecolor='#DDDDDD',
                 markersize=markersize)
        if chrom_pv[sra] <= bf_threshold:
            ax1.vlines(chrom_pos[sra],
                       0,
                       snp_logp,
                       color=rr_colour,
                       linestyle='--',
                       alpha=0.8,
                       linewidth=0.5)
            vline_map[chrom_pos[sra]] = rr_colour

    remove_border()
    y_top = sp.maximum(
        -sp.log10(bf_threshold) + 1,
        sp.maximum((-sp.log10(chrom_pv[ranges])).max() + 1,
                   -sp.log10(chrom_pv[pp_idx]) + 1))
    ax1.set_ylabel("$-log10(p-value)$")
    ax1.set_ylim(0, y_top)
    if arguments.title:
        ax1.set_title("Phenotype: %s, SNP: %d" % (fname, pp))
    ax1.set_xticks([])
    ax1.set_yticks(sp.arange(1, sp.ceil(y_top), 3))
    ax1.set_xlim(x_limits[0], x_limits[1])
    ax1.yaxis.grid()
    if len(pathogenicity_map) > 0:
        deleterious = pl.Line2D(range(1),
                                range(1),
                                marker='^',
                                color="white",
                                linestyle="None")
        benign = pl.Line2D(range(1),
                           range(1),
                           marker='v',
                           color="white",
                           linestyle="None")
        leg = pl.legend(
            [benign, deleterious],
            ['Benign Missense Mutation', 'Deleterious Missense Mutation'],
            frameon=True,
            scatterpoints=1,
            prop={'size': 12},
            fancybox=True,
            bbox_to_anchor=(1, 1.2),
            numpoints=1,
            ncol=2)
        leg.get_frame().set_alpha(0.5)
        leg.get_frame().set_linewidth(0.2)
        leg.get_frame().set_facecolor("#DDDDDD")

    # Minor-allele-frequency track.
    ax3 = pl.subplot2grid(grid, (4, 0), colspan=5)
    if not with_genes:
        ax3.set_xlabel("Genomic positions on chromosome: " + str(chrom))
    ax3.set_ylim(0, 0.6)
    ax3.set_yticks([0.25, 0.5])
    ax3.yaxis.grid()
    ax3.set_ylabel("MAF")
    ax3.set_xticks([])
    remove_border(top=True)
    ax3.set_xlim(x_limits[0], x_limits[1])
    for key in vline_map:
        ax3.axvline(key,
                    color=vline_map[key],
                    linestyle='--',
                    alpha=0.8,
                    linewidth=0.5)
    for snp_pos, snp_maf, colour in zip(window_pos, maf_list, rr_color_list):
        ax3.plot(snp_pos, snp_maf, 'x', color=colour)

    # Gene-annotation track (only with a gene database).
    if with_genes:
        ax2 = pl.subplot2grid(grid, (5, 0), colspan=5)
        remove_border(top=True)
        ax2.set_xlabel("Genomic positions on chromosome: " + str(chrom))
        ax2.set_yticks([])
        ax2.set_xlim(x_limits[0], x_limits[1])
        for key in vline_map:
            ax2.axvline(key,
                        color=vline_map[key],
                        linestyle='--',
                        alpha=0.8,
                        linewidth=0.5)
        _ld_draw_gene_track(ax2, sqlite_cursor, chrom, window_pos[0],
                            window_pos[-1])

    pl.xticks(xlabels, xlabels)
    if with_genes:
        cax = fig.add_axes([0.93, 0.4, 0.01, 0.5])
        pl.colorbar(cpick, label="SNP r^2", cax=cax)
        right = 0.92
    else:
        right = 0.99
    pl.subplots_adjust(left=0.07,
                       bottom=0.15,
                       right=right,
                       top=0.9,
                       wspace=0,
                       hspace=0)

    pl.savefig(
        os.path.join(
            arguments.out, 'ld_plot_chrom_' + str(chrom) + "_pos_" +
            str(pp) + "_" + fname + '.' + arguments.iformat))
    pl.close()
Ejemplo n.º 3
0
def QQPlot(arguments, pv, unique_pv, fname):
    """Save a quantile-quantile plot of observed vs. expected -log10 p-values.

    Parameters
    ----------
    arguments : object
        Options object. Attributes read here: ``ignore`` (substring of
        ``fname`` that disables the plot, or None), ``distinct`` (plot
        ``unique_pv`` instead of ``pv``), ``estpv`` (draw the beta-quantile
        band), ``title``, ``gc`` (annotate the genomic-control lambda),
        ``out`` (output directory) and ``iformat`` (image file extension).
    pv : array
        Observed p-values.
    unique_pv : array
        P-values used instead of ``pv`` when ``arguments.distinct`` is set.
    fname : str
        Phenotype name; used in the title and in the output file name.

    Returns
    -------
    None
        The figure is written to ``<out>/qqplot_<fname>.<iformat>``.

    Notes
    -----
    The matplotlib ``rcParams`` are modified globally, also when the plot is
    skipped because of ``arguments.ignore``.
    """
    font_size = 18
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.size'] = font_size
    mpl.rcParams['font.weight'] = 'medium'
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'white'
    mpl.rcParams['grid.linestyle'] = '-'
    mpl.rcParams['grid.color'] = 'LightGray'

    if arguments.ignore is not None and arguments.ignore in fname:
        return

    if arguments.distinct:
        pv = unique_pv

    nr_tests = pv.shape[0]

    pl.figure(figsize=(5, 5))
    # Expected p-values: 1/n, 1/n + 1/(n+1), ... Built from an integer range
    # so that the grid always has exactly one point per observed p-value; a
    # float-step arange does not guarantee its length.
    pv_uni = 1.0 / float(nr_tests) + sp.arange(nr_tests) * (1.0 / float(nr_tests + 1))
    expected = -sp.log10(pv_uni)
    # Diagonal: where the observations would lie under the null.
    pl.plot(expected, -sp.log10(sp.sort(pv_uni)), 'b--')
    pl.ylim(0, (-sp.log10(pv[:])).max() + 1)
    pl.plot(expected,
            -sp.log10(sp.sort(pv[:], axis=0)),
            '.',
            color="#F68E55",
            markersize=12,
            markeredgewidth=0,
            alpha=1)

    # Band derived from the 5%, 50% and 95% quantiles of Beta(m, n - m),
    # evaluated on a log-spaced grid of ranks m.
    if arguments.estpv:
        datapoints = 10**(sp.arange(sp.log10(0.5),
                                    sp.log10(nr_tests - 0.5) + 0.1, 0.1))
        remaining = nr_tests - datapoints
        # beta.ppf broadcasts over its shape parameters, so no loop is needed.
        beta_tmp = stats.beta.ppf(0.5, datapoints, remaining)
        beta_alpha = stats.beta.ppf(0.05, datapoints, remaining)
        beta_nalpha = stats.beta.ppf(1 - 0.05, datapoints, remaining)
        estimated_pvals = datapoints / nr_tests
        lower_bound = -sp.log10(estimated_pvals - (beta_tmp - beta_alpha))
        upper_bound = -sp.log10(estimated_pvals + (beta_nalpha - beta_tmp))
        pl.fill_between(-sp.log10(estimated_pvals),
                        lower_bound,
                        upper_bound,
                        color='#00BFF3',
                        alpha=0.4,
                        linewidth=0)

    if arguments.title:
        pl.title("Phenotype: %s" % (fname))
    pl.xlabel('Expected $-log10(p-value)$')
    pl.ylabel('Observed $-log10(p-value)$')
    if arguments.gc:
        # Median of the 1-d.o.f. chi-square statistics divided by 0.456.
        gc = sp.median(stats.chi2.isf(pv, 1)) / 0.456
        # Raw string: "\h" and "\l" are TeX commands, not Python escapes.
        pl.text(4, 1, r"$\hat \lambda=%.2f$" % (gc))
    remove_border()
    pl.subplots_adjust(left=0.14,
                       bottom=0.13,
                       right=0.97,
                       top=0.95,
                       wspace=0.45)
    pl.savefig(
        os.path.join(arguments.out,
                     'qqplot_' + fname + '.' + arguments.iformat))
    pl.close()
Ejemplo n.º 4
0
def ManhattanPlot(arguments, pv, positions, chromosomes, hashs, unique_pv,
                  fname):
    """Save a genome-wide Manhattan plot of -log10 p-values.

    Chromosomes are laid out side by side along the x axis. SNPs at or below
    the Bonferroni threshold are highlighted, and every second chromosome is
    shaded.

    Parameters
    ----------
    arguments : object
        Options object. Attributes read: ``ignore``, ``distinct``,
        ``nr_hypothesis`` (-1 = use the number of p-values),
        ``nr_hypothesis2`` (-1 = no second threshold line), ``title``,
        ``gc``, ``out`` and ``iformat``.
    pv : array
        P-value per SNP.
    positions, chromosomes : array
        Per-SNP position and chromosome, aligned with ``pv``.
    hashs
        Not used by this function.
    unique_pv : array
        P-values used for the threshold and for the genomic-control lambda
        when ``arguments.distinct`` is set.
    fname : str
        Phenotype name used in the title and the output file name.

    Returns
    -------
    None
        The figure is written to ``<out>/manhattan_<fname>.<iformat>``.
    """
    font_size = 14
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.size'] = font_size
    mpl.rcParams['font.weight'] = 'medium'
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'white'
    mpl.rcParams['grid.linestyle'] = '-'
    mpl.rcParams['grid.color'] = 'LightGray'

    if arguments.ignore is not None and arguments.ignore in fname:
        return

    chrom_list = sp.unique(chromosomes)
    unsnps = arguments.distinct

    pl.ion()
    pl.figure(figsize=(20, 3))

    # Effective number of tests behind the primary Bonferroni threshold.
    if arguments.nr_hypothesis == -1:
        nr_tests = unique_pv.shape[0] if unsnps else pv.shape[0]
    else:
        nr_tests = arguments.nr_hypothesis
    bf_threshold = 0.05 / nr_tests

    max_y = (-sp.log10(pv[:])).max()
    if max_y < -sp.log10(bf_threshold):
        max_y = -sp.log10(bf_threshold)
    max_y += 1

    pl.axhline(-sp.log10(bf_threshold), color='#F68E55', linestyle='--')
    if arguments.nr_hypothesis2 != -1:
        pl.axhline(-sp.log10(0.05 / arguments.nr_hypothesis2),
                   color='#00BFF3',
                   linestyle='--')
        # Ratio relative to the tests actually used above, so the label is
        # also meaningful when nr_hypothesis is the -1 sentinel.
        factor = sp.around(float(arguments.nr_hypothesis2) / float(nr_tests))
        leg = pl.legend(['Bonferroni', 'Bonferroni ' + str(factor) + 'x'],
                        ncol=2,
                        fancybox=True,
                        prop={'size': 12},
                        bbox_to_anchor=(1, 1.05))
        leg.get_frame().set_alpha(0.8)
        leg.get_frame().set_linewidth(0.2)

    pl.ylabel("$-log10(p-value)$")

    current_pos = 0  # x offset of the chromosome being drawn
    split_list = []  # x coordinate at which each chromosome ends
    xtick_list = []  # x coordinate of each chromosome label
    for chrom in chrom_list:
        on_chrom = chromosomes == chrom
        chrom_pos = positions[on_chrom]
        chrom_pv = pv[on_chrom]

        below = sp.where(chrom_pv > bf_threshold)[0]
        pl.plot(
            chrom_pos[below] + current_pos,
            -sp.log10(chrom_pv[below]),
            '.',
            alpha=0.3,
            color="#A0A0A0",
            markersize=5,
        )

        significant = sp.where(chrom_pv <= bf_threshold)[0]
        pl.plot(
            chrom_pos[significant] + current_pos,
            -sp.log10(chrom_pv[significant]),
            '.',
            color="#b000ff",
            alpha=0.9,
            markersize=8,
        )

        xtick_list.append(float(current_pos) + float(chrom_pos.max()) / 2.0)
        current_pos = chrom_pos.max() + current_pos
        split_list.append(current_pos)
        pl.xlim(0, current_pos)

    pl.ylim(0, max_y)

    # Shade the span between boundaries i and i + 1 for every even i, i.e.
    # every second chromosome; stop before running off the end of the list.
    for i in range(0, len(split_list) - 1, 2):
        pl.fill_between([split_list[i], split_list[i + 1]],
                        0,
                        max_y,
                        color="#D7D7D7",
                        linewidth=0,
                        alpha=0.5)

    # Separator after every chromosome except the last.
    for split in split_list[:-1]:
        pl.axvline(split, color='k', linestyle='--')

    pl.xticks(xtick_list, chrom_list)
    pl.subplots_adjust(left=0.05,
                       bottom=0.09,
                       right=0.99,
                       top=0.9,
                       wspace=0.45)
    remove_border()
    if unsnps:
        pv = unique_pv

    if arguments.title:
        if arguments.gc:
            # Median of the 1-d.o.f. chi-square statistics divided by 0.456.
            gc = sp.median(stats.chi2.isf(pv, 1)) / 0.456
            # Raw string: "\h" and "\l" are TeX commands, not Python escapes.
            pl.title(r"Phenotype: %s, genomic control $\hat \lambda=%0.2f$" %
                     (fname, gc))
        else:
            pl.title("Phenotype: %s" % (fname))
    pl.savefig(
        os.path.join(arguments.out,
                     'manhattan_' + fname + '.' + arguments.iformat))
    pl.close()
Ejemplo n.º 5
0
def LDPlot(arguments,identifiers,encoded,maf,pv,unique_pv,positions,chromosomes,fname,pathogenicity_map):
    font_size = 16
    mpl.rcParams['font.family']="sans-serif"
    mpl.rcParams['font.sans-serif']="Arial"
    mpl.rcParams['font.size']=font_size
    #mpl.rcParams['figure.dpi'] = 150
    mpl.rcParams['font.weight']='medium'
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'white'
    mpl.rcParams['grid.linestyle'] = '-'
    mpl.rcParams['grid.color'] = 'LightGray'

    if arguments.ignore!=None:
        if arguments.ignore in fname:
            return

    if not arguments.sql_gene==None:
        sqlite = sqlite3.connect(arguments.sql_gene)
        sqlite_cursor = sqlite.cursor()
    else:
        sqlite = None

    if arguments.nr_hypothesis==-1:
        if arguments.distinct:
            bf_threshold = 0.05/unique_pv.shape[0]
        else:
            bf_threshold = 0.05/pv.shape[0]
    else:
        bf_threshold = 0.05/arguments.nr_hypothesis
    
    snp_distance = arguments.distance
    r2_measure = arguments.r2_measure

    pl.ion()
    pl.figure(figsize=(20,4))

    color_list = ['#F26C4F','#F68E55','#7CC576','#00BFF3','#605CA8','#F06EA9','#F26C4F','#F68E55']
    
    #select different SNP
    if arguments.selected_snp!='-1':
        ind = sp.where(identifiers==arguments.selected_snp)[0]
        if ind.shape[0]==0:
            print "\nSNP " + arguments.selected_snp + " not found in dataset!"
            print "Please select a different SNP identifier!\n"
            quit()
        else:
            chrom_list = sp.array([arguments.selected_snp.split("_")[0]])
            __pos = sp.array([int(arguments.selected_snp.split("_")[1])])
    else:
        chrom_list = sp.unique(chromosomes)

    for i,chrom in enumerate(chrom_list):
        idx = chromosomes==chrom
        _pos = positions[idx]
        _chrs = chromosomes[idx]
        _pv = pv[idx]

        if arguments.selected_snp=='-1':
            idx = sp.where(_pv<=bf_threshold)[0]
            __pos = _pos[idx]
            __chrs = _chrs[idx]
            __pv = _pv[idx]
            __pv = -sp.log10(__pv)
        else:
            idx = sp.where(__pos[0]==_pos)[0]
            __pv = _pv[idx]
            __pv = -sp.log10(__pv)
        #plot LD
        cnorm = mcol.Normalize(vmin=0.0,vmax=1)
        cpick = cm.ScalarMappable(norm=cnorm,cmap=pl.get_cmap("jet"))
        cpick.set_array([])
        ind = sp.argsort(_pos)
        _pos = _pos[ind]
        _pv = _pv[ind]
        for k,pp in enumerate(__pos):
            fig = pl.figure(figsize=(12,4))
            if sqlite==None:
                ax1 = pl.subplot2grid((5,5),(0,0),colspan=5,rowspan=4)
            else:
                ax1 = pl.subplot2grid((6,5),(0,0),colspan=5,rowspan=4)
            ax1.axhline(-sp.log10(bf_threshold),color='#F68E55',linestyle='--')
            marker = '.'
            if str(chrom) + "_" + str(pp) in pathogenicity_map:
                if pathogenicity_map[str(chrom) + "_" + str(pp)].prediction == "DELETERIOUS":
                    marker = '^' 
                else:
                    marker = 'v'
            ax1.plot(pp,__pv[k],marker,
                color="#b000ff",
                alpha=0.9,
                markersize=10,
                )
            if pp-snp_distance > 0 and pp+snp_distance<=_pos.max():
                ranges = sp.where((_pos>=pp-snp_distance) & (_pos<=pp+snp_distance))[0]
            elif pp-snp_distance < 0 and pp+snp_distance<=_pos.max():
                ranges = sp.where(_pos<=pp+snp_distance)[0]
            elif pp-snp_distance > 0 and pp+snp_distance>_pos.max():
                ranges = sp.where(_pos>=pp-snp_distance)[0]
            
            pp_idx = sp.where(pp==_pos)[0][0]
            vline_map = {}
            rr_color_list = []
            maf_list = []
            xlabels = [_pos[ranges][0],__pos[0],_pos[ranges][-1]]
            idx = sp.where(str(chrom) + "_" + str(pp)==identifiers)[0]
            for l,sra in enumerate(ranges):
                sind = sp.where(str(chrom) + "_" + str(_pos[sra])==identifiers)[0]
                maf_list.append(maf[sind])

                if r2_measure=="excoffier_slatkin":
                    rr = ld.esem_r(sp.array(encoded[:,sind].flatten(),dtype="int"),sp.array(encoded[:,idx].flatten(),dtype="int"))**2
                elif r2_measure=="roger_huff":
                    rr = ld.get_r(sp.array(encoded[:,sind].flatten(),dtype="int"),sp.array(encoded[:,idx].flatten(),dtype="int"))**2
                elif r2_measure=="pearson_r2":
                    rr = stats.pearsonr(encoded[:,sind].flatten(),encoded[:,idx].flatten())[0]**2
                rr_color_list.append(cpick.to_rgba(rr))
                if _pos[sra]==pp:
                    ax1.vlines(pp, 0, __pv[k],color='#b000ff',linestyle='--',alpha=0.8,linewidth=0.5)
                    vline_map[pp] = "#b000ff"
                    continue
                marker = '.'
                if str(chrom) + "_" + str(_pos[sra]) in pathogenicity_map:
                    if pathogenicity_map[str(chrom) + "_" + str(_pos[sra])].prediction == "DELETERIOUS":
                        marker = '^' 
                    else:
                        marker = 'v'
                if rr>=0.7:
                    ax1.plot(_pos[sra],-sp.log10(_pv[sra]),marker,color=cpick.to_rgba(rr),alpha=0.9,markeredgecolor='#DDDDDD',markersize=10)#8
                elif rr>=0.5:
                    ax1.plot(_pos[sra],-sp.log10(_pv[sra]),marker,color=cpick.to_rgba(rr),alpha=0.9,markeredgecolor='#DDDDDD',markersize=9)#7
                elif rr>=0.3:
                    ax1.plot(_pos[sra],-sp.log10(_pv[sra]),marker,color=cpick.to_rgba(rr),alpha=0.9,markeredgecolor='#DDDDDD',markersize=8)#6
                else:
                    ax1.plot(_pos[sra],-sp.log10(_pv[sra]),marker,color=cpick.to_rgba(rr),alpha=0.4,markeredgecolor='#DDDDDD',markersize=7)#4
                if _pv[sra]<=bf_threshold:
                    ax1.vlines(_pos[sra], 0, -sp.log10(_pv[sra]),color=cpick.to_rgba(rr),linestyle='--',alpha=0.8,linewidth=0.5)
                    vline_map[_pos[sra]] = cpick.to_rgba(rr)
            remove_border()
            ax1.set_ylabel("$-log10(p-value)$")
            ax1.set_ylim(0,sp.maximum(-sp.log10(bf_threshold)+1,sp.maximum((-sp.log10(_pv[ranges])).max()+1,-sp.log10(_pv[pp_idx])+1)))
            if arguments.title:
                ax1.set_title("Phenotype: %s, SNP: %d"%(fname,pp))
            ax1.set_xticks([])
            ax1.set_yticks(sp.arange(1,sp.ceil(sp.maximum(-sp.log10(bf_threshold)+1,sp.maximum((-sp.log10(_pv[ranges])).max()+1,-sp.log10(_pv[pp_idx])+1))),3))
            ax1.set_xlim(_pos[ranges[0]]-snp_distance*0.01,_pos[ranges[-1]]+snp_distance*0.01)
            ax1.yaxis.grid()
            if len(pathogenicity_map)>0:
                deleterious = pl.Line2D(range(1), range(1), marker='^', color="white",linestyle="None")
                benign = pl.Line2D(range(1), range(1), marker='v', color="white",linestyle="None")
                leg = pl.legend([benign,deleterious],['Benign Missense Mutation','Deleterious Missense Mutation'],frameon=True,scatterpoints=1,prop={'size':12},
                                fancybox=True,bbox_to_anchor=(1,1.2),numpoints=1,ncol=2)
                leg.get_frame().set_alpha(0.5)
                leg.get_frame().set_linewidth(0.2)
                leg.get_frame().set_facecolor("#DDDDDD")

            if sqlite==None:
                ax3 = pl.subplot2grid((5,5),(4,0),colspan=5)
                ax3.set_xlabel("Genomic positions on chromosome: " + str(chrom)) 
            else:
                ax3 = pl.subplot2grid((6,5),(4,0),colspan=5)
            ax3.set_ylim(0,0.6)
            ax3.set_yticks([0.25,0.5])
            ax3.yaxis.grid()
            ax3.set_ylabel("MAF")
            ax3.set_xticks([])
            remove_border(top=True)
            ax3.set_xlim(_pos[ranges[0]]-snp_distance*0.01,_pos[ranges[-1]]+snp_distance*0.01)
            for key in vline_map:
                ax3.axvline(key,color=vline_map[key],linestyle='--',alpha=0.8,linewidth=0.5)
            for i in xrange(ranges.shape[0]):
                ax3.plot(_pos[ranges[i]],maf_list[i],'x',color=rr_color_list[i])

            if sqlite!=None:
                ax2 = pl.subplot2grid((6,5),(5,0),colspan=5)
                remove_border(top=True)
                ax2.set_xlabel("Genomic positions on chromosome: " + str(chrom)) 
                ax2.set_yticks([])
                ax2.set_xlim(_pos[ranges[0]]-snp_distance*0.01,_pos[ranges[-1]]+snp_distance*0.01)
                for key in vline_map:
                    ax2.axvline(key,color=vline_map[key],linestyle='--',alpha=0.8,linewidth=0.5)
        
                sqlite_cursor.execute("SELECT * FROM geneannotation WHERE chromosome_id=? AND annotation_end >=? AND annotation_start<=?",(str(chrom),int(_pos[ranges[0]]),int(_pos[ranges[-1]])))
                annotations = sqlite_cursor.fetchall()
                for g,annotation in enumerate(annotations):
                    alpha = 0.6
                    start = int(annotation[3]) 
                    length = int(annotation[4]) - int(annotation[3])
                    head_width=0.25
                    head_length=length*0.15
                    width=0.1
                    name = annotation[7].replace("Contig20-","")

                    if annotation[5]=="+":
                        shape = "right"
                        arrow_params={'length_includes_head':True, 'shape':shape,'head_starts_at_zero':True}
                        if g%2==0:
                            y_pos = 0.6
                        else:
                            y_pos = 0.5
                        y_text = y_pos+2*width
                        ax2.arrow(start,y_pos,length,0,head_width=head_width,head_length=head_length,fc=color_list[0],ec=color_list[0],alpha=alpha,width=width,**arrow_params)
                    else:
                        shape = "left"
                        arrow_params={'length_includes_head':True, 'shape':shape,'head_starts_at_zero':True}
                        if g%2==0:
                            y_pos = 0.15
                        else:
                            y_pos = 0.35
                        y_text = y_pos-2*width
                        ax2.arrow(start+length,y_pos,-length,0,head_width=head_width,head_length=head_length,fc=color_list[3],ec=color_list[3],alpha=alpha,width=width,**arrow_params)
                    ax2.text(start+length/2.0, y_text,name, size=7, ha='center', va='center', color="k")

            pl.xticks(xlabels,xlabels)
            #pl.gca().get_xaxis().get_major_formatter().set_useOffset(False)
            if not sqlite==None:
                cax = fig.add_axes([0.93, 0.4, 0.01, 0.5])
                pl.colorbar(cpick,label="SNP r^2",cax=cax)
                pl.subplots_adjust(left=0.07,bottom=0.15,right=0.92,top=0.9,wspace=0,hspace=0)
            else:
                pl.subplots_adjust(left=0.07,bottom=0.15,right=0.99,top=0.9,wspace=0,hspace=0)
            if not arguments.title:
                pl.subplots_adjust(top=0.9)

            pl.savefig(os.path.join(arguments.out,'ld_plot_chrom_' + str(chrom) + "_pos_" + str(pp) + "_" + fname +'.' + arguments.iformat) )
            pl.close()
Ejemplo n.º 6
0
def ManhattanPlot(arguments,pv,positions,chromosomes,hashs,unique_pv,fname):
    """Draw a genome-wide Manhattan plot of p-values and save it to disk.

    Chromosomes are laid out side by side along the x-axis; every SNP is
    drawn at ``-log10(p)``. SNPs at or below the Bonferroni threshold are
    highlighted, and every second chromosome gets a grey background.

    Parameters
    ----------
    arguments : object
        Options read here: ``ignore`` (skip plotting if this substring occurs
        in ``fname``), ``distinct`` (use ``unique_pv`` for the automatic
        threshold and for genomic control), ``nr_hypothesis`` (number of
        tests for the Bonferroni threshold, ``-1`` = derive from the data),
        ``nr_hypothesis2`` (optional second threshold, ``-1`` = off),
        ``title``, ``gc``, ``out`` (output directory) and ``iformat``
        (image file extension).
    pv : array
        P-values, indexed in parallel with ``positions`` and ``chromosomes``.
    positions : array
        Per-SNP positions within their chromosome.
    chromosomes : array
        Per-SNP chromosome identifiers.
    hashs :
        Not used by this function; kept for interface compatibility.
    unique_pv : array
        P-values used instead of ``pv`` when ``arguments.distinct`` is set.
    fname : str
        Phenotype name; used in the title and in the output file name.

    Returns
    -------
    None
        The figure is written to
        ``<arguments.out>/manhattan_<fname>.<arguments.iformat>`` and closed.
    """
    font_size = 14
    mpl.rcParams['font.family']="sans-serif"
    mpl.rcParams['font.sans-serif']="Arial"
    mpl.rcParams['font.size']=font_size
    mpl.rcParams['font.weight']='medium'
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 1
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'white'
    mpl.rcParams['grid.linestyle'] = '-'
    mpl.rcParams['grid.color'] = 'LightGray'

    if arguments.ignore is not None and arguments.ignore in fname:
        return

    chrom_list = sp.unique(chromosomes)
    unsnps = arguments.distinct

    # NOTE(review): interactive mode is enabled although the figure is only
    # saved and closed; kept as in the original -- confirm it is needed.
    pl.ion()
    pl.figure(figsize=(20,3))

    # Number of tests the Bonferroni correction is based on.
    if arguments.nr_hypothesis==-1:
        nr_tests = unique_pv.shape[0] if unsnps else pv.shape[0]
    else:
        nr_tests = arguments.nr_hypothesis
    bf_threshold = 0.05/nr_tests
    log_threshold = -sp.log10(bf_threshold)

    # Leave one unit of headroom above the highest point / threshold line.
    max_y = max((-sp.log10(pv[:])).max(), log_threshold) + 1

    pl.axhline(log_threshold,color='#F68E55',linestyle='--')
    if arguments.nr_hypothesis2!=-1:
        pl.axhline(-sp.log10(0.05/arguments.nr_hypothesis2),color='#00BFF3',linestyle='--')
        # Relate the second threshold to the number of tests actually used
        # above; dividing by arguments.nr_hypothesis would give a negative
        # factor whenever it is left at -1 (automatic mode).
        factor = sp.around(float(arguments.nr_hypothesis2)/float(nr_tests))
        leg = pl.legend(['Bonferroni','Bonferroni ' + str(factor) + 'x'],ncol=2,fancybox=True,prop={'size':12},bbox_to_anchor=(1,1.05))
        leg.get_frame().set_alpha(0.8)
        leg.get_frame().set_linewidth(0.2)

    # (selector, plot style) for non-significant and significant SNPs.
    point_styles = (
        (lambda p: p>bf_threshold, {'alpha':0.3,'color':"#A0A0A0",'markersize':5}),
        (lambda p: p<=bf_threshold, {'color':"#b000ff",'alpha':0.9,'markersize':8}),
    )

    current_pos = 0   # x-offset at which the current chromosome starts
    split_list = []   # x-coordinate of the right edge of every chromosome
    xtick_list = []   # x-coordinate of the tick label of every chromosome
    for chrom in chrom_list:
        in_chrom = chromosomes==chrom
        _pos = positions[in_chrom]
        _pv = pv[in_chrom]

        for select, style in point_styles:
            idx = sp.where(select(_pv))[0]
            pl.plot(_pos[idx]+current_pos,-sp.log10(_pv[idx]),'.',**style)

        # The chromosome's extent is taken to be its largest SNP position.
        xtick_list.append(float(current_pos) + float(_pos.max())/2.0)
        current_pos = _pos.max() + current_pos
        split_list.append(current_pos)

    pl.xlim(0,current_pos)
    pl.ylabel("$-log10(p-value)$")
    pl.ylim(0,max_y)

    # Shade every second chromosome: pair the boundaries (0,1), (2,3), ...
    # zip() drops a trailing unpaired boundary, so no exception handling is
    # needed and real plotting errors are no longer swallowed.
    for left, right in zip(split_list[0::2], split_list[1::2]):
        pl.fill_between([left,right],0,max_y,color="#D7D7D7",linewidth=0,alpha=0.5)

    # Dashed separators between chromosomes (not after the last one).
    for split in split_list[:-1]:
        pl.axvline(split,color='k',linestyle='--')

    pl.xticks(xtick_list,chrom_list)
    pl.subplots_adjust(left=0.05,bottom=0.09,right=0.99,top=0.9,wspace=0.45)
    remove_border()
    if unsnps:
        pv = unique_pv

    if arguments.title:
        if arguments.gc:
            # Genomic control: median chi-square(1 d.o.f.) statistic of the
            # p-values divided by 0.456 (presumably the expected median under
            # the null -- TODO confirm the constant).
            gc = sp.median(stats.chi2.isf(pv,1))/0.456
            # Raw string: "\h" and "\l" are not valid escape sequences.
            pl.title(r"Phenotype: %s, genomic control $\hat \lambda=%0.2f$"%(fname,gc))
        else:
            pl.title("Phenotype: %s"%(fname))
    pl.savefig(os.path.join(arguments.out,'manhattan_' + fname + '.' + arguments.iformat) )
    pl.close()