Beispiel #1
0
def plotScores(data, palette, pdf):
    """
    Draw one 2D scatter plot per pair of columns in ``data``.

    Every pairwise combination of the columns of ``data`` gets its own
    figure (scatter, legend, titled axes), and each figure is appended to
    ``pdf``.

    :Arguments:
        :type data: pandas.DataFrame
        :param data: Scores to plot; each pair of columns yields one figure.

        :type palette: colorManager.object
        :param palette: Object from color manager. Its ``design.colors``,
            ``ugColors`` and ``combName`` attributes are read for the point
            colors and the legend.

        :type pdf: pdf object
        :param pdf: PDF object to save all the generated figures.

    :Returns:
        Nothing is returned; the figures are added to ``pdf``.
    """
    # Walk over every unordered pair of column names
    for xName, yName in combinations(data.columns.tolist(), 2):

        # One single-axis figure per pair
        figure = figureHandler(proj="2d", figsize=(14, 8))
        axis = figure.ax[0]

        # Scatter of the two selected columns, colored per sample
        scatter.scatter2D(ax=axis,
                          colorList=palette.design.colors.tolist(),
                          x=list(data[xName]),
                          y=list(data[yName]))

        # Legend first, then shrink the axis so the legend fits
        figure.makeLegend(ax=axis,
                          ucGroups=palette.ugColors,
                          group=palette.combName)
        figure.shrink()

        # Remove the spines from the axis
        figure.despine(axis)

        # Titles for the figure and both axes
        figure.formatAxis(figTitle="{0} vs {1}".format(xName, yName),
                          xTitle="Scores on {0}".format(xName),
                          yTitle="Scores on {0}".format(yName),
                          grid=False)

        # Append the finished figure to the PDF
        figure.addToPdf(dpi=600, pdfPages=pdf)
def plotScores(data, palette, pdf):
    """
    Add a 2D scatter plot to ``pdf`` for every pair of columns in ``data``.

    For example, a frame with columns PC1, PC2 and PC3 produces three
    figures: PC1 vs PC2, PC1 vs PC3 and PC2 vs PC3.

    :Arguments:
        :type data: pandas.core.frame.DataFrame
        :param data: Data frame with the data to plot.

        :type palette: color manager object
        :param palette: Supplies the point colors (``design.colors``) and the
            legend content (``ugColors``, ``combName``).

        :type pdf: pdf object
        :param pdf: PDF object that receives each generated figure.

    :Return:
        Nothing is returned; the figures are written into ``pdf``.
    """
    columnNames = data.columns.tolist()
    for xName, yName in itertools.combinations(columnNames, 2):
        # Fresh figure handler for this pair of columns
        figure = figureHandler(proj="2d", figsize=(14, 8))
        axis = figure.ax[0]

        # 2D scatterplot of the pair
        scatter.scatter2D(ax=axis,
                          x=list(data[xName]),
                          y=list(data[yName]),
                          colorList=palette.design.colors.tolist())

        # Remove the spines from the axis
        figure.despine(axis)

        # Legend, then shrink the plot so everything fits
        figure.makeLegend(ax=axis,
                          ucGroups=palette.ugColors,
                          group=palette.combName)
        figure.shrink()

        # Figure title and axis titles
        pairTitle = "{0} vs {1}".format(xName, yName)
        figure.formatAxis(figTitle=pairTitle,
                          xTitle="Scores on {0}".format(xName),
                          yTitle="Scores on {0}".format(yName),
                          grid=False)

        # Append the figure to the PDF
        figure.addToPdf(dpi=90, pdfPages=pdf)
def main(args):
    """
    Plot one 2D scatter plot of two columns of a wide dataset and save it.

    :Arguments:
        :type args: argparse-style namespace
        :param args: Parsed options. The attributes read here are ``design``
            (optional path to a tab-separated design file), ``input`` (path
            to the tab-separated wide file), ``group`` (optional design
            column used for coloring), ``uniqID``, ``x`` and ``y`` (columns
            of the wide file to plot) and ``figure`` (output PDF path).

    :Returns:
        Nothing is returned; the figure is written to ``args.figure``.
    """
    # Loading design.
    # DataFrame.from_csv was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True reproduces its defaults.
    if args.design:
        design = pd.read_csv(args.design, sep="\t", index_col=0,
                             parse_dates=True)
        design.reset_index(inplace=True)
    else:
        design = False

    # Loading wide file
    wide = pd.read_csv(args.input, sep="\t", index_col=0, parse_dates=True)

    # Create figureHandler object
    fh = figureHandler(proj="2d", figsize=(14, 8))

    # Points are colored by group only when a group column is given and the
    # unique ID is "sampleID"; the same condition controls the legend below.
    colorByGroup = args.group and args.uniqID == "sampleID"

    if colorByGroup:
        uniqueGroups = sorted(set(design[args.group]))
        colorList, ucGroups = palette.getColorsByGroup(design=design,
                                                       group=args.group,
                                                       uGroup=uniqueGroups)
    else:
        colorList = palette.mpl_colors[0]
        ucGroups = dict()

    # Plot scatterplot 2D
    scatter.scatter2D(ax=fh.ax[0],
                      x=list(wide[args.x]),
                      y=list(wide[args.y]),
                      colorList=colorList)

    # Despine axis
    fh.despine(fh.ax[0])

    # Formatting axis
    fh.formatAxis(figTitle=args.x + " vs " + args.y,
                  xTitle=args.x,
                  yTitle=args.y,
                  grid=False)

    # If groups are provided create a legend and make room for it
    if colorByGroup:
        fh.makeLegend(ax=fh.ax[0], ucGroups=ucGroups, group=args.group)
        fh.shrink()

    # Saving figure to file
    with PdfPages(args.figure) as pdfOut:
        fh.addToPdf(dpi=600, pdfPages=pdfOut)
    logger.info("Script Complete!")
Beispiel #4
0
def plotScatterplot2D(data, palette, pdf, nloads=3):
    """
    Plot pairwise 2D scatterplots for the first ``nloads`` columns of ``data``.

    :Arguments:
        :type data: pandas.DataFrame
        :param data: Loadings of the PCA.

        :type palette: color manager object
        :param palette: Supplies the point colors (``design.colors``) and the
            legend content (``ugColors``, ``combName``).

        :type pdf: pdf object
        :param pdf: PDF object to save all the generated figures.

        :type nloads: int
        :param nloads: Number of leading columns of ``data`` used to build
            the pairwise combinations.
    """
    # Only the first `nloads` columns take part in the pairwise combinations
    selectedColumns = data.columns.tolist()[:nloads]

    for xName, yName in combinations(selectedColumns, 2):

        # One single-axis figure per pair
        figure = figureHandler(proj="2d", figsize=(14, 8))
        axis = figure.ax[0]

        # Scatter of the pair, colored per sample
        scatter.scatter2D(ax=axis,
                          colorList=palette.design.colors.tolist(),
                          x=list(data[xName]),
                          y=list(data[yName]))

        # Legend first, then shrink the axis so the legend fits
        figure.makeLegend(ax=axis,
                          ucGroups=palette.ugColors,
                          group=palette.combName)
        figure.shrink()

        # Remove the spines from the axis
        figure.despine(axis)

        # Titles for the figure and both axes
        figure.formatAxis(figTitle="{0} vs {1}".format(xName, yName),
                          xTitle="Scores on {0}".format(xName),
                          yTitle="Scores on {0}".format(yName),
                          grid=False)

        # Append the finished figure to the PDF
        figure.addToPdf(dpi=600, pdfPages=pdf)
def makeScatter(x, y, ax, fh):
    """ Draw a scatter plot of x vs y with regression lines on ``ax``.

    :Arguments:
        :type x: pandas.Series
        :param x: Series of first sample, treated as independent variable.

        :type y: pandas.Series
        :param y: Series of second sample, treated as dependent variable.

        :type ax: matplotlib.axis
        :param ax: Axis on which to plot.

        :type fh: figureHandler
        :param fh: Figure handler whose axis 0 is formatted with the titles.

    :Returns:
        Nothing is returned; the plot is drawn onto ``ax``.

    """
    # Regression of y on x: lower/upper bounds and fitted values
    # (the residuals and influence outputs are not needed here)
    lower, upper, fitted, resid, infl = runRegression(x, y)

    # Raw points
    scatter.scatter2D(x=x, y=y, ax=ax, colorList=["b"])

    # With missing data, x and the regression outputs differ in length, so
    # x is first restricted to the index of the fitted values.
    xFitted = x.loc[fitted.index]

    # Lower bound, fitted line and upper bound, drawn in that order
    for curve in (lower, fitted, upper):
        lines.drawCutoff(x=xFitted, y=curve, ax=ax)

    # Titles for the scatter axis
    fh.formatAxis(axnum=0,
                  axTitle='Scatter plot',
                  xTitle=x.name,
                  yTitle=y.name,
                  grid=False)
def makeBA(x, y, ax, fh):
    """ Draw a Bland-Altman (BA) plot comparing x vs y.

    The difference (x - y) is regressed on the mean ((x + y) / 2). Points
    flagged by any of three criteria are drawn in red, all others in blue.

    :Arguments:
        :type x: pandas.Series
        :param x: Series of first sample.

        :type y: pandas.Series
        :param y: Series of second sample.

        :type ax: matplotlib.axis
        :param ax: Axis on which to plot.

        :type fh: figureHandler
        :param fh: Figure handler whose axis 1 is formatted with the titles.

    :Returns:
        :rtype: tuple
        :returns: ``(mask, mask1, mask2, mask3)`` where ``mask1`` flags
              absolute Pearson residuals above ``cutoff`` (a name defined
              outside this function), ``mask2`` flags ``cooks_pval`` values
              <= 0.5, ``mask3`` is the ``dffits`` column of the influence
              output, and ``mask`` is the element-wise OR of the three.

    """
    # Work on float copies of both samples
    x = x.apply(float)
    y = y.apply(float)

    # Difference and mean for the current comparison, missing values dropped
    difference = (x - y).dropna()
    average = ((x + y) / 2).dropna()

    # Regress difference on mean to get the bounds and diagnostics
    lower, upper, fitted, resid, infl = runRegression(average, difference)

    # Individual flags and their union
    residFlag = abs(resid['resid_pearson']) > cutoff
    cooksFlag = infl['cooks_pval'] <= 0.5
    dffitsFlag = infl['dffits']
    flagged = residFlag | cooksFlag | dffitsFlag

    # Unflagged points in blue, flagged points in red
    scatter.scatter2D(ax=ax, x=average[~flagged], y=difference[~flagged],
                      colorList='b')
    scatter.scatter2D(ax=ax, x=average[flagged], y=difference[flagged],
                      colorList='r')

    # Regression lines: dotted bounds, solid fit, and a zero reference line
    ax.plot(average, lower, 'r:')
    ax.plot(average, fitted, 'r')
    ax.axhline(0, color='k')
    ax.plot(average, upper, 'r:')

    # Titles for the BA axis
    meanTitle = 'Mean\n{0} & {1}'.format(x.name, y.name)
    diffTitle = 'Difference\n{0} - {1}'.format(x.name, y.name)
    fh.formatAxis(axnum=1,
                  xlim='ignore',
                  ylim='ignore',
                  axTitle='Bland-Altman Plot',
                  xTitle=meanTitle,
                  yTitle=diffTitle,
                  grid=False)

    return flagged, residFlag, cooksFlag, dffitsFlag
Beispiel #7
0
def qqPlot(tresid, tfit, oname):
    """
    Plot the residual diagnostic plots by sample.

    For every column of ``tresid`` one page is written with four panels: a
    q-q plot, a boxplot and a histogram of the residuals, and a scatter plot
    of fitted values vs residuals. These plots are used to diagnose whether
    the residuals are approximately normal.

    :Arguments:
        :type tresid: pandas DataFrame
        :param tresid: Pearson normalized residuals (transposed),
                        (residuals / sqrt(MSE)). One page is produced per
                        column.

        :type tfit: pandas DataFrame
        :param tfit: output of the ANOVA (transposed); indexed with the same
                        column names as ``tresid``.

        :type oname: string
        :param oname: Name of the output file in pdf format.

    :Returns:
        :rtype: PDF
        :returns: Nothing is returned; a pdf file containing all plots is
                  written to ``oname``.

    """
    # Open pdf
    with PdfPages(oname) as pdf:

        # Axis layout: three panels on the first row, one wide panel below
        axisLayout = [(0,0,1,1),(0,1,1,1),(0,2,1,1),(1,0,3,1)]

        # Start plotting
        for col in tresid.columns:
            # Creating figure
            fig = figureHandler(proj='2d',numAx=4,numRow=2,numCol=3,
                                arrangement=axisLayout)

            # Flattened residuals for the boxplot, and the same values as a
            # DataFrame for the histogram
            data = tresid[col].values.ravel()
            df_data = pd.DataFrame(data)

            # Plot qqplot on axis 0
            sm.graphics.qqplot(tresid[col],fit=True,line='r',ax=fig.ax[0])

            # Plot boxplot on axis 1
            box.boxSeries(ser=data,ax=fig.ax[1])

            # Plot histogram on axis 2
            hist.quickHist(ax=fig.ax[2],dat=df_data,orientation='horizontal')

            # Plot scatterplot on axis 3
            scatter.scatter2D(ax=fig.ax[3],x=tfit[col], y=tresid[col],
                                colorList=list('b'))

            # Draw cutoff line for scatterplot on axis 3
            lines.drawCutoffHoriz(ax=fig.ax[3],y=0)

            # Format axis 0
            fig.formatAxis(figTitle=col,axnum=0,grid=False,showX=True,
                yTitle="Sample Quantiles", xTitle=" ")

            # Format axis 1
            fig.formatAxis(axnum=1,axTitle="Standardized Residuals",
                grid=False,showX=False,showY=True, xTitle=" ")

            # Format axis 2
            fig.formatAxis(axnum=2,grid=False,showX=True,showY=True,
                axTitle=" ",xTitle=" ")

            # Format axis 3
            fig.formatAxis(axnum=3,axTitle="Fitted Values vs Residual Values",
                xTitle="Fitted Values",yTitle="Residual Values",
                grid=False)

            # Add figure to pdf
            fig.addToPdf(pdfPages=pdf)
def plotSignificantROR(data, pdf, palette):
    """
    Plot a scatter plot of x vs y for every row with a p-value <= 0.05.

    Each plotted row gets its own figure with the fitted regression line,
    lower and upper prediction bands, a legend and a text box reporting the
    slope, p-value and R squared.

    :Arguments:

        :type data: pandas DataFrame
        :param data: One regression result per row. The fields read from
            each row are "pval", "res", "x", "y", "fitted", "name", "slope"
            and "rsq".

        :type pdf: PdfPages
        :param pdf: pdf object to store scatterplots

        :type palette: color manager object
        :param palette: Supplies the point colors (``list_colors``) and the
            legend content (``ugColors``, ``combName``).
    """
    # Iterate over all rows in the dataframe and skip the rows whose
    # p-value is above 0.05
    for index, row in data.iterrows():
        if row["pval"] > 0.05:
            continue

        # Prediction bands from the fitted model stored in row["res"]
        # (95% assuming wls_prediction_std's default alpha -- TODO confirm)
        prstd, lower, upper = wls_prediction_std(row["res"])

        # Sort the bands by x so they are drawn as continuous lines
        toPlot = pd.DataFrame({"x": row["x"], "lower": lower, "upper": upper})
        toPlot.sort_values(by="x", inplace=True)

        # Create plot
        fh = figureHandler(proj="2d", figsize=(14, 8))

        # Plot scatterplot
        scatter.scatter2D(ax=fh.ax[0],
                          x=row["x"],
                          y=row["y"],
                          colorList=palette.list_colors)

        # Plot fitted line (cyan) and prediction bands (red)
        lines.drawCutoff(ax=fh.ax[0], x=row["x"], y=row["fitted"], c="c")
        lines.drawCutoff(ax=fh.ax[0], x=toPlot["x"], y=toPlot["lower"], c="r")
        lines.drawCutoff(ax=fh.ax[0], x=toPlot["x"], y=toPlot["upper"], c="r")

        # Formatting; the upper y limit is stretched by 20%
        ymin, ymax = fh.ax[0].get_ylim()
        fh.formatAxis(xTitle="Run Order", yTitle="Value", ylim=(ymin,ymax*1.2),
        figTitle=u"{} Scatter plot (fitted regression line and prediction bands"\
        " included)".format(row["name"]))

        # Shrink figure
        fh.shrink()

        # Add legend to figure
        fh.makeLegend(ax=fh.ax[0],
                      ucGroups=palette.ugColors,
                      group=palette.combName)

        # Add text to the ax. R^2 uses ".4f" like the other two fields; the
        # previous "4f" spec set a field width of 4 instead of a precision,
        # which printed six decimals.
        fh.ax[0].text(.7, .85, u"Slope= {0:.4f}\n(p-value = {1:.4f})\n"\
            "$R^2$ = {2:.4f}".format(round(row["slope"],4), round(row["pval"],4),
            round(row["rsq"],4)),transform=fh.ax[0].transAxes, fontsize=12)

        # Save to PDF
        fh.addToPdf(pdf)
Beispiel #9
0
def volcano(combo, results, oname, cutoff=4):
    """
    Plot volcano plots.

    Creates one volcano plot (difference vs -log10 p-value) per pairwise
    comparison found in ``results`` and writes them all to ``oname``.

    :Arguments:

        :type combo: dictionary
        :param combo: A dictionary of dictionaries with all possible pairwise
            combinations. It is not used inside this function.

        :type results: pandas DataFrame
        :param results: Results table. Columns starting with
            "-log10_p-value_" supply the y values and columns starting with
            "diff_of" supply the x values; the text after the last "_" in a
            column name is the comparison key that pairs them.

        :type oname: string
        :param oname: Name of the output file in pdf format.

        :type cutoff: int
        :param cutoff: The cutoff value for significance; points whose
            -log10 p-value is above it are drawn in red.

    :Returns:
        :rtype: PDF
        :returns: Nothing is returned; a pdf file containing all plots is
            written to ``oname``.

    """
    # Collect the -log10 p-value and the difference columns, keyed by the
    # last "_"-separated piece of the column name
    lpvals = {}
    difs = {}
    for col in results.columns.tolist():
        if col.startswith("-log10_p-value_"):
            lpvals[col.split("_")[-1]] = results[col]
        if col.startswith("diff_of"):
            difs[col.split("_")[-1]] = results[col]

    # Making plots
    with PdfPages(oname) as pdf:
        for key in sorted(difs.keys()):
            keyDiffs = difs[key]
            keyLpvals = lpvals[key]

            # Set up figure
            volcanoPlot = figureHandler(proj="2d")
            axis = volcanoPlot.ax[0]

            # All results in blue
            scatter.scatter2D(ax=axis, x=list(keyDiffs), y=list(keyLpvals),
                              colorList=['b'])

            # Results beyond the threshold are drawn again in red
            aboveCut = keyLpvals[keyLpvals > cutoff]
            if not aboveCut.empty:
                diffsAboveCut = keyDiffs[aboveCut.index]
                scatter.scatter2D(ax=axis, x=list(diffsAboveCut),
                                  y=list(aboveCut), colorList=['r'])

            # Horizontal line at the cutoff
            lines.drawCutoffHoriz(y=cutoff, ax=axis)

            # Format axis (volcanoPlot)
            volcanoPlot.formatAxis(
                axTitle=key,
                grid=False,
                xTitle="Diff of treatment = {0}".format(key),
                yTitle="-log10(p-value) for Diff of treatment = {0}".format(key))

            # Add figure to PDF
            volcanoPlot.addToPdf(pdfPages=pdf)
Beispiel #10
0
def makePlots(SEDData, design, pdf, groupName, cutoff, p, plotType, ugColors, levels):
    """
    Build one SED figure of the requested type and add it to ``pdf``.

    :Arguments:
        :type SEDData: pandas.dataFrame
        :param SEDData: Contains SED data either to Mean or pairwise. A
            "colors" column copied from ``design`` is added to it in place.

        :type design: pandas.dataFrame
        :param design: Design file after getColor; must have a "colors"
            column.

        :type pdf: PDF object
        :param pdf: PDF for output plots

        :type groupName: string
        :param groupName: Name of the group (figure title).

        :type cutoff: pandas.dataFrame
        :param cutoff: Cutoff values, beta, chi-sqr and normal. Each column
            is handed to ``plotCutoffs``.

        :type p: float
        :param p: Percentile for cutoff.

        :type plotType: string
        :param plotType: Type of plot: "scatterToMean", "scatterPairwise" or
            "boxplotPairwise". Any other value draws only the cutoffs and
            the legend.

        :param ugColors: Passed to the legend as its second argument.

        :param levels: Passed to the legend as its third argument.

    """
    # Number of features (rows) decides the x range and the figure width
    nFeatures = len(SEDData.index)

    # Single-axis figure at least 16 wide, growing with the feature count
    figure = figureHandler(proj='2d', figsize=(max(nFeatures/2, 16), 8))

    # Keep the colors in the same order as the data
    SEDData["colors"] = design["colors"]

    def formatAxes(titleTemplate):
        # Shared axis formatting: figure title, x limits, x ticks and titles
        figure.formatAxis(figTitle=titleTemplate.format(groupName),
                          xlim=(-0.5, -0.5+nFeatures),
                          ylim="ignore",
                          xticks=SEDData.index.values,
                          xTitle="Index",
                          yTitle="Standardized Euclidean Distance")

    if plotType == "scatterToMean":
        # Scatterplot of every sample's distance to the mean
        formatAxes("Standardized Euclidean Distance from samples {} to the mean")
        scatter.scatter2D(ax=figure.ax[0],
                          colorList=SEDData["colors"],
                          x=range(len(SEDData.index)),
                          y=SEDData["SED_to_Mean"])

    elif plotType == "scatterPairwise":
        # One scatter series per sample, each in that sample's color
        formatAxes("Pairwise standardized Euclidean Distance from samples {}")
        for sample in SEDData.index.values:
            scatter.scatter2D(ax=figure.ax[0],
                              colorList=design["colors"][sample],
                              x=range(len(SEDData.index)),
                              y=SEDData[sample])

    elif plotType == "boxplotPairwise":
        # Boxplots of the pairwise distances
        formatAxes("Box-plots for pairwise standardized Euclidean Distance from samples {}")
        box.boxDF(ax=figure.ax[0],
                  colors=SEDData["colors"].values,
                  dat=SEDData)

    # Draw a cutoff line for every column of the cutoff table
    def drawColumnCutoff(column):
        return plotCutoffs(column, ax=figure.ax[0], p=p)

    cutoff.apply(drawColumnCutoff, axis=0)

    # Make room for the legend, then draw it
    figure.shrink()
    figure.makeLegend(figure.ax[0], ugColors, levels)

    # Add figure to PDF
    figure.addToPdf(pdf)