def figure3d(globalOrLocal, beginChr, endChr, organism):
    """
    Emulate the histogram shown for Figure 3d in the Martin et al. paper.

    Displays the distribution of the C->G strand difference over the gene
    regions whose BIC result is "Asymmetric". The data is not normalized.
    Prints the figure name to the console and opens a matplotlib window.

    globalOrLocal: Suffix selecting the BICResults directory, i.e. the
                   distance version used (mt_alpha or mt_alpha,gamma).
    beginChr: First chromosome used (1 by default from main method).
              Converted with int(), like endChr.
    endChr: Last chromosome used (inclusive; usually the last chromosome
            in the organism). Converted with int().
    organism: Organism whose rates are graphed; selects the
              "Calculations_<organism>" directory.
    """
    print("figure3d")
    baseDir = "Calculations_" + organism
    histogramPoints = []
    for chrom in range(int(beginChr), int(endChr) + 1):
        # presumably a mapping of gene number -> BIC label; verify against
        # printAndParseFiles.parseBICfile
        BICResults = printAndParseFiles.parseBICfile(
            baseDir + "/BICResults%s/BICResults%d.txt" % (globalOrLocal, chrom)
        )
        # presumably a mapping of gene number -> rate matrix supporting
        # [row, col] indexing; verify against printAndParseFiles.parseQgammaFile
        QgammaAsym = printAndParseFiles.parseQgammaFile(
            baseDir + "/Qgamma/QgammaAsymmetric%d.txt" % chrom
        )
        # Entry [1, 2] minus entry [2, 1]: labelled below as the C->G rate on
        # strand 1 minus strand 2 (assumes A, C, G, T index order - TODO confirm).
        histogramPoints.extend(
            QgammaAsym[geneNumber][1, 2] - QgammaAsym[geneNumber][2, 1]
            for geneNumber, BICresult in BICResults.items()
            if BICresult == "Asymmetric"
        )
    plt.hist(histogramPoints, bins=30, facecolor="#000000")
    plt.title("Figure 3d")
    # Backslashes are doubled so the LaTeX command is not read as a string
    # escape, while "\n" stays a real newline in the label.
    plt.xlabel(
        "$[C \\longrightarrow G]_{strand 1} - [C \\longrightarrow G]_{strand 2}$\n rel. difference"
    )
    plt.ylabel("Distribution")
    plt.show()
def figure2b(globalOrLocal, beginChr, endChr, organism):
    """
    Emulate the graph shown for Figure 2b in the Martin et al. paper.

    Displays the correlation between the strand differences for G->T and
    C->T. Differentiates between gene regions whose BIC result is
    "Asymmetric" (black points) and all other gene regions (grey points).
    The data is not normalized. Prints the figure name and the result of
    scipy.stats.pearsonr (correlation coefficient and p-value for
    non-correlation) over all points to the console.

    globalOrLocal: Suffix selecting the BICResults directory, i.e. the
                   distance version used (mt_alpha or mt_alpha,gamma).
    beginChr: First chromosome analyzed (1 by default from main method).
              Converted with int(), like endChr.
    endChr: Last chromosome analyzed (inclusive; usually the last
            chromosome in the organism). Converted with int().
    organism: Organism whose rates are graphed; selects the
              "Calculations_<organism>" directory.
    """
    print("figure2b")
    baseDir = "Calculations_" + organism
    blackPlotPointsX = []
    blackPlotPointsY = []
    greyPlotPointsX = []
    greyPlotPointsY = []
    for chrom in range(int(beginChr), int(endChr) + 1):
        # presumably a mapping of gene number -> BIC label; verify against
        # printAndParseFiles.parseBICfile
        BICResults = printAndParseFiles.parseBICfile(
            baseDir + "/BICResults%s/BICResults%d.txt" % (globalOrLocal, chrom)
        )
        # presumably a mapping of gene number -> rate matrix supporting
        # [row, col] indexing; verify against printAndParseFiles.parseQgammaFile
        QgammaAsym = printAndParseFiles.parseQgammaFile(
            baseDir + "/Qgamma/QgammaAsymmetric%d.txt" % chrom
        )
        for geneNumber, BICresult in BICResults.items():
            rates = QgammaAsym[geneNumber]
            # Labelled on the axes as the G->T and C->T strand differences
            # (assumes A, C, G, T index order - TODO confirm).
            xDiff = rates[2, 3] - rates[1, 0]
            yDiff = rates[1, 3] - rates[2, 0]
            if BICresult == "Asymmetric":
                blackPlotPointsX.append(xDiff)
                blackPlotPointsY.append(yDiff)
            else:
                greyPlotPointsX.append(xDiff)
                greyPlotPointsY.append(yDiff)

    # Correlation over every point (black and grey together); computed once
    # and reused for both the annotation and the console output.
    correlation = scipy.stats.pearsonr(
        blackPlotPointsX + greyPlotPointsX,
        blackPlotPointsY + greyPlotPointsY,
    )

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    # Grey points are drawn first so the black ones end up on top.
    ax1.scatter(greyPlotPointsX, greyPlotPointsY, c="#b0b0b0", edgecolor="none")
    ax1.scatter(blackPlotPointsX, blackPlotPointsY, c="#000000", edgecolor="none")
    ax1.set_xlim([-0.15, 0.15])
    ax1.set_ylim([-0.5, 0.5])
    # A single minor tick at 0 on each axis, with the grid enabled for minor
    # ticks only, draws just the two zero lines.
    ax1.set_xticks([0.0], minor=True)
    ax1.set_yticks([0.0], minor=True)
    ax1.yaxis.grid(True, which="minor")
    ax1.xaxis.grid(True, which="minor")
    ax1.annotate(
        "r =" + str(round(correlation[0], 2)),
        (0.07, -0.35),
        fontsize=20,
    )
    # Backslashes are doubled so the LaTeX command is not read as a string
    # escape, while "\n" stays a real newline in the labels.
    plt.xlabel(
        "$[G \\longrightarrow T]_{strand 1} - [G \\longrightarrow T]_{strand 2}$\n rel. difference"
    )
    plt.ylabel(
        "$[C \\longrightarrow T]_{strand 1} - [C \\longrightarrow T]_{strand 2}$\n rel. difference"
    )
    plt.title("Figure 2b")
    plt.show()
    print(correlation)