def RunGammaTest(chr, beginSeqName, globalOrLocalMts, organism):
    """
    Runs all Gamma test calculations for one chromosome.

    Parses the per-chromosome family, Qgamma and C matrices found under
    the "Calculations_<organism>" directory, computes the PalphaGammaHat
    matrices (asymmetric and symmetric) using either global or local Mt
    values, computes the matching Lgamma values, and writes both sets of
    results back under the same directory.

    chr: Current chromosome number (formatted with %d into file names).
    beginSeqName: Beginning of sequence names for query.
    globalOrLocalMts: String identifying whether to use global or local Mt
                      values. Must be "Global" or "Local"; any other value
                      prints an error message and nothing is computed or
                      written.
    organism: Organism's name (suffix of the "Calculations_" directory).
    """
    # Local import so this block does not depend on the file's import header.
    import os

    print("GAMMA")

    calcDir = "Calculations_" + organism

    def calcPath(*parts):
        # os.path.join yields the same backslash-separated paths as before on
        # Windows, and valid paths on POSIX systems (where the previously
        # hard-coded "\" separators were treated as part of the file name).
        return os.path.join(calcDir, *parts)

    # to find P gamma
    # TODO (from original author): change this to Rag, should be the final
    # area of filtering
    familyListAsym = printAndParseFiles.parsePalphaGamma(
        calcPath("RSeqFamilyMats", "RSeqFamilyMatsAsymmetric%d.txt" % chr), beginSeqName
    )
    familyListSym = printAndParseFiles.parsePalphaGamma(
        calcPath("RSeqFamilyMats", "RSeqFamilyMatsSymmetric%d.txt" % chr), beginSeqName
    )
    QgammaAsym = printAndParseFiles.parseQgammaFile(
        calcPath("Qgamma", "QgammaAsymmetric%d.txt" % chr)
    )
    QgammaSym = printAndParseFiles.parseQgammaFile(
        calcPath("Qgamma", "QgammaSymmetric%d.txt" % chr)
    )

    if globalOrLocalMts == "Global":
        # A single Mt value file is shared by the asymmetric and symmetric runs.
        mtDict = printAndParseFiles.parseValueFile(calcPath("MTFamilyVals.txt"))
        PalphaGammaHatAsym = FindCandP.calculatePalphaGammaHatGlobal(QgammaAsym, familyListAsym, mtDict)
        PalphaGammaHatSym = FindCandP.calculatePalphaGammaHatGlobal(QgammaSym, familyListSym, mtDict)
    elif globalOrLocalMts == "Local":
        # Separate per-chromosome Mt files for the asymmetric and symmetric runs.
        mtDictAsym = printAndParseFiles.parseMtalphaGammaFile(
            calcPath("MtSeqFamilyMats", "MtSeqFamilyMatsAsymmetric%d.txt" % chr), beginSeqName
        )
        PalphaGammaHatAsym = FindCandP.calculatePalphaGammaHatLocal(QgammaAsym, familyListAsym, mtDictAsym)
        mtDictSym = printAndParseFiles.parseMtalphaGammaFile(
            calcPath("MtSeqFamilyMats", "MtSeqFamilyMatsSymmetric%d.txt" % chr), beginSeqName
        )
        PalphaGammaHatSym = FindCandP.calculatePalphaGammaHatLocal(QgammaSym, familyListSym, mtDictSym)
    else:
        print("Error with Global or Local choice. Please type 'Global' or 'Local'")
        # The completion message is still printed, as it always has been.
        print("DONE WITH CHR %d" % chr)
        return

    # From here on the Global and Local variants only differ in the name of
    # the output directories, which embed globalOrLocalMts.
    pHatDir = "PalphaGammaHat%s" % globalOrLocalMts
    printAndParseFiles.printDictionaryMatrixToFile(
        PalphaGammaHatAsym, calcPath(pHatDir, "PalphaGammaHatAsymmetric%d.txt" % chr)
    )
    printAndParseFiles.printDictionaryMatrixToFile(
        PalphaGammaHatSym, calcPath(pHatDir, "PalphaGammaHatSymmetric%d.txt" % chr)
    )

    CalphaGamma = printAndParseFiles.parseCalphaGammaFile(
        calcPath("CSeqFamilyMats", "CSeqFamilyMats%d.txt" % chr), beginSeqName
    )

    # calculate this section's L gamma value
    LgammaAsym = FindLg.findLgamma(CalphaGamma, PalphaGammaHatAsym)
    LgammaSym = FindLg.findLgamma(CalphaGamma, PalphaGammaHatSym)
    lgammaDir = "Lgamma%s" % globalOrLocalMts
    printAndParseFiles.printValuesToFile(LgammaAsym, calcPath(lgammaDir, "LgammaAsymmetric%d.txt" % chr))
    printAndParseFiles.printValuesToFile(LgammaSym, calcPath(lgammaDir, "LgammaSymmetric%d.txt" % chr))

    print("DONE WITH CHR %d" % chr)
def figure3d(globalOrLocal, beginChr, endChr, organism):
    """
    Emulates the histogram shown for Figure 3d in the Martin et al. paper.
    Displays the distribution, over gene regions classified as
    "Asymmetric" in the BIC results, of the difference between Qgamma
    entries [1, 2] and [2, 1] (labelled on the x axis as the C->G
    substitution on strand 1 minus strand 2). This data has not been
    normalized. Prints out the figure number to the console.
    globalOrLocal: Determines the distance version used (mt_alpha or
                   mt_alpha,gamma) for these calculations; selects the
                   "BICResults<globalOrLocal>" directory.
    beginChr: First chromosome used (1 by default from main method).
    endChr: Last chromosome used (usually last chromosome in organism).
            Converted with int(), so a numeric string is accepted.
    organism: Organism rates being graphed/analyzed.
    """
    print("figure3d")
    histogramPoints = []
    for i in range(beginChr, int(endChr) + 1):
        BICResults = printAndParseFiles.parseBICfile(
            "Calculations_" + organism + "/BICResults%s/BICResults%d.txt" % (globalOrLocal, i)
        )
        QgammaAsym = printAndParseFiles.parseQgammaFile(
            "Calculations_" + organism + "/Qgamma/QgammaAsymmetric%d.txt" % i
        )
        for geneNumber, BICresult in BICResults.items():
            if BICresult == "Asymmetric":
                geneMat = QgammaAsym[geneNumber]
                histogramPoints.append(geneMat[1, 2] - geneMat[2, 1])
    plt.hist(histogramPoints, bins=30, facecolor="#000000")
    plt.title("Figure 3d")
    # Backslashes are doubled so the LaTeX command is not an invalid Python
    # escape sequence; "\n" is left as a real line break in the label.
    plt.xlabel("$[C \\longrightarrow G]_{strand 1} - [C \\longrightarrow G]_{strand 2}$\n rel. difference")
    plt.ylabel("Distribution")
    plt.show()
def figure2b(globalOrLocal, beginChr, endChr, organism):
    """
    Emulates the graph shown for Figure 2b in the Martin et al. paper.
    Displays the correlation between strand asymmetry rates for
    G->T and C->T. Differentiates between gene regions that were
    classified as "Asymmetric" in the BIC results (black) and all other
    gene regions (grey). This data has not been normalized. Prints out
    the figure number and the result of scipy.stats.pearsonr (Pearson
    correlation coefficient and p-value for non-correlation) to the
    console.
    globalOrLocal: Determines the distance version used (mt_alpha or
                   mt_alpha,gamma) for these calculations; selects the
                   "BICResults<globalOrLocal>" directory.
    beginChr: First chromosome analyzed (1 by default from main method).
    endChr: Last chromosome analyzed (usually last chromosome in organism).
            Converted with int(), so a numeric string is accepted.
    organism: Organism rates being graphed/analyzed.
    """
    print("figure2b")
    blackPlotPointsX = []
    blackPlotPointsY = []
    greyPlotPointsX = []
    greyPlotPointsY = []
    for i in range(beginChr, int(endChr) + 1):
        BICResults = printAndParseFiles.parseBICfile(
            "Calculations_" + organism + "/BICResults%s/BICResults%d.txt" % (globalOrLocal, i)
        )
        QgammaAsym = printAndParseFiles.parseQgammaFile(
            "Calculations_" + organism + "/Qgamma/QgammaAsymmetric%d.txt" % i
        )
        for geneNumber, BICresult in BICResults.items():
            # The plotted coordinates are the same for every gene; only the
            # series (black vs. grey) depends on the BIC classification.
            geneMat = QgammaAsym[geneNumber]
            xValue = geneMat[2, 3] - geneMat[1, 0]
            yValue = geneMat[1, 3] - geneMat[2, 0]
            if BICresult == "Asymmetric":
                blackPlotPointsX.append(xValue)
                blackPlotPointsY.append(yValue)
            else:
                greyPlotPointsX.append(xValue)
                greyPlotPointsY.append(yValue)

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax1.scatter(greyPlotPointsX, greyPlotPointsY, c="#b0b0b0", edgecolor="none")
    ax1.scatter(blackPlotPointsX, blackPlotPointsY, c="#000000", edgecolor="none")
    ax1.set_xlim([-0.15, 0.15])
    ax1.set_ylim([-0.5, 0.5])
    # A single minor tick at 0 on each axis, with a minor grid, draws the
    # two zero reference lines.
    ax1.set_xticks([0.0], minor=True)
    ax1.set_yticks([0.0], minor=True)
    ax1.yaxis.grid(True, which="minor")
    ax1.xaxis.grid(True, which="minor")

    # Correlation over all points (both series); computed once and reused for
    # the annotation and the console output.
    correlation = scipy.stats.pearsonr(
        blackPlotPointsX + greyPlotPointsX, blackPlotPointsY + greyPlotPointsY
    )
    ax1.annotate("r =" + str(round(correlation[0], 2)), (0.07, -0.35), fontsize=20)
    # Backslashes are doubled so the LaTeX command is not an invalid Python
    # escape sequence; "\n" is left as a real line break in the label.
    plt.xlabel("$[G \\longrightarrow T]_{strand 1} - [G \\longrightarrow T]_{strand 2}$\n rel. difference")
    plt.ylabel("$[C \\longrightarrow T]_{strand 1} - [C \\longrightarrow T]_{strand 2}$\n rel. difference")
    plt.title("Figure 2b")
    plt.show()
    print(correlation)
def getGlobalQGamma(fullFilePath):
    """
    Computes the global Q_gamma matrix as the element-wise mean of the
    family matrices parsed from the given file. (Currently unused.)
    fullFilePath: Path handed to printAndParseFiles.parseQgammaFile,
                  e.g. "Calculations/QFamilyMats.txt".
    Returns the 4x4 sum of all family matrices divided by the number
    of families.
    """
    familyMatrices = printAndParseFiles.parseQgammaFile(fullFilePath)

    # Accumulate in place into a 4x4 float matrix of zeros.
    runningTotal = numpy.matrix(numpy.zeros((4, 4)))
    for familyMatrix in familyMatrices.values():
        runningTotal += familyMatrix

    familyCount = len(familyMatrices)
    return numpy.divide(runningTotal, familyCount)
def figure4b(geneTranscriptionDirections, beginChr, endChr, organism=None):
    """
    Emulates the graph shown for Figure 4b in the Martin et al. paper.
    Displays the correlation between strand asymmetry rates for
    G->T and C->T. Differentiates plot points by the transcription
    direction of each gene region ("+" is drawn in black with "+"
    markers, anything else in grey with square markers). This data has
    not been normalized. Prints out the figure number and the result of
    scipy.stats.pearsonr (Pearson correlation coefficient and p-value
    for non-correlation) to the console.
    geneTranscriptionDirections: Dictionary mapping each gene key (as used
                   in the Qgamma files) to its transcription direction
                   string. Every key must be present in the parsed Qgamma
                   data, otherwise a KeyError is raised.
    beginChr: First chromosome analyzed (1 by default from main method).
    endChr: Last chromosome analyzed (usually last chromosome in organism).
            Converted with int(), so a numeric string is accepted, as in
            the other figure functions.
    organism: Optional organism name. When given, Qgamma files are read
              from "Calculations_<organism>/Qgamma/" like the other figure
              functions; when omitted (None), the original
              "Calculations/Qgamma/" directory is used.
    """
    print("figure4b")
    blackPlotPointsX = []
    blackPlotPointsY = []
    greyPlotPointsX = []
    greyPlotPointsY = []

    if organism is None:
        calcDir = "Calculations"
    else:
        calcDir = "Calculations_" + organism

    # Merge the per-chromosome Qgamma data into a single lookup.
    # NOTE(review): if gene keys repeat across chromosome files, entries from
    # later chromosomes replace earlier ones - confirm keys are unique.
    QgammaAsym = {}
    for i in range(beginChr, int(endChr) + 1):
        QgammaAsym.update(
            printAndParseFiles.parseQgammaFile(calcDir + "/Qgamma/QgammaAsymmetric%d.txt" % i)
        )

    for geneNumber, transcriptionDirection in geneTranscriptionDirections.items():
        # The plotted coordinates are the same for every gene; only the
        # series (black vs. grey) depends on the transcription direction.
        geneMat = QgammaAsym[geneNumber]
        xValue = geneMat[2, 3] - geneMat[1, 0]
        yValue = geneMat[1, 3] - geneMat[2, 0]
        if transcriptionDirection == "+":
            blackPlotPointsX.append(xValue)
            blackPlotPointsY.append(yValue)
        else:
            greyPlotPointsX.append(xValue)
            greyPlotPointsY.append(yValue)

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax1.scatter(greyPlotPointsX, greyPlotPointsY, c='#b0b0b0', marker="s")
    ax1.scatter(blackPlotPointsX, blackPlotPointsY, c='#000000', marker="+")
    ax1.set_xlim([-0.15, 0.15])
    ax1.set_ylim([-0.5, 0.5])
    # A single minor tick at 0 on each axis, with a minor grid, draws the
    # two zero reference lines.
    ax1.set_xticks([0.0], minor=True)
    ax1.set_yticks([0.0], minor=True)
    ax1.yaxis.grid(True, which='minor')
    ax1.xaxis.grid(True, which='minor')

    # Correlation over all points (both series); computed once and reused for
    # the annotation and the console output.
    correlation = scipy.stats.pearsonr(
        blackPlotPointsX + greyPlotPointsX, blackPlotPointsY + greyPlotPointsY
    )
    ax1.annotate("r =" + str(round(correlation[0], 2)), (0.07, -0.35), fontsize=20)
    # Backslashes are doubled so the LaTeX command is not an invalid Python
    # escape sequence; "\n" is left as a real line break in the label.
    plt.xlabel("$[G \\longrightarrow T]_{strand 1} - [G \\longrightarrow T]_{strand 2}$\n rel. difference")
    plt.ylabel("$[C \\longrightarrow T]_{strand 1} - [C \\longrightarrow T]_{strand 2}$\n rel. difference")
    plt.title("Figure 4b")
    plt.show()
    print(correlation)