def _pruneUnsharedFamilies(target, reference, label):
    """
    Deletes from target every family entry that reference lacks for the same gene.
    target: Dictionary of gene -> {family name -> value}; modified in place.
    reference: Dictionary with the same layout, indexed by the genes of target.
    label: Text printed after "From " for every removed entry.
    """
    for gene, families in target.items():
        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, and deleting from a dict while iterating a live view raises
        # RuntimeError on Python 3.
        for name in list(families):
            if name not in reference[gene]:
                print("From " + label)
                print("%s %s" % (gene, name))
                del families[name]


def RunAlphaGammaTest(chr, organism):
    """
    Runs all Alpha, Gamma test calculations.
    chr: Current chromosome number (shadows the builtin inside this function;
         the name is kept so existing callers keep working).
    organism: Organism's name.

    Every intermediate result is written to a file below
    "Calculations_<organism>"; nothing is returned.
    """
    print("ALPHA GAMMA")

    # All paths below use literal backslash separators (Windows-style). The
    # backslashes are escaped so the strings are unchanged at runtime but no
    # longer rely on unrecognised escape sequences.
    calcDir = "Calculations_" + organism

    # to find C alpha, gamma and P alpha, gamma
    seqNames = printAndParseFiles.findSeqNames(
        "geneLocations" + organism + "\\GeneLocations%d.txt" % chr)
    # NOTE(review): only the upper-cased first letter of organism is used in
    # this directory name, unlike every other path here -- confirm that this
    # matches the directory layout.
    seqCagDictAsym = FindCandP.combineSeqMats(
        "SubMatinGenes" + organism[0].upper() + "\\SubMatinGenes%d.txt" % chr,
        seqNames)
    # Independent copy, so the symmetric and asymmetric computations do not
    # share state.
    seqCagDictSym = copy.deepcopy(seqCagDictAsym)
    printAndParseFiles.printDictionaryListToFile(
        seqCagDictAsym,
        calcDir + "\\CSeqFamilyMats\\CSeqFamilyMats%d.txt" % chr)

    seqPagDictAsym = FindCandP.findPalphaGammaAsymmetric(seqCagDictAsym)
    seqPagDictSym = FindCandP.findPalphaGammaSymmetric(seqCagDictSym)
    printAndParseFiles.printDictionaryListToFile(
        seqPagDictAsym,
        calcDir + "\\PSeqFamilyMats\\PSeqFamilyMatsAsymmetric%d.txt" % chr)
    printAndParseFiles.printDictionaryListToFile(
        seqPagDictSym,
        calcDir + "\\PSeqFamilyMats\\PSeqFamilyMatsSymmetric%d.txt" % chr)

    # to find R*t alpha, gamma
    RagDictAsym = FindR.calculateRag(
        seqPagDictAsym, str(chr), "Asymmetric",
        "RAssertionFailuresAsymmetric_" + organism + "/")
    RagDictSym = FindR.calculateRag(
        seqPagDictSym, str(chr), "Symmetric",
        "RAssertionFailuresSymmetric_" + organism + "/")

    # Keep only the (gene, family) entries present in both results. The order
    # matters: the symmetric dictionary is pruned first and then serves as the
    # reference for pruning the asymmetric one.
    _pruneUnsharedFamilies(RagDictSym, RagDictAsym, "Symmetric")
    _pruneUnsharedFamilies(RagDictAsym, RagDictSym, "Asymmetric")

    printAndParseFiles.printDictionaryMatrixToFile(
        RagDictAsym,
        calcDir + "\\RSeqFamilyMats\\RSeqFamilyMatsAsymmetric%d.txt" % chr)
    printAndParseFiles.printDictionaryMatrixToFile(
        RagDictSym,
        calcDir + "\\RSeqFamilyMats\\RSeqFamilyMatsSymmetric%d.txt" % chr)

    # to find Q alpha,gamma and Q gamma
    # NOTE(review): presumably the dimension of the substitution matrices -- confirm.
    matSize = 4
    MtagDictAsym = FindMT.calculateMtalphaGamma(RagDictAsym, matSize)
    MtagDictSym = FindMT.calculateMtalphaGamma(RagDictSym, matSize)
    printAndParseFiles.printDictionaryofValuesToFile(
        MtagDictAsym,
        calcDir + "\\MtSeqFamilyMats\\MtSeqFamilyMatsAsymmetric%d.txt" % chr)
    printAndParseFiles.printDictionaryofValuesToFile(
        MtagDictSym,
        calcDir + "\\MtSeqFamilyMats\\MtSeqFamilyMatsSymmetric%d.txt" % chr)

    QagDictAsym = FindQ.calculateQag(RagDictAsym, MtagDictAsym, matSize)
    QagDictSym = FindQ.calculateQag(RagDictSym, MtagDictSym, matSize)
    printAndParseFiles.printDictionaryMatrixToFile(
        QagDictAsym,
        calcDir + "\\QSeqFamilyMats\\QSeqFamilyMatsAsymmetric%d.txt" % chr)
    printAndParseFiles.printDictionaryMatrixToFile(
        QagDictSym,
        calcDir + "\\QSeqFamilyMats\\QSeqFamilyMatsSymmetric%d.txt" % chr)

    QgammaAsym = FindQ.findQgamma(QagDictAsym)
    QgammaSym = FindQ.findQgamma(QagDictSym)
    printAndParseFiles.printMatrixToFile(
        QgammaAsym, calcDir + "\\Qgamma\\QgammaAsymmetric%d.txt" % chr)
    printAndParseFiles.printMatrixToFile(
        QgammaSym, calcDir + "\\Qgamma\\QgammaSymmetric%d.txt" % chr)

    print("DONE WITH CHR %d" % chr)
def RunGlobalAlphaTest(beginSeqNum, endSeqNum, firstTime, significantRepeatList, organism, prmPrefix, prmDirectory = 'prm_files/'):
    """
    Runs all Global Alpha test calculations.
    beginSeqNum: Beginning chromosome range.
    endSeqNum: Ending chromosome range (inclusive).
    firstTime: Boolean if first time for all calculations. When it equals
               True the C and P family matrices are computed and written to
               file; otherwise the P family matrices are read back from file.
    significantRepeatList: List of significant transposable element family instances.
    organism: Organism's name.
    prmPrefix: Prefix placed before each chromosome number to form the name
               passed to unpickleRepeats.
    prmDirectory: Directory where all of the .prm files are located.

    Every result is written to a file below "Calculations_<organism>";
    nothing is returned.
    """
    print("GLOBAL ALPHA")

    # All paths below use literal backslash separators (Windows-style). The
    # backslashes are escaped so the strings are unchanged at runtime but no
    # longer rely on unrecognised escape sequences.
    calcDir = "Calculations_" + organism
    pFamilyPath = calcDir + "\\PFamilyMats.txt"

    # to find global alpha values for C and P
    # The explicit comparison is kept on purpose: switching to a plain
    # truthiness test would change which branch non-bool arguments take.
    if firstTime == True:
        # Flatten the per-chromosome repeat collections into a single list.
        AllRepObjs = list(itertools.chain.from_iterable(
            unpickleRepeats(prmPrefix + "%d" % chrNum, prmDirectory)
            for chrNum in range(beginSeqNum, endSeqNum + 1)))
        print(len(AllRepObjs))
        sigReps = printAndParseFiles.SigTEs(AllRepObjs, significantRepeatList)
        print(len(sigReps))

        CfamilyMats = FindCandP.combineMatrices(sigReps)
        print("done with C")
        printAndParseFiles.printListToFile(CfamilyMats, calcDir + "\\CFamilyMats.txt")

        PfamilyMats = FindCandP.getPSymm(CfamilyMats)
        printAndParseFiles.printListToFile(PfamilyMats, pFamilyPath)
        print("done with P")
    else:
        PfamilyMats = printAndParseFiles.parsePfamilyFile(pFamilyPath)

    # to find global alpha values for R*t
    print("R")
    RmatDict = FindR.calculateR(PfamilyMats)
    printAndParseFiles.printMatrixToFile(RmatDict, calcDir + "\\RFamilyMats.txt")

    # to find global alpha values for m*t
    # NOTE(review): presumably the dimension of the substitution matrices -- confirm.
    matSize = 4
    print("mt")
    mtDict = FindMT.calculateMt(RmatDict, matSize)
    printAndParseFiles.printValuesToFile(mtDict, calcDir + "\\MTFamilyVals.txt")

    # to find global alpha values for Q
    print("Q")
    QmatDict = FindQ.calculateQvalues(mtDict, RmatDict, matSize)
    printAndParseFiles.printMatrixToFile(QmatDict, calcDir + "\\QFamilyMats.txt")