def runCactusProgressive(inputDir,
                         jobTreeDir,
                         logLevel=None,
                         retryCount=0,
                         batchSystem="single_machine",
                         rescueJobFrequency=None,
                         skipAlignments=False,
                         buildHal=None,
                         buildFasta=None,
                         buildAvgs=False,
                         jobTreeStats=False,
                         maxThreads=None,
                         maxCpus=None,
                         defaultMemory=None,
                         recursive=None,
                         logFile=None,
                         event=None,
                         extraJobTreeArgumentsString="",
                         profileFile=None):
    """Build the cactus_progressive.py command line and run it with system().

    The shared jobTree/build options are rendered by _fn(); the ``recursive``
    and ``event`` options are rendered with nameValue() and appended after
    them.  When ``profileFile`` is not None the command is wrapped so that it
    runs as ``python -m cProfile -o <profileFile> <cactusRootPath()>/bin/...``.

    Nothing is returned; an info message is logged once system() returns.
    """
    # Keyword arguments keep each value bound to the _fn parameter of the
    # same name.  buildReference is passed as None, exactly as before.
    jobTreeArgs = _fn(jobTreeDir=jobTreeDir,
                      logLevel=logLevel,
                      retryCount=retryCount,
                      batchSystem=batchSystem,
                      rescueJobFrequency=rescueJobFrequency,
                      skipAlignments=skipAlignments,
                      buildAvgs=buildAvgs,
                      buildReference=None,
                      buildHal=buildHal,
                      buildFasta=buildFasta,
                      jobTreeStats=jobTreeStats,
                      maxThreads=maxThreads,
                      maxCpus=maxCpus,
                      defaultMemory=defaultMemory,
                      logFile=logFile,
                      extraJobTreeArgumentsString=extraJobTreeArgumentsString)
    progressiveArgs = " %s %s" % (nameValue("recursive", recursive, bool),
                                  nameValue("event", event))
    command = ("cactus_progressive.py %s" % inputDir) + " " + jobTreeArgs + progressiveArgs

    # Identity comparison: only an explicit None switches profiling off.
    if profileFile is not None:
        command = "python -m cProfile -o %s %s/bin/%s" % (profileFile,
                                                          cactusRootPath(),
                                                          command)
    system(command)
    logger.info("Ran the cactus progressive okay")
def runCactusAddReferenceCoordinates(cactusDiskDatabaseString,
                                     flowerNames,
                                     logLevel=None,
                                     referenceEventString=None,
                                     outgroupEventString=None,
                                     secondaryDatabaseString=None,
                                     bottomUpPhase=None):
    """Run cactus_addReferenceCoordinates, feeding ``flowerNames`` on stdin.

    Each optional argument is rendered into a command-line fragment by
    nameValue() (defined elsewhere); the secondary database string is passed
    under the option name ``secondaryDisk`` with quoting requested.  The
    command is executed through popenPush() and nothing is returned.
    """
    levelName = getLogLevelString2(logLevel)
    bottomUpFlag = nameValue("bottomUpPhase", bottomUpPhase, bool)
    referenceOpt = nameValue("referenceEventString", referenceEventString)
    outgroupOpt = nameValue("outgroupEventString", outgroupEventString)
    secondaryDiskOpt = nameValue("secondaryDisk", secondaryDatabaseString,
                                 quotes=True)

    template = "cactus_addReferenceCoordinates --cactusDisk '%s' %s --logLevel %s %s %s %s"
    arguments = (cactusDiskDatabaseString, secondaryDiskOpt, levelName,
                 referenceOpt, outgroupOpt, bottomUpFlag)
    popenPush(template % arguments, stdinString=flowerNames)
def calculateCoverage(sequenceFile, cigarFile, outputFile, fromGenome=None,
                      depthById=False):
    """Run cactus_coverage and redirect its standard output to ``outputFile``.

    ``fromGenome`` is rendered under the option name ``from`` and
    ``depthById`` as a boolean option, both via nameValue().  An info message
    naming the three files is logged before the command is run with system().
    """
    message = "Calculating coverage of cigar file %s on %s, writing to %s" % (
        cigarFile, sequenceFile, outputFile)
    logger.info(message)

    fromOpt = nameValue("from", fromGenome)
    depthByIdFlag = nameValue("depthById", depthById, bool)
    command = "cactus_coverage %s %s %s %s > %s" % (
        sequenceFile, cigarFile, fromOpt, depthByIdFlag, outputFile)
    system(command)
def runCactusMAFGenerator(mAFFile, cactusDiskDatabaseString, flowerName="0",
                          logLevel=None, referenceEventString=None,
                          showOnlySubstitutionsWithRespectToTheReference=None):
    """Run cactus_MAFGenerator, writing its output to ``mAFFile``.

    The two optional arguments are rendered by nameValue(); the command is
    executed with system() and an info message is logged afterwards.

    NOTE(review): a second definition of runCactusMAFGenerator appears later
    in the visible source; if both are at module level the later one wins.
    """
    levelName = getLogLevelString2(logLevel)
    referenceOpt = nameValue("referenceEventString", referenceEventString, str)
    substitutionsFlag = nameValue(
        "showOnlySubstitutionsWithRespectToTheReference",
        showOnlySubstitutionsWithRespectToTheReference, bool)

    template = "cactus_MAFGenerator --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s %s"
    arguments = (cactusDiskDatabaseString, flowerName, mAFFile, levelName,
                 referenceOpt, substitutionsFlag)
    system(template % arguments)
    logger.info("Created a MAF for the given cactusDisk")
def runCactusCheck(cactusDiskDatabaseString,
                   flowerNames=encodeFlowerNames((0,)),
                   logLevel=None,
                   recursive=None,
                   checkNormalised=None):
    """Run cactus_check, feeding ``flowerNames`` on stdin via popenPush().

    The default for ``flowerNames`` is computed once, when this function is
    defined, by calling encodeFlowerNames((0,)).  ``recursive`` and
    ``checkNormalised`` are rendered as boolean options by nameValue().
    """
    levelName = getLogLevelString2(logLevel)
    recursiveFlag = nameValue("recursive", recursive, bool)
    normalisedFlag = nameValue("checkNormalised", checkNormalised, bool)

    command = "cactus_check --cactusDisk '%s' --logLevel %s %s %s" % (
        cactusDiskDatabaseString, levelName, recursiveFlag, normalisedFlag)
    popenPush(command, stdinString=flowerNames)
    logger.info("Ran cactus check")
def runCactusSetup(cactusDiskDatabaseString, sequences, newickTreeString,
                   logLevel=None, outgroupEvents=None,
                   makeEventHeadersAlphaNumeric=None):
    """Run cactus_setup and return the non-empty lines it produced.

    ``sequences`` is joined with single spaces to form the leading
    arguments.  The command is executed with popenCatch(); whatever it
    returns is split on newlines and the empty entries are discarded.
    """
    levelName = getLogLevelString2(logLevel)
    outgroupOpt = nameValue("outgroupEvents", outgroupEvents, str, quotes=True)
    alphaNumericFlag = nameValue("makeEventHeadersAlphaNumeric",
                                 makeEventHeadersAlphaNumeric, bool)

    template = "cactus_setup %s --speciesTree '%s' --cactusDisk '%s' --logLevel %s %s %s"
    arguments = (" ".join(sequences), newickTreeString,
                 cactusDiskDatabaseString, levelName, outgroupOpt,
                 alphaNumericFlag)
    masterMessages = popenCatch(template % arguments)
    logger.info("Ran cactus setup okay")

    messages = []
    for line in masterMessages.split("\n"):
        if line != '':
            messages.append(line)
    return messages
def runCactusMAFGenerator(mAFFile, cactusDiskDatabaseString, flowerName="0",
                          logLevel=None, referenceEventString=None,
                          showOnlySubstitutionsWithRespectToTheReference=None):
    """Invoke cactus_MAFGenerator so that it writes a MAF to ``mAFFile``.

    Optional arguments are turned into command-line fragments by
    nameValue(); the assembled command is handed to system() and an info
    message is logged once it returns.

    NOTE(review): an earlier definition with the same name is visible in
    this file; if both are at module level this one replaces it.
    """
    resolvedLevel = getLogLevelString2(logLevel)
    fragments = (
        nameValue("referenceEventString", referenceEventString, str),
        nameValue("showOnlySubstitutionsWithRespectToTheReference",
                  showOnlySubstitutionsWithRespectToTheReference, bool),
    )
    required = (cactusDiskDatabaseString, flowerName, mAFFile, resolvedLevel)
    system("cactus_MAFGenerator --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s %s"
           % (required + fragments))
    logger.info("Created a MAF for the given cactusDisk")
def runCactusHalGenerator(cactusDiskDatabaseString,
                          secondaryDatabaseString,
                          flowerNames,
                          referenceEventString,
                          outputFile=None,
                          showOnlySubstitutionsWithRespectToReference=None,
                          logLevel=None):
    """Run cactus_halGenerator, feeding ``flowerNames`` on stdin.

    Both database strings are passed single-quoted on the command line; the
    remaining arguments are rendered by nameValue().  The command is run
    through popenPush() and nothing is returned.
    """
    levelName = getLogLevelString2(logLevel)
    referenceOpt = nameValue("referenceEventString", referenceEventString)
    outputOpt = nameValue("outputFile", outputFile)
    substitutionsFlag = nameValue(
        "showOnlySubstitutionsWithRespectToReference",
        showOnlySubstitutionsWithRespectToReference, bool)

    command = "cactus_halGenerator --cactusDisk '%s' --secondaryDisk '%s' --logLevel %s %s %s %s" % (
        cactusDiskDatabaseString, secondaryDatabaseString, levelName,
        referenceOpt, outputOpt, substitutionsFlag)
    popenPush(command, stdinString=flowerNames)
def calculateExpectations(target, sequences, alignments, modelsFile,
                          expectationsFile, options):
    """Pipe ``alignments`` through cPecanRealign to write expectations.

    The alignments file is streamed with ``cat`` into cPecanRealign, which is
    given the model via the ``loadHmm`` option and told to write to
    ``expectationsFile``.  ``options.optionsToRealign`` is appended verbatim.
    ``target`` is accepted but not used by this function.

    NOTE(review): a later definition with the same name is visible in this
    file; if both are at module level the later one replaces this one.
    """
    # Run cPecanRealign
    loadHmmOpt = nameValue("loadHmm", modelsFile, str)
    command = "cat %s | cPecanRealign --logLevel DEBUG %s %s --outputExpectations=%s %s" % (
        alignments, sequences, loadHmmOpt, expectationsFile,
        options.optionsToRealign)
    system(command)
def runCactusAdjacencyGraphViewer(graphFile, cactusDiskDatabaseString,
                                  flowerName="0", logLevel=None,
                                  includeInternalAdjacencies=False):
    """Run cactus_adjacencyGraphViewer, writing the graph to ``graphFile``.

    ``includeInternalAdjacencies`` is rendered as a boolean option by
    nameValue() and appended to the command.  Previously the rendered option
    was computed but never placed on the command line, so the argument had
    no effect.

    NOTE(review): assumes the binary accepts the option name
    ``includeInternalAdjacencies`` that this wrapper already rendered.

    The command is executed with system(); nothing is returned.
    """
    logLevel = getLogLevelString2(logLevel)
    includeInternalAdjacencies = nameValue("includeInternalAdjacencies",
                                           includeInternalAdjacencies, bool)
    # The trailing %s carries the rendered includeInternalAdjacencies option.
    command = "cactus_adjacencyGraphViewer --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s" % (
        cactusDiskDatabaseString, flowerName, graphFile, logLevel,
        includeInternalAdjacencies)
    system(command)
    logger.info("Created a break point graph of the problem")
def runCactusCreateMultiCactusProject(experimentFile, outputDir, logLevel=None,
                                      fixNames=True, root=None):
    """Run cactus_createMultiCactusProject.py for ``experimentFile``.

    ``fixNames`` is passed as ``--fixNames=<str(fixNames)>`` and ``root`` is
    rendered (quoted) by nameValue().  The command runs through system().
    """
    # NOTE(review): the resolved log level is never placed on the command
    # line; the call is kept so behaviour is unchanged.
    resolvedLevel = getLogLevelString2(logLevel)
    rootOpt = nameValue("root", root, str, quotes=True)

    template = "cactus_createMultiCactusProject.py %s %s --fixNames=%s %s"
    system(template % (experimentFile, outputDir, str(fixNames), rootOpt))
    logger.info("Ran the cactus create multi project")
def runCactusBlast(sequenceFiles, outputFile, jobTreeDir,
                   chunkSize=None, overlapSize=None,
                   logLevel=None,
                   blastString=None,
                   selfBlastString=None,
                   compressFiles=None,
                   lastzMemory=None,
                   targetSequenceFiles=None):
    """Build the cactus_blast.py command line, log it and run it.

    ``sequenceFiles`` is joined with single spaces to form the leading
    arguments and ``outputFile`` is passed after ``--cigars``.  When
    ``targetSequenceFiles`` is not None it is also joined with single spaces
    before being rendered (quoted) by nameValue().  All other optional
    arguments are rendered by nameValue() as well.

    The command is executed with system(); nothing is returned.
    """
    logLevel = getLogLevelString2(logLevel)
    chunkSize = nameValue("chunkSize", chunkSize, int)
    overlapSize = nameValue("overlapSize", overlapSize, int)
    blastString = nameValue("blastString", blastString, str)
    selfBlastString = nameValue("selfBlastString", selfBlastString, str)
    compressFiles = nameValue("compressFiles", compressFiles, bool)
    lastzMemory = nameValue("lastzMemory", lastzMemory, int)
    # Identity comparison: an empty sequence is still joined, only None is
    # left for nameValue() to handle.
    if targetSequenceFiles is not None:
        targetSequenceFiles = " ".join(targetSequenceFiles)
    targetSequenceFiles = nameValue("targetSequenceFiles", targetSequenceFiles,
                                    quotes=True)
    command = "cactus_blast.py %s --cigars %s %s %s %s %s %s %s %s --jobTree %s --logLevel %s" % (
        " ".join(sequenceFiles), outputFile, chunkSize, overlapSize,
        blastString, selfBlastString, compressFiles, lastzMemory,
        targetSequenceFiles, jobTreeDir, logLevel)
    logger.info("Running command : %s" % command)
    system(command)
    logger.info("Ran the cactus_blast command okay")
def runCactusFastaGenerator(cactusDiskDatabaseString, flowerName, outputFile,
                            referenceEventString=None, logLevel=None):
    """Run cactus_fastaGenerator, writing its output to ``outputFile``.

    ``referenceEventString`` is rendered by nameValue() and appended as the
    last argument.  The command is executed with system().
    """
    levelName = getLogLevelString2(logLevel)
    referenceOpt = nameValue("referenceEventString", referenceEventString)
    template = "cactus_fastaGenerator --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s"
    system(template % (cactusDiskDatabaseString, flowerName, outputFile,
                       levelName, referenceOpt))
def trimGenome(sequenceFile, coverageFile, outputFile, complement=False,
               flanking=0, minSize=1, windowSize=10, threshold=1, depth=None):
    """Run cactus_trimSequences.py and redirect its stdout to ``outputFile``.

    The six tuning arguments are rendered by nameValue() and placed before
    the sequence and coverage file arguments.  Runs through system().
    """
    optionFragments = (
        nameValue("complement", complement, valueType=bool),
        nameValue("flanking", flanking),
        nameValue("minSize", minSize),
        nameValue("windowSize", windowSize),
        nameValue("threshold", threshold),
        nameValue("depth", depth),
    )
    fileArguments = (sequenceFile, coverageFile, outputFile)
    template = "cactus_trimSequences.py %s %s %s %s %s %s %s %s > %s"
    system(template % (optionFragments + fileArguments))
def runCactusTreeStats(outputFile, cactusDiskDatabaseString, flowerName='0',
                       logLevel=None, referenceEventString=None):
    """Run cactus_treeStats, writing its output to ``outputFile``.

    ``referenceEventString`` is rendered by nameValue(); the command is run
    with system() and an info message is logged afterwards.

    NOTE(review): a later definition with the same name is visible in this
    file; if both are at module level the later one replaces this one.
    """
    levelName = getLogLevelString2(logLevel)
    referenceOpt = nameValue("referenceEventString", referenceEventString, str)
    template = "cactus_treeStats --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s"
    system(template % (cactusDiskDatabaseString, flowerName, outputFile,
                       levelName, referenceOpt))
    logger.info("Ran the cactus tree stats command apprently okay")
def runCPecanEm(sequenceFiles, alignmentsFile, outputModelFile,
                inputModelFile=None,
                modelType=None,
                jobTreeDir=None,
                iterations=None, randomStart=None,
                trials=None,
                optionsToRealign=None,
                logLevel=None,
                updateTheBand=None,
                maxAlignmentLengthPerJob=None,
                maxAlignmentLengthToSample=None,
                useDefaultModelAsStart=None,
                setJukesCantorStartingEmissions=None,
                trainEmissions=None,
                tieEmissions=None,
                outputTrialHmms = None,
                outputXMLModelFile = None,
                blastScoringMatrixFile=None):
    """Build the cPecanEm command line and run it with system().

    ``sequenceFiles`` is joined with single spaces and passed single-quoted
    after ``--sequences``.  Every optional argument is rendered by
    nameValue(); ``jobTreeDir`` and ``inputModelFile`` use the option names
    ``jobTree`` and ``inputModel``.  Nothing is returned.
    """
    # NOTE(review): the resolved log level is never placed on the command
    # line; the call is kept so behaviour is unchanged.
    resolvedLevel = getLogLevelString2(logLevel)

    # Rendered options, listed in the order they appear on the command line.
    options = [
        nameValue("iterations", iterations, int),
        nameValue("trials", trials, int),
        nameValue("randomStart", randomStart, bool),
        nameValue("jobTree", jobTreeDir, str),
        nameValue("inputModel", inputModelFile, str),
        nameValue("optionsToRealign", optionsToRealign, quotes=True),
        nameValue("modelType", modelType, str),
        nameValue("maxAlignmentLengthPerJob", maxAlignmentLengthPerJob, int),
        nameValue("maxAlignmentLengthToSample", maxAlignmentLengthToSample, int),
        nameValue("updateTheBand", updateTheBand, bool),
        nameValue("useDefaultModelAsStart", useDefaultModelAsStart, bool),
        nameValue("trainEmissions", trainEmissions, bool),
        nameValue("tieEmissions", tieEmissions, bool),
        nameValue("setJukesCantorStartingEmissions",
                  setJukesCantorStartingEmissions, float),
        nameValue("outputTrialHmms", outputTrialHmms, bool),
        nameValue("outputXMLModelFile", outputXMLModelFile, str),
        nameValue("blastScoringMatrixFile", blastScoringMatrixFile, str),
    ]

    # One " %s" placeholder per rendered option, after the fixed arguments.
    template = ("cPecanEm --sequences '%s' --alignments %s --outputModel %s"
                + " %s" * len(options))
    fixedArguments = [" ".join(sequenceFiles), alignmentsFile, outputModelFile]
    system(template % tuple(fixedArguments + options))
def _fn(jobTreeDir,
        logLevel=None, retryCount=0,
        batchSystem="single_machine",
        rescueJobFrequency=None,
        skipAlignments=False,
        buildAvgs=False,
        buildReference=False,
        buildHal=False,
        buildFasta=False,
        jobTreeStats=False,
        maxThreads=None,
        maxCpus=None,
        defaultMemory=None,
        logFile=None,
        extraJobTreeArgumentsString=""):
    """Return the shared build/jobTree portion of a cactus command line.

    Each argument is rendered by nameValue().  ``rescueJobFrequency`` is
    emitted under the option name ``rescueJobsFrequency`` and
    ``jobTreeStats`` under ``stats``.  ``extraJobTreeArgumentsString`` is
    appended verbatim as the final item.
    """
    levelName = getLogLevelString2(logLevel)

    # Build flags that precede --jobTree on the command line.
    leadingFlags = [
        nameValue("skipAlignments", skipAlignments, bool),
        nameValue("buildAvgs", buildAvgs, bool),
        nameValue("buildReference", buildReference, bool),
    ]
    buildHalFlag = nameValue("buildHal", buildHal, bool)
    buildFastaFlag = nameValue("buildFasta", buildFasta, bool)

    # Jobtree args
    batchSystemOpt = nameValue("batchSystem", batchSystem, str, quotes=True)
    retryCountOpt = nameValue("retryCount", retryCount, int)
    rescueOpt = nameValue("rescueJobsFrequency", rescueJobFrequency, int)
    statsFlag = nameValue("stats", jobTreeStats, bool)
    maxThreadsOpt = nameValue("maxThreads", maxThreads, int)
    maxCpusOpt = nameValue("maxCpus", maxCpus, int)
    defaultMemoryOpt = nameValue("defaultMemory", defaultMemory, int)
    logFileOpt = nameValue("logFile", logFile, str)

    # Note that logFile precedes defaultMemory in the emitted string.
    trailing = [buildHalFlag, buildFastaFlag, batchSystemOpt, retryCountOpt,
                rescueOpt, statsFlag, maxThreadsOpt, maxCpusOpt, logFileOpt,
                defaultMemoryOpt, extraJobTreeArgumentsString]

    template = "%s %s %s --jobTree %s --logLevel %s" + " %s" * len(trailing)
    return template % tuple(leadingFlags + [jobTreeDir, levelName] + trailing)
def runCactusTreeStats(outputFile, cactusDiskDatabaseString, flowerName='0',
                       logLevel=None, referenceEventString=None):
    """Invoke cactus_treeStats so that it writes to ``outputFile``.

    The optional reference event is turned into a command-line fragment by
    nameValue(); the command is handed to system() and an info message is
    logged once it returns.

    NOTE(review): an earlier definition with the same name is visible in
    this file; if both are at module level this one replaces it.
    """
    resolvedLevel = getLogLevelString2(logLevel)
    arguments = (
        cactusDiskDatabaseString,
        flowerName,
        outputFile,
        resolvedLevel,
        nameValue("referenceEventString", referenceEventString, str),
    )
    command = "cactus_treeStats --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s" % arguments
    system(command)
    logger.info("Ran the cactus tree stats command apprently okay")
def runCactusReference(cactusDiskDatabaseString, flowerNames, logLevel=None,
                       matchingAlgorithm=None,
                       referenceEventString=None,
                       permutations=None,
                       useSimulatedAnnealing=None,
                       theta=None,
                       phi=None,
                       maxWalkForCalculatingZ=None,
                       ignoreUnalignedGaps=None,
                       wiggle=None,
                       numberOfNs=None,
                       minNumberOfSequencesToSupportAdjacency=None,
                       makeScaffolds=None):
    """Run cactus_reference and return the non-empty lines it produced.

    ``flowerNames`` is fed on stdin via popenCatch(); every optional argument
    is rendered by nameValue().  The value returned by popenCatch() is split
    on newlines and the empty entries are discarded.
    """
    levelName = getLogLevelString2(logLevel)

    # Rendered options, in command-line order.
    options = [
        nameValue("matchingAlgorithm", matchingAlgorithm),
        nameValue("referenceEventString", referenceEventString),
        nameValue("permutations", permutations, int),
        nameValue("useSimulatedAnnealing", useSimulatedAnnealing, bool),
        nameValue("theta", theta, float),
        nameValue("phi", phi, float),
        nameValue("maxWalkForCalculatingZ", maxWalkForCalculatingZ, int),
        nameValue("ignoreUnalignedGaps", ignoreUnalignedGaps, bool),
        nameValue("wiggle", wiggle, float),
        nameValue("numberOfNs", numberOfNs, int),
        nameValue("minNumberOfSequencesToSupportAdjacency",
                  minNumberOfSequencesToSupportAdjacency, int),
        nameValue("makeScaffolds", makeScaffolds, bool),
    ]

    template = ("cactus_reference --cactusDisk '%s' --logLevel %s"
                + " %s" * len(options))
    command = template % tuple([cactusDiskDatabaseString, levelName] + options)
    masterMessages = popenCatch(command, stdinString=flowerNames)
    logger.info("Ran cactus_reference okay")

    messages = []
    for line in masterMessages.split("\n"):
        if line != '':
            messages.append(line)
    return messages
def runCactusBar(cactusDiskDatabaseString, flowerNames, logLevel=None,
                 spanningTrees=None, maximumLength=None,
                 gapGamma=None,
                 matchGamma=None,
                 splitMatrixBiggerThanThis=None,
                 anchorMatrixBiggerThanThis=None,
                 repeatMaskMatrixBiggerThanThis=None,
                 diagonalExpansion=None,
                 constraintDiagonalTrim=None,
                 minimumBlockDegree=None,
                 minimumIngroupDegree=None,
                 minimumOutgroupDegree=None,
                 alignAmbiguityCharacters=None,
                 pruneOutStubAlignments=None,
                 useProgressiveMerging=None,
                 calculateWhichEndsToComputeSeparately=None,
                 largeEndSize=None,
                 endAlignmentsToPrecomputeOutputFile=None,
                 precomputedAlignments=None,
                 ingroupCoverageFile=None,
                 minimumSizeToRescue=None,
                 minimumCoverageToRescue=None,
                 minimumNumberOfSpecies=None):
    """Run cactus_bar and return the non-empty lines it produced.

    ``flowerNames`` is fed on stdin via popenCatch().  Every optional
    argument is rendered by nameValue(); ``minimumBlockDegree`` is emitted
    under the option name ``minimumDegree``.  The value returned by
    popenCatch() is split on newlines and empty entries are discarded.
    """
    levelName = getLogLevelString2(logLevel)

    # Rendered options, in the order they appear on the command line.
    options = [
        nameValue("spanningTrees", spanningTrees, int),
        nameValue("maximumLength", maximumLength, int),
        nameValue("gapGamma", gapGamma, float),
        nameValue("matchGamma", matchGamma, float),
        nameValue("splitMatrixBiggerThanThis", splitMatrixBiggerThanThis, int),
        nameValue("anchorMatrixBiggerThanThis", anchorMatrixBiggerThanThis, int),
        nameValue("repeatMaskMatrixBiggerThanThis",
                  repeatMaskMatrixBiggerThanThis, int),
        nameValue("constraintDiagonalTrim", constraintDiagonalTrim, int),
        nameValue("minimumDegree", minimumBlockDegree, int),
        nameValue("minimumIngroupDegree", minimumIngroupDegree, int),
        nameValue("minimumOutgroupDegree", minimumOutgroupDegree, int),
        nameValue("alignAmbiguityCharacters", alignAmbiguityCharacters, bool),
        nameValue("pruneOutStubAlignments", pruneOutStubAlignments, bool),
        nameValue("diagonalExpansion", diagonalExpansion, int),
        nameValue("useProgressiveMerging", useProgressiveMerging, bool),
        nameValue("calculateWhichEndsToComputeSeparately",
                  calculateWhichEndsToComputeSeparately, bool),
        nameValue("largeEndSize", largeEndSize, int),
        nameValue("endAlignmentsToPrecomputeOutputFile",
                  endAlignmentsToPrecomputeOutputFile, str),
        nameValue("precomputedAlignments", precomputedAlignments, str,
                  quotes=True),
        nameValue("ingroupCoverageFile", ingroupCoverageFile, str,
                  quotes=True),
        nameValue("minimumSizeToRescue", minimumSizeToRescue, int),
        nameValue("minimumCoverageToRescue", minimumCoverageToRescue, float),
        nameValue("minimumNumberOfSpecies", minimumNumberOfSpecies, int),
    ]

    template = ("cactus_bar --cactusDisk '%s' --logLevel %s"
                + " %s" * len(options))
    command = template % tuple([cactusDiskDatabaseString, levelName] + options)
    masterMessages = popenCatch(command, stdinString=flowerNames)
    logger.info("Ran cactus_bar okay")

    messages = []
    for line in masterMessages.split("\n"):
        if line != '':
            messages.append(line)
    return messages
def runCactusCaf(cactusDiskDatabaseString, alignments,
                 flowerNames=encodeFlowerNames((0,)),
                 logLevel=None,
                 writeDebugFiles=False,
                 annealingRounds=None,
                 deannealingRounds=None,
                 trim=None,
                 minimumTreeCoverage=None,
                 blockTrim=None,
                 minimumBlockDegree=None,
                 minimumIngroupDegree=None,
                 minimumOutgroupDegree=None,
                 alignmentFilter=None,
                 lastzArguments=None,
                 minimumSequenceLengthForBlast=None,
                 maxAdjacencyComponentSizeRatio=None,
                 constraints=None,
                 minLengthForChromosome=None,
                 proportionOfUnalignedBasesForNewChromosome=None,
                 maximumMedianSequenceLengthBetweenLinkedEnds=None,
                 realign=None,
                 realignArguments=None,
                 phylogenyNumTrees=None,
                 phylogenyScoringMethod=None,
                 phylogenyRootingMethod=None,
                 phylogenyBreakpointScalingFactor=None,
                 phylogenySkipSingleCopyBlocks=None,
                 phylogenyMaxBaseDistance=None,
                 phylogenyMaxBlockDistance=None,
                 phylogenyDebugFile=None,
                 phylogenyKeepSingleDegreeBlocks=None,
                 phylogenyTreeBuildingMethod=None,
                 phylogenyCostPerDupPerBase=None,
                 phylogenyCostPerLossPerBase=None,
                 referenceEventHeader=None,
                 phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce=None,
                 numTreeBuildingThreads=None,
                 doPhylogeny=None,
                 removeLargestBlock=None,
                 phylogenyNucleotideScalingFactor=None,
                 minimumBlockDegreeToCheckSupport=None,
                 minimumBlockHomologySupport=None,
                 removeRecoverableChains=None,
                 minimumNumberOfSpecies=None,
                 maxRecoverableChainsIterations=None,
                 maxRecoverableChainLength=None,
                 phylogenyHomologyUnitType=None,
                 phylogenyDistanceCorrectionMethod=None):
    """Run cactus_caf and return the non-empty lines it produced.

    ``flowerNames`` is fed on stdin via popenCatch(); its default is computed
    once, when this function is defined, by encodeFlowerNames((0,)).  Every
    optional argument is rendered by nameValue(); ``minimumBlockDegree`` is
    emitted as ``minimumDegree`` and ``doPhylogeny`` as ``phylogeny``.

    NOTE(review): ``writeDebugFiles`` and ``removeLargestBlock`` are accepted
    but never used in the body.
    """
    # Strip newline characters so the disk string stays on one command line.
    diskString = cactusDiskDatabaseString.replace('\n', '')
    levelName = getLogLevelString2(logLevel)

    # Rendered options, in the order they appear on the command line.
    options = [
        nameValue("alignments", alignments),
        nameValue("annealingRounds", annealingRounds, quotes=True),
        nameValue("deannealingRounds", deannealingRounds, quotes=True),
        nameValue("trim", trim, quotes=True),
        nameValue("minimumTreeCoverage", minimumTreeCoverage, float),
        nameValue("blockTrim", blockTrim, int),
        nameValue("minimumDegree", minimumBlockDegree, int),
        nameValue("minimumIngroupDegree", minimumIngroupDegree, int),
        nameValue("minimumOutgroupDegree", minimumOutgroupDegree, int),
        nameValue("alignmentFilter", alignmentFilter),
        nameValue("lastzArguments", lastzArguments, quotes=True),
        nameValue("minimumSequenceLengthForBlast",
                  minimumSequenceLengthForBlast, int),
        nameValue("maxAdjacencyComponentSizeRatio",
                  maxAdjacencyComponentSizeRatio, float),
        nameValue("constraints", constraints),
        nameValue("minLengthForChromosome", minLengthForChromosome, int),
        nameValue("proportionOfUnalignedBasesForNewChromosome",
                  proportionOfUnalignedBasesForNewChromosome, float),
        nameValue("maximumMedianSequenceLengthBetweenLinkedEnds",
                  maximumMedianSequenceLengthBetweenLinkedEnds, int),
        nameValue("realign", realign, bool),
        nameValue("realignArguments", realignArguments, quotes=True),
        nameValue("phylogenyNumTrees", phylogenyNumTrees, int),
        nameValue("phylogenyRootingMethod", phylogenyRootingMethod,
                  quotes=True),
        nameValue("phylogenyScoringMethod", phylogenyScoringMethod,
                  quotes=True),
        nameValue("phylogenyBreakpointScalingFactor",
                  phylogenyBreakpointScalingFactor),
        nameValue("phylogenySkipSingleCopyBlocks",
                  phylogenySkipSingleCopyBlocks, bool),
        nameValue("phylogenyMaxBaseDistance", phylogenyMaxBaseDistance),
        nameValue("phylogenyMaxBlockDistance", phylogenyMaxBlockDistance),
        nameValue("phylogenyDebugFile", phylogenyDebugFile),
        nameValue("phylogenyKeepSingleDegreeBlocks",
                  phylogenyKeepSingleDegreeBlocks, bool),
        nameValue("phylogenyTreeBuildingMethod", phylogenyTreeBuildingMethod),
        nameValue("phylogenyCostPerDupPerBase", phylogenyCostPerDupPerBase),
        nameValue("phylogenyCostPerLossPerBase", phylogenyCostPerLossPerBase),
        nameValue("referenceEventHeader", referenceEventHeader, quotes=True),
        nameValue("phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce",
                  phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce),
        nameValue("numTreeBuildingThreads", numTreeBuildingThreads),
        nameValue("phylogeny", doPhylogeny, bool),
        nameValue("minimumBlockDegreeToCheckSupport",
                  minimumBlockDegreeToCheckSupport),
        nameValue("minimumBlockHomologySupport", minimumBlockHomologySupport),
        nameValue("phylogenyNucleotideScalingFactor",
                  phylogenyNucleotideScalingFactor),
        nameValue("removeRecoverableChains", removeRecoverableChains),
        nameValue("minimumNumberOfSpecies", minimumNumberOfSpecies, int),
        nameValue("phylogenyHomologyUnitType", phylogenyHomologyUnitType,
                  quotes=True),
        nameValue("phylogenyDistanceCorrectionMethod",
                  phylogenyDistanceCorrectionMethod, quotes=True),
        nameValue("maxRecoverableChainsIterations",
                  maxRecoverableChainsIterations, int),
        nameValue("maxRecoverableChainLength", maxRecoverableChainLength, int),
    ]

    # One " %s" placeholder per rendered option, after the fixed arguments.
    template = ("cactus_caf --cactusDisk '%s' --logLevel %s"
                + " %s" * len(options))
    command = template % tuple([diskString, levelName] + options)
    masterMessages = popenCatch(command, stdinString=flowerNames)
    logger.info("Ran cactus_core okay")

    messages = []
    for line in masterMessages.split("\n"):
        if line != '':
            messages.append(line)
    return messages
def runConvertAlignmentsToInternalNames(cactusDiskString, alignmentsFile,
                                        outputFile, flowerName,
                                        isBedFile=False):
    """Run cactus_convertAlignmentsToInternalNames for a single flower.

    ``flowerName`` is wrapped in a one-element tuple, encoded with
    encodeFlowerNames() and fed on stdin.  ``isBedFile`` is rendered as the
    boolean option ``bed`` by nameValue().  The value returned by
    popenCatch() is discarded.
    """
    bedFlag = nameValue("bed", isBedFile, bool)
    template = "cactus_convertAlignmentsToInternalNames --cactusDisk '%s' %s %s %s"
    command = template % (cactusDiskString, alignmentsFile, outputFile, bedFlag)
    popenCatch(command, stdinString=encodeFlowerNames((flowerName,)))
def calculateExpectations(target, sequences, alignments, modelsFile,
                          expectationsFile, options):
    """Stream ``alignments`` into cPecanRealign to produce expectations.

    The command cats the alignments file into cPecanRealign at DEBUG log
    level, passes the model through the ``loadHmm`` option, directs the
    expectations to ``expectationsFile`` and appends
    ``options.optionsToRealign`` unchanged.  ``target`` is not used here.

    NOTE(review): an earlier definition with the same name is visible in
    this file; if both are at module level this one replaces it.
    """
    # Run cPecanRealign
    template = "cat %s | cPecanRealign --logLevel DEBUG %s %s --outputExpectations=%s %s"
    arguments = (
        alignments,
        sequences,
        nameValue("loadHmm", modelsFile, str),
        expectationsFile,
        options.optionsToRealign,
    )
    system(template % arguments)