Exemple #1
0
def runCactusProgressive(inputDir,
                      jobTreeDir,
                      logLevel=None, retryCount=0,
                      batchSystem="single_machine",
                      rescueJobFrequency=None,
                      skipAlignments=False,
                      buildHal=None,
                      buildFasta=None,
                      buildAvgs=False,
                      jobTreeStats=False,
                      maxThreads=None,
                      maxCpus=None,
                      defaultMemory=None,
                      recursive=None,
                      logFile=None,
                      event=None,
                      extraJobTreeArgumentsString="",
                      profileFile=None):
    """Assemble the ``cactus_progressive.py`` command line and run it.

    The workflow/jobTree options are formatted by ``_fn``; ``recursive``
    and ``event`` are appended afterwards via ``nameValue``.

    If ``profileFile`` is not None the command is wrapped in
    ``python -m cProfile -o <profileFile>`` and the script is addressed
    through ``<cactusRootPath()>/bin/``.
    """
    # The eighth positional argument of _fn is buildReference; this wrapper
    # has no such parameter and passes None for it.
    sharedOptions = _fn(jobTreeDir,
                        logLevel, retryCount, batchSystem, rescueJobFrequency, skipAlignments,
                        buildAvgs, None,
                        buildHal,
                        buildFasta,
                        jobTreeStats, maxThreads, maxCpus, defaultMemory, logFile,
                        extraJobTreeArgumentsString=extraJobTreeArgumentsString)
    progressiveOptions = " %s %s" % (nameValue("recursive", recursive, bool),
                                     nameValue("event", event))
    command = ("cactus_progressive.py %s" % inputDir) + " " + sharedOptions + progressiveOptions
    # Identity comparison is the correct test against the None singleton.
    if profileFile is not None:
        command = "python -m cProfile -o %s %s/bin/%s" % (profileFile, cactusRootPath(), command)
    system(command)
    logger.info("Ran the cactus progressive okay")
Exemple #2
0
def runCactusAddReferenceCoordinates(cactusDiskDatabaseString, flowerNames, logLevel=None, referenceEventString=None, outgroupEventString=None, secondaryDatabaseString=None, bottomUpPhase=None):
    """Run ``cactus_addReferenceCoordinates`` through ``popenPush``.

    ``flowerNames`` is handed to ``popenPush`` as ``stdinString``. The
    optional arguments are converted to command-line fragments by
    ``nameValue`` (the secondary database string with ``quotes=True``).
    """
    resolvedLogLevel = getLogLevelString2(logLevel)
    bottomUpOption = nameValue("bottomUpPhase", bottomUpPhase, bool)
    referenceOption = nameValue("referenceEventString", referenceEventString)
    outgroupOption = nameValue("outgroupEventString", outgroupEventString)
    secondaryDiskOption = nameValue("secondaryDisk", secondaryDatabaseString, quotes=True)
    template = "cactus_addReferenceCoordinates --cactusDisk '%s' %s --logLevel %s %s %s %s"
    popenPush(
        template % (
            cactusDiskDatabaseString,
            secondaryDiskOption,
            resolvedLogLevel,
            referenceOption,
            outgroupOption,
            bottomUpOption,
        ),
        stdinString=flowerNames)
Exemple #3
0
def calculateCoverage(sequenceFile, cigarFile, outputFile, fromGenome=None, depthById=False):
    """Run ``cactus_coverage`` and redirect its stdout to ``outputFile``.

    ``fromGenome`` and ``depthById`` are turned into the ``from`` and
    ``depthById`` options by ``nameValue``.
    """
    logger.info("Calculating coverage of cigar file %s on %s, writing to %s"
                % (cigarFile, sequenceFile, outputFile))
    fromOption = nameValue("from", fromGenome)
    depthOption = nameValue("depthById", depthById, bool)
    command = "cactus_coverage %s %s %s %s > %s" % (
        sequenceFile, cigarFile, fromOption, depthOption, outputFile)
    system(command)
Exemple #4
0
def runCactusMAFGenerator(mAFFile, cactusDiskDatabaseString, flowerName="0",
                          logLevel=None, referenceEventString=None,
                          showOnlySubstitutionsWithRespectToTheReference=None):
    """Run ``cactus_MAFGenerator`` with ``mAFFile`` as its ``--outputFile``.

    The two optional arguments are formatted by ``nameValue`` and appended
    after the log level.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)
    referenceOption = nameValue("referenceEventString", referenceEventString, str)
    substitutionsOption = nameValue("showOnlySubstitutionsWithRespectToTheReference",
                                    showOnlySubstitutionsWithRespectToTheReference, bool)
    template = "cactus_MAFGenerator --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s %s"
    command = template % (cactusDiskDatabaseString, flowerName, mAFFile,
                          resolvedLogLevel, referenceOption, substitutionsOption)
    system(command)
    logger.info("Created a MAF for the given cactusDisk")
Exemple #5
0
def runCactusCheck(cactusDiskDatabaseString,
                    flowerNames=encodeFlowerNames((0,)),
                    logLevel=None,
                    recursive=None,
                    checkNormalised=None):
    """Run ``cactus_check`` through ``popenPush``.

    ``flowerNames`` is passed as ``stdinString``; its default is computed
    once, at definition time, by ``encodeFlowerNames((0,))``.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)
    recursiveOption = nameValue("recursive", recursive, bool)
    normalisedOption = nameValue("checkNormalised", checkNormalised, bool)
    command = "cactus_check --cactusDisk '%s' --logLevel %s %s %s" % (
        cactusDiskDatabaseString, resolvedLogLevel, recursiveOption, normalisedOption)
    popenPush(command, stdinString=flowerNames)
    logger.info("Ran cactus check")
Exemple #6
0
def runCactusSetup(cactusDiskDatabaseString, sequences,
                   newickTreeString, logLevel=None, outgroupEvents=None,
                   makeEventHeadersAlphaNumeric=None):
    """Run ``cactus_setup`` and return its non-empty output lines.

    ``sequences`` is joined with single spaces to form the leading
    arguments. The text returned by ``popenCatch`` is split on newlines and
    empty entries are dropped.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)
    outgroupOption = nameValue("outgroupEvents", outgroupEvents, str, quotes=True)
    alphaNumericOption = nameValue("makeEventHeadersAlphaNumeric", makeEventHeadersAlphaNumeric, bool)
    template = "cactus_setup %s --speciesTree '%s' --cactusDisk '%s' --logLevel %s %s %s"
    command = template % (" ".join(sequences), newickTreeString,
                          cactusDiskDatabaseString, resolvedLogLevel,
                          outgroupOption, alphaNumericOption)
    output = popenCatch(command)
    logger.info("Ran cactus setup okay")
    messages = []
    for line in output.split("\n"):
        if line != '':
            messages.append(line)
    return messages
Exemple #7
0
def runCactusMAFGenerator(mAFFile,
                          cactusDiskDatabaseString,
                          flowerName="0",
                          logLevel=None,
                          referenceEventString=None,
                          showOnlySubstitutionsWithRespectToTheReference=None):
    """Invoke ``cactus_MAFGenerator``, writing to ``mAFFile``.

    Optional arguments are converted to command-line fragments by
    ``nameValue`` before being substituted into the command.
    """
    arguments = (
        cactusDiskDatabaseString,
        flowerName,
        mAFFile,
        getLogLevelString2(logLevel),
        nameValue("referenceEventString", referenceEventString, str),
        nameValue("showOnlySubstitutionsWithRespectToTheReference",
                  showOnlySubstitutionsWithRespectToTheReference, bool),
    )
    system("cactus_MAFGenerator --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s %s" % arguments)
    logger.info("Created a MAF for the given cactusDisk")
Exemple #8
0
def runCactusHalGenerator(cactusDiskDatabaseString,
                          secondaryDatabaseString,
                          flowerNames,
                          referenceEventString,
                          outputFile=None,
                          showOnlySubstitutionsWithRespectToReference=None,
                          logLevel=None):
    """Run ``cactus_halGenerator`` through ``popenPush``.

    ``flowerNames`` is passed as ``stdinString``. The reference event,
    output file and substitutions flag are formatted by ``nameValue``.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)
    referenceOption = nameValue("referenceEventString", referenceEventString)
    outputOption = nameValue("outputFile", outputFile)
    substitutionsOption = nameValue("showOnlySubstitutionsWithRespectToReference",
                                    showOnlySubstitutionsWithRespectToReference, bool)
    command = "cactus_halGenerator --cactusDisk '%s' --secondaryDisk '%s' --logLevel %s %s %s %s" % (
        cactusDiskDatabaseString, secondaryDatabaseString, resolvedLogLevel,
        referenceOption, outputOption, substitutionsOption)
    popenPush(command, stdinString=flowerNames)
Exemple #9
0
def calculateExpectations(target, sequences, alignments, modelsFile,
                          expectationsFile, options):
    """Pipe ``alignments`` into ``cPecanRealign`` to write expectations.

    ``modelsFile`` is supplied via the ``loadHmm`` option and
    ``options.optionsToRealign`` is appended verbatim. ``target`` is not
    used by this function.
    """
    # Run cPecanRealign
    loadHmmOption = nameValue("loadHmm", modelsFile, str)
    command = "cat %s | cPecanRealign --logLevel DEBUG %s %s --outputExpectations=%s %s" % (
        alignments, sequences, loadHmmOption, expectationsFile,
        options.optionsToRealign)
    system(command)
Exemple #10
0
def runCactusAdjacencyGraphViewer(graphFile,
                             cactusDiskDatabaseString, flowerName="0",
                             logLevel=None, includeInternalAdjacencies=False):
    """Run ``cactus_adjacencyGraphViewer``, writing to ``graphFile``.

    ``includeInternalAdjacencies`` is formatted by ``nameValue`` and
    appended to the command. Previously the formatted option was computed
    but never substituted into the command, so the argument had no effect.

    NOTE(review): assumes the ``cactus_adjacencyGraphViewer`` binary accepts
    an ``includeInternalAdjacencies`` option -- confirm against its usage.
    """
    logLevel = getLogLevelString2(logLevel)
    includeInternalAdjacencies = nameValue("includeInternalAdjacencies", includeInternalAdjacencies, bool)
    system("cactus_adjacencyGraphViewer --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s" \
                    % (cactusDiskDatabaseString, flowerName, graphFile, logLevel,
                       includeInternalAdjacencies))
    logger.info("Created a break point graph of the problem")
Exemple #11
0
def runCactusCreateMultiCactusProject(experimentFile, outputDir,
                                      logLevel=None, fixNames=True,
                                      root=None):
    """Run ``cactus_createMultiCactusProject.py`` for an experiment file.

    ``fixNames`` is rendered with ``str`` into ``--fixNames=<value>`` and
    ``root`` is formatted by ``nameValue`` with quotes.
    """
    # The resolved log level is not placed on the command line; the call is
    # kept exactly as the original made it.
    getLogLevelString2(logLevel)
    rootOption = nameValue("root", root, str, quotes=True)
    fixNamesText = str(fixNames)
    system("cactus_createMultiCactusProject.py %s %s --fixNames=%s %s"
           % (experimentFile, outputDir, fixNamesText, rootOption))
    logger.info("Ran the cactus create multi project")
Exemple #12
0
def runCactusBlast(sequenceFiles, outputFile, jobTreeDir,
                   chunkSize=None, overlapSize=None,
                   logLevel=None,
                   blastString=None,
                   selfBlastString=None,
                   compressFiles=None,
                   lastzMemory=None,
                   targetSequenceFiles=None):
    """Assemble and run the ``cactus_blast.py`` command line.

    ``sequenceFiles`` is joined with single spaces to form the leading
    arguments and ``outputFile`` is given as ``--cigars``. When
    ``targetSequenceFiles`` is not None it is likewise joined with spaces
    before being formatted (with quotes) by ``nameValue``. The full command
    is logged before it is executed with ``system``.
    """
    logLevel = getLogLevelString2(logLevel)
    chunkSize = nameValue("chunkSize", chunkSize, int)
    overlapSize = nameValue("overlapSize", overlapSize, int)
    blastString = nameValue("blastString", blastString, str)
    selfBlastString = nameValue("selfBlastString", selfBlastString, str)
    compressFiles = nameValue("compressFiles", compressFiles, bool)
    lastzMemory = nameValue("lastzMemory", lastzMemory, int)
    # Identity comparison is the correct test against the None singleton.
    if targetSequenceFiles is not None:
        targetSequenceFiles = " ".join(targetSequenceFiles)
    targetSequenceFiles = nameValue("targetSequenceFiles", targetSequenceFiles, quotes=True)
    command = "cactus_blast.py %s  --cigars %s %s %s %s %s %s %s %s --jobTree %s --logLevel %s" % \
            (" ".join(sequenceFiles), outputFile,
             chunkSize, overlapSize, blastString, selfBlastString, compressFiles,
             lastzMemory, targetSequenceFiles, jobTreeDir, logLevel)
    logger.info("Running command : %s" % command)
    system(command)
    logger.info("Ran the cactus_blast command okay")
Exemple #13
0
def runCactusFastaGenerator(cactusDiskDatabaseString,
                          flowerName,
                          outputFile,
                          referenceEventString=None,
                          logLevel=None):
    """Run ``cactus_fastaGenerator``, writing to ``outputFile``.

    ``referenceEventString`` is formatted by ``nameValue`` and appended
    after the log level.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)
    referenceOption = nameValue("referenceEventString", referenceEventString)
    command = "cactus_fastaGenerator --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s" % (
        cactusDiskDatabaseString, flowerName, outputFile, resolvedLogLevel,
        referenceOption)
    system(command)
Exemple #14
0
def runCactusAdjacencyGraphViewer(graphFile,
                                  cactusDiskDatabaseString,
                                  flowerName="0",
                                  logLevel=None,
                                  includeInternalAdjacencies=False):
    """Run ``cactus_adjacencyGraphViewer``, writing to ``graphFile``.

    ``includeInternalAdjacencies`` is formatted by ``nameValue`` and
    appended to the command. Previously the formatted option was computed
    but never substituted into the command, so the argument had no effect.

    NOTE(review): assumes the ``cactus_adjacencyGraphViewer`` binary accepts
    an ``includeInternalAdjacencies`` option -- confirm against its usage.
    """
    logLevel = getLogLevelString2(logLevel)
    includeInternalAdjacencies = nameValue("includeInternalAdjacencies",
                                           includeInternalAdjacencies, bool)
    system("cactus_adjacencyGraphViewer --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s" \
                    % (cactusDiskDatabaseString, flowerName, graphFile, logLevel,
                       includeInternalAdjacencies))
    logger.info("Created a break point graph of the problem")
Exemple #15
0
def trimGenome(sequenceFile, coverageFile, outputFile, complement=False,
               flanking=0, minSize=1, windowSize=10, threshold=1, depth=None):
    """Run ``cactus_trimSequences.py`` and redirect stdout to ``outputFile``.

    The six tuning arguments are formatted by ``nameValue`` and placed
    before the sequence and coverage file arguments.
    """
    trimOptions = (
        nameValue("complement", complement, valueType=bool),
        nameValue("flanking", flanking),
        nameValue("minSize", minSize),
        nameValue("windowSize", windowSize),
        nameValue("threshold", threshold),
        nameValue("depth", depth),
    )
    command = "cactus_trimSequences.py %s %s %s %s %s %s %s %s > %s" % (
        trimOptions + (sequenceFile, coverageFile, outputFile))
    system(command)
Exemple #16
0
def runCactusTreeStats(outputFile,
                       cactusDiskDatabaseString,
                       flowerName='0',
                       logLevel=None,
                       referenceEventString=None):
    """Run ``cactus_treeStats``, writing to ``outputFile``.

    ``referenceEventString`` is formatted by ``nameValue`` and appended
    after the log level.
    """
    arguments = (
        cactusDiskDatabaseString,
        flowerName,
        outputFile,
        getLogLevelString2(logLevel),
        nameValue("referenceEventString", referenceEventString, str),
    )
    system("cactus_treeStats --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s" % arguments)
    logger.info("Ran the cactus tree stats command apprently okay")
Exemple #17
0
def runCPecanEm(sequenceFiles, alignmentsFile, outputModelFile,
                 inputModelFile=None,
                 modelType=None,
                 jobTreeDir=None,
                 iterations=None, randomStart=None,
                 trials=None,
                 optionsToRealign=None,
                 logLevel=None,
                 updateTheBand=None,
                 maxAlignmentLengthPerJob=None,
                 maxAlignmentLengthToSample=None,
                 useDefaultModelAsStart=None,
                 setJukesCantorStartingEmissions=None,
                 trainEmissions=None,
                 tieEmissions=None,
                 outputTrialHmms = None,
                 outputXMLModelFile = None,
                 blastScoringMatrixFile=None):
    """Assemble and run the ``cPecanEm`` command line.

    ``sequenceFiles`` is joined with single spaces and quoted as
    ``--sequences``. Every optional argument is formatted by ``nameValue``
    and appended, space separated, in the order listed below.
    """
    # The resolved log level is not placed on the command line; the call is
    # kept exactly as the original made it.
    getLogLevelString2(logLevel)

    # Order here is the order the options appear on the command line.
    options = [
        nameValue("iterations", iterations, int),
        nameValue("trials", trials, int),
        nameValue("randomStart", randomStart, bool),
        nameValue("jobTree", jobTreeDir, str),
        nameValue("inputModel", inputModelFile, str),
        nameValue("optionsToRealign", optionsToRealign, quotes=True),
        nameValue("modelType", modelType, str),
        nameValue("maxAlignmentLengthPerJob", maxAlignmentLengthPerJob, int),
        nameValue("maxAlignmentLengthToSample", maxAlignmentLengthToSample, int),
        nameValue("updateTheBand", updateTheBand, bool),
        nameValue("useDefaultModelAsStart", useDefaultModelAsStart, bool),
        nameValue("trainEmissions", trainEmissions, bool),
        nameValue("tieEmissions", tieEmissions, bool),
        nameValue("setJukesCantorStartingEmissions", setJukesCantorStartingEmissions, float),
        nameValue("outputTrialHmms", outputTrialHmms, bool),
        nameValue("outputXMLModelFile", outputXMLModelFile, str),
        nameValue("blastScoringMatrixFile", blastScoringMatrixFile, str),
    ]

    # One " %s" slot per option keeps the template in step with the list.
    template = "cPecanEm --sequences '%s' --alignments %s --outputModel %s" + " %s" * len(options)
    leading = (" ".join(sequenceFiles), alignmentsFile, outputModelFile)
    system(template % (leading + tuple(options)))
Exemple #18
0
def _fn(jobTreeDir,
      logLevel=None, retryCount=0,
      batchSystem="single_machine",
      rescueJobFrequency=None,
      skipAlignments=False,
      buildAvgs=False, buildReference=False,
      buildHal=False,
      buildFasta=False,
      jobTreeStats=False,
      maxThreads=None,
      maxCpus=None,
      defaultMemory=None,
      logFile=None,
      extraJobTreeArgumentsString=""):
    """Return the workflow/jobTree portion of a command line as one string.

    Each argument is formatted by ``nameValue``; the pieces are joined with
    single spaces around ``--jobTree <jobTreeDir> --logLevel <level>``, and
    ``extraJobTreeArgumentsString`` is appended verbatim at the end.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)

    # Options emitted before --jobTree.
    leading = (
        nameValue("skipAlignments", skipAlignments, bool),
        nameValue("buildAvgs", buildAvgs, bool),
        nameValue("buildReference", buildReference, bool),
    )
    halOption = nameValue("buildHal", buildHal, bool)
    fastaOption = nameValue("buildFasta", buildFasta, bool)

    # Jobtree args
    batchOption = nameValue("batchSystem", batchSystem, str, quotes=True)
    retryOption = nameValue("retryCount", retryCount, int)
    rescueOption = nameValue("rescueJobsFrequency", rescueJobFrequency, int)
    statsOption = nameValue("stats", jobTreeStats, bool)
    threadsOption = nameValue("maxThreads", maxThreads, int)
    cpusOption = nameValue("maxCpus", maxCpus, int)
    memoryOption = nameValue("defaultMemory", defaultMemory, int)
    logFileOption = nameValue("logFile", logFile, str)

    # Options emitted after --logLevel, in command-line order (logFile
    # precedes defaultMemory).
    trailing = (
        halOption, fastaOption, batchOption, retryOption, rescueOption,
        statsOption, threadsOption, cpusOption, logFileOption, memoryOption,
        extraJobTreeArgumentsString,
    )
    template = "%s %s %s --jobTree %s --logLevel %s" + " %s" * len(trailing)
    return template % (leading + (jobTreeDir, resolvedLogLevel) + trailing)
Exemple #19
0
def runCactusTreeStats(outputFile, cactusDiskDatabaseString, flowerName='0', logLevel=None, referenceEventString=None):
    """Invoke ``cactus_treeStats`` with ``outputFile`` as ``--outputFile``.

    ``referenceEventString`` is formatted by ``nameValue`` and appended
    after the log level.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)
    referenceOption = nameValue("referenceEventString", referenceEventString, str)
    template = "cactus_treeStats --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s"
    system(template % (cactusDiskDatabaseString, flowerName, outputFile,
                       resolvedLogLevel, referenceOption))
    logger.info("Ran the cactus tree stats command apprently okay")
Exemple #20
0
def runCactusReference(cactusDiskDatabaseString, flowerNames, logLevel=None,
                       matchingAlgorithm=None,
                       referenceEventString=None,
                       permutations=None,
                       useSimulatedAnnealing=None,
                       theta=None,
                       phi=None,
                       maxWalkForCalculatingZ=None,
                       ignoreUnalignedGaps=None,
                       wiggle=None,
                       numberOfNs=None,
                       minNumberOfSequencesToSupportAdjacency=None,
                       makeScaffolds=None):
    """Run ``cactus_reference`` and return its non-empty output lines.

    ``flowerNames`` is passed to ``popenCatch`` as ``stdinString``. Every
    optional argument is formatted by ``nameValue`` and appended, space
    separated, in the order listed below.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)

    # Order here is the order the options appear on the command line.
    options = [
        nameValue("matchingAlgorithm", matchingAlgorithm),
        nameValue("referenceEventString", referenceEventString),
        nameValue("permutations", permutations, int),
        nameValue("useSimulatedAnnealing", useSimulatedAnnealing, bool),
        nameValue("theta", theta, float),
        nameValue("phi", phi, float),
        nameValue("maxWalkForCalculatingZ", maxWalkForCalculatingZ, int),
        nameValue("ignoreUnalignedGaps", ignoreUnalignedGaps, bool),
        nameValue("wiggle", wiggle, float),
        nameValue("numberOfNs", numberOfNs, int),
        nameValue("minNumberOfSequencesToSupportAdjacency", minNumberOfSequencesToSupportAdjacency, int),
        nameValue("makeScaffolds", makeScaffolds, bool),
    ]

    # One " %s" slot per option keeps the template in step with the list.
    template = "cactus_reference --cactusDisk '%s' --logLevel %s" + " %s" * len(options)
    command = template % ((cactusDiskDatabaseString, resolvedLogLevel) + tuple(options))
    output = popenCatch(command, stdinString=flowerNames)
    logger.info("Ran cactus_reference okay")
    messages = []
    for line in output.split("\n"):
        if line != '':
            messages.append(line)
    return messages
Exemple #21
0
def runCactusBar(cactusDiskDatabaseString, flowerNames, logLevel=None,
                         spanningTrees=None, maximumLength=None,
                         gapGamma=None,
                         matchGamma=None,
                         splitMatrixBiggerThanThis=None,
                         anchorMatrixBiggerThanThis=None,
                         repeatMaskMatrixBiggerThanThis=None,
                         diagonalExpansion=None,
                         constraintDiagonalTrim=None,
                         minimumBlockDegree=None,
                         minimumIngroupDegree=None,
                         minimumOutgroupDegree=None,
                         alignAmbiguityCharacters=None,
                         pruneOutStubAlignments=None,
                         useProgressiveMerging=None,
                         calculateWhichEndsToComputeSeparately=None,
                         largeEndSize=None,
                         endAlignmentsToPrecomputeOutputFile=None,
                         precomputedAlignments=None,
                         ingroupCoverageFile=None,
                         minimumSizeToRescue=None,
                         minimumCoverageToRescue=None,
                         minimumNumberOfSpecies=None):
    """Run ``cactus_bar`` and return its non-empty output lines.

    ``flowerNames`` is passed to ``popenCatch`` as ``stdinString``. Every
    optional argument is formatted by ``nameValue`` and appended, space
    separated, in the order listed below. ``minimumBlockDegree`` is emitted
    under the option name ``minimumDegree``.
    """
    resolvedLogLevel = getLogLevelString2(logLevel)

    # Order here is the order the options appear on the command line.
    options = [
        nameValue("spanningTrees", spanningTrees, int),
        nameValue("maximumLength", maximumLength, int),
        nameValue("gapGamma", gapGamma, float),
        nameValue("matchGamma", matchGamma, float),
        nameValue("splitMatrixBiggerThanThis", splitMatrixBiggerThanThis, int),
        nameValue("anchorMatrixBiggerThanThis", anchorMatrixBiggerThanThis, int),
        nameValue("repeatMaskMatrixBiggerThanThis", repeatMaskMatrixBiggerThanThis, int),
        nameValue("constraintDiagonalTrim", constraintDiagonalTrim, int),
        nameValue("minimumDegree", minimumBlockDegree, int),
        nameValue("minimumIngroupDegree", minimumIngroupDegree, int),
        nameValue("minimumOutgroupDegree", minimumOutgroupDegree, int),
        nameValue("alignAmbiguityCharacters", alignAmbiguityCharacters, bool),
        nameValue("pruneOutStubAlignments", pruneOutStubAlignments, bool),
        nameValue("diagonalExpansion", diagonalExpansion, int),
        nameValue("useProgressiveMerging", useProgressiveMerging, bool),
        nameValue("calculateWhichEndsToComputeSeparately", calculateWhichEndsToComputeSeparately, bool),
        nameValue("largeEndSize", largeEndSize, int),
        nameValue("endAlignmentsToPrecomputeOutputFile", endAlignmentsToPrecomputeOutputFile, str),
        nameValue("precomputedAlignments", precomputedAlignments, str, quotes=True),
        nameValue("ingroupCoverageFile", ingroupCoverageFile, str, quotes=True),
        nameValue("minimumSizeToRescue", minimumSizeToRescue, int),
        nameValue("minimumCoverageToRescue", minimumCoverageToRescue, float),
        nameValue("minimumNumberOfSpecies", minimumNumberOfSpecies, int),
    ]

    # One " %s" slot per option keeps the template in step with the list.
    template = "cactus_bar --cactusDisk '%s' --logLevel %s" + " %s" * len(options)
    command = template % ((cactusDiskDatabaseString, resolvedLogLevel) + tuple(options))
    output = popenCatch(command, stdinString=flowerNames)
    logger.info("Ran cactus_bar okay")
    messages = []
    for line in output.split("\n"):
        if line != '':
            messages.append(line)
    return messages
Exemple #22
0
def runCactusCaf(cactusDiskDatabaseString, alignments,
                 flowerNames=encodeFlowerNames((0,)),
                 logLevel=None,
                 writeDebugFiles=False,
                 annealingRounds=None,
                 deannealingRounds=None,
                 trim=None,
                 minimumTreeCoverage=None,
                 blockTrim=None,
                 minimumBlockDegree=None,
                 minimumIngroupDegree=None,
                 minimumOutgroupDegree=None,
                 alignmentFilter=None,
                 lastzArguments=None,
                 minimumSequenceLengthForBlast=None,
                 maxAdjacencyComponentSizeRatio=None,
                 constraints=None,
                 minLengthForChromosome=None,
                 proportionOfUnalignedBasesForNewChromosome=None,
                 maximumMedianSequenceLengthBetweenLinkedEnds=None,
                 realign=None,
                 realignArguments=None,
                 phylogenyNumTrees=None,
                 phylogenyScoringMethod=None,
                 phylogenyRootingMethod=None,
                 phylogenyBreakpointScalingFactor=None,
                 phylogenySkipSingleCopyBlocks=None,
                 phylogenyMaxBaseDistance=None,
                 phylogenyMaxBlockDistance=None,
                 phylogenyDebugFile=None,
                 phylogenyKeepSingleDegreeBlocks=None,
                 phylogenyTreeBuildingMethod=None,
                 phylogenyCostPerDupPerBase=None,
                 phylogenyCostPerLossPerBase=None,
                 referenceEventHeader=None,
                 phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce=None,
                 numTreeBuildingThreads=None,
                 doPhylogeny=None,
                 removeLargestBlock=None,
                 phylogenyNucleotideScalingFactor=None,
                 minimumBlockDegreeToCheckSupport=None,
                 minimumBlockHomologySupport=None,
                  removeRecoverableChains=None,
                  minimumNumberOfSpecies=None,
                  maxRecoverableChainsIterations=None,
                  maxRecoverableChainLength=None,
                 phylogenyHomologyUnitType=None,
                 phylogenyDistanceCorrectionMethod=None):
    """Run ``cactus_caf`` and return its non-empty output lines.

    Newlines are stripped from ``cactusDiskDatabaseString`` before it is
    placed on the command line. ``flowerNames`` is passed to ``popenCatch``
    as ``stdinString``; its default is computed once, at definition time.
    Every optional argument is formatted by ``nameValue`` and appended,
    space separated, in the order listed below. ``minimumBlockDegree`` is
    emitted as ``minimumDegree`` and ``doPhylogeny`` as ``phylogeny``.

    ``writeDebugFiles`` and ``removeLargestBlock`` are accepted but are not
    referenced anywhere in the body.
    """
    # remove annoying carriage returns in caf command line.
    diskString = cactusDiskDatabaseString.replace('\n', '')
    resolvedLogLevel = getLogLevelString2(logLevel)

    # Order here is the order the options appear on the command line.
    options = [
        nameValue("alignments", alignments),
        nameValue("annealingRounds", annealingRounds, quotes=True),
        nameValue("deannealingRounds", deannealingRounds, quotes=True),
        nameValue("trim", trim, quotes=True),
        nameValue("minimumTreeCoverage", minimumTreeCoverage, float),
        nameValue("blockTrim", blockTrim, int),
        nameValue("minimumDegree", minimumBlockDegree, int),
        nameValue("minimumIngroupDegree", minimumIngroupDegree, int),
        nameValue("minimumOutgroupDegree", minimumOutgroupDegree, int),
        nameValue("alignmentFilter", alignmentFilter),
        nameValue("lastzArguments", lastzArguments, quotes=True),
        nameValue("minimumSequenceLengthForBlast", minimumSequenceLengthForBlast, int),
        nameValue("maxAdjacencyComponentSizeRatio", maxAdjacencyComponentSizeRatio, float),
        nameValue("constraints", constraints),
        nameValue("minLengthForChromosome", minLengthForChromosome, int),
        nameValue("proportionOfUnalignedBasesForNewChromosome", proportionOfUnalignedBasesForNewChromosome, float),
        nameValue("maximumMedianSequenceLengthBetweenLinkedEnds", maximumMedianSequenceLengthBetweenLinkedEnds, int),
        nameValue("realign", realign, bool),
        nameValue("realignArguments", realignArguments, quotes=True),
        nameValue("phylogenyNumTrees", phylogenyNumTrees, int),
        nameValue("phylogenyRootingMethod", phylogenyRootingMethod, quotes=True),
        nameValue("phylogenyScoringMethod", phylogenyScoringMethod, quotes=True),
        nameValue("phylogenyBreakpointScalingFactor", phylogenyBreakpointScalingFactor),
        nameValue("phylogenySkipSingleCopyBlocks", phylogenySkipSingleCopyBlocks, bool),
        nameValue("phylogenyMaxBaseDistance", phylogenyMaxBaseDistance),
        nameValue("phylogenyMaxBlockDistance", phylogenyMaxBlockDistance),
        nameValue("phylogenyDebugFile", phylogenyDebugFile),
        nameValue("phylogenyKeepSingleDegreeBlocks", phylogenyKeepSingleDegreeBlocks, bool),
        nameValue("phylogenyTreeBuildingMethod", phylogenyTreeBuildingMethod),
        nameValue("phylogenyCostPerDupPerBase", phylogenyCostPerDupPerBase),
        nameValue("phylogenyCostPerLossPerBase", phylogenyCostPerLossPerBase),
        nameValue("referenceEventHeader", referenceEventHeader, quotes=True),
        nameValue("phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce", phylogenyDoSplitsWithSupportHigherThanThisAllAtOnce),
        nameValue("numTreeBuildingThreads", numTreeBuildingThreads),
        nameValue("phylogeny", doPhylogeny, bool),
        nameValue("minimumBlockDegreeToCheckSupport", minimumBlockDegreeToCheckSupport),
        nameValue("minimumBlockHomologySupport", minimumBlockHomologySupport),
        nameValue("phylogenyNucleotideScalingFactor", phylogenyNucleotideScalingFactor),
        nameValue("removeRecoverableChains", removeRecoverableChains),
        nameValue("minimumNumberOfSpecies", minimumNumberOfSpecies, int),
        nameValue("phylogenyHomologyUnitType", phylogenyHomologyUnitType, quotes=True),
        nameValue("phylogenyDistanceCorrectionMethod", phylogenyDistanceCorrectionMethod, quotes=True),
        nameValue("maxRecoverableChainsIterations", maxRecoverableChainsIterations, int),
        nameValue("maxRecoverableChainLength", maxRecoverableChainLength, int),
    ]

    # One " %s" slot per option keeps the template in step with the list.
    template = "cactus_caf --cactusDisk '%s' --logLevel %s" + " %s" * len(options)
    command = template % ((diskString, resolvedLogLevel) + tuple(options))
    output = popenCatch(command, stdinString=flowerNames)
    logger.info("Ran cactus_core okay")
    messages = []
    for line in output.split("\n"):
        if line != '':
            messages.append(line)
    return messages
Exemple #23
0
def runConvertAlignmentsToInternalNames(cactusDiskString, alignmentsFile, outputFile, flowerName, isBedFile=False):
    """Run ``cactus_convertAlignmentsToInternalNames`` through ``popenCatch``.

    ``flowerName`` is wrapped in a one-element tuple, encoded with
    ``encodeFlowerNames`` and passed as ``stdinString``. ``isBedFile`` is
    formatted by ``nameValue`` as the ``bed`` option. The output of
    ``popenCatch`` is discarded.
    """
    bedOption = nameValue("bed", isBedFile, bool)
    command = "cactus_convertAlignmentsToInternalNames --cactusDisk '%s' %s %s %s" % (
        cactusDiskString, alignmentsFile, outputFile, bedOption)
    encodedFlower = encodeFlowerNames((flowerName,))
    popenCatch(command, stdinString=encodedFlower)
Exemple #24
0
def calculateExpectations(target, sequences, alignments, modelsFile, expectationsFile, options):
    """Pipe ``alignments`` into ``cPecanRealign`` to write expectations.

    ``modelsFile`` is supplied via the ``loadHmm`` option and
    ``options.optionsToRealign`` is appended verbatim. ``target`` is not
    used by this function.
    """
    # Run cPecanRealign
    arguments = (
        alignments,
        sequences,
        nameValue("loadHmm", modelsFile, str),
        expectationsFile,
        options.optionsToRealign,
    )
    system("cat %s | cPecanRealign --logLevel DEBUG %s %s --outputExpectations=%s %s" % arguments)