Example #1
0
def functionsGoNOW(sampleNames, path, runTrimMetadata, commands):
    """Run QUAST quality checks on the assemblies and fold the results
    into the pipeline metadata.

    :param sampleNames: list of sample names to process
    :param path: root working directory of the pipeline run
    :param runTrimMetadata: metadata dictionary accumulated so far
    :param commands: previously executed pipeline commands
    :return: metadata dictionary updated with the QUAST results
    """
    # Parenthesised print is identical for a single string in Python 2 and
    # matches the print() style already used elsewhere in this file
    print("\nPerforming quality checks on assemblies.")
    quastList = quastProcesses(sampleNames, path, runTrimMetadata, commands)
    quastMeta = metadataFiller.filler(runTrimMetadata, quastList)
    runTrimAssemblyMetadata = quastMetadata(sampleNames, path, quastMeta)
    # Dump the accumulated metadata into the "Collection" JSON report
    jsonReportR.jsonR(sampleNames, path, runTrimAssemblyMetadata, "Collection")
    return runTrimAssemblyMetadata
Example #2
0
def functionsGoNOW(correctedFiles, path, metadata, fLength, commands):
    """Run the helper function"""
    print("\nAssembling reads.")
    # Run the SPAdes preparation step, then merge the resulting flag
    # information back into the metadata dictionary
    spadesResults = spadesPrepProcesses(correctedFiles, path, fLength, metadata, commands)
    mergedMetadata = metadataFiller.filler(metadata, spadesResults)
    # Reformat the contig files and record which samples finished assembling
    updatedMetadata = contigFileFormatter(correctedFiles, path, mergedMetadata)
    assembledFiles = completionist(correctedFiles, path)
    # Write the accumulated metadata to the "Collection" JSON report
    jsonReportR.jsonR(correctedFiles, path, updatedMetadata, "Collection")
    return updatedMetadata, assembledFiles
Example #3
0
def functionsGoNow(files, path, metadata, fLength, commands):
    """Assemble the supplied files and return the updated metadata together
    with the list of successfully assembled samples."""
    # Prepare and launch SPAdes, then fold the issued commands into the metadata
    spadesCommands = spadesPrepProcesses(files, path, fLength, metadata, commands)
    mergedMetadata = metadataFiller.filler(metadata, spadesCommands)
    # Reformat contig files and determine which samples completed assembly
    updatedMetadata = contigFileFormatter(files, path, mergedMetadata)
    assembledFiles = completionist(files, path)
    # Record everything in the "Collection" JSON report
    jsonReportR.jsonR(files, path, updatedMetadata, "Collection")
    return updatedMetadata, assembledFiles
Example #4
0
def functionsGoNOW(assembledFiles, path, assemblyMetadata, refFilePath, commands):
    """Run the GeneSeekr analyses on the assembled files and update the
    metadata with the results.

    :param assembledFiles: list of assembled sample names
    :param path: root working directory of the pipeline run
    :param assemblyMetadata: metadata dictionary accumulated so far
    :param refFilePath: location of the reference files used by the analyses
    :param commands: previously executed pipeline commands
    :return: metadata dictionary updated with the GeneSeekr results
    """
    # Parenthesised print keeps this consistent with the print() style used
    # elsewhere in the file and behaves identically in Python 2
    print("\nPerforming GeneSeekr analysis")
    # Clear out any summary reports from a previous iteration of the pipeline
    reportRemover(path)
    # Do everything - uniVec screening, geneSeeking, V-typing, and MLST analysis
    geneSeekrMetadataList = geneSeekrPrepProcesses(assembledFiles, path, assemblyMetadata, refFilePath, commands)
    geneSeekrMetadata = metadataFiller.filler(assemblyMetadata, geneSeekrMetadataList)
    # Write the accumulated metadata to the "Collection" JSON report
    jsonReportR.jsonR(assembledFiles, path, geneSeekrMetadata, "Collection")
    return geneSeekrMetadata
Example #5
0
def functionsGoNOW(sampleNames, path, date, metadata, refFilesPath):
    """Run the rMLST analyses: locate rMLST alleles, choose a reference
    genome, determine the subtype, and record genome size estimates.

    :param sampleNames: list of sample names to process
    :param path: root working directory of the pipeline run
    :param date: run date passed through to the allele blaster
    :param metadata: metadata dictionary accumulated so far
    :param refFilesPath: location of the reference files (rMLST alleles)
    :return: metadata dictionary updated with the rMLST results
    """
    # Parenthesised prints match the print() style used elsewhere in the file
    print("\nPerforming rMLST analyses.")
    # rMLST allele definitions live under the reference file tree
    rMLSTgenes = refFilesPath + "/rMLST/alleles/"
    make_path("%s/tmp" % path)
    print("\nFinding rMLST alleles.")
    dictionaryPreparer(sampleNames)
    # NOTE: path is deliberately passed twice (query and output locations)
    plusdict = blaster(rMLSTgenes, sampleNames, path, path, date, refFilesPath)
    additionalMetadata = determineReferenceGenome(plusdict, path, metadata, refFilesPath)
    moreMetadata = determineSubtype(plusdict, path, additionalMetadata, refFilesPath)
    allMetadata = rMLSTsizer(moreMetadata, sampleNames)
    # Write the accumulated metadata to the "Collection" JSON report
    jsonReportR.jsonR(sampleNames, path, allMetadata, "Collection")
    return allMetadata
Example #6
0
def functionsGoNOW(sampleNames, path, runTrimMetadata, commands):
    """Calls all the functions in a way that they can be multi-processed.

    Samples the fastq files, indexes the targets, maps, extracts, and graphs,
    merging each stage's results into the metadata before formatting the
    final insert-size output.

    :param sampleNames: list of sample names to process
    :param path: root working directory of the pipeline run
    :param runTrimMetadata: metadata dictionary accumulated so far
    :param commands: previously executed pipeline commands
    :return: metadata dictionary updated with the insert-size results
    """
    inputData = referenceFiletoAssembly(path, sampleNames)
    # Parenthesised print matches the print() style used elsewhere in the file
    print("\nSampling fastq files.")
    sampleMeta = sampleFastq(path, sampleNames, runTrimMetadata, commands)
    indexList = indexTargetsProcesses(path, inputData, sampleMeta, commands)
    indexMeta = metadataFiller.filler(runTrimMetadata, indexList)
    # Start the mapping operations
    mappingList = mappingProcesses(path, inputData, indexMeta, commands)
    mappingMeta = metadataFiller.filler(runTrimMetadata, mappingList)
    extractingList = extractingProcesses(path, inputData, mappingMeta, commands)
    extractingMeta = metadataFiller.filler(runTrimMetadata, extractingList)
    graphingList = graphingProcesses(path, inputData, extractingMeta, commands)
    graphingMeta = metadataFiller.filler(runTrimMetadata, graphingList)
    # Return to the root directory before formatting the outputs
    os.chdir(path)
    runTrimInsertMetadata = formatOutput(path, sampleNames, graphingMeta)
    # Write the accumulated metadata to the "Collection" JSON report
    jsonReportR.jsonR(sampleNames, path, runTrimInsertMetadata, "Collection")
    return runTrimInsertMetadata
Example #7
0
def functionsGoNOW(sampleNames, path, runMetadata, fLength, commands):
    """Run the Quake error-correction stages and return the corrected files
    together with the updated metadata.

    :param sampleNames: list of sample names to process
    :param path: root working directory of the pipeline run
    :param runMetadata: metadata dictionary accumulated so far
    :param fLength: forward read length used by the prep/correct stages
    :param commands: previously executed pipeline commands
    :return: tuple of (list of successfully corrected files, updated metadata)
    """
    print('\nPerforming error correction on fastq files.')
    # Removed the multiprocessing aspect of this function - it seemed to be unreliable.
    # Sometimes, fastq files with more data would not be corrected.
    os.chdir(path)
    # Parenthesised prints keep the style consistent within this function
    print("Preparing fastq files for processing")
    prepList = quakePrepProcesses(sampleNames, path, fLength, runMetadata, commands)
    prepMetadata = metadataFiller.filler(runMetadata, prepList)
    print("Determining cut-off values for error correction")
    cutoffList = quakeCutOffProcesses(sampleNames, path, prepMetadata, commands)
    cutoffMetadata = metadataFiller.filler(prepMetadata, cutoffList)
    print("Correcting errors")
    correctList = quakeCorrectProcesses(sampleNames, path, fLength, cutoffMetadata, commands)
    # Fill from cutoffMetadata (not runMetadata) so the prep and cut-off
    # metadata gathered above is carried through to the final record,
    # matching the chaining used for the earlier stages
    correctMetadata = metadataFiller.filler(cutoffMetadata, correctList)
    os.chdir(path)
    # Run completionist to determine unprocessable files, and acquire metadata
    runTrimMetadata, correctedList = completionist(sampleNames, path, correctMetadata, fLength)
    # Clean up tmp files
    tmpFileRemover(path, correctedList)
    # Write the accumulated metadata to the "Collection" JSON report
    jsonReportR.jsonR(correctedList, path, runTrimMetadata, "Collection")
    return correctedList, runTrimMetadata
Example #8
0
def reportWriter(sampleNames, metadata, path):
    """Write a combined tab-delimited metadata report for every sample, copy
    assemblies and JSON reports into the shared repository folders, and clean
    up per-sample temporary files.

    :param sampleNames: iterable of sample names to include in the report
    :param metadata: nested dict accessed as metadata[name][section][heading]
    :param path: analysis folder; its basename becomes the report file prefix
    """
    print "\nCreating reports."
    reportPath = "%s/reports" % path
    # Grab the name of the analysis from the path variable
    folderName = path.split("/")[-1]
    # Get the path of the parental folder. This is where subfolders containing copies of
    # all the assemblies and reports will be stored
    repositoryPath = os.path.dirname(path)
    # Create the appropriate folders (if necessary) for storing the appropriate files
    make_path("%s/AssemblyData/Assemblies" % repositoryPath)
    make_path("%s/AssemblyData/JsonReports" % repositoryPath)
    make_path("%s/AssemblyData/SummaryReports" % repositoryPath)
    make_path(reportPath)
    # NOTE(review): opened in binary mode but written with str values below -
    # this relies on Python 2 semantics where str is a byte string
    combinedReport = open("%s/%s_CombinedMetadataReport.tsv" % (reportPath, folderName), "wb")
    # The headings will be used to search the json file and return only these particular data for
    # inclusion in the metadata spreadsheet
    headings = ["SampleName", "fileName", "N50", "NumContigs", "TotalLength", "MeanInsertSize", "averageDepthofCov",
                "referenceGenome", "NumIdenticalAlleles", "rMLSTSequenceType", "rMLSTIdenticalAlleles", "SequencingDate",
                "NumPredictedGenes", "NumPredictedGenes>500bp", "NumPredictedGenes>1000bp", "NumPredictedGenes>3000bp",
                "geneSeekrProfile", "verotoxinProfile", "MLST_sequenceType", "percentGC", "Investigator",
                "NumberOfClustersPF", "PercentOfClusters", "TotalClustersinRun", "LengthofFirstRead",
                "LengthofSecondRead", "Project", "PipelineVersion"]
    # General headings within the metadata json file
    reportHeadings = ["1.General", "2.Assembly", "3.Run", "4.Correction",
                      "5.rMLST", "6.rMLSTmatchestoRef", "7.PipelineCommands", "8.PipelineVersions"]
    # Write the tab-separated header row
    combinedReport.write("\t".join(headings))
    combinedReport.write("\n")
    # Use jsonReportR to fill out the final json metadata report
    jsonReportR.jsonR(sampleNames, path, metadata, "Report")
    # Fill the combined metadata report spreadsheet with the appropriate data
    for name in sampleNames:
        newPath = path + "/" + name
        for heading in headings:
            # "NA" is written when no section supplies a usable value;
            # if several sections contain the heading, the last one wins
            value = "NA"
            for rHeading in reportHeadings:
                # Had some issues with the values being the wrong "type".
                # This if statement only accepts strings, floats, and integers
                if type(metadata[name][rHeading][heading]) is StringType or type(metadata[name][rHeading][heading]) \
                        is FloatType or type(metadata[name][rHeading][heading]) is IntType:
                    value = str(metadata[name][rHeading][heading])
            # Write the value to the spreadsheet
            combinedReport.write(value)
            combinedReport.write("\t")
        combinedReport.write("\n")

        # file clean-up
        dbm = "%s/%s_fastqFiles.dbm" % (newPath, name)
        if os.path.isfile(dbm):
            os.remove(dbm)
        # Remove intermediate fastq files for this sample
        fastq = glob.glob("%s/*_001.fastq" % newPath)
        for files in fastq:
            if os.path.isfile(files):
                os.remove(files)
        counts = "%s/%s_counts.txt" % (newPath, name)
        if os.path.isfile(counts):
            os.remove(counts)
        qcts = "%s/%s_fastqFiles.txt.qcts" % (newPath, name)
        if os.path.isfile(qcts):
            os.remove(qcts)
        # NOTE(review): this glob targets the shared reports folder but runs
        # once per sample; after the first pass the files are already gone
        jsonCollection = glob.glob("%s/reports/*Collection.json" % path)
        for jsonFile in jsonCollection:
            if os.path.isfile(jsonFile):
                os.remove(jsonFile)

        # Move assemblies and reports to appropriate Master repositories
        shutil.copy("%s/%s/%s_filteredAssembled.fasta" % (path, name, name),
                    "%s/AssemblyData/Assemblies/%s_filteredAssembled.fasta" % (repositoryPath, name))
        shutil.copy("%s/%s/%s_metadataReport.json" % (path, name, name),
                    "%s/AssemblyData/JsonReports/%s_metadataReport.json" % (repositoryPath, name))
    # Move the metadata spreadsheet
    combinedReport.close()
    shutil.copy("%s/%s_CombinedMetadataReport.tsv" % (reportPath, folderName),
                "%s/AssemblyData/SummaryReports/%s_CombinedMetadataReport.tsv" % (repositoryPath, folderName))