def parsePreparedInput(inputFilePaths: List[str], genomeFilePath, checkEachLine = True):
    """
    Validate each already-prepared mutation bed file and generate its metadata.

    inputFilePaths: Paths to the prepared input files.  Each file is expected to be named
        after its parent directory (the "data group name"), followed by its context, and
        to end in DataTypeStr.mutations + ".bed".
    genomeFilePath: Path to the genome file used to look up the acceptable chromosomes.
    checkEachLine: When true, every line is passed to checkForErrors.  Otherwise, only
        the first line of each file is checked.

    Raises UserInputError if no context is found for a file, and InvalidPathError if
    a file's name does not match the expected pattern.
    """

    for preparedFilePath in inputFilePaths:

        preparedFileName = os.path.basename(preparedFilePath)
        print("\nWorking in", preparedFileName)

        # The name checks below compare the file name against its parent directory and its context.
        dataGroupName = getIsolatedParentDir(preparedFilePath)
        fileContext = getContext(preparedFilePath)

        if fileContext is None:
            raise UserInputError("No context is apparent from the given prepared input file.")

        # Everything before "_<context>" in the file name must be the data group name.
        nameBeforeContext = preparedFileName.split('_' + fileContext)[0]
        if nameBeforeContext != dataGroupName:
            raise InvalidPathError(
                preparedFilePath,
                "Prepared input file is not named as expected given the data group name generated from the "
                "parent directory.  Expected: \"" + dataGroupName + "\" immediately preceding the context definition "
                "but given file path is:")

        mutationsSuffix = DataTypeStr.mutations + ".bed"
        if not preparedFileName.endswith(mutationsSuffix):
            raise InvalidPathError(
                preparedFilePath,
                "Prepared input file is not named properly to indicate the presence of mutation data.  "
                "Expected a file ending in \"" + mutationsSuffix + "\" but given path is:")

        acceptableChromosomes = getAcceptableChromosomes(genomeFilePath)
        acceptableChromosomesFilePath = getAcceptableChromosomes(genomeFilePath, True)

        # Perform QA with the checkForErrors function
        print("Checking for errors in line formatting...")
        with open(preparedFilePath, 'r') as preparedFile:

            # The first line is always checked, and its column count (7) decides whether
            # a cohort designation is expected for the remaining lines of the file.
            firstLineFields = preparedFile.readline().strip().split('\t')
            hasCohortDesignation = len(firstLineFields) == 7
            checkForErrors(firstLineFields, hasCohortDesignation,
                           acceptableChromosomes, acceptableChromosomesFilePath)

            if checkEachLine:
                for remainingLine in preparedFile:
                    checkForErrors(remainingLine.strip().split('\t'), hasCohortDesignation,
                                   acceptableChromosomes, acceptableChromosomesFilePath)

        # If everything else looks good, generate the metadata.  This directory is now ready to go!
        print("Checks passed.  Generating metadata, including mutation counts using a call to wc -l")
        generateMetadata(dataGroupName, getIsolatedParentDir(genomeFilePath),
                         preparedFileName, InputFormat.prepared, os.path.dirname(preparedFilePath))

        # The first whitespace-separated token of the "wc -l" output is the line count.
        wcOutput = subprocess.check_output(("wc", "-l", preparedFilePath), encoding = "UTF-8")
        mutationCount = int(wcOutput.split()[0])
        Metadata(preparedFilePath).addMetadata(Metadata.AddableKeys.mutCounts, mutationCount)
def runAnalysisSuite(mutationFilePaths: List[str],
                     nucleosomeMapNames: List[str],
                     normalizationMethod,
                     customBackgroundDir,
                     useSingleNucRadius,
                     includeLinker,
                     useNucGroupRadius,
                     includeAlternativeScaling=False):
    """
    Count mutations at each dyad position and, if requested, normalize those counts.

    mutationFilePaths: Paths to the mutation files to analyze.
    nucleosomeMapNames: Names of the nucleosome maps to count against.
    normalizationMethod: One of "Singlenuc/Dinuc", "Trinuc/Quadrunuc", "Pentanuc/Hexanuc",
        "No Normalization", or "Custom Background".
    customBackgroundDir: Passed to normalizeCounts when normalizationMethod is "Custom Background".
    useSingleNucRadius, useNucGroupRadius: Which radii to analyze.  At least one must be set.
    includeLinker: When set, a linker offset of 30 is used; otherwise the offset is 0.
    includeAlternativeScaling: Passed to normalizeCounts for custom background normalization only.

    Raises UserInputError if no radius was selected, if either input list is empty, or if a
    mixed-context file is given for sequence-context normalization.  Raises ValueError for an
    unrecognized normalizationMethod, and InvalidPathError if a mutation file has no
    recognizable context.
    """

    # Make sure at least one radius was selected.
    if not useNucGroupRadius and not useSingleNucRadius:
        raise UserInputError("Must select at least one radius.")

    # Make sure at least one mutation and one nucleosome file was found.
    if len(mutationFilePaths) == 0:
        raise UserInputError("No valid input files given.")
    if len(nucleosomeMapNames) == 0:
        raise UserInputError("No valid nucleosome map files given")

    # Convert background context to int (None means no sequence-context normalization).
    if normalizationMethod == "Singlenuc/Dinuc":
        normalizationMethodNum = 1
    elif normalizationMethod == "Trinuc/Quadrunuc":
        normalizationMethodNum = 3
    elif normalizationMethod == "Pentanuc/Hexanuc":
        normalizationMethodNum = 5
    elif normalizationMethod in ("No Normalization", "Custom Background"):
        normalizationMethodNum = None
    else:
        raise ValueError("Matching strings is hard.")

    # Set the linker offset
    linkerOffset = 30 if includeLinker else 0

    ### Ensure that every mutation file has a context sufficient for the requested background.

    # create a new list of mutation file paths, replacing any with contexts that are too low.
    if normalizationMethodNum is not None:
        print("\nExpanding file context where necessary...\n")
        updatedMutationFilePaths = list()
        for mutationFilePath in mutationFilePaths:
            mutationFileContext = getContext(mutationFilePath, True)

            # Some error checking...
            if mutationFileContext is None:
                raise InvalidPathError(
                    os.path.basename(mutationFilePath),
                    "Malformed file name.  Context is not clear for:",
                    "Are you sure the file was generated by mutperiod?")
            if mutationFileContext == 0:
                raise UserInputError(
                    "Mixed context files cannot be normalized by sequence context."
                )
            # The message must name the file; concatenating the bare function object
            # would turn a failed assertion into a TypeError instead.
            assert mutationFileContext != -1, (
                "Wait, what?  How did you even get this context for this input file? " +
                os.path.basename(mutationFilePath))

            if mutationFileContext < normalizationMethodNum:
                # expandContext returns the paths of the newly generated, higher-context files.
                updatedMutationFilePaths += expandContext(
                    (mutationFilePath, ), normalizationMethodNum)
            else:
                updatedMutationFilePaths.append(mutationFilePath)
    else:
        updatedMutationFilePaths = mutationFilePaths

    ### Run the rest of the analysis.

    print("\nCounting mutations at each dyad position...")
    nucleosomeMutationCountsFilePaths = countNucleosomePositionMutations(
        updatedMutationFilePaths, nucleosomeMapNames, useSingleNucRadius,
        useNucGroupRadius, linkerOffset)

    if normalizationMethodNum is not None:

        print("\nGenerating genome-wide mutation background...")
        mutationBackgroundFilePaths = generateMutationBackground(
            updatedMutationFilePaths, normalizationMethodNum)

        print("\nGenerating nucleosome mutation background...")
        nucleosomeMutationBackgroundFilePaths = generateNucleosomeMutationBackground(
            mutationBackgroundFilePaths, nucleosomeMapNames,
            useSingleNucRadius, useNucGroupRadius, linkerOffset)

        print("\nNormalizing counts with nucleosome background data...")
        normalizeCounts(nucleosomeMutationBackgroundFilePaths)

    elif normalizationMethod == "Custom Background":
        print("\nNormalizing counts using custom background data...")
        normalizeCounts(list(), nucleosomeMutationCountsFilePaths,
                        customBackgroundDir, includeAlternativeScaling)
Esempio n. 3
0
def generateMutationBackground(mutationFilePaths, backgroundContextNum):
    """
    Generate a background mutation rate file for each of the given mutation files.

    mutationFilePaths: Paths to the mutation files.  Each file name must contain DataTypeStr.mutations.
    backgroundContextNum: The context number to generate the background in.  It is
        incremented by one for any file whose own context number is even.

    Returns a list of paths to the generated mutation background files, one per input file.
    Raises InvalidPathError if a file name does not contain DataTypeStr.mutations.
    """

    # Text names for each supported context number.
    contextNames = dict(zip(
        range(1, 7),
        ("singlenuc", "dinuc", "trinuc", "quadrunuc", "pentanuc", "hexanuc")))

    backgroundFilePaths = []  # Paths to the output files, returned to the caller.

    for mutationFilePath in mutationFilePaths:

        # Retrieve metadata
        metadata = Metadata(mutationFilePath)
        intermediateDir = os.path.join(metadata.directory, "intermediate_files")

        # Files with even-length features need a context one larger than requested.
        hasEvenContext = getContext(mutationFilePath, asInt=True) % 2 == 0
        contextNum = backgroundContextNum + 1 if hasEvenContext else backgroundContextNum

        # Look up the name of the context being used.
        assert contextNum in contextNames, "Unexpected background context number: " + str(contextNum)
        contextText = contextNames[contextNum]

        # Get the list of acceptable chromosomes
        acceptableChromosomes = getAcceptableChromosomes(metadata.genomeFilePath)

        mutationFileName = os.path.basename(mutationFilePath)
        print("\nWorking in:", mutationFileName)
        if DataTypeStr.mutations not in mutationFileName:
            raise InvalidPathError(
                mutationFilePath,
                "Given mutation file does not have \"" + DataTypeStr.mutations + "\" in the name.",
                postPathMessage="Are you sure you inputted a file from the mutperiod pipeline?")

        # The genome context frequency file lives alongside the genome file.
        genomeFrequencyPath = generateFilePath(
            directory=os.path.dirname(metadata.genomeFilePath),
            dataGroup=metadata.genomeName,
            context=contextText,
            dataType="frequency",
            fileExtension=".tsv")

        # The mutation context frequency file is an intermediate file.
        mutationFrequencyPath = generateFilePath(
            directory=intermediateDir,
            dataGroup=metadata.dataGroupName,
            context=contextText,
            dataType="mutation_frequencies",
            fileExtension=".tsv")

        # The background mutation rate file is the final output for this input file.
        backgroundFilePath = generateFilePath(
            directory=metadata.directory,
            dataGroup=metadata.dataGroupName,
            context=contextText,
            dataType=DataTypeStr.mutBackground,
            fileExtension=".tsv")

        # The genome context frequency file is only generated when it is missing.
        if not os.path.exists(genomeFrequencyPath):
            print("Genome", contextText, "context frequency file not found at path:", genomeFrequencyPath)
            print("Generating genome " + contextText + " context frequency file...")
            generateGenomeContextFrequencyFile(
                metadata.genomeFilePath, genomeFrequencyPath,
                contextNum, contextText, acceptableChromosomes)

        # Create a directory for intermediate files if it does not already exist...
        if not os.path.exists(intermediateDir):
            os.mkdir(intermediateDir)

        # The mutation context frequency file is regenerated on every call.
        print("Generating mutation context frequency file...")
        generateMutationContextFrequencyFile(
            mutationFilePath, mutationFrequencyPath,
            contextNum, contextText, acceptableChromosomes)

        # Combine the two frequency files into the mutation background file.
        generateMutationBackgroundFile(
            genomeFrequencyPath, mutationFrequencyPath, backgroundFilePath, contextText)

        backgroundFilePaths.append(backgroundFilePath)

    return backgroundFilePaths
def getFilePathGroup(potentialFilePaths, normalizationMethods: List[int],
                     singleNuc, nucGroup, acceptableMSCohorts: List[str],
                     acceptableMutSigCohorts: List[str],
                     acceptableCustomCohorts: List[str],
                     acceptableNucleosomeMaps: List[str]):
    """
    Return the subset of potentialFilePaths that satisfies every requested qualification.

    A qualification whose argument is empty (or, for the radius, where neither flag is set)
    is skipped entirely.

    potentialFilePaths: The candidate file paths.
    normalizationMethods: Acceptable normalization methods.  0 accepts raw nucleosome counts
        files; any other value is matched against the context (as an int) of normalized
        nucleosome counts files.
    singleNuc, nucGroup: Whether files without / with a nucleosome group radius are acceptable.
    acceptableMSCohorts, acceptableMutSigCohorts, acceptableCustomCohorts: For each non-empty
        list, the file's metadata must list at least one of the given cohorts.
    acceptableNucleosomeMaps: If non-empty, the file's nucleosome map name must be in this list.

    Returns a new list of the accepted paths, in their original order.
    """

    # Only the cohort categories that were actually given constrain the result.
    requiredCohortGroups = [
        cohortGroup for cohortGroup in (acceptableMSCohorts,
                                        acceptableMutSigCohorts,
                                        acceptableCustomCohorts)
        if cohortGroup
    ]

    filePathGroup = list()  # The file paths to be returned.

    for potentialFilePath in potentialFilePaths:

        potentialFileName = os.path.basename(potentialFilePath)

        # Does it satisfy the normalization methods qualification?
        # (When this qualification is active, it also ensures that we have nucleosome counts,
        # whether raw or normalized.)
        if normalizationMethods:
            isAcceptableRaw = (DataTypeStr.rawNucCounts in potentialFileName
                               and 0 in normalizationMethods)
            if not isAcceptableRaw:
                isAcceptableNormalized = (
                    DataTypeStr.normNucCounts in potentialFileName
                    and getContext(potentialFilePath, True) in normalizationMethods)
                if not isAcceptableNormalized: continue

        # Does it satisfy the nucleosome radius qualification?
        if singleNuc or nucGroup:
            usesNucGroup = checkForNucGroup(potentialFilePath)
            if not (nucGroup if usesNucGroup else singleNuc): continue

        # The metadata is only loaded if a qualification needs it, and then only once.
        metadata = None

        # Does it belong to one of the acceptable cohorts in each category?
        if requiredCohortGroups:
            metadata = Metadata(potentialFilePath)
            # Materialized so that it can safely be scanned once per cohort category.
            fileCohorts = list(metadata.cohorts)
            if not all(
                    any(cohort in cohortGroup for cohort in fileCohorts)
                    for cohortGroup in requiredCohortGroups):
                continue

        # Does it belong to one of the acceptable nucleosome maps given?
        if acceptableNucleosomeMaps:
            if metadata is None: metadata = Metadata(potentialFilePath)
            if metadata.nucPosName not in acceptableNucleosomeMaps: continue

        # If we've made it this far, add the file path to the return group!
        filePathGroup.append(potentialFilePath)

    return filePathGroup
def normalizeCounts(backgroundCountsFilePaths: List[str],
                    customRawCountsFilePaths: List[str] = None,
                    customBackgroundCountsDir=None,
                    includeAlternativeScaling=False):
    """
    Normalize raw nucleosome mutation counts against background counts by calling the
    NormalizeNucleosomeMutationCounts.R script once for each background + raw counts pair.

    backgroundCountsFilePaths: Background counts files, paired with raw counts by getBackgroundRawPairs.
    customRawCountsFilePaths: Raw counts files to be paired with custom backgrounds.  Only used
        when customBackgroundCountsDir is given.  Defaults to no files.
    customBackgroundCountsDir: Directory of custom background counts, or None for none.
    includeAlternativeScaling: When set, an extra scaling factor argument is passed to the R
        script: "1" without a custom background directory; otherwise the ratio of the
        background's parent feature counts to the raw data's parent feature counts.

    Returns a list of paths to the normalized counts files.
    Raises subprocess.CalledProcessError if the R script exits with a non-zero status.
    """

    # A None default avoids sharing one mutable list object between calls.
    if customRawCountsFilePaths is None:
        customRawCountsFilePaths = list()

    normalizedCountsFilePaths = list()

    backgroundRawPairs = getBackgroundRawPairs(backgroundCountsFilePaths)

    # Get the background-raw pairs from the custom directories, if they were given.
    if customBackgroundCountsDir is not None:
        customBackgroundRawPairs = getCustomBackgroundRawPairs(
            customRawCountsFilePaths, customBackgroundCountsDir)
        for customBackgroundCountsFilePath in customBackgroundRawPairs:
            assert customBackgroundCountsFilePath not in backgroundRawPairs, "Unexpected intersection!"
            backgroundRawPairs[
                customBackgroundCountsFilePath] = customBackgroundRawPairs[
                    customBackgroundCountsFilePath]

    # Iterate through each background + raw counts pair
    for backgroundCountsFilePath in backgroundRawPairs:
        for rawCountsFilePath in backgroundRawPairs[backgroundCountsFilePath]:

            print("\nWorking with", os.path.basename(rawCountsFilePath), "and",
                  os.path.basename(backgroundCountsFilePath))

            metadata = Metadata(rawCountsFilePath)

            # Generate the path to the normalized file.
            # A "background" that is itself a raw counts file is labeled as a custom context.
            if DataTypeStr.rawNucCounts in backgroundCountsFilePath:
                context = "custom_context"
            else:
                context = getContext(backgroundCountsFilePath)
            normalizedCountsFilePath = generateFilePath(
                directory=metadata.directory,
                dataGroup=metadata.dataGroupName,
                context=context,
                linkerOffset=getLinkerOffset(backgroundCountsFilePath),
                usesNucGroup=checkForNucGroup(backgroundCountsFilePath),
                dataType=DataTypeStr.normNucCounts,
                fileExtension=".tsv")

            # Prepare the arguments to the subprocess call.
            args = [
                "Rscript",
                os.path.join(rScriptsDirectory,
                             "NormalizeNucleosomeMutationCounts.R"),
                rawCountsFilePath, backgroundCountsFilePath,
                normalizedCountsFilePath
            ]

            # If alternative scaling is requested, determine the appropriate scaling factor and add it to the arguments
            if includeAlternativeScaling:

                # If we are normalizing by sequence context, just revert the automatic scaling.
                # (Every subprocess argument must be a string; an int here raises a TypeError.)
                if customBackgroundCountsDir is None:
                    args.append("1")

                # If we are normalizing by a custom context, scale based on the relative sizes of the parent background and raw data sets.
                else:
                    args.append(
                        str(
                            getParentDataFeatureCounts(
                                backgroundCountsFilePath) /
                            getParentDataFeatureCounts(rawCountsFilePath)))

            # Pass the file paths to the R script to generate the normalized counts file.
            print("Calling R script to generate normalized counts...")
            subprocess.run(args, check=True)

            normalizedCountsFilePaths.append(normalizedCountsFilePath)

    # Document where the custom background counts came from in each relevant directory.
    if customBackgroundCountsDir is not None:
        customRawCountsDirs = {
            os.path.dirname(customRawCountsFilePath)
            for customRawCountsFilePath in customRawCountsFilePaths
        }
        for customRawCountsDir in customRawCountsDirs:
            metadata = Metadata(customRawCountsDir)
            customBackgroundInfoFilePath = generateFilePath(
                directory=metadata.directory,
                dataGroup=metadata.dataGroupName,
                dataType=DataTypeStr.customBackgroundInfo,
                fileExtension=".txt")
            with open(customBackgroundInfoFilePath,
                      'w') as customBackgroundInfoFile:
                customBackgroundInfoFile.write(
                    "Custom background directory: " +
                    customBackgroundCountsDir + '\n')
                # The timestamp is truncated to the minute by dropping the seconds field.
                customBackgroundInfoFile.write(
                    "Last date used: " +
                    str(datetime.datetime.now()).rsplit(':', 1)[0] + '\n')

    return normalizedCountsFilePaths
def generateNucleosomeMutationBackground(mutationBackgroundFilePaths,
                                         nucleosomeMapNames,
                                         useSingleNucRadius, useNucGroupRadius,
                                         linkerOffset):
    """
    Generate a nucleosome mutation background file for every combination of mutation
    background file, nucleosome map, and requested radius.

    mutationBackgroundFilePaths: Paths to mutation background files.  Each file name must
        contain DataTypeStr.mutBackground.
    nucleosomeMapNames: Names of the nucleosome maps to generate backgrounds for.
    useSingleNucRadius: Generate backgrounds with a dyad radius of 73 and the given linker offset.
    useNucGroupRadius: Generate backgrounds with a dyad radius of 1000 and no linker offset.
    linkerOffset: The linker offset used with the single nucleosome radius.

    Returns a list of paths to the generated nucleosome mutation background files.
    Raises UserInputError if neither radius is requested, and InvalidPathError if a
    given file is not named as a mutation background file.
    """

    if not useSingleNucRadius and not useNucGroupRadius:
        raise UserInputError(
            "Must generate background in either a single nucleosome or group nucleosome radius."
        )

    # One (usesNucGroup, dyadRadius, linkerOffset) entry per requested radius.
    # The single nucleosome radius, if requested, is always handled first.
    radiusSettings = []
    if useSingleNucRadius:
        radiusSettings.append((False, 73, linkerOffset))
    if useNucGroupRadius:
        radiusSettings.append((True, 1000, 0))

    outputFilePaths = []  # Paths to the output files, returned to the caller.

    for mutationBackgroundFilePath in mutationBackgroundFilePaths:

        backgroundFileName = os.path.basename(mutationBackgroundFilePath)
        print("\nWorking with", backgroundFileName)
        if DataTypeStr.mutBackground not in backgroundFileName:
            raise InvalidPathError(
                mutationBackgroundFilePath,
                "Given file path does not have \"" + DataTypeStr.mutBackground + "\" in the file name.")

        for nucleosomeMapName in nucleosomeMapNames:

            print("Counting with nucleosome map:", nucleosomeMapName)

            # Get metadata (Assumes that the metadata has already been generated from a call to countNucleosomePositionMutations)
            nucleosomeMapDir = os.path.join(os.path.dirname(mutationBackgroundFilePath),
                                            nucleosomeMapName)
            metadata = Metadata(nucleosomeMapDir)

            # The context is needed both as a number and as text.
            contextNum = getContext(mutationBackgroundFilePath, asInt=True)
            contextText = getContext(mutationBackgroundFilePath)
            print("Given mutation background is in", contextText, "context.")

            for usesNucGroup, dyadRadius, currentLinkerOffset in radiusSettings:

                # The dyad position context counts file is stored alongside the nucleosome positions file.
                dyadPosCountsFilePath = generateFilePath(
                    directory=os.path.dirname(metadata.baseNucPosFilePath),
                    dataGroup=metadata.nucPosName,
                    context=contextText,
                    linkerOffset=currentLinkerOffset,
                    usesNucGroup=usesNucGroup,
                    dataType="dyad_pos_counts",
                    fileExtension=".tsv")

                # The counts file is only generated (from a nucleosome fasta file) when it is missing.
                if not os.path.exists(dyadPosCountsFilePath):
                    print("Dyad position " + contextText + " counts file not found at",
                          dyadPosCountsFilePath)
                    print("Generating genome wide dyad position " + contextText + " counts file...")
                    nucPosFastaFilePath = generateNucleosomeFasta(
                        metadata.baseNucPosFilePath, metadata.genomeFilePath,
                        dyadRadius, currentLinkerOffset)
                    generateDyadPosContextCounts(
                        nucPosFastaFilePath, dyadPosCountsFilePath,
                        contextNum, dyadRadius, currentLinkerOffset)

                # A path to the final output file.
                outputFilePath = generateFilePath(
                    directory=metadata.directory,
                    dataGroup=metadata.dataGroupName,
                    context=contextText,
                    linkerOffset=currentLinkerOffset,
                    usesNucGroup=usesNucGroup,
                    dataType=DataTypeStr.nucMutBackground,
                    fileExtension=".tsv")

                # Generate the nucleosome mutation background file!
                generateNucleosomeMutationBackgroundFile(
                    dyadPosCountsFilePath, mutationBackgroundFilePath,
                    outputFilePath, dyadRadius, currentLinkerOffset)

                outputFilePaths.append(outputFilePath)

    return outputFilePaths
def generateNucleosomeMutationBackgroundFile(
        dyadPosContextCountsFilePath, mutationBackgroundFilePath,
        nucleosomeMutationBackgroundFilePath, dyadRadius, linkerOffset):
    """
    Write a tab-separated file of expected mutations at each dyad position.

    The expected mutations at a position are the sum, over every context counted at that
    position, of the context's background mutation rate times its count.  The minus strand
    uses the rate of the reverse complement of each context instead.

    dyadPosContextCountsFilePath: Path to the dyad position context counts file.
    mutationBackgroundFilePath: Path to the background mutation rate file.  Its context
        determines whether half positions are used.
    nucleosomeMutationBackgroundFilePath: Path of the output file.
    dyadRadius, linkerOffset: Together, these set the range of dyad positions written.
    """

    # This is a bit weird.  If the context number is even, we need to account for half positions,
    # but if the context number is odd, we need to keep in mind that there's one extra valid position in the dyad range.
    hasEvenContext = getContext(mutationBackgroundFilePath, asInt=True) % 2 == 0
    if hasEvenContext:
        halfBaseOffset, extraDyadPos = 0.5, 0
    else:
        halfBaseOffset, extraDyadPos = 0, 1

    # Every dyad position included in the analysis, in ascending order.
    dyadPositions = [
        i + halfBaseOffset
        for i in range(-dyadRadius - linkerOffset,
                       dyadRadius + linkerOffset + extraDyadPos)
    ]

    # Expected mutations at each dyad position, one dictionary for each strand.
    plusStrandExpected = dict.fromkeys(dyadPositions, 0)
    minusStrandExpected = dict.fromkeys(dyadPositions, 0)

    # Get the corresponding mutation background and context counts dictionaries.
    backgroundMutationRate = getGenomeBackgroundMutationRates(mutationBackgroundFilePath)
    dyadPosContextCounts = getDyadPosContextCounts(dyadPosContextCountsFilePath)

    # Accumulate the expected mutations from every context counted at each dyad position.
    for dyadPos in dyadPosContextCounts:

        contextCounts = dyadPosContextCounts[dyadPos]

        for context in contextCounts:

            reverseContext = reverseCompliment(context)

            plusStrandExpected[dyadPos] += backgroundMutationRate[context] * contextCounts[context]
            minusStrandExpected[dyadPos] += backgroundMutationRate[reverseContext] * contextCounts[context]

    # Write the accumulated totals to the nucleosome mutation background file.
    with open(nucleosomeMutationBackgroundFilePath, 'w') as outputFile:

        # Write the headers for the data.
        headerRow = '\t'.join((
            "Dyad_Position",
            "Expected_Mutations_Plus_Strand",
            "Expected_Mutations_Minus_Strand",
            "Expected_Mutations_Both_Strands",
            "Expected_Mutations_Aligned_Strands"))
        outputFile.write(headerRow + '\n')

        # Write the data for each dyad position.
        for dyadPos in dyadPositions:

            plusExpected = plusStrandExpected[dyadPos]
            minusExpected = minusStrandExpected[dyadPos]

            # The "aligned" value pairs the plus strand with the minus strand at the mirrored position.
            rowValues = (dyadPos,
                         plusExpected,
                         minusExpected,
                         plusExpected + minusExpected,
                         plusExpected + minusStrandExpected[-dyadPos])

            outputFile.write('\t'.join(map(str, rowValues)) + '\n')
Esempio n. 8
0
def expandContext(inputBedFilePaths, expansionContextNum):
    """
    Generate a new mutation bed file with an expanded context for each given bed file,
    deleting each input file after its expanded version has been generated.

    inputBedFilePaths: Paths to the mutation bed files to expand.  Each file name must
        contain DataTypeStr.mutations, and each file's context must be lower than the
        context it is being expanded to.
    expansionContextNum: The context to expand to; must be 3 or 5.  It is incremented
        by one for any file whose own context number is even.

    Returns a list of paths to the expanded context files, one per input file.
    Raises InvalidPathError if a file is not named as a mutation file or if its context
    is already at least as large as the expansion context.
    """

    assert expansionContextNum in (3, 5), "Unexpected expansion context: " + str(expansionContextNum)

    expandedFilePaths = []  # Paths to the output files, returned to the caller.

    for inputBedFilePath in inputBedFilePaths:

        # Retrieve metadata
        metadata = Metadata(inputBedFilePath)

        # Files with even-length features need a context one larger than requested.
        inputContextNum = getContext(inputBedFilePath, asInt=True)
        if inputContextNum % 2 == 0:
            targetContextNum = expansionContextNum + 1
        else:
            targetContextNum = expansionContextNum

        # Make sure file names look valid.
        inputFileName = os.path.basename(inputBedFilePath)
        print("\nWorking in:", inputFileName)
        if DataTypeStr.mutations not in inputFileName:
            raise InvalidPathError(
                inputBedFilePath,
                "Given mutation file does not have \"" + DataTypeStr.mutations + "\" in the name.",
                postPathMessage="Are you sure you inputted a file from the mutperiod pipeline?")

        # Expansion only makes sense if the input context is below the target context.
        if inputContextNum >= targetContextNum:
            raise InvalidPathError(
                inputBedFilePath,
                "The given mutation file at does not have a lower context "
                "than the desired output context.",
                postPathMessage="There is nothing to expand.")

        # Generate paths to intermediate data files.
        intermediateDir = os.path.join(metadata.directory, "intermediate_files")

        expandedPositionsFilePath = generateFilePath(
            directory=intermediateDir,
            dataGroup=metadata.dataGroupName,
            dataType="intermediate_expansion",
            fileExtension=".bed")

        expandedReadsFilePath = generateFilePath(
            directory=intermediateDir,
            dataGroup=metadata.dataGroupName,
            dataType="expanded_reads",
            fileExtension=".fa")

        # Generate a path to the final output file.
        expandedFilePath = generateFilePath(
            directory=metadata.directory,
            dataGroup=metadata.dataGroupName,
            context=targetContextNum,
            dataType=DataTypeStr.mutations,
            fileExtension=".bed")

        # Create a directory for intermediate files if it does not already exist...
        if not os.path.exists(intermediateDir):
            os.mkdir(intermediateDir)

        # Expand the nucleotide coordinates in the input bed file as requested.
        expandBedPositions(inputBedFilePath, expandedPositionsFilePath, targetContextNum)

        # Convert the expanded coordinates in the bed file to the referenced nucleotides in fasta format.
        bedToFasta(expandedPositionsFilePath, metadata.genomeFilePath, expandedReadsFilePath)

        # Using the newly generated fasta file, create a new bed file with the expanded context.
        generateExpandedContext(inputBedFilePath, expandedReadsFilePath,
                                expandedFilePath, targetContextNum)

        expandedFilePaths.append(expandedFilePath)

        # Delete the input file, which has the same mutation information, but a smaller context.
        print("Deleting old mutation context file...")
        os.remove(inputBedFilePath)

    return expandedFilePaths