Esempio n. 1
0
def getDataDirectory():
    """
    Return the path to the mutperiod data directory, creating it if needed.

    The location is remembered in "<home>/.mutperiod/data_dir.txt".  If that
    file exists and the directory named on its first line still exists, that
    directory is returned.  Otherwise (no text file, or the recorded directory
    has gone missing) a dialog asks the user for a parent directory, a
    "mutperiod_data" directory (with an "__external_data" subdirectory) is
    created inside it, the new path is written to the text file, and the new
    path is returned.

    Raises:
        UserInputError: The directory chosen in the dialog does not exist.
        InvalidPathError: Creating the data directory raised an IOError.
    """

    # Check for the text file which should contain the path to the data directory.
    # Prefer $HOME (the historical behavior), but fall back to the platform's
    # notion of a home directory when HOME is unset instead of passing None
    # to os.path.join.
    homeDirectory = os.getenv("HOME") or os.path.expanduser("~")
    dataDirectoryTextFilePath = os.path.join(homeDirectory, ".mutperiod",
                                             "data_dir.txt")

    # If it exists, return the directory path within.
    if os.path.exists(dataDirectoryTextFilePath):
        with open(dataDirectoryTextFilePath, 'r') as dataDirectoryTextFile:
            dataDirectory = dataDirectoryTextFile.readline().strip()

        # Double check to make sure the data directory is still intact.
        if os.path.exists(dataDirectory):
            return dataDirectory

        # If it isn't, inform the user and fall through to the dialog below
        # so the directory is actually recreated (previously the function
        # returned None at this point).
        print("Data directory not found at expected location: {}".format(
            dataDirectory))
        print("Please select a new location to create a data directory.")

    # Create a simple dialog to select a new data directory location.
    from benbiohelpers.TkWrappers.TkinterDialog import TkinterDialog, Selections
    checkDirs(os.path.dirname(dataDirectoryTextFilePath))
    dialog = TkinterDialog(
        workingDirectory=os.path.dirname(dataDirectoryTextFilePath))
    dialog.createFileSelector("Location to create new data directory:",
                              0, ("Fasta Files", ".fa"),
                              directory=True)

    # Run the UI
    dialog.mainloop()

    # If no input was received (i.e. the UI was terminated prematurely), then quit!
    if dialog.selections is None: quit()

    selections: Selections = dialog.selections
    dataDirectoryDirectory = selections.getIndividualFilePaths()[0]

    # Make sure a valid, writeable directory was given.  Then create the new directory (if it doesn't exist already),
    # write it to the text file, and return it!  (Also create the __external_data directory.)
    if not os.path.exists(dataDirectoryDirectory):
        raise UserInputError("Given directory: " + dataDirectoryDirectory +
                             " does not exist.")

    dataDirectory = os.path.join(dataDirectoryDirectory, "mutperiod_data")
    try:
        checkDirs(dataDirectory)
        checkDirs(os.path.join(dataDirectory, "__external_data"))
    except IOError:
        raise InvalidPathError(
            dataDirectoryDirectory,
            "Given location for data directory is not writeable:")
    with open(dataDirectoryTextFilePath, 'w') as dataDirectoryTextFile:
        dataDirectoryTextFile.write(dataDirectory + '\n')
    return dataDirectory
Esempio n. 2
0
def main():
    """Ask for bed mutation files and a target context, then pass them to expandContext."""

    # Numeric context handed to expandContext for each dropdown label.
    contextNumbersByLabel = {"Trinuc/Quadrunuc": 3, "Pentanuc/Hexanuc": 5}

    # Build the dialog: one multi-file selector and one dropdown.
    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector(
        "Bed Mutation File:", 0,
        "singlenuc_" + DataTypeStr.mutations + ".bed",
        ("Bed Files", ".bed"))
    dialog.createDropdown("Expansion Context", 1, 0,
                          ("Trinuc/Quadrunuc", "Pentanuc/Hexanuc"))

    # Hand control to the UI until the user is done.
    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    # Pull the first file group and the first dropdown choice out of the dialog.
    userSelections: Selections = dialog.selections
    bedPaths = list(userSelections.getFilePathGroups())[0]
    chosenLabel = list(userSelections.getDropdownSelections())[0]

    # Translate the label into its number, rejecting anything unexpected.
    contextNumber = contextNumbersByLabel.get(chosenLabel)
    if contextNumber is None:
        raise ValueError("Matching strings is hard.")

    expandContext(bedPaths, contextNumber)
Esempio n. 3
0
def main():
    """Collect custom bed files, a genome fasta file and five options, then call parseCustomBed."""

    # Build the dialog: file selectors on rows 0-1, checkboxes on rows 2-4.
    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Custom bed Input Files:", 0,
                                      "custom_input.bed",
                                      ("bed files", ".bed"))
    dialog.createFileSelector("Genome Fasta File:", 1, ("Fasta Files", ".fa"))
    dialog.createCheckbox("Stratify data by microsatellite stability?", 2, 0)
    dialog.createCheckbox("Stratify by mutation signature?", 2, 1)
    dialog.createCheckbox("Separate individual cohorts?", 3, 0)
    dialog.createCheckbox("Only use single nucleotide substitutions?", 4, 0)
    dialog.createCheckbox("Include indels in output?", 4, 1)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    userSelections: Selections = dialog.selections
    bedPaths = userSelections.getFilePathGroups()[0]
    genomePath = userSelections.getIndividualFilePaths()[0]

    # The toggle states are read in the order the checkboxes were created above.
    toggleStates = userSelections.getToggleStates()
    byMicrosatelliteStability = toggleStates[0]
    byMutationSignature = toggleStates[1]
    separateCohorts = toggleStates[2]
    singleBaseSubsOnly = toggleStates[3]
    withIndels = toggleStates[4]

    parseCustomBed(bedPaths, genomePath, byMicrosatelliteStability,
                   byMutationSignature, separateCohorts, singleBaseSubsOnly,
                   withIndels)
Esempio n. 4
0
def main():
    """Ask for bed mutation files and a background context, then call generateMutationBackground."""

    # Numeric context handed to generateMutationBackground for each dropdown label.
    contextNumbersByLabel = {
        "Singlenuc/Dinuc": 1,
        "Trinuc/Quadrunuc": 3,
        "Pentanuc/Hexanuc": 5,
    }

    # Build the dialog: one multi-file selector and one dropdown.
    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Bed Mutation Files:", 0,
                                      DataTypeStr.mutations + ".bed",
                                      ("Bed Files", ".bed"))
    dialog.createDropdown(
        "Background Context", 1, 0,
        ("Singlenuc/Dinuc", "Trinuc/Quadrunuc", "Pentanuc/Hexanuc"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    # Pull the first file group and the first dropdown choice out of the dialog.
    userSelections: Selections = dialog.selections
    bedPaths = list(userSelections.getFilePathGroups())[0]
    chosenLabel: str = list(userSelections.getDropdownSelections())[0]

    # Translate the label into its number, rejecting anything unexpected.
    contextNumber = contextNumbersByLabel.get(chosenLabel)
    if contextNumber is None:
        raise ValueError("Matching strings is hard.")

    generateMutationBackground(bedPaths, contextNumber)
Esempio n. 5
0
def main():
    """Collect ICGC mutation files, a genome fasta file and three options, then call parseICGC."""

    # Build the dialog: file selectors on rows 0-1, one checkbox on each of rows 2-4.
    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("ICGC Mutation Files:", 0, ".tsv.gz",
                                      ("gzip files", ".gz"))
    dialog.createFileSelector("Genome Fasta File:", 1, ("Fasta Files", ".fa"))
    dialog.createCheckbox("Create individual bed files for each donor.", 2, 0)
    dialog.createCheckbox("Stratify results by microsatellite stability", 3, 0)
    dialog.createCheckbox("Stratify results by mutation signature", 4, 0)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    userSelections: Selections = dialog.selections
    icgcPaths = list(userSelections.getFilePathGroups())[0]
    genomePath = list(userSelections.getIndividualFilePaths())[0]

    # The toggle states are read in the order the checkboxes were created above.
    toggleStates = list(userSelections.getToggleStates())
    perDonorFiles = toggleStates[0]
    byMicrosatelliteStability = toggleStates[1]
    byMutationSignature = toggleStates[2]

    parseICGC(icgcPaths, genomePath, perDonorFiles,
              byMicrosatelliteStability, byMutationSignature)
def main():
    """Collect feature files, gene designations and binning options, then call binInGenes."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Feature Files (e.g. mutations):", 0,
                                      "bin_me.bed", ("Bed Files", ".bed"))
    dialog.createFileSelector("Gene Designations:", 1, ("Bed Files", ".bed"))
    dialog.createCheckbox("Color Domain is present in 7th (index=6) column",
                          2, 0)

    # Checkbox-controlled section holding the two flanking-bin text fields.
    flankSelector = dialog.createDynamicSelector(3, 0)
    flankSelector.initCheckboxController(
        "Gene designations include flanking regions")
    flankFields = flankSelector.initDisplay(True, "FlankSize")
    flankFields.createTextField("Flanking bin size:", 0, 0, defaultText="0")
    flankFields.createTextField("Flanking bin number:", 1, 0, defaultText="0")
    flankSelector.initDisplayState()

    # Checkbox-controlled section holding the custom file suffix text field.
    suffixSelector = dialog.createDynamicSelector(4, 0)
    suffixSelector.initCheckboxController("Custom file suffix")
    suffixFields = suffixSelector.initDisplay(True, "Suffix")
    suffixFields.createTextField("File Suffix:", 0, 0, defaultText="all_genes")
    suffixSelector.initDisplayState()

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    # Column index 6 is passed when the color checkbox is ticked, otherwise None.
    colorColumn = 6 if dialog.selections.getToggleStates()[0] else None

    # Flanking values default to zero unless the flank section is enabled.
    binSize = 0
    binCount = 0
    if flankSelector.getControllerVar():
        binSize = int(dialog.selections.getTextEntries("FlankSize")[0])
        binCount = int(dialog.selections.getTextEntries("FlankSize")[1])

    # The suffix defaults to an empty string unless the suffix section is enabled.
    suffix = ""
    if suffixSelector.getControllerVar():
        suffix = dialog.selections.getTextEntries("Suffix")[0]

    featurePaths = dialog.selections.getFilePathGroups()[0]
    geneDesignationsPath = dialog.selections.getIndividualFilePaths()[0]
    binInGenes(featurePaths, geneDesignationsPath, binSize, binCount, suffix,
               colorColumn)
def main():
    """Ask for a single bed file and pass it to expandToBothStrands."""

    # The dialog opens in the directory containing this script.
    scriptDirectory = os.path.dirname(__file__)
    dialog = TkinterDialog(workingDirectory=scriptDirectory)
    dialog.createFileSelector("Bed File:", 0, ("bed file", ".bed"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    bedPath = dialog.selections.getIndividualFilePaths()[0]
    expandToBothStrands(bedPath)
def main():
    """Collect background files, nucleosome maps and radius options, then call generateNucleosomeMutationBackground."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Mutation Background Files:", 0,
                                      DataTypeStr.mutBackground + ".tsv",
                                      ("Tab Seperated Values Files", ".tsv"))
    dialog.createMultipleFileSelector("Nucleosome Map Files:", 1,
                                      "nucleosome_map.bed",
                                      ("Bed Files", ".bed"))

    # Checkbox-controlled section: the linker checkbox is attached to the
    # display registered for controller value 1.
    singleNucSelector = dialog.createDynamicSelector(2, 0)
    singleNucSelector.initCheckboxController(
        "Generate background with a single nucleosome radius (73 bp)")
    linkerOptions = singleNucSelector.initDisplay(1, "singleNuc")
    linkerOptions.createCheckbox(
        "Include 30 bp linker DNA on either side of single nucleosome radius.",
        0, 0)
    singleNucSelector.initDisplay(0)
    singleNucSelector.initDisplayState()

    dialog.createCheckbox(
        "Generate background with a nucleosome group radius (1000 bp)", 3, 0)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    userSelections: Selections = dialog.selections
    backgroundPaths = userSelections.getFilePathGroups()[0]

    # Each selected nucleosome map file is reduced via getIsolatedParentDir.
    mapNames = []
    for mapFilePath in userSelections.getFilePathGroups()[1]:
        mapNames.append(getIsolatedParentDir(mapFilePath))

    # The linker checkbox is only consulted when the single-nucleosome option is on.
    singleNucRadius = bool(singleNucSelector.getControllerVar())
    if singleNucRadius:
        withLinker = userSelections.getToggleStates("singleNuc")[0]
    else:
        withLinker = False
    nucGroupRadius = userSelections.getToggleStates()[0]

    # 30 when the linker is included, otherwise 0.
    offset = 30 if withLinker else 0

    generateNucleosomeMutationBackground(backgroundPaths, mapNames,
                                         singleNucRadius, nucGroupRadius,
                                         offset)
Esempio n. 9
0
def main():
    """Ask for a single text file and pass it to parseSpivakovToBed."""

    # The dialog opens in the directory containing this script.
    scriptDirectory = os.path.dirname(__file__)
    dialog = TkinterDialog(workingDirectory=scriptDirectory)
    dialog.createFileSelector("Spivakov File:", 0, ("Text File", ".txt"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    spivakovPath = dialog.selections.getIndividualFilePaths()[0]
    parseSpivakovToBed(spivakovPath)
Esempio n. 10
0
def main():
    """Ask for a colored gene designations file and a colorless gene data file, then call assignToDomainByGene."""

    # The dialog opens in the directory containing this script.
    scriptDirectory = os.path.dirname(__file__)
    dialog = TkinterDialog(workingDirectory=scriptDirectory)
    dialog.createFileSelector("Colored Gene Designations:", 0,
                              ("Bed File", ".bed"))
    dialog.createFileSelector("Colorless Gene Data (e.g. RPKM)", 1,
                              ("Tab separated files", ".tsv"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    coloredGenesPath = dialog.selections.getIndividualFilePaths()[0]
    colorlessDataPath = dialog.selections.getIndividualFilePaths()[1]
    assignToDomainByGene(coloredGenesPath, colorlessDataPath)
def main():
    """Collect background count files and optional custom background inputs, then call normalizeCounts."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector(
        "Background Nucleosome Mutation Counts Files:", 0,
        DataTypeStr.nucMutBackground + ".tsv",
        ("Tab Seperated Values Files", ".tsv"))

    # Checkbox-controlled section holding the custom background selectors.
    customSelector = dialog.createDynamicSelector(1, 0)
    customSelector.initCheckboxController(
        "Include files with another data set as custom background.")
    customInputs = customSelector.initDisplay(True, "customBackground")
    customInputs.createMultipleFileSelector(
        "Raw nucleosome counts to be normalized", 0,
        DataTypeStr.rawNucCounts + ".tsv", ("TSV Files", ".tsv"))
    customInputs.createFileSelector(
        "Data Directory for nucleosome counts to be used as background",
        1,
        directory=True)
    customSelector.initDisplayState()

    dialog.createCheckbox(
        "Include alternative scaling factor indepedent of nucleosome map.", 2,
        0)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    userSelections: Selections = dialog.selections
    backgroundPaths = userSelections.getFilePathGroups()[0]

    # Both custom inputs stay None unless the custom background section is enabled.
    rawCountsPaths = None
    customBackgroundDir = None
    if customSelector.getControllerVar():
        rawCountsPaths = userSelections.getFilePathGroups(
            "customBackground")[0]
        customBackgroundDir = userSelections.getIndividualFilePaths(
            "customBackground")[0]

    alternativeScaling = userSelections.getToggleStates()[0]

    normalizeCounts(backgroundPaths, rawCountsPaths, customBackgroundDir,
                    alternativeScaling)
def main():
    """
    Ask for an RNAseq bed file and a gene designations bed file, then call
    binRNASeqByChromatinDomainInGenes with color column index 6.
    """

    # Create the Tkinter UI
    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createFileSelector("RNAseq File:", 0, ("Bed Files", ".bed"))
    dialog.createFileSelector("Gene Designations (color in 7th column):", 1,
                              ("Bed Files", ".bed"))

    # Run the UI
    dialog.mainloop()

    # If no input was received (i.e. the UI was terminated prematurely), then quit!
    # Without this guard, the attribute access below fails on None.
    if dialog.selections is None: quit()

    binRNASeqByChromatinDomainInGenes(
        dialog.selections.getIndividualFilePaths()[0],
        dialog.selections.getIndividualFilePaths()[1], 6)
Esempio n. 13
0
def main():
    """Ask for bed files and a genome fasta file, then call removeUnacceptableChromosomes."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Bed Files:", 0,
                                      "I_have_bad_chromosomes.bed",
                                      ("Bed Files", ".bed"))
    dialog.createFileSelector("Genome Fasta File:", 1, ("Fasta Files", ".fa"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    bedPaths = dialog.selections.getFilePathGroups()[0]
    genomePath = dialog.selections.getIndividualFilePaths()[0]
    removeUnacceptableChromosomes(bedPaths, genomePath)
def main():
    """Ask for prepared input files, a genome fasta file and one option, then call parsePreparedInput."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Prepared Input Files:", 0,
                                      DataTypeStr.mutations + ".bed",
                                      ("bed files", ".bed"))
    dialog.createFileSelector("Genome Fasta File:", 1, ("Fasta Files", ".fa"))
    dialog.createCheckbox("Skip formatting checks for all but the first line",
                          2, 0)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    preparedPaths = dialog.selections.getFilePathGroups()[0]
    genomePath = dialog.selections.getIndividualFilePaths()[0]

    # The checkbox state is inverted before being handed to parsePreparedInput.
    skipChecks = dialog.selections.getToggleStates()[0]
    parsePreparedInput(preparedPaths, genomePath, not skipChecks)
Esempio n. 15
0
def main():
    """Ask for an encompassing feature file and a dyad positions file, then call stratifyNucleosomesByEncompassment."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createFileSelector("Encompassing Feature File:", 0,
                              ("Bed Files", ".bed"))
    dialog.createFileSelector("Nucleosome Dyad Center Positions:", 1,
                              ("Bed Files", ".bed"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    encompassingFeaturePath = dialog.selections.getIndividualFilePaths()[0]
    dyadPositionsPath = dialog.selections.getIndividualFilePaths()[1]
    stratifyNucleosomesByEncompassment(encompassingFeaturePath,
                                       dyadPositionsPath)
Esempio n. 16
0
def main():
    """Ask for genome feature position files and a gene ranges file, then call splitGenicAndIntergenic."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Genome Feature Positions Files:", 0,
                                      "context_mutations.bed",
                                      ("Bed Files", ".bed"))
    dialog.createFileSelector("Gene Ranges File (merged):", 1,
                              ("Bed Files", ".bed"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    featurePaths = dialog.selections.getFilePathGroups()[0]
    geneRangesPath = dialog.selections.getIndividualFilePaths()[0]
    splitGenicAndIntergenic(featurePaths, geneRangesPath)
def main():
    """Ask for mutation data, TFBS positions and an output file, then call recordMutationsInTFBSs."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createFileSelector("Bed Mutation Data:", 0, ("Bed Files", ".bed"))
    dialog.createFileSelector("TFBS Positions:", 1, ("Bed Files", ".bed"))
    dialog.createFileSelector("Output File:", 2, ("TSV Files", ".tsv"),
                              newFile=True)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    # The paths are read in the order the selectors were created above.
    mutationsPath = dialog.selections.getIndividualFilePaths()[0]
    tfbsPositionsPath = dialog.selections.getIndividualFilePaths()[1]
    outputPath = dialog.selections.getIndividualFilePaths()[2]
    recordMutationsInTFBSs(mutationsPath, tfbsPositionsPath, outputPath)
def main():
    """Ask for mutation files and binding motif files, then call countInBindingMotifs."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Mutation Files:", 0,
                                      DataTypeStr.mutations + ".bed",
                                      ("Bed Files", ".bed"))
    dialog.createMultipleFileSelector("Binding Motifs Files:", 1,
                                      "binding_motifs.bed",
                                      ("Bed Files", ".bed"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    # The file groups are read in the order the selectors were created above.
    mutationPaths = dialog.selections.getFilePathGroups()[0]
    bindingMotifPaths = dialog.selections.getFilePathGroups()[1]
    countInBindingMotifs(mutationPaths, bindingMotifPaths)
Esempio n. 19
0
def main():
    """Collect nucleosome count files, rda files, an export target and three options, then call generateFigures."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Tab-Separated Nucleosome Counts Files:",
                                      0,
                                      DataTypeStr.generalNucCounts + ".tsv",
                                      ("tsv files", ".tsv"))
    dialog.createMultipleFileSelector("R Nucleosome Mutation Analysis Files:",
                                      1, ".rda", ("rda files", ".rda"))

    # Checkbox-controlled section: a new pdf file selector is registered for
    # controller value 1 and a directory selector for controller value 0.
    exportModeSelector = dialog.createDynamicSelector(2, 0)
    exportModeSelector.initCheckboxController(
        "Export to one file (as opposed to one file for each graph)")
    singleFileInputs = exportModeSelector.initDisplay(1, "oneFile")
    singleFileInputs.createFileSelector("Export File", 0,
                                        ("pdf file", ".pdf"),
                                        newFile=True)
    multiFileInputs = exportModeSelector.initDisplay(0, "manyFiles")
    multiFileInputs.createFileSelector("Export Directory", 0, directory=True)
    exportModeSelector.initDisplayState()

    dialog.createCheckbox("Omit Outliers", 3, 0)
    dialog.createCheckbox("Smooth Nuc Group results", 3, 1)
    dialog.createCheckbox("Strand align results", 4, 0)
    #dialog.createCheckbox("Use normalized values from rda input", 5, 0)
    #dialog.createCheckbox("Use raw values from rda input", 5, 1)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    userSelections = dialog.selections
    countsPaths = userSelections.getFilePathGroups()[0]
    analysisPaths = userSelections.getFilePathGroups()[1]

    # The export path comes from whichever section the controller selected.
    if exportModeSelector.getControllerVar():
        exportTarget = userSelections.getIndividualFilePaths("oneFile")[0]
    else:
        exportTarget = userSelections.getIndividualFilePaths("manyFiles")[0]

    # Each toggle state is converted to a plain bool, in checkbox creation order.
    dropOutliers = bool(userSelections.getToggleStates()[0])
    smoothGroup = bool(userSelections.getToggleStates()[1])
    alignStrands = bool(userSelections.getToggleStates()[2])

    generateFigures(countsPaths, analysisPaths, exportTarget, dropOutliers,
                    smoothGroup, alignStrands)
Esempio n. 20
0
def main():
    """Ask for a data set directory and pass it to groupCohorts."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createFileSelector("Data Set Directory:", 0, directory=True)

    # TODO: Maybe allow for different cohort selections here.  Right now, I'm assuming that we group MS data by mut sigs.
    #       This will probably work best if it communicates with the project manager to some capacity (what cohort data is available?)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    userSelections: Selections = dialog.selections
    # NOTE(review): the other entry points read single selectors through
    # getIndividualFilePaths(); confirm getFilePaths() is intended here.
    chosenDirectory = userSelections.getFilePaths()[0]

    groupCohorts(chosenDirectory)
def main():
    """Ask for CPD files, deamination files and a genome fasta file, then call parseDeaminationData."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("CPD Files:", 0, "CPD_data.bed",
                                      ("Bed Files", ".bed"))
    dialog.createMultipleFileSelector("Deamination Files:", 1,
                                      "deamination_data.bed",
                                      ("Bed Files", ".bed"))
    dialog.createFileSelector("Genome Fasta File:", 2, ("Fasta File", ".fa"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    # The file groups are read in the order the selectors were created above.
    cpdPaths = dialog.selections.getFilePathGroups()[0]
    deaminationPaths = dialog.selections.getFilePathGroups()[1]
    genomePath = dialog.selections.getIndividualFilePaths()[0]
    parseDeaminationData(cpdPaths, deaminationPaths, genomePath)
def main():
    """Ask for a nucleosome map directory and stratifying feature range files, then call stratifyNucleosomeMap."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createFileSelector("Original Nucleosome Map Directory:", 0,
                              directory=True)
    dialog.createMultipleFileSelector("Stratifying Feature Ranges:", 1,
                                      "stratifying_feature_ranges.narrowPeak",
                                      ("Bed Files", ".bed"),
                                      ("Narrow Peak File", ".narrowPeak"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    mapDirectory = dialog.selections.getIndividualFilePaths()[0]
    featureRangePaths = dialog.selections.getFilePathGroups()[0]
    stratifyNucleosomeMap(mapDirectory, featureRangePaths)
def main():
    """Ask for genome feature files, a chromosome sizes file and a bin size, then call binAcrossGenome."""

    # The dialog opens in the directory containing this script.
    scriptDirectory = os.path.dirname(__file__)
    dialog = TkinterDialog(workingDirectory=scriptDirectory)
    dialog.createMultipleFileSelector("Genome Feature Files:", 0,
                                      DataTypeStr.mutations + ".bed",
                                      ("Bed Files", ".bed"))
    dialog.createFileSelector("Chromosome Sizes File:", 1,
                              ("Text File", ".txt"))
    dialog.createDropdown("Bin Size (bp):", 2, 0,
                          ("1000", "10000", "100000", "1000000"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    featurePaths = dialog.selections.getFilePathGroups()[0]
    chromSizesPath = dialog.selections.getIndividualFilePaths()[0]

    # The dropdown yields a string, so convert it before use.
    binSize = int(dialog.selections.getDropdownSelections()[0])
    binAcrossGenome(featurePaths, chromSizesPath, binSize)
def main():
    """Collect quartile files, a nucleosome directory, a stratification type and one option, then call getQuartileNucleosomePositions."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Quartile Files:", 0, "quartile.tsv",
                                      ("Tab Separated Files", ".tsv"))
    dialog.createFileSelector("Nucleosome Directory:", 1, directory=True)
    dialog.createDropdown("Stratification Type", 2, 0, ["h1 density", "other"])
    dialog.createCheckbox("Sloppy Copy?", 3, 0)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    quartilePaths = dialog.selections.getFilePathGroups()[0]
    nucleosomeDirectory = dialog.selections.getIndividualFilePaths()[0]

    # Spaces in the chosen label are replaced with underscores before it is passed on.
    chosenType = dialog.selections.getDropdownSelections()[0]
    stratificationType = chosenType.replace(' ', '_')

    sloppyCopy = dialog.selections.getToggleStates()[0]
    getQuartileNucleosomePositions(quartilePaths, nucleosomeDirectory,
                                   stratificationType, sloppyCopy)
def main():
    """Ask for feature position files, a dyad positions file and one option, then call countFeaturesAboutNucleosomes."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Genome Feature Positions Files:", 0,
                                      "context_mutations.bed",
                                      ("Bed Files", ".bed"))
    dialog.createFileSelector("Nucleosome Dyad Center Positions:", 1,
                              ("Bed Files", ".bed"))
    dialog.createCheckbox("Only Count Linker", 2, 0)

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    featurePaths = dialog.selections.getFilePathGroups()[0]
    dyadPositionsPath = dialog.selections.getIndividualFilePaths()[0]
    linkerOnly = dialog.selections.getToggleStates()[0]
    countFeaturesAboutNucleosomes(featurePaths, dyadPositionsPath, linkerOnly)
def main():
    """
    Ask for a chromatin domains file and a binning mode, then call
    determineRegularBinColors or determineSpecifiedBinColors accordingly.
    """

    # The dialog opens in the directory containing this script.
    scriptDirectory = os.path.dirname(__file__)
    dialog = TkinterDialog(workingDirectory=scriptDirectory)
    dialog.createFileSelector("Chromatin Domains File:", 0,
                              ("Bed Files", ".bed"))

    # Dropdown-controlled section with one display per binning mode.
    binModeSelector = dialog.createDynamicSelector(1, 0)
    binModeSelector.initDropdownController("Bins are...",
                                           ("Regular", "Specific Ranges"))
    regularInputs = binModeSelector.initDisplay("Regular", "Regular")
    specificRangeInputs = binModeSelector.initDisplay("Specific Ranges",
                                                      "Specific Ranges")

    regularInputs.createFileSelector("Chromosome Sizes File:", 0,
                                     ("Text File", ".txt"))
    regularInputs.createDropdown("Bin Size (bp):", 1, 0,
                                 ("1000", "10000", "100000", "1000000"))

    specificRangeInputs.createFileSelector("Ranges to bin:", 0,
                                           ("Bed File", ".bed"))

    binModeSelector.initDisplayState()

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    # Dispatch on the chosen mode; any other value does nothing.
    chosenMode = binModeSelector.getControllerVar()
    if chosenMode == "Regular":
        domainsPath = dialog.selections.getIndividualFilePaths()[0]
        chromSizesPath = dialog.selections.getIndividualFilePaths(
            "Regular")[0]
        binSize = int(dialog.selections.getDropdownSelections("Regular")[0])
        determineRegularBinColors(domainsPath, chromSizesPath, binSize)
    elif chosenMode == "Specific Ranges":
        domainsPath = dialog.selections.getIndividualFilePaths()[0]
        rangesPath = dialog.selections.getIndividualFilePaths(
            "Specific Ranges")[0]
        determineSpecifiedBinColors(domainsPath, rangesPath)
def main():
    """Ask for mutation files and a domain range file, then call separateByChromatinRegions."""

    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector("Mutation Files:", 0,
                                      DataTypeStr.mutations + ".bed",
                                      ("Bed Files", ".bed"))
    dialog.createFileSelector("Domain Range File:", 1, ("Bed File", ".bed"))

    dialog.mainloop()

    # No selections means the UI was closed early; stop here.
    if dialog.selections is None:
        quit()

    userSelections: Selections = dialog.selections
    mutationPaths = userSelections.getFilePathGroups()[0]
    domainRangesPath = userSelections.getIndividualFilePaths()[0]

    separateByChromatinRegions(mutationPaths, domainRangesPath)
def main():
    """
    Collect the inputs for the analysis suite through a Tkinter dialog and run it.

    The dialog gathers a group of bed mutation files, a group of nucleosome map files, a
    normalization method (with an optional custom background directory), and the dyad radius
    options. If requested, generateCustomBackground is called first; runAnalysisSuite is then
    called with everything that was selected.

    Quits without doing anything if the dialog finishes with no selections.
    """

    dialog = TkinterDialog(workingDirectory=getDataDirectory())

    # Rows 0 and 1: the mutation file group and the nucleosome map file group.
    bedFileType = ("Bed Files", ".bed")
    dialog.createMultipleFileSelector("Bed Mutation Files:", 0,
                                      DataTypeStr.mutations + ".bed",
                                      bedFileType)
    dialog.createMultipleFileSelector("Nucleosome Map Files", 1,
                                      "nucleosome_map.bed", bedFileType)

    # Row 2: the normalization dropdown, plus the extra inputs registered for its "Custom Background" option.
    normalizationOptions = ("No Normalization", "Singlenuc/Dinuc",
                            "Trinuc/Quadrunuc", "Pentanuc/Hexanuc",
                            "Custom Background")
    normalizationSelector = dialog.createDynamicSelector(2, 0)
    normalizationSelector.initDropdownController("Normalization Method",
                                                 normalizationOptions)
    customBackgroundDialog = normalizationSelector.initDisplay(
        "Custom Background", "customBackground")
    customBackgroundDialog.createFileSelector("Custom Background Directory:",
                                              0, ("Bed Files", ".bed"),
                                              directory=True)
    customBackgroundDialog.createCheckbox("Generate Background now", 1, 0)
    normalizationSelector.initDisplayState()

    # Row 3: alternative scaling checkbox, followed by an empty label on row 4.
    dialog.createCheckbox(
        "Include alternative scaling factor indepedent of nucleosome map.", 3,
        0)
    dialog.createLabel('', 4, 0)

    # Row 5: the single nucleosome radius checkbox, plus the linker option registered under it.
    selectNucleosomeDyadRadius = dialog.createDynamicSelector(5, 0)
    selectNucleosomeDyadRadius.initCheckboxController(
        "Run analysis with a single nucleosome dyad radius (73 bp)")
    linkerDialog = selectNucleosomeDyadRadius.initDisplay(1, "singleNuc")
    linkerDialog.createCheckbox(
        "Include 30 bp linker DNA on either side of single nucleosome dyad radius.",
        0, 0)
    selectNucleosomeDyadRadius.initDisplayState()

    # Row 6: the nucleosome group radius checkbox.
    dialog.createCheckbox("Count with a nucleosome group radius (1000 bp)", 6,
                          0)

    # Hand control over to the UI until the user is finished with it.
    dialog.mainloop()

    # No selections means the UI was terminated prematurely, so there is nothing to process.
    if dialog.selections is None:
        quit()

    selections: Selections = dialog.selections

    # The first file path group holds the bed mutation files; the second holds the nucleosome
    # map files, each of which is reduced to a name through getIsolatedParentDir.
    mutationFilePaths = selections.getFilePathGroups()[0]
    nucleosomeMapNames = list(
        map(getIsolatedParentDir,
            selections.getFilePathGroups()[1]))

    # The custom background inputs are only read when that normalization method was chosen.
    normalizationMethod = normalizationSelector.getControllerVar()
    customBackgroundDir = None
    generateCustomBackgroundNow = False
    if normalizationMethod == "Custom Background":
        # Where to find raw counts files to use as custom background
        customBackgroundDir = selections.getFilePaths("customBackground")[0]
        # Whether or not to generate the custom background counts on the fly
        generateCustomBackgroundNow = selections.getToggleStates(
            "customBackground")[0]

    # The linker option is only read when the single nucleosome radius (73 bp) was requested.
    useSingleNucRadius = selectNucleosomeDyadRadius.getControllerVar()
    includeLinker = (selections.getToggleStates("singleNuc")[0]
                     if useSingleNucRadius else False)

    # Top-level checkbox states: index 1 is the nucleosome group radius (1000 bp) option,
    # index 0 is the alternative scaling option.
    useNucGroupRadius = selections.getToggleStates()[1]
    includeAlternativeScaling = selections.getToggleStates()[0]

    # If requested, generate the background counts file(s) before running the suite.
    if generateCustomBackgroundNow:
        generateCustomBackground(customBackgroundDir, nucleosomeMapNames,
                                 useSingleNucRadius, includeLinker,
                                 useNucGroupRadius)

    runAnalysisSuite(mutationFilePaths, nucleosomeMapNames,
                     normalizationMethod, customBackgroundDir,
                     useSingleNucRadius, includeLinker, useNucGroupRadius,
                     includeAlternativeScaling)
Esempio n. 29
0
    # Send the output files to the custom bed parser.
    parseCustomBed(xRSeqOutputFilePaths, genomeFilePath, False, False, False)
 

if __name__ == "__main__":

    # Build the input dialog. Rows 0 and 1 each take a group of XR-seq read files (bigwig or bed),
    # row 2 takes the lesion call parameter file, and row 3 takes the genome fasta file.
    dialog = TkinterDialog(workingDirectory=getDataDirectory())
    dialog.createMultipleFileSelector(
        "XR-seq bigwig data (plus strand):", 0, "+.bigWig",
        ("BigWig Files", ".bigWig"))
    dialog.createMultipleFileSelector(
        "XR-seq bed data (alternative to bigwig):", 1, "aligned_reads.bed",
        ("Bed Files", ".bed"),
        additionalFileEndings=("rep1.bed", "rep2.bed"))
    dialog.createFileSelector("Lesion Call Parameter File:", 2,
                              ("Tab Seperated Values", ".tsv"))
    dialog.createFileSelector("Genome Fasta File:", 3, ("Fasta Files", ".fa"))

    # Hand control over to the UI until the user is finished with it.
    dialog.mainloop()

    # No selections means the UI was terminated prematurely, so there is nothing to process.
    if dialog.selections is None:
        quit()

    # Pull the user's input back out of the dialog, in the same order the selectors were created.
    selections: Selections = dialog.selections
    xRSeqBigWigPlusReadsFilePaths: List[str] = list(
        selections.getFilePathGroups())[0]
    xRSeqBedReadsFilePaths: List[str] = list(
        selections.getFilePathGroups())[1]
    callParamsFilePath = selections.getIndividualFilePaths()[0]
    genomeFilePath = selections.getIndividualFilePaths()[1]

    # The bigwig and bed read files are handed to the parser together as one list.
    allXRSeqReadsFilePaths = xRSeqBigWigPlusReadsFilePaths + xRSeqBedReadsFilePaths
    parseXRSeq(allXRSeqReadsFilePaths, callParamsFilePath, genomeFilePath)
def main():
    """
    Collect the inputs for the nucleosome mutation analysis through a Tkinter dialog, resolve
    them into three lists of counts file paths, and run the analysis.

    Group 0 is the full list of files handed to the analysis. Groups 1 and 2 are only filled
    when a comparison between two groups was requested: either two filtered sub-groups of the
    original selection, or the original selection versus a newly selected secondary group (in
    which case group 0 ends up holding both). All three groups are passed to
    runNucleosomeMutationAnalysis together with the output file path and the two top-level
    checkbox states.

    Quits without doing anything if the dialog finishes with no selections.
    """

    # Option and ID strings that are needed both while building the UI and while reading it back.
    withinSelection = "Within original selection"
    againstNewGroup = "Against a newly selected group"
    secondaryGroupID = "Secondary Group"

    #Create the Tkinter UI
    dialog = TkinterDialog(workingDirectory=getDataDirectory(),
                           scrollable=True)

    # The same counts file selector is created for the main dialog and for the secondary group,
    # so its arguments are spelled out once here.
    countsSelectorLabel = "Nucleosome Mutation Counts files:"
    normCountsFileEnding = DataTypeStr.normNucCounts + ".tsv"
    tsvFileType = ("Tab Seperated Values Files", ".tsv")
    rawCountsFileEndings = (DataTypeStr.rawNucCounts + ".tsv", )

    dialog.createMultipleFileSelector(
        countsSelectorLabel,
        0,
        normCountsFileEnding,
        tsvFileType,
        additionalFileEndings=rawCountsFileEndings)
    dialog.createFileSelector("Output File",
                              1, ("R Data File", ".rda"),
                              ("Tab Separated Values File", ".tsv"),
                              newFile=True)

    # Rows 2 and 3: the two top-level checkboxes, followed by an empty label on row 4.
    dialog.createCheckbox(
        "Use expected periodicity from nucleosome maps instead of peak periodicity",
        2, 0)
    dialog.createCheckbox(
        "Align both DNA strands to run 5' to 3' before running the analysis",
        3, 0)
    dialog.createLabel('', 4, 0)

    # Row 5: optional filtering of the main group of counts files.
    mainGroupFilterSelector = dialog.createDynamicSelector(5, 0)
    mainGroupFilterSelector.initCheckboxController("Filter counts files")
    mainGroupFilterDialog = mainGroupFilterSelector.initDisplay(True)
    mainGroupFilterDialog.createNucMutGroupSubDialog("MainGroup", 0)
    mainGroupFilterSelector.initDisplayState()

    # Row 6: optional comparison between two groups, with a dropdown choosing where the groups come from.
    comparisonSelector = dialog.createDynamicSelector(6, 0)
    comparisonSelector.initCheckboxController(
        "Compare periodicities between two groups")
    comparisonDialog = comparisonSelector.initDisplay(True,
                                                      "periodicityGroupType")

    comparisonTypeSelector = comparisonDialog.createDynamicSelector(0, 0)
    comparisonTypeSelector.initDropdownController(
        "Compare periodicities...", (withinSelection, againstNewGroup))
    withinGroupDialog = comparisonTypeSelector.initDisplay(
        withinSelection, "withinGroup")
    secondaryGroupDialog = comparisonTypeSelector.initDisplay(
        againstNewGroup, "secondaryGroupFilePaths")

    # The "withinGroup" dialog gets one sub-dialog per sub-group, on rows 1 and 2.
    for row, subGroupID in enumerate(("Sub-Group 1", "Sub-Group 2"), start=1):
        withinGroupDialog.createNucMutGroupSubDialog(subGroupID, row)

    # The "secondaryGroupFilePaths" dialog gets its own counts file selector and an optional filter.
    secondaryGroupDialog.createMultipleFileSelector(
        countsSelectorLabel,
        0,
        normCountsFileEnding,
        tsvFileType,
        additionalFileEndings=rawCountsFileEndings)

    secondaryGroupFilterSelector = secondaryGroupDialog.createDynamicSelector(
        2, 0)
    secondaryGroupFilterSelector.initCheckboxController("Filter counts files")
    secondaryGroupFilterDialog = secondaryGroupFilterSelector.initDisplay(True)
    secondaryGroupFilterDialog.createNucMutGroupSubDialog(secondaryGroupID, 0)
    secondaryGroupFilterSelector.initDisplayState()

    comparisonTypeSelector.initDisplayState()
    comparisonSelector.initDisplayState()

    # Hand control over to the UI until the user is finished with it.
    dialog.mainloop()

    # No selections means the UI was terminated prematurely, so there is nothing to process.
    if dialog.selections is None:
        quit()

    # Get the user's input from the dialog.
    selections: Selections = dialog.selections
    nucleosomeMutationCountsFilePaths = selections.getFilePathGroups()[0]
    if comparisonTypeSelector.getControllerVar() == againstNewGroup:
        secondaryNucMutCountsFilePaths = selections.getFilePathGroups(
            "secondaryGroupFilePaths")[0]
    outputFilePath = list(selections.getIndividualFilePaths())[0]

    # The two top-level checkboxes, in the order they were created.
    overridePeakPeriodWithExpected = bool(selections.getToggleStates()[0])
    alignStrands = bool(selections.getToggleStates()[1])

    # One (initially empty) list of file paths per group handed to the analysis.
    filePathGroups: List[list] = [list() for _ in range(3)]

    # Work out which sub-dialogs need to be read back.
    groups = ["MainGroup"]
    if comparisonSelector.getControllerVar():
        if comparisonTypeSelector.getControllerVar() == withinSelection:
            groups.extend(("Sub-Group 1", "Sub-Group 2"))
        else:
            groups.append(secondaryGroupID)

    def toggleState(groupID, index):
        # Fetch a single checkbox state from the sub-dialog registered under groupID.
        return selections.getToggleStates(groupID)[index]

    def readCohortKeys(cohortsFilePath, prefix=""):
        # One key per line of the file (stripped, with the optional prefix prepended), each mapped to None.
        with open(cohortsFilePath, 'r') as cohortsFile:
            return dict.fromkeys(prefix + line.strip()
                                 for line in cohortsFile)

    def absorbSecondaryGroup():
        # Keep a copy of the main group as group 1 before group 0 grows to also hold the secondary group.
        filePathGroups[1] = filePathGroups[0].copy()
        filePathGroups[0] += filePathGroups[2]

    # Each tuple holds the normalization codes added when the toggle at the same position is set.
    # NOTE(review): the meaning of the individual codes is defined by getFilePathGroup, which is not visible here.
    normalizationCodesByToggle = ((1, 2), (3, 4), (5, 6), (-1, ), (0, ))

    for i, dialogID in enumerate(groups):

        isSecondaryGroup = dialogID == secondaryGroupID

        # Was filtering even requested for the main group?  If not, use the selection untouched.
        if i == 0 and not mainGroupFilterSelector.getControllerVar():
            filePathGroups[0] = nucleosomeMutationCountsFilePaths
            continue

        # Likewise, an unfiltered secondary group is used exactly as it was selected.
        if isSecondaryGroup and not secondaryGroupFilterSelector.getControllerVar(
        ):
            assert i == 1, "Secondary group encountered on unexpected iteration of for loop: " + str(
                i)
            filePathGroups[2] = secondaryNucMutCountsFilePaths
            absorbSecondaryGroup()
            continue

        # Determine what normalization methods were requested (the first five toggles of the sub-dialog).
        normalizationSelections = selections.getToggleStates(dialogID)[:5]
        normalizationMethods = list()
        for position, methodCodes in enumerate(normalizationCodesByToggle):
            if normalizationSelections[position]:
                normalizationMethods.extend(methodCodes)

        # Determine what microsatellite stability states were requested.
        # Any dropdown value other than "MSS" or "MSI" accepts both.
        acceptableMSCohorts = dict()
        if toggleState(dialogID, 7):
            MSSelection = selections.getDropdownSelections(dialogID + "MS")[0]
            if MSSelection in ("MSS", "MSI"):
                acceptableMSCohorts[MSSelection] = None
            else:
                acceptableMSCohorts["MSS"] = None
                acceptableMSCohorts["MSI"] = None

        # Determine what mutation signature states were requested (one per line, stored with a "mut_sig_" prefix).
        acceptableMutSigCohorts = dict()
        if toggleState(dialogID, 8):
            acceptableMutSigCohorts = readCohortKeys(
                selections.getIndividualFilePaths(dialogID + "MutSig")[0],
                "mut_sig_")

        # Check for custom cohort input (one cohort per line).
        acceptableCustomCohorts = dict()
        if toggleState(dialogID, 9):
            acceptableCustomCohorts = readCohortKeys(
                selections.getIndividualFilePaths(dialogID +
                                                  "CustomCohorts")[0])

        # Check for nucleosome map input (one map per line).
        acceptableNucleosomeMaps = dict()
        if toggleState(dialogID, 10):
            acceptableNucleosomeMaps = readCohortKeys(
                selections.getIndividualFilePaths(dialogID +
                                                  "NucleosomeMaps")[0])

        # The secondary group filters its own file selection into group 2; every other group
        # filters the (possibly already filtered) main selection into the group matching its position.
        if isSecondaryGroup:
            assert i == 1, "Secondary group encountered on unexpected iteration of for loop: " + str(
                i)
            sourceFilePaths, targetGroup = secondaryNucMutCountsFilePaths, 2
        else:
            sourceFilePaths, targetGroup = nucleosomeMutationCountsFilePaths, i

        # Get the file paths associated with the given parameters.
        # Toggles 5 and 6 are passed through as-is; their meaning is not visible from here.
        filePathGroups[targetGroup] += getFilePathGroup(
            sourceFilePaths, normalizationMethods, toggleState(dialogID, 5),
            toggleState(dialogID, 6), acceptableMSCohorts,
            acceptableMutSigCohorts, acceptableCustomCohorts,
            acceptableNucleosomeMaps)

        if isSecondaryGroup:
            absorbSecondaryGroup()

        #If this is the first pass through the loop, later groups filter from the newly filtered list.
        if i == 0:
            nucleosomeMutationCountsFilePaths = filePathGroups[0]

    runNucleosomeMutationAnalysis(filePathGroups[0], outputFilePath,
                                  overridePeakPeriodWithExpected, alignStrands,
                                  filePathGroups[1], filePathGroups[2])