Ejemplo n.º 1
0
    def indexing(self):
        """Run the index-building command for the genome ``self.titleStr``.

        The command template is read from the "binHISAT2-BUILD" configuration
        and the genome settings from the configuration named by
        ``self.titleStr``. Every value read is also kept on the instance
        (``commandStr``, ``folderStr``, ``seqPathStr``, ``indexHeaderStr``,
        ``threadStr`` and ``testingBool``).

        Raises:
            KeyError: if "command", "checkFolder", "seqPath", "indexHeader"
                or "thread" is missing from the loaded configuration.
        """
        def loadConfig(queryStr):
            # Every configuration is read from "config/" in "UPDATE" mode.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        # ---- Command template of the indexing binary ----
        self.commandStr = loadConfig("binHISAT2-BUILD").storeDict["command"]

        # ---- Settings of the target genome ----
        targetDict = loadConfig(self.titleStr).storeDict
        self.folderStr = targetDict["checkFolder"]
        self.seqPathStr = targetDict["seqPath"]
        self.indexHeaderStr = targetDict["indexHeader"]
        self.threadStr = targetDict["thread"]

        # A missing "testing" entry is treated as testing mode.
        self.testingBool = bool(targetDict.get("testing", True))

        # ---- Action ----
        pathlib.Path(self.folderStr).mkdir(parents=True, exist_ok=True)

        Timer = libPrint.timer()
        Timer.logFilenameStr = "02-hisat2-index-{title}".format(
            title=self.titleStr)
        Timer.folderStr = "log/"
        Timer.testingBool = self.testingBool
        Timer.startLog()

        # Fill the command template and hand it to the timer for execution.
        Timer.phraseStr = self.commandStr.format(
            seqPath=self.seqPathStr,
            indexHeader=self.indexHeaderStr,
            thread=self.threadStr,
        )
        Timer.runCommand()

        Timer.stopLog()
    # NOTE(review): the definition enclosing this loop is not visible here.
    # branchList, methodStr, annotateStr, trimStr, omicStr, noteStr and the
    # four *PathStr templates are expected to be provided by surrounding code.
    for branchStr in branchList:
        # Values substituted into every path template below.
        infoDict = {
            "branch": branchStr,
            "method": methodStr,
            "annotate": annotateStr,
            "trim": trimStr,
            "omic": omicStr,
            "note": noteStr
        }
        # Resolve the per-branch database, result and log locations.
        sourcePathStr = sourceFilePathStr.format(**infoDict)
        resultPathStr = resultFilePathStr.format(**infoDict)
        logFolderPath = logFolderPathStr.format(**infoDict)
        logPathStr = logFilePathStr.format(**infoDict)

        # Make sure the log folder exists, then start a log for this branch
        # with the testing flag switched off.
        pathlib.Path(logFolderPath).mkdir(parents=True, exist_ok=True)
        Print = libPrint.timer()
        Print.logFilenameStr = logPathStr
        Print.folderStr = logFolderPath
        Print.testingBool = False
        Print.startLog()

        # Record the parameters of this run ("note" is not printed).
        Print.printing(
            "branch = {branch}\nmethod = {method}\nannotate = {annotate}\ntrim = {trim}\ntype = {omic}"
            .format(**infoDict))
        Print.printing("[SQL-load] open expression database")
        # Open the SQLite file and select every row of the Expression table;
        # expExc holds the object returned by Cursor.execute().
        # NOTE(review): no close of Connect is visible in this fragment.
        Connect = sqlite3.connect(sourcePathStr)
        Cursor = Connect.cursor()
        selectStr = "SELECT * FROM Expression"
        expExc = Cursor.execute(selectStr)

        Print.printing("[Compare] adjust combination")
Ejemplo n.º 3
0
    def trimming(self):
        """Run the trimming command for each condition, group and replication.

        The command template is read from the "binTrimmomatic" configuration
        and the experiment layout from the configuration named by
        ``self.queryStr``. One log is opened per entry of "conditionsList";
        inside it one command is issued per group/replication, in "PE" mode
        when the experiment mode is "pairEnd" and in "SE" mode when it is
        "singleEnd". Any other mode issues no command.

        Raises:
            KeyError: if the binary configuration has no "command" entry or
                a condition entry has no "trim" key.
        """
        def loadConfig(queryStr):
            # Every configuration is read from "config/" in "UPDATE" mode.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        # ---- Parameter ----
        BinTrim = loadConfig("binTrimmomatic")
        ExpRep = loadConfig(self.queryStr)

        # ---- Initialization ----
        commandStr = BinTrim.storeDict["command"]
        expDict = ExpRep.storeDict

        conditionList = expDict.get("conditionsList", [])
        groupList = expDict.get("group", [])
        replicationList = expDict.get("replication", [])
        directionList = expDict.get("direction", [])

        branchStr = expDict.get("branch", "")
        pairStr = expDict.get("pairPostfix", "")
        unpairStr = expDict.get("unpairPostfix", "")
        modeStr = expDict.get("mode", "")

        inputFileNameStr = expDict.get("[trim]inputFileName", "")
        outputFileNameStr = expDict.get("[trim]outputFileName", "")
        fileTypeStr = expDict.get("[trim]fileType", "")
        checkFolderList = expDict.get("checkFolder", [])

        # A missing "testing" entry is treated as testing mode.
        testingBool = bool(expDict.get("testing", True))

        # ---- Action ----
        for folderStr in checkFolderList:
            pathlib.Path(folderStr).mkdir(parents=True, exist_ok=True)

        # Only a non-empty, genuine list of conditions is processed.
        if type(conditionList) is not list or not conditionList:
            return

        for conditionDict in conditionList:
            conditionStr = conditionDict['trim']

            Timer = libPrint.timer()
            Timer.logFilenameStr = "03-trim-{branch}-{cond}".format(
                branch=branchStr, cond=conditionStr)
            Timer.folderStr = "log/"
            Timer.testingBool = testingBool
            Timer.startLog()

            TrimPara = loadConfig(conditionStr)
            headerStr = TrimPara.storeDict.get('header', "")

            for groupStr in groupList:
                for replicationStr in replicationList:
                    if modeStr == "pairEnd":
                        inputFileList = []
                        outputFileList = []
                        for directionStr in directionList:
                            inputFileList.append(
                                inputFileNameStr.format(
                                    group=groupStr,
                                    replication=replicationStr,
                                    direction=directionStr,
                                    fileType=fileTypeStr))
                            # Paired output first, then unpaired, for each
                            # read direction.
                            for postfixStr in (pairStr, unpairStr):
                                outputFileList.append(
                                    outputFileNameStr.format(
                                        condition=headerStr,
                                        direction=directionStr,
                                        group=groupStr,
                                        replication=replicationStr,
                                        pairType=postfixStr,
                                        fileType=fileTypeStr,
                                    ))
                        # All inputs precede all outputs on the command line.
                        fileStr = " ".join(inputFileList + outputFileList)
                        flagStr = "PE"
                    elif modeStr == "singleEnd":
                        inputStr = inputFileNameStr.format(
                            group=groupStr,
                            replication=replicationStr,
                            fileType=fileTypeStr)
                        outputStr = outputFileNameStr.format(
                            condition=headerStr,
                            group=groupStr,
                            replication=replicationStr,
                            fileType=fileTypeStr,
                        )
                        fileStr = "{} {}".format(inputStr, outputStr)
                        flagStr = "SE"
                    else:
                        # Unknown mode: nothing to run for this sample.
                        continue

                    # Condition parameters, overridden by the file list and
                    # the mode flag, fill the command template.
                    commandDict = dict(TrimPara.storeDict)
                    commandDict['files'] = fileStr
                    commandDict['mode'] = flagStr
                    Timer.phraseStr = commandStr.format(**commandDict)
                    Timer.runCommand()

            Timer.stopLog()
Ejemplo n.º 4
0
    def aligning(self):
        """Run alignment, SAM/BAM conversion, sorting and clean-up commands.

        For every entry of "conditionsList" in the configuration named by
        ``self.queryStr`` one log is opened, and for every group/replication
        the following steps are evaluated in this order:

        1. alignment ("command-PE" or "command-SE" of "binHISAT2-RUN"), only
           when neither the SAM, the BAM nor the sorted BAM file exists;
        2. SAM to BAM conversion, only when neither the BAM nor the sorted
           BAM file exists;
        3. removal of the SAM file when both SAM and BAM exist;
        4. sorting, whenever the sorted BAM file does not exist;
        5. removal of the BAM file when both BAM and sorted BAM exist.

        File existence is re-checked at every step, so the outcome of one
        command influences the following steps.

        Raises:
            KeyError: if a required key is missing from the experiment
                configuration, a condition entry or the genome configuration.
        """
        def loadConfig(queryStr):
            # Every configuration is read from "config/" in "UPDATE" mode.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        def removeIfBothExist(Timer, oldFileStr, oldPath, newPath):
            # Delete the intermediate file only once its successor exists.
            if oldPath.exists() and newPath.exists():
                removeStr = Remove.storeDict.get("command", "")
                Timer.phraseStr = removeStr.format(target=oldFileStr)
                Timer.runCommand()

        BinHISAT2 = loadConfig("binHISAT2-RUN")
        SAMconvert = loadConfig("binSAMtools-CONVERT")
        SAMsort = loadConfig("binSAMtools-SORT")
        Remove = loadConfig("commandRM")
        expRep = loadConfig(self.queryStr)

        expDict = expRep.storeDict
        branchStr = expDict["branch"]
        pairPostfixStr = expDict["pairPostfix"]
        unpairPostfixStr = expDict["unpairPostfix"]
        groupList = expDict["group"]
        modeStr = expDict["mode"]
        replicationList = expDict["replication"]

        for conditionDict in expDict["conditionsList"]:
            annotateConditionStr = conditionDict["genome"]
            trimConditionStr = conditionDict["trim"]
            hisat2ConditionStr = conditionDict["map"]

            directionDict = expDict["[hisat2]direction"]
            fileTypeStr = expDict["[trim]fileType"]
            inputFileNameStr = expDict["[hisat2]inputFileName"]
            outputFolderStr = expDict["[hisat2]outputFolder"]
            outputFileNameStr = expDict["[hisat2]outputFileName"]

            # A missing "testing" entry is treated as testing mode.
            self.testingBool = bool(expDict.get("testing", True))

            pathlib.Path(
                outputFolderStr.format(annotate=annotateConditionStr,
                                       trim=trimConditionStr)).mkdir(
                                           parents=True, exist_ok=True)

            Print = libPrint.timer()
            Print.logFilenameStr = "04-hs1-hisat2-{branch}-{hisat2cond}-{annotateCon}-{trimCon}".format(
                branch=branchStr,
                hisat2cond=hisat2ConditionStr,
                annotateCon=annotateConditionStr,
                trimCon=trimConditionStr,
            )
            Print.folderStr = "log/"
            Print.testingBool = self.testingBool
            Print.startLog()

            for groupStr in groupList:
                for replicationStr in replicationList:
                    # Command parameters: mapping condition first, then the
                    # index header of the genome configuration.
                    Para = loadConfig(hisat2ConditionStr)
                    finalDict = dict(Para.storeDict)
                    Spec = loadConfig(annotateConditionStr)
                    finalDict["indexHeader"] = Spec.storeDict["indexHeader"]

                    # Keyword sets for the input file names, keyed by the
                    # command placeholder each resulting name fills.
                    readDict = {
                        "trim": trimConditionStr,
                        "group": groupStr,
                        "replication": replicationStr,
                        "fileType": fileTypeStr,
                    }
                    if modeStr == "pairEnd":
                        forwardStr = directionDict['1']
                        reverseStr = directionDict['2']
                        hisat2KeyStr = "command-PE"
                        fastqParaDict = {
                            "pairForwardFASTQ":
                            dict(readDict,
                                 direction=forwardStr,
                                 pairType=pairPostfixStr),
                            "pairReverseFASTQ":
                            dict(readDict,
                                 direction=reverseStr,
                                 pairType=pairPostfixStr),
                            "unpairForwardFASTQ":
                            dict(readDict,
                                 direction=forwardStr,
                                 pairType=unpairPostfixStr),
                            "unpairReverseFASTQ":
                            dict(readDict,
                                 direction=reverseStr,
                                 pairType=unpairPostfixStr),
                        }
                    elif modeStr == "singleEnd":
                        hisat2KeyStr = "command-SE"
                        fastqParaDict = {"unpairFASTQ": readDict}
                    else:
                        # Unknown mode: the alignment step issues no command.
                        hisat2KeyStr = None
                        fastqParaDict = dict()

                    # The three output names differ only in their suffix.
                    outputDict = {
                        "annotate": annotateConditionStr,
                        "trim": trimConditionStr,
                        "hisat2Condition": hisat2ConditionStr,
                        "group": groupStr,
                        "replication": replicationStr,
                    }
                    samFileStr = outputFileNameStr.format(fileType=".sam",
                                                          **outputDict)
                    bamFileStr = outputFileNameStr.format(fileType=".bam",
                                                          **outputDict)
                    sortedBAMFileStr = outputFileNameStr.format(
                        fileType="-sorted.bam", **outputDict)

                    samPath = pathlib.Path(samFileStr)
                    bamPath = pathlib.Path(bamFileStr)
                    sortedPath = pathlib.Path(sortedBAMFileStr)

                    # Step 1: alignment into a SAM file.
                    if samPath.exists():
                        Print.phraseStr = "SAM File existed: " + samFileStr
                        Print.printTimeStamp()
                    elif not (samPath.exists() or bamPath.exists()
                              or sortedPath.exists()):
                        if hisat2KeyStr is not None:
                            alignStr = BinHISAT2.storeDict.get(
                                hisat2KeyStr, "")
                            finalDict.update({
                                keyStr: inputFileNameStr.format(**paraDict)
                                for keyStr, paraDict in fastqParaDict.items()
                            })
                            finalDict["outputSAM"] = samFileStr
                            Print.phraseStr = alignStr.format(**finalDict)
                            Print.runCommand()

                    # Step 2: SAM to BAM conversion.
                    if bamPath.exists():
                        Print.phraseStr = "BAM File existed: " + bamFileStr
                        Print.printTimeStamp()
                    elif not (bamPath.exists() or sortedPath.exists()):
                        convertStr = SAMconvert.storeDict.get("command", "")
                        finalDict["outputBAM"] = bamFileStr
                        finalDict["inputSAM"] = samFileStr
                        Print.phraseStr = convertStr.format(**finalDict)
                        Print.runCommand()

                    # Step 3: drop the SAM file once the BAM file is there.
                    removeIfBothExist(Print, samFileStr, samPath, bamPath)

                    # Step 4: sorting of the BAM file.
                    if sortedPath.exists():
                        Print.phraseStr = "Sorted BAM File existed: " + sortedBAMFileStr
                        Print.printTimeStamp()
                    else:
                        sortStr = SAMsort.storeDict.get("command", "")
                        finalDict["outputBAM"] = sortedBAMFileStr
                        finalDict["inputBAM"] = bamFileStr
                        Print.phraseStr = sortStr.format(**finalDict)
                        Print.runCommand()

                    # Step 5: drop the unsorted BAM once the sorted one exists.
                    removeIfBothExist(Print, bamFileStr, bamPath, sortedPath)

            Print.stopLog()
Ejemplo n.º 5
0
    def summaring(self):
        """Run the flagstat command on every sorted BAM file that exists.

        The command template is read from the "binSAMtools-FLAGSTAT"
        configuration and the experiment layout from the configuration named
        by ``self.branchStr``. One log is written per combination of trim
        condition, annotation condition and mapping condition; within it the
        command is issued for each group/replication whose "-sorted.bam" file
        is present. ``self.testingBool`` is updated from the "testing" entry.
        """
        def loadConfig(queryStr):
            # Every configuration is read from "config/" in "UPDATE" mode.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        FLAGstat = loadConfig("binSAMtools-FLAGSTAT")
        expRep = loadConfig(self.branchStr)
        expDict = expRep.storeDict

        trimConditionList = expDict.get("[trim]condition", [])
        hisat2ConditionList = expDict.get("[hisat2]Condition", [])
        annotateConditionList = expDict.get("conditionList", [])
        groupList = expDict.get("group", [])
        replicationList = expDict.get("replication", [])

        outputFolderStr = expDict.get("[hisat2]outputFolder", "")
        outputFileNameStr = expDict.get("[hisat2]outputFileName", "")

        # A missing "testing" entry is treated as testing mode.
        self.testingBool = bool(expDict.get("testing", True))

        for trimConditionStr in trimConditionList:
            for conditionEntry in annotateConditionList:
                # Only the first element of each condition entry is used.
                annotateConditionStr = conditionEntry[0]
                folderPath = pathlib.Path(
                    outputFolderStr.format(
                        annotateCondition=annotateConditionStr,
                        trimCondition=trimConditionStr))
                folderPath.mkdir(parents=True, exist_ok=True)

                for hisat2ConditionStr in hisat2ConditionList:
                    Timer = libPrint.timer()
                    Timer.logFilenameStr = "04-hs2-hisat2-{hisat2cond}-{annotateCon}-{trimCon}".format(
                        hisat2cond=hisat2ConditionStr,
                        annotateCon=annotateConditionStr,
                        trimCon=trimConditionStr,
                    )
                    Timer.folderStr = "log/"
                    Timer.testingBool = self.testingBool
                    Timer.startLog()

                    for groupStr in groupList:
                        for replicationStr in replicationList:
                            sortedBAMFileStr = outputFileNameStr.format(
                                annotateCondition=annotateConditionStr,
                                trimCondition=trimConditionStr,
                                hisat2Condition=hisat2ConditionStr,
                                group=groupStr,
                                replication=replicationStr,
                                fileType="-sorted.bam",
                            )

                            # Samples without a sorted BAM file are skipped.
                            if not pathlib.Path(sortedBAMFileStr).exists():
                                continue

                            flagstatStr = FLAGstat.storeDict.get(
                                "command", "")
                            Timer.phraseStr = flagstatStr.format(
                                BAMfile=sortedBAMFileStr)
                            Timer.runCommand()

                    Timer.stopLog()
Ejemplo n.º 6
0
    def estimating(self):
        """Run the estimation command for each condition and sample.

        The command template is read from the "binStringTie-ESTIMATE"
        configuration and the experiment layout from the configuration named
        by ``self.branchStr``; file-name templates are looked up under keys
        prefixed with ``[<self.headerStr>]``. One log is written per entry of
        "conditionList" and one command is issued per group/replication.

        When ``self.directEstimating`` is true the "antPath" of the
        annotation configuration fills the ``mergePath`` placeholder,
        otherwise the formatted merged-file name is used.

        Raises:
            KeyError: if the binary configuration has no "command" entry.
        """
        def loadConfig(queryStr):
            # Every configuration is read from "config/" in "UPDATE" mode.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        def headerKey(nameStr):
            # Key of the form "[<header>]<name>" in the experiment config.
            return "[{}]{}".format(self.headerStr, nameStr)

        # ---- Parameter for Assembling ----
        BinMap = loadConfig("binStringTie-ESTIMATE")
        commandStr = BinMap.storeDict["command"]

        # ---- Initialization for Assembling ----
        Target = loadConfig(self.branchStr)
        targetDict = Target.storeDict

        branchStr = targetDict.get("branch", "")
        groupList = targetDict.get("group", [])
        replicationList = targetDict.get("replication", [])
        hisat2ConditionStr = targetDict.get("[hisat2]Condition", "")
        conditionList = targetDict.get("conditionList", [])
        inputFileNameStr = targetDict.get(headerKey("inputFileName"), "")
        mergedFileNameStr = targetDict.get(headerKey("mergedFileName"), "")
        balgownFolderStr = targetDict.get(headerKey("ballgownFolder"), "")
        gtfFileNameStr = targetDict.get(headerKey("gtfFileName"), "")
        tsvFileNameStr = targetDict.get(headerKey("tsvFileName"), "")

        # A missing "testing" entry is treated as testing mode.
        self.testingBool = bool(targetDict.get("testing", True))

        for conditionTup in conditionList:
            antCondStr, trimCondStr = conditionTup[0], conditionTup[1]

            Annotate = loadConfig(antCondStr)
            threadStr = Annotate.storeDict.get("thread", "")
            antPathStr = Annotate.storeDict.get("antPath", "")

            # ---- Action ----
            Timer = libPrint.timer()
            Timer.logFilenameStr = "05-{stringtie}-estimating-{branch}-{annotate}-{trim}".format(
                stringtie=self.headerStr,
                branch=branchStr,
                annotate=antCondStr,
                trim=trimCondStr,
            )
            Timer.folderStr = "log/"
            Timer.testingBool = self.testingBool
            Timer.startLog()

            for groupStr in groupList:
                for repliStr in replicationList:
                    # Keywords shared by the per-sample path templates.
                    sampleDict = {
                        "annotateCondition": antCondStr,
                        "trimCondition": trimCondStr,
                        "group": groupStr,
                        "replication": repliStr,
                    }

                    ballgownPathStr = balgownFolderStr.format(**sampleDict)
                    pathlib.Path(ballgownPathStr).mkdir(parents=True,
                                                        exist_ok=True)

                    bamPathStr = inputFileNameStr.format(
                        hisat2Condition=hisat2ConditionStr, **sampleDict)
                    mergeFileNameStr = mergedFileNameStr.format(
                        annotateCondition=antCondStr,
                        trimCondition=trimCondStr)
                    gtfPathStr = gtfFileNameStr.format(**sampleDict)
                    tsvPathStr = tsvFileNameStr.format(**sampleDict)

                    # Direct estimation points at the annotation file itself.
                    if self.directEstimating:
                        mergePathStr = antPathStr
                    else:
                        mergePathStr = mergeFileNameStr

                    Timer.phraseStr = commandStr.format(
                        thread=threadStr,
                        mergePath=mergePathStr,
                        bamfile=bamPathStr,
                        ballgownPath=ballgownPathStr,
                        gtffile=gtfPathStr,
                        tsvfile=tsvPathStr)
                    Timer.runCommand()

            Timer.stopLog()
Ejemplo n.º 7
0
    def importingStringtie(self):
        # ---- Initialization for Converting ----
        Target = libConfig.config()
        Target.queryStr = self.branchStr
        Target.folderStr = "config/"
        Target.modeStr = "UPDATE"
        Target.load()

        branchStr = self.branchStr

        controlStr = Target.storeDict.get("controlSample", "")
        controlSafeStr = controlStr.replace("-", "_")
        groupList = Target.storeDict.get("group", [])
        replicationList = Target.storeDict.get("replication", [])
        patternStr = Target.storeDict.get("samplePattern", "")

        sampleList = list()
        for groupStr in groupList:
            for replicationStr in replicationList:
                sampleList.append(
                    patternStr.format(group=groupStr,
                                      replication=replicationStr))

        conditionList = Target.storeDict.get("conditionList", [])
        methodList = Target.storeDict.get("methodList", [])
        geneSourceDict = Target.storeDict.get("[sqlite]geneSourceDict", dict())
        transcriptSourceDict = Target.storeDict.get(
            "[sqlite]transcriptSourceDict", dict())

        geneExpPathStr = Target.storeDict.get("[sqlite]geneSourcePathStr", "")
        transcriptExpPathStr = Target.storeDict.get(
            "[sqlite]transcriptSourcePathStr", "")
        sqlFolderStr = Target.storeDict.get("sqlFolderStr", "")
        sqlPathStr = Target.storeDict.get("sqlPathStr", "")
        sqlLogStr = Target.storeDict.get("[sqlite]logFilename", "")

        for methodStr in methodList:
            pathlib.Path(
                sqlFolderStr.format(branch=branchStr,
                                    method=methodStr)).mkdir(parents=True,
                                                             exist_ok=True)
            geneFolderStr = geneSourceDict.get(methodStr, "")
            transcriptFolderStr = transcriptSourceDict.get(methodStr, "")
            compareSet = set()

            for conditionTup in conditionList:
                antStr = conditionTup[0]
                trimStr = conditionTup[1]

                Print = libPrint.timer()
                Print.logFilenameStr = sqlLogStr.format(ant=antStr,
                                                        trim=trimStr)
                Print.folderStr = sqlFolderStr.format(branch=branchStr,
                                                      method=methodStr)
                Print.testingBool = self.testingBool
                Print.startLog()

                for sampleStr in sampleList:
                    sampleSafeStr = sampleStr.replace("-", "_")
                    Print.phraseStr = "-- Data format conversion for Gene Expression in {} --".format(
                        sampleStr)
                    Print.printTimeStamp()
                    geneSamplePath = geneExpPathStr.format(
                        folder=geneFolderStr,
                        branch=branchStr,
                        ant=antStr,
                        trim=trimStr,
                        sample=sampleStr)
                    sampleDF = pd.read_csv(geneSamplePath,
                                           delimiter="\t",
                                           header=0)
                    Print.printing("[Pandas:Read]" + geneSamplePath)

                    rowList = sampleDF.values.tolist()
                    countInt = len(rowList)

                    # check
                    compareList = list()
                    for rowInt in range(countInt):
                        insertList = []
                        sourceList = rowList[rowInt]
                        insertList.append("UUID." + str(rowInt))
                        insertList.extend(sourceList[0:7])
                        compareStr = "\t".join([str(x) for x in insertList])
                        compareList.append(compareStr)

                    if compareSet == set():
                        Print.printing("    " + sampleStr + ": Empty")
                        compareSet = set(compareList)
                    elif compareSet != set(compareList):
                        Print.printing("    " + sampleStr + ": Same")
                    elif compareSet == set(compareList):
                        Print.printing("    " + sampleStr + ": Different")

                    sqlPath = sqlPathStr.format(branch=branchStr,
                                                method=methodStr,
                                                ant=antStr,
                                                trim=trimStr)
                    createCommandStr = """CREATE TABLE GeneExpression_{}
                        ('UUID'  TEXT    PRIMARY KEY NOT NULL, 
                        'GeneID'   TEXT    NOT NULL,
                        'GeneName' TEXT    NOT NULL, 
                        'Reference' TEXT    NOT NULL, 
                        'Strand'    TEXT    NOT NULL, 
                        'Start' INTEGER NOT NULL, 
                        'End'   INTEGER NOT NULL, 
                        'Coverage'  REAL    NOT NULL, 
                        'FPKM'  REAL    NOT NULL, 
                        'TPM'   REAL    NOT NULL);""".format(sampleSafeStr)
                    insertCommandStr = "INSERT INTO GeneExpression_{} ('UUID','GeneID','GeneName','Reference','Strand','Start','End','Coverage','FPKM','TPM')\
                        VALUES (?,?,?,?,?,?,?,?,?,?)".format(sampleSafeStr)
                    self.expressionInputDict = {
                        "sqlPath": sqlPath,
                        "count": countInt,
                        "rowList": rowList,
                        "createCommand": createCommandStr,
                        "insertCommand": insertCommandStr,
                    }
                    self.exportingExpression(Print)
                    # Transcript
                    Print.phraseStr = "-- Data format conversion for Transcript Expression in {} --".format(
                        sampleStr)
                    Print.printTimeStamp()

                    transcriptSamplePath = transcriptExpPathStr.format(
                        folder=transcriptFolderStr,
                        branch=branchStr,
                        ant=antStr,
                        trim=trimStr,
                        sample=sampleStr)
                    sampleDF = pd.read_csv(transcriptSamplePath,
                                           delimiter="\t",
                                           header=0)
                    Print.printing("[Pandas:Read] " + transcriptSamplePath)

                    rowList = sampleDF.values.tolist()
                    countInt = len(rowList)

                    # check
                    compareList = list()
                    for rowInt in range(countInt):
                        insertList = []
                        sourceList = rowList[rowInt]
                        insertList.append("UUID." + str(rowInt))
                        insertList.extend(sourceList[0:10])
                        compareStr = "\t".join([str(x) for x in insertList])
                        compareList.append(compareStr)

                    if compareSet == set():
                        Print.printing("    " + sampleStr + ": Empty")
                        compareSet = set(compareList)
                    elif compareSet != set(compareList):
                        Print.printing("    " + sampleStr + ": Same")
                    elif compareSet == set(compareList):
                        Print.printing("    " + sampleStr + ": Different")

                    sqlPath = sqlPathStr.format(branch=branchStr,
                                                method=methodStr,
                                                ant=antStr,
                                                trim=trimStr)
                    createCommandStr = """CREATE TABLE TranscriptExpression_{}
                        ('UUID'     TEXT    PRIMARY KEY NOT NULL, 
                        'TranscriptID'  INTEGER    NOT NULL,
                        'Chromosome'    TEXT, 
                        'Strand'    TEXT    NOT NULL, 
                        'Start' INTEGER NOT NULL, 
                        'End'   INTEGER NOT NULL, 
                        'TranscriptName'    TEXT    NOT NULL, 
                        'ExonCount'    INTEGER  NOT NULL, 
                        'Length'    INTEGER  NOT NULL, 
                        'GeneID'    TEXT  NOT NULL, 
                        'GeneName'    TEXT  NOT NULL, 
                        'Coverage'  REAL    NOT NULL, 
                        'FPKM'  REAL    NOT NULL);""".format(sampleSafeStr)
                    insertCommandStr = "INSERT INTO TranscriptExpression_{} ('UUID','TranscriptID','Chromosome','Strand','Start','End','TranscriptName','ExonCount','Length','GeneID','GeneName','Coverage','FPKM')\
                        VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)".format(
                        sampleSafeStr)
                    self.expressionInputDict = {
                        "sqlPath": sqlPath,
                        "count": countInt,
                        "rowList": rowList,
                        "createCommand": createCommandStr,
                        "insertCommand": insertCommandStr,
                    }
                    self.exportingExpression(Print)

                Print.phraseStr = "-- Summarising for Gene Expression --"
                Print.printTimeStamp()

                createComStr = "CREATE TABLE GeneExpressionSummary ({})"
                createColumnList = [
                    "'UUID'  TEXT PRIMARY KEY NOT NULL",
                    "'GeneID' TEXT",
                    "'GeneName' TEXT",
                ]
                insertColumnList = ["UUID", "GeneID", "GeneName"]
                for targetStr in ["FPKM", "TPM"]:
                    for sampleStr in sampleList:
                        sampleSafeStr = sampleStr.replace("-", "_")
                        columnStr = "{target}_{sample} REAL".format(
                            target=targetStr, sample=sampleSafeStr)
                        createColumnList.append(columnStr)
                        insertColumnList.append("{target}_{sample}".format(
                            target=targetStr, sample=sampleSafeStr))

                Connect = sqlite3.connect(sqlPath)
                Cursor = Connect.cursor()
                ReturnMsg = Cursor.execute(
                    createComStr.format(",".join(createColumnList)))  # pylint: disable=unused-variable
                Connect.commit()
                Print.printing("[SQLite3:CreateTable] " + sqlPath)

                resultDict = dict()
                controlExc = Cursor.execute(
                    "SELECT UUID, GeneID, GeneName from GeneExpression_{}".
                    format(controlSafeStr))
                for rowList in controlExc:
                    uuid, geneid, genename = rowList
                    subDict = {
                        "UUID": uuid,
                        "GeneID": geneid,
                        "GeneName": genename
                    }
                    resultDict.update({uuid: subDict})

                for sampleStr in sampleList:
                    sampleSafeStr = sampleStr.replace("-", "_")
                    sampleExc = Cursor.execute(
                        "SELECT UUID, FPKM, TPM  from GeneExpression_{}".
                        format(sampleSafeStr))
                    for rowList in sampleExc:
                        uuid, fpkm, tpm = rowList
                        subDict = resultDict[uuid]
                        subDict.update({
                            "FPKM_{}".format(sampleSafeStr): fpkm,
                            "TPM_{}".format(sampleSafeStr): tpm
                        })
                        resultDict.update({uuid: subDict})

                insertComStr = "INSERT INTO GeneExpressionSummary ({column}) VALUES ({value})"
                for uuid in resultDict.keys():
                    valueList = list()
                    for posInt in range(len(insertColumnList)):
                        valueList.append(
                            resultDict[uuid][insertColumnList[posInt]])

                    insertCommand = insertComStr.format(
                        column=",".join(insertColumnList),
                        value=(("?," * (len(valueList) - 1))) + "?")
                    ReturnMsg = Cursor.execute(insertCommand, valueList)

                Connect.commit()
                Print.printing("[SQLite3:Insert] " + sqlPath)
                Connect.close()
                Print.printing("[SQLite3:Close]\n")

                # Transcript
                Print.phraseStr = "-- Summarising for Gene Expression --"
                Print.printTimeStamp()

                createComStr = "CREATE TABLE TranscriptExpressionSummary ({})"
                createColumnList = [
                    "'UUID'  TEXT PRIMARY KEY NOT NULL",
                    "'TranscriptID' INTEGER",
                    "'TranscriptName' TEXT",
                    "'GeneID' TEXT",
                    "'GeneName' TEXT",
                ]
                insertColumnList = [
                    "UUID", "TranscriptID", "TranscriptName", "GeneID",
                    "GeneName"
                ]
                for sampleStr in sampleList:
                    sampleSafeStr = sampleStr.replace("-", "_")
                    columnStr = "FPKM_{sample} REAL".format(
                        sample=sampleSafeStr)
                    createColumnList.append(columnStr)
                    insertColumnList.append(
                        "FPKM_{sample}".format(sample=sampleSafeStr))

                Connect = sqlite3.connect(sqlPath)
                Cursor = Connect.cursor()
                ReturnMsg = Cursor.execute(
                    createComStr.format(",".join(createColumnList)))
                Connect.commit()
                Print.printing("[SQLite3:CreateTable] " + sqlPath)

                resultDict = dict()
                controlExc = Cursor.execute(
                    "SELECT UUID, TranscriptID, TranscriptName, GeneID, GeneName from TranscriptExpression_{}"
                    .format(controlSafeStr))
                for rowList in controlExc:
                    uuid, tid, tname, geneid, genename = rowList
                    subDict = {
                        "UUID": uuid,
                        "TranscriptID": tid,
                        "TranscriptName": tname,
                        "GeneID": geneid,
                        "GeneName": genename
                    }
                    resultDict.update({uuid: subDict})

                for sampleStr in sampleList:
                    sampleSafeStr = sampleStr.replace("-", "_")
                    sampleExc = Cursor.execute(
                        "SELECT UUID, FPKM  from TranscriptExpression_{}".
                        format(sampleSafeStr))
                    for rowList in sampleExc:
                        uuid, fpkm = rowList
                        subDict = resultDict[uuid]
                        subDict.update({"FPKM_{}".format(sampleSafeStr): fpkm})
                        resultDict.update({uuid: subDict})

                insertComStr = "INSERT INTO TranscriptExpressionSummary ({column}) VALUES ({value})"
                for uuid in resultDict.keys():
                    valueList = list()
                    for posInt in range(len(insertColumnList)):
                        valueList.append(
                            resultDict[uuid][insertColumnList[posInt]])

                    insertCommand = insertComStr.format(
                        column=",".join(insertColumnList),
                        value=(("?," * (len(valueList) - 1))) + "?")
                    ReturnMsg = Cursor.execute(insertCommand, valueList)

                Connect.commit()
                Print.printing("[SQLite3:Insert] " + sqlPath)
                Connect.close()
                Print.printing("[SQLite3:Close]\n")

                Print.stopLog()
Ejemplo n.º 8
0
    def assembling(self):
        """Run the assembling command for every condition, group and replication.

        The command template comes from the ``binStringTie-RUN`` config entry
        (or ``binStringTie-RUN-withoutAnnotation`` when
        ``self.withoutAnnotation`` is truthy).  File-name and folder templates
        are read from the config entry named by ``self.branchStr`` under keys
        prefixed with ``[<self.headerStr>]``.

        One log is opened per ``(annotate, trim)`` pair in ``conditionList``;
        inside it one command is issued through ``Print.runCommand()`` for
        every group/replication combination.

        Side effects: sets ``self.testingBool`` and creates the output folder
        of each condition.
        """
        # ---- Parameter for Assembling ----
        BinMap = libConfig.config()
        if self.withoutAnnotation:
            BinMap.queryStr = "binStringTie-RUN-withoutAnnotation"
        else:
            BinMap.queryStr = "binStringTie-RUN"
        BinMap.folderStr = "config/"
        BinMap.modeStr = "UPDATE"
        BinMap.load()

        commandStr = BinMap.storeDict["command"]

        # ---- Initialization for Assembling ----
        Target = libConfig.config()
        Target.queryStr = self.branchStr
        Target.folderStr = "config/"
        Target.modeStr = "UPDATE"
        Target.load()

        branchStr = Target.storeDict.get("branch", "")
        groupList = Target.storeDict.get("group", [])
        replicationList = Target.storeDict.get("replication", [])
        hisat2ConditionStr = Target.storeDict.get("[hisat2]Condition", "")
        conditionList = Target.storeDict.get("conditionList", [])
        # Templates: they are only ever formatted into *new* variables so that
        # every condition starts from the unformatted text.
        inputTemplateStr = Target.storeDict.get(
            "[{}]inputFileName".format(self.headerStr), "")
        outputTemplateStr = Target.storeDict.get(
            "[{}]outputFileName".format(self.headerStr), "")
        outputFolderTemplateStr = Target.storeDict.get(
            "[{}]outputFolder".format(self.headerStr), "")

        # A missing "testing" key counts as testing mode.
        self.testingBool = bool(Target.storeDict.get("testing", True))

        for conditionTup in conditionList:
            antCondStr = conditionTup[0]
            trimCondStr = conditionTup[1]

            Annotate = libConfig.config()
            Annotate.queryStr = antCondStr
            Annotate.folderStr = "config/"
            Annotate.modeStr = "UPDATE"
            Annotate.load()

            threadStr = Annotate.storeDict.get("thread", "")
            antPathStr = Annotate.storeDict.get("antPath", "")

            # ---- Action ----
            Print = libPrint.timer()
            Print.logFilenameStr = "05-{stringtie}-assembling-{branch}-{annotate}-{trim}".format(
                stringtie=self.headerStr,
                branch=branchStr,
                annotate=antCondStr,
                trim=trimCondStr,
            )
            Print.folderStr = "log/"
            Print.testingBool = self.testingBool
            Print.startLog()

            # The folder depends on the condition only, so resolve and create
            # it once per condition.  Formatting the template in place (as was
            # done before) froze the first condition's folder for all others.
            outputFolderPathStr = outputFolderTemplateStr.format(
                annotateCondition=antCondStr,
                trimCondition=trimCondStr)
            pathlib.Path(outputFolderPathStr).mkdir(parents=True,
                                                    exist_ok=True)

            for groupStr in groupList:
                for repliStr in replicationList:
                    outputPathStr = outputTemplateStr.format(
                        annotateCondition=antCondStr,
                        trimCondition=trimCondStr,
                        group=groupStr,
                        replication=repliStr)

                    inputPathStr = inputTemplateStr.format(
                        annotateCondition=antCondStr,
                        hisat2Condition=hisat2ConditionStr,
                        trimCondition=trimCondStr,
                        group=groupStr,
                        replication=repliStr)

                    CommandStr = commandStr.format(
                        bamfile=inputPathStr,
                        outputfile=outputPathStr,
                        thread=threadStr,
                        antPath=antPathStr)

                    Print.phraseStr = CommandStr
                    Print.runCommand()

            Print.stopLog()
Ejemplo n.º 9
0
    def converting(self):
        """Produce the GTF for every ``gffRead`` condition and copy it in place.

        For each entry of ``conditionsList`` (config entry ``self.branchStr``)
        whose ``transcriptome`` is ``"gffRead"``: the gffread command is issued
        only when the genome's ``gtfPath`` does not exist yet, the target
        folder is created, and the copy command is issued.  Both commands go
        through ``Print.runCommand()``.  Sets ``self.testingBool``.
        """

        def loadConfig(queryStr):
            # Every config entry is loaded from "config/" in "UPDATE" mode.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        # ---- Parameter ----
        GffConfig = loadConfig("binCufflinks-gffread")
        CopyConfig = loadConfig("commandCP")

        # ---- Initialization ----
        Target = loadConfig(self.branchStr)

        # A missing "testing" key counts as testing mode.
        self.testingBool = bool(Target.storeDict.get("testing", True))

        gffreadTemplateStr = GffConfig.storeDict["command"]
        copyTemplateStr = CopyConfig.storeDict["command"]

        logBranchStr = self.branchStr
        selectedList = []
        for candidateDict in Target.storeDict["conditionsList"]:
            if candidateDict["transcriptome"] == "gffRead":
                selectedList.append(candidateDict)
        gtfDict = Target.storeDict["gtfDict"]

        for conditionDict in selectedList:
            genomeStr = conditionDict["genome"]
            trimStr = conditionDict["trim"]
            transcriptomeStr = conditionDict["transcriptome"]

            infoDict = {
                "branch": self.branchStr,
                "annotate": genomeStr,
                "trim": trimStr,
                "folder": gtfDict[transcriptomeStr]['folder'],
            }
            folderTemplateStr = Target.storeDict["transcriptomeFolder"]
            gtfTemplateStr = Target.storeDict["transcriptomeGTF"]

            # Genome-specific parameters.
            Spec = loadConfig(genomeStr)
            annotationPathStr = Spec.storeDict["antPath"]
            gtfPathStr = Spec.storeDict["gtfPath"]
            logFolderStr = Spec.storeDict["dbgaPath"]

            Print = libPrint.timer()
            Print.logFilenameStr = "05-gffConversion-{branch}-{annotate}".format(
                branch=logBranchStr,
                annotate=genomeStr,
            )
            Print.folderStr = logFolderStr
            Print.testingBool = self.testingBool
            Print.startLog()

            targetPath = gtfTemplateStr.format(**infoDict)

            # Convert only when the GTF has not been generated before.
            gtfMissingBool = not pathlib.Path(gtfPathStr).exists()
            if gtfMissingBool:
                Print.phraseStr = gffreadTemplateStr.format(
                    inputFile=annotationPathStr, outputFile=gtfPathStr)
                Print.runCommand()

            pathlib.Path(folderTemplateStr.format(**infoDict)).mkdir(
                parents=True, exist_ok=True)

            Print.phraseStr = copyTemplateStr.format(output=gtfPathStr,
                                                     target=targetPath)
            Print.runCommand()

            Print.stopLog()
Ejemplo n.º 10
0
    def diffing(self):
        """Issue one ``binCuffDiff-RUN`` command per entry of ``conditionsList``.

        All settings are read from the config entry named by
        ``self.branchStr``.  For every condition a log is opened, the result
        folder is created, the BAM file names of all group/replication pairs
        are assembled (replications joined by ``","``, groups by ``" "``) and
        the formatted command is handed to ``Print.runCommand()``.
        ``self.testingBool`` is (re)assigned on every condition.
        """
        # ---- Command template ----
        BinMap = libConfig.config()
        BinMap.queryStr = "binCuffDiff-RUN"
        BinMap.folderStr = "config/"
        BinMap.modeStr = "UPDATE"
        BinMap.load()
        commandStr = BinMap.storeDict["command"]

        # ---- Branch settings ----
        Target = libConfig.config()
        Target.queryStr = self.branchStr
        Target.folderStr = "config/"
        Target.modeStr = "UPDATE"
        Target.load()

        settingDict = Target.storeDict
        groupList = settingDict["group"]
        replicationList = settingDict["replication"]
        threadStr = settingDict["thread"]
        bamTemplateStr = settingDict["[hisat2]outputFileName"]
        gtfTemplateStr = settingDict["transcriptomeGTF"]
        gtfDict = settingDict["gtfDict"]
        resultTemplateStr = settingDict["[CuffDiff]resultFolder"]

        for conditionDict in settingDict["conditionsList"]:
            genomeStr = conditionDict["genome"]
            trimStr = conditionDict["trim"]
            transcriptomeStr = conditionDict["transcriptome"]
            folderStr = gtfDict[transcriptomeStr]['folder']
            mapConditionStr = conditionDict["map"]

            # A missing "testing" key counts as testing mode.
            self.testingBool = bool(Target.storeDict.get("testing", True))

            infoDict = {
                "branch": self.branchStr,
                "method": transcriptomeStr,
                "annotate": genomeStr,
                "trim": trimStr,
                "folder": folderStr,
                "hisat2Condition": mapConditionStr,
                "fileType": "-sorted.bam",
            }

            # ---- Action ----
            Print = libPrint.timer()
            Print.logFilenameStr = "07-CuffDiff-{branch}-from({method})-{annotate}-{trim}".format(
                **infoDict)
            Print.folderStr = "log/"
            Print.testingBool = self.testingBool
            Print.startLog()

            resultPathStr = resultTemplateStr.format(**infoDict)
            pathlib.Path(resultPathStr).mkdir(parents=True, exist_ok=True)

            gtfFileStr = gtfTemplateStr.format(**infoDict)

            # One comma-separated list of replicate BAM files per group.
            perGroupList = [
                ",".join(
                    bamTemplateStr.format(**dict(
                        infoDict, group=groupStr, replication=repliStr))
                    for repliStr in replicationList)
                for groupStr in groupList
            ]

            commandDict = dict(infoDict)
            commandDict["thread"] = threadStr
            commandDict["outputFolder"] = resultPathStr
            commandDict["labelList"] = ",".join(groupList)
            commandDict["mergedGTF"] = gtfFileStr
            commandDict["bamFiles"] = " ".join(perGroupList)

            Print.phraseStr = commandStr.format(**commandDict)
            Print.runCommand()

            Print.stopLog()
Ejemplo n.º 11
0
    def converting(self):
        """Convert each delimited text file listed in ``argumentDict`` to JSON.

        Settings read from ``self.argumentDict``:

        * ``files``: source file paths.
        * ``refer_column``: column whose value becomes the record id; when
          empty, ids are ``prefix`` followed by the running record number.
        * ``prefix``: id prefix used when no reference column is given.
        * ``header``: explicit column names.
        * ``headless``: when false and no ``header`` is given, the first
          non-comment line supplies the column names; when true,
          ``Column_<n>`` names (zero-padded) are generated from the first
          non-comment line and appended to ``header``.
        * ``delimiter``: column separator (default tab).

        A source file is skipped when its ``.json`` counterpart already
        exists.  Otherwise four files are written beside it: the
        ``{id: {column: [values]}}`` mapping itself plus the three
        dictionaries produced by ``relationGeneration`` (suffixes
        ``-KeyValueIdDict``, ``-KeyIdValueDict`` and ``-KeyMetadata``).

        Leading ``#`` lines are dropped and later ``#`` lines are ignored.
        Lines whose column count differs from the header are reported on
        stdout; lines that yield an empty id are reported through the log.
        """
        sourceFilesList = self.argumentDict.get("files", [])
        referColumnNameStr = self.argumentDict.get("refer_column", "")
        prefixStr = self.argumentDict.get("prefix", "")
        configuredHeaderList = self.argumentDict.get("header", [])
        headlessBoo = self.argumentDict.get("headless", True)
        delimiterStr = self.argumentDict.get("delimiter", "\t")

        referColumnExistBoo = referColumnNameStr != ""

        Print = libPrint.timer()
        Print.logFilenameStr = self.logFilenameStr
        Print.folderStr = self.folderStr
        Print.testingBool = self.testingBool
        Print.startLog()

        Print.phraseStr = "Total files: " + pprint.pformat(sourceFilesList)
        Print.printTimeStamp()

        for sourceFilenameStr in sourceFilesList:
            Print.phraseStr = "{Now processing} " + sourceFilenameStr
            Print.printTimeStamp()

            # Swap only the final extension; a name without any extension
            # simply gains ".json".
            resultPath = pathlib.Path(sourceFilenameStr).with_suffix(".json")
            if resultPath.exists():
                continue

            Print.phraseStr = "{Loading Files} " + sourceFilenameStr
            Print.printTimeStamp()

            with open(sourceFilenameStr) as sourceHandle:
                linesList = sourceHandle.read().splitlines()

            # Drop the leading comment block in one slice.
            skipInt = 0
            while (skipInt < len(linesList)
                   and linesList[skipInt].startswith("#")):
                skipInt = skipInt + 1
            del linesList[:skipInt]

            Print.phraseStr = "{Create Header/Key List} " + sourceFilenameStr
            Print.printTimeStamp()

            # Work on a per-file copy so that column names taken from (or
            # generated for) one file never leak into the next one or into
            # ``argumentDict``.
            headerList = list(configuredHeaderList)
            if not headlessBoo:
                if not headerList and linesList:
                    headerList = linesList.pop(0).split(delimiterStr)
            elif linesList:
                columnCountInt = len(linesList[0].split(delimiterStr))
                widthInt = len(str(columnCountInt))
                for columnInt in range(columnCountInt):
                    headerList.append("Column_" +
                                      str(columnInt).zfill(widthInt))

            Print.phraseStr = "{Assign Key's Position} " + sourceFilenameStr
            Print.printTimeStamp()

            positionDict = dict()  # {numbering:key}
            for number, columnNameStr in enumerate(headerList):
                if columnNameStr not in positionDict.values():
                    positionDict[number] = columnNameStr

            Print.phraseStr = "{Start Conversion} " + sourceFilenameStr
            Print.printTimeStamp()

            idKeyValueDict = dict()
            lineIdInt = 0
            totalLineCountInt = len(linesList)
            for currentLineCountInt, lineStr in enumerate(linesList, start=1):
                # startswith() keeps blank lines from raising; they fall
                # through to the column-count check below.
                if lineStr.startswith("#"):
                    continue

                wordStr = "[" + str(currentLineCountInt) + "/" + str(
                    totalLineCountInt) + "]"
                print(wordStr, end="\r")

                tempValueList = lineStr.split(delimiterStr)
                lineIdInt = lineIdInt + 1

                if len(tempValueList) != len(positionDict):
                    print('line: ' + str(lineIdInt))
                    continue

                idStr = ""
                if not referColumnExistBoo:
                    idStr = prefixStr + str(lineIdInt)

                tempKeyValueDict = dict()
                for number in range(len(positionDict)):
                    keyStr = positionDict.get(number)
                    tempKeyValueDict[keyStr] = tempValueList[number]
                    if referColumnExistBoo and keyStr == referColumnNameStr:
                        idStr = tempValueList[number]

                if idStr == "":
                    Print.phraseStr = "[{}/{}] Line without id".format(
                        str(currentLineCountInt), str(totalLineCountInt))
                    Print.printPhrase()
                    continue

                targetValueDict = idKeyValueDict.setdefault(idStr, dict())
                for keyStr, valueStr in tempKeyValueDict.items():
                    mergedList = targetValueDict.get(keyStr, list())
                    mergedList.append(valueStr)
                    # Sorted so the JSON output is reproducible between runs.
                    targetValueDict[keyStr] = sorted(set(mergedList))

            Print.phraseStr = "{Rearrange Relation Dict.} " + sourceFilenameStr
            Print.printTimeStamp()

            MakingRelation = relationGeneration()
            MakingRelation.inputDict = dict()
            MakingRelation.inputDict.update(idKeyValueDict)
            MakingRelation.logFilenameStr = self.logFilenameStr + "-relation"
            MakingRelation.generating()
            relationDict = MakingRelation.outputDict

            # The companion files share the result's stem; building the names
            # from the stem avoids touching ".json" elsewhere in the path.
            outputList = [
                (resultPath, idKeyValueDict),
                (resultPath.with_name(resultPath.stem +
                                      "-KeyValueIdDict.json"),
                 relationDict["{key:{value:[id]}}"]),
                (resultPath.with_name(resultPath.stem +
                                      "-KeyIdValueDict.json"),
                 relationDict["{key:{id:[value]}}"]),
                (resultPath.with_name(resultPath.stem + "-KeyMetadata.json"),
                 relationDict["metadata"]),
            ]
            for outputPath, payloadDict in outputList:
                with open(str(outputPath), "w") as outputHandle:
                    json.dump(payloadDict,
                              outputHandle,
                              indent=4,
                              sort_keys=True)

        Print.stopLog()
Ejemplo n.º 12
0
    def generating(self):
        """Build the value/id relation dictionaries from ``self.inputDict``.

        ``self.inputDict`` is read as ``{id: {key: [value, ...]}}``.  The
        result is stored in ``self.outputDict`` with three entries:

        * ``"{key:{value:[id]}}"``: per key, each value with the sorted,
          de-duplicated ids carrying it.
        * ``"{key:{id:[value]}}"``: per key, each id with its sorted,
          de-duplicated values.
        * ``"metadata"``: per key, a histogram ``{list length: occurrences}``
          for each of the two mappings above.

        Entries whose de-duplicated list is exactly ``[""]`` are left out of
        both the mappings and the histograms.
        """
        Print = libPrint.timer()
        Print.logFilenameStr = self.logFilenameStr
        Print.folderStr = self.folderStr
        Print.testingBool = self.testingBool
        Print.startLog()

        # First pass: collect raw (possibly repeated) members in both
        # directions.
        rawValueIdDict = dict()
        rawIdValueDict = dict()
        for idStr, keyValueDict in self.inputDict.items():
            for keyStr, sourceValueList in keyValueDict.items():
                for valueStr in sourceValueList:
                    rawIdValueDict.setdefault(keyStr, {}).setdefault(
                        idStr, []).append(valueStr)
                    rawValueIdDict.setdefault(keyStr, {}).setdefault(
                        valueStr, []).append(idStr)

        def summarise(rawDict):
            # Second pass: de-duplicate and sort every member list, and count
            # how many entries share each list length.
            cleanedDict = dict()
            histogramDict = dict()
            for keyStr, memberDict in rawDict.items():
                keptDict = dict()
                lengthCountDict = dict()
                for nameStr, memberList in memberDict.items():
                    uniqueList = sorted(list(set(memberList)))
                    if uniqueList == [""] or memberList == []:
                        continue
                    keptDict[nameStr] = uniqueList
                    sizeInt = len(uniqueList)
                    lengthCountDict[sizeInt] = lengthCountDict.get(
                        sizeInt, 0) + 1
                cleanedDict[keyStr] = keptDict
                histogramDict[keyStr] = lengthCountDict
            return cleanedDict, histogramDict

        targetValueIdDict, keyValueCountDict = summarise(rawValueIdDict)
        targetIdValueDict, keyIdCountDict = summarise(rawIdValueDict)

        metaDict = {
            "count(id):valueAmount": keyValueCountDict,
            "count(value):idAmount": keyIdCountDict,
        }
        self.outputDict = {
            "{key:{value:[id]}}": targetValueIdDict,
            "{key:{id:[value]}}": targetIdValueDict,
            "metadata": metaDict,
        }

        Print.stopLog()