Esempio n. 1
0
    def aligning(self):
        """Run alignment, SAM-to-BAM conversion and BAM sorting per sample.

        The experiment configuration named by ``self.queryStr`` provides the
        condition list, the groups, the replications, the sequencing mode
        ("pairEnd" or "singleEnd") and the file name templates.  One log is
        opened per condition.  For every group/replication pair three stages
        are walked, each skipped when its product is already on disk:

        1. alignment, writing a SAM file;
        2. SAM to BAM conversion, followed by removal of the SAM file;
        3. BAM sorting, followed by removal of the unsorted BAM file.

        Each command is formatted from a template held in a tool
        configuration and handed to ``Print.runCommand()``.

        Side effects: creates the output folder of every condition and sets
        ``self.testingBool`` from the "testing" entry (defaults to True).
        """

        def loadConfig(queryStr):
            # Every configuration is read from "config/" in "UPDATE" mode.
            Config = libConfig.config()
            Config.queryStr = queryStr
            Config.folderStr = "config/"
            Config.modeStr = "UPDATE"
            Config.load()
            return Config

        def runLogged(Log, phraseStr):
            # Hand a finished command line to the logger for execution.
            Log.phraseStr = phraseStr
            Log.runCommand()

        def noteExisting(Log, phraseStr):
            # Record that a stage was skipped because its product exists.
            Log.phraseStr = phraseStr
            Log.printTimeStamp()

        BinHISAT2 = loadConfig("binHISAT2-RUN")
        SAMconvert = loadConfig("binSAMtools-CONVERT")
        SAMsort = loadConfig("binSAMtools-SORT")
        Remove = loadConfig("commandRM")
        expRep = loadConfig(self.queryStr)

        settingDict = expRep.storeDict
        branchStr = settingDict["branch"]
        pairPostfixStr = settingDict["pairPostfix"]
        unpairPostfixStr = settingDict["unpairPostfix"]
        groupList = settingDict["group"]
        modeStr = settingDict["mode"]
        replicationList = settingDict["replication"]
        conditionList = settingDict["conditionsList"]

        for conditionDict in conditionList:
            annotateConditionStr = conditionDict["genome"]
            trimConditionStr = conditionDict["trim"]
            hisat2ConditionStr = conditionDict["map"]

            directionDict = settingDict["[hisat2]direction"]
            fileTypeStr = settingDict["[trim]fileType"]
            inputFileNameStr = settingDict["[hisat2]inputFileName"]
            outputFolderStr = settingDict["[hisat2]outputFolder"]
            outputFileNameStr = settingDict["[hisat2]outputFileName"]

            # Any truthy "testing" value (or a missing one) enables testing.
            self.testingBool = bool(settingDict.get("testing", True))

            conditionFolderPath = pathlib.Path(
                outputFolderStr.format(annotate=annotateConditionStr,
                                       trim=trimConditionStr))
            conditionFolderPath.mkdir(parents=True, exist_ok=True)

            Print = libPrint.timer()
            Print.logFilenameStr = "04-hs1-hisat2-{branch}-{hisat2cond}-{annotateCon}-{trimCon}".format(
                branch=branchStr,
                hisat2cond=hisat2ConditionStr,
                annotateCon=annotateConditionStr,
                trimCon=trimConditionStr,
            )
            Print.folderStr = "log/"
            Print.testingBool = self.testingBool
            Print.startLog()

            for groupStr in groupList:
                for replicationStr in replicationList:
                    # The command arguments start from the mapping parameters
                    # and are extended stage by stage below.
                    Para = loadConfig(hisat2ConditionStr)
                    finalDict = dict(Para.storeDict)

                    Spec = loadConfig(annotateConditionStr)
                    finalDict["indexHeader"] = Spec.storeDict["indexHeader"]

                    # Format arguments shared by every input file name.
                    sampleDict = {
                        "trim": trimConditionStr,
                        "group": groupStr,
                        "replication": replicationStr,
                        "fileType": fileTypeStr,
                    }

                    # Map each command placeholder to the format arguments of
                    # the FASTQ file that fills it.
                    commandKeyStr = None
                    readDictMap = dict()
                    if modeStr == "pairEnd":
                        commandKeyStr = "command-PE"
                        pairEndLayout = (
                            ("pairForwardFASTQ", '1', pairPostfixStr),
                            ("pairReverseFASTQ", '2', pairPostfixStr),
                            ("unpairForwardFASTQ", '1', unpairPostfixStr),
                            ("unpairReverseFASTQ", '2', unpairPostfixStr),
                        )
                        for labelStr, directionKeyStr, pairTypeStr in pairEndLayout:
                            readDictMap[labelStr] = dict(
                                sampleDict,
                                direction=directionDict[directionKeyStr],
                                pairType=pairTypeStr)
                    elif modeStr == "singleEnd":
                        commandKeyStr = "command-SE"
                        readDictMap["unpairFASTQ"] = dict(sampleDict)

                    # The three products differ only in their file type.
                    outputDict = {
                        "annotate": annotateConditionStr,
                        "trim": trimConditionStr,
                        "hisat2Condition": hisat2ConditionStr,
                        "group": groupStr,
                        "replication": replicationStr,
                    }
                    samFileStr = outputFileNameStr.format(fileType=".sam",
                                                          **outputDict)
                    bamFileStr = outputFileNameStr.format(fileType=".bam",
                                                          **outputDict)
                    sortedBAMFileStr = outputFileNameStr.format(
                        fileType="-sorted.bam", **outputDict)

                    samPath = pathlib.Path(samFileStr)
                    bamPath = pathlib.Path(bamFileStr)
                    sortedBAMPath = pathlib.Path(sortedBAMFileStr)

                    # Stage 1: align, unless any later product already exists.
                    if samPath.exists():
                        noteExisting(Print, "SAM File existed: " + samFileStr)
                    elif not (bamPath.exists() or sortedBAMPath.exists()):
                        if commandKeyStr is not None:
                            commandStr = BinHISAT2.storeDict.get(
                                commandKeyStr, "")
                            fastqDict = {
                                labelStr: inputFileNameStr.format(**readDict)
                                for labelStr, readDict in readDictMap.items()
                            }
                            finalDict.update(fastqDict)
                            finalDict["outputSAM"] = samFileStr
                            runLogged(Print, commandStr.format(**finalDict))

                    # Stage 2: convert SAM to BAM, unless a BAM (sorted or
                    # not) is already present.
                    if bamPath.exists():
                        noteExisting(Print, "BAM File existed: " + bamFileStr)
                    elif not sortedBAMPath.exists():
                        commandStr = SAMconvert.storeDict.get("command", "")
                        finalDict.update({
                            "outputBAM": bamFileStr,
                            "inputSAM": samFileStr,
                        })
                        runLogged(Print, commandStr.format(**finalDict))

                    # The SAM file is dropped only once its BAM exists.
                    if samPath.exists() and bamPath.exists():
                        commandStr = Remove.storeDict.get("command", "")
                        runLogged(Print, commandStr.format(target=samFileStr))

                    # Stage 3: sort the BAM file.
                    if sortedBAMPath.exists():
                        noteExisting(
                            Print,
                            "Sorted BAM File existed: " + sortedBAMFileStr)
                    else:
                        commandStr = SAMsort.storeDict.get("command", "")
                        finalDict.update({
                            "outputBAM": sortedBAMFileStr,
                            "inputBAM": bamFileStr,
                        })
                        runLogged(Print, commandStr.format(**finalDict))

                    # The unsorted BAM is dropped only once the sorted one
                    # exists.
                    if bamPath.exists() and sortedBAMPath.exists():
                        commandStr = Remove.storeDict.get("command", "")
                        runLogged(Print, commandStr.format(target=bamFileStr))

            Print.stopLog()
Esempio n. 2
0
    def trimming(self):
        """Build one trimming command per condition, group and replication.

        The command template comes from the "binTrimmomatic" configuration
        and the experiment layout from the configuration named by
        ``self.queryStr``.  One log is opened per condition; every formatted
        command is handed to ``Print.runCommand()``.

        In "pairEnd" mode the file argument lists the input of every
        direction first, then, per direction, the paired output followed by
        the unpaired output.  In "singleEnd" mode it is the input followed by
        the output.  Any other mode produces no command.

        Nothing is run when "conditionsList" is missing, empty or not a
        list; the folders listed under "checkFolder" are still created.
        """

        def loadConfig(queryStr):
            # Every configuration is read from "config/" in "UPDATE" mode.
            Config = libConfig.config()
            Config.queryStr = queryStr
            Config.folderStr = "config/"
            Config.modeStr = "UPDATE"
            Config.load()
            return Config

        # ---- Parameter ----
        BinTrim = loadConfig("binTrimmomatic")
        ExpRep = loadConfig(self.queryStr)

        # ---- Initialization ----
        commandStr = BinTrim.storeDict["command"]
        settingDict = ExpRep.storeDict

        conditionList = settingDict.get("conditionsList", [])
        groupList = settingDict.get("group", [])
        replicationList = settingDict.get("replication", [])
        directionList = settingDict.get("direction", [])

        branchStr = settingDict.get("branch", "")
        pairStr = settingDict.get("pairPostfix", "")
        unpairStr = settingDict.get("unpairPostfix", "")
        modeStr = settingDict.get("mode", "")

        inputFileNameStr = settingDict.get("[trim]inputFileName", "")
        outputFileNameStr = settingDict.get("[trim]outputFileName", "")
        fileTypeStr = settingDict.get("[trim]fileType", "")
        checkFolderList = settingDict.get("checkFolder", [])

        # Any truthy "testing" value (or a missing one) enables testing.
        testingBool = bool(settingDict.get("testing", True))

        # ---- Action ----
        for folderStr in checkFolderList:
            pathlib.Path(folderStr).mkdir(parents=True, exist_ok=True)

        if not isinstance(conditionList, list) or not conditionList:
            return

        for conditionDict in conditionList:
            conditionStr = conditionDict['trim']
            Print = libPrint.timer()
            Print.logFilenameStr = "03-trim-{branch}-{cond}".format(
                branch=branchStr, cond=conditionStr)
            Print.folderStr = "log/"
            Print.testingBool = testingBool
            Print.startLog()

            TrimPara = loadConfig(conditionStr)
            headerStr = TrimPara.storeDict.get('header', "")

            for groupStr in groupList:
                for replicationStr in replicationList:
                    if modeStr == "pairEnd":
                        inputFileList = []
                        outputFileList = []
                        for directionStr in directionList:
                            inputFileList.append(
                                inputFileNameStr.format(
                                    group=groupStr,
                                    replication=replicationStr,
                                    direction=directionStr,
                                    fileType=fileTypeStr))
                            # Paired output first, then unpaired, for each
                            # direction.
                            for pairTypeStr in (pairStr, unpairStr):
                                outputFileList.append(
                                    outputFileNameStr.format(
                                        condition=headerStr,
                                        direction=directionStr,
                                        group=groupStr,
                                        replication=replicationStr,
                                        pairType=pairTypeStr,
                                        fileType=fileTypeStr,
                                    ))
                        fileStr = " ".join(inputFileList + outputFileList)
                        trimModeStr = "PE"
                    elif modeStr == "singleEnd":
                        inputStr = inputFileNameStr.format(
                            group=groupStr,
                            replication=replicationStr,
                            fileType=fileTypeStr)
                        outputStr = outputFileNameStr.format(
                            condition=headerStr,
                            group=groupStr,
                            replication=replicationStr,
                            fileType=fileTypeStr,
                        )
                        fileStr = "{} {}".format(inputStr, outputStr)
                        trimModeStr = "SE"
                    else:
                        # Unsupported sequencing mode: nothing to run.
                        continue

                    # The condition parameters are copied so that the file
                    # list and mode never leak into the loaded configuration.
                    commandDict = dict(TrimPara.storeDict)
                    commandDict.update({
                        'files': fileStr,
                        'mode': trimModeStr,
                    })
                    Print.phraseStr = commandStr.format(**commandDict)
                    Print.runCommand()

            Print.stopLog()
Esempio n. 3
0
    def importingStringtie(self):
        """Load per-sample expression tables into SQLite and summarise them.

        The configuration named by ``self.branchStr`` supplies the sample
        layout, the (annotation, trim) condition list, the methods and the
        path templates.  For every method and condition:

        1. each sample's gene and transcript tab-separated files are read
           with pandas and passed to ``self.exportingExpression`` through
           ``self.expressionInputDict`` together with the CREATE/INSERT
           statements of the per-sample table;
        2. ``GeneExpressionSummary`` (FPKM and TPM per sample) and
           ``TranscriptExpressionSummary`` (FPKM per sample) are built in the
           same database, using the control sample's rows as the row index.

        While importing, the leading columns of each sample are compared
        with those of the first sample seen for the method and the outcome
        ("Empty" for the first sample, then "Same" or "Different") is
        logged.  Gene and transcript rows are tracked separately.

        Raises:
            KeyError: if a sample table holds a UUID that the control
                sample's table does not.
        """
        # ---- Initialization for Converting ----
        Target = libConfig.config()
        Target.queryStr = self.branchStr
        Target.folderStr = "config/"
        Target.modeStr = "UPDATE"
        Target.load()

        branchStr = self.branchStr
        settingDict = Target.storeDict

        controlStr = settingDict.get("controlSample", "")
        controlSafeStr = controlStr.replace("-", "_")
        groupList = settingDict.get("group", [])
        replicationList = settingDict.get("replication", [])
        patternStr = settingDict.get("samplePattern", "")

        sampleList = [
            patternStr.format(group=groupStr, replication=replicationStr)
            for groupStr in groupList
            for replicationStr in replicationList
        ]
        # "-" is replaced by "_" wherever a sample name becomes part of a
        # table or column name.
        sampleSafeList = [
            sampleStr.replace("-", "_") for sampleStr in sampleList
        ]

        conditionList = settingDict.get("conditionList", [])
        methodList = settingDict.get("methodList", [])
        geneSourceDict = settingDict.get("[sqlite]geneSourceDict", dict())
        transcriptSourceDict = settingDict.get(
            "[sqlite]transcriptSourceDict", dict())

        geneExpPathStr = settingDict.get("[sqlite]geneSourcePathStr", "")
        transcriptExpPathStr = settingDict.get(
            "[sqlite]transcriptSourcePathStr", "")
        sqlFolderStr = settingDict.get("sqlFolderStr", "")
        sqlPathStr = settingDict.get("sqlPathStr", "")
        sqlLogStr = settingDict.get("[sqlite]logFilename", "")

        # Number of leading source columns that take part in the row check.
        # NOTE(review): if the gene file follows the column order of the
        # table below, seven columns reach into 'Coverage' -- confirm.
        geneCompareWidthInt = 7
        transcriptCompareWidthInt = 10

        geneCreateStr = """CREATE TABLE GeneExpression_{}
            ('UUID'  TEXT    PRIMARY KEY NOT NULL,
            'GeneID'   TEXT    NOT NULL,
            'GeneName' TEXT    NOT NULL,
            'Reference' TEXT    NOT NULL,
            'Strand'    TEXT    NOT NULL,
            'Start' INTEGER NOT NULL,
            'End'   INTEGER NOT NULL,
            'Coverage'  REAL    NOT NULL,
            'FPKM'  REAL    NOT NULL,
            'TPM'   REAL    NOT NULL);"""
        geneInsertStr = (
            "INSERT INTO GeneExpression_{} ('UUID','GeneID','GeneName',"
            "'Reference','Strand','Start','End','Coverage','FPKM','TPM') "
            "VALUES (?,?,?,?,?,?,?,?,?,?)")
        transcriptCreateStr = """CREATE TABLE TranscriptExpression_{}
            ('UUID'     TEXT    PRIMARY KEY NOT NULL,
            'TranscriptID'  INTEGER    NOT NULL,
            'Chromosome'    TEXT,
            'Strand'    TEXT    NOT NULL,
            'Start' INTEGER NOT NULL,
            'End'   INTEGER NOT NULL,
            'TranscriptName'    TEXT    NOT NULL,
            'ExonCount'    INTEGER  NOT NULL,
            'Length'    INTEGER  NOT NULL,
            'GeneID'    TEXT  NOT NULL,
            'GeneName'    TEXT  NOT NULL,
            'Coverage'  REAL    NOT NULL,
            'FPKM'  REAL    NOT NULL);"""
        transcriptInsertStr = (
            "INSERT INTO TranscriptExpression_{} ('UUID','TranscriptID',"
            "'Chromosome','Strand','Start','End','TranscriptName',"
            "'ExonCount','Length','GeneID','GeneName','Coverage','FPKM') "
            "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)")

        for methodStr in methodList:
            methodFolderStr = sqlFolderStr.format(branch=branchStr,
                                                  method=methodStr)
            pathlib.Path(methodFolderStr).mkdir(parents=True, exist_ok=True)
            geneFolderStr = geneSourceDict.get(methodStr, "")
            transcriptFolderStr = transcriptSourceDict.get(methodStr, "")

            # Separate references: gene rows and transcript rows have
            # different layouts and must never be compared with each other.
            geneReferenceSet = set()
            transcriptReferenceSet = set()

            for conditionTup in conditionList:
                antStr = conditionTup[0]
                trimStr = conditionTup[1]

                Print = libPrint.timer()
                Print.logFilenameStr = sqlLogStr.format(ant=antStr,
                                                        trim=trimStr)
                Print.folderStr = methodFolderStr
                Print.testingBool = self.testingBool
                Print.startLog()

                # The database path depends on the condition only, so it is
                # resolved once here and is defined even without samples.
                sqlPath = sqlPathStr.format(branch=branchStr,
                                            method=methodStr,
                                            ant=antStr,
                                            trim=trimStr)

                for sampleStr, sampleSafeStr in zip(sampleList,
                                                    sampleSafeList):
                    geneReferenceSet = self._importSampleExpression(
                        Print, "Gene", sampleStr,
                        geneExpPathStr.format(folder=geneFolderStr,
                                              branch=branchStr,
                                              ant=antStr,
                                              trim=trimStr,
                                              sample=sampleStr),
                        sqlPath, geneCompareWidthInt, geneReferenceSet,
                        geneCreateStr.format(sampleSafeStr),
                        geneInsertStr.format(sampleSafeStr))

                    transcriptReferenceSet = self._importSampleExpression(
                        Print, "Transcript", sampleStr,
                        transcriptExpPathStr.format(
                            folder=transcriptFolderStr,
                            branch=branchStr,
                            ant=antStr,
                            trim=trimStr,
                            sample=sampleStr),
                        sqlPath, transcriptCompareWidthInt,
                        transcriptReferenceSet,
                        transcriptCreateStr.format(sampleSafeStr),
                        transcriptInsertStr.format(sampleSafeStr))

                self._summariseExpression(
                    Print, sqlPath, "Gene",
                    [("GeneID", "TEXT"), ("GeneName", "TEXT")],
                    ["FPKM", "TPM"], controlSafeStr, sampleSafeList)

                self._summariseExpression(
                    Print, sqlPath, "Transcript",
                    [("TranscriptID", "INTEGER"), ("TranscriptName", "TEXT"),
                     ("GeneID", "TEXT"), ("GeneName", "TEXT")],
                    ["FPKM"], controlSafeStr, sampleSafeList)

                Print.stopLog()

    def _importSampleExpression(self, Print, kindStr, sampleStr,
                                samplePathStr, sqlPath, compareWidthInt,
                                referenceSet, createCommandStr,
                                insertCommandStr):
        """Read one sample table and hand it to ``exportingExpression``.

        Args:
            Print: logger used for the progress messages.
            kindStr: "Gene" or "Transcript"; only used in the log heading.
            sampleStr: sample name as written in the log.
            samplePathStr: tab-separated source file, first line is a header.
            sqlPath: SQLite database the rows are exported to.
            compareWidthInt: number of leading columns used by the row check.
            referenceSet: row signatures of the reference sample (may be
                empty).
            createCommandStr: CREATE TABLE statement for this sample.
            insertCommandStr: parameterised INSERT statement for this sample.

        Returns:
            The reference set to use for the next sample of the same kind.
        """
        Print.phraseStr = "-- Data format conversion for {} Expression in {} --".format(
            kindStr, sampleStr)
        Print.printTimeStamp()

        sampleDF = pd.read_csv(samplePathStr, delimiter="\t", header=0)
        Print.printing("[Pandas:Read] " + samplePathStr)
        rowList = sampleDF.values.tolist()

        referenceSet = self._logRowComparison(Print, sampleStr, rowList,
                                              compareWidthInt, referenceSet)

        # exportingExpression takes its input through this attribute.
        self.expressionInputDict = {
            "sqlPath": sqlPath,
            "count": len(rowList),
            "rowList": rowList,
            "createCommand": createCommandStr,
            "insertCommand": insertCommandStr,
        }
        self.exportingExpression(Print)
        return referenceSet

    def _logRowComparison(self, Print, sampleStr, rowList, widthInt,
                          referenceSet):
        """Log whether a sample's leading columns match the reference rows.

        Each row is reduced to "UUID.<row index>" plus its first ``widthInt``
        values, joined by tabs.  An empty ``referenceSet`` is reported as
        "Empty" and replaced by this sample's rows; otherwise "Same" or
        "Different" is logged.  Returns the reference set to keep using.
        """
        currentSet = set()
        for rowInt, sourceList in enumerate(rowList):
            fieldList = ["UUID." + str(rowInt)]
            fieldList.extend(sourceList[0:widthInt])
            currentSet.add("\t".join(str(x) for x in fieldList))

        if not referenceSet:
            Print.printing("    " + sampleStr + ": Empty")
            return currentSet

        if referenceSet == currentSet:
            Print.printing("    " + sampleStr + ": Same")
        else:
            Print.printing("    " + sampleStr + ": Different")
        return referenceSet

    def _summariseExpression(self, Print, sqlPath, kindStr, idColumnList,
                             metricList, controlSafeStr, sampleSafeList):
        """Create and fill ``<kindStr>ExpressionSummary`` in ``sqlPath``.

        Args:
            Print: logger used for the progress messages.
            sqlPath: SQLite database holding the per-sample tables.
            kindStr: "Gene" or "Transcript"; prefix of the table names.
            idColumnList: (column name, SQL type) pairs copied from the
                control sample's table, in output order.
            metricList: value columns copied from every sample's table; each
                becomes one "<metric>_<sample>" REAL column per sample.
            controlSafeStr: table-safe name of the control sample.
            sampleSafeList: table-safe names of all samples.

        Raises:
            KeyError: if a sample row's UUID is absent from the control
                sample, or a summary row lacks a value for some sample.
        """
        Print.phraseStr = "-- Summarising for {} Expression --".format(kindStr)
        Print.printTimeStamp()

        idNameList = ["UUID"] + [nameStr for nameStr, _ in idColumnList]
        createColumnList = ["'UUID'  TEXT PRIMARY KEY NOT NULL"]
        createColumnList.extend(
            "'{}' {}".format(nameStr, typeStr)
            for nameStr, typeStr in idColumnList)
        insertColumnList = list(idNameList)
        for metricStr in metricList:
            for sampleSafeStr in sampleSafeList:
                columnStr = "{}_{}".format(metricStr, sampleSafeStr)
                createColumnList.append(columnStr + " REAL")
                insertColumnList.append(columnStr)

        summaryTableStr = kindStr + "ExpressionSummary"
        sourceTableStr = kindStr + "Expression_{}"

        Connect = sqlite3.connect(sqlPath)
        try:
            Cursor = Connect.cursor()
            Cursor.execute("CREATE TABLE {} ({})".format(
                summaryTableStr, ",".join(createColumnList)))
            Connect.commit()
            Print.printing("[SQLite3:CreateTable] " + sqlPath)

            # The control sample defines which rows the summary contains.
            resultDict = dict()
            controlQueryStr = "SELECT {} from {}".format(
                ", ".join(idNameList), sourceTableStr.format(controlSafeStr))
            for rowTup in Cursor.execute(controlQueryStr):
                resultDict[rowTup[0]] = dict(zip(idNameList, rowTup))

            for sampleSafeStr in sampleSafeList:
                sampleQueryStr = "SELECT UUID, {} from {}".format(
                    ", ".join(metricList),
                    sourceTableStr.format(sampleSafeStr))
                for rowTup in Cursor.execute(sampleQueryStr):
                    subDict = resultDict[rowTup[0]]
                    for metricStr, valueObj in zip(metricList, rowTup[1:]):
                        subDict["{}_{}".format(metricStr,
                                               sampleSafeStr)] = valueObj

            insertCommand = "INSERT INTO {table} ({column}) VALUES ({value})".format(
                table=summaryTableStr,
                column=",".join(insertColumnList),
                value=",".join(["?"] * len(insertColumnList)))
            valueRowList = [[subDict[columnStr]
                             for columnStr in insertColumnList]
                            for subDict in resultDict.values()]
            Cursor.executemany(insertCommand, valueRowList)

            Connect.commit()
            Print.printing("[SQLite3:Insert] " + sqlPath)
        finally:
            # Release the database handle even when a statement fails.
            Connect.close()
        Print.printing("[SQLite3:Close]\n")
Esempio n. 4
0
    def summaring(self):
        """Run the "binSAMtools-FLAGSTAT" command on every sorted BAM file found.

        Loads the command entry "binSAMtools-FLAGSTAT" and the experiment
        entry named by ``self.branchStr`` from "config/", then walks every
        trim / annotate / hisat2 / group / replication combination. For each
        combination whose "-sorted.bam" path exists on disk, the command
        template is filled in with that path and handed to the log timer to
        run. One log is opened per (hisat2, annotate, trim) combination.

        Side effects: sets ``self.testingBool`` from the experiment's
        "testing" value (treated as True when the key is absent) and creates
        the formatted "[hisat2]outputFolder" directories if they are missing.
        """
        def loadConfig(queryStr):
            # Every entry used here is read from "config/" in the same way.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        FlagStat = loadConfig("binSAMtools-FLAGSTAT")
        Experiment = loadConfig(self.branchStr)

        trimConditionList = Experiment.storeDict.get("[trim]condition", [])
        hisat2ConditionList = Experiment.storeDict.get("[hisat2]Condition", [])
        annotateConditionList = Experiment.storeDict.get("conditionList", [])
        groupList = Experiment.storeDict.get("group", [])
        replicationList = Experiment.storeDict.get("replication", [])

        folderTemplateStr = Experiment.storeDict.get("[hisat2]outputFolder", "")
        bamTemplateStr = Experiment.storeDict.get("[hisat2]outputFileName", "")

        # A missing "testing" key counts as testing mode.
        self.testingBool = bool(Experiment.storeDict.get("testing", True))

        for trimCondStr in trimConditionList:
            for annotateEntry in annotateConditionList:
                # Only the first element of each condition entry is used here.
                annotateCondStr = annotateEntry[0]
                hisat2FolderStr = folderTemplateStr.format(
                    annotateCondition=annotateCondStr,
                    trimCondition=trimCondStr)
                pathlib.Path(hisat2FolderStr).mkdir(parents=True,
                                                    exist_ok=True)

                for hisat2CondStr in hisat2ConditionList:
                    Logger = libPrint.timer()
                    Logger.logFilenameStr = "04-hs2-hisat2-{hisat2cond}-{annotateCon}-{trimCon}".format(
                        hisat2cond=hisat2CondStr,
                        annotateCon=annotateCondStr,
                        trimCon=trimCondStr,
                    )
                    Logger.folderStr = "log/"
                    Logger.testingBool = self.testingBool
                    Logger.startLog()

                    for groupStr in groupList:
                        for replicationStr in replicationList:
                            bamPathStr = bamTemplateStr.format(
                                annotateCondition=annotateCondStr,
                                trimCondition=trimCondStr,
                                hisat2Condition=hisat2CondStr,
                                group=groupStr,
                                replication=replicationStr,
                                fileType="-sorted.bam",
                            )
                            if not pathlib.Path(bamPathStr).exists():
                                # No sorted BAM at this path; skip the sample.
                                continue

                            flagstatTemplateStr = FlagStat.storeDict.get(
                                "command", "")
                            Logger.phraseStr = flagstatTemplateStr.format(
                                BAMfile=bamPathStr)
                            Logger.runCommand()

                    Logger.stopLog()
Esempio n. 5
0
    def assembling(self):
        """Run the configured StringTie assembling command for every sample.

        Loads the command template from the "binStringTie-RUN" entry (or
        "binStringTie-RUN-withoutAnnotation" when ``self.withoutAnnotation``
        is truthy) and the experiment entry named by ``self.branchStr``, both
        from "config/". For each (annotate, trim) pair in "conditionList" it
        loads the annotation entry named by the pair's first element, opens a
        log, and for every group/replication pair fills in the command
        template and hands it to the log timer to run.

        Side effects: sets ``self.testingBool`` from the experiment's
        "testing" value (treated as True when the key is absent) and creates
        the formatted output folder for each condition that has samples.

        Raises:
            KeyError: if the command entry has no "command" key.
        """
        # ---- Parameter for Assembling ----
        if self.withoutAnnotation:
            binQueryStr = "binStringTie-RUN-withoutAnnotation"
        else:
            binQueryStr = "binStringTie-RUN"

        BinMap = libConfig.config()
        BinMap.queryStr = binQueryStr
        BinMap.folderStr = "config/"
        BinMap.modeStr = "UPDATE"
        BinMap.load()

        commandStr = BinMap.storeDict["command"]

        # ---- Initialization for Assembling ----
        Target = libConfig.config()
        Target.queryStr = self.branchStr
        Target.folderStr = "config/"
        Target.modeStr = "UPDATE"
        Target.load()

        branchStr = Target.storeDict.get("branch", "")
        groupList = Target.storeDict.get("group", [])
        replicationList = Target.storeDict.get("replication", [])
        # NOTE(review): used as a single string here -- confirm the stored
        # "[hisat2]Condition" value is not a list for this branch.
        hisat2ConditionStr = Target.storeDict.get("[hisat2]Condition", "")
        conditionList = Target.storeDict.get("conditionList", [])
        inputFileNameStr = Target.storeDict.get(
            "[{}]inputFileName".format(self.headerStr), "")
        outputFileNameStr = Target.storeDict.get(
            "[{}]outputFileName".format(self.headerStr), "")
        outputFolderTemplateStr = Target.storeDict.get(
            "[{}]outputFolder".format(self.headerStr), "")

        # A missing "testing" key counts as testing mode.
        self.testingBool = bool(Target.storeDict.get("testing", True))

        for conditionTup in conditionList:
            antCondStr = conditionTup[0]
            trimCondStr = conditionTup[1]

            Annotate = libConfig.config()
            Annotate.queryStr = antCondStr
            Annotate.folderStr = "config/"
            Annotate.modeStr = "UPDATE"
            Annotate.load()

            threadStr = Annotate.storeDict.get("thread", "")
            antPathStr = Annotate.storeDict.get("antPath", "")

            # Format into a separate name so the template stays intact for
            # the next condition. Reassigning the template itself would make
            # every later condition reuse the first condition's folder.
            outputFolderStr = outputFolderTemplateStr.format(
                annotateCondition=antCondStr,
                trimCondition=trimCondStr)

            # ---- Action ----
            Print = libPrint.timer()
            Print.logFilenameStr = "05-{stringtie}-assembling-{branch}-{annotate}-{trim}".format(
                stringtie=self.headerStr,
                branch=branchStr,
                annotate=antCondStr,
                trim=trimCondStr,
            )
            Print.folderStr = "log/"
            Print.testingBool = self.testingBool
            Print.startLog()

            for groupStr in groupList:
                for repliStr in replicationList:
                    # Kept inside the loop so no folder is created when there
                    # are no samples; exist_ok makes repeated calls harmless.
                    pathlib.Path(outputFolderStr).mkdir(parents=True,
                                                        exist_ok=True)

                    outputFilenameStr = outputFileNameStr.format(
                        annotateCondition=antCondStr,
                        trimCondition=trimCondStr,
                        group=groupStr,
                        replication=repliStr)

                    inputFilenameStr = inputFileNameStr.format(
                        annotateCondition=antCondStr,
                        hisat2Condition=hisat2ConditionStr,
                        trimCondition=trimCondStr,
                        group=groupStr,
                        replication=repliStr)

                    finalCommandStr = commandStr.format(
                        bamfile=inputFilenameStr,
                        outputfile=outputFilenameStr,
                        thread=threadStr,
                        antPath=antPathStr)

                    Print.phraseStr = finalCommandStr
                    Print.runCommand()

            Print.stopLog()
Esempio n. 6
0
    def estimating(self):
        """Run the "binStringTie-ESTIMATE" command for every sample.

        Loads the command template from "binStringTie-ESTIMATE" and the
        experiment entry named by ``self.branchStr`` from "config/". For each
        (annotate, trim) pair in "conditionList" it loads the annotation
        entry named by the pair's first element, opens a log, and for every
        group/replication pair creates the ballgown folder, fills in the
        command template and hands it to the log timer to run.

        When ``self.directEstimating`` is truthy the annotation's "antPath"
        is passed as ``mergePath``; otherwise the formatted merged-file name
        is passed.

        Side effects: sets ``self.testingBool`` from the experiment's
        "testing" value (treated as True when the key is absent).

        Raises:
            KeyError: if the command entry has no "command" key.
        """
        def loadConfig(queryStr):
            # Every entry used here is read from "config/" in the same way.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        def headerKey(suffixStr):
            # Experiment keys for this step are prefixed with "[<header>]".
            return "[{}]{}".format(self.headerStr, suffixStr)

        # ---- Command template for estimating ----
        estimateTemplateStr = loadConfig(
            "binStringTie-ESTIMATE").storeDict["command"]

        # ---- Experiment settings ----
        Target = loadConfig(self.branchStr)

        branchStr = Target.storeDict.get("branch", "")
        groupList = Target.storeDict.get("group", [])
        replicationList = Target.storeDict.get("replication", [])
        hisat2ConditionStr = Target.storeDict.get("[hisat2]Condition", "")
        conditionList = Target.storeDict.get("conditionList", [])
        bamTemplateStr = Target.storeDict.get(headerKey("inputFileName"), "")
        mergedTemplateStr = Target.storeDict.get(
            headerKey("mergedFileName"), "")
        ballgownTemplateStr = Target.storeDict.get(
            headerKey("ballgownFolder"), "")
        gtfTemplateStr = Target.storeDict.get(headerKey("gtfFileName"), "")
        tsvTemplateStr = Target.storeDict.get(headerKey("tsvFileName"), "")

        # A missing "testing" key counts as testing mode.
        self.testingBool = bool(Target.storeDict.get("testing", True))

        for conditionTup in conditionList:
            antCondStr = conditionTup[0]
            trimCondStr = conditionTup[1]

            Annotate = loadConfig(antCondStr)
            threadStr = Annotate.storeDict.get("thread", "")
            antPathStr = Annotate.storeDict.get("antPath", "")

            # ---- Action ----
            Logger = libPrint.timer()
            Logger.logFilenameStr = "05-{stringtie}-estimating-{branch}-{annotate}-{trim}".format(
                stringtie=self.headerStr,
                branch=branchStr,
                annotate=antCondStr,
                trim=trimCondStr,
            )
            Logger.folderStr = "log/"
            Logger.testingBool = self.testingBool
            Logger.startLog()

            for groupStr in groupList:
                for repliStr in replicationList:
                    # Keys shared by the ballgown, GTF and TSV templates.
                    sampleKeyDict = {
                        "annotateCondition": antCondStr,
                        "trimCondition": trimCondStr,
                        "group": groupStr,
                        "replication": repliStr,
                    }

                    ballgownPathStr = ballgownTemplateStr.format(
                        **sampleKeyDict)
                    pathlib.Path(ballgownPathStr).mkdir(parents=True,
                                                        exist_ok=True)

                    # The BAM template additionally receives hisat2Condition.
                    bamPathStr = bamTemplateStr.format(
                        hisat2Condition=hisat2ConditionStr, **sampleKeyDict)

                    # The merged file is per condition, not per sample.
                    mergedPathStr = mergedTemplateStr.format(
                        annotateCondition=antCondStr,
                        trimCondition=trimCondStr)

                    gtfPathStr = gtfTemplateStr.format(**sampleKeyDict)
                    tsvPathStr = tsvTemplateStr.format(**sampleKeyDict)

                    if self.directEstimating:
                        referencePathStr = antPathStr
                    else:
                        referencePathStr = mergedPathStr

                    Logger.phraseStr = estimateTemplateStr.format(
                        thread=threadStr,
                        mergePath=referencePathStr,
                        bamfile=bamPathStr,
                        ballgownPath=ballgownPathStr,
                        gtffile=gtfPathStr,
                        tsvfile=tsvPathStr)
                    Logger.runCommand()

            Logger.stopLog()
Esempio n. 7
0
    def converting(self):
        """Convert annotations with gffread and copy the result into place.

        Loads the "binCufflinks-gffread" and "commandCP" command templates
        and the experiment entry named by ``self.branchStr`` from "config/".
        Only conditions whose "transcriptome" value is "gffRead" are handled.
        For each of them the genome entry is loaded; the gffread command is
        run only when the genome's "gtfPath" does not exist yet; then the
        target folder is created and the copy command is run from "gtfPath"
        to the formatted "transcriptomeGTF" path.

        Side effects: sets ``self.testingBool`` from the experiment's
        "testing" value (treated as True when the key is absent). The log for
        each condition is written under the genome's "dbgaPath".

        Raises:
            KeyError: if a required key is missing from any loaded entry.
        """
        def loadConfig(queryStr):
            # Every entry used here is read from "config/" in the same way.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        # ---- Command templates ----
        BinGFF = loadConfig("binCufflinks-gffread")
        Copying = loadConfig("commandCP")

        # ---- Experiment settings ----
        Target = loadConfig(self.branchStr)

        # A missing "testing" key counts as testing mode.
        self.testingBool = bool(Target.storeDict.get("testing", True))

        gffreadTemplateStr = BinGFF.storeDict["command"]
        copyTemplateStr = Copying.storeDict["command"]

        branchStr = self.branchStr
        gffReadConditionList = []
        for candidateDict in Target.storeDict["conditionsList"]:
            if candidateDict["transcriptome"] == "gffRead":
                gffReadConditionList.append(candidateDict)
        gtfDict = Target.storeDict["gtfDict"]

        for conditionDict in gffReadConditionList:
            genomeStr = conditionDict["genome"]
            trimStr = conditionDict["trim"]
            transcriptomeStr = conditionDict["transcriptome"]

            formatKeyDict = {
                "branch": branchStr,
                "annotate": genomeStr,
                "trim": trimStr,
                "folder": gtfDict[transcriptomeStr]['folder'],
            }
            folderTemplateStr = Target.storeDict["transcriptomeFolder"]
            gtfTemplateStr = Target.storeDict["transcriptomeGTF"]

            Genome = loadConfig(genomeStr)  # per-genome paths
            annotationPathStr = Genome.storeDict["antPath"]
            convertedPathStr = Genome.storeDict["gtfPath"]
            logFolderStr = Genome.storeDict["dbgaPath"]

            Logger = libPrint.timer()
            Logger.logFilenameStr = "05-gffConversion-{branch}-{annotate}".format(
                branch=branchStr,
                annotate=genomeStr,
            )
            Logger.folderStr = logFolderStr
            Logger.testingBool = self.testingBool
            Logger.startLog()

            copyTargetStr = gtfTemplateStr.format(**formatKeyDict)

            # Convert only once: an existing output file is reused as is.
            alreadyConvertedBool = pathlib.Path(convertedPathStr).exists()
            if not alreadyConvertedBool:
                Logger.phraseStr = gffreadTemplateStr.format(
                    inputFile=annotationPathStr,
                    outputFile=convertedPathStr)
                Logger.runCommand()

            copyFolderStr = folderTemplateStr.format(**formatKeyDict)
            pathlib.Path(copyFolderStr).mkdir(parents=True, exist_ok=True)

            Logger.phraseStr = copyTemplateStr.format(output=convertedPathStr,
                                                      target=copyTargetStr)
            Logger.runCommand()

            Logger.stopLog()
Esempio n. 8
0
    def diffing(self):
        """Run the "binCuffDiff-RUN" command once per experiment condition.

        Loads the command template from "binCuffDiff-RUN" and the experiment
        entry named by ``self.branchStr`` from "config/". For each entry of
        "conditionsList" it opens a log, creates the result folder, builds
        the BAM argument (replicates of one group joined by ",", groups
        joined by " "), fills in the command template and hands it to the
        log timer to run.

        Side effects: sets ``self.testingBool`` from the experiment's
        "testing" value (treated as True when the key is absent); this only
        happens when there is at least one condition.

        Raises:
            KeyError: if a required key is missing from a loaded entry or
                from a condition dict.
        """
        def loadConfig(queryStr):
            # Every entry used here is read from "config/" in the same way.
            Loaded = libConfig.config()
            Loaded.queryStr = queryStr
            Loaded.folderStr = "config/"
            Loaded.modeStr = "UPDATE"
            Loaded.load()
            return Loaded

        # ---- Command template ----
        cuffdiffTemplateStr = loadConfig("binCuffDiff-RUN").storeDict["command"]

        # ---- Experiment settings ----
        Target = loadConfig(self.branchStr)

        groupList = Target.storeDict["group"]
        replicationList = Target.storeDict["replication"]
        threadStr = Target.storeDict["thread"]
        bamTemplateStr = Target.storeDict["[hisat2]outputFileName"]
        gtfTemplateStr = Target.storeDict["transcriptomeGTF"]
        gtfDict = Target.storeDict["gtfDict"]
        resultTemplateStr = Target.storeDict["[CuffDiff]resultFolder"]

        conditionsList = Target.storeDict["conditionsList"]
        for conditionDict in conditionsList:
            genomeStr = conditionDict["genome"]
            trimStr = conditionDict["trim"]
            transcriptomeStr = conditionDict["transcriptome"]
            folderStr = gtfDict[transcriptomeStr]['folder']
            hisat2ConditionStr = conditionDict["map"]

            # A missing "testing" key counts as testing mode.
            self.testingBool = bool(Target.storeDict.get("testing", True))

            formatKeyDict = {
                "branch": self.branchStr,
                "method": transcriptomeStr,
                "annotate": genomeStr,
                "trim": trimStr,
                "folder": folderStr,
                "hisat2Condition": hisat2ConditionStr,
                "fileType": "-sorted.bam",
            }

            # ---- Action ----
            Logger = libPrint.timer()
            Logger.logFilenameStr = "07-CuffDiff-{branch}-from({method})-{annotate}-{trim}".format(
                **formatKeyDict)
            Logger.folderStr = "log/"
            Logger.testingBool = self.testingBool
            Logger.startLog()

            resultPathStr = resultTemplateStr.format(**formatKeyDict)
            pathlib.Path(resultPathStr).mkdir(parents=True, exist_ok=True)

            gtfPathStr = gtfTemplateStr.format(**formatKeyDict)

            # One comma-joined entry per group, holding that group's replicates.
            bamGroupList = [
                ",".join([
                    bamTemplateStr.format(group=groupStr,
                                          replication=repliStr,
                                          **formatKeyDict)
                    for repliStr in replicationList
                ])
                for groupStr in groupList
            ]

            Logger.phraseStr = cuffdiffTemplateStr.format(
                thread=threadStr,
                outputFolder=resultPathStr,
                labelList=",".join(groupList),
                mergedGTF=gtfPathStr,
                bamFiles=" ".join(bamGroupList),
                **formatKeyDict)
            Logger.runCommand()

            Logger.stopLog()
#!/usr/bin/env python3
import libConfig
# ---- Configuration of Indexing Conditions ----
# Save the "speciesDatabase" entry under "config/". Its values are the file
# locations and settings for one reference genome.
SpeDataBase = libConfig.config()
SpeDataBase.queryStr = "speciesDatabase"
SpeDataBase.folderStr = "config/"
SpeDataBase.queryDict = {
    # Name of another config entry; presumably the command entry this one is
    # meant to be combined with -- confirm against libConfig.
    "from": "binHISAT2-BUILD",
    # Folder holding the genome annotation files.
    "dbgaPath": "userData/dbga-GenomeAnnotation/speciesDatabase/",
    # Genome sequence file.
    "seqPath":
    "userData/dbgs-GenomeSequence/speciesDatabase/speciesDatabase.fn",
    # Annotation in GFF3 format.
    "antPath":
    "userData/dbga-GenomeAnnotation/speciesDatabase/speciesDatabase.gff3",
    # Annotation in GTF format.
    "gtfPath":
    "userData/dbga-GenomeAnnotation/speciesDatabase/speciesDatabase.gtf",
    # Path prefix and containing folder of the HISAT2 index.
    "indexHeader": "largeData/02-hisat2Index/speciesDatabase",
    "checkFolder": "largeData/02-hisat2Index/",
    # Thread count, stored as a string.
    "thread": "6",
    "testing": False,
}
# NOTE(review): "UPDATE" presumably merges into an existing entry rather than
# replacing it -- confirm against libConfig.
SpeDataBase.modeStr = "UPDATE"
SpeDataBase.save()

SpeDataBaseTwo = libConfig.config()
SpeDataBaseTwo.queryStr = "speciesDatabase2"
SpeDataBaseTwo.folderStr = "config/"
SpeDataBaseTwo.queryDict = {
    "from": "binHISAT2-BUILD",
    "dbgaPath": "userData/dbga-GenomeAnnotation/speciesDatabase2/",
    "seqPath":
    "userData/dbgs-GenomeSequence/speciesDatabase2/speciesDatabase2.fn",
Esempio n. 10
0
    hisat2-build [options]* <reference_in> <ht2_base>

Main arguments
    <reference_in>
        A comma-separated list of FASTA files containing the reference sequences to be 
        aligned to, or, if -c is specified, the sequences themselves. 
        E.g., <reference_in> might be chr1.fa,chr2.fa,chrX.fa,chrY.fa, or, 
        if -c is specified, this might be GGTCATCCT,ACGGGTCGT,CCGTTCTATGCGGCTTA.

    <ht2_base>
        The basename of the index files to write. 
        By default, hisat2-build writes files named NAME.1.ht2, NAME.2.ht2, NAME.3.ht2, NAME.4.ht2, 
        NAME.5.ht2, NAME.6.ht2, NAME.7.ht2, and NAME.8.ht2 where NAME is <ht2_base>.
"""

# Save the "binHISAT2-BUILD" entry under "config/". It holds a single command
# template for hisat2-build; {thread}, {seqPath} and {indexHeader} are
# placeholders to be filled in by whoever formats the template.
HISAT = libConfig.config()
HISAT.queryStr = "binHISAT2-BUILD"
HISAT.folderStr = "config/"
HISAT.queryDict = {
    "command": "bin/hisat2/hisat2-build -p {thread} {seqPath} {indexHeader}"
}
# NOTE(review): "OVERWRITE" presumably replaces any existing entry of the same
# name -- confirm against libConfig.
HISAT.modeStr = "OVERWRITE"
HISAT.save()

# ---- Configuration of Trimming Command ----
"""
FROM: http://www.usadellab.org/cms/?page=trimmomatic

java -jar <path to trimmomatic.jar> PE [-threads <threads>] [-phred33 | -phred64] [-trimlog <logFile>] \
    <input 1> <input 2> <paired output 1> <unpaired output 1> <paired output 2> <unpaired output 2> \
    <step 1> ...
Esempio n. 11
0
#!/usr/bin/env python3
import libConfig
# ---- Configuration of Experiment Design ---- 
expRep = libConfig.config()
expRep.queryStr = "speciesTreatment"
expRep.folderStr = "config/"
# Arguments/Parameters below need to modify depend on 
# your experiment design and naming style
expRep.queryDict = {
    "branch" : "speciesTreatment",
    "group" : ["control","treat1","treat2","treat3","treat4"],
    "direction" : ["R1","R2"],
    "replication" : ["1","2","3"],
    "pairPostfix" : "pair",
    "unpairPostfix" : "unpair",
    "mode" : "pairEnd",
    "thread" : "6",
    "conditionsList" : [
        {
            "genome" : "speciesDatabase",
            "trim" : "trimQ30",
            "transcriptome" : "dsStringtie",
            "map" : "hisat2ForStringtie",
        },
        {
            "genome" : "speciesDatabase",
            "trim" : "trimQ30",
            "transcriptome" : "gffRead",
            "map" : "hisat2ForCufflinks",
        }
    ],