コード例 #1
0
    def __pslCheck(self):
        """Runs pslCheck program on this table and sends result to reporter's filehandle."""
        self.reporter.beginStep(self.db, self.table, "pslCheck")
        # Get baseColorUseSequence setting
        tdbCommand = ["tdbQuery", "select baseColorUseSequence from " + self.db +\
                " where track='" + self.table +"' or table='"+ self.table +"'"]
        tdbOut, tdbErr = qaUtils.runCommand(tdbCommand)
        # Turn tdbOut into array so we can extract different pieces
        tdbOut = tdbOut.replace("\n", "")
        tdbOut = tdbOut.split(" ")
        # tdbOut[1] should be what option is used for baseColorUseSequence
        # We want to fully run pslCheck for those entries with "table" and "extFile"
        if len(tdbOut) > 2:
            if tdbOut[1] == "table":
                # Get sizes of sequences in specified table
                sizesOut = qaUtils.callHgsql(
                    self.db, "select name, length(seq) from " + tdbOut[2])
            elif tdbOut[1] == "extFile":
                # Extract sizes from named seq table
                sizesOut = qaUtils.callHgsql(
                    self.db, "select acc, size from " + tdbOut[2])

            # Write itemSizes to file
            itemSizes = open("%s.sizes" % self.table, 'w')
            itemSizes.write(sizesOut)
            itemSizes.close()
            # Check if $db.sizes file exists
            if not os.path.isfile("%s.chrom.sizes" % self.db):
                # If it doens't exist, get chrom sizes from chromInfo
                chromSizesOut = qaUtils.callHgsql(self.db,
                                                  "select * from chromInfo")
                chromSizes = open("%s.chrom.sizes" % self.db, 'w')
                chromSizes.write(chromSizesOut)
                chromSizes.close()
            # Run more in-depth version of pslCheck
            command = ("pslCheck", "-querySizes=%s.sizes" % self.table ,\
                    "-targetSizes=%s.chrom.sizes" % self.db, "-db=" + self.db, self.table)
            self.reporter.writeCommand(command)
            commandOut, commandErr, commandReturnCode = qaUtils.runCommandNoAbort(
                command)
            # Write output to file
            self.reporter.fh.write(commandErr)
            # Clean up intermediate item sizes file
            os.remove("%s.sizes" % self.table)

        # For everything else, use generic set of steps
        else:
            command = ["pslCheck", "db=" + self.db, self.table]
            self.reporter.writeCommand(command)
            commandOut, commandErr, commandReturnCode = qaUtils.runCommandNoAbort(
                command)
            self.reporter.fh.write(commandErr)

        if commandReturnCode:
            self.recordError()
        else:
            self.recordPass()
        self.reporter.endStep()
コード例 #2
0
def checkGapOverlap(db, table, checkUnbridged=False):
    """Check if any items in a table over lap with bridged and unbridged gaps"""
    tableBedTemp = makeBedFromTable(db, table)

    gapOverlapOut = ""
    gapTableList = checkSplit(db, "gap")
    gapFileName = str(db) + ".gap.bed.temp"
    gapFile = open(gapFileName, 'w')
    for tbl in gapTableList:
        hgsqlGapOut = qaUtils.callHgsql(
            db, "select chrom, chromStart, chromEnd from " + tbl)
        gapFile.write(hgsqlGapOut)
    gapFile.close()

    gapOverFile = str(db) + ".gapOver.bed"
    bedIntCmd = [
        "bedIntersect", "-aHitAny", gapFileName, tableBedTemp.name, gapOverFile
    ]
    qaUtils.runCommand(bedIntCmd)

    gapOverUrls = constructOutputUrls(db, table, gapOverFile)
    gapOverlapOut += gapOverUrls
    # Remove intermediate files
    os.remove(gapOverFile)
    os.remove(gapFileName)

    # If checkUnbridged is set, then also output overlap with unbridged gaps
    if checkUnbridged == True:
        # Create temp file to store unbridged gaps
        gapUnbrFileName = str(db) + ".gap.unbr.bed.temp"
        gapUnbrFile = open(gapUnbrFileName, 'w')
        # Get unbridged gaps
        for tbl in gapTableList:
            hgsqlGapOut = qaUtils.callHgsql(
                db, "select chrom, chromStart, chromEnd from " + tbl +
                " where bridge='no'")
            gapUnbrFile.write(hgsqlGapOut)
        gapUnbrFile.close()

        gapUnbrOverFile = str(db) + ".gapOver.unbr.bed"
        bedIntCmd = [
            "bedIntersect", "-aHitAny", gapUnbrFileName, tableBedTemp.name,
            gapUnbrOverFile
        ]
        qaUtils.runCommand(bedIntCmd)

        gapUnbrOverUrls = constructOutputUrls(db, table, gapUnbrOverFile)
        gapOverlapOut += gapUnbrOverUrls
        # Remove intermediate files
        os.remove(gapUnbrOverFile)
        os.remove(gapUnbrFileName)

    return gapOverlapOut
コード例 #3
0
ファイル: pslQa.py プロジェクト: ucscGenomeBrowser/kent
    def __pslCheck(self):
        """Runs pslCheck program on this table and sends result to reporter's filehandle."""
        self.reporter.beginStep(self.db, self.table, "pslCheck")
        # Get baseColorUseSequence setting
        tdbCommand = ["tdbQuery", "select baseColorUseSequence from " + self.db +\
                " where track='" + self.table +"' or table='"+ self.table +"'"]
        tdbOut, tdbErr = qaUtils.runCommand(tdbCommand)
        # Turn tdbOut into array so we can extract different pieces
        tdbOut = tdbOut.replace("\n","")
        tdbOut = tdbOut.split(" ")
        # tdbOut[1] should be what option is used for baseColorUseSequence
        # We want to fully run pslCheck for those entries with "table" and "extFile"
        if len(tdbOut) > 2:
            if tdbOut[1] == "table":
                # Get sizes of sequences in specified table
                sizesOut = qaUtils.callHgsql(self.db, "select name, length(seq) from " + tdbOut[2])
            elif tdbOut[1] == "extFile":
                # Extract sizes from named seq table
                sizesOut = qaUtils.callHgsql(self.db, "select acc, size from " + tdbOut[2])

            # Write itemSizes to tempfile
            itemSizesTemp = tempfile.NamedTemporaryFile(mode='w')
            itemSizes = open(itemSizesTemp.name, 'w')
            itemSizes.write(sizesOut)
            itemSizes.close()
            # Write chromSizes to tempfile
            chromSizesTemp = tempfile.NamedTemporaryFile(mode='w')
            chromSizesOut = qaUtils.callHgsql(self.db, "select chrom,size from chromInfo")
            chromSizes = open(chromSizesTemp.name, 'w')
            chromSizes.write(chromSizesOut)
            chromSizes.close()

            # Run more in-depth version of pslCheck
            command = ("pslCheck", "-querySizes=" + itemSizesTemp.name ,\
                    "-targetSizes=" + chromSizesTemp.name, "-db=" + self.db, self.table)
            self.reporter.writeCommand(command)
            commandOut, commandErr, commandReturnCode = qaUtils.runCommandNoAbort(command)
            # Write output to file
            self.reporter.fh.write(commandErr)

        # For everything else, use generic set of steps
        else:
            command = ["pslCheck", "db=" + self.db, self.table]
            self.reporter.writeCommand(command)
            commandOut, commandErr, commandReturnCode = qaUtils.runCommandNoAbort(command)
            self.reporter.fh.write(commandErr)

        if commandReturnCode:
            self.recordError()
        else:
            self.recordPass()
        self.reporter.endStep()
コード例 #4
0
ファイル: trackUtils.py プロジェクト: ucscGenomeBrowser/kent
def checkSplit(db, table):
    """Check if table is split by chromosome"""
    splitCheck = qaUtils.callHgsql(db, "show tables like '" + table + "'")
    splitCheck = splitCheck.strip()

    splitTableList = []
    if splitCheck == table:
        splitTableList.append(table)
    elif splitCheck == "":
        hgsqlTblsOut = qaUtils.callHgsql(db, "show tables like 'chr%" + table + "%'")
        splitTableList = hgsqlTblsOut.split('\n')
        splitTableList.remove("")
    return splitTableList
コード例 #5
0
def checkSplit(db, table):
    """Check if table is split by chromosome"""
    splitCheck = qaUtils.callHgsql(db, "show tables like '" + table + "'")
    splitCheck = splitCheck.strip()

    splitTableList = []
    if splitCheck == table:
        splitTableList.append(table)
    elif splitCheck == "":
        hgsqlTblsOut = qaUtils.callHgsql(
            db, "show tables like 'chr%" + table + "%'")
        splitTableList = hgsqlTblsOut.split('\n')
        splitTableList.remove("")
    return splitTableList
コード例 #6
0
ファイル: tableCheck.py プロジェクト: yuzhenpeng/kent
def checkTableIndex(database, tables):
    """ Check if each table has an index or not."""
    notgbdbtablelist = tables - getGbdbTables(database, tables)
    tablelist = list()
    missing = set()
    output = []

    if not notgbdbtablelist:
        output.append("No tables require an index")
        output.append("")
        return (output, missing)

    for i in notgbdbtablelist:
        hgsqlOut = qaUtils.callHgsql(database, "show indexes from " + i)
        index = hgsqlOut.split()
        if index:
            pass
        else:
            missing.add(i)
    if missing:
        output.append("Tables missing an index:")
        for i in missing:
            output.append(i)
        output.append("")
    else:
        output.append("No missing indices")
        output.append("")

    return (output, missing)
コード例 #7
0
 def __getChromCountsFromDatabase(self):
     """Returns a list of ['chrom', 'size (bases)', 'count', 'count/megabase'] lists for table"""
     chromCol = self.__getChromCol()
     # first half of the UNION below gets rows that do not appear in table at all
     query = (
         "(SELECT chrom, size, 0 as count FROM chromInfo WHERE chrom NOT IN"
         + " (select distinct "
         + chromCol
         + " from "
         + self.table
         + "))"
         + " UNION"
         + " (SELECT inf.chrom, inf.size, count(tab."
         + chromCol
         + ") as count"
         + " FROM chromInfo as inf, "
         + self.table
         + " as tab"
         + " WHERE inf.chrom=tab."
         + chromCol
         + " GROUP BY chrom)"
     )
     hgsqlOut = qaUtils.callHgsql(self.db, query)
     rows = hgsqlOut.split("\n")  # makes list of strings with tabs embedded
     rows.remove("")  # get rid of the empty list item created by the final newline
     rowsList = []
     for row in rows:
         rowsList.append(row.split("\t"))  # result is list of ['chrom', 'size', 'count']
     for row in rowsList:
         # calculate count/size (in megabases) as floats, put in rowsList[3]
         row.append(float(row[2]) * 1000000 / float(row[1]))
     return rowsList
コード例 #8
0
ファイル: trackUtils.py プロジェクト: ucscGenomeBrowser/kent
def checkCanPack(db, table):
    """Check if a track can be set to pack"""
    visibility = "pack"
    canPackOut = qaUtils.callHgsql(db, "select canPack from trackDb where tableName='" + table + "'")

    if canPackOut != 1:
        visibility = "full"

    return visibility
コード例 #9
0
ファイル: tableCheck.py プロジェクト: yuzhenpeng/kent
def getGbdbTables(database, tableset):
    """ Remove tables that aren't pointers to Gbdb files."""
    sep = "','"
    tablestr = sep.join(tableset)
    tablestr = "'" + tablestr + "'"
    hgsqlOut = qaUtils.callHgsql(database, "select table_name from information_schema.columns where\
                                 table_name in ("+ tablestr + ") and column_name = 'fileName'")
    gbdbtableset = set(hgsqlOut.split())
    return gbdbtableset
コード例 #10
0
def checkCanPack(db, table):
    """Check if a table can be set to pack"""
    visibility = "pack"
    canPackOut = qaUtils.callHgsql(
        db, "select canPack from trackDb where tableName='" + table + "'")

    if canPackOut != 1:
        visibility = "full"

    return visibility
コード例 #11
0
ファイル: tableQa.py プロジェクト: bowhan/kent
 def __checkTableDescription(self):
     """Checks for an autoSql definition for this table in the tableDescriptions table."""
     # Ideally this would also check to see if each column in the table is described.
     self.reporter.beginStep(self.db, self.table, "checking for table descriptions")
     self.reporter.writeStepInfo()
     sqlOut = qaUtils.callHgsql(self.db, "select autoSqlDef from tableDescriptions where\
                               tableName='" + self.table + "'")
     if sqlOut.strip() == '':
         self.reporter.writeLine("No table description for " + self.db + "." + self.table)
         self.recordError()
     else:
         self.recordPass()
     self.reporter.endStep()
コード例 #12
0
ファイル: tableQa.py プロジェクト: Pfiver/RNA-Seqlyze
 def __checkTableDescription(self):
     """Checks for an autoSql definition for this table in the tableDescriptions table."""
     # Ideally this would also check to see if each column in the table is described.
     self.reporter.beginStep(self.db, self.table,
                             "checking for table descriptions")
     self.reporter.writeStepInfo()
     error = False
     sqlOut = qaUtils.callHgsql(
         self.db, "select autoSqlDef from tableDescriptions where\
                               tableName='" + self.table + "'")
     if sqlOut.strip() == '':
         self.reporter.writeLine("ERROR: No table description for " +
                                 self.db + "." + self.table)
         error = True
     self.recordPassOrError(error)
     self.reporter.endStep()
コード例 #13
0
def makeBedFromTable(db, table):
    """Make BED3 tempfile out of a table"""
    # Get chrom, start, and end column names
    trackType = tableTypeUtils.getTrackType(db, table)
    columnNames = tableTypeUtils.getChrStartEndCols(trackType)

    tableList = checkSplit(db, table)
    tableBedTemp = tempfile.NamedTemporaryFile(mode='w')
    tableBed = open(tableBedTemp.name, 'w')
    for tbl in tableList:
        hgsqlCmd = "select " + columnNames[0] + ", " + columnNames[1] + ", " \
                    + columnNames[2] + " from " + tbl
        hgsqlOut = qaUtils.callHgsql(db, hgsqlCmd)
        tableBed.write(hgsqlOut)
    tableBed.close()

    return tableBedTemp
コード例 #14
0
ファイル: trackUtils.py プロジェクト: ucscGenomeBrowser/kent
def makeBedFromTable(db, table, whereOpt=""):
    """Make BED3 out of a table"""
    # Get chrom, start, and end column names
    trackType = getTrackType(db, table)
    columnNames = getChrStartEndCols(trackType)

    tableList = checkSplit(db, table)

    # whereOpt allows you to select only certain items from table, e.g. only unbridged gaps
    if whereOpt:
        whereOpt = " where " + whereOpt
    output = ""
    for tbl in tableList:
            hgsqlCmd = "select " + columnNames[0] + ", " + columnNames[1] + ", " \
                        + columnNames[2] + " from " + tbl + whereOpt
            hgsqlOut = qaUtils.callHgsql(db, hgsqlCmd)
            output += hgsqlOut
    return output
コード例 #15
0
def makeBedFromTable(db, table, whereOpt=""):
    """Make BED3 out of a table"""
    # Get chrom, start, and end column names
    trackType = getTrackType(db, table)
    columnNames = getChrStartEndCols(trackType)

    tableList = checkSplit(db, table)

    # whereOpt allows you to select only certain items from table, e.g. only unbridged gaps
    if whereOpt:
        whereOpt = " where " + whereOpt
    output = ""
    for tbl in tableList:
        hgsqlCmd = "select " + columnNames[0] + ", " + columnNames[1] + ", " \
                    + columnNames[2] + " from " + tbl + whereOpt
        hgsqlOut = qaUtils.callHgsql(db, hgsqlCmd)
        output += hgsqlOut
    return output
コード例 #16
0
 def __getChromCountsFromDatabase(self):
     """Returns a list of ['chrom', 'size (bases)', 'count', 'count/megabase'] lists for table"""
     chromCol = trackUtils.getChromCol(self.tableType)
     # first half of the UNION below gets rows that do not appear in table at all 
     query = "(SELECT chrom, size, 0 as count FROM chromInfo WHERE chrom NOT IN" +\
         " (select distinct " + chromCol + " from " + self.table + "))" +\
         " UNION" +\
         " (SELECT inf.chrom, inf.size, count(tab." + chromCol + ") as count" +\
         " FROM chromInfo as inf, " + self.table + " as tab" +\
         " WHERE inf.chrom=tab." + chromCol + " GROUP BY chrom)"
     hgsqlOut = qaUtils.callHgsql(self.db, query)
     rows = hgsqlOut.split("\n") # makes list of strings with tabs embedded
     rows.remove('') # get rid of the empty list item created by the final newline
     rowsList = []
     for row in rows:
         rowsList.append(row.split("\t")) # result is list of ['chrom', 'size', 'count']
     for row in rowsList:
         # calculate count/size (in megabases) as floats, put in rowsList[3]
         row.append(float(row[2])*1000000/float(row[1]))
     return rowsList
コード例 #17
0
ファイル: tableCheck.py プロジェクト: yuzhenpeng/kent
def checkTableDescriptions(database, tables):
    """ Check if each table has a description or not."""
    tablelist = list()
    missing = set()
    output = []
    orstr = ""
    for i in tables:
        tablelist.append("tableName = '%s'" % i)
        orsep = " OR "
        orstr = orsep.join(tablelist)
    hgsqlOut = qaUtils.callHgsql(database, "select tableName from tableDescriptions where " + orstr)
    described = set(hgsqlOut.split())
    missing = tables - described
    if missing:
        output.append("Tables missing a description:")
        for i in missing:
            output.append(i)
        output.append("")
    else:
        output.append("No tables missing a description")
        output.append("")
    return (output, missing)
コード例 #18
0
ファイル: tableCheck.py プロジェクト: yuzhenpeng/kent
def countPerChrom(database, tables):
    """ Count the amount of rows per chromosome."""
    notgbdbtablelist = tables - getGbdbTables(database, tables)
    tablecounts = dict()
    output = []
    globalseen = set()
    localseen = dict()

    hgsqlOut = qaUtils.callHgsql(database, "select chrom from chromInfo")
    chrlist = set(hgsqlOut.split())

    notPositionalTable = set()
    if not notgbdbtablelist:
        output.append("No tables to count chroms")
        output.append("")
        return (output, tablecounts)
    for i in notgbdbtablelist:
        counts = dict()

        cmd = "hgsql %s -e \"select chrom from %s\"" % (database, i)
        p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
        cmdoutput = p.stdout.read()

        chrs = cmdoutput.split("\n")[1:-1]

        localseen[i] = set()

        if not chrs:
            notPositionalTable.add(i)
            continue
        for j in chrs:
            globalseen.add(j)
            if counts.has_key(j):
                counts[j] = counts[j] + 1
            else:
                localseen[i].add(j)
                counts[j] = 1
        tablecounts[i] = counts

    for i in sorted(tablecounts):
        output.append(i)
        used = set()
        for j in sorted_nicely(tablecounts[i]):
            output.append("%s = %s" % (j, tablecounts[i][j]))

        notused = chrlist - (localseen[i] | (chrlist - globalseen))
        if notused:
            output.append("Seen by others, but not used here:")
            for j in sorted_nicely(notused):
                output.append(j)
        output.append("")
    globalnotused = chrlist - globalseen
    if globalnotused:
        output.append("Not seen anywhere:")
        for i in sorted_nicely(globalnotused):
            output.append(i)
    output.append("")
    if notPositionalTable:
        output.append("Not a positional table:")
        for i in notPositionalTable:
            output.append(i)
    return (output, tablecounts)
コード例 #19
0
ファイル: tableQa.py プロジェクト: bowhan/kent
 def __rowCount(self):
     """Adds the number of rows in this table to the sumRow."""
     rowCount = qaUtils.callHgsql(self.db, "select count(*) from " + self.table)
     self.sumRow.setCount(rowCount.strip())
コード例 #20
0
ファイル: tableQa.py プロジェクト: Pfiver/RNA-Seqlyze
 def __rowCount(self):
     """Adds the number of rows in this table to the sumRow."""
     rowCount = qaUtils.callHgsql(self.db,
                                  "select count(*) from " + self.table)
     self.sumRow.setCount(rowCount.strip())