コード例 #1
0
ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA
def hgLoadSqlTab(db, tableName, sqlName, tabFname, optString=""):
    """ load a tab-separated file into a table by running the hgLoadSqlTab tool.

    db, tableName, sqlName and tabFname are passed, in this order, as the
    positional arguments of hgLoadSqlTab (sqlName is presumably the .sql
    schema file -- confirm against the tool's usage message).
    optString is appended verbatim to the command line.

    If tabFname does not exist, a warning is logged and nothing is run.
    """
    if not isfile(tabFname):
        # logging.warn is a deprecated alias, logging.warning is the supported name
        logging.warning("file %s not found" % tabFname)
        return
    cmd = "hgLoadSqlTab %s %s %s %s %s" % (db, tableName, sqlName,
                                           tabFname, optString)
    maxCommon.runCommand(cmd, verbose=False)
コード例 #2
0
ファイル: msr.py プロジェクト: strbean/pubMunch-BRCA
    def allResults(self):
        """ run self.rows through the MSR pipeline and return the joined results.

        self.rows is written to a temporary input file with writeMsrIn(),
        runMsr.sh from msrDir is run on it, and every row parsed from the
        output file is appended to a copy of the input row selected by the
        output row's chunkSentId. (The original note says the rows carry their
        sentence in the last field.)

        Returns a list of lists: the fields of the input row followed by the
        fields of the matching MSR output row.
        """
        tstart = datetime.now()
        inFh, tempFnameIn = writeMsrIn(self.rows)
        try:
            logging.info("Running MSR pipeline on %d sentences" % len(self.rows))
            ofh2, tempFnameOut = pubGeneric.makeTempFile("msrNlpOut", ".txt")
            try:
                cmd = "%s/runMsr.sh %s %s" % (msrDir, tempFnameIn, tempFnameOut)
                maxCommon.runCommand(cmd)

                joinedRows = []
                logging.info("Parsing MSR output")
                for msrRow in parseMsrOut(tempFnameOut):
                    textRow = list(self.rows[int(msrRow.chunkSentId)])
                    textRow.extend(msrRow)
                    joinedRows.append(textRow)
            finally:
                # close the temp handles even if the pipeline or the parser fails
                ofh2.close()
        finally:
            inFh.close()
        # lazy formatting: repr() is only computed when debug logging is enabled
        logging.debug("results %r", joinedRows)

        tend = datetime.now()
        # total_seconds() includes the days part, .seconds alone wraps after 24 hours
        secs = (tend - tstart).total_seconds()
        logging.info("msr runtime: %d" % secs)
        return joinedRows
コード例 #3
0
ファイル: msr.py プロジェクト: Moxikai/pubMunch
    def allResults(self):
        """ run self.rows through the MSR pipeline and return the joined results.

        The rows are written to a temporary file by writeMsrIn(), runMsr.sh
        (found in msrDir) processes that file, and each parsed output row is
        appended to a copy of the input row that its chunkSentId points to.
        (The original note says the rows carry their sentence in the last field.)

        Returns a list of lists, input row fields first, MSR fields after them.
        """
        tstart = datetime.now()
        inFh, tempFnameIn = writeMsrIn(self.rows)
        try:
            logging.info("Running MSR pipeline on %d sentences" % len(self.rows))
            ofh2, tempFnameOut = pubGeneric.makeTempFile("msrNlpOut", ".txt")
            try:
                cmd = "%s/runMsr.sh %s %s" % (msrDir, tempFnameIn, tempFnameOut)
                maxCommon.runCommand(cmd)

                joinedRows = []
                logging.info("Parsing MSR output")
                for msrRow in parseMsrOut(tempFnameOut):
                    textRow = list(self.rows[int(msrRow.chunkSentId)])
                    textRow.extend(msrRow)
                    joinedRows.append(textRow)
            finally:
                # the handles are released on errors too, not only on success
                ofh2.close()
        finally:
            inFh.close()
        # repr() of the result is only built if the debug level is active
        logging.debug("results %r", joinedRows)

        tend = datetime.now()
        # .seconds ignores the days component of the timedelta, total_seconds() does not
        secs = (tend - tstart).total_seconds()
        logging.info("msr runtime: %d" % secs)
        return joinedRows
コード例 #4
0
ファイル: pubStore.py プロジェクト: joepickrell/pubMunch
 def _gzipAndMove(self, fname, finalName):
     """ compress fname with the gzip command, copy the resulting .gz file to
     finalName and delete the local .gz file afterwards """
     compressedName = fname + ".gz"
     # get rid of a .gz file left over from an earlier run before gzip is started
     leftoverExists = isfile(compressedName)
     if leftoverExists:
         os.remove(compressedName)
     gzipCmd = "gzip %s" % fname
     maxCommon.runCommand(gzipCmd)
     logging.debug("compressing and copying files table to %s" % finalName)
     # copy + remove instead of a rename
     shutil.copyfile(compressedName, finalName)
     os.remove(compressedName)
コード例 #5
0
ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch
def listTables(db, expr):
    """ return list of table names in db that match the mysql LIKE pattern expr.

    Runs 'show tables like "<expr>"' through hgsql, with the output redirected
    into a temporary file, and returns the whitespace-stripped lines of that file.
    """
    tmpFile = tempfile.NamedTemporaryFile(prefix="pubBlat.dropTables")
    try:
        tmpName = tmpFile.name
        cmd = """hgsql %s -NB -e 'show tables like "%s"' > %s """ % (db, expr, tmpName)
        maxCommon.runCommand(cmd)

        # read through a second handle that is closed right after reading
        with open(tmpName) as tableFh:
            return [line.strip() for line in tableFh]
    finally:
        # closing the NamedTemporaryFile deletes it, also if hgsql failed
        tmpFile.close()
コード例 #6
0
ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA
def listTables(db, expr):
    """ return list of table names in db that match the mysql LIKE pattern expr.

    hgsql runs 'show tables like "<expr>"' and its output is redirected to a
    temporary file; the stripped lines of that file are the result.
    """
    tmpFile = tempfile.NamedTemporaryFile(prefix="pubBlat.dropTables")
    try:
        tmpName = tmpFile.name
        cmd = """hgsql %s -NB -e 'show tables like "%s"' > %s """ % (db, expr,
                                                                     tmpName)
        maxCommon.runCommand(cmd)

        # the read handle is closed deterministically by the with-block
        with open(tmpName) as tableFh:
            return [line.strip() for line in tableFh]
    finally:
        # close() removes the temporary file, whether or not the command worked
        tmpFile.close()
コード例 #7
0
ファイル: pubKeyVal.py プロジェクト: maximilianh/pubMunch
def startRedis(dbFname):
    """ starts redis on current server as daemon.
    Creates status files with filename dbName".pid" and dbName".host".
    Returns a tuple ("localhost", port).

    The port is obtained from findFreePort() and is also appended to the
    global list redisPorts; shutdownRedisServers is registered with atexit.
    The function blocks until the server accepts connections.

    >>> import pubGeneric
    >>> pubGeneric.setupLogging(__file__, None)
    >>> h, p = startRedis("/tmp/test.tab.gz")
    >>> r = redis.Redis(port=p)
    >>> r.set("hello", "world")
    True
    >>> r.get("hello")
    'world'
    >>> r.get("world")
    >>> r.shutdown()
    """
    dbFname = abspath(dbFname)
    pidFname = dbFname + ".pid"
    port = findFreePort()
    dirName = dirname(dbFname)
    baseName = basename(dbFname) + ".rdb"

    hostFname = dbFname + ".host"
    hostname = socket.gethostbyname("localhost")
    hostDesc = hostname + ":" + str(port)
    # with-block: the host file is flushed and closed before the server is started
    with open(hostFname, "w") as hostFh:
        hostFh.write(hostDesc)
    logging.info("Wrote redis host info %s to %s" % (hostDesc, hostFname))
    maxCommon.delOnExit(hostFname)
    maxCommon.delOnExit(pidFname)
    atexit.register(shutdownRedisServers)
    global redisPorts
    redisPorts.append(port)

    cmd = ["redis-server", "--daemonize", "yes", "--pidfile", pidFname,
           "--port", str(port), "--rdbchecksum", "no", "--dir", dirName,
           "--dbfilename", baseName, "--maxmemory", "200gb"]
    logging.info("Starting up redis server on localhost")
    maxCommon.runCommand(cmd)

    # wait until startup is complete
    # NOTE(review): there is no timeout, this polls forever if the server never comes up
    while True:
        try:
            dbSize = redis.Redis(port=port).dbsize()
            break
        except redis.ConnectionError:
            logging.info("Waiting for 1 sec for redis startup completion")
            time.sleep(1)
    logging.info("Redis startup completed, dbSize=%d" % dbSize)

    return "localhost", port
コード例 #8
0
ファイル: pubKeyVal.py プロジェクト: maximilianh/pubMunch
def startRedis(dbFname):
    """ starts redis on current server as daemon.
    Creates status files with filename dbName".pid" and dbName".host".
    Returns a tuple ("localhost", port).

    findFreePort() selects the port, which is also recorded in the global
    list redisPorts; shutdownRedisServers is registered to run at exit.
    Returns only after a connection to the new server succeeded.

    >>> import pubGeneric
    >>> pubGeneric.setupLogging(__file__, None)
    >>> h, p = startRedis("/tmp/test.tab.gz")
    >>> r = redis.Redis(port=p)
    >>> r.set("hello", "world")
    True
    >>> r.get("hello")
    'world'
    >>> r.get("world")
    >>> r.shutdown()
    """
    dbFname = abspath(dbFname)
    pidFname  = dbFname+".pid"
    port      = findFreePort()
    dirName   = dirname(dbFname)
    baseName  = basename(dbFname)+".rdb"

    hostFname = dbFname+".host"
    hostname  = socket.gethostbyname("localhost")
    hostDesc  = hostname+":"+str(port)
    # write through a with-block so the handle is closed (and flushed) right away
    with open(hostFname, "w") as hostFh:
        hostFh.write(hostDesc)
    logging.info("Wrote redis host info %s to %s" % (hostDesc, hostFname))
    maxCommon.delOnExit(hostFname)
    maxCommon.delOnExit(pidFname)
    atexit.register(shutdownRedisServers)
    global redisPorts
    redisPorts.append(port)

    cmd = ["redis-server", "--daemonize", "yes", "--pidfile", pidFname,
           "--port", str(port), "--rdbchecksum", "no", "--dir", dirName,
           "--dbfilename", baseName, "--maxmemory", "200gb"]
    logging.info("Starting up redis server on localhost")
    maxCommon.runCommand(cmd)

    # wait until startup is complete
    # NOTE(review): no upper bound on the number of retries
    while True:
        try:
            dbSize = redis.Redis(port=port).dbsize()
            break
        except redis.ConnectionError:
            logging.info("Waiting for 1 sec for redis startup completion")
            time.sleep(1)
    logging.info("Redis startup completed, dbSize=%d" % dbSize)

    return "localhost", port
コード例 #9
0
ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch
def hgGetAllRows(db, tableName, tempDir):
    """ return all rows of table as a list of lists of strings.

    Runs "SELECT * from <tableName>" through hgsql with the output redirected
    to a temporary file created in tempDir, then splits every line of that
    file on tabs. Values are not converted, every field stays a string.
    """
    query = "SELECT * from %s" % tableName
    tempFile = tempfile.NamedTemporaryFile(prefix="maxMysql_hgGetAllRows", dir=tempDir)
    try:
        cmd = 'hgsql %s -NB -e "%s" > %s' % (db, query, tempFile.name)
        maxCommon.runCommand(cmd)

        # only the newline is stripped so that empty trailing fields are kept
        with open(tempFile.name, "r") as rowFh:
            return [line.strip("\n").split("\t") for line in rowFh]
    finally:
        # closing the NamedTemporaryFile also deletes it
        tempFile.close()
コード例 #10
0
ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA
def hgGetAllRows(db, tableName, tempDir):
    """ return all rows of table as a list of lists of strings.

    hgsql runs "SELECT * from <tableName>" and writes to a temporary file in
    tempDir; each line of that file is split on tabs into one row. No type
    conversion is done on the fields.
    """
    query = "SELECT * from %s" % tableName
    tempFile = tempfile.NamedTemporaryFile(prefix="maxMysql_hgGetAllRows",
                                           dir=tempDir)
    try:
        cmd = 'hgsql %s -NB -e "%s" > %s' % (db, query, tempFile.name)
        maxCommon.runCommand(cmd)

        # strip("\n") and not strip(): empty fields at the line end must survive
        with open(tempFile.name, "r") as rowFh:
            return [line.strip("\n").split("\t") for line in rowFh]
    finally:
        # the temporary file is removed by close()
        tempFile.close()
コード例 #11
0
ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch
def renameTables(db, fromList, toList, checkExists=False):
    """ rename mysql tables: fromList[i] becomes toList[i], all in a single
    RENAME TABLE statement run through hgsql. Both lists must have the same length.
    If checkExists is set, tables for which tableExists() is false are skipped.
    Does nothing if no table is left to rename. """
    assert(len(fromList)==len(toList))
    logging.debug("Renaming mysql tables %s to %s" % (fromList, toList))
    renames = []
    for src, dest in zip(fromList, toList):
        # the existence lookup is only done when the caller asked for it
        if checkExists and not tableExists(db, src):
            logging.debug("Could not find table %s, %s" % (db, src))
            continue
        renames.append("%s TO %s" % (src, dest))
    if not renames:
        logging.debug("No table found, not renaming anything")
        return
    statement = "RENAME TABLE "+", ".join(renames)

    hgsqlCmd = """hgsql %s -NB -e '%s'""" % (db, statement)
    maxCommon.runCommand(hgsqlCmd, verbose=False)
コード例 #12
0
ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA
def renameTables(db, fromList, toList, checkExists=False):
    """ rename the tables in fromList to the names at the same position in
    toList, using one RENAME TABLE statement sent through hgsql.
    With checkExists, only tables that tableExists() reports are renamed.
    Returns without running anything when no rename remains. """
    assert (len(fromList) == len(toList))
    logging.debug("Renaming mysql tables %s to %s" % (fromList, toList))
    clauses = []
    for current, target in zip(fromList, toList):
        missing = checkExists and not tableExists(db, current)
        if missing:
            logging.debug("Could not find table %s, %s" % (db, current))
        else:
            clauses.append("%s TO %s" % (current, target))
    if not clauses:
        logging.debug("No table found, not renaming anything")
        return
    renameSql = "RENAME TABLE " + ", ".join(clauses)

    shellCmd = """hgsql %s -NB -e '%s'""" % (db, renameSql)
    maxCommon.runCommand(shellCmd, verbose=False)
コード例 #13
0
ファイル: pubConvBing.py プロジェクト: maximilianh/pubMunch
def indexTsv(zipFname, tsvName, outFname):
    """ unzip a zipfile, create a gzipped offset index for the tsv file inside
    and recompress the tsv with gzip.

    zipFname: zip archive, extracted with unzip into a temporary directory.
    tsvName: name of the tsv file inside the archive.
    outFname: gzipped index to write, one "<first field>TAB<byte offset>" line
        per line of the tsv.

    The recompressed tsv is written to <directory of zipFname>/<tsvName>.gz.
    The archive must contain exactly one *.tsv file.
    """
    # extract to local disk
    tmpDir = pubGeneric.makeTempDir("bingData")
    maxCommon.delOnExit(tmpDir)
    logging.info("Extracting to %s" % tmpDir)
    cmd = ["unzip", "-d", tmpDir, zipFname]
    maxCommon.runCommand(cmd)

    tempFname = join(tmpDir, tsvName)
    logging.info("Indexing %s to %s" % (tempFname, outFname))
    # index lines; both handles are closed by the with-blocks, also on errors
    with gzip.open(outFname, "w") as ofh:
        with open(tempFname, "rb") as ifh:
            offset = 0
            # the file iterator does not work with tell(), so use readline()
            while True:
                line = ifh.readline()
                if not line:
                    break
                url = line[0:line.find("\t")]
                ofh.write("%s\t%d\n" % (url, offset))
                offset = ifh.tell()

    # re-compress with gzip
    tmpFnames = glob.glob(join(tmpDir, "*.tsv"))
    assert (len(tmpFnames) == 1)
    tmpFname = tmpFnames[0]
    zipDir = dirname(zipFname)
    finalFname = join(zipDir, tsvName + ".gz")
    logging.info("Compressing to %s" % finalFname)
    cmd = "gzip %s -c > %s" % (tmpFname, finalFname)
    maxCommon.runCommand(cmd)
    shutil.rmtree(tmpDir)
コード例 #14
0
ファイル: pubConvBing.py プロジェクト: maximilianh/pubMunch
def indexTsv(zipFname, tsvName, outFname):
    """ unzip a zipfile, create a gzipped offset index for the tsv file inside
    and recompress the tsv with gzip.

    zipFname is extracted with unzip into a temporary directory. For every
    line of tsvName, "<first field>TAB<byte offset of the line>" is written to
    the gzipped file outFname. The tsv itself is then gzipped to
    <directory of zipFname>/<tsvName>.gz.
    The archive must contain exactly one *.tsv file.
    """
    # extract to local disk
    tmpDir = pubGeneric.makeTempDir("bingData")
    maxCommon.delOnExit(tmpDir)
    logging.info("Extracting to %s" % tmpDir)
    cmd = ["unzip", "-d", tmpDir, zipFname]
    maxCommon.runCommand(cmd)

    tempFname = join(tmpDir, tsvName)
    logging.info("Indexing %s to %s" % (tempFname, outFname))
    # index lines; the with-blocks close both files even if indexing fails
    with gzip.open(outFname, "w") as ofh:
        with open(tempFname, "rb") as ifh:
            offset = 0
            # the file iterator does not work with tell(), so use readline()
            while True:
                line = ifh.readline()
                if not line:
                    break
                url = line[0:line.find("\t")]
                ofh.write("%s\t%d\n" % (url, offset))
                offset = ifh.tell()

    # re-compress with gzip
    tmpFnames = glob.glob(join(tmpDir, "*.tsv"))
    assert(len(tmpFnames)==1)
    tmpFname = tmpFnames[0]
    zipDir = dirname(zipFname)
    finalFname = join(zipDir, tsvName+".gz")
    logging.info("Compressing to %s" % finalFname)
    cmd = "gzip %s -c > %s" % (tmpFname, finalFname)
    maxCommon.runCommand(cmd)
    shutil.rmtree(tmpDir)
コード例 #15
0
ファイル: seqMapLocal.py プロジェクト: Moxikai/pubMunch
 def blatFasta(self, db, faFname, params=[]):
     """ blat a fasta file against a db and write the filtered psl to a temp file.

     Runs the shell pipeline gfClient | sort | pslCDnaFilter. gfClient connects
     to the server and port stored in self.blatServers[db] and uses the
     directory <self.seqDir>/<db>.

     params: additional arguments appended to the gfClient command line.
         The list is only read, never modified.
     returns a (file, filename) of the temp file, as created by pubGeneric.makeTempFile
     """
     seqDir = join(self.seqDir, db)
     logging.debug("Blatting %s against %s" % (faFname, seqDir))
     server, port = self.blatServers[db]
     tmpFh, tmpFname = pubGeneric.makeTempFile("blatOut.")
     cmd1 = ["gfClient", server, str(port), seqDir, faFname, "stdout", "-nohead"]
     cmd1.extend(params)
     cmd2 = ["sort", "-k10,10 "]
     cmd3 = ["pslCDnaFilter", "stdin", tmpFname,
             "-globalNearBest=0", "-filterWeirdOverlapped", "-ignoreIntrons"]
     # each stage becomes one space-separated string, the stages are joined with pipes
     cmd = "|".join(" ".join(stage) for stage in (cmd1, cmd2, cmd3))
     maxCommon.runCommand(cmd)
     return tmpFh, tmpFname
コード例 #16
0
def zipExtract(tmpDir, zipName, filename):
    """ extract filename in zipName to tmpDir, delete tmpfile and return as string
    thought that this was faster than python's zipfile, but it isn't

    Returns None if the unzip command returns a non-zero value.
    """
    cmd = ["unzip", "-d", tmpDir, zipName, filename]
    ret = maxCommon.runCommand(cmd, ignoreErrors=True)
    if ret != 0:
        return None
    tmpFname = join(tmpDir, filename)
    # close the handle before the file is deleted
    with open(tmpFname) as extractedFh:
        data = extractedFh.read()
    os.remove(tmpFname)
    return data
コード例 #17
0
def zipExtract(tmpDir, zipName, filename):
    """ extract filename in zipName to tmpDir, delete tmpfile and return as string
    thought that this was faster than python's zipfile, but it isn't

    A non-zero return value of the unzip command makes this return None.
    """
    cmd = ["unzip", "-d", tmpDir, zipName, filename]
    ret = maxCommon.runCommand(cmd, ignoreErrors=True)
    if ret != 0:
        return None
    tmpFname = join(tmpDir, filename)
    # the with-block closes the file, so no handle is open when it is removed
    with open(tmpFname) as extractedFh:
        data = extractedFh.read()
    os.remove(tmpFname)
    return data
コード例 #18
0
ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA
def renameTablesRegex(db, exprOrList, fromStr, toStr):
    """ rename tables that match mysql expr or are given as a list from regex fromStr to toStr

    exprOrList: a string is treated as a mysql LIKE pattern and resolved with
        listTables(); anything else is used directly as the list of table names.
    fromStr, toStr: the new name of a table is re.sub(fromStr, toStr, oldName).

    Only tables that listTables() finds in db are renamed. If none is left,
    nothing is run.
    """
    if isinstance(exprOrList, str):
        tables = listTables(db, exprOrList)
    else:
        tables = exprOrList

    reFrom = re.compile(fromStr)
    parts = []
    for oldTable in tables:
        newTable = reFrom.sub(toStr, oldTable)
        existTables = listTables(db, oldTable)
        if len(existTables) != 0:
            parts.append("%s TO %s" % (oldTable, newTable))
            logging.debug("Renaming table %s -> %s" % (oldTable, newTable))

    if not parts:
        # an empty "RENAME TABLE" statement is not valid SQL
        logging.debug("No table found, not renaming anything")
        return
    sqlCmd = "RENAME TABLE " + ", ".join(parts)

    cmd = """hgsql %s -NB -e '%s'""" % (db, sqlCmd)
    maxCommon.runCommand(cmd, verbose=False)
コード例 #19
0
ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch
def renameTablesRegex(db, exprOrList, fromStr, toStr):
    """ rename tables that match mysql expr or are given as a list from regex fromStr to toStr

    If exprOrList is a string, listTables() resolves it as a mysql LIKE
    pattern, otherwise it is taken as the list of table names. Each new name
    is the old name with the regex fromStr replaced by toStr.

    Tables that listTables() does not find in db are skipped. When no table
    remains, the function returns without running hgsql.
    """
    if isinstance(exprOrList, str):
        tables = listTables(db, exprOrList)
    else:
        tables = exprOrList

    reFrom = re.compile(fromStr)
    parts = []
    for oldTable in tables:
        newTable = reFrom.sub(toStr, oldTable)
        existTables = listTables(db, oldTable)
        if len(existTables)!=0:
            parts.append("%s TO %s" % (oldTable, newTable))
            logging.debug("Renaming table %s -> %s" % (oldTable, newTable))

    if not parts:
        # "RENAME TABLE" without any table pair would be a SQL syntax error
        logging.debug("No table found, not renaming anything")
        return
    sqlCmd = "RENAME TABLE "+", ".join(parts)

    cmd = """hgsql %s -NB -e '%s'""" % (db, sqlCmd)
    maxCommon.runCommand(cmd, verbose=False)
コード例 #20
0
def getImages(pdfName):
    """ returns a list of tuples
    (imgId (int), isThumbnail (int), width, height, md5sum, PNGBinarydataBlob) extracted from pdfName.
    returns two tuples per image, one is the original, one is the thumbnail.

    Images are extracted with pdfimages into a temporary directory and
    converted to PNG with ImageMagick's convert. Images rejected by
    looksInteresting() are skipped. The temporary directory is removed
    before returning.
    """
    logging.debug("Extracting images from %s" % pdfName)
    tempDir = tempfile.mkdtemp(prefix="pdfimages", dir=pubConf.getTempDir())
    maxCommon.delOnExit(tempDir)
    outStem = join(tempDir, "img")
    cmd = "pdfimages %s %s" % (pdfName, outStem)
    maxCommon.runCommand(cmd)

    # convert to png
    data = []
    imgId = 0
    for fname in glob.glob(join(tempDir, "*.ppm")):
        logging.debug("got image %s" % fname)
        with open(fname) as ppmFh:
            x, y = pbmSize(ppmFh)
        if not looksInteresting(x, y):
            logging.debug("Image is too small or too long/wide")
            continue

        logging.debug("Loading image into sqlite")
        outFname = "%s.png" % fname
        cmd = "convert %s %s" % (fname, outFname)
        maxCommon.runCommand(cmd)

        # PNG is binary data: read it in binary mode and close the handle
        with open(outFname, "rb") as pngFh:
            pngBlob = pngFh.read()
        md5Str = makeMd5(pngBlob)

        data.append( (imgId, 0, x, y, md5Str, pngBlob) )

        # make the thumbnail
        thumbFName = "%s.thumb.png" % fname
        # see https://www.smashingmagazine.com/2015/06/efficient-image-resizing-with-imagemagick/
        # but can't use -posterize 136 on centos6
        cmd = "convert -filter Triangle -define filter:support=2 -thumbnail %d " \
            "-unsharp 0.25x0.25+8+0.065 -dither None -quality 82 -define png:compression-filter=5 " \
            "-define png:compression-level=9 -define png:compression-strategy=1 " \
            "-define png:exclude-chunk=all -interlace none -colorspace " \
            "sRGB -strip %s %s" % (WIDTH, fname, thumbFName)
        maxCommon.runCommand(cmd)

        x, y = pngDimensions(thumbFName)
        with open(thumbFName, "rb") as thumbFh:
            pngBlob = thumbFh.read()
        md5Str = makeMd5(pngBlob)

        data.append( (imgId, 1, x, y, md5Str, pngBlob) )

        # original and thumbnail share the same imgId
        imgId += 1

    shutil.rmtree(tempDir)
    # the directory is gone already, take it off the delete-on-exit list
    maxCommon.ignoreOnExit(tempDir)
    return data
コード例 #21
0
ファイル: maxMysql.py プロジェクト: Moxikai/pubMunch
def truncateTable(db, table):
    """ run the SQL statement 'truncate table <table>' on db through hgsql """
    logging.debug("Truncating table %s" % table)
    hgsqlArgs = (db, table)
    maxCommon.runCommand("""hgsql %s -NB -e 'truncate table %s'""" % hgsqlArgs, verbose=False)
コード例 #22
0
ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA
def truncateTable(db, table):
    """ send 'truncate table <table>' to the database db, using the hgsql command """
    logging.debug("Truncating table %s" % table)
    shellCmd = """hgsql %s -NB -e 'truncate table %s'""" % (db, table)
    quiet = False
    maxCommon.runCommand(shellCmd, verbose=quiet)
コード例 #23
0
ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA
def dropTable(db, table):
    """ run 'drop table if exists <table>' on db through hgsql """
    logging.debug("Dropping table %s" % table)
    hgsqlArgs = (db, table)
    maxCommon.runCommand("""hgsql %s -NB -e 'drop table if exists %s'""" % hgsqlArgs, verbose=False)
コード例 #24
0
ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch
def hgLoadSqlTab(db, tableName, sqlName, tabFname, optString=""):
    """ run the hgLoadSqlTab tool to load the tab-separated file tabFname into a table.

    The command line is "hgLoadSqlTab <db> <tableName> <sqlName> <tabFname> <optString>"
    (sqlName is presumably the .sql schema file -- confirm against the tool's
    usage message). optString is added to the command unchanged.

    A missing tabFname is not an error: a warning is logged and nothing is run.
    """
    if not isfile(tabFname):
        # logging.warning replaces the deprecated alias logging.warn
        logging.warning("file %s not found" % tabFname)
        return
    cmd = "hgLoadSqlTab %s %s %s %s %s" % (db, tableName, sqlName, tabFname, optString)
    maxCommon.runCommand(cmd, verbose=False)
コード例 #25
0
ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch
def dropTable(db, table):
    """ send 'drop table if exists <table>' to the database db, using the hgsql command """
    logging.debug("Dropping table %s" % table)
    shellCmd = """hgsql %s -NB -e 'drop table if exists %s'""" % (db, table)
    quiet = False
    maxCommon.runCommand(shellCmd, verbose=quiet)
コード例 #26
0
def getImages(pdfName):
    """ returns a list of tuples
    (imgId (int), isThumbnail (int), width, height, md5sum, PNGBinarydataBlob) extracted from pdfName.
    returns two tuples per image, one is the original, one is the thumbnail.

    Returns None if the first 30 characters of pdfName contain an <html tag.
    Images rejected by looksInteresting() and images whose MD5 is in
    md5Blacklist are skipped. Extraction is done with pdfimages, conversion
    with ImageMagick's convert, in a temporary directory that is removed
    before returning.
    """
    loadBlacklist()

    with open(pdfName) as pdfFh:
        head = pdfFh.read(30)
    if "<html" in head or "<HTML" in head:
        logging.info("PDF %s is an HTML file, skipping" % pdfName)
        return None

    logging.debug("Extracting images from %s" % pdfName)
    tempDir = tempfile.mkdtemp(prefix="pdfimages", dir=pubConf.getTempDir())
    maxCommon.delOnExit(tempDir)
    outStem = join(tempDir, "img")
    cmd = "pdfimages %s %s" % (pdfName, outStem)
    maxCommon.runCommand(cmd)

    # convert to png
    data = []
    imgId = 0
    for fname in glob.glob(join(tempDir, "*.ppm")):
        logging.debug("got image %s" % fname)
        with open(fname) as ppmFh:
            x, y = pbmSize(ppmFh)
        if not looksInteresting(x, y):
            logging.debug("Image is too small or too long/wide")
            continue

        logging.debug("Loading image into sqlite")
        outFname = "%s.png" % fname
        cmd = "convert %s %s" % (fname, outFname)
        maxCommon.runCommand(cmd)

        # PNG is binary data: read it in binary mode and close the handle
        with open(outFname, "rb") as pngFh:
            pngBlob = pngFh.read()
        md5Str = makeMd5(pngBlob)

        # debug log instead of a print to stdout that copied the whole blacklist
        logging.debug("Image MD5 is %s", md5Str)
        if md5Str in md5Blacklist:
            logging.debug("Image MD5 is blacklisted")
            continue

        data.append((imgId, 0, x, y, md5Str, pngBlob))

        # make the thumbnail
        thumbFName = "%s.thumb.png" % fname
        # see https://www.smashingmagazine.com/2015/06/efficient-image-resizing-with-imagemagick/
        # but can't use -posterize 136 on centos6
        cmd = "convert -filter Triangle -define filter:support=2 -thumbnail %d " \
            "-unsharp 0.25x0.25+8+0.065 -dither None -quality 82 -define png:compression-filter=5 " \
            "-define png:compression-level=9 -define png:compression-strategy=1 " \
            "-define png:exclude-chunk=all -interlace none -colorspace " \
            "sRGB -strip %s %s" % (WIDTH, fname, thumbFName)
        maxCommon.runCommand(cmd)

        x, y = pngDimensions(thumbFName)
        with open(thumbFName, "rb") as thumbFh:
            pngBlob = thumbFh.read()
        md5Str = makeMd5(pngBlob)

        data.append((imgId, 1, x, y, md5Str, pngBlob))

        # original and thumbnail share the same imgId
        imgId += 1

    shutil.rmtree(tempDir)
    # the directory is gone already, take it off the delete-on-exit list
    maxCommon.ignoreOnExit(tempDir)
    return data