Python runCommandの例、maxCommon.runCommand Pythonの例

コード例 #1

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA

def hgLoadSqlTab(db, tableName, sqlName, tabFname, optString=""):
    """ Run the external hgLoadSqlTab tool on tabFname.

    The command line is "hgLoadSqlTab db tableName sqlName tabFname optString"
    and is executed through maxCommon.runCommand. optString is appended to the
    command line verbatim.

    If tabFname is not an existing file, nothing is run and a warning is
    logged instead. Returns None in both cases.
    """
    if not isfile(tabFname):
        # logging.warn is a deprecated alias, logging.warning is the real name
        logging.warning("file %s not found" % tabFname)
        return

    cmd = "hgLoadSqlTab %s %s %s %s %s" % (db, tableName, sqlName,
                                           tabFname, optString)
    maxCommon.runCommand(cmd, verbose=False)

コード例 #2

0

ファイルを表示

ファイル: msr.py プロジェクト: strbean/pubMunch-BRCA

    def allResults(self):
        """ Run the rows in self.rows through the MSR pipeline.

        The rows (described by the original author as rows with sentences as
        their -1 field) are written to a temp file by writeMsrIn, runMsr.sh
        from msrDir is run on that file, and its output is parsed with
        parseMsrOut.

        Returns a list of rows: for every parsed MSR row, a copy of the input
        row at index int(msrRow.chunkSentId) in self.rows, extended with the
        fields of the MSR row.
        """
        tstart = datetime.now()
        inFh, tempFnameIn = writeMsrIn(self.rows)
        logging.info("Running MSR pipeline on %d sentences" % len(self.rows))
        ofh2, tempFnameOut = pubGeneric.makeTempFile("msrNlpOut", ".txt")

        try:
            cmd = "%s/runMsr.sh %s %s" % (msrDir, tempFnameIn, tempFnameOut)
            maxCommon.runCommand(cmd)

            joinedRows = []
            logging.info("Parsing MSR output")
            for msrRow in parseMsrOut(tempFnameOut):
                textRow = list(self.rows[int(msrRow.chunkSentId)])
                textRow.extend(msrRow)
                joinedRows.append(textRow)
        finally:
            # release the temp file handles even if the pipeline or the
            # parser fails
            inFh.close()
            ofh2.close()
        logging.debug("results " + repr(joinedRows))

        tend = datetime.now()
        # timedelta.seconds only holds the sub-day remainder,
        # total_seconds() is the full elapsed time
        secs = (tend - tstart).total_seconds()
        logging.info("msr runtime: %d" % secs)
        return joinedRows

コード例 #3

0

ファイルを表示

ファイル: msr.py プロジェクト: Moxikai/pubMunch

    def allResults(self):
        """ Run the rows in self.rows through the MSR pipeline.

        The rows (described by the original author as rows with sentences as
        their -1 field) are written to a temp file by writeMsrIn, runMsr.sh
        from msrDir is run on that file, and its output is parsed with
        parseMsrOut.

        Returns a list of rows: for every parsed MSR row, a copy of the input
        row at index int(msrRow.chunkSentId) in self.rows, extended with the
        fields of the MSR row.
        """
        tstart = datetime.now()
        inFh, tempFnameIn = writeMsrIn(self.rows)
        logging.info("Running MSR pipeline on %d sentences" % len(self.rows))
        ofh2, tempFnameOut = pubGeneric.makeTempFile("msrNlpOut", ".txt")

        try:
            cmd = "%s/runMsr.sh %s %s" % (msrDir, tempFnameIn, tempFnameOut)
            maxCommon.runCommand(cmd)

            joinedRows = []
            logging.info("Parsing MSR output")
            for msrRow in parseMsrOut(tempFnameOut):
                textRow = list(self.rows[int(msrRow.chunkSentId)])
                textRow.extend(msrRow)
                joinedRows.append(textRow)
        finally:
            # release the temp file handles even if the pipeline or the
            # parser fails
            inFh.close()
            ofh2.close()
        logging.debug("results " + repr(joinedRows))

        tend = datetime.now()
        # timedelta.seconds only holds the sub-day remainder,
        # total_seconds() is the full elapsed time
        secs = (tend - tstart).total_seconds()
        logging.info("msr runtime: %d" % secs)
        return joinedRows

コード例 #4

0

ファイルを表示

ファイル: pubStore.py プロジェクト: joepickrell/pubMunch

 def _gzipAndMove(self, fname, finalName):
     """ Compress fname with gzip and place the result at finalName.

     An already existing fname.gz is deleted before gzip runs. The new
     fname.gz is copied to finalName and then removed.
     """
     compressedName = fname + ".gz"

     # get rid of an older .gz first
     if isfile(compressedName):
         os.remove(compressedName)

     gzipCmd = "gzip %s" % fname
     maxCommon.runCommand(gzipCmd)

     logging.debug("compressing and copying files table to %s" % finalName)
     shutil.copyfile(compressedName, finalName)
     os.remove(compressedName)

コード例 #5

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch

def listTables(db, expr):
    """ Return the list of table names in db that match the mysql expr.

    Runs 'show tables like "expr"' through hgsql with the output redirected
    into a temporary file, and returns the lines of that file with
    surrounding whitespace stripped.
    """
    tmpFile = tempfile.NamedTemporaryFile(prefix="pubBlat.dropTables")
    try:
        tmpName = tmpFile.name
        cmd = """hgsql %s -NB -e 'show tables like "%s"' > %s """ % (db, expr, tmpName)
        maxCommon.runCommand(cmd)

        # read through a separate handle that is closed as soon as we are done
        with open(tmpName) as inFh:
            return [line.strip() for line in inFh]
    finally:
        # closing a NamedTemporaryFile also deletes it from disk
        tmpFile.close()

コード例 #6

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA

def listTables(db, expr):
    """ Return the list of table names in db that match the mysql expr.

    Runs 'show tables like "expr"' through hgsql with the output redirected
    into a temporary file, and returns the lines of that file with
    surrounding whitespace stripped.
    """
    tmpFile = tempfile.NamedTemporaryFile(prefix="pubBlat.dropTables")
    try:
        tmpName = tmpFile.name
        cmd = """hgsql %s -NB -e 'show tables like "%s"' > %s """ % (db, expr,
                                                                     tmpName)
        maxCommon.runCommand(cmd)

        # read through a separate handle that is closed as soon as we are done
        with open(tmpName) as inFh:
            return [line.strip() for line in inFh]
    finally:
        # closing a NamedTemporaryFile also deletes it from disk
        tmpFile.close()

コード例 #7

0

ファイルを表示

ファイル: pubKeyVal.py プロジェクト: maximilianh/pubMunch

def startRedis(dbFname):
    """ starts redis on current server as daemon.
    Writes the status file dbFname".host" (content "ip:port") and tells redis
    to write its pid to dbFname".pid". Both file names are passed to
    maxCommon.delOnExit and shutdownRedisServers is registered with atexit.
    Blocks until the server answers a dbsize request.

    Returns the tuple ("localhost", port).

    >>> import pubGeneric
    >>> pubGeneric.setupLogging(__file__, None)
    >>> h, p = startRedis("/tmp/test.tab.gz")
    >>> r = redis.Redis(port=p)
    >>> r.set("hello", "world")
    True
    >>> r.get("hello")
    'world'
    >>> r.get("world")
    >>> r.shutdown()
    """
    dbFname = abspath(dbFname)
    pidFname = dbFname + ".pid"
    port = findFreePort()
    dirName = dirname(dbFname)
    baseName = basename(dbFname) + ".rdb"

    hostFname = dbFname + ".host"
    hostname = socket.gethostbyname("localhost")
    hostDesc = hostname + ":" + str(port)
    # close the handle explicitly so the host info is flushed to disk
    # before anything else tries to read it
    with open(hostFname, "w") as hostFh:
        hostFh.write(hostDesc)
    logging.info("Wrote redis host info %s to %s" % (hostDesc, hostFname))
    maxCommon.delOnExit(hostFname)
    maxCommon.delOnExit(pidFname)
    atexit.register(shutdownRedisServers)
    global redisPorts
    redisPorts.append(port)

    cmd = ["redis-server", "--daemonize", "yes", "--pidfile", pidFname,
        "--port", str(port), "--rdbchecksum", "no", "--dir", dirName,
        "--dbfilename", baseName, "--maxmemory", "200gb"]
    logging.info("Starting up redis server on localhost")
    maxCommon.runCommand(cmd)

    # wait until startup is complete
    # NOTE(review): there is no timeout, this loops forever if the server
    # never starts accepting connections
    while True:
        try:
            r = redis.Redis(port=port)
            dbSize = r.dbsize()
            break
        except redis.ConnectionError:
            logging.info("Waiting for 1 sec for redis startup completion")
            time.sleep(1)
    logging.info("Redis startup completed, dbSize=%d" % dbSize)

    return "localhost", port

コード例 #8

0

ファイルを表示

ファイル: pubKeyVal.py プロジェクト: maximilianh/pubMunch

def startRedis(dbFname):
    """ starts redis on current server as daemon.
    Writes the status file dbFname".host" (content "ip:port") and tells redis
    to write its pid to dbFname".pid". Both file names are passed to
    maxCommon.delOnExit and shutdownRedisServers is registered with atexit.
    Blocks until the server answers a dbsize request.

    Returns the tuple ("localhost", port).

    >>> import pubGeneric
    >>> pubGeneric.setupLogging(__file__, None)
    >>> h, p = startRedis("/tmp/test.tab.gz")
    >>> r = redis.Redis(port=p)
    >>> r.set("hello", "world")
    True
    >>> r.get("hello")
    'world'
    >>> r.get("world")
    >>> r.shutdown()
    """
    dbFname = abspath(dbFname)
    pidFname  = dbFname+".pid"
    port      = findFreePort()
    dirName   = dirname(dbFname)
    baseName  = basename(dbFname)+".rdb"

    hostFname = dbFname+".host"
    hostname  = socket.gethostbyname("localhost")
    hostDesc  = hostname+":"+str(port)
    # close the handle explicitly so the host info is flushed to disk
    # before anything else tries to read it
    with open(hostFname, "w") as hostFh:
        hostFh.write(hostDesc)
    logging.info("Wrote redis host info %s to %s" % (hostDesc, hostFname))
    maxCommon.delOnExit(hostFname)
    maxCommon.delOnExit(pidFname)
    atexit.register(shutdownRedisServers)
    global redisPorts
    redisPorts.append(port)

    cmd = ["redis-server", "--daemonize", "yes", "--pidfile", pidFname,
        "--port", str(port), "--rdbchecksum", "no", "--dir", dirName,
        "--dbfilename", baseName, "--maxmemory", "200gb"]
    logging.info("Starting up redis server on localhost")
    maxCommon.runCommand(cmd)

    # wait until startup is complete
    # NOTE(review): there is no timeout, this loops forever if the server
    # never starts accepting connections
    while True:
        try:
            r = redis.Redis(port=port)
            dbSize = r.dbsize()
            break
        except redis.ConnectionError:
            logging.info("Waiting for 1 sec for redis startup completion")
            time.sleep(1)
    logging.info("Redis startup completed, dbSize=%d" % dbSize)

    return "localhost", port

コード例 #9

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch

def hgGetAllRows(db, tableName, tempDir):
    """ Return all rows of tableName in db as a list of lists of strings.

    Runs "SELECT * from tableName" through hgsql with the output redirected
    into a temporary file created in tempDir. Every output line becomes one
    row, split on tabs.
    """
    query = "SELECT * from %s" % tableName
    tempFile = tempfile.NamedTemporaryFile(prefix="maxMysql_hgGetAllRows", dir=tempDir)
    try:
        cmd = 'hgsql %s -NB -e "%s" > %s' % (db, query, tempFile.name)
        maxCommon.runCommand(cmd)

        # read through a separate handle that is closed as soon as we are done
        with open(tempFile.name, "r") as inFh:
            return [line.strip("\n").split("\t") for line in inFh]
    finally:
        # closing a NamedTemporaryFile also deletes it from disk
        tempFile.close()

コード例 #10

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA

def hgGetAllRows(db, tableName, tempDir):
    """ Return all rows of tableName in db as a list of lists of strings.

    Runs "SELECT * from tableName" through hgsql with the output redirected
    into a temporary file created in tempDir. Every output line becomes one
    row, split on tabs.
    """
    query = "SELECT * from %s" % tableName
    tempFile = tempfile.NamedTemporaryFile(prefix="maxMysql_hgGetAllRows",
                                           dir=tempDir)
    try:
        cmd = 'hgsql %s -NB -e "%s" > %s' % (db, query, tempFile.name)
        maxCommon.runCommand(cmd)

        # read through a separate handle that is closed as soon as we are done
        with open(tempFile.name, "r") as inFh:
            return [line.strip("\n").split("\t") for line in inFh]
    finally:
        # closing a NamedTemporaryFile also deletes it from disk
        tempFile.close()

コード例 #11

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch

def renameTables(db, fromList, toList, checkExists=False):
    """ Rename mysql tables in db, fromList[i] is renamed to toList[i].

    Both lists must have the same length. With checkExists set, source
    tables for which tableExists() is false are skipped. All renames are
    sent to hgsql as one RENAME TABLE statement; if nothing is left to
    rename, no command is run.
    """
    assert len(fromList) == len(toList)
    logging.debug("Renaming mysql tables %s to %s" % (fromList, toList))

    renames = []
    for srcTable, destTable in zip(fromList, toList):
        if checkExists and not tableExists(db, srcTable):
            logging.debug("Could not find table %s, %s" % (db, srcTable))
            continue
        renames.append("%s TO %s" % (srcTable, destTable))

    if not renames:
        logging.debug("No table found, not renaming anything")
        return

    statement = "RENAME TABLE " + ", ".join(renames)
    maxCommon.runCommand("""hgsql %s -NB -e '%s'""" % (db, statement),
                         verbose=False)

コード例 #12

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA

def renameTables(db, fromList, toList, checkExists=False):
    """ Rename mysql tables in db, fromList[i] is renamed to toList[i].

    Both lists must have the same length. With checkExists set, source
    tables for which tableExists() is false are skipped. All renames are
    sent to hgsql as one RENAME TABLE statement; if nothing is left to
    rename, no command is run.
    """
    assert len(fromList) == len(toList)
    logging.debug("Renaming mysql tables %s to %s" % (fromList, toList))

    renames = []
    for srcTable, destTable in zip(fromList, toList):
        if checkExists and not tableExists(db, srcTable):
            logging.debug("Could not find table %s, %s" % (db, srcTable))
            continue
        renames.append("%s TO %s" % (srcTable, destTable))

    if not renames:
        logging.debug("No table found, not renaming anything")
        return

    statement = "RENAME TABLE " + ", ".join(renames)
    maxCommon.runCommand("""hgsql %s -NB -e '%s'""" % (db, statement),
                         verbose=False)

コード例 #13

0

ファイルを表示

ファイル: pubConvBing.py プロジェクト: maximilianh/pubMunch

def indexTsv(zipFname, tsvName, outFname):
    """ Unzip zipFname, index the tsv file inside and recompress it with gzip.

    The zip file is extracted into a temp directory. For every line of
    tsvName, outFname (gzip-compressed) receives one line
    "<text before the first tab>\\t<byte offset of the line>". The single
    *.tsv file found in the temp directory is then gzipped to
    tsvName + ".gz" in the directory of zipFname, and the temp directory is
    removed.
    """
    # extract to local disk
    tmpDir = pubGeneric.makeTempDir("bingData")
    maxCommon.delOnExit(tmpDir)
    logging.info("Extracting to %s" % tmpDir)
    cmd = ["unzip", "-d", tmpDir, zipFname]
    maxCommon.runCommand(cmd)

    tempFname = join(tmpDir, tsvName)
    logging.info("Indexing %s to %s" % (tempFname, outFname))
    # index lines
    ofh = gzip.open(outFname, "w")
    try:
        with open(tempFname, "rb") as ifh:
            offset = 0
            # the file iterator does not work with tell(), so the lines are
            # fetched one by one with readline()
            while True:
                line = ifh.readline()
                if not line:
                    break
                # NOTE(review): for a line without a tab, find() returns -1
                # and the slice drops the last character of the line
                url = line[0:line.find("\t")]
                ofh.write("%s\t%d\n" % (url, offset))
                offset = ifh.tell()
    finally:
        # both handles are now closed even if indexing fails halfway
        ofh.close()

    # re-compress with gzip
    tmpFnames = glob.glob(join(tmpDir, "*.tsv"))
    assert (len(tmpFnames) == 1)
    tmpFname = tmpFnames[0]
    zipDir = dirname(zipFname)
    finalFname = join(zipDir, tsvName + ".gz")
    logging.info("Compressing to %s" % finalFname)
    cmd = "gzip %s -c > %s" % (tmpFname, finalFname)
    maxCommon.runCommand(cmd)
    shutil.rmtree(tmpDir)

コード例 #14

0

ファイルを表示

ファイル: pubConvBing.py プロジェクト: maximilianh/pubMunch

def indexTsv(zipFname, tsvName, outFname):
    """ Unzip zipFname, index the tsv file inside and recompress it with gzip.

    The zip file is extracted into a temp directory. For every line of
    tsvName, outFname (gzip-compressed) receives one line
    "<text before the first tab>\\t<byte offset of the line>". The single
    *.tsv file found in the temp directory is then gzipped to
    tsvName + ".gz" in the directory of zipFname, and the temp directory is
    removed.
    """
    # extract to local disk
    tmpDir = pubGeneric.makeTempDir("bingData")
    maxCommon.delOnExit(tmpDir)
    logging.info("Extracting to %s" % tmpDir)
    cmd = ["unzip", "-d", tmpDir, zipFname]
    maxCommon.runCommand(cmd)

    tempFname = join(tmpDir, tsvName)
    logging.info("Indexing %s to %s" % (tempFname, outFname))
    # index lines
    ofh = gzip.open(outFname, "w")
    try:
        with open(tempFname, "rb") as ifh:
            offset = 0
            # the file iterator does not work with tell(), so the lines are
            # fetched one by one with readline()
            while True:
                line = ifh.readline()
                if not line:
                    break
                # NOTE(review): for a line without a tab, find() returns -1
                # and the slice drops the last character of the line
                url = line[0:line.find("\t")]
                ofh.write("%s\t%d\n" % (url, offset))
                offset = ifh.tell()
    finally:
        # both handles are now closed even if indexing fails halfway
        ofh.close()

    # re-compress with gzip
    tmpFnames = glob.glob(join(tmpDir, "*.tsv"))
    assert(len(tmpFnames)==1)
    tmpFname = tmpFnames[0]
    zipDir = dirname(zipFname)
    finalFname = join(zipDir, tsvName+".gz")
    logging.info("Compressing to %s" % finalFname)
    cmd = "gzip %s -c > %s" % (tmpFname, finalFname)
    maxCommon.runCommand(cmd)
    shutil.rmtree(tmpDir)

コード例 #15

0

ファイルを表示

ファイル: seqMapLocal.py プロジェクト: Moxikai/pubMunch

 def blatFasta(self, db, faFname, params=()):
     """ Blat the fasta file faFname against db and write filtered psl output
     to a temp file.

     Runs the shell pipeline gfClient | sort | pslCDnaFilter, using the
     (server, port) entry for db in self.blatServers and the sequence
     directory self.seqDir/db. params is a sequence of extra command line
     arguments appended to the gfClient call.

     Returns the tuple (tmpFh, tmpFname) obtained from
     pubGeneric.makeTempFile for the output file.
     """
     seqDir = join(self.seqDir, db)
     logging.debug("Blatting %s against %s" % (faFname, seqDir))
     server, port = self.blatServers[db]
     tmpFh, tmpFname = pubGeneric.makeTempFile("blatOut.")
     cmd1 = ["gfClient", server, str(port), seqDir, faFname, "stdout", "-nohead"]
     # params defaults to an immutable tuple instead of a shared list
     cmd1.extend(params)
     cmd2 = ["sort", "-k10,10 "]
     cmd3 = ["pslCDnaFilter", "stdin", tmpFname,
             "-globalNearBest=0", "-filterWeirdOverlapped", "-ignoreIntrons"]
     # join each command with spaces and chain the three with pipes
     cmd = "|".join(" ".join(part) for part in (cmd1, cmd2, cmd3))
     maxCommon.runCommand(cmd)
     return tmpFh, tmpFname

コード例 #16

0

ファイルを表示

def zipExtract(tmpDir, zipName, filename):
    """ Extract filename from zipName into tmpDir with the unzip tool, read
    the extracted file, delete it and return its content as a string.

    Returns None if unzip exits with a non-zero return code.
    (Original author's note: thought that this was faster than python's
    zipfile, but it isn't.)
    """
    cmd = ["unzip", "-d", tmpDir, zipName, filename]
    ret = maxCommon.runCommand(cmd, ignoreErrors=True)
    if ret != 0:
        return None
    tmpFname = join(tmpDir, filename)
    # the with block closes the handle before the file is deleted
    with open(tmpFname) as inFh:
        data = inFh.read()
    os.remove(tmpFname)
    return data

コード例 #17

0

ファイルを表示

ファイル: pubConvSpringer.py プロジェクト: maximilianh/pubMunch

def zipExtract(tmpDir, zipName, filename):
    """ Extract filename from zipName into tmpDir with the unzip tool, read
    the extracted file, delete it and return its content as a string.

    Returns None if unzip exits with a non-zero return code.
    (Original author's note: thought that this was faster than python's
    zipfile, but it isn't.)
    """
    cmd = ["unzip", "-d", tmpDir, zipName, filename]
    ret = maxCommon.runCommand(cmd, ignoreErrors=True)
    if ret != 0:
        return None
    tmpFname = join(tmpDir, filename)
    # the with block closes the handle before the file is deleted
    with open(tmpFname) as inFh:
        data = inFh.read()
    os.remove(tmpFname)
    return data

コード例 #18

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA

def renameTablesRegex(db, exprOrList, fromStr, toStr):
    """ Rename tables in db by replacing the regex fromStr with toStr in
    their names.

    exprOrList is either a mysql expression (a str, resolved to table names
    with listTables) or a list of table names. Only tables for which
    listTables(db, name) returns something are renamed. All renames are sent
    to hgsql as one RENAME TABLE statement; if there is nothing to rename,
    no command is run.
    """
    if isinstance(exprOrList, str):
        tables = listTables(db, exprOrList)
    else:
        tables = exprOrList

    reFrom = re.compile(fromStr)
    parts = []
    for oldTable in tables:
        newTable = reFrom.sub(toStr, oldTable)
        # skip names that do not correspond to a table in db
        if len(listTables(db, oldTable)) != 0:
            parts.append("%s TO %s" % (oldTable, newTable))
            logging.debug("Renaming table %s -> %s" % (oldTable, newTable))

    if not parts:
        # a "RENAME TABLE " statement without any pairs is not valid SQL
        logging.debug("No table found, not renaming anything")
        return

    sqlCmd = "RENAME TABLE " + ", ".join(parts)

    cmd = """hgsql %s -NB -e '%s'""" % (db, sqlCmd)
    maxCommon.runCommand(cmd, verbose=False)

コード例 #19

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch

def renameTablesRegex(db, exprOrList, fromStr, toStr):
    """ Rename tables in db by replacing the regex fromStr with toStr in
    their names.

    exprOrList is either a mysql expression (a str, resolved to table names
    with listTables) or a list of table names. Only tables for which
    listTables(db, name) returns something are renamed. All renames are sent
    to hgsql as one RENAME TABLE statement; if there is nothing to rename,
    no command is run.
    """
    if isinstance(exprOrList, str):
        tables = listTables(db, exprOrList)
    else:
        tables = exprOrList

    reFrom = re.compile(fromStr)
    parts = []
    for oldTable in tables:
        newTable = reFrom.sub(toStr, oldTable)
        # skip names that do not correspond to a table in db
        if len(listTables(db, oldTable))!=0:
            parts.append("%s TO %s" % (oldTable, newTable))
            logging.debug("Renaming table %s -> %s" % (oldTable, newTable))

    if not parts:
        # a "RENAME TABLE " statement without any pairs is not valid SQL
        logging.debug("No table found, not renaming anything")
        return

    sqlCmd = "RENAME TABLE "+", ".join(parts)

    cmd = """hgsql %s -NB -e '%s'""" % (db, sqlCmd)
    maxCommon.runCommand(cmd, verbose=False)

コード例 #20

0

ファイルを表示

def getImages(pdfName):
    """ returns a list of tuples
    (imgId (int), isThumbnail (int), width, height, md5sum, PNGBinarydataBlob) extracted from pdfName.
    returns two tuples per image, one is the original, one is the thumbnail.

    Images are extracted with the pdfimages tool into a temp directory and
    converted to PNG with ImageMagick's convert. Images rejected by
    looksInteresting(width, height) are skipped. The temp directory is
    removed before returning.
    """
    logging.debug("Extracting images from %s" % pdfName)
    tempDir = tempfile.mkdtemp(prefix="pdfimages", dir=pubConf.getTempDir())
    maxCommon.delOnExit(tempDir)
    outStem = join(tempDir, "img")
    cmd = "pdfimages %s %s" % (pdfName, outStem)
    maxCommon.runCommand(cmd)

    # convert to png
    data = []
    imgId = 0
    for fname in glob.glob(join(tempDir, "*.ppm")):
        logging.debug("got image %s" % fname)
        with open(fname) as ppmFh:
            x, y = pbmSize(ppmFh)
        if not looksInteresting(x, y):
            logging.debug("Image is too small or too long/wide")
            continue

        logging.debug("Loading image into sqlite")
        outFname = "%s.png" % fname
        cmd = "convert %s %s" % (fname, outFname)
        maxCommon.runCommand(cmd)

        # PNG is binary data, so read it in binary mode
        with open(outFname, "rb") as pngFh:
            pngBlob = pngFh.read()
        md5Str = makeMd5(pngBlob)

        data.append((imgId, 0, x, y, md5Str, pngBlob))

        # make the thumbnail
        thumbFName = "%s.thumb.png" % fname
        # see https://www.smashingmagazine.com/2015/06/efficient-image-resizing-with-imagemagick/
        # but can't use -posterize 136 on centos6
        cmd = "convert -filter Triangle -define filter:support=2 -thumbnail %d " \
            "-unsharp 0.25x0.25+8+0.065 -dither None -quality 82 -define png:compression-filter=5 " \
            "-define png:compression-level=9 -define png:compression-strategy=1 " \
            "-define png:exclude-chunk=all -interlace none -colorspace " \
            "sRGB -strip %s %s" % (WIDTH, fname, thumbFName)
        maxCommon.runCommand(cmd)

        # the thumbnail tuple carries the thumbnail's own dimensions
        x, y = pngDimensions(thumbFName)
        with open(thumbFName, "rb") as thumbFh:
            pngBlob = thumbFh.read()
        md5Str = makeMd5(pngBlob)

        data.append((imgId, 1, x, y, md5Str, pngBlob))

        imgId += 1

    # the directory is removed here, so take it off the delete-on-exit list
    shutil.rmtree(tempDir)
    maxCommon.ignoreOnExit(tempDir)
    return data

コード例 #21

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: Moxikai/pubMunch

def truncateTable(db, table):
    """ Run 'truncate table <table>' on database db through hgsql. """
    logging.debug("Truncating table %s" % table)
    hgsqlCmd = """hgsql %s -NB -e 'truncate table %s'""" % (db, table)
    maxCommon.runCommand(hgsqlCmd, verbose=False)

コード例 #22

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA

def truncateTable(db, table):
    """ Run 'truncate table <table>' on database db through hgsql. """
    logging.debug("Truncating table %s" % table)
    hgsqlCmd = """hgsql %s -NB -e 'truncate table %s'""" % (db, table)
    maxCommon.runCommand(hgsqlCmd, verbose=False)

コード例 #23

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: strbean/pubMunch-BRCA

def dropTable(db, table):
    """ Run 'drop table if exists <table>' on database db through hgsql. """
    logging.debug("Dropping table %s" % table)
    hgsqlCmd = """hgsql %s -NB -e 'drop table if exists %s'""" % (db, table)
    maxCommon.runCommand(hgsqlCmd, verbose=False)

コード例 #24

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch

def hgLoadSqlTab(db, tableName, sqlName, tabFname, optString=""):
    """ Run the external hgLoadSqlTab tool on tabFname.

    The command line is "hgLoadSqlTab db tableName sqlName tabFname optString"
    and is executed through maxCommon.runCommand. optString is appended to the
    command line verbatim.

    If tabFname is not an existing file, nothing is run and a warning is
    logged instead. Returns None in both cases.
    """
    if not isfile(tabFname):
        # logging.warn is a deprecated alias, logging.warning is the real name
        logging.warning("file %s not found" % tabFname)
        return

    cmd = "hgLoadSqlTab %s %s %s %s %s" % (db, tableName, sqlName, tabFname, optString)
    maxCommon.runCommand(cmd, verbose=False)

コード例 #25

0

ファイルを表示

ファイル: maxMysql.py プロジェクト: joepickrell/pubMunch

def dropTable(db, table):
    """ Run 'drop table if exists <table>' on database db through hgsql. """
    logging.debug("Dropping table %s" % table)
    hgsqlCmd = """hgsql %s -NB -e 'drop table if exists %s'""" % (db, table)
    maxCommon.runCommand(hgsqlCmd, verbose=False)

コード例 #26

0

ファイルを表示

def getImages(pdfName):
    """ returns a list of tuples
    (imgId (int), isThumbnail (int), width, height, md5sum, PNGBinarydataBlob) extracted from pdfName.
    returns two tuples per image, one is the original, one is the thumbnail.

    Returns None if the first 30 characters of pdfName contain an <html tag.
    Images are extracted with the pdfimages tool into a temp directory and
    converted to PNG with ImageMagick's convert. Images rejected by
    looksInteresting(width, height) or whose md5 is in md5Blacklist are
    skipped. The temp directory is removed before returning.
    """
    loadBlacklist()

    with open(pdfName) as pdfFh:
        head = pdfFh.read(30)
    if "<html" in head or "<HTML" in head:
        logging.info("PDF %s is an HTML file, skipping" % pdfName)
        return None

    logging.debug("Extracting images from %s" % pdfName)
    tempDir = tempfile.mkdtemp(prefix="pdfimages", dir=pubConf.getTempDir())
    maxCommon.delOnExit(tempDir)
    outStem = join(tempDir, "img")
    cmd = "pdfimages %s %s" % (pdfName, outStem)
    maxCommon.runCommand(cmd)

    # convert to png
    data = []
    imgId = 0
    for fname in glob.glob(join(tempDir, "*.ppm")):
        logging.debug("got image %s" % fname)
        with open(fname) as ppmFh:
            x, y = pbmSize(ppmFh)
        if not looksInteresting(x, y):
            logging.debug("Image is too small or too long/wide")
            continue

        logging.debug("Loading image into sqlite")
        outFname = "%s.png" % fname
        cmd = "convert %s %s" % (fname, outFname)
        maxCommon.runCommand(cmd)

        # PNG is binary data, so read it in binary mode
        with open(outFname, "rb") as pngFh:
            pngBlob = pngFh.read()
        md5Str = makeMd5(pngBlob)

        if md5Str in md5Blacklist:
            logging.debug("Image MD5 is blacklisted")
            continue

        data.append((imgId, 0, x, y, md5Str, pngBlob))

        # make the thumbnail
        thumbFName = "%s.thumb.png" % fname
        # see https://www.smashingmagazine.com/2015/06/efficient-image-resizing-with-imagemagick/
        # but can't use -posterize 136 on centos6
        cmd = "convert -filter Triangle -define filter:support=2 -thumbnail %d " \
            "-unsharp 0.25x0.25+8+0.065 -dither None -quality 82 -define png:compression-filter=5 " \
            "-define png:compression-level=9 -define png:compression-strategy=1 " \
            "-define png:exclude-chunk=all -interlace none -colorspace " \
            "sRGB -strip %s %s" % (WIDTH, fname, thumbFName)
        maxCommon.runCommand(cmd)

        # the thumbnail tuple carries the thumbnail's own dimensions
        x, y = pngDimensions(thumbFName)
        with open(thumbFName, "rb") as thumbFh:
            pngBlob = thumbFh.read()
        md5Str = makeMd5(pngBlob)

        data.append((imgId, 1, x, y, md5Str, pngBlob))

        imgId += 1

    # the directory is removed here, so take it off the delete-on-exit list
    shutil.rmtree(tempDir)
    maxCommon.ignoreOnExit(tempDir)
    return data