def main():
    fileName = arsutils.getRemoveCmdArg("-file")
    zipCode = arsutils.getRemoveCmdArg("-reverseZip")
    if None == fileName and None == zipCode:
        usage()
        sys.exit(0)
    if 1 != len(sys.argv):
        usage()
        sys.exit(0)
    if None != zipCode:
        retrieveYpReverseAreaCode(zipCode)
        return
    fo = open(fileName, "rb")
    htmlTxt = fo.read()
    fo.close()
    (resultType, resultBody) = reversePhoneLookup(htmlTxt, fileName, fDebug=True)
    if MODULE_DOWN == resultType:
        print "module down"
    if PARSING_FAILED == resultType:
        print "parsing failed"
    if RESULTS_DATA == resultType:
        print "got RESULTS_DATA"
        print udfPrettyPrint(resultBody)
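# A minimal sketch of the two arsutils command-line helpers that every tool in
# this section relies on. The real arsutils module is not shown here, so this
# is an assumption inferred from how the helpers are called: each one removes
# what it consumed from sys.argv, which is why the tools can later check
# len(sys.argv) against the expected number of remaining positional arguments.
import sys

def getRemoveCmdArg(argName):
    # return the value following argName and remove both from sys.argv;
    # return None if argName is absent or has no value after it
    if argName in sys.argv:
        pos = sys.argv.index(argName)
        if pos + 1 >= len(sys.argv):
            return None
        value = sys.argv[pos + 1]
        del sys.argv[pos:pos + 2]
        return value
    return None

def fDetectRemoveCmdFlag(flag):
    # return True and remove flag from sys.argv if present, False otherwise
    if flag in sys.argv:
        sys.argv.remove(flag)
        return True
    return False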
def main():
    itemId = arsutils.getRemoveCmdArg("-item")
    if None == itemId:
        itemId = arsutils.getRemoveCmdArg("--item")
    if itemId:
        (resultType, resultBody) = getLyricsItem(itemId, None, dbgLevel=1)
    else:
        if 2 != len(sys.argv):
            usage()
            sys.exit(0)
        arg = sys.argv[1]
        print "arg=%s" % arg
        argParts = arg.split(";")
        if 5 != len(argParts):
            print "len(argParts) = %d, not 5" % len(argParts)
            usage()
            sys.exit(0)
        artist, title, album, composer, fullText = argParts
        (resultType, resultBody) = getLyricsSearch(artist, title, album, composer, fullText, None, dbgLevel=1)
    if MODULE_DOWN == resultType:
        print "module down"
    if UNKNOWN_FORMAT == resultType:
        print "unknown format"
    if NO_RESULTS == resultType:
        print "no results"
    if LYRICS_ITEM == resultType:
        print "got LYRICS_ITEM"
        #print udfPrettyPrint(resultBody)
    if LYRICS_SEARCH == resultType:
        print "got LYRICS_SEARCH"
        print udfPrettyPrint(resultBody)
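# Example invocations of the lyrics harness above (the script name
# lyricsmodule.py and the item id are placeholders, not taken from the source).
# The single positional argument is "artist;title;album;composer;fullText";
# empty fields are allowed as long as all five semicolon-separated parts are
# present:
#
#   python lyricsmodule.py "Metallica;One;;;"
#   python lyricsmodule.py -item someItemId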
def main():
    global g_dbName
    dbs = iPediaServer.getIpediaDbList()
    if 0 == len(dbs):
        print "No databases available"
        sys.exit(0)  # nothing to do without a database
    dbs.sort()
    fListDbs = arsutils.fDetectRemoveCmdFlag("-listdbs")
    if fListDbs:
        for dbName in dbs:
            print dbName
        sys.exit(0)
    dbName = arsutils.getRemoveCmdArg("-db")
    if dbName:
        if dbName in dbs:
            print "Using database '%s'" % dbName
        else:
            print "Database '%s' doesn't exist" % dbName
            print "Available databases:"
            for name in dbs:
                print " %s" % name
            sys.exit(0)
    else:
        dbName = dbs[-1]  # use the latest database
        print "Using database '%s'" % dbName
    g_dbName = dbName
    validateRedirects()
    closeConn()
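# Example invocations of the database-selection tool above (the script name
# validateRedirects.py and the database name are placeholders; the real names
# are not given in this section). With no -db argument the newest database,
# dbs[-1] after sorting, is used:
#
#   python validateRedirects.py -listdbs
#   python validateRedirects.py -db ipedia_20040801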
def main():
    # _test()
    # return
    spider = fDetectRemoveCmdFlag("-spider")
    reindex = fDetectRemoveCmdFlag("-reindex")
    update = fDetectRemoveCmdFlag("-update")
    force = fDetectRemoveCmdFlag("-force")
    test_query = getRemoveCmdArg("-test")
    # undocumented flags just for testing ;)
    backup = fDetectRemoveCmdFlag("--backup-index")
    use_old_index = fDetectRemoveCmdFlag("--update-old-index")
    update_index = fDetectRemoveCmdFlag("--force-index-update")
    bump_version = fDetectRemoveCmdFlag("--bump_version")
    if test_query:
        formats = FORMATS_ALL
        if -1 != test_query.find(";"):
            test_query, formats = [s.strip() for s in test_query.split(";", 1)]
        type = SEARCH_ANY
        if -1 != test_query.find(":"):
            t, q = [s.strip() for s in test_query.split(":", 1)]
            if t in SEARCH_TYPES:
                type, test_query = t, q
        for doc in _find_proxy(test_query, formats, type):
            d = _doc_to_tuple(doc)
            if 0 != len(d[1]):
                print "%s, \"%s (%s)\"" % (d[2], d[0], d[1])
            else:
                print "%s, \"%s\"" % (d[2], d[0])
        return
    if spider and reindex:
        update = True
    if update:
        update_all(force, force_index_update=update_index, update_old_index=use_old_index)
        return
    if spider:
        spider_all(force)
        return
    if reindex:
        reindex_all(use_old_index)
        return
    if backup:
        _backup_index()
        return
    if bump_version:
        _bump_version()
        return
    print """
usage: ebooks.py (-spider | -reindex | -update | -test "phrase") [-force]

Options:
  -spider  - performs only incremental spidering of all data
  -reindex - only reindexes existing spidered data.
  -update  - performs spidering and reindexing (only if fresh data was spidered).
  -force   - (only with -spider or -update) discards previously spidered data prior to spidering.
  -test    - tests searching in the index.
"""
    return 1
def main():
    fDumpCache = arsutils.fDetectRemoveCmdFlag("-dump-cache")
    if not fDumpCache:
        fDumpCache = arsutils.fDetectRemoveCmdFlag("--dump-cache")
    if fDumpCache:
        dumpCache()
        sys.exit(0)
    fileName = arsutils.getRemoveCmdArg("-file")
    if None != fileName:
        if 1 != len(sys.argv):
            usage()
            print sys.argv
            sys.exit(0)
        fo = open(fileName, "rb")
        htmlTxt = fo.read()
        fo.close()
        (resultType, resultBody) = parseGas(htmlTxt, url=fileName, dbgLevel=1)
    else:
        if 2 != len(sys.argv):
            usage()
            sys.exit(0)
        zipCode = sys.argv[1]
        print "zipCode: %s" % zipCode
        (resultType, resultBody) = getGasPricesForZip(cookielib.CookieJar(), zipCode, dbgLevel=1)
    if RETRIEVE_FAILED == resultType:
        print "retrieve failed"
    if PARSING_FAILED == resultType:
        htmlTxt = resultBody[1]
        #print htmlTxt
        print "parsing failed"
    if NO_RESULTS == resultType:
        print "no results"
    if LOCATION_UNKNOWN == resultType:
        print "location unknown"
    if GAS_DATA == resultType:
        print udfPrettyPrint(resultBody)
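# Example invocations of the gas-prices harness above (gasprices.py is a
# placeholder script name):
#
#   python gasprices.py 98101                     # live lookup for one zip code
#   python gasprices.py -file saved-results.html  # re-parse a saved results page
#   python gasprices.py -dump-cache               # dump the local cache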
elif "KJKLAP1" == computerName: g_serverToUse = "kjk" elif "TLAP" == computerName: g_serverToUse = "kjk" elif "RABBAN" == computerName: g_serverToUse = "andrzej" elif "GIZMO" == computerName: g_serverToUse = "pc" else: print "Don't know the server for computer %s" % computerName sys.exit(0) if __name__ == "__main__": serverToUse = None serverToUse = arsutils.getRemoveCmdArg("-server") if None == serverToUse: detectAndSetServerToUse() else: if not g_serverList.has_key(serverToUse): print "server %s is not known. Known servers:" % serverToUse for serverName in g_serverList.keys(): print " %s" % serverName sys.exit(0) else: g_serverToUse = serverToUse if arsutils.fDetectRemoveCmdFlag("-raw"): g_showRawOutput = True printUsedServer()
    txt = article.getTxt()
    converted = articleconvert.convertArticle(title, txt)
    print "TITLE: %s" % title
    print "ORIGINAL: %s" % txt
    print "CONVERTED: %s" % converted
    return

if __name__ == "__main__":
    fRandom = arsutils.fDetectRemoveCmdFlag("-random")
    fDump = arsutils.fDetectRemoveCmdFlag("-dump")
    if fDump and fRandom:
        print "Can't use -dump and -random at the same time"
        usageAndExit()
    title = arsutils.getRemoveCmdArg("-title")
    fSave = arsutils.fDetectRemoveCmdFlag("-save")
    fForceConvert = arsutils.fDetectRemoveCmdFlag("-forceconvert")
    if title and fRandom:
        print "Can't use -title and -random at the same time"
        usageAndExit()
    if not title and not fRandom:
        print "Have to provide either -title or -random"
        usageAndExit()
    # now we should only have file name
    if len(sys.argv) != 2:
        print "Have to provide *.sql or *.sql.bz2 file name with wikipedia dump"
        if count % 20000 == 0:
            print "processed %d articles, found %d small" % (count, len(articles))
    print "Articles without comma in orig: %d" % countNoComma
    avgSize = float(totalSizeNoComma) / float(countNoComma)
    print "Average size: %.2f" % avgSize
    return articles

def dumpArticles(fileName, articles):
    fo = open(fileName, "wb")
    for article in articles:
        fo.write("!'%s'\n" % article.getTitle().strip())
        fo.write("'%s'\n" % article.getText().strip())
    fo.close()

if __name__ == "__main__":
    size = arsutils.getRemoveCmdArg("-size")
    if None == size:
        size = DEFAULT_SIZE
    else:
        size = int(size)
    # now we should only have file name
    if len(sys.argv) != 2:
        print "Have to provide *.sql or *.sql.bz2 file name with wikipedia dump"
        usageAndExit()
    fileName = sys.argv[1]
    fileNameOutConv = wikipediasql.getSmallConverterFileName(fileName)
    fileNameOutOrig = wikipediasql.getSmallOrigFileName(fileName)
    articles = findConvertedArticlesUnderThreshold(fileName, size)
    dumpArticles(fileNameOutConv, articles)
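# A minimal reader for the dump format written by dumpArticles() above
# ("!'<title>'" on one line, "'<text>'" on the next). This is an illustration
# only, not part of the original tools, and it assumes that neither titles nor
# article text contain embedded newlines, which is not guaranteed for real
# wikipedia text, so treat it as a sketch.
def readDumpedArticles(fileName):
    articles = []
    fo = open(fileName, "rb")
    title = None
    for line in fo:
        line = line.rstrip("\n")
        if line.startswith("!'") and line.endswith("'"):
            title = line[2:-1]
        elif None != title:
            text = line[1:-1]  # strip the surrounding quotes
            articles.append((title, text))
            title = None
    fo.close()
    return articles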
print "Articles without comma in orig: %d" % countNoComma avgSize = float(totalSizeNoComma) / float(countNoComma) print "Average size: %.2f" % avgSize return articles def dumpArticles(fileName, articles): fo = open(fileName, "wb") for article in articles: fo.write("!'%s'\n" % article.getTitle().strip()) fo.write("'%s'\n" % (article.getText().strip())) fo.close() if __name__ == "__main__": size = arsutils.getRemoveCmdArg("-size") if None == size: size = DEFAULT_SIZE else: size = int(size) # now we should only have file name if len(sys.argv) != 2: print "Have to provide *.sql or *.sql.bz2 file name with wikipedia dump" usageAndExit() fileName = sys.argv[1] fileNameOutConv = wikipediasql.getSmallConverterFileName(fileName) fileNameOutOrig = wikipediasql.getSmallOrigFileName(fileName) articles = findConvertedArticlesUnderThreshold(fileName, size) dumpArticles(fileNameOutConv, articles)
g_serverToUse = "kjk" elif "KJKLAP1" == computerName: g_serverToUse = "kjk" elif "TLAP" == computerName: g_serverToUse = "kjk" elif "RABBAN" == computerName: g_serverToUse = "andrzej" elif "GIZMO" == computerName: g_serverToUse = "pc" else: print "Don't know the server for computer %s" % computerName sys.exit(0) if __name__=="__main__": serverToUse = None serverToUse = arsutils.getRemoveCmdArg("-server") if None == serverToUse: detectAndSetServerToUse() else: if not g_serverList.has_key(serverToUse): print "server %s is not known. Known servers:" % serverToUse for serverName in g_serverList.keys(): print " %s" % serverName sys.exit(0) else: g_serverToUse = serverToUse if arsutils.fDetectRemoveCmdFlag("-raw"): g_showRawOutput = True printUsedServer()