Exemple #1
0
def tycFromPage():
    """Request the Tianyancha home page through a web driver.

    Obtains an object from ``getBloom()`` and a driver from
    ``getWebDriver()``, then calls ``driver.get`` with the home URL.

    NOTE(review): neither ``idbloom`` nor ``driver`` is used after this
    point and nothing is returned, so this excerpt looks truncated --
    confirm against the full source before relying on it.
    """

    # presumably a bloom filter of already-seen ids (see sibling loaders);
    # unused in the visible code -- TODO confirm intended use
    idbloom = getBloom()

    homeUrl = 'http://www.tianyancha.com/'
    # assumes getWebDriver() returns a Selenium-style driver -- TODO confirm
    driver = getWebDriver()
    driver.get(homeUrl)
Exemple #2
0
def getQichachaInvestDigests():
    """Load every ``uid`` from the ``com_invest`` table into a bloom filter.

    A fresh filter is obtained from ``getBloom()``, the ``uid`` column is
    read through the cursor returned by ``getConnCsor()``, and the first
    column of each fetched row is added to the filter.

    Returns:
        The populated filter object returned by ``getBloom()``.

    NOTE(review): the connection returned by ``getConnCsor()`` is not
    closed here; whether it is shared or pooled is not visible from this
    function -- confirm before adding cleanup.
    """
    idbloom = getBloom()
    conn, csor = getConnCsor()
    csor.execute('select uid from com_invest')

    # Use a plain loop for the side effect: a list comprehension would
    # build and discard a list with one entry per row.
    for row in csor.fetchall():
        idbloom.add(row[0])

    # Parenthesised single-argument print emits the same text on Python 2
    # and also parses on Python 3.
    print('load exists ids ok')

    return idbloom
Exemple #3
0
def getQichachaDigests():
    """Return a bloom filter of ``com_base_copy`` ids, using a dump file cache.

    First tries ``loadBloomFromFile('qichachaUIDs')``. If that yields a
    truthy filter it is returned as-is. Otherwise a new filter is created
    with ``getBloom(2000 * 10000)``, filled with the first column of every
    row of ``select id from com_base_copy``, written out with
    ``dumpBloomToFile`` under the same name, and returned.

    Returns:
        The loaded or freshly built filter object.

    NOTE(review): the connection returned by ``getConnCsor()`` is not
    closed here; whether it is shared or pooled is not visible from this
    function -- confirm before adding cleanup.
    """
    idbloom = loadBloomFromFile('qichachaUIDs')
    if idbloom:
        print('load bloom from file succ, no need load from db')
        return idbloom

    print('no dump bloom file,  load from db')
    # Sized for 20,000,000 entries (2000 * 10000), as in the original.
    idbloom = getBloom(2000 * 10000)
    conn, csor = getConnCsor()
    csor.execute('select id from com_base_copy')

    # Use a plain loop for the side effect: a list comprehension would
    # build and discard a list with one entry per row.
    for row in csor.fetchall():
        idbloom.add(row[0])

    print('load exists ids ok, generate dump bloom file')
    dumpBloomToFile(idbloom, fileName='qichachaUIDs')
    return idbloom