Example #1
0
def job_news_court_patent(p_name, skip=2):
    pt = Partage()
    coll = db[db_name][table_name_insert]
    # for i, dc in enumerate(xlsx_infos()):
    for i, dc in enumerate(comp_infos()):
        print dc
        if i <= skip:
            continue

        if 'id' not in dc:
            log(log_normal, '%s company not id ' % dc['name'].encode('u8'),
                'id:%d' % i)
            raise KeyError("`d` dictionary hasn't key `id`")

        name = dc['name'].encode('u8')
        result = pt.main((name, ))
        db_insert(
            coll, {
                'id': dc['id'],
                'date': datetime.datetime.now(),
                'job': result[0],
                'news': result[1],
                'court': result[2],
                'tm': result[3],
                'name': name,
                'web': dc['web']
            })
        print p_name, '=>', 'id:', i, name, '\n', result, '\n\n'
        log(log_normal, strftime('%Y-%m-%d %H:%M:%S %A'), 'id: %d' % i, name,
            str(result))
        time.sleep(randint(3, 8))
    else:
        log(log_normal, 'all ok!')
    db.close()
Example #2
0
def data_gather_copy():
    """Copy documents from the insert collection into the gather copy.

    Walks the ``table_name_insert`` collection sorted by ``_id`` in
    descending order, strips ``_id`` from each document and inserts it into
    ``table_gather_copy`` unless ``find_one`` already matches the remaining
    fields there. Closes ``db`` when done.
    """
    source = db[db_name][table_name_insert]
    target = db[db_name][table_gather_copy]
    cursor = source.find().sort([('_id', -1)])
    for record in cursor:
        # Drop the source _id so the lookup compares the payload fields only.
        record.pop('_id', None)
        if target.find_one(record):
            continue
        target.insert(record)
    db.close()
Example #3
0
def comp_infos():
    """Return an iterator over company dicts read from the source table.

    Queries ``table_name_source`` for documents whose ``base.typ`` equals
    ``'0'``, sorted by ``_id`` in descending order, fetching only
    ``base.name`` and ``base.web``. Documents without a truthy ``base.name``
    are skipped; a missing or empty ``base.web`` becomes ``''``. Each yielded
    dict has the keys ``id``, ``name`` and ``web``. ``db`` is closed before
    returning.
    """
    projection = {'base.name': 1, 'base.web': 1}
    source = db[db_name][table_name_source]
    cursor = source.find({'base.typ': '0'}, projection).sort([('_id', -1)])

    companies = []
    for record in cursor:
        base = record['base']
        name = base.get('name')
        if not name:
            continue

        companies.append({
            'id': record['_id'],
            'name': name,
            'web': base.get('web') or ''
        })

    db.close()
    return iter(companies)
Example #4
0
def alexa_rank(p_name, skip=2):
    pt = Partage()
    coll = db[db_name][table_name_insert]
    # for k, d in enumerate(xlsx_infos()):
    for k, d in enumerate(comp_infos()):
        if k <= skip:
            continue

        if 'id' not in d:
            log(log_alexa, '%s company not id ' % d['name'].encode('u8'),
                'id:%d' % k)
            raise KeyError("`d` dictionary hasn't key `id`")

        rets, name, web = {
            'rank': '0',
            'ip': '0',
            'pv': '0'
        }, d['name'].encode('u8'), d['web'].encode('u8')
        if web:
            rets = pt.aleax(web)
            t = randint(50, 70)
            print p_name, '=>', k, name, web, '\n', rets, '\nnow sleeping %d seconds!\n' % t
            log(log_alexa, strftime('%Y-%m-%d %H:%M:%S %A'), name,
                'id: %d' % k, web, str(rets))
            time.sleep(t)
        db_insert(
            coll, {
                'id': d['id'],
                'date': datetime.datetime.now(),
                'rank': rets.get('rank'),
                'ip': rets.get('ip'),
                'pv': rets.get('pv')
            })
    else:
        log(log_alexa, 'all ok !')
    db.close()
Example #5
0
def weibo(p_name, skip=-1):
    wb = WeiboCrawler(account, password, cookie_file)
    if wb.login():
        coll = db[db_name][table_name_insert]
        # for k, d in enumerate(xlsx_infos()):
        for k, d in enumerate(comp_infos()):
            if k <= skip:
                continue

            if 'id' not in d:
                log(log_weibo, '%s company not id ' % d['name'].encode('u8'))
                raise KeyError("`d` dictionary hasn't key `id`")
            if k % 200 == 0:
                wb.login()

            ret = wb.main(d['name'].encode('u8'))
            db_insert(
                coll, {
                    'date': datetime.now(),
                    'weibo': ret[0][1],
                    'fans': ret[0][0],
                    'talk': ret[1],
                    'id': d['id']
                })
            t = randint(40, 60)
            print p_name, '=>', k + 1, d[
                'name'], '\n', ret, '\nnow sleeping %d seconds!\n' % t
            time.sleep(t)
            log(log_weibo, strftime('%Y-%m-%d %H:%M:%S %A'), 'id: %d' % k,
                d['name'].encode('u8'), str(ret), 'sleeping %d seconds!' % t)
        else:
            log(log_weibo, 'all ok')
    else:
        log(log_weibo, 'login error!')
    data_gather_copy()
    db.close()