Beispiel #1
0
def gossip_list_page(page=0):
    """Render one page of the gossip list, newest entries first.

    Args:
        page: 1-based page number. Any value <= 0 (including the default
            of 0) triggers ``abort(404)``.

    Returns:
        The result of rendering ``gossip_list.html`` with the current page
        number, the total page count and the paginated ``Gossip`` query.
    """
    if page <= 0:
        abort(404)

    # Page count is derived from the row count of the whole Gossip table.
    gossip_count = Gossip.select().count()
    per_page = config.ITEMS_PER_PAGE
    page_count = int(math.ceil(gossip_count * 1.0 / per_page))

    # Sort by time first, then by id as a tie-breaker, both descending.
    newest_first = Gossip.select().order_by(Gossip.t.desc(), Gossip.id.desc())
    current_page = newest_first.paginate(page, per_page)

    return render_template(
        "gossip_list.html",
        page=page,
        total_page=page_count,
        gossip_list=current_page,
    )
Beispiel #2
0
def load_gossip_page(page, uid=crawler.uid):
    """Crawl one page of gossip for ``uid`` and upsert every entry.

    Args:
        page: Page number sent to the gossip endpoint.
        uid: Owner of the gossip board; defaults to the crawler's own uid
            (evaluated once, when the function is defined).

    Returns:
        Number of gossip entries contained in the response for this page.
    """
    payload = {
        "id": uid,
        "page": page,
        "guest": crawler.uid,
    }
    response = crawler.get_json(config.GOSSIP_URL,
                                params=payload,
                                method='POST')
    entries = response['array']

    for entry in entries:
        if 'tinyUrl' in entry:
            head_pic = get_image(entry['tinyUrl'])
        else:
            head_pic = config.DEFAULT_HEAD_PIC

        record = {
            'id': entry['id'],
            'uid': uid,
            't': datetime.strptime(entry['time'], "%Y-%m-%d %H:%M"),
            'guestId': entry['guestId'],
            'guestName': entry['guestName'],
            # The avatar is the one the guest had when posting, so it is kept
            # per gossip row instead of being written to the User table.
            'headPic': head_pic,
            'attachSnap': get_image(entry.get('headUrl', '')),
            'attachPic': get_image(entry.get('largeUrl', '')),
            'whisper': entry['whisper'] == 'true',
            'wap': entry['wap'] == 'true',
            'gift': entry['giftImg'] if entry['gift'] == 'true' else '',
            'content': ''
        }

        # The text shows up in several fields: body, filterdBody and
        # filterOriginBody.
        # - filterOriginBody does not even have emoticons escaped.
        # - filterdBody has emoticons escaped, but also carries an annoying
        #   <span style="color:#000000"> wrapper, plus <xiaonei_wap/> for
        #   posts sent from mobile and <xiaonei_gift /> for attached gifts.
        raw_body = entry['filterdBody']
        text = raw_body.replace('\n', '<br>')
        text = text.replace('<xiaonei_wap/>', '')
        if record['gift']:
            text = re.sub(r'<xiaonei_gift img="http:[\.a-z0-9/]*"/>', '', text)

        matches = normal_pattern.findall(text)
        if matches:
            record['content'] = matches[0]
        else:
            # Content stays empty; log the raw body, falling back to a plain
            # message if the body cannot be encoded for the log output.
            try:
                logger.info(u'parse gossip body failed:\n  {body}'.format(
                    body=raw_body))
            except UnicodeEncodeError:
                logger.info(
                    'parse gossip body failed, check origin filterBody')

        # Existing rows with the same key are replaced.
        Gossip.insert(**record).on_conflict('replace').execute()

    count = len(entries)
    logger.info('  crawled {count} gossip on page {page}'.format(count=count,
                                                                 page=page))
    return count
Beispiel #3
0
def load_gossip_page(uid=crawler.uid, offset=0):
    """Crawl one batch of gossip for ``uid`` starting at ``offset``.

    Every entry in the response is normalised and upserted into ``Gossip``.

    Args:
        uid: Owner of the gossip board; defaults to the crawler's own uid
            (evaluated once, when the function is defined).
        offset: Index of the first gossip entry to request.

    Returns:
        A ``(count, next_offset)`` tuple. ``count`` is the number of entries
        in this response. ``next_offset`` is the offset to request next, or
        ``-1`` when there is nothing more to fetch.
    """
    r = crawler.get_json(config.GOSSIP_URL,
                         json_=get_gossip_payload(uid, offset),
                         method="POST")
    data = r["data"]
    gossip_list = data["gossipList"]

    for c in gossip_list:
        local_pic = get_image(c.get("senderHeadUrl", config.DEFAULT_HEAD_PIC))

        # The source time carries a UTC offset (%z). Going through a POSIX
        # timestamp and back yields a naive datetime in local time.
        ts = datetime.strptime(c["time"], "%Y-%m-%dT%H:%M:%S.%f%z").timestamp()

        gossip = {
            "id": c["id"],
            "uid": uid,
            "t": datetime.fromtimestamp(ts),
            "guestId": c["sender"],
            "guestName": c["senderName"],
            # The avatar is the one the sender had when posting, so it is kept
            # per gossip row instead of being written to the User table.
            "headPic": local_pic,
            "attachSnap": get_image(c.get("headUrl", "")),
            "attachPic": get_image(c.get("largeUrl", "")),
            # Whispers are marked inline in the body rather than by a flag.
            "whisper": "xiaonei_only_to_me" in c["body"],
            "wap": False,  # c['wap'] == 'true',
            "gift": "",  # c['giftImg'] if c['gift'] == 'true' else '',
            "content": "",
        }

        body = c["body"]
        # remove gift
        body = re.sub(r'<xiaonei_gift img="http:[\.a-z0-9/]*"/>', "", body)
        # remove xiaonei_only_to_me
        body = re.sub(r"<xiaonei_only_to_me/><Toid/>\d+$", "", body)

        gossip["content"] = body

        # Existing rows with the same key are replaced.
        Gossip.insert(**gossip).on_conflict("replace").execute()

    count = len(gossip_list)
    # NOTE(review): the page number assumes 10 entries per request — confirm
    # against get_gossip_payload.
    logger.info("  crawled {count} gossip on page {page}".format(count=count,
                                                                 page=offset //
                                                                 10))

    # Stop when the reported total is reached or exceeded, and also when the
    # response is empty: an empty batch would otherwise hand back the same
    # offset and a caller looping until -1 would never make progress.
    if count == 0 or offset + count >= data["count"]:
        return count, -1
    return count, offset + count
Beispiel #4
0
def gossip_list_page(uid, page=1):
    """Render one page of the gossip list belonging to ``uid``.

    Args:
        uid: Only ``Gossip`` rows whose ``uid`` matches are listed.
        page: 1-based page number; values <= 0 trigger ``abort(404)``.

    Returns:
        The result of rendering ``gossip_list.html`` with the current page
        number, the total page count and the page's rows as a list of dicts.
    """
    if page <= 0:
        abort(404)

    # The total comes from the gossip count stored on g.user, not a query.
    per_page = config.ITEMS_PER_PAGE
    page_count = int(math.ceil(g.user['gossip'] * 1.0 / per_page))

    # Sort by time first, then by id as a tie-breaker, both descending.
    query = Gossip.select().where(Gossip.uid == uid)
    query = query.order_by(Gossip.t.desc(), Gossip.id.desc())
    rows = list(query.paginate(page, per_page).dicts())

    return render_template(
        "gossip_list.html",
        page=page,
        total_page=page_count,
        gossip_list=rows,
    )
Beispiel #5
0
def update_fetch_info(uid):
    """Refresh the ``FetchedUser`` row for ``uid`` with current item counts.

    The user's own fields are copied from ``User`` and extended with the
    number of ``Status``, ``Gossip``, ``Album``, ``Photo`` and ``Blog`` rows
    that carry the same ``uid``. The result is upserted and printed.

    Args:
        uid: Id of the user whose summary should be refreshed.

    Returns:
        ``True`` once the row has been written.

    Raises:
        KeyError: If no ``User`` row exists for ``uid``.
    """
    from models import database, FetchedUser, User, Status, Gossip, Album, Photo, Blog

    # Summary field name -> model whose rows are counted for this user.
    counted_models = (
        ("status", Status),
        ("gossip", Gossip),
        ("album", Album),
        ("photo", Photo),
        ("blog", Blog),
    )

    with database:
        user = User.get_or_none(User.uid == uid)
        if not user:
            raise KeyError("no such user")

        fetched_info = model_to_dict(user)
        counts = {
            field: model.select().where(model.uid == uid).count()
            for field, model in counted_models
        }
        fetched_info.update(counts)

        # An existing row with the same key is replaced.
        FetchedUser.insert(**fetched_info).on_conflict('replace').execute()

        message = 'update fetched info {fetched_info}'.format(
            fetched_info=fetched_info)
        print(message)

    return True