def get_wb_by_mid(mid):
    """
    Look up a stored weibo by its id.

    :param mid: weibo id, compared against ``WeiboData.weibo_id``
    :return: the first matching ``WeiboData`` row, or ``None`` if nothing matches
    """
    matches = db_session.query(WeiboData).filter(WeiboData.weibo_id == mid)
    return matches.first()
def get_pic_by_url(url, url_hash):
    """
    Tell whether a picture row with this hash and url is stored.

    The row is found through ``WeiboPic.url_hash``; its ``pic_url`` is then
    compared with ``url``.

    :param url: picture url expected on the stored row
    :param url_hash: value matched against ``WeiboPic.url_hash``
    :return: ``True`` when a row is found and its ``pic_url`` equals ``url``,
        ``False`` otherwise
    """
    pic = db_session.query(WeiboPic).filter(WeiboPic.url_hash == url_hash).first()
    if not pic:
        return False
    return bool(pic.pic_url == url)
def get_repost_by_rid(rid):
    """
    Fetch the stored information of a reposted weibo by its id.

    :param rid: repost weibo id, compared against ``WeiboRepost.weibo_id``
    :return: the first matching ``WeiboRepost`` row, or ``None`` if nothing matches
    """
    reposts = db_session.query(WeiboRepost).filter(WeiboRepost.weibo_id == rid)
    return reposts.first()
def freeze_account(name, rs):
    """
    Set the ``enable`` status of a login account and commit the change.

    If no ``LoginInfo`` row has the given name, nothing is written.

    :param name: account name, compared against ``LoginInfo.name``
    :param rs: new status; 0 means banned, 1 means normal,
        2 means the name and password do not match
    :return: None
    """
    account = db_session.query(LoginInfo).filter(LoginInfo.name == name).first()
    if account is None:
        # Unknown account name: there is no row to update, so skip the write
        # instead of failing on an attribute assignment to None.
        return
    account.enable = rs
    db_session.commit()
def freeze_account(name, rs):
    """
    Set the ``enable`` status of a login account and commit the change.

    If no ``LoginInfo`` row has the given name, nothing is written.

    :param name: login account, compared against ``LoginInfo.name``
    :param rs: new status; 0 stands for banned, 1 stands for normal,
        2 stands for an invalid name or password
    :return: None
    """
    account = db_session.query(LoginInfo).filter(LoginInfo.name == name).first()
    if account is None:
        # Unknown account name: there is no row to update, so skip the write
        # instead of failing on an attribute assignment to None.
        return
    account.enable = rs
    db_session.commit()
def set_seed_crawled(uid, result):
    """
    Record the crawling result for a seed user and commit.

    A new ``SeedIds`` row is inserted only when none exists for ``uid``.
    An existing row is updated only while its ``is_crawled`` is still 0;
    a row that already carries a result is left untouched.

    :param uid: user id that is crawled
    :param result: crawling result, 1 stands for succeed, 2 stands for fail
    :return: None
    """
    seed = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()
    if seed is None:
        db_session.add(SeedIds(uid=uid, is_crawled=result))
    elif seed.is_crawled == 0:
        seed.is_crawled = result
    # An existing row with a non-zero is_crawled falls through unchanged, so no
    # second row is inserted for the same uid.
    db_session.commit()
def set_seed_crawled(uid, result):
    """
    Record the crawling result for a seed user and commit.

    Inserts a ``SeedIds`` row when none exists for ``uid``; otherwise updates
    ``is_crawled`` only if it is currently 0.

    :param uid: user id that is crawled
    :param result: crawling result
    :return: None
    """
    existing = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()
    if not existing:
        existing = SeedIds(uid=uid, is_crawled=result)
        db_session.add(existing)
    elif existing.is_crawled == 0:
        existing.is_crawled = result
    db_session.commit()
def set_seed_crawled(uid, result):
    """
    Store the crawling result for a seed user and commit.

    The ``SeedIds`` table is used by the user-crawling logic. An existing row
    for ``uid`` has its ``is_crawled`` overwritten; otherwise a new row is added.

    :param uid: id of the crawled user
    :param result: crawling result
    :return: None
    """
    seed = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()
    if not seed:
        seed = SeedIds(uid=uid, is_crawled=result)
        db_session.add(seed)
    else:
        seed.is_crawled = result
    db_session.commit()
def set_crawled(id, result):
    """
    Record the crawling result for a user and commit.

    The ``User`` table is used by the user-crawling logic. A new row is added
    when none exists for ``id``; an existing row is updated only if its
    ``is_crawled`` is currently 0.

    :param id: id of the crawled user, compared against ``User.id``
    :param result: crawling result
    :return: None
    """
    existing = db_session.query(User).filter(User.id == id).first()
    if not existing:
        existing = User(id=id, is_crawled=result)
        db_session.add(existing)
    elif existing.is_crawled == 0:
        existing.is_crawled = result
    db_session.commit()
def get_ids_by_home_flag_random(status=0, num=100):
    """
    Fetch seed rows with a given ``home_crawled`` value, in random order.

    :param status: value compared against ``SeedIds.home_crawled``
    :param num: maximum number of rows to return
    :return: list of at most ``num`` ``SeedIds`` rows, ordered by ``func.random()``
    """
    candidates = db_session.query(SeedIds).filter(SeedIds.home_crawled == status)
    shuffled = candidates.order_by(func.random())
    return shuffled.limit(num).all()
def get_search_keywords():
    """
    Fetch every keyword row matching the raw SQL condition ``enable=1``.

    :return: list of ``KeyWords`` rows
    """
    keywords = db_session.query(KeyWords)
    return keywords.filter(text('enable=1')).all()
def get_login_info():
    """
    Fetch the login accounts matching the raw SQL condition ``enable=1``.

    :return: list of result rows, each holding ``name``, ``password`` and
        ``enable`` of a ``LoginInfo`` row
    """
    accounts = db_session.query(
        LoginInfo.name,
        LoginInfo.password,
        LoginInfo.enable,
    )
    return accounts.filter(text('enable=1')).all()
def get_user_by_uid(uid):
    """
    Look up a user by uid.

    :param uid: value compared against ``User.uid``
    :return: the first matching ``User`` row, or ``None`` if nothing matches
    """
    users = db_session.query(User).filter(User.uid == uid)
    return users.first()
def get_by_alias(alias):
    """
    Look up a user by alias.

    :param alias: value compared against ``User.alias``
    :return: the first matching ``User`` row, or ``None`` if nothing matches
    """
    users = db_session.query(User).filter(User.alias == alias)
    return users.first()
def get_home_ids():
    """
    Fetch the ids of all users whose home pages still need to be crawled.

    Rows are selected with the raw SQL condition ``home_crawled=0``.

    :return: list of result rows, each holding one ``SeedIds.uid``
    """
    uids = db_session.query(SeedIds.uid)
    return uids.filter(text('home_crawled=0')).all()
def get_weibo_repost_not_crawled():
    """
    Fetch the weibos matching the raw SQL condition ``repost_crawled=0``.

    :return: list of result rows, each holding ``weibo_id`` and ``uid`` of a
        ``WeiboData`` row
    """
    pending = db_session.query(WeiboData.weibo_id, WeiboData.uid)
    return pending.filter(text('repost_crawled=0')).all()
def get_weibo_comment_not_crawled():
    """
    Fetch the weibo ids matching the raw SQL condition ``comment_crawled=0``.

    :return: list of result rows, each holding one ``WeiboData.weibo_id``
    """
    pending = db_session.query(WeiboData.weibo_id)
    return pending.filter(text('comment_crawled=0')).all()
def get_wb_by_mid(mid):
    """
    Look up a stored weibo by its id.

    :param mid: value compared against ``WeiboData.weibo_id``
    :return: the first matching ``WeiboData`` row, or ``None`` if nothing matches
    """
    matches = db_session.query(WeiboData).filter(WeiboData.weibo_id == mid)
    return matches.first()
def get_seed_ids():
    """
    Fetch the seed user ids matching the raw SQL condition ``is_crawled=0``.

    :return: list of result rows, each holding one ``SeedIds.uid``
    """
    uids = db_session.query(SeedIds.uid)
    return uids.filter(text('is_crawled=0')).all()
def get_zhihu_answer_comment_not_crawled():
    """
    Fetch the zhihu comment ids matching the raw SQL condition
    ``comment_crawled=0``, ordered by ``ZhihuComment.id`` descending.

    :return: list of result rows, each holding one ``ZhihuComment.comment_id``
    """
    pending = db_session.query(ZhihuComment.comment_id)
    pending = pending.filter(text('comment_crawled=0'))
    return pending.order_by(desc(ZhihuComment.id)).all()
def get_comment_by_id(cid):
    """
    Look up a zhihu comment by its comment id.

    :param cid: value compared against ``ZhihuComment.comment_id``
    :return: the first matching ``ZhihuComment`` row, or ``None`` if nothing matches
    """
    comments = db_session.query(ZhihuComment).filter(ZhihuComment.comment_id == cid)
    return comments.first()
def get_seed():
    """
    Fetch all seed users still to be crawled.

    Rows are selected with the raw SQL condition ``status=0``.

    :return: list of ``SeedIds`` rows
    """
    seeds = db_session.query(SeedIds)
    return seeds.filter(text('status=0')).all()
def get_repost_by_rid(rid):
    """
    Look up a reposted weibo by its id.

    :param rid: value compared against ``WeiboRepost.weibo_id``
    :return: the first matching ``WeiboRepost`` row, or ``None`` if nothing matches
    """
    reposts = db_session.query(WeiboRepost).filter(WeiboRepost.weibo_id == rid)
    return reposts.first()
def get_by_id(id):
    """
    Look up a user by id.

    :param id: value compared against ``User.id``
    :return: the first matching ``User`` row, or ``None`` if nothing matches
    """
    users = db_session.query(User).filter(User.id == id)
    return users.first()
def freeze_account(name):
    """
    Disable a login account by setting its ``enable`` field to 0, then commit.

    If no ``LoginInfo`` row has the given name, nothing is written.

    :param name: account name, compared against ``LoginInfo.name``
    :return: None
    """
    account = db_session.query(LoginInfo).filter(LoginInfo.name == name).first()
    if account is None:
        # Unknown account name: there is no row to update, so skip the write
        # instead of failing on an attribute assignment to None.
        return
    account.enable = 0
    db_session.commit()
def get_user_by_nickname(nickname):
    """
    Look up a user by nickname.

    :param nickname: value compared against ``User.nickname``
    :return: the first matching ``User`` row, or ``None`` if nothing matches
    """
    users = db_session.query(User).filter(User.nickname == nickname)
    return users.first()
def get_seed_ids():
    """
    Fetch the ids of all users still to be crawled.

    Rows are selected with the raw SQL condition ``is_crawled=0``.

    :return: list of result rows, each holding one ``SeedIds.uid``
    """
    uids = db_session.query(SeedIds.uid)
    return uids.filter(text('is_crawled=0')).all()
def get_is_monitored():
    """
    Fetch all users that need to be monitored.

    Rows are selected with the raw SQL condition ``is_monitored=1``.

    :return: list of ``User`` rows
    """
    users = db_session.query(User)
    return users.filter(text('is_monitored=1')).all()
def get_seed_by_id(uid):
    """
    Look up a seed row by user id.

    :param uid: value compared against ``SeedIds.uid``
    :return: the first matching ``SeedIds`` row, or ``None`` if nothing matches
    """
    seeds = db_session.query(SeedIds).filter(SeedIds.uid == uid)
    return seeds.first()
def set_useless_keyword(keyword):
    """
    Disable a search keyword by setting its ``enable`` field to 0, then commit.

    If no ``KeyWords`` row has the given keyword, nothing is written.

    :param keyword: value compared against ``KeyWords.keyword``
    :return: None
    """
    search_word = db_session.query(KeyWords).filter(
        KeyWords.keyword == keyword).first()
    if search_word is None:
        # Unknown keyword: there is no row to disable, so skip the write
        # instead of failing on an attribute assignment to None.
        return
    search_word.enable = 0
    db_session.commit()
def get_seed_ids():
    """
    Fetch the ids of all users whose profile information still needs crawling.

    Rows are selected with the raw SQL condition ``is_crawled=0``.

    :return: list of result rows, each holding one ``SeedIds.uid``
    """
    uids = db_session.query(SeedIds.uid)
    return uids.filter(text('is_crawled=0')).all()
def get_home_ids(status=0, num=100):
    """
    Fetch a batch of seed rows with a given ``home_crawled`` value.

    :param status: value compared against ``SeedIds.home_crawled``
    :param num: maximum number of rows to return
    :return: list of at most ``num`` ``SeedIds`` rows
    """
    candidates = db_session.query(SeedIds).filter(SeedIds.home_crawled == status)
    return candidates.limit(num).all()
def get_by_url(url):
    """
    Look up a stored WeChat record by its url.

    :param url: value compared against ``WeChatData.url``
    :return: the first matching ``WeChatData`` row, or ``None`` if nothing matches
    """
    records = db_session.query(WeChatData).filter(WeChatData.url == url)
    return records.first()
def get_weibo_comment_not_crawled():
    """
    Fetch the weibo ids matching the raw SQL condition ``comment_crawled=0``.

    :return: list of result rows, each holding one ``WeiboData.weibo_id``
    """
    pending = db_session.query(WeiboData.weibo_id)
    return pending.filter(text('comment_crawled=0')).all()
def get_comment_by_id(cid):
    """
    Look up a weibo comment by its comment id.

    :param cid: value compared against ``WeiboComment.comment_id``
    :return: the first matching ``WeiboComment`` row, or ``None`` if nothing matches
    """
    comments = db_session.query(WeiboComment).filter(WeiboComment.comment_id == cid)
    return comments.first()
def get_home_ids():
    """
    Fetch the ids of all users whose home pages still need to be crawled.

    Rows are selected with the raw SQL condition ``home_crawled=0``.

    :return: list of result rows, each holding one ``SeedIds.uid``
    """
    uids = db_session.query(SeedIds.uid)
    return uids.filter(text('home_crawled=0')).all()