def insert_weibo_datas(weibo_datas):
    """Store every item of ``weibo_datas`` that is not already known.

    For each item, ``get_wb_by_mid`` is called with the item's ``weibo_id``;
    the item is added to ``db_session`` only when that lookup returns a falsy
    value. The session is committed once after all items were examined.

    :param weibo_datas: iterable of objects exposing a ``weibo_id`` attribute
    :return: None
    """
    for item in weibo_datas:
        if get_wb_by_mid(item.weibo_id):
            # The lookup returned something for this weibo_id: skip the item.
            continue
        db_session.add(item)
    db_session.commit()
def set_seed_home_crawled(uid):
    """Set ``home_crawled`` to 1 for the seed of ``uid``, inserting it if absent.

    :param uid: user id
    :return: None
    """
    record = get_seed_by_id(uid)
    if record is not None:
        record.home_crawled = 1
    else:
        # No seed for this uid yet: insert one with only the home flag set.
        record = SeedIds(uid=uid, is_crawled=0, other_crawled=0, home_crawled=1)
        db_session.add(record)
    db_session.commit()
def set_seed_home_crawled(uid):
    """Set ``home_crawled`` to 1 for the seed of ``uid``.

    This covers both the case where the uid was specified directly and the
    case where it was read from the seed_ids table in the database: an
    existing seed is updated, a missing one is inserted.

    :param uid: user id
    :return: None
    """
    record = get_seed_by_id(uid)
    if record is not None:
        record.home_crawled = 1
    else:
        # The uid has no seed row yet, so create it with the home flag set.
        record = SeedIds(uid=uid, is_crawled=0, other_crawled=0, home_crawled=1)
        db_session.add(record)
    db_session.commit()
def set_seed_other_crawled(uid):
    """Set ``other_crawled`` to 1 for ``uid``: update if present, else insert.

    :param uid: user id
    :return: None
    """
    record = get_seed_by_id(uid)
    if record is not None:
        record.other_crawled = 1
    else:
        # A freshly inserted seed gets all three crawl flags set to 1.
        record = SeedIds(uid=uid, is_crawled=1, other_crawled=1, home_crawled=1)
        db_session.add(record)
    db_session.commit()
def set_seed_other_crawled(uid):
    """Update the seed of ``uid`` if it exists, otherwise insert it.

    An existing seed only has ``other_crawled`` set to 1; a new seed is
    created with ``is_crawled``, ``other_crawled`` and ``home_crawled`` all 1.

    :param uid: user id
    :return: None
    """
    record = get_seed_by_id(uid)
    if record is not None:
        record.other_crawled = 1
    else:
        record = SeedIds(uid=uid, is_crawled=1, other_crawled=1, home_crawled=1)
        db_session.add(record)
    db_session.commit()
def set_seed_crawled(uid, result):
    """Record the crawl result for ``uid`` without duplicating its seed row.

    If a seed row exists and its ``is_crawled`` is still 0, the result is
    written to it. If a seed row exists with any other ``is_crawled`` value it
    is left untouched. A new row is inserted only when none exists for ``uid``.

    :param uid: user id that is crawled
    :param result: crawling result, 1 stands for succeed, 2 stands for fail
    :return: None
    """
    seed = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()
    if seed:
        # Only overwrite the initial state; an already recorded result stays.
        # The previous combined condition fell through to the insert branch
        # here and added a second row for the same uid.
        if seed.is_crawled == 0:
            seed.is_crawled = result
    else:
        seed = SeedIds(uid=uid, is_crawled=result)
        db_session.add(seed)
    db_session.commit()
def set_seed_home_crawled(uid, home_flag=1):
    """Write ``home_flag`` to the seed of ``uid`` and stamp the update time.

    An existing seed gets ``home_crawled`` and ``update_time`` overwritten; a
    missing seed is inserted with ``is_crawled`` and ``other_crawled`` at 0.

    :param uid: user id
    :param home_flag: value stored in ``home_crawled`` (defaults to 1)
    :return: None
    """
    # Whole seconds as returned by time(), taken before the lookup.
    now = int(time())
    record = get_seed_by_id(uid)
    if record is not None:
        record.home_crawled = home_flag
        record.update_time = now
    else:
        record = SeedIds(
            uid=uid,
            is_crawled=0,
            other_crawled=0,
            home_crawled=home_flag,
            update_time=now,
        )
        db_session.add(record)
    db_session.commit()
def set_seed_crawled(uid, result):
    """Store the crawl result for ``uid``, overwriting any earlier value.

    This table is used by the user-crawling logic.

    :param uid: id of the crawled user
    :param result: crawl result
    :return: None
    """
    record = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()
    if not record:
        # Nothing stored for this uid yet: insert a new row with the result.
        db_session.add(SeedIds(uid=uid, is_crawled=result))
    else:
        record.is_crawled = result
    db_session.commit()
def set_seed_crawled(uid, result):
    """Store the crawl result for ``uid`` unless one was already recorded.

    A missing seed row is inserted with ``is_crawled=result``. An existing row
    is only changed while its ``is_crawled`` is still 0.

    :param uid: user id that is crawled
    :param result: crawling result
    :return: None
    """
    record = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()
    if not record:
        db_session.add(SeedIds(uid=uid, is_crawled=result))
    elif record.is_crawled == 0:
        # Rows whose is_crawled is no longer 0 are deliberately left as-is.
        record.is_crawled = result
    db_session.commit()
def set_crawled(id, result):
    """Store the crawl result for a user unless one was already recorded.

    This table is used by the user-crawling logic. A missing ``User`` row is
    inserted with ``is_crawled=result``; an existing row is only changed while
    its ``is_crawled`` is still 0.

    :param id: id of the crawled user (matched against ``User.id``)
    :param result: crawl result
    :return: None
    """
    record = db_session.query(User).filter(User.id == id).first()
    if not record:
        db_session.add(User(id=id, is_crawled=result))
    elif record.is_crawled == 0:
        # Rows whose is_crawled is no longer 0 are deliberately left as-is.
        record.is_crawled = result
    db_session.commit()
def set_seed_other_crawled(uid):
    """Set ``other_crawled`` to 1 for ``uid`` and stamp the update time.

    Updates the seed if the user id already exists, otherwise inserts a new
    seed with ``is_crawled``, ``other_crawled`` and ``home_crawled`` all 1.

    :param uid: user id
    :return: None
    """
    # Whole seconds as returned by time(), taken before the lookup.
    now = int(time())
    record = get_seed_by_id(uid)
    if record is not None:
        record.other_crawled = 1
        record.update_time = now
    else:
        record = SeedIds(
            uid=uid,
            is_crawled=1,
            other_crawled=1,
            home_crawled=1,
            update_time=now,
        )
        db_session.add(record)
    db_session.commit()
def save_repost(repost):
    """Add ``repost`` to ``db_session`` and commit right away.

    :param repost: object to persist
    :return: None
    """
    session = db_session
    session.add(repost)
    session.commit()
def insert_zhihu_data(zhihu_data):
    """Add ``zhihu_data`` to ``db_session`` and commit right away.

    :param zhihu_data: object to persist
    :return: None
    """
    session = db_session
    session.add(zhihu_data)
    session.commit()
def save_comment(comment):
    """Add ``comment`` to ``db_session`` and commit right away.

    :param comment: object to persist
    :return: None
    """
    session = db_session
    session.add(comment)
    session.commit()
def insert(wechat_data):
    """Add ``wechat_data`` to ``db_session`` and commit right away.

    Duplicate detection is expected to happen one layer up when data is
    stored; no such check is made at this layer.

    :param wechat_data: object to persist
    :return: None
    """
    session = db_session
    session.add(wechat_data)
    session.commit()
def insert_wechat_datas(wechat_datas):
    """Store every item of ``wechat_datas`` whose url is not already known.

    For each item, ``get_by_url`` is called with the item's ``url``; the item
    is added to ``db_session`` only when that lookup returns a falsy value.
    The session is committed once after all items were examined.

    :param wechat_datas: iterable of objects exposing a ``url`` attribute
    :return: None
    """
    for item in wechat_datas:
        if get_by_url(item.url):
            # The lookup returned something for this url: skip the item.
            continue
        db_session.add(item)
    db_session.commit()
def insert_weibo_data(weibo_data):
    """Add ``weibo_data`` to ``db_session`` and commit right away.

    Duplicate detection is expected to happen one layer up when data is
    stored; no such check is made at this layer.

    :param weibo_data: object to persist
    :return: None
    """
    session = db_session
    session.add(weibo_data)
    session.commit()
def save_reply(reply):
    """Add ``reply`` to ``db_session`` and commit right away.

    :param reply: object to persist
    :return: None
    """
    session = db_session
    session.add(reply)
    session.commit()
def save_user(user):
    """Add ``user`` to ``db_session`` and commit right away.

    :param user: object to persist
    :return: None
    """
    session = db_session
    session.add(user)
    session.commit()
def insert_weibo_datas(weibo_datas):
    """Add the items of ``weibo_datas`` that ``get_wb_by_mid`` does not find.

    Each item's ``weibo_id`` is looked up first; items with a truthy lookup
    result are skipped, the rest are added to ``db_session``. A single commit
    follows once the whole iterable has been processed.

    :param weibo_datas: iterable of objects exposing a ``weibo_id`` attribute
    :return: None
    """
    for entry in weibo_datas:
        already_stored = get_wb_by_mid(entry.weibo_id)
        if already_stored:
            continue
        db_session.add(entry)
    db_session.commit()
def insert_weibo_data(weibo_data):
    """Add ``weibo_data`` to ``db_session`` and commit right away.

    :param weibo_data: object to persist
    :return: None
    """
    session = db_session
    session.add(weibo_data)
    session.commit()
def insert_zhihu_datas(zhihu_datas):
    """Store every item of ``zhihu_datas`` that is not already known.

    For each item, ``get_zh_by_mid`` is called with the item's ``zhihu_id``;
    the item is added to ``db_session`` only when that lookup returns a falsy
    value. The session is committed once after all items were examined.

    :param zhihu_datas: iterable of objects exposing a ``zhihu_id`` attribute
    :return: None
    """
    for item in zhihu_datas:
        if get_zh_by_mid(item.zhihu_id):
            # The lookup returned something for this zhihu_id: skip the item.
            continue
        db_session.add(item)
    db_session.commit()
def insert_keyword(keyword):
    """Add ``keyword`` to ``db_session`` and commit right away.

    :param keyword: object to persist
    :return: None
    """
    session = db_session
    session.add(keyword)
    session.commit()
def insert_weibo_data(weibo_data):
    """Add ``weibo_data`` to ``db_session`` and commit right away.

    Whether the data would be a duplicate is decided one layer up when data
    is stored; this layer performs no such check.

    :param weibo_data: object to persist
    :return: None
    """
    session = db_session
    session.add(weibo_data)
    session.commit()
def insert_keyword_wbid(keyword_id, wbid):
    """Create a ``KeywordsWbdata`` row for the given pair and commit it.

    :param keyword_id: value stored in the row's ``keyword_id`` attribute
    :param wbid: value stored in the row's ``wb_id`` attribute
    :return: None
    """
    link = KeywordsWbdata()
    link.keyword_id = keyword_id
    link.wb_id = wbid

    session = db_session
    session.add(link)
    session.commit()
def insert_weibo_pics(weibo_pics):
    """Store every picture of ``weibo_pics`` that is not already known.

    For each picture, ``get_pic_by_url`` is called with its ``pic_url`` and
    ``url_hash``; the picture is added to ``db_session`` only when that lookup
    returns a falsy value. The session is committed once after the loop.

    :param weibo_pics: iterable of objects exposing ``pic_url`` and
        ``url_hash`` attributes
    :return: None
    """
    for picture in weibo_pics:
        if get_pic_by_url(picture.pic_url, picture.url_hash):
            # The lookup returned something for this picture: skip it.
            continue
        db_session.add(picture)
    db_session.commit()