def getFilePathByUid(uid):
    """Return the "csv" file path that a ``Weibo`` instance reports for ``uid``.

    The user's info is loaded with ``get_user_info()`` before the path is
    requested from ``get_filepath("csv")``.
    """
    crawler = Weibo(uid)
    crawler.get_user_info()
    return crawler.get_filepath("csv")
def run(self):
    """Poll ``self.db_2`` forever and store each listed user's posts in ``self.db_1``.

    Every record returned by ``self.db_2.find()`` supplies a ``star_id``; two
    pages of that user's posts are fetched and each post is inserted with an
    ``_id`` derived from its screen name, text and creation time.

    NOTE(review): the original indentation was lost. The nesting below (store
    after both pages are fetched, one 60 s pause per polling cycle) is the
    most direct reading of the statement order - confirm against upstream.
    """
    while 1:
        ret = self.db_2.find()
        for i in ret:
            uid = int(i['star_id'])
            filter = 1  # 0 = crawl all posts (original + reposts), 1 = original posts only
            pic_download = 1  # 0 = do not download original images, 1 = download them
            wb = Weibo(uid, filter, pic_download)
            pn = 1
            # Fetch pages 1 and 2, sleeping 6-10 s after each request.
            while 1:
                wb.get_one_page(pn)
                time.sleep(random.randint(6, 10))
                pn += 1
                if pn == 3:
                    break
            try:
                for obj in wb.weibo:
                    # _id is md5(screen_name + text + created_at); presumably a
                    # repeated post then fails on insert - TODO confirm.
                    obj['_id'] = md5(obj['screen_name'] + obj['text'] + obj['created_at'])
                    self.db_1.insert_one(obj)
                    print(obj)
            except Exception:
                # Any error while storing abandons the rest of this cycle's users.
                break
            wb.weibo = []
        time.sleep(60)
def main_handler(event, context):
    """Forward the body of an API-gateway event to Weibo.

    Returns an error dict with code 410 when ``event`` has no
    ``requestContext`` key. Otherwise ``event['body']`` is sent through a
    ``Weibo`` built from ``WeiboRef`` and ``WeiboCookie`` and ``"o"`` is
    returned. ``context`` is not used.
    """
    logger.info("start main handler")

    from_gateway = "requestContext" in event.keys()
    if not from_gateway:
        return {"code": 410, "errorMsg": "event is not come from api gateway"}

    client = Weibo(WeiboRef, WeiboCookie)
    client.send(event['body'])
    return "o"
def main():
    """Crawl the posts of every account in a user's follow list.

    The follow list is fetched with ``Follow``; its uids and nicknames are
    printed, then one ``Weibo`` crawler is started per followed uid. Any
    exception is printed together with its traceback instead of propagating.
    """
    try:
        # Crawl the user_ids from the follow list.
        user_id = int('Your id')
        cookie = {'Cookie': 'Your cookie'}  # replace 'Your cookie' with your own cookie
        follow = Follow(user_id, cookie)
        follow.get_follow_list()
        print(follow.follow_list)  # uids of the followed accounts
        print(follow.follow_name_list)  # nicknames of the followed accounts

        # Positional arguments handed to every Weibo crawler, in order.
        crawl_args = (
            1,  # filter: 0 = all posts (original + reposts), 1 = original posts only
            '2018-01-01',  # since_date: crawl posts from this date (yyyy-mm-dd) until now
            0,  # mongodb_write: 1 = write results to MongoDB (needs MongoDB and pymongo)
            0,  # mysql_write: 1 = write results to MySQL (needs MySQL and pymysql)
            1,  # pic_download: 1 = download original images
            0,  # video_download: 1 = download videos
        )
        for followed_uid in follow.follow_list:
            # One crawler per followed user, started with a single-element list.
            Weibo(*crawl_args).start([followed_uid])
    except Exception as e:
        print('Error: ', e)
        traceback.print_exc()
def __init__(self, token=None, keyWord=None, weiboRef=None, weiboCookie=None,
             weiboSCF=None, weixinToken=None):
    """Enable each notification channel whose credentials were supplied.

    Args:
        token: DingTalk robot access token; used only together with ``keyWord``.
        keyWord: keyword stored on the instance when DingTalk is enabled.
        weiboRef: first argument for ``Weibo``; used only with ``weiboCookie``.
        weiboCookie: second argument for ``Weibo``.
        weiboSCF: value stored as ``self.weiboSCF`` when truthy.
        weixinToken: token inserted into the ``sc.ftqq.com`` send URL.
    """
    # Every channel starts disabled.
    self.useDingTalk = self.useWeibo = self.useSCF = self.useWeixin = False

    dingtalk_configured = token and keyWord
    if dingtalk_configured:
        self.useDingTalk = True
        webhook = 'https://oapi.dingtalk.com/robot/send?access_token=%s' % token
        self.d = DingtalkChatbot(webhook)
        self.keyWord = keyWord

    weibo_configured = weiboRef and weiboCookie
    if weibo_configured:
        self.useWeibo = True
        self.weibo = Weibo(weiboRef, weiboCookie)

    if weiboSCF:
        self.useSCF = True
        self.weiboSCF = weiboSCF

    if weixinToken:
        self.useWeixin = True
        self.wxurl = 'https://sc.ftqq.com/%s.send' % weixinToken
def main():
    """Run the polling loop forever.

    Each cycle calls ``Ins.checkUpdate()``, waits 5 s, calls
    ``Weibo.checkNewDirs()``, then waits 1200 s.
    """
    ins_client = Ins()
    weibo_client = Weibo()
    ins_client.user = "******"

    while True:
        ins_client.checkUpdate()
        time.sleep(5)

        weibo_client.checkNewDirs()
        time.sleep(1200)
def postSingleFilm(self, url=None):
    """Build a status for one film and post it to Weibo with ``self.cover``.

    Args:
        url: film page to parse with ``parse_film_info``. When falsy, a film
            is chosen with ``pick_a_film(self.get_bangdan())`` instead.

    Returns:
        None. Nothing is posted when no status text could be produced.
    """
    if url:
        status = self.parse_film_info(url)
    else:
        status = self.pick_a_film(self.get_bangdan())

    # Check before appending the hashtags: appending first made an empty
    # status truthy and raised TypeError when the status was None.
    if not status:
        return

    status += "\n#电影# #电视剧#"
    print(status)

    w = Weibo()
    try:
        w.login()
        w.postStatus(status, [self.cover])
        w.safeWaterFeeds()
    finally:
        # Always release the client, even when login or posting fails.
        w.tearDown()
def load_session(self):
    """Restore ``self.weibo`` from ``SESSION_PATH``, or create a fresh ``Weibo``.

    A missing or unreadable file is ignored silently. A file that cannot be
    unpickled is logged. In both cases ``self.weibo`` falls back to a new
    ``Weibo()`` instance.
    """
    self.weibo = None
    try:
        with open(SESSION_PATH, 'rb') as f:
            # NOTE(review): pickle.load can execute arbitrary code; the
            # session file must only ever be written by this application.
            self.weibo = pickle.load(f)
    except OSError:
        # The file could not be opened or read: start a new session.
        pass
    except (pickle.PickleError, EOFError, AttributeError, ImportError,
            IndexError):
        # Truncated or stale session files raise these rather than
        # PickleError, so they are caught here as well.
        logger.exception('反序列化Weibo时出错:')

    if self.weibo is None:
        self.weibo = Weibo()
def _on_cookie_added(self, cookie):
    """Handle a newly added cookie and accept once a valid ``SUB`` cookie is seen.

    Cookies with any other name are ignored. For a ``SUB`` cookie the value
    is decoded and checked with ``Weibo(cookies).is_login()``. On success the
    cookies are stored in ``self._weibo_cookies`` and ``self.accept()`` is
    called; otherwise the rejected value is logged at debug level.
    """
    if cookie.name() != b'SUB':
        return

    try:
        cookie_sub = cookie.value().data().decode()
        cookies = {'SUB': cookie_sub}
        if Weibo(cookies).is_login():
            self._weibo_cookies = cookies
            self.accept()
        else:
            logger.debug('无效的Cookie:%s', cookie_sub)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt and SystemExit
        # are no longer swallowed by this callback.
        logger.exception('获取登录状态时出错:')
def reducer_get_user_info(self, key, _):
    """Yield ``(key, user_json)`` for one user, retrying up to three times.

    The retries guard against proxy failures. If all three attempts raise,
    ``(key, "fail")`` is yielded instead.

    Args:
        key: user id, passed to ``Weibo`` as a single-element list.
        _: unused reducer values.
    """
    for _attempt in range(3):
        try:
            wb = Weibo([key])
            wb.start()
            user_json = wb.user.json()
        except Exception:
            # The yield stays outside this try and the except is not bare,
            # so closing the generator (GeneratorExit) is never mistaken
            # for a crawl failure.
            logger.error(f"{key} eroor")
            continue
        logger.info(user_json)
        yield key, user_json
        return
    yield key, "fail"
def run():
    """Fetch hot searches and hot topics, then refresh the README and archive.

    The raw response text of each request is saved when a response object is
    returned. Both result sets are then rendered into the README and the
    archive markdown.
    """
    client = Weibo()

    # Hot searches
    searches, search_resp = client.get_hot_search()
    if search_resp:
        save_raw_content(search_resp.text, 'hot-search')

    # Hot topics
    topics, topic_resp = client.get_hot_topic()
    if topic_resp:
        save_raw_content(topic_resp.text, 'hot-topic')

    # Latest data
    save_readme(generate_readme(searches, topics))

    # Archive
    save_archive_md(generate_archive_md(searches, topics))
def update_data():
    """Push new or changed Weibo and NGA items to LeanCloud and update local caches.

    For each object class in ``object_data``: items that ``data_changed``
    reports as different from the cached JSON are converted with
    ``leancloud_object`` and saved in batches of 20. ``OBJECT_ID_MAP`` and
    the cached data are then updated and written back to disk.
    """
    nga = Nga()
    weibo = Weibo()
    object_data = {
        'Transfer': {
            'data': [] + weibo.new_items + nga.new_items,
            'id_key': 'id'
        },
    }
    batch_size = 20

    for name, info in object_data.items():
        id_key = info['id_key']
        items = info['data']
        cache_path = os.path.join('leancloud_data', name)
        cached = load_json(cache_path)

        changed_objects = []
        changed_items = {}
        for item in items:
            item_id = item.get(id_key)
            previous = cached.get(object_id_key(name, item_id), {})
            if not data_changed(previous, item):
                continue
            if id_key not in item:
                # Items without an id cannot be keyed, so they are skipped.
                continue
            changed_objects.append(leancloud_object(name, item, id_key))
            changed_items[item_id] = item

        print(name + " Total Count:" + str(len(items)))
        print(name + " Changed Count:" + str(len(changed_objects)))

        # Save in fixed-size batches.
        for start in range(0, len(changed_objects), batch_size):
            leancloud.Object.save_all(changed_objects[start:start + batch_size])

        # Record the object ids and refresh the cached copies.
        for data_object in changed_objects:
            object_key = object_id_key(name, data_object.get(id_key))
            OBJECT_ID_MAP[object_key] = data_object.id
            cached[object_key] = changed_items[data_object.get(id_key)]

        write_json('local_config/object_id_map.json', OBJECT_ID_MAP)
        write_json(cache_path, cached)
def main():
    """Consume messages from the queue forever and post each one to Weibo.

    The literal message ``debug`` triggers ``weibo.debug("debug")`` instead
    of a post. When handling fails, the message fetched in that iteration is
    put back on the queue, the error is logged and the browser is refreshed.

    NOTE(review): the original indentation was lost. The 10 s sleep is placed
    at loop level (a poll interval) - confirm against the upstream file.
    """
    queue = Message(config.Redis, config.RedisKey)
    weibo = Weibo(config.ChromeDriver, callback)
    while True:
        # Reset every iteration so the handler below never sees an unbound
        # name or re-queues a message left over from an earlier iteration.
        msg = None
        try:
            msg = queue.getMessage()
            if msg is not None:
                msg = msg.decode()
                if msg == 'debug':
                    weibo.debug("debug")
                    continue
                log.info("检测到消息,准备发送")
                weibo.postWeibo(msg)
        except Exception:
            if msg is not None:
                queue.reAddMessage(msg)
            weibo.debug("exception")
            log.error("error: %s", traceback.format_exc())
            weibo.browser.refresh()
        time.sleep(10)
def login(username, passwd):
    '''
    Log in with the given credentials and return the ``Weibo`` client.

    ``weibo.login()`` returns a sequence: index 0 is the success flag. On
    failure index 1 is printed and the process exits with status 1. On
    success index 1 is printed as the uid and index 2 as the nickname.

    This is Python 2 code (print statements).
    '''
    weibo = Weibo(username, passwd)
    login = weibo.login()  # NOTE: this local shadows the function's own name
    if not login[0]:
        try:
            print login[1]
        except:
            # Printing the raw value failed; retry with UTF-8 encoded bytes.
            print login[1].encode('utf-8')
        sys.exit(1)
    try:
        print 'success login\nuid= %s,' % login[1], 'nick=', login[2]
    except:
        # Same fallback for the nickname: print it UTF-8 encoded.
        print 'success login\nuid= %s,' % login[1], 'nick=', login[2].encode(
            'utf-8')
    return weibo
def getWeibo():
    """Check the three newest posts and broadcast unseen ones to every group.

    On the first call the current id list is recorded in the module-level
    ``weibo_id_array``. A post whose id is not in that list is announced as a
    repost or an original post (with its first picture and a picture count
    when it has several), followed by a link. Errors are reported through
    ``WARN`` and never propagate.
    """
    global weibo_id_array
    global firstcheck_weibo

    def text_part(content):
        # One plain-text segment of the outgoing message.
        return {'type': 'text', 'data': {'text': content}}

    weibo = Weibo()
    try:
        INFO('check weibo')
        # On first start, remember the ids that already exist.
        if firstcheck_weibo is True:
            INFO('first check weibo')
            weibo_id_array = weibo.IdArray
            firstcheck_weibo = False
        if firstcheck_weibo is False:
            # Look at the three newest posts.
            for idcount in range(0, 3):
                # Ad slots carry the id 0; skip them.
                if int(weibo.IdArray[idcount]) == 0:
                    continue
                # An id that is already recorded is not a new post.
                if weibo.IdArray[idcount] in weibo_id_array:
                    continue
                weibo_id_array.append(weibo.IdArray[idcount])

                if weibo.checkRetweet(idcount):
                    # Repost
                    msg = [
                        text_part('小偶像刚刚转发了一条微博:\n'),
                        text_part('%s\n' % weibo.getRetweetWeibo(idcount)),
                    ]
                else:
                    # Original post
                    msg = [
                        text_part('小偶像刚刚发了一条新微博:\n'),
                        text_part('%s\n' % weibo.getWeibo(idcount)),
                    ]
                    # Attach only the first picture when the post has any.
                    if weibo.checkPic(idcount):
                        msg.append({
                            'type': 'image',
                            'data': {'file': '%s' % weibo.getPic(idcount)[0]}})
                        # Announce the total when there is more than one.
                        if len(weibo.getPic(idcount)) > 1:
                            msg.append(text_part(
                                '\n(一共有%d张图喔)\n' % len(weibo.getPic(idcount))))

                msg.append(text_part('传送门:%s' % weibo.getScheme(idcount)))
                for grpid in groupid():
                    bot.send_group_msg_async(
                        group_id=grpid, message=msg, auto_escape=False)
                    time.sleep(0.5)
    except Exception as e:
        WARN('error when getWeibo', e)
    finally:
        INFO('weibo check completed')
def main(filepath):
    """Convert a saved Weibo text dump to a CSV file next to it.

    Args:
        filepath: path of the text dump to read. The CSV is written to the
            same path with its extension replaced by ``.csv``.
    """
    import os

    w = Weibo(0, filter=0)
    w.get_weibo_from_file(filepath)
    # Swap only the extension. str.replace("txt", "csv") also rewrote "txt"
    # inside directory or file names (e.g. "txt_dumps/a.txt").
    csv_path = os.path.splitext(filepath)[0] + ".csv"
    w.write_csv(csv_path)
#!/usr/bin/env python3 # dependencies import sys from datetime import datetime from weibo import Weibo # configuration parameters from config import cookie from config import filter from config import connection_timeout from config import pause_interval from config import pause_time # 更新微博 w = Weibo(5461287018, filter=0) w.set_cookie(cookie) w.connection_timeout = connection_timeout w.pause_interval = pause_interval w.pause_time = pause_time w.start()
from config import EMAIL, PASSWD, COOKIE_FILE, UID
from weibo import Weibo
import db
import os
import random
from time import sleep as _sleep
import requests_cache

# Configure requests_cache under the name 'cache'.
requests_cache.configure('cache')

# Module-wide client; cookies are loaded at import time.
weibo = Weibo(EMAIL, PASSWD, COOKIE_FILE)
weibo.load_cookies()


def login():
    """Log in with the module-wide client, then dump its cookies."""
    weibo.login()
    weibo.dump_cookies()


def get_friends(fans, follow):
    """Return the entries of ``fans`` that also occur in ``follow``.

    The order of ``fans`` is kept. Each fan is appended at most once per
    occurrence in ``fans``, because the inner loop stops at the first match.
    """
    friends = []
    for fan in fans:
        for fo in follow:
            if fan == fo:
                friends.append(fan)
                break
    return friends


def get_myrelation():
#!/usr/bin/env python3
# Script: update the Weibo data of `user_id` while copying stdout to a
# timestamped log file.

# dependencies
import sys
from datetime import datetime
from utilities import stream_tee
from weibo import Weibo

# configuration parameters
from config import cookie
from config import filter
from config import user_id
from config import connection_timeout
from config import pause_interval
from config import pause_time

# Logging: tee stdout into a file named after the current minute.
logname = datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
original_stdout = sys.stdout
with open(logname, "w+") as logfile:
    sys.stdout = stream_tee(original_stdout, logfile)
    try:
        # Read the Weibo posts.
        # NOTE(review): the `filter` imported from config is not used; the
        # crawler is always built with the literal filter=0.
        w = Weibo(user_id, filter=0)
        w.set_cookie(cookie)
        w.connection_timeout = connection_timeout
        w.pause_interval = pause_interval
        w.pause_time = pause_time
        w.update()
    finally:
        # Restore stdout before the log file closes, even when the update
        # raises, so nothing later writes to a closed file.
        sys.stdout = original_stdout
def getWeibo(i):
    """Scan one watched account and mail new original posts that match its keywords.

    Args:
        i: watch configuration with the keys ``contrainerID``, ``weiboID``,
            ``keyword`` (iterable of words that must all occur in the text)
            and ``shieldingWords`` (a string; when non-empty, posts that
            contain it are not mailed).

    On the first scan of a container the current id list is recorded. Later
    scans treat ids missing from that record as new posts. Reposts are
    ignored. Any exception is printed and swallowed.
    """
    contrainerID = i["contrainerID"]
    weibo = Weibo(contrainerID)
    weiboID = i["weiboID"]
    print(f"正在扫描 {weiboID}")
    try:
        # On first start, remember the ids that already exist.
        if first_check_dict[contrainerID] is True:
            weibo_id_dict[contrainerID] = weibo.IdArray
            first_check_dict[contrainerID] = False
        if first_check_dict[contrainerID] is False:
            # Look at the three newest posts.
            for idcount in range(0, 3):
                # Ad slots carry the id 0; skip them.
                if int(weibo.IdArray[idcount]) == 0:
                    continue
                # An id that is already recorded is not a new post.
                if weibo.IdArray[idcount] in weibo_id_dict[contrainerID]:
                    continue
                weibo_id_dict[contrainerID].append(weibo.IdArray[idcount])

                if weibo.checkRetweet(idcount):
                    print("[INFO] IGNORE REPOST TEXT")
                else:
                    text = weibo.getWeibo(idcount)
                    url = weibo.getScheme(idcount)
                    if all(word in text for word in i['keyword']):
                        print("FIND NEW MESSAGE")
                        shielding = i['shieldingWords']
                        # A single condition: the original used two separate
                        # ifs, so an empty shielding word sent the mail and
                        # then also logged "FIND SHIELDINGWORDS".
                        if shielding == "" or shielding not in text:
                            title = f"{weiboID} 微博更新提醒"
                            mail_msg = text + "\n" + url
                            send(mail_msg, title)
                        else:
                            print("[INFO] FIND SHIELDINGWORDS,")
                    else:
                        print("[INFO] TEXT DON'T MATCH")
                time.sleep(0.5)
    except Exception as e:
        print('[ERROE] GET WEIBO FAILED', e)
import sys, os
from zhan import Zhan
from weibo import Weibo
from config import *  # presumably also provides `db` - TODO confirm


class MyData(db.Model):
    """Stored record holding the id of the last post that was forwarded."""
    last_id = db.StringProperty(required=True)


# Python 2 script: forward new posts from the "ishoothust" Zhan to Weibo.
if __name__ == "__main__":
    # Load the stored marker, creating it with a default id when missing.
    data = MyData.get_or_insert(key_name='mydata', last_id="feed_3674946092032508824")
    last_post_id = data.last_id
    weibo = Weibo()
    zhan = Zhan("ishoothust")
    new_posts = zhan.get_new_posts(last_post_id)
    for post in new_posts:
        image_url = post["image_url"].encode("utf-8")
        msg = "#我们爱拍华科#%s " % post["title"].encode("utf-8")
        # Number of photos besides the one attached to the post.
        count = post["photo_count"] - 1
        # Parsed as ("..." % count) if count else " ".
        msg += "还有%d张精彩照片呦:" % count if count else " "
        msg += post["link"].encode("utf-8")
        weibo.send(msg, image_url)
        last_post_id = post["id"]
    # NOTE(review): the original indentation was lost; the marker is assumed
    # to be saved once after the loop - confirm against the upstream file.
    data.last_id = last_post_id
    data.put()
#!/usr/bin/env python3 # dependencies import sys from datetime import datetime from weibo import Weibo # configuration parameters from config import cookie from config import filter from config import user_id from config import connection_timeout from config import pause_interval from config import pause_time # 更新微博 for _id in user_id: w = Weibo(_id, filter=0) w.set_cookie(cookie) w.connection_timeout = connection_timeout w.pause_interval = pause_interval w.pause_time = pause_time w.update() # 创建新的timeline 杨冰怡 from timeline import build_timeline build_timeline(int(user_id[0]), template_name="PinkStar") # 创建新的timeline 冯晓菲 build_timeline(int(user_id[1]), template_name="PinkStarReverse")
def fix_images_over_nine():
    """Re-crawl posts stored with exactly nine pictures and save longer picture lists.

    The query selects posts created from 2019-10-01 up to (not including)
    2019-12-01 whose PICS column contains eight commas, i.e. nine entries.
    Each one is fetched again with ``get_long_weibo``; when the fresh picture
    list has more than nine entries, the row's PICS value is updated.

    This is Python 2 code (print statements). Any exception is printed with
    its traceback instead of propagating.

    NOTE(review): the original indentation was lost; the update phase is
    assumed to run after the ``if n:`` block - confirm against upstream.
    """
    try:
        wb = Weibo(1, '2010-01-01', 0, 0, 0, 0)
        weibo_update_list = []
        # count = 1
        # tmp_count = 0
        # random_pages = random.randint(1, 5)
        # Open the database connection (the bare string below has no effect).
        """建立数据库连接"""
        conn = pymysql.connect(host=dbinfo_host,
                               user=dbinfo_user,
                               passwd=dbinfo_password,
                               db=dbinfo_db)
        cursor = conn.cursor()
        n = cursor.execute(
            "SELECT WEIBO_ID FROM weibo_info w JOIN weibo_user_info u ON w.USER_ID = u.USER_ID WHERE LENGTH(PICS) - LENGTH(REPLACE(PICS, ',', '')) = 8 AND w.CREATE_TIME >= '2019-10-01 00:00:00' AND w.CREATE_TIME < '2019-12-01 00:00:00' AND u.FLAG = '1' AND u.STATUS = '1' AND u.BAN <> '1' ORDER BY WEIBO_ID"
        )
        # The connection is closed before the rows are read; this relies on
        # the cursor having already buffered the result - TODO confirm.
        conn.close()
        if n:
            # for row in cursor.fetchall():
            for row in tqdm(cursor.fetchall(), desc='progress'):
                weibo_id = row[0]
                print '爬取微博id:' + weibo_id
                weibo = wb.get_long_weibo(weibo_id)
                print '微博内容:' + str(weibo)
                if weibo:
                    pics = weibo['pics']
                    if pics:
                        if pics.find(",") >= 0:
                            pics_arr = pics.split(",")
                            # Only posts that really have more than nine
                            # pictures need their stored list replaced.
                            if pics_arr.__len__() > 9:
                                print "ok"
                                weibo_update = {
                                    'weibo_id': row[0],
                                    'pics': pics
                                }
                                weibo_update_list.append(weibo_update)
                # Random 1-5 s pause between posts.
                sleep(random.randint(1, 5))
                # if count - tmp_count == random_pages and count < n:
                #     sleep(random.randint(1, 3))
                #     tmp_count = count
                #     random_pages = random.randint(7, 20)
                # count = count + 1
        print "需要更新微博数:", len(weibo_update_list)
        # Second connection: write the corrected picture lists back.
        conn = pymysql.connect(host=dbinfo_host,
                               user=dbinfo_user,
                               passwd=dbinfo_password,
                               db=dbinfo_db)
        cursor = conn.cursor()
        for weibo_update in weibo_update_list:
            weibo_id = weibo_update['weibo_id']
            pics = weibo_update['pics']
            cursor.execute(
                "UPDATE weibo_info SET PICS = %s WHERE WEIBO_ID = %s",
                (pics, weibo_id))
        conn.commit()
        conn.close()
    except Exception as e:
        print('Error: ', e)
        traceback.print_exc()