Exemple #1
0
def getFilePathByUid(uid):
    """Return the CSV file path reported by a Weibo client built for ``uid``.

    The client's user info is loaded first, then the path is requested
    with the ``"csv"`` type.
    """
    client = Weibo(uid)
    # User info is fetched before asking the client for its file path.
    client.get_user_info()
    return client.get_filepath("csv")
Exemple #2
0
 def run(self):
     """Poll forever, crawling recent posts for every record in ``self.db_2``.

     For each record, the first ``max_pages`` pages are fetched through a
     ``Weibo`` client and every post is inserted into ``self.db_1`` with an
     ``_id`` derived from screen name, text and creation time.  Crawling of
     a user stops early as soon as storing a post fails.  After all records
     are handled the loop sleeps 60 seconds and starts over.
     """
     original_only = 1  # 0 = crawl all posts (original + reposts), 1 = original posts only
     pic_download = 1  # 0 = do not download original images, 1 = download them
     max_pages = 2  # pages fetched per user on every polling round

     while 1:
         for record in self.db_2.find():
             uid = int(record['star_id'])
             wb = Weibo(uid, original_only, pic_download)
             for pn in range(1, max_pages + 1):
                 wb.get_one_page(pn)
                 time.sleep(random.randint(6, 10))
                 # Store the page that was just fetched *before* deciding
                 # whether to stop; previously the last fetched page was
                 # thrown away because the page-limit check ran first.
                 try:
                     for obj in wb.weibo:
                         obj['_id'] = md5(obj['screen_name'] + obj['text'] +
                                          obj['created_at'])
                         self.db_1.insert_one(obj)
                         print(obj)
                 except Exception:
                     # Any storage failure ends the crawl for this user.
                     break
                 wb.weibo = []
         time.sleep(60)
def main_handler(event, context):
    """Forward the body of an API-gateway event through a Weibo client.

    Returns the string ``"o"`` after sending, or an error dict with code 410
    when the event carries no ``requestContext`` key.
    """
    logger.info("start main handler")
    if "requestContext" in event:
        sender = Weibo(WeiboRef, WeiboCookie)
        sender.send(event['body'])
        return "o"
    return {"code": 410, "errorMsg": "event is not come from api gateway"}
def main():
    """Crawl the posts of every account in a user's follow list.

    The follow list is fetched with ``Follow`` and printed (ids, then
    nicknames); afterwards one ``Weibo`` crawler is started per followed
    user.  Any exception is printed together with its traceback.
    """
    try:
        # Replace both placeholders with your own user id and cookie.
        user_id = int('Your id')
        cookie = {'Cookie': 'Your cookie'}

        follow = Follow(user_id, cookie)
        follow.get_follow_list()
        print(follow.follow_list)  # uids of the followed accounts
        print(follow.follow_name_list)  # nicknames of the followed accounts

        crawl_options = (
            1,  # filter: 0 = all posts (original + reposts), 1 = original only
            '2018-01-01',  # since_date (yyyy-mm-dd): crawl posts from this date until now
            0,  # mongodb_write: 1 writes results to MongoDB (requires pymongo)
            0,  # mysql_write: 1 writes results to MySQL (requires pymysql)
            1,  # pic_download: 1 downloads original images
            0,  # video_download: 1 downloads videos
        )
        for followed_uid in follow.follow_list:
            # One crawler per followed user.
            crawler = Weibo(*crawl_options)
            crawler.start([followed_uid])

    except Exception as e:
        print('Error: ', e)
        traceback.print_exc()
Exemple #5
0
 def __init__(self,
              token=None,
              keyWord=None,
              weiboRef=None,
              weiboCookie=None,
              weiboSCF=None,
              weixinToken=None):
     """Enable each notification channel whose credentials were supplied.

     DingTalk needs both ``token`` and ``keyWord``; Weibo needs both
     ``weiboRef`` and ``weiboCookie``; SCF and Weixin need only their
     single argument.  The ``use*`` flags record which channels are on.
     """
     self.useDingTalk = bool(token and keyWord)
     self.useWeibo = bool(weiboRef and weiboCookie)
     self.useSCF = bool(weiboSCF)
     self.useWeixin = bool(weixinToken)

     if self.useDingTalk:
         webhook = (
             'https://oapi.dingtalk.com/robot/send?access_token=%s' % token)
         self.d = DingtalkChatbot(webhook)
         self.keyWord = keyWord
     if self.useWeibo:
         self.weibo = Weibo(weiboRef, weiboCookie)
     if self.useSCF:
         self.weiboSCF = weiboSCF
     if self.useWeixin:
         self.wxurl = 'https://sc.ftqq.com/%s.send' % weixinToken
Exemple #6
0
def main():
    """Alternate forever between an Ins update check and a Weibo directory check."""
    after_ins_check = 5  # seconds to wait after the Ins check
    after_weibo_check = 1200  # seconds to wait after the Weibo check

    ins = Ins()
    weibo = Weibo()
    ins.user = "******"

    while True:
        ins.checkUpdate()
        time.sleep(after_ins_check)
        weibo.checkNewDirs()
        time.sleep(after_weibo_check)
Exemple #7
0
 def postSingleFilm(self, url=None):
     """Post one film as a Weibo status with ``self.cover`` attached.

     When ``url`` is given the status text comes from
     ``self.parse_film_info(url)``; otherwise a film is picked from
     ``self.get_bangdan()``.  Nothing is posted when no status text is
     produced.
     """
     if url:
         status = self.parse_film_info(url)
     else:
         status = self.pick_a_film(self.get_bangdan())
     # Check *before* appending the hashtags: afterwards the text can never
     # be empty, and a None result would fail on the concatenation.
     if not status:
         return
     status += "\n#电影# #电视剧#"
     print(status)
     w = Weibo()
     try:
         w.login()
         w.postStatus(status, [self.cover])
         w.safeWaterFeeds()
     finally:
         # Always release the client, even when login or posting fails.
         w.tearDown()
 def load_session(self):
     """Restore ``self.weibo`` from ``SESSION_PATH`` or create a fresh client.

     A missing or unreadable file is ignored silently; a file that cannot
     be unpickled is logged.  In both cases a new ``Weibo()`` is used.
     """
     self.weibo = None
     try:
         with open(SESSION_PATH, 'rb') as f:
             # NOTE(security): unpickling can execute arbitrary code; this
             # is only acceptable while SESSION_PATH is a trusted local file.
             self.weibo = pickle.load(f)
     except OSError:  # the file could not be opened
         pass
     except (pickle.PickleError, AttributeError, EOFError, ImportError,
             IndexError):
         # Truncated or stale session files raise these rather than
         # PickleError; treat them all as "no usable session".
         logger.exception('反序列化Weibo时出错:')
         self.weibo = None
     if self.weibo is None:
         self.weibo = Weibo()
Exemple #9
0
    def _on_cookie_added(self, cookie):
        """Accept the dialog once a ``SUB`` cookie that passes the login check arrives.

        Cookies with any other name are ignored.  A valid cookie is stored
        in ``self._weibo_cookies`` before ``self.accept()`` is called; an
        invalid one is only logged at debug level.
        """
        if cookie.name() != b'SUB':
            return
        try:
            cookie_sub = cookie.value().data().decode()
            cookies = {'SUB': cookie_sub}
            if Weibo(cookies).is_login():
                self._weibo_cookies = cookies
                self.accept()
            else:
                logger.debug('无效的Cookie:%s', cookie_sub)
        except Exception:
            # Only ordinary errors are logged; KeyboardInterrupt/SystemExit
            # are no longer swallowed as they were by a bare ``except``.
            logger.exception('获取登录状态时出错:')
Exemple #10
0
 def reducer_get_user_info(self, key, _):
     """Yield ``(key, user_json)`` for ``key``, or ``(key, "fail")``.

     The crawl is attempted up to three times (to ride out a failing
     proxy).  The first successful attempt's ``wb.user.json()`` is logged
     and yielded; if all attempts raise, ``"fail"`` is yielded instead.
     """
     for _attempt in range(3):
         try:
             wb = Weibo([key])
             wb.start()
             user_json = wb.user.json()
             logger.info(user_json)
         except Exception:
             logger.error(f"{key} eroor")
             continue
         # The yield sits outside the try block so that closing the
         # generator (GeneratorExit) is never mistaken for a crawl failure.
         yield key, user_json
         return
     yield key, "fail"
Exemple #11
0
def run():
    """Fetch hot searches and hot topics, then write the README and archive.

    Each raw response is saved when it is truthy; the README and the
    archive markdown are both generated from the two result sets.
    """
    client = Weibo()

    # Hot searches
    searches, search_resp = client.get_hot_search()
    if search_resp:
        save_raw_content(search_resp.text, 'hot-search')

    # Hot topics
    topics, topic_resp = client.get_hot_topic()
    if topic_resp:
        save_raw_content(topic_resp.text, 'hot-topic')

    # Latest data
    save_readme(generate_readme(searches, topics))
    # Archive
    save_archive_md(generate_archive_md(searches, topics))
Exemple #12
0
def update_data():
    """Push new or changed Weibo/NGA items to LeanCloud and refresh local caches.

    For every object class, items that differ from the locally cached copy
    are wrapped as LeanCloud objects and saved in batches of 20.  The
    object-id map and the cached data for that class are then written back
    to disk.
    """
    nga = Nga()
    weibo = Weibo()
    object_data = {
        'Transfer': {
            'data': [] + weibo.new_items + nga.new_items,
            'id_key': 'id'
        },
    }
    batch_size = 20

    for name, info in object_data.items():
        id_key = info['id_key']
        cache_path = os.path.join('leancloud_data', name)
        cached = load_json(cache_path)

        changed_objects = []
        items_by_id = {}
        for item in info['data']:
            previous = cached.get(object_id_key(name, item.get(id_key)), {})
            if data_changed(previous, item):
                # A changed item without an id cannot be stored; skip it.
                if id_key not in item:
                    continue
                changed_objects.append(leancloud_object(name, item, id_key))
            items_by_id[item.get(id_key)] = item

        print(name + " Total Count:" + str(len(info['data'])))
        print(name + " Changed Count:" + str(len(changed_objects)))

        # Save in fixed-size batches.
        for start in range(0, len(changed_objects), batch_size):
            leancloud.Object.save_all(
                changed_objects[start:start + batch_size])

        # Record the ids of the saved objects and refresh the cached copy.
        for saved in changed_objects:
            map_key = object_id_key(name, saved.get(id_key))
            OBJECT_ID_MAP[map_key] = saved.id
            cached[map_key] = items_by_id[saved.get(id_key)]

        write_json('local_config/object_id_map.json', OBJECT_ID_MAP)
        write_json(cache_path, cached)
Exemple #13
0
def main():
    """Consume messages from the queue forever and post each one to Weibo.

    The literal message ``'debug'`` triggers ``weibo.debug`` instead of a
    post.  When handling fails, the message that was being processed (if
    any) is put back on the queue, the error is logged and the browser is
    refreshed.  The loop pauses 10 seconds between polls.
    """
    queue = Message(config.Redis, config.RedisKey)
    weibo = Weibo(config.ChromeDriver, callback)
    while True:
        # Reset on every pass so the except branch never sees an unbound
        # name or a stale message left over from an earlier iteration.
        msg = None
        try:
            msg = queue.getMessage()
            if msg is not None:
                msg = msg.decode()
                if msg == 'debug':
                    weibo.debug("debug")
                    continue
                log.info("检测到消息,准备发送")
                weibo.postWeibo(msg)
        except Exception:
            # Only re-queue when a message was actually taken off the queue.
            if msg is not None:
                queue.reAddMessage(msg)
            weibo.debug("exception")
            log.error("error: %s", traceback.format_exc())
            weibo.browser.refresh()
        time.sleep(10)
Exemple #14
0
def login(username, passwd):
    '''
    登录
    '''
    weibo = Weibo(username, passwd)
    login = weibo.login()
    if not login[0]:
        try:
            print login[1]
        except:
            print login[1].encode('utf-8')

        sys.exit(1)
    try:
        print 'success login\nuid= %s,' % login[1], 'nick=', login[2]
    except:
        print 'success login\nuid= %s,' % login[1], 'nick=', login[2].encode(
            'utf-8')
    return weibo
Exemple #15
0
def getWeibo():
    """Announce newly seen Weibo posts to every configured group.

    On the first call the ids currently exposed by the client are
    remembered.  On every call the three newest entries are inspected; any
    id not seen before is recorded and announced as either a repost or an
    original post (with its first picture, if any), followed by a link.
    Errors are reported through ``WARN``.
    """
    global weibo_id_array
    global firstcheck_weibo

    def text_segment(content):
        # One text element of the outgoing message.
        return {'type': 'text', 'data': {'text': content}}

    weibo = Weibo()
    try:
        INFO('check weibo')
        # First run: remember the ids that are already on the timeline.
        if firstcheck_weibo is True:
            INFO('first check weibo')
            weibo_id_array = weibo.IdArray
            firstcheck_weibo = False
        if firstcheck_weibo is False:
            # Look at the three newest posts.
            for idcount in range(3):
                # Ad slots carry id 0; ignore them.
                if int(weibo.IdArray[idcount]) == 0:
                    continue
                # Already recorded ids are not new posts.
                if weibo.IdArray[idcount] in weibo_id_array:
                    continue
                weibo_id_array.append(weibo.IdArray[idcount])

                if weibo.checkRetweet(idcount):
                    # Repost
                    msg = [
                        text_segment('小偶像刚刚转发了一条微博:\n'),
                        text_segment(
                            '%s\n' % weibo.getRetweetWeibo(idcount)),
                    ]
                else:
                    # Original post
                    msg = [
                        text_segment('小偶像刚刚发了一条新微博:\n'),
                        text_segment('%s\n' % weibo.getWeibo(idcount)),
                    ]
                    if weibo.checkPic(idcount):
                        # Only the first picture is attached.
                        msg.append({
                            'type': 'image',
                            'data': {
                                'file': '%s' % weibo.getPic(idcount)[0]}})
                        # Mention the total when there is more than one.
                        if len(weibo.getPic(idcount)) > 1:
                            msg.append(text_segment(
                                '\n(一共有%d张图喔)\n'
                                % len(weibo.getPic(idcount))))
                msg.append(
                    text_segment('传送门:%s' % weibo.getScheme(idcount)))

                for grpid in groupid():
                    bot.send_group_msg_async(
                        group_id=grpid, message=msg, auto_escape=False)
                    time.sleep(0.5)
    except Exception as e:
        WARN('error when getWeibo', e)
    finally:
        INFO('weibo check completed')
Exemple #16
0
def main(filepath):
    """Convert a saved Weibo text dump at ``filepath`` into a CSV file.

    The CSV is written next to the input: a trailing ``.txt`` extension is
    replaced by ``.csv``; for any other name ``.csv`` is appended, so the
    input file is never overwritten.
    """
    import os  # local import: the file's import header is not visible here

    # Only the extension is rewritten.  A plain str.replace("txt", "csv")
    # would also alter directory or file names that merely contain "txt".
    root, ext = os.path.splitext(filepath)
    if ext.lower() == ".txt":
        csv_path = root + ".csv"
    else:
        csv_path = filepath + ".csv"

    w = Weibo(0, filter=0)
    w.get_weibo_from_file(filepath)
    w.write_csv(csv_path)
#!/usr/bin/env python3
# dependencies
import sys
from datetime import datetime
from weibo import Weibo

# configuration parameters
from config import cookie
from config import filter
from config import connection_timeout
from config import pause_interval
from config import pause_time

# Update Weibo posts for one hard-coded user id.
# NOTE(review): `filter` is imported from config, yet the literal 0 is
# passed here — confirm which value is intended.
w = Weibo(5461287018, filter=0)
w.set_cookie(cookie)
# Copy the connection/pause settings from config onto the client.
w.connection_timeout = connection_timeout
w.pause_interval = pause_interval
w.pause_time = pause_time
w.start()
Exemple #18
0
from config import EMAIL, PASSWD, COOKIE_FILE, UID
from weibo import Weibo
import db
import os
import random
from time import sleep as _sleep

import requests_cache
# Configure requests_cache under the name 'cache' at import time.
requests_cache.configure('cache')

# Module-level client used by the functions below.
weibo = Weibo(EMAIL, PASSWD, COOKIE_FILE)

# presumably restores cookies written by an earlier dump_cookies() — verify
weibo.load_cookies()


def login():
    """Log in with the module-level client, then dump its cookies."""
    weibo.login()
    weibo.dump_cookies()


def get_friends(fans, follow):
    """Return the entries of ``fans`` that also occur in ``follow``.

    The order (and any repetition) of ``fans`` is kept; each fan is
    included once per occurrence when at least one equal entry exists in
    ``follow``.
    """
    return [
        fan for fan in fans
        if any(fan == followed for followed in follow)
    ]


def get_myrelation():
Exemple #19
0
#!/usr/bin/env python3
# dependencies
import sys
from datetime import datetime
from utilities import stream_tee
from weibo import Weibo

# configuration parameters
from config import cookie
from config import filter
from config import user_id
from config import connection_timeout
from config import pause_interval
from config import pause_time

# Logging: mirror stdout into a log file named after the current time.
logname = datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
_original_stdout = sys.stdout
with open(logname, "w+") as logfile:
    sys.stdout = stream_tee(_original_stdout, logfile)
    try:
        # Read Weibo posts
        # NOTE(review): `filter` is imported from config, yet the literal 0
        # is passed here — confirm which value is intended.
        w = Weibo(user_id, filter=0)
        w.set_cookie(cookie)
        w.connection_timeout = connection_timeout
        w.pause_interval = pause_interval
        w.pause_time = pause_time
        w.update()
    finally:
        # Detach the tee before the file is closed, so later output (for
        # example a traceback) is never written to a closed log file.
        sys.stdout = _original_stdout
Exemple #20
0
def getWeibo(i):
    """Scan one watched account and mail a notice for each matching new post.

    ``i`` is a config dict read for the keys ``contrainerID``, ``weiboID``,
    ``keyword`` (iterable of words that must all appear in the post) and
    ``shieldingWords`` (a string; when non-empty and present in the post,
    the notice is suppressed).  On the first scan of a container the
    current ids are only recorded.  Reposts are ignored.  Any exception is
    printed and swallowed.
    """
    contrainerID = i["contrainerID"]
    weibo = Weibo(contrainerID)
    weiboID = i["weiboID"]
    print(f"正在扫描 {weiboID}")
    try:
        # First scan: remember the ids that are already present.
        if first_check_dict[contrainerID] is True:
            weibo_id_dict[contrainerID] = weibo.IdArray
            first_check_dict[contrainerID] = False

        if first_check_dict[contrainerID] is False:
            seen_ids = weibo_id_dict[contrainerID]
            # Look at the three newest posts.
            for idcount in range(3):
                post_id = weibo.IdArray[idcount]
                # Ad slots carry id 0; ignore them.
                if int(post_id) == 0:
                    continue
                # Already recorded ids are not new posts.
                if post_id in seen_ids:
                    continue
                seen_ids.append(post_id)

                if weibo.checkRetweet(idcount):
                    print("[INFO] IGNORE REPOST TEXT")
                else:
                    text = weibo.getWeibo(idcount)
                    url = weibo.getScheme(idcount)

                    if all(word in text for word in i['keyword']):
                        print("FIND NEW MESSAGE")
                        shielding = i['shieldingWords']
                        # One decision per post: previously an empty
                        # shieldingWords sent the mail and then also
                        # printed the "shielding words found" notice.
                        if shielding != "" and shielding in text:
                            print("[INFO] FIND SHIELDINGWORDS,")
                        else:
                            title = f"{weiboID} 微博更新提醒"
                            mail_msg = text + "\n" + url
                            send(mail_msg, title)
                    else:
                        print("[INFO] TEXT DON'T MATCH")

                time.sleep(0.5)

    except Exception as e:
        print('[ERROE] GET WEIBO FAILED', e)
Exemple #21
0
import sys, os
from zhan import Zhan
from weibo import Weibo
from config import *


class MyData(db.Model):
    """Persistent record whose ``last_id`` holds the id of the last handled post."""
    # Required string property; the script below reads it at start-up and
    # writes it back after posting.
    last_id = db.StringProperty(required=True)


if __name__ == "__main__":
    # Load (or create with a seed id) the record that tracks the last
    # handled post.
    data = MyData.get_or_insert(key_name='mydata',
                                last_id="feed_3674946092032508824")
    last_post_id = data.last_id

    weibo = Weibo()
    zhan = Zhan("ishoothust")

    # Send one status per new post, remembering the newest id seen.
    for post in zhan.get_new_posts(last_post_id):
        image_url = post["image_url"].encode("utf-8")
        title = post["title"].encode("utf-8")
        count = post["photo_count"] - 1
        # Mention the remaining photos only when there are any.
        teaser = "还有%d张精彩照片呦:" % count if count else " "
        link = post["link"].encode("utf-8")
        msg = "#我们爱拍华科#%s " % title + teaser + link
        weibo.send(msg, image_url)
        last_post_id = post["id"]

    # Persist the progress marker.
    data.last_id = last_post_id
    data.put()
Exemple #22
0
#!/usr/bin/env python3
# dependencies
import sys
from datetime import datetime
from weibo import Weibo

# configuration parameters
from config import cookie
from config import filter
from config import user_id
from config import connection_timeout
from config import pause_interval
from config import pause_time

# Update Weibo posts for every configured user id.
# NOTE(review): `filter` is imported from config, yet the literal 0 is
# passed here — confirm which value is intended.
for _id in user_id:
    w = Weibo(_id, filter=0)
    w.set_cookie(cookie)
    # Copy the connection/pause settings from config onto the client.
    w.connection_timeout = connection_timeout
    w.pause_interval = pause_interval
    w.pause_time = pause_time
    w.update()

# Build a new timeline for Yang Bingyi (first configured user id)
from timeline import build_timeline
build_timeline(int(user_id[0]), template_name="PinkStar")

# Build a new timeline for Feng Xiaofei (second configured user id)
build_timeline(int(user_id[1]), template_name="PinkStarReverse")
Exemple #23
0
def fix_images_over_nine():
    try:
        wb = Weibo(1, '2010-01-01', 0, 0, 0, 0)

        weibo_update_list = []

        # count = 1
        # tmp_count = 0
        # random_pages = random.randint(1, 5)
        """建立数据库连接"""
        conn = pymysql.connect(host=dbinfo_host,
                               user=dbinfo_user,
                               passwd=dbinfo_password,
                               db=dbinfo_db)
        cursor = conn.cursor()
        n = cursor.execute(
            "SELECT WEIBO_ID FROM weibo_info w JOIN weibo_user_info u ON w.USER_ID = u.USER_ID WHERE LENGTH(PICS) - LENGTH(REPLACE(PICS, ',', '')) = 8 AND w.CREATE_TIME >= '2019-10-01 00:00:00' AND w.CREATE_TIME < '2019-12-01 00:00:00' AND u.FLAG = '1'  AND u.STATUS = '1' AND u.BAN <> '1' ORDER BY WEIBO_ID"
        )
        conn.close()
        if n:
            # for row in cursor.fetchall():
            for row in tqdm(cursor.fetchall(), desc='progress'):
                weibo_id = row[0]
                print '爬取微博id:' + weibo_id
                weibo = wb.get_long_weibo(weibo_id)
                print '微博内容:' + str(weibo)
                if weibo:
                    pics = weibo['pics']
                    if pics:
                        if pics.find(",") >= 0:
                            pics_arr = pics.split(",")
                            if pics_arr.__len__() > 9:
                                print "ok"
                                weibo_update = {
                                    'weibo_id': row[0],
                                    'pics': pics
                                }
                                weibo_update_list.append(weibo_update)
                sleep(random.randint(1, 5))
                # if count - tmp_count == random_pages and count < n:
                #     sleep(random.randint(1, 3))
                #     tmp_count = count
                #     random_pages = random.randint(7, 20)
                # count = count + 1

        print "需要更新微博数:", len(weibo_update_list)
        conn = pymysql.connect(host=dbinfo_host,
                               user=dbinfo_user,
                               passwd=dbinfo_password,
                               db=dbinfo_db)
        cursor = conn.cursor()
        for weibo_update in weibo_update_list:
            weibo_id = weibo_update['weibo_id']
            pics = weibo_update['pics']
            cursor.execute(
                "UPDATE weibo_info SET PICS = %s WHERE WEIBO_ID = %s",
                (pics, weibo_id))
        conn.commit()
        conn.close()
    except Exception as e:
        print('Error: ', e)
        traceback.print_exc()