def __init__(self, interval, live_name, live_url, mysql): super().__init__(interval) self.browser = Chrome(cache_path=r"E:\Temp") # 打开Chrome浏览器 self.browser.get(live_url) # 访问目标斗鱼主播的直播间 time.sleep(10) self.mysql = mysql time_string = time.strftime("%Y%m%d_%H%M", time.localtime(time.time())) self.table_name = "douyu_{}".format(time_string) sql_create = "CREATE TABLE live_barrage.`douyu_{}` (" \ "`bid` int(11) NOT NULL AUTO_INCREMENT COMMENT '弹幕ID(barrage id)'," \ "`type` varchar(60) DEFAULT NULL COMMENT '弹幕类型'," \ "`fetch_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '弹幕抓取时间(约等于弹幕发布时间)'," \ " `user_name` varchar(40) DEFAULT NULL COMMENT '弹幕发布者名称'," \ " `user_level` int(11) DEFAULT NULL COMMENT '弹幕发布者等级'," \ " `content` varchar(100) DEFAULT NULL COMMENT '弹幕内容'," \ " `text` varchar(100) DEFAULT NULL COMMENT '弹幕其他信息'," \ " PRIMARY KEY (`bid`)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='斗鱼弹幕({})'" mysql.create(sql_create.format(time_string, live_name)) print("开始抓取斗鱼直播弹幕.....") self.total_time = 0 self.total_num = 0 self.barrage_id_list = list() self.data_id_max = 0
def __init__(self, interval, live_name, live_url, mysql): super().__init__(interval) self.browser = Chrome(cache_path=r"E:\temp") self.browser.get(live_url) # 访问目标虎牙主播的直播间 self.mysql = mysql time_string = time.strftime("%Y%m%d_%H%M", time.localtime(time.time())) self.table_name = "huya_{}".format(time_string) sql_create = "CREATE TABLE live_barrage.`huya_{}` (" \ "`bid` int(11) NOT NULL AUTO_INCREMENT COMMENT '弹幕ID(barrage id)'," \ "`type` char(10) DEFAULT NULL COMMENT '弹幕类型'," \ "`fetch_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '弹幕抓取时间(约等于弹幕发布时间)'," \ " `user_name` varchar(40) DEFAULT NULL COMMENT '弹幕发布者名称'," \ " `user_noble` int(11) DEFAULT NULL COMMENT '弹幕发布者贵族等级'," \ " `content` varchar(100) DEFAULT NULL COMMENT '弹幕内容'," \ " `gift_name` varchar(40) DEFAULT NULL COMMENT '赠送礼物名称'," \ " `gift_num` int(11) DEFAULT '0' COMMENT '赠送礼物数量'," \ " `other` varchar(60) DEFAULT NULL COMMENT '弹幕其他信息'," \ " PRIMARY KEY (`bid`)" \ ") ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='虎牙弹幕({})';" self.mysql.create(sql_create.format(time_string, live_name)) print("开始抓取虎牙直播弹幕.....") self.total_time = 0 self.total_num = 0 self.data_id_max = 0
def running(self): driver = Chrome(cache_path=r"E:\temp") driver.get("https://cc.julive.com/project/s") bs = BeautifulSoup(driver.page_source, 'lxml') # 将网页转化为BeautifulSoup结构 city_dict = dict() for element_city in bs.select( "body > div.container-5-2.container > div.header-v5.header-v5-2.header-normal > div > div.inn-p > div.city-position.city-tip > div.city-change-list-new > ul > li > ul > li> a" ): city_name = element_city.text city_url = element_city["href"] city_dict[city_name] = city_url return city_dict
def crawler(): browser = Chrome(cache_path=r"E:\temp") browser.get("https://cc.julive.com/project/s") bs = BeautifulSoup(browser.page_source, 'lxml') # 将网页转化为BeautifulSoup结构 city_dict = dict() for element_city in bs.select( "body > div.container-5-2.container > div.header-v5.header-v5-2.header-normal > div > div.inn-p > div.city-position.city-tip > div.city-change-list-new > ul > li > ul > li> a" ): city_name = element_city.text city_url = element_city["href"] city_dict[city_name] = city_url print(city_name, city_url) Utils.io.write_json("julive_city_url_20191217.json", city_dict)
def crawler(live_list_path): driver = Chrome(cache_path=r"E:\Temp") account_list = tool.io.load_string(live_list_path) spider = SpiderDouyuSubscribe(driver) for account_url in account_list.split("\n"): text_subscribe = spider.running(account_url) print(account_url, text_subscribe)
def running(self): driver = Chrome(cache_path=r"E:\temp") # ChromeDriver可执行文件的路径 driver.get("http://piaofang.maoyan.com/dashboard/web-heat") time.sleep(1) res = [] for movie_label in driver.find_elements_by_css_selector( "#app > div > div > div.dashboard-content > div.dashboard-list.dashboard-left.bg > div.movielist-container > div > table > tbody > tr" ): res.append([ movie_label.find_element_by_class_name("moviename-index").text, movie_label.find_element_by_class_name("moviename-name").text, movie_label.find_element_by_class_name("moviename-info").text, movie_label.find_element_by_class_name("heat-text").text, movie_label.find_element_by_class_name("last-col").text ]) return res
def crawler(): driver = Chrome(cache_path=r"E:\Temp") # 采集城市编码列表 spider_city_code = SpiderCityCode(driver) result1 = spider_city_code.run() Utils.io.write_json("anjuke_city_code.json", result1) # 采集城市房源数量 city_code_list = Utils.io.load_json("anjuke_city_code.json") city_info_list = Utils.io.load_json("anjuke_city_infor.json", default={}) spider_city_info = SpiderCityInfo(driver) for city_name, city_code in city_code_list.items(): if city_name not in city_info_list: city_info_list[city_name] = spider_city_info.run(city_code=city_code) Utils.io.write_json("anjuke_city_info.json", city_info_list) time.sleep(2) driver.quit()
def crawler(): browser = Chrome(cache_path=r"E:\temp") account_list = Utils.io.load_string("huya_account_list.txt") for account_url in account_list.split("\n"): browser.get(account_url) # 读取直播间订阅数量 text_subscribe = "" try: label_subscribe = browser.find_element_by_xpath( '//*[@id="activityCount"]') if label_subscribe is not None: text_subscribe = label_subscribe.text except NoSuchElementException: pass # 读取直播间ID text_id = "" try: label_id = browser.find_element_by_css_selector( '#J_roomHeader > div.room-hd-l > div.host-info > div.host-detail.J_roomHdDetail > span.host-rid' ) if label_id is not None: text_id = label_id.text except NoSuchElementException: pass print(account_url, text_id, text_subscribe) time.sleep(3)
def crawler(): driver = Chrome(cache_path=r"E:\temp") account_list = tool.io.load_string("huya_account_list.txt") spider = SpiderHuyaSubscribe(driver) for account_url in account_list.split("\n"): text_id, text_subscribe = spider.running(account_url) print(account_url, text_id, text_subscribe) time.sleep(3)
def crawler(file_name): """ LeetCode题目列表爬虫 :param file_name: 抓取结果存储文件地址 """ selenium = Chrome(cache_path=r"E:\Temp") # 启动Chrome浏览器驱动 selenium.get(PROBLEMS_SET_URL) # 打开题库页面 # 获取题目列表(Json格式) problems_all_json = requests.get(PROBLEMS_ALL_API).json() print("解析题目总数:", problems_all_json["num_total"]) # 解析题目列表(生成problem实例列表):key=题目ID,value=题目的problem实例 result_problems = {} for problem in problems_all_json["stat_status_pairs"]: problem_elem = Problem(problem) result_problems[problem_elem.id] = problem_elem # 获取题目标签(Json格式)并将结果写入到题目列表中 problems_tags_json = requests.get(PROBLEMS_TAGS_API).json() for topic in problems_tags_json["topics"]: tag_name = topic["translatedName"] if topic[ "translatedName"] else topic["name"] for qid in topic["questions"]: if qid in result_problems: result_problems[qid].add_tag(tag_name) else: print("题目ID未找到:", qid, tag_name) # 获取题目翻译(Json格式)并将结果写入到题目列表中 translations_json = selenium.post(GRAPHQL_API, json.dumps(GRAPHQL_QUERY_TRANSLATIONS), payload=True) for problem in translations_json["data"]["translations"]: if (qid := int(problem["questionId"])) in result_problems: result_problems[qid].title = problem["title"]
def crawler(live_list_path): driver = Chrome(cache_path=r"E:\Temp") account_list = Utils.io.load_string(live_list_path) for account_url in account_list.split("\n"): driver.get(account_url) time.sleep(3) text_subscribe = "" for _ in range(10): try: label_subscribe = driver.find_element_by_xpath( '//*[@id="js-player-title"]/div/div[4]/div/span') if label_subscribe.text is not None and label_subscribe.text != "": text_subscribe = label_subscribe.text break time.sleep(1) except NoSuchElementException: time.sleep(1) print(account_url, text_subscribe)
def crawler(live_name, live_url, mysql):
    driver = Chrome(cache_path=r"E:\Temp")  # open the Chrome browser
    spider_bilibili_barrage = SpiderBilibiliBarrage(driver=driver, live_url=live_url)

    # create the target data table
    table_name = "bilibili_{}".format(time.strftime("%Y%m%d_%H%M", time.localtime(time.time())))
    sql_create = "CREATE TABLE live_barrage.`{}` (" \
                 "`bid` int(11) NOT NULL AUTO_INCREMENT COMMENT '弹幕ID(barrage id)'," \
                 "`type` varchar(60) DEFAULT NULL COMMENT '弹幕类型'," \
                 "`fetch_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '弹幕抓取时间(约等于弹幕发布时间)'," \
                 "`user_name` varchar(40) DEFAULT NULL COMMENT '弹幕发布者名称'," \
                 "`user_id` int(11) DEFAULT NULL COMMENT '弹幕发布者ID'," \
                 "`content` varchar(100) DEFAULT NULL COMMENT '弹幕内容'," \
                 "PRIMARY KEY (`bid`)" \
                 ") ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='Bilibili弹幕({})';"
    mysql.create(sql_create.format(table_name, live_name))
    print("Start fetching Bilibili live barrages.....")

    total_time = 0
    total_num = 0
    barrage_num = 0
    for num in range(36000):
        start_time = time.time()
        barrage_list = spider_bilibili_barrage.running()
        mysql.insert(table_name, barrage_list)
        total_num += 1
        total_time += 1000 * (time.time() - start_time)
        wait_time = 0.5
        if wait_time > (time.time() - start_time):
            time.sleep(0.5 - (time.time() - start_time))
        barrage_num += len(barrage_list)
        print("New barrages in this window:", len(barrage_list), "items,",
              "(total:", barrage_num, ")", "|",
              "run time:", round(total_time / total_num), "ms",
              "(", round(total_time), "/", total_num, ")")
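# The pacing logic above (measure the cost of one fetch, then sleep for the
# remainder of a 0.5-second slot) recurs in all three barrage crawlers. A
# hypothetical helper that factors the pattern out, shown only to make the
# timing arithmetic explicit; it is not part of the original code:
import time


def run_at_interval(fetch_once, interval=0.5, rounds=int(36000 / 0.5)):
    """Call fetch_once() once per `interval` seconds, `rounds` times in total."""
    for _ in range(rounds):
        start_time = time.time()
        fetch_once()
        elapsed = time.time() - start_time
        if elapsed < interval:  # only sleep if the fetch finished early
            time.sleep(interval - elapsed)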
from typing import Dict, List

import crawlertool as tool
from Selenium4R import Chrome


class SpiderAnjukeCityCodeList(tool.abc.SingleSpider):
    """Anjuke city-code list spider"""

    def __init__(self, driver):
        self.driver = driver

    def running(self) -> List[Dict]:
        self.driver.get("https://www.anjuke.com/sy-city.html")
        result = []
        for city_label in self.driver.find_elements_by_css_selector(
                "body > div.content > div > div.letter_city > ul > li > div > a"
        ):
            city_name = city_label.text
            city_code = city_label.get_attribute("href").replace("https://", "").replace(".anjuke.com/", "")
            result.append({"city_name": city_name, "city_code": city_code})
        return result


if __name__ == "__main__":
    driver = Chrome(cache_path=r"E:\Temp")
    print(SpiderAnjukeCityCodeList(driver).running())
    driver.quit()
tweet_info["text"] = tweet_content tweet_info["replies"] = tweet_replies tweet_info["retweets"] = tweet_retweets tweet_info["likes"] = tweet_likes tweet_list.append(tweet_info) # 向下滚动到最下面的一条推文 if last_label_tweet is not None: driver.execute_script("arguments[0].scrollIntoView();", last_label_tweet) # 滑动到推文标签 time.sleep(1) else: break return tweet_list if __name__ == "__main__": selenium = Chrome(cache_path=r"E:\temp") tweet_template = { "tweet_id": None, "is_retweet": 0, "time": None, "text": None, "replies": None, "retweets": None, "likes": None } tweets = crawler(selenium, "realDonaldTrump", tweet_template, since=dt.date(2020, 9, 10), until=dt.date(2020, 9, 11)) # 闭开区间
""" 猫眼网播热度采集 需要第三方模块: Selenium4R >= 0.0.3 @author: ChangXing @version: 1.0 @create: 2020.05.26 @revise: - """ import time from Selenium4R import Chrome if __name__ == "__main__": browser = Chrome(cache_path=r"E:\temp") # ChromeDriver可执行文件的路径 browser.get("http://piaofang.maoyan.com/dashboard/web-heat") time.sleep(1) for movie_label in browser.find_elements_by_css_selector( "#app > div > div > div.dashboard-content > div.dashboard-list.dashboard-left.bg > div.movielist-container > div > table > tbody > tr"): print("排名:", movie_label.find_element_by_class_name("moviename-index").text) print("名称:", movie_label.find_element_by_class_name("moviename-name").text) print("信息:", movie_label.find_element_by_class_name("moviename-info").text) print("信息:", movie_label.find_element_by_class_name("heat-text").text) print("信息:", movie_label.find_element_by_class_name("last-col").text)
def crawler(live_name, live_url, mysql):
    browser = Chrome(cache_path=r"E:\temp")
    browser.get(live_url)  # open the target Huya streamer's live room

    time_string = time.strftime("%Y%m%d_%H%M", time.localtime(time.time()))
    table_name = "huya_{}".format(time_string)
    sql_create = "CREATE TABLE live_barrage.`huya_{}` (" \
                 "`bid` int(11) NOT NULL AUTO_INCREMENT COMMENT '弹幕ID(barrage id)'," \
                 "`type` char(10) DEFAULT NULL COMMENT '弹幕类型'," \
                 "`fetch_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '弹幕抓取时间(约等于弹幕发布时间)'," \
                 "`user_name` varchar(40) DEFAULT NULL COMMENT '弹幕发布者名称'," \
                 "`user_noble` int(11) DEFAULT NULL COMMENT '弹幕发布者贵族等级'," \
                 "`content` varchar(100) DEFAULT NULL COMMENT '弹幕内容'," \
                 "`gift_name` varchar(40) DEFAULT NULL COMMENT '赠送礼物名称'," \
                 "`gift_num` int(11) DEFAULT '0' COMMENT '赠送礼物数量'," \
                 "`other` varchar(60) DEFAULT NULL COMMENT '弹幕其他信息'," \
                 "PRIMARY KEY (`bid`)" \
                 ") ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='虎牙弹幕({})';"
    mysql.create(sql_create.format(time_string, live_name))
    print("Start fetching Huya live barrages.....")

    total_time = 0
    total_num = 0
    data_id_max = 0
    for num in range(int(36000 / 0.5)):
        start_time = time.time()
        label_html = browser.find_element_by_id("chat-room__list").get_attribute("innerHTML")
        bs = BeautifulSoup(label_html, 'lxml')  # parse the chat-list HTML into a Soup object
        barrage_list = []
        for label in bs.select("li"):
            data_id = int(label["data-id"])  # extract: barrage ID
            if data_id <= data_id_max:  # use the barrage ID to decide whether it has already been fetched
                if data_id > data_id_max - 101:
                    continue
            data_id_max = data_id
            barrage_info = {
                "bid": data_id,  # barrage ID
                "type": "",  # barrage type
                "user_name": "",  # name of the barrage publisher
                "user_noble": 0,  # noble level of the barrage publisher
                "content": "",  # barrage content
                "gift_name": "",  # gift name
                "gift_num": 0,  # gift count
                "other": ""  # other information
            }
            category = str(label.select_one("li > div")["class"])  # extract: barrage type
            if "msg-smog" in category:  # smog-type barrage (ordinary barrage)
                barrage_info["type"] = "SG"
                barrage_info["user_name"] = label.select_one("li > div > span:nth-child(1)").text
                barrage_info["content"] = label.select_one("li > div > span:nth-child(3)").text
            elif "msg-normal" in category:  # normal-type barrage (ordinary barrage)
                barrage_info["type"] = "NM"
                barrage_info["user_name"] = label.select_one("li > div > span:nth-child(2)").text
                barrage_info["content"] = label.select_one("li > div > span:nth-child(5)").text
            elif "msg-nobleEnter" in category:  # nobleEnter-type barrage (a noble enters the room)
                barrage_info["type"] = "NE"
                barrage_info["user_name"] = label.select_one("li > div > div > p > span:nth-child(1)").text
                # the noble level is encoded in the wrapper's class list; extracting
                # its digits is an assumption about Huya's class naming (the original
                # assigned the raw class list here, which cannot fit the int column)
                noble_class = " ".join(label.select_one("li > div > div")["class"])
                if noble_match := re.search("[0-9]+", noble_class):
                    barrage_info["user_noble"] = int(noble_match.group())
                barrage_info["content"] = "驾临直播间"
            elif "msg-nobleSpeak" in category:  # nobleSpeak-type barrage (a noble speaks)
                barrage_info["type"] = "NS"
                barrage_info["user_name"] = label.select_one("li > div > p > span:nth-child(2)").text
                # same assumption as above: pull the numeric noble level out of the
                # class list instead of calling int() on the list itself
                noble_class = " ".join(label.select_one("li > div")["class"])
                if noble_match := re.search("[0-9]+", noble_class):
                    barrage_info["user_noble"] = int(noble_match.group())
                barrage_info["content"] = label.select_one("li > div > p > span:nth-child(5)").text
            elif "tit-h-send" in category:  # send-type notice (a gift is sent)
                barrage_info["type"] = "SD"
                barrage_info["user_name"] = label.select_one("li > div > span:nth-child(1)").text
                barrage_info["gift_name"] = label.select_one("li > div > span:nth-child(3) > img")["alt"]
                barrage_info["gift_num"] = int(label.select_one("li > div > span:nth-child(4) > img").text)
            elif "msg-onTVLottery" in category:  # onTVLottery-type barrage (TV lottery)
                barrage_info["type"] = "TV"
                barrage_info["user_name"] = label.select_one("li > div > span:nth-child(2)").text
                barrage_info["content"] = label.select_one("li > div > div > span").text
            elif "msg-auditorSys" in category:  # auditorSys-type notice (system notice)
                barrage_info["type"] = "AS"
                barrage_info["other"] = label.text
            elif "msg-sys" in category:  # msg-sys-type notice (system notice)
                barrage_info["type"] = "SY"
                barrage_info["other"] = label.text
            else:  # any other type
                barrage_info.update(type="OT", other="弹幕名称" + category)
            barrage_list.append(barrage_info)
        mysql.insert(table_name, barrage_list)
        total_num += 1
        total_time += 1000 * (time.time() - start_time)
        wait_time = 0.5
        if wait_time > (time.time() - start_time):
            time.sleep(0.5 - (time.time() - start_time))
        print("New barrages in this window:", len(barrage_list), "items,",
              "(total:", data_id_max, ")", "|",
              "run time:", round(total_time / total_num), "ms",
              "(", round(total_time), "/", total_num, ")")
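# Every branch above chains .text or ["..."] directly onto select_one(), which
# raises AttributeError (or TypeError) as soon as Huya changes its DOM and the
# selector matches nothing. A defensive variant is sketched below; it is an
# alternative, not what the original code does:
def select_text(label, css, default=""):
    """Return the stripped text of the first element matching `css`, or `default`."""
    node = label.select_one(css)
    return node.text.strip() if node is not None else default

# usage, e.g.: barrage_info["user_name"] = select_text(label, "li > div > span:nth-child(1)")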
                break
    return tweet_list


if __name__ == "__main__":
    setting = Utils.io.load_json(r"E:\Github\ChangxingJiang\setting.json")
    mysql = Utils.db.MySQL(
        host=setting["Huabang"]["host"],
        user=setting["Huabang"]["user"],
        password=setting["Huabang"]["password"],
        database=setting["Huabang"]["database"]
    )
    selenium = Chrome(cache_path=r"E:\temp")  # open a Selenium-controlled Chrome browser
    if "Huabang" in env.DATA and "Media List" in env.DATA["Huabang"]:
        for media_item in env.DATA["Huabang"]["Media List"]:
            # if media_item[0] != 211:
            #     continue
            print("Start crawling media:", media_item[1], "(", media_item[0], ")",
                  "-", media_item[3], "(", media_item[2], ")")
            tweet_template = {
                "media_id": media_item[0],
                "media_name": media_item[1],
                "tweet_id": None,
                "is_retweet": 0,
                "time": None,
                "text": None,
                "replies": None,
                "retweets": None,
            tweet_info = copy.deepcopy(template)
            tweet_info["tweet_id"] = tweet_id
            tweet_info["time"] = tweet_time
            tweet_info["text"] = tweet_content
            tweet_info["replies"] = tweet_replies
            tweet_info["retweets"] = tweet_retweets
            tweet_info["likes"] = tweet_likes
            tweet_info["from_user"] = tweet_from_user
            tweet_info["from_content"] = tweet_from_content
            tweet_list.append(tweet_info)

        # scroll down to the last tweet currently loaded
        if last_label_tweet is not None:
            driver.execute_script("arguments[0].scrollIntoView();", last_label_tweet)  # scroll to the tweet element
            time.sleep(1)
        else:
            break
    return tweet_list


if __name__ == "__main__":
    selenium = Chrome()  # open a Selenium-controlled Chrome browser
    tweets = crawler(selenium, "appledaily_hk", {},
                     since=dt.date(2020, 7, 20), until=dt.date(2020, 7, 24))
    print(tweets)
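# The scrolling loop shared by these Twitter crawlers follows one skeleton:
# parse the tweets currently rendered, scroll the last one into view to trigger
# lazy loading, and stop once a pass yields nothing new. A schematic version
# with hypothetical find_tweets/parse_tweet callables (the real code inlines
# both and keys on the rendered tweet elements instead):
import time


def scroll_collect(driver, find_tweets, parse_tweet, pause=1):
    collected = {}
    while True:
        labels = find_tweets(driver)  # tweet elements currently in the DOM
        count_before = len(collected)
        for label in labels:
            tweet = parse_tweet(label)
            collected.setdefault(tweet["tweet_id"], tweet)  # dedup by tweet ID
        if not labels or len(collected) == count_before:
            break  # nothing new was rendered; assume the timeline is exhausted
        driver.execute_script("arguments[0].scrollIntoView();", labels[-1])
        time.sleep(pause)  # give lazily loaded tweets time to appear
    return list(collected.values())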
def crawler(live_name, live_url, mysql):
    browser = Chrome(cache_path=r"E:\Temp")  # open the Chrome browser
    browser.get(live_url)  # open the target Douyu streamer's live room
    time.sleep(10)

    time_string = time.strftime("%Y%m%d_%H%M", time.localtime(time.time()))
    table_name = "douyu_{}".format(time_string)
    sql_create = "CREATE TABLE live_barrage.`douyu_{}` (" \
                 "`bid` int(11) NOT NULL AUTO_INCREMENT COMMENT '弹幕ID(barrage id)'," \
                 "`type` varchar(60) DEFAULT NULL COMMENT '弹幕类型'," \
                 "`fetch_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '弹幕抓取时间(约等于弹幕发布时间)'," \
                 "`user_name` varchar(40) DEFAULT NULL COMMENT '弹幕发布者名称'," \
                 "`user_level` int(11) DEFAULT NULL COMMENT '弹幕发布者等级'," \
                 "`content` varchar(100) DEFAULT NULL COMMENT '弹幕内容'," \
                 "`text` varchar(100) DEFAULT NULL COMMENT '弹幕其他信息'," \
                 "PRIMARY KEY (`bid`)" \
                 ") ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='斗鱼弹幕({})';"
    mysql.create(sql_create.format(time_string, live_name))
    print("Start fetching Douyu live barrages.....")

    total_time = 0
    total_num = 0
    # screenshot = 0
    barrage_id_list = list()
    data_id_max = 0
    for num in range(int(36000 / 0.5)):
        start_time = time.time()
        label_html = browser.find_element_by_id("js-barrage-list").get_attribute("innerHTML")
        soup = BeautifulSoup(label_html, 'lxml')  # parse the barrage-list HTML into a Soup object
        barrage_list = []
        for label in soup.select("li"):
            bid = str(label["id"])  # extract: barrage ID
            if bid in barrage_id_list:
                continue
            barrage_id_list.append(bid)
            if len(barrage_id_list) > 200:
                barrage_id_list.remove(barrage_id_list[0])
            barrage_info = {
                "type": "",  # barrage type
                "user_name": "",  # name of the barrage publisher
                "user_level": 0,  # level of the barrage publisher
                "content": "",  # barrage content
                "text": ""  # other information
            }
            type_class = label.select_one("li > div")["class"]
            if "Barrage-notice" in type_class and "normalBarrage" not in type_class:
                barrage_info["type"] = "NOTICE"
            elif "normalBarrage" in type_class:
                barrage_info["type"] = "NORMAL"
            elif "Barrage-userEnter" in type_class:
                barrage_info["type"] = "ENTER"
            elif "Barrage-message" in type_class:
                barrage_info["type"] = "MESSAGE"
            for info_label in label.select("li > div > span"):
                info_label_class = info_label["class"]
                if "UserLevel" in info_label_class:
                    barrage_info["user_level"] = re.search("[0-9]+", info_label["title"]).group()
                elif "Barrage-nickName" in info_label_class:
                    barrage_info["user_name"] = info_label.text.replace(" ", "")
                elif "Barrage-content" in info_label_class:
                    barrage_info["content"] = info_label.text.replace(" ", "")
                elif "Barrage-text" in info_label_class:
                    barrage_info["text"] = info_label.text.replace(" ", "")
            barrage_list.append(barrage_info)
        if len(barrage_list) < 200:
            mysql.insert(table_name, barrage_list)
            total_num += 1
            total_time += 1000 * (time.time() - start_time)
            print("New barrages in this window:", len(barrage_list), "items,",
                  "(total:", data_id_max, ")", "|",
                  "run time:", round(total_time / total_num), "ms",
                  "(", round(total_time), "/", total_num, ")")
        else:
            total_num += 1
            total_time += 1000 * (time.time() - start_time)
            print("The barrage list did not auto-scroll in this window...")
        wait_time = 0.5
        if wait_time > (time.time() - start_time):
            time.sleep(0.5 - (time.time() - start_time))
        data_id_max += len(barrage_list)
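# The manual dedup bookkeeping above (append each barrage ID, then drop the
# oldest entry once the list exceeds 200) can be expressed with a bounded
# deque. A sketch of the equivalent bookkeeping, not a change to the original:
from collections import deque

barrage_id_seen = deque(maxlen=200)  # the oldest IDs fall out automatically

# inside the parsing loop the list bookkeeping then reduces to:
#     if bid in barrage_id_seen:
#         continue
#     barrage_id_seen.append(bid)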
"[0-9]+", feedback_item): item["retweets"] = int(pattern.group()) if "喜欢" in feedback_item: if pattern := re.search( "[0-9]+", feedback_item): item["likes"] = int(pattern.group()) item_list.append(item) # 向下滚动到最下面的一条推文 if last_label_tweet is not None: self.driver.execute_script("arguments[0].scrollIntoView();", last_label_tweet) # 滑动到推文标签 self.console("执行一次向下翻页...") time.sleep(3) else: break return item_list # ------------------- 单元测试 ------------------- if __name__ == "__main__": driver = Chrome(cache_path=r"E:\Temp") print( SpiderTwitterAccountPost(driver).running( user_name=SpiderTwitterAccountPost.get_twitter_user_name( "https://twitter.com/zaobaosg"), since_date=dt.date(2020, 10, 1), until_date=dt.date(2020, 10, 7)))