def doBatch(tasks, username, password):
    """Log in and run every task list found in ``tasks``.

    ``tasks`` is a dict; each of the following keys is optional and
    defaults to an empty list:

    * ``'tis'`` -- entries with ``'hid'``; each is passed to
      ``helper.doHFollow`` and then ``helper.doHSignIn``.
    * ``'tls'`` -- entries with ``'hid'``, ``'text'``, ``'number'`` and
      ``'commentThreshold'``, passed to ``helper.doHSalvage``.
    * ``'tps'`` -- entries with ``'hid'``, ``'text'`` and ``'picture'``,
      passed to ``helper.doHPost``.

    A 3 second pause follows every processed entry.

    Returns ``False`` when ``client.state`` is falsy after the login
    attempt, ``True`` once all lists have been processed.  The client is
    logged out in both cases.

    Raises ``KeyError`` if an entry lacks one of the keys listed above.
    """
    client = weibo.Weibo()
    client.login(username, password)
    try:
        if not client.state:
            return False

        for ti in tasks.get('tis', []):
            hid = ti['hid']
            helper.doHFollow(client, hid)
            helper.doHSignIn(client, hid)
            time.sleep(3)

        for tl in tasks.get('tls', []):
            helper.doHSalvage(client, tl['hid'], tl['text'], tl['number'],
                              tl['commentThreshold'])
            time.sleep(3)

        for tp in tasks.get('tps', []):
            helper.doHPost(client, tp['hid'], tp['text'], tp['picture'])
            time.sleep(3)

        return True
    finally:
        # Previously only the failed-login path logged out; the session
        # stayed open after a successful run or when a helper raised.
        client.logout()
def main():
    """Start the Weibo and mail workers, wait for them, then idle forever.

    Both workers are built from one shared ``config.Config()``.  Each is
    started with a 10 second pause after it, then both are joined.  After
    the completion banner is printed the function never returns: it
    sleeps in one-second steps.
    """
    settings = config.Config()
    workers = [weibo.Weibo(settings), mail.Mail(settings)]

    for worker in workers:
        worker.start()
        time.sleep(10)

    for worker in workers:
        worker.join()

    print('###Everything has been done, now you can exit.###\n')

    # Keep the process alive until it is terminated from outside.
    while True:
        time.sleep(1)
def get_weibo_posts(config, db):
    """Return a list of weibo posts.

    ``config`` is the configuration dictionary described in README.md and
    ``db`` is the database (it is accepted but not read by this function).

    For every entry of the crawler's ``user_config_list`` the first page
    is fetched and the crawler's ``weibo`` sequence is appended to the
    result in reversed order.
    """
    crawler = weibo.Weibo(make_weibo_config(config))
    collected = []
    for account in crawler.user_config_list:
        crawler.initialize_info(account)
        # get_one_page uses information retrieved by get_user_info, so
        # the user info has to be fetched first.
        crawler.get_user_info()
        # Only the first page is crawled; that should be more than enough.
        crawler.get_one_page(1)
        collected.extend(reversed(crawler.weibo))
    return collected
def __init__(self, name=None, url=None):
    """Bind ``self.site`` to a site handler chosen by name or by URL.

    When ``name`` is truthy it is looked up in ``site_class`` and the
    mapped class is instantiated; ``url`` is not used in that case.

    Otherwise ``url`` is passed through ``lib.site.get_orig_url`` (the
    raw ``url`` is kept if that call fails), stored in ``self.url``, and
    the network location of the result is searched for known host names
    to pick the handler.

    Raises:
        SiteNotSupported: ``name`` is not in ``site_class``, or no known
            host name occurs in the URL's network location.
    """
    if name:
        # ``in`` works on Python 2 and 3; dict.has_key() is Python 2 only.
        if name not in site_class:
            raise SiteNotSupported()
        site = site_class[name]
        self.site = site()
        return

    try:
        self.url = lib.site.get_orig_url(url)
    except Exception:
        # Best effort: fall back to the URL as given.  ``Exception``
        # rather than a bare ``except`` so KeyboardInterrupt/SystemExit
        # are not swallowed.
        self.url = url

    netloc = urlparse.urlsplit(self.url).netloc
    self.site = None

    # The dots are escaped so they match a literal "." only; unescaped,
    # "v1.cn" would also match e.g. "v1xcn".
    if re.search(r"weibo\.com|weibo\.cn|sinajs\.cn|sinaimg\.cn", netloc):
        self.site = weibo.Weibo()
    elif re.search(r"meipai\.com", netloc):
        self.site = meipai.Meipai()
    elif re.search(r"miaopai\.com|ent\.v\.sina\.cn", netloc):
        self.site = miaopai.Miaopai()
    elif re.search(r"weipai\.cn", netloc):
        self.site = weipai.Weipai()
    elif re.search(r"vlook\.cn", netloc):
        self.site = vlook.Vlook()
    elif re.search(r"weipainv\.com", netloc):
        self.site = weipainv.Weipainv()
    elif re.search(r"xiaoying\.tv", netloc):
        self.site = xiaoying.Xiaoying()
    elif re.search(r"xiaokaxiu\.com", netloc):
        self.site = xiaokaxiu.Xiaokaxiu()
    elif re.search(r"gifshow\.com|kuaishou\.com", netloc):
        self.site = gifshow.Gifshow()
    elif re.search(r"v1\.cn", netloc):
        self.site = v1.V1()
    elif re.search(r"tumblr\.com", netloc):
        self.site = tumblr.Tumblr()
    elif re.search(r"mitaose8\.com", netloc):
        self.site = mitaose.Mitaose()
    elif re.search(r"aishipin\.net", netloc):
        self.site = aishipin.Aishipin()
    else:
        raise SiteNotSupported()
def doBatch(tasks, username, password, oddeven):
    """Log in, follow the target user, then comment on / like / forward posts.

    Args:
        tasks: dict read with these keys:
            ``'uid'`` (target user id, default 5644764907),
            ``'rms'`` (list of root-post dicts with ``'rmid'`` and optional
            ``'ruid'`` / ``'ims'``; each ``'ims'`` entry has ``'imid'`` and
            optional ``'iuid'``),
            ``'ics'``, ``'rfs'``, ``'rcs'`` (sequences from which one random
            text template is drawn).
        username, password: credentials passed to ``client.login``.
        oddeven: root posts whose ``rmid % 2`` equals this value are skipped.

    Returns:
        ``False`` if ``client.state`` is falsy after login (the client is
        logged out first); ``True`` otherwise, including when the follow
        call fails and nothing else is attempted.

    Each of the four action kinds has a "well" flag.  After the first
    failure of a kind the flag stays ``False``: that action is no longer
    attempted in the main pass and every affected post (the failing one
    included) is queued for the redo pass at the end.
    """
    client = weibo.Weibo()
    client.login(username, password)
    if not client.state:
        client.logout()
        return False
    # Redo queues: child comment, child like, root forward, root like.
    eicfs = []
    eilikes = []
    ercfs = []
    erlikes = []
    uid = tasks.get('uid', 5644764907)
    # Sticky success flags, one per action kind.
    icfWell = True
    ilikeWell = True
    cfWell = True
    likeWell = True
    time.sleep(2)
    # Follow first, then like / comment / forward everything.
    ret = helper.doFollow(client, uid)
    if ret:
        # Root posts.
        rms = tasks.get('rms', [])
        for rm in rms:
            ruid = rm.get('ruid', uid)
            rmid = rm['rmid']
            # Split the work into odd / even batches.
            if rmid % 2 == oddeven:
                continue
            time.sleep(6)
            # Child posts.
            ims = rm.get('ims', [])
            for im in ims:
                iuid = im.get('iuid', uid)
                imid = im['imid']
                # Only handle the target user.
                if iuid != uid:
                    continue
                # Comment.
                if icfWell:
                    time.sleep(2)
                    # NOTE(review): entries of tasks['ics'] are presumably
                    # %-format templates taking one string -- confirm.
                    ics = random.sample(tasks['ics'], 1)[0] % (util.randomText(9))
                    ret = helper.doIComment(client, ruid, rmid, iuid, imid, ics)
                    if not ret:
                        icfWell = False
                if not icfWell:
                    eicfs.append({
                        'ruid': ruid,
                        'rmid': rmid,
                        'iuid': iuid,
                        'imid': imid
                    })
                # Like.
                if ilikeWell:
                    time.sleep(2)
                    ret = helper.doILike(client, rmid, iuid, imid)
                    if not ret:
                        ilikeWell = False
                if not ilikeWell:
                    eilikes.append({'rmid': rmid, 'iuid': iuid, 'imid': imid})
            # Only handle the target user.
            if ruid != uid:
                continue
            # Switch between forward-with-comment and comment-with-forward
            # modes: the main pass forwards, the redo pass below comments.
            if cfWell:
                time.sleep(2)
                rfs = random.sample(tasks['rfs'], 1)[0] % (util.randomText(9))
                ret = helper.doForward(client, rmid, rfs, 1)
                if not ret:
                    cfWell = False
            if not cfWell:
                ercfs.append({'rmid': rmid})
            # Like.
            if likeWell:
                time.sleep(2)
                ret = helper.doLike(client, rmid)
                if not ret:
                    likeWell = False
            if not likeWell:
                erlikes.append({'rmid': rmid})
        # Redo the parts that failed.  Each queue is abandoned at its first
        # failure; the remaining queues are still processed.
        if not ilikeWell or not icfWell or not likeWell or not cfWell:
            time.sleep(20)
            for eilike in eilikes:
                print('+++ redo eilike +++', eilike, '\n', flush=True)
                ret = helper.doILike(client, eilike['rmid'], eilike['iuid'], eilike['imid'])
                if not ret:
                    break
                time.sleep(5)
            for eicf in eicfs:
                print('+++ redo eicf +++', eicf, '\n', flush=True)
                ics = random.sample(tasks['ics'], 1)[0] % (util.randomText(9))
                ret = helper.doIComment(client, eicf['ruid'], eicf['rmid'], eicf['iuid'], eicf['imid'], ics)
                if not ret:
                    break
                time.sleep(5)
            for erlike in erlikes:
                print('+++ redo erlike +++', erlike, '\n', flush=True)
                ret = helper.doLike(client, erlike['rmid'])
                if not ret:
                    break
                time.sleep(5)
            for ercf in ercfs:
                print('+++ redo ercf +++', ercf, '\n', flush=True)
                # The redo uses doComment with a tasks['rcs'] template,
                # not doForward / tasks['rfs'] as in the main pass.
                rcs = random.sample(tasks['rcs'], 1)[0] % (util.randomText(9))
                ret = helper.doComment(client, ercf['rmid'], rcs, 1)
                if not ret:
                    break
                time.sleep(5)
    client.logout()
    return True
# NOTE(review): the statements up to ``return missed_time`` are the tail of a
# function whose ``def`` line is not visible here; ``length``, ``now0``,
# ``now_list`` and ``missed_time`` are set up in the missing part.
    # Step forward one hour at a time, ``length`` times, and collect every
    # slot whose millisecond timestamp is not in ``now_list``.
    for i in range(length):
        now0 += timedelta(hours=1)
        tmp = int(now0.timestamp()) * 1000
        if tmp not in now_list:
            missed_time.append((now0.hour, tmp))
    # Logs how many scheduled posts exist and how many new ones are needed.
    logging.info("现在有%d条定时微博,需要新发布%d条" % (len(now_list), len(missed_time)))
    return missed_time


if __name__ == "__main__":
    # Initialisation
    username = ""  # your Weibo account
    password = ""  # your Weibo password
    # OCR key; needed only if image captchas (letters/digits) must be
    # solved -- one can be requested at fast.95man.com -- otherwise it may
    # stay empty.
    ocr_token = ""
    # Whether to keep cookies so the next run needs no new login
    # (recommended).
    cookies_flag = True
    comfirm_weibo_length = 5  # how many upcoming whole hours to check
    # Weibo login
    # Alternative that reads the credentials from the environment:
    # wb = weibo.Weibo(os.environ['username'],os.environ['password'],os.environ['token'])
    wb = weibo.Weibo(username, password, ocr_token, cookies_flag)
    wb.weibo_login()
    # List of scheduled posts
    lists = wb.get_intime_weibo()
    # Check the posts for the next few whole hours
    missed_time = check_time(lists, comfirm_weibo_length)
    # Publish the posts
    post_weibo(wb, missed_time)
def weibo(self):
    """Yield the user's posts one at a time, walking every result page.

    The first request goes to ``self.basic_url``.  After the first page
    the total page count is read from the element with id ``pagelist``;
    if there is none, iteration ends.  Further pages are requested from
    ``http://weibo.cn/u/<self.uid>?page=<n>``.  ``self._time_delay``
    seconds are slept before every request.

    Side effect: before each yield ``self.now_weibo_uid`` and
    ``self.now_weibo_cache`` are set to the id and the parsed fields of
    the post being yielded.

    :return: generator of ``weibo.Weibo(id=..., cache=...)`` objects,
        where ``cache`` is a dict shaped like

    .. code-block:: python

        {
            'is_repost': False,
            'text': 'Happy birthday',
            'attitude_count': 0,
            'repost_count': 0,
            'comment_count': 1,
            'time': '2016-12-30 23:34:34',
            'terminal_source': 'iPad mini',
        }

    :raises IndexError: if a post has no ``span.ct`` in its last ``div``,
        or if none of the three counter layouts matches.
    """
    weibo_url = self.basic_url
    page_count = 1
    now_page_count = 1
    is_first = True
    pattern = re.compile(r'\d+')
    ct_attrs = {'class': 'ct'}

    def first_number(node):
        # First run of digits in the node's text; IndexError if none.
        return int(re.findall(pattern, node.get_text())[0])

    while True:
        tt.sleep(self._time_delay)
        # Page source as a bs4 object.
        requests_content = BeautifulSoup(
            self._session.get(weibo_url).content, "lxml")
        # Candidate post containers on the current page.
        unit_list = requests_content.find_all('div', attrs={'class': 'c'})
        for i in unit_list:
            # Posts are the units whose id looks like "M_<uid>"; anything
            # else is skipped.  Explicit checks replace the former bare
            # ``except`` around this step.
            unit_id = str(i.attrs.get('id', ''))
            id_parts = unit_id.split('_')
            if not unit_id.startswith('M') or len(id_parts) < 2:
                continue
            weibo_uid = id_parts[1]

            first_div = i.div
            # Two or more spans in the first div mark a repost.
            spans = first_div.find_all('span')
            is_repost = len(spans) >= 2
            if is_repost:
                text = spans[0].get_text() + spans[1].get_text()
            else:
                text = first_div.span.get_text()

            last_div = i.find_all('div')[-1]
            first_links = first_div.find_all('a')
            last_links = last_div.find_all('a')

            # Some posts have irregular HTML, so three layouts are tried
            # in turn for the counters.
            try:
                attitude_count = first_number(first_links[-4])
                repost_count = first_number(first_links[-3])
                comment_count = first_number(last_links[-2])
            except IndexError:
                try:
                    comment_count = first_number(last_links[-3])
                    repost_count = first_number(last_links[-4])
                    attitude_count = first_number(last_links[-5])
                except IndexError:
                    # Last resort: the first three numbers in the text of
                    # the last div, in that order.
                    numbers = re.findall(pattern, last_div.get_text())
                    attitude_count = int(numbers[0])
                    repost_count = int(numbers[1])
                    comment_count = int(numbers[2])

            # The "ct" span of the last div holds "<time>来自<source>".
            ct_text = last_div.find_all(
                'span', attrs=ct_attrs)[0].get_text()
            post_time = ct_text.split('来自')[0]
            try:
                # Preferred source: the "ct" span of the first div.
                terminal_source = first_div.find_all(
                    'span', attrs=ct_attrs)[0].get_text().split('来自')[1]
            except IndexError:
                ct_parts = ct_text.split('来自')
                terminal_source = ct_parts[1] if len(ct_parts) > 1 else '暂无'

            weibo_cache = {
                "is_repost": is_repost,
                "text": text,
                "attitude_count": attitude_count,
                "repost_count": repost_count,
                "comment_count": comment_count,
                "time": post_time,
                "terminal_source": terminal_source
            }
            self.now_weibo_cache = weibo_cache
            self.now_weibo_uid = weibo_uid
            yield weibo.Weibo(id=weibo_uid, cache=weibo_cache)

        # On the first page, read the total number of pages.
        if is_first:
            # An "x/y页" pager is present only when there is more than
            # one page.
            pagelist = requests_content.find(attrs={'id': 'pagelist'})
            if not pagelist:
                return
            page_count = pagelist.form.div.contents[-1].strip()
            page_count = page_count.split('/')[1]
            page_count = int(re.findall(pattern, page_count)[0])
            is_first = False

        now_page_count += 1
        if now_page_count > page_count:
            return
        weibo_url = 'http://weibo.cn/u/' + str(
            self.uid) + '?page=' + str(now_page_count)