def process():
    """Download the poster image for every record in the ``imageTask`` set.

    Each record is a comma-separated line; after stripping double quotes and
    newlines, its fourth field is the poster path relative to the EPG poster
    root.  The image is saved under ``E:/posters_5/`` using the same relative
    directory layout.  Records whose download fails (falsy result or HTTP 404)
    are appended to ``E:/404.txt`` and added to the ``imageTaskFailed`` set.

    Returns:
        True once ``imageTask`` is empty.
    """
    base_url = 'http://183.59.160.50:8082/EPG/jsp/images/universal/film/poster/'
    while True:
        p = rd.spop("imageTask")
        if p is None:
            # Empty set: stop instead of crashing on ``None.replace``.
            return True

        fields = p.replace('"', "").replace('\n', "").split(',')
        if len(fields) < 4:
            # Malformed record: there is no poster path to download.
            rd.sadd("imageTaskFailed", p)
            print("failed", p)
            continue

        relative = fields[3]
        url = base_url + relative
        path = u"E:/posters_5/" + "/".join(relative.split('/')[:-1]) + "/"
        try:
            os.makedirs(path)
        except OSError:
            # Best effort: the directory usually exists already.
            pass
        local_filename = path + url.split('/')[-1]

        r = requests_get(url)
        # Check for a failed request *before* touching ``status_code``.
        failed = r is False or r is None
        if not failed:
            print("r.status_code:", r.status_code)
            failed = r.status_code == 404
        if failed:
            with open("E:/404.txt", "a") as myfile:
                myfile.write(p)
            rd.sadd("imageTaskFailed", p)
            print("failed", p)
            continue

        # ``with`` closes the file even if a chunk write raises.
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=512 * 1024):
                if chunk:
                    f.write(chunk)
        print("done", local_filename)
def go_detail_list_task():
    """Drain ``config.yk_video_detail_task`` and store each parsed detail page.

    Every popped task is a JSON object with a ``url`` key.  The page is
    fetched and passed to ``parse_detail_list_page``; the returned ``data`` is
    inserted into ``youku_videos``, each entry of ``stars`` is queued in
    ``config.yk_star_task`` and the URL is recorded in
    ``config.yk_video_detail_done``.  Tasks that cannot be fetched or parsed
    are added to ``config.yk_video_detail_failed``.

    Returns:
        True once the task set is empty.
    """
    processed = 0
    while True:
        q = rd.spop(config.yk_video_detail_task)
        if q is None:
            print(u"yk_video_detail_task sleeping 20 sec....")
            return True

        detail_url = json.loads(q)
        url = detail_url['url']
        if rd.sismember(config.yk_video_detail_done, url):
            print("pass", url)
            continue

        r = requests_get(url, headers=youku_home_headers)
        if r is False or r is None:
            # Do not hand a failed response to the parser; record the failure.
            rd.sadd(config.yk_video_detail_failed, q)
            continue

        d = parse_detail_list_page(r, url)
        data = d['data']
        if data is False or data is None:
            rd.sadd(config.yk_video_detail_failed, q)
            continue

        for star in d['stars']:
            # Star crawl queue; the set de-duplicates for us.
            rd.sadd(config.yk_star_task, star)
        print('detail_url done:', url, data)
        rd.sadd(config.yk_video_detail_done, url)  # finished
        youku_videos.insert(data, check_keys=False)  # save tv data

        # Refresh the session every ``max_step`` processed tasks.
        processed += 1
        if processed % max_step == 0:
            update_session()
def task_types_fetch():
    """Expand every type URL in ``config.yk_types_task`` into page tasks.

    For each type URL not already in the done/failed sets, the page is fetched
    and ``parse_category_show`` is asked for the page count.  One page URL per
    page number is derived by rewriting the ``.html`` suffix and queued in
    ``config.yk_page_task`` unless it is already in the page done/failed sets.

    Returns:
        True once the task set is empty.
    """
    processed = 0
    while True:
        type_url = rd.spop(config.yk_types_task)
        if type_url is None:
            print(u"yk_types_task sleeping 20sec....")
            return True

        if (rd.sismember(config.yk_types_failed, type_url)
                or rd.sismember(config.yk_types_done, type_url)):
            continue

        r = requests_get(url=type_url, headers=youku_home_headers,
                         session=session)
        if r is False or r is None:
            print(u'filed task:%s' % type_url)
            rd.sadd(config.yk_types_failed, type_url)
            continue

        pages = parse_category_show(r, type_url)
        print("task_types_fetch data:", pages)
        # NOTE(review): the upper bound is exclusive, so page number
        # ``pages['pages']`` itself is never queued -- confirm whether the
        # parser already returns "last page + 1".
        # ``range`` works on both Python 2 and 3 (``xrange`` does not).
        for page in range(1, int(pages['pages'])):
            page_url = re.sub(r'(\.html.*)',
                              '_s_1_d_1_p_{page}.html'.format(page=page),
                              type_url)
            print("task_types_fetch for :", page_url)
            if (not rd.sismember(config.yk_page_failed, page_url)
                    and not rd.sismember(config.yk_page_done, page_url)):
                rd.sadd(config.yk_page_task, page_url)
        rd.sadd(config.yk_types_done, type_url)

        # Refresh the session every ``max_step`` processed tasks.
        processed += 1
        if processed % max_step == 0:
            update_session()
def process():
    """Download an avatar for every task in the ``stars`` set.

    Each task is a JSON object.  Exactly one of ``avatar`` / ``img_url`` must
    be set; that URL is fetched with ``requests_get`` and the result is
    converted to RGB and saved as ``E:/avatar/<_id>/<_id>.jpg``.  The relative
    path is then written to the ``file_path`` field of the matching document
    in ``mongo_conn.stars``.  Tasks whose download fails, or which carry no
    ``_id``, are added to the ``avatar_failed`` set.

    NOTE(review): the flattened original ended with a bare ``return`` whose
    nesting level cannot be recovered.  It is treated here as trailing the
    loop (unreachable), so the whole set is drained -- confirm.

    Returns:
        True once the ``stars`` set is empty.
    """
    path = u"E:/avatar/"
    while True:
        p = rd.spop("stars")
        if not p:
            return True

        task = json.loads(p)
        avatar = task.get("avatar")
        img_url = task.get("img_url")
        if bool(avatar) == bool(img_url):
            # Neither or both URLs are set: the source is ambiguous, skip.
            print("----", p)
            continue

        im = requests_get(avatar if avatar else img_url)
        star_id = task.get("_id")
        if not im or not star_id:
            # Without an ``_id`` there is no target path or document to update.
            rd.sadd("avatar_failed", p)
            print("failed", p)
            continue

        file_name = "/".join([star_id, "%s.jpg" % star_id])
        try:
            os.makedirs(os.path.dirname(path + file_name))
        except OSError:
            # Best effort: the directory usually exists already.
            pass
        im.convert('RGB').save(path + file_name)

        result = mongo_conn.stars.update_one(
            {"_id": ObjectId(star_id)},
            {"$set": {"file_path": file_name}})
        print("done-----%s-----%s" % (result.modified_count, path + file_name))
def task_merge_doubanvideo():
    """Merge every Douban video whose id is queued in ``task_merge_doubanvideo``.

    Ids are popped one at a time from the ``rd`` set and each is passed,
    wrapped in an ``ObjectId`` ``_id`` query, to ``Merge.merge_doubanvideo``.
    The function stops at the first falsy pop.
    """
    merger = Merge()
    while True:
        queued_id = rd.spop("task_merge_doubanvideo")
        if not queued_id:
            return
        merger.merge_doubanvideo(query={"_id": ObjectId(queued_id)})
def task_merge_youku_videos():
    """Merge every Youku video whose id is queued in ``task_merge_youku_videos``.

    Pops ids from the ``rd`` set until a pop yields a falsy value, calling
    ``Merge.merge_youku_videos`` with an ``ObjectId`` ``_id`` query for each.
    """
    queue_name = "task_merge_youku_videos"
    merger = Merge()
    pending = rd.spop(queue_name)
    while pending:
        merger.merge_youku_videos(query=dict(_id=ObjectId(pending)))
        pending = rd.spop(queue_name)
def task_merge_letvstar():
    """Merge every LeTV star whose id is queued in ``task_merge_letvstar``.

    Each id popped from the ``rd`` set becomes an ``ObjectId`` ``_id`` query
    for ``Merge.merge_letvstar``; the loop ends on the first falsy pop.
    """
    merger = Merge()
    while True:
        object_id = rd.spop("task_merge_letvstar")
        if object_id:
            query = {"_id": ObjectId(object_id)}
            merger.merge_letvstar(query=query)
            continue
        break
def task_video():
    """Fetch and store the Douban page of every id in ``config.douban_tv_task``.

    For each id the page is fetched, checked for the anti-crawler notice,
    parsed with ``parse_video`` and inserted into ``mongo_douban_tvs``.  A
    photos task (``{"id", "mongoTVID"}``) is queued in
    ``config.douban_photos_task`` and the id is recorded in
    ``config.douban_tv_done``.  Ids that cannot be fetched, are blocked, or
    parse without a ``title`` are added to ``config.douban_tv_failed``.

    Returns:
        True once the task set is empty.
    """
    processed = 0
    while True:
        douban_id = rd.spop(config.douban_tv_task)
        if douban_id is None:
            print(u"task_page sleeping....20sec")
            return True

        # Completion is recorded in ``douban_tv_done`` below, so the same set
        # must be consulted here (the original looked in
        # ``doubantv_ajax_task_done``, which this function never writes).
        if rd.sismember(config.douban_tv_done, douban_id):
            print(u"already done%s" % douban_id)
            continue

        url = tv_url.format(id=douban_id)
        r = requests_get(url=url, headers=douban_home_headers)
        if r is False or r is None:
            rd.sadd(config.douban_tv_failed, douban_id)
            continue

        try:
            check_block(r)
        except Exception as e:
            # Best effort: a failing block check must not stop the crawl.
            print("check_block:", str(e))

        if u'检测到有异常请求从你的 IP 发出' in r:
            print("------spider ben block... break......")
            # Keep the id for a later retry instead of silently dropping it.
            rd.sadd(config.douban_tv_failed, douban_id)
            delay(block_wait)
            continue

        data = parse_video(r)
        piw = piwik(page_title=page_title(r), session_time=session_time,
                    origin_url=url, urlref='')
        print("piw", piw)
        if not data or data.get("title") is None:
            rd.sadd(config.douban_tv_failed, douban_id)
            time.sleep(task_wait)
            print("------spider ben block...")
            continue

        data['doubanid'] = douban_id
        print(json.dumps(data))
        mongo_r = mongo_douban_tvs.insert(data, check_keys=False)

        photostask = json.dumps({"id": douban_id, "mongoTVID": str(mongo_r)})
        # NOTE(review): this consults ``douban_star_done``; the photos worker
        # records completion in ``douban_photos_done`` -- confirm which set is
        # intended.
        if (not rd.sismember(config.douban_star_done, photostask)
                and not rd.sismember(config.douban_photos_failed, photostask)):
            rd.sadd(config.douban_photos_task, photostask)
        print(photostask)

        rd.sadd(config.douban_tv_done, douban_id)
        print("done.. sleep %s seconds." % task_wait)
        delay()

        # Rotate the ``bid`` cookie every ``max_step`` processed ids.
        processed += 1
        if processed % max_step == 0:
            bid = random_str(10)
            session.cookies.set('bid', bid, domain='.douban.com', path='/')
def failed_job(self):
    """Retry every task in ``config.content_work_task_failed`` once.

    Each popped JSON task that has a ``contentName`` is passed to
    ``self.process``.  Tasks that fail again are collected and put back into
    the failed set only after the set has been drained; re-adding them while
    draining (as the original did) makes a permanently failing task spin
    forever.  Tasks without ``contentName`` are discarded.

    Returns:
        True once the failed set has been drained.
    """
    print("go failed_job")
    still_failed = []
    try:
        while True:
            # Listen for tasks.
            p = rd.spop(config.content_work_task_failed)
            if p is None:
                return True
            task = json.loads(p)
            if task.get("contentName") is None:
                continue
            if not self.process(task):
                still_failed.append(p)
    finally:
        # Runs on normal return and on error, so no failed task is lost.
        for p in still_failed:
            rd.sadd(config.content_work_task_failed, p)
def job(self):
    """Background job: process tasks from ``config.gd_task_bkbk`` forever.

    Pops pickled tasks from the set, skips those without a ``name`` and hands
    the rest to ``self.process``.  A task whose processing returns a falsy
    value is put back into the same set.  When the set is empty the loop
    sleeps six seconds and polls again; the function never returns.
    """
    while True:
        # Listen for tasks.
        p = rd.spop(config.gd_task_bkbk.encode('latin1'))
        if p is None:
            print("sleep 6s...")
            time.sleep(6)
            continue

        # SECURITY: ``pickle.loads`` executes arbitrary code from the payload;
        # this is only safe while nothing untrusted can write to the set.
        task = pickle.loads(p)
        if task.get("name") is None:
            continue

        if not self.process(task):
            # NOTE(review): the task goes back into the set being consumed,
            # so a permanently failing task is retried without any delay.
            rd.sadd(config.gd_task_bkbk, p)
def job(self):
    """Background job: process tasks from ``config.content_work_task`` forever.

    Each popped JSON task that has a ``contentName`` is passed to
    ``self.process``; tasks for which it returns a falsy value are added to
    ``config.content_work_task_failed``.  When the task set is empty,
    ``self.failed_job()`` is run, then the loop sleeps six seconds and polls
    again.  The function never returns.
    """
    print("go job")
    while True:
        # Listen for tasks.
        p = rd.spop(config.content_work_task)
        if p is None:
            self.failed_job()
            print("sleep 6s...")
            time.sleep(6)
            continue

        task = json.loads(p)
        if task.get("contentName") is None:
            continue

        if not self.process(task):
            rd.sadd(config.content_work_task_failed, p)
def failed_job(self):
    """Retry every task in ``config.gd_task_failed`` once.

    Each popped pickled task that has a ``name`` is passed to
    ``self.process``.  Successful tasks are added to ``config.gd_task_bkbk``.
    Tasks that fail again are collected and returned to the failed set only
    after it has been drained; re-adding them while draining (as the original
    did) makes a permanently failing task spin forever.  Tasks without a
    ``name`` are discarded.

    Returns:
        True once the failed set has been drained.
    """
    still_failed = []
    try:
        while True:
            # Listen for tasks.
            p = rd.spop(config.gd_task_failed)
            if p is None:
                return True
            # SECURITY: ``pickle.loads`` executes arbitrary code from the
            # payload; only safe while the set holds trusted data.
            task = pickle.loads(p)
            if task.get("name") is None:
                continue
            if self.process(task):
                rd.sadd(config.gd_task_bkbk, p)
            else:
                still_failed.append(p)
    finally:
        # Runs on normal return and on error, so no failed task is lost.
        for p in still_failed:
            rd.sadd(config.gd_task_failed, p)
def get_detailurl_task():
    """Resolve queued show pages to their detail-list URLs.

    Each task popped from ``config.yk_get_detailurl_task`` is a JSON object
    with ``url`` and ``Referer``.  The page is fetched with that referer and
    ``parse_tv_show`` extracts the detail URL.  Unless that URL is already in
    ``config.yk_video_detail_done`` it is queued (with the show page as its
    referer) in ``config.yk_video_detail_task``.  The task is then recorded in
    ``config.yk_get_detailurl_done``; tasks whose page yields no detail URL go
    to ``config.yk_get_detailurl_field``.

    Returns:
        True once the task set is empty.
    """
    processed = 0
    while True:
        q = rd.spop(config.yk_get_detailurl_task)
        if q is None:
            print(u"yk_get_detailurl_task sleeping 20 sec")
            return True

        to_detail_url = json.loads(q)
        if rd.sismember(config.yk_get_detailurl_done, q):
            print("pass")
            continue

        # Copy before adding the referer: ``youku_home_headers`` is shared by
        # other workers and must not be mutated per task.
        headers = dict(youku_home_headers)
        headers['Referer'] = to_detail_url['Referer']

        show_url = to_detail_url['url']
        r = requests_get(show_url, headers=headers)
        print("to_detail_url", show_url)
        detail_url = parse_tv_show(r, show_url)
        print("detail_url:", detail_url)
        if detail_url is False or detail_url is None:
            rd.sadd(config.yk_get_detailurl_field, q)
            continue

        if not rd.sismember(config.yk_video_detail_done, detail_url):
            added = rd.sadd(
                config.yk_video_detail_task,
                json.dumps({"url": detail_url, 'Referer': show_url}))
            if added == 1:
                print("yes")
        # NOTE(review): nesting of this line is not recoverable from the
        # flattened original; the task is marked done once resolved.
        rd.sadd(config.yk_get_detailurl_done, q)

        # Refresh the session every ``max_step`` processed tasks.
        processed += 1
        if processed % max_step == 0:
            update_session()
def process():
    """Download vertical posters for tasks in the ``config.image_v`` set, forever.

    Each task is a JSON object with ``image_v`` (path below the EPG poster
    root) and ``content_id``.  Images narrower than 180 pixels are ignored.
    Others are converted to RGB and saved as
    ``E:/posters/<content_id>/<content_id>_<w>x<h>.jpg``; unless a matching
    document already exists, the task (plus ``file_path`` and ``url``) is
    inserted into ``mongo_conn.posters``.  Failed downloads, and tasks with no
    ``content_id``, are added to the ``image_v_failed`` set.  When the set is
    empty the loop sleeps six seconds and polls again.
    """
    path = u"E:/posters/"
    base_url = u'http://183.59.160.50:8082/EPG/jsp/images/universal/film/poster/'
    while True:
        p = rd.spop(config.image_v)
        if not p:
            print("done! sleep 6s")
            time.sleep(6)
            continue

        task = json.loads(p)
        im = requests_get(base_url + task['image_v'])
        content_id = task.get("content_id")
        if not im or not content_id:
            # Without a ``content_id`` there is no target path to save under.
            rd.sadd("image_v_failed", p)
            print("failed", p)
            continue
        if im.width < 180:
            continue

        file_name = "/".join([
            content_id,
            "%s_%sx%s.jpg" % (content_id, im.width, im.height),
        ])
        try:
            os.makedirs(os.path.dirname(path + file_name))
        except OSError:
            # Best effort: the directory usually exists already.
            pass
        im.convert('RGB').save(path + file_name)

        existing = mongo_conn.posters.find({
            "file_path": file_name,
            "content_id": content_id,
        })
        if existing.count() != 0:
            continue

        task['file_path'] = file_name
        task['url'] = task['image_v']
        if task.get("_id"):
            # Drop the incoming ``_id`` before inserting the task document.
            del task['_id']
        _id = mongo_conn.posters.insert(task, check_keys=False)
        print(task['content_id'], _id, file_name)
def task_star():
    """Retry the Douban star ids stored in ``config.douban_star_failed``.

    For each id the star page is fetched and parsed with ``parse_star``; the
    result is inserted into ``mongo_douban_stars`` and the id is recorded in
    ``config.douban_star_done``.  When the fetch fails, the anti-crawler
    notice is shown, or the parse yields no ``name``, the id is put back into
    ``config.douban_star_failed`` and the loop backs off before continuing.

    NOTE(review): failures go back into the set being consumed, so an id that
    never succeeds is retried indefinitely (with a pause each time).

    Returns:
        None, once the failed set is empty.
    """
    processed = 0
    while True:
        task = rd.spop(config.douban_star_failed)
        if task is None:
            print(u"task_page sleeping....20sec")
            break

        if rd.sismember(config.douban_star_done, task):
            print(u"already done%s" % task)
            continue

        url = star_url.format(id=task)
        print(url)
        r = requests_get(url=url)
        # A failed request is not a string; guard before the ``in`` test.
        fetched = r is not False and r is not None

        if fetched and u'检测到有异常请求从你的 IP 发出' in r:
            print("------spider ben block... break......")
            # Keep the id for a retry instead of silently dropping it.
            rd.sadd(config.douban_star_failed, task)
            delay(block_wait)
            continue

        data = parse_star(r) if fetched else None
        if not data or data.get("name") is None:
            rd.sadd(config.douban_star_failed, task)
            update_session()
            time.sleep(20)
            print("------spider ben sleep 20 sec...")
            continue

        data['doubanid'] = task
        print(json.dumps(data))
        result = mongo_douban_stars.insert(data, check_keys=False)
        rd.sadd(config.douban_star_done, task)
        delay()
        print("done.%s. sleep 3 seconds." % result)

        # Rotate the ``bid`` cookie every ``max_step`` processed ids.
        processed += 1
        if processed % max_step == 0:
            bid = random_str(10)
            session.cookies.set('bid', bid, domain='.douban.com', path='/')
def task_page_fetch():
    """Parse every list page queued in ``config.yk_page_task``.

    Page URLs already in the page done/failed sets are skipped.  Each
    remaining page is fetched and handed to ``parse_page_fetch``; the entries
    it returns under ``yk_get_detailurl_task`` and ``yk_video_detail_task``
    are JSON-encoded and added to the Redis sets of the same name (the sets
    de-duplicate the URLs).  The page URL is then recorded in
    ``config.yk_page_done``; pages that cannot be fetched go to
    ``config.yk_page_failed``.

    Returns:
        True once the task set is empty.
    """
    processed = 0
    while True:
        page_url = rd.spop(config.yk_page_task)
        if page_url is None:
            print(u"task_page_fetch sleeping 20sec....")
            return True
        print("page_url", page_url)

        if (rd.sismember(config.yk_page_failed, page_url)
                or rd.sismember(config.yk_page_done, page_url)):
            continue

        r = requests_get(url=page_url, headers=youku_home_headers,
                         session=session)
        if r is False or r is None:
            # Fetching the page failed.
            print(u'filed task:%s' % page_url)
            rd.sadd(config.yk_page_failed, page_url)
            continue

        print("done task_page_fetch:", page_url)
        data = parse_page_fetch(r, page_url)
        for entry in data['yk_get_detailurl_task']:
            # Links that go straight to a play page (v_show type).
            rd.sadd(config.yk_get_detailurl_task, json.dumps(entry))
        for entry in data['yk_video_detail_task']:
            # Links to a detail-list page.
            rd.sadd(config.yk_video_detail_task, json.dumps(entry))
        rd.sadd(config.yk_page_done, page_url)

        # Refresh the session every ``max_step`` processed pages.
        processed += 1
        if processed % max_step == 0:
            update_session()
def task_star():
    """Search LeTV for every star queued in ``config.le_star_task``, forever.

    Each task is a JSON object with a single entry whose value is used as the
    search term (e.g. ``{"7088": "石田卓也"}``).  The search page is fetched
    and parsed with ``parse_sostar``; the result is inserted into
    ``mongo_letv_stars`` and the task is recorded in ``config.le_star_done``.
    Tasks that cannot be fetched, parsed or inserted, or that carry no entry,
    go to ``config.le_star_failed``.  When the task set is empty the loop
    sleeps ``task_wait`` seconds and polls again.
    """
    processed = 0
    while True:
        task = rd.spop(config.le_star_task)
        if task is None:
            print(u"task_page sleeping....20sec")
            time.sleep(task_wait)
            continue
        print(task)

        if rd.sismember(config.le_star_done, task):
            print("already done.")
            continue

        task_json = json.loads(task)
        if not task_json:
            # No entry to search for.
            rd.sadd(config.le_star_failed, task)
            continue
        # First value of the mapping; works on both Python 2 and 3
        # (``keys()[0]`` is Python 2 only).
        keyword = next(iter(task_json.values()))

        url = so_url.format(wd=keyword)
        r = requests_get(url=url, headers=leso_headers)
        if r is False or r is None:
            # Fetch failed.
            print(u'filed task:%s' % url)
            rd.sadd(config.le_star_failed, task)
            continue

        data = parse_sostar(r, task_json)
        if data is False or data is None:
            rd.sadd(config.le_star_failed, task)
            continue

        mongo_id = mongo_letv_stars.insert(data, check_keys=False)
        if mongo_id:
            rd.sadd(config.le_star_done, task)
        else:
            print(mongo_id)
            rd.sadd(config.le_star_failed, task)
        print('done.')

        # Refresh the session every ``max_step`` processed tasks.
        processed += 1
        if processed % max_step == 0:
            update_session()
def task_category():
    """Expand every category in ``config.yk_category_task`` into type tasks.

    Each task is a JSON object with ``url`` and ``name``.  The category page
    is fetched and ``parse_category_show`` returns its ``types`` (a mapping of
    type name to URL).  A category without types is queued itself in
    ``config.yk_types_task``.  Otherwise every type URL that is not in the
    types done/failed sets is queued, and each type seen for the first time is
    saved to ``youku_video_types``.  The category URL is finally recorded in
    ``config.yk_category_task_done``; categories that cannot be fetched go to
    ``config.yk_category_task_failed``.

    Returns:
        True once the task set is empty.
    """
    processed = 0
    while True:
        raw = rd.spop(config.yk_category_task)
        if raw is None:
            print(u"task_category sleeping....20sec")
            return True

        category = json.loads(raw)
        print(category)
        r = requests_get(url=category['url'], headers=youku_home_headers,
                         session=session)
        if r is False or r is None:
            # Fetching the category page failed.
            print(u'filed task:%s' % category['url'])
            rd.sadd(config.yk_category_task_failed, category['url'])
            continue

        data = parse_category_show(r, category['url'])
        print("category and types:", json.dumps(data))
        types = data['types']
        if not types:
            # The category has no types: crawl the category URL as a type.
            rd.sadd(config.yk_types_task, category['url'])
        else:
            for ty, type_url in types.items():
                if (not rd.sismember(config.yk_types_done, type_url)
                        and not rd.sismember(config.yk_types_failed, type_url)):
                    rd.sadd(config.yk_types_task, type_url)  # types fetch task
                # The add result doubles as a "first time seen" test for the
                # database insert below.
                # NOTE(review): this marks the URL in ``yk_types_done`` before
                # it is fetched, and ``task_types_fetch`` skips URLs found in
                # that set -- confirm a separate de-dup set is not needed.
                first_seen = rd.sadd(config.yk_types_done, type_url)
                if first_seen == 0:
                    continue
                youku_video_types.insert(
                    {"name": ty, "url": type_url,
                     "category": category['name']},
                    check_keys=False)  # save tv types
        rd.sadd(config.yk_category_task_done, category['url'])

        # Refresh the session every ``max_step`` processed categories.
        processed += 1
        if processed % max_step == 0:
            update_session()
def job(self):
    """Background job: process tasks from ``config.gd_task`` forever.

    Each popped pickled task that has a ``name`` is passed to
    ``self.process``.  The raw payload is then added to
    ``config.gd_task_bkbk`` on success or ``config.gd_task_failed`` on a falsy
    result.  When the task set is empty, ``self.failed_job()`` is run, then
    the loop sleeps sixty seconds and polls again.  The function never
    returns.
    """
    while True:
        # Listen for tasks.
        p = rd.spop(config.gd_task)
        if p is None:
            self.failed_job()
            print("sleep 60s...")
            time.sleep(60)
            continue

        # SECURITY: ``pickle.loads`` executes arbitrary code from the payload;
        # this is only safe while nothing untrusted can write to the set.
        task = pickle.loads(p)
        if task.get("name") is None:
            continue

        r = self.process(task)
        print("process", r)
        target = config.gd_task_bkbk if r else config.gd_task_failed
        rd.sadd(target, p)
def _queue_le_star(star, echo=False):
    """Queue one ``{id: name}`` star mapping unless it is already done or failed."""
    payload = json.dumps(star)
    if rd.sismember(config.le_star_done, payload):
        return
    if rd.sismember(config.le_star_failed, payload):
        return
    if echo:
        print(payload)
    rd.sadd(config.le_star_task, payload)


def _queue_le_item_stars(x):
    """Queue the starring, actor and director entries of one list item."""
    starring = x['starring']
    if isinstance(starring, (list, tuple)):
        for star in starring:
            _queue_le_star(star)
    actors = x['actor']
    if isinstance(actors, dict):
        for star_id in actors:
            _queue_le_star({star_id: actors[star_id]}, echo=True)
    directors = x['directory']
    if isinstance(directors, dict):
        for star_id in directors:
            _queue_le_star({star_id: directors[star_id]})


def _le_names(people):
    """Return the names of an ``{id: name}`` mapping, each followed by a comma."""
    if not isinstance(people, dict):
        # The original only worked for a mapping or an empty string here.
        return ""
    return "".join(people[key] + "," for key in people)


def _le_time(millis):
    """Format a millisecond timestamp as local ``YYYY-MM-DD HH:MM:SS``."""
    return time.strftime('%Y-%m-%d %H:%M:%S',
                         time.localtime(int(millis) // 1000))


def _clean_le_item(x):
    """Build the document stored for one list item (first-pass cleaning)."""
    # Copy so the derived fields below do not alias the raw item.
    data = dict(x)
    now = time.time()
    data['created_at'] = now
    data['updated_at'] = now
    # LeTV's own ctime/mtime; their exact meaning is still to be analysed.
    data["ctime"] = _le_time(x['ctime'])
    data["mtime"] = _le_time(x['mtime'])
    # Posters: the keys look like "<width>*<height>".
    data["images"] = [{
        "url": x['images'][k],
        "width": k.split('*')[0],
        "height": k.split('*')[1]
    } for k in x['images']]
    data["actors"] = _le_names(x['actor'])
    data["directors"] = _le_names(x['directory'])
    starring = x['starring']
    if isinstance(starring, (list, tuple)):
        # Each entry is a single-entry ``{id: name}`` mapping.
        data["starring"] = "".join(
            next(iter(star.values())) + "," for star in starring)
    return data


def _fetch_le_ajax_pages(ajax_url, max_failures):
    """Walk the paged AJAX listing and store every item not yet done.

    Returns True when the listing reports no more pages, False after
    ``max_failures`` consecutive failed or unparsable responses.
    """
    pn = 1
    failures = 0
    while True:
        ajax_url = re.sub(r"pn=\d*", 'pn=%s' % pn, ajax_url)
        print("ajax_url:", ajax_url)
        r = requests_get(url=ajax_url, headers=leshi_ajax_headers)

        list_data = None
        if r is not False and r is not None:
            try:
                list_data = json.loads(r)
            except (ValueError, TypeError) as e:
                print(str(e))
                print(r)
                print(ajax_url)
        payload = list_data.get("data") if isinstance(list_data, dict) else None
        if not isinstance(payload, dict):
            rd.sadd(config.le_page_ajax_failed, ajax_url)
            failures += 1
            if failures >= max_failures:
                # Give up instead of retrying the same page forever.
                return False
            print("continue")
            continue
        failures = 0
        pn += 1

        # Store the items *before* testing ``more`` so that the entries of
        # the final page are not discarded.
        for x in payload.get("arr") or []:
            if rd.sismember(config.le_tv_done, x["unique_id"]):
                print("already done!")
                print(x['name'])
                continue
            _queue_le_item_stars(x)
            data = _clean_le_item(x)
            print("done!")
            mongo_letv_tvs.insert(data, check_keys=False)
            rd.sadd(config.le_tv_done, x['unique_id'])

        if not payload.get("more"):
            print("this url page fetch done")
            return True


def task_page():
    """Crawl every LeTV list page queued in ``config.le_page_task``, forever.

    Each page is fetched and searched for its ``frontUrl`` AJAX endpoint.  The
    endpoint is then paged through (``pn=1, 2, ...``): every item not already
    in ``config.le_tv_done`` has its starring/actor/director entries queued in
    ``config.le_star_task``, is cleaned and inserted into ``mongo_letv_tvs``,
    and is recorded in ``config.le_tv_done``.  A page whose listing completes
    is recorded in ``config.le_page_done``; pages that cannot be fetched, have
    no endpoint, or whose listing keeps failing go to
    ``config.le_page_failed``.  When the task set is empty the loop sleeps
    ``task_wait`` seconds and polls again.
    """
    max_failures = 5  # consecutive AJAX failures tolerated per list page
    processed = 0
    while True:
        url = rd.spop(config.le_page_task)
        if url is None:
            print(u"task_page sleeping....20sec")
            time.sleep(task_wait)
            continue

        if rd.sismember(config.le_page_done, url):
            print(u"already done%s" % url)
            continue

        r = requests_get(url, headers=leshi_headers)
        if r is False or r is None:
            # Fetch failed.
            print(u'filed task:%s' % url)
            rd.sadd(config.le_page_failed, url)
            continue

        # Example endpoint:
        # http://list.le.com/getLesoData?from=pc&src=1&stype=1&ps=30&pn=1&...
        m = re.search(
            r"frontUrl\: *'(http://list\.le\.com\/getLesoData([^',]+?))',", r)
        print("task_page:", url)
        if not m:
            print(u'filed task:%s' % url)
            rd.sadd(config.le_page_failed, url)
            continue

        if _fetch_le_ajax_pages(m.group(1), max_failures):
            # The done set is consulted above, so record completion here.
            rd.sadd(config.le_page_done, url)
        else:
            rd.sadd(config.le_page_failed, url)

        # Refresh the session every ``max_step`` processed pages.
        processed += 1
        if processed % max_step == 0:
            update_session()
def task_photos():
    """Retry the Douban photo tasks stored in ``config.douban_photos_failed``.

    Each task is a JSON object with ``id`` (Douban subject id) and
    ``mongoTVID`` (the ``_id`` of the matching ``mongo_douban_tvs`` document).
    The poster list returned by ``get_photos`` replaces the document's
    ``poster`` field and the task is recorded in
    ``config.douban_photos_done``.  Tasks for which no photos are returned are
    dropped.

    Returns:
        True once the failed set is empty.
    """
    processed = 0
    photos_url = u'https://movie.douban.com/subject/{id}/photos?type=R'
    while True:
        task = rd.spop(config.douban_photos_failed)
        if task is None:
            print(u"task_page sleeping....20sec")
            return True

        if rd.sismember(config.douban_photos_done, task):
            print(u"already done%s" % task)
            continue

        photo_task = json.loads(task)
        url = photos_url.format(id=photo_task['id'])
        print(url)

        data = get_photos(url=url, id=photo_task['id'])
        # Guard against a ``False``/``None`` result as well as an empty list.
        count = len(data) if data else 0
        print("++++++++++++++++%s+++++++++++++%s++++++++++++" % (task, count))
        if count == 0:
            continue
        print(json.dumps(data))

        # Unset first, then set: a slightly hacky two-step replace of
        # ``poster``.
        tv_filter = {'_id': ObjectId(photo_task['mongoTVID'])}
        mongo_douban_tvs.update(tv_filter, {'$unset': {'poster': 1}},
                                multi=True)
        result = mongo_douban_tvs.update_one(tv_filter,
                                             {'$set': {'poster': data}})
        if result.modified_count == 0:
            rd.sadd(config.douban_photos_failed, task)
        # NOTE(review): the task is marked done even when nothing was
        # modified, so the re-queued entry above is skipped on its next pop.
        # Kept as is because it is what ends the retry loop.
        rd.sadd(config.douban_photos_done, task)
        delay()
        print("done.%s. sleep 3 seconds." % result.modified_count)

        # Rotate the ``bid`` cookie every ``max_step`` processed tasks.
        processed += 1
        if processed % max_step == 0:
            bid = random_str(10)
            session.cookies.set('bid', bid, domain='.douban.com', path='/')
def task_api():
    """Page through every Douban listing URL in ``config.doubantv_ajax_task``, forever.

    For each URL the ``start=`` parameter is advanced in steps of 20 until a
    page returns no ``data``.  Every id found is added to
    ``config.douban_tvids`` and, unless it is in the tv done/failed sets,
    queued in ``config.douban_tv_task``.  The original URL is recorded in
    ``config.doubantv_ajax_task_done`` only once the listing is exhausted, so
    an interrupted crawl is not mistaken for a finished one.  A page that
    cannot be fetched or decoded is recorded in
    ``config.doubantv_ajax_task_failed`` and retried after a back-off; after
    five consecutive failures the listing is abandoned.  When the task set is
    empty the loop sleeps ``task_wait`` seconds and polls again.
    """
    max_failures = 5  # consecutive failures tolerated per listing
    processed = 0
    while True:
        origin_url = rd.spop(config.doubantv_ajax_task)
        if origin_url is None:
            print(u"task_page sleeping....20sec")
            time.sleep(task_wait)
            continue

        if rd.sismember(config.doubantv_ajax_task_done, origin_url):
            print(u"already done%s" % origin_url)
            continue

        url = origin_url
        start = 0
        failures = 0
        while True:
            url = re.sub(r'start=(\d*)', 'start=%s' % (start * 20), url)
            print(url)
            r = requests_get(url, headers=douban_referer_tag_headers)

            r_data = None
            if r is False or r is None:
                # Fetch failed.
                print(u'filed task:%s' % url)
            else:
                try:
                    r_data = json.loads(r)
                except (ValueError, TypeError) as e:
                    print(r)
                    print(str(e))

            if r_data is None:
                rd.sadd(config.doubantv_ajax_task_failed, url)
                failures += 1
                if failures >= max_failures:
                    # Give up on this listing instead of spinning forever.
                    break
                update_session()
                time.sleep(task_wait)
                print("-----spider ben sleep 10 sec....")
                continue
            failures = 0

            if not r_data['data']:
                rd.sadd(config.doubantv_ajax_task_done, origin_url)
                print("done%s" % origin_url)
                break

            for x in r_data['data']:
                if (not rd.sismember(config.douban_tv_done, x['id'])
                        and not rd.sismember(config.douban_tv_failed, x['id'])):
                    add_task = rd.sadd(config.douban_tv_task, x['id'])
                    if add_task == 1:
                        print(
                            "---------------join task.----%s--------------------"
                            % x['id'])
                    else:
                        print(
                            '***********task repeat-******%s********************'
                            % x['id'])
                rd.sadd(config.douban_tvids, x['id'])

            print("sleep 2 seconds")
            delay()
            processed += 1
            start += 1
            # Rotate the ``bid`` cookie every ``max_step`` fetched pages.
            if processed % max_step == 0:
                bid = random_str(10)
                session.cookies.set('bid', bid, domain='.douban.com', path='/')
                try:
                    session.get(url=ad_url.format(bid=bid),
                                headers=douban_referer_tag_headers,
                                timeout=timeout)
                except Exception:
                    # Best effort: this extra request must not stop the crawl.
                    pass