def choice_proxy(is_debug=True, url="", area=u"中国", host=u"master", port=8880):
    """
    Pick a proxy address, blocking until a non-empty one is obtained.

    :param is_debug: (bool) True -> do not use the paid proxy service; the
        proxy comes from get_crawler_proxy(). False -> request a proxy from
        the paid service through get_server_proxy().
    :param url: (unicode) page that is going to be fetched. Paid mode only:
        when non-empty, its hostname replaces ``host`` in the request (used
        for per-site load balancing).
    :param area: (unicode) region / carrier of the wanted proxy, paid mode
        only, e.g. 中国, 电信, 联通, 浙江. It is UTF-8 encoded before being sent.
    :param host: (unicode) host passed to get_server_proxy() when ``url`` is
        empty.
    :param port: (int) forwarded unchanged to get_server_proxy().
    :return: (str) proxy as "ip:port" -> 228.21.11.78:8064
    """
    if is_debug:
        proxy = get_crawler_proxy()
    else:
        # The URL does not change between retries, so resolve its host once.
        # NOTE(review): hostname is None for a URL without a scheme - confirm
        # that callers always pass absolute URLs.
        if len(url) > 0:
            host = urlparse.urlparse(url).hostname
        proxy = None
        while not proxy:
            try:
                proxy = get_server_proxy(host, area.encode("UTF-8", "ignore"), port=port)
            except Exception as e:
                # Exception, not BaseException: Ctrl-C / SystemExit must be
                # able to break out of this otherwise endless retry loop.
                print(exceputil.traceinfo(e))
                time.sleep(10)
    proxy = proxy.strip()
    print("choice_proxy ret: %s" % (proxy,))
    return proxy
def getredisQueuev2(key, redis_host="master1", redis_port=57819, redis_password="******", sleep_time=10, retry=9,
                    email_list=functions.mailto_list_ourselves):
    """
    Open the redis queue ``key``, retrying until getredisQueue() succeeds.

    Every failure is printed. After ``retry + 1`` consecutive failures an
    alert mail is sent, the function sleeps for 3600 seconds and the failure
    count starts over; after any other failure it sleeps ``sleep_time``
    seconds. The function only returns once a queue has been obtained.

    :param key: (str) queue name
    :param redis_host: (str) forwarded to getredisQueue()
    :param redis_port: (int) forwarded to getredisQueue()
    :param redis_password: (str) forwarded to getredisQueue()
    :param sleep_time: (int) seconds to wait between ordinary retries
    :param retry: (int) number of retries before an alert mail is sent
    :param email_list: (list) recipients of the alert mail
    :return: (RedisQueue) whatever getredisQueue() returned
    """
    attempts_left = retry + 1
    while True:
        try:
            queue = getredisQueue(key, redis_host=redis_host, redis_port=redis_port,
                                  redis_password=redis_password)
        except Exception as e:
            print(str(e))
            attempts_left -= 1
            if attempts_left > 0:
                time.sleep(sleep_time)
                continue
            # Retry budget used up: alert by mail, back off, start a new round.
            mail.send_mail(email_list, u"redis队列连接异常", u"错误信息%s" % exceputil.traceinfo(e))
            time.sleep(3600)
            attempts_left = retry + 1
        else:
            return queue
def getredisSetv2(key, redis_host="master1", redis_port=57819, redis_password="******", sleep_time=10, retry=9,
                  email_list=functions.mailto_list_ourselves):
    """
    Open the redis set ``key``, retrying until getredisSet() succeeds.

    After ``retry + 1`` consecutive failures an alert mail is sent, the
    function sleeps for 3600 seconds and the failure count starts over;
    after any other failure it sleeps ``sleep_time`` seconds. Individual
    failures are not printed.

    :param key: (str) set name
    :param redis_host: (str) forwarded to getredisSet()
    :param redis_port: (int) forwarded to getredisSet()
    :param redis_password: (str) forwarded to getredisSet()
    :param sleep_time: (int) seconds to wait between ordinary retries
    :param retry: (int) number of retries before an alert mail is sent
    :param email_list: (list) recipients of the alert mail
    :return: whatever getredisSet() returned
    """
    threshold = retry + 1
    failures = 0
    while True:
        try:
            return getredisSet(key, redis_host=redis_host, redis_port=redis_port,
                               redis_password=redis_password)
        except Exception as e:
            failures += 1
            if failures < threshold:
                time.sleep(sleep_time)
            else:
                # Too many failures in a row: alert, back off, count again.
                functions.send_mail_old(email_list, u"redis集合连接异常", u"错误信息%s" % exceputil.traceinfo(e))
                time.sleep(3600)
                failures = 0
def getv2(self, key="", block=True, timeout=30, sleep_time=10, retry=9, email_list=functions.mailto_list_ourselves):
    """
    Fetch one item from the queue, retrying until the read succeeds.

    The first attempt uses this object's own get(). After every failure the
    error is printed and a new queue object is obtained through
    getredisQueuev2() for the next attempt. After ``retry + 1`` consecutive
    failures an alert mail is sent and the method sleeps 3600 seconds before
    counting again; otherwise it sleeps ``sleep_time`` seconds.

    :param key: (str) unused; the queue name is taken from ``self.key``
    :param block: (bool) forwarded to get()
    :param timeout: (int) forwarded to get() -> 30
    :param sleep_time: (int) seconds to wait between ordinary retries -> 10
    :param retry: (int) number of retries before an alert mail is sent
    :param email_list: (list) recipients of the alert mail
    :return: whatever get() returned
    """
    source = self
    remaining = retry + 1
    while True:
        try:
            return source.get(block=block, timeout=timeout)
        except Exception as e:
            print(str(e))
            remaining -= 1
            if remaining <= 0:
                # Retry budget used up: alert, back off, start a new round.
                functions.send_mail_old(email_list, u"redis队列更新异常", u"错误信息%s" % exceputil.traceinfo(e))
                time.sleep(3600)
                remaining = retry + 1
            else:
                time.sleep(sleep_time)
            # Reconnect; fall back to this object if no new queue was returned.
            fresh = getredisQueuev2(self.key, redis_host=self.redis_host, redis_port=self.redis_port,
                                    redis_password=self.redis_password, email_list=email_list)
            source = fresh if fresh is not None else self
def putv2(self, item, key="", sleep_time=10, retry=9, email_list=functions.mailto_list_ourselves):
    """
    Put ``item`` into the queue, retrying until the write succeeds.

    The first attempt uses this object's own put(). After every failure a
    new queue object is obtained through getredisQueuev2() for the next
    attempt. After ``retry + 1`` consecutive failures an alert mail is sent
    and the method sleeps 3600 seconds before counting again; otherwise it
    sleeps ``sleep_time`` seconds. Individual failures are not printed.

    :param item: (str) content to enqueue
    :param key: (str) unused; the queue name is taken from ``self.key``
    :param sleep_time: (int) seconds to wait between ordinary retries
    :param retry: (int) number of retries before an alert mail is sent
    :param email_list: (list) recipients of the alert mail
    :return: whatever put() returned
    """
    reconnected = None
    limit = retry + 1
    failed = 0
    while True:
        target = self if reconnected is None else reconnected
        try:
            return target.put(item)
        except Exception as e:
            failed += 1
            if failed < limit:
                time.sleep(sleep_time)
            else:
                # Too many failures in a row: alert, back off, count again.
                functions.send_mail_old(email_list, u"redis队列更新异常", u"错误信息%s" % exceputil.traceinfo(e))
                time.sleep(3600)
                failed = 0
            reconnected = getredisQueuev2(self.key, email_list=email_list, redis_host=self.redis_host,
                                          redis_port=self.redis_port, redis_password=self.redis_password)
def getmondbbyhostv2(db, table, sleep_time=10, retry=9, email_list=functions.mailto_list_ourselves):
    """
    Get the mongo handle for ``db`` / ``table``, retrying until one is obtained.

    An attempt fails when getmondbbyhost() raises or returns None. Every
    failure is printed. After ``retry + 1`` consecutive failures an alert
    mail is sent, the function sleeps 3600 seconds and the count starts
    over; after any other failure it sleeps ``sleep_time`` seconds.

    :param db: mongo database, forwarded to getmondbbyhost()
    :param table: (str) document collection, forwarded to getmondbbyhost()
    :param sleep_time: (int) seconds to wait between ordinary retries
    :param retry: (int) number of retries before an alert mail is sent
    :param email_list: (list) recipients of the alert mail
    :return: (mongodb) the non-None object returned by getmondbbyhost()
    """
    limit = retry + 1
    failures = 0
    while True:
        try:
            handle = getmondbbyhost(db, table)
            if handle is not None:
                return handle
            # A None result used to loop again immediately, without sleeping
            # and without ever reaching the alert; treat it as a failure.
            detail = u"getmondbbyhost returned None"
            print(u"mongo数据库连接异常,错误信息%s" % detail)
        except Exception as e:
            print(u"mongo数据库连接异常,错误信息%s" % str(e))
            detail = exceputil.traceinfo(e)
        failures += 1
        if failures >= limit:
            # Retry budget used up: alert by mail, back off, start a new round.
            functions.send_mail_old(email_list, u"mongo数据库连接异常", u"错误信息%s" % detail)
            time.sleep(3600)
            failures = 0
        else:
            time.sleep(sleep_time)
def _connect_proxy_client(host, port):
    """Call init_proxy_client() until it succeeds, waiting 10 s after each failure."""
    while True:
        try:
            return init_proxy_client(host=host, port=port)
        except Exception as e:
            print(exceputil.traceinfo(e))
            time.sleep(10)


def get_server_proxy(host, area, port=8880):
    """
    Get a proxy address from the paid proxy service, blocking until one arrives.

    The module-level ``proxyServerClient`` is created on first use. Whenever
    the service raises, or answers with None / a blank string, the function
    waits 10 seconds, replaces ``proxyServerClient`` with a fresh client and
    asks again.

    :param host: (str) domain name; passed both to init_proxy_client() and to
        getPorxy().
        NOTE(review): the same value is used as the client's host and as the
        requested domain - confirm that this is intended.
    :param area: (str) region of the wanted proxy
    :param port: (int) port passed to init_proxy_client()
    :return: (unicode) proxy with surrounding whitespace removed -> 231.29.67.145:8585
    """
    global proxyServerClient
    if proxyServerClient is None:
        # Lazy initialisation of the shared client.
        proxyServerClient = _connect_proxy_client(host, port)
    while True:
        try:
            proxy = proxyServerClient.getPorxy(host, area)
            if proxy is not None and len(proxy.strip()) > 0:
                return proxy.strip()
        except Exception as e:
            print(exceputil.traceinfo(e))
        # Empty answer or error: wait, rebuild the client, then ask again.
        time.sleep(10)
        proxyServerClient = _connect_proxy_client(host, port)
def updatev2(db, id, valueset, dbname, table, flag=True, retry=9, sleep_time=10,
             email_list=functions.mailto_list_ourselves):
    """
    Update a mongo document, retrying until update() succeeds.

    Every failure is printed. After ``retry + 1`` consecutive failures an
    alert mail is sent and the function sleeps 3600 seconds before counting
    again. After every failure it additionally sleeps ``sleep_time`` seconds
    and obtains a new database handle through getmondbbyhostv2().

    :param db: (mongodb) database handle used for the first attempt
    :param id: (str) value of the mongo ``_id`` primary key
    :param valueset: (dict) content to write
    :param dbname: (str) database name, used when reconnecting
    :param table: (str) document collection name, used when reconnecting
    :param flag: forwarded unchanged to update()
    :param retry: (int) number of retries before an alert mail is sent
    :param sleep_time: (int) seconds to wait before reconnecting
    :param email_list: (list) recipients of the alert mail
    :return: (None)
    """
    attempts_left = retry + 1
    while True:
        try:
            update(db, id, valueset, flag)
            return
        except Exception as e:
            print(u"更新mongo数据库异常,错误信息:%s" % str(e))
            attempts_left -= 1
            if attempts_left <= 0:
                # Retry budget used up: alert by mail, back off, start a new round.
                functions.send_mail_old(email_list, u"mongo数据库更新异常", u"错误信息%s" % exceputil.traceinfo(e))
                time.sleep(3600)
                attempts_left = retry + 1
            # Reconnect to mongo, keeping the caller's retry and alert
            # settings instead of silently falling back to the defaults.
            time.sleep(sleep_time)
            db = getmondbbyhostv2(dbname, table, sleep_time=sleep_time, retry=retry, email_list=email_list)