Esempio n. 1
0
def choice_proxy(is_debug=True, url="", area=u"中国", host=u"master", port=8880):
    """
    Return a proxy address, optionally restricted to a region.

    In debug mode the proxy comes from ``get_crawler_proxy()``. Otherwise
    ``get_server_proxy()`` is called repeatedly (10 s pause after each
    error) until it yields a non-empty proxy.

    :param is_debug: (bool) True -> do not use the paid proxy service;
                     False -> use the paid proxy service.
    :param url: (unicode) page that will be fetched through the proxy.
                Paid proxies only. When it contains a hostname, that
                hostname replaces ``host`` in the proxy request (the
                original note says this is for per-site load balancing).
    :param area: (unicode) region/type of the proxy, paid proxies only.
                 Values are Chinese names, e.g. 中国 (China, the default),
                 电信 (Telecom), 联通 (Unicom), 浙江 (Zhejiang),
                 北京 (Beijing), 上海 (Shanghai).
    :param host: (unicode) first argument handed to ``get_server_proxy``
                 when ``url`` does not supply a hostname.
    :param port: (int) forwarded to ``get_server_proxy``.
    :return: (str) proxy, e.g. 228.21.11.78:8064
    """
    if is_debug:
        proxy = get_crawler_proxy()
    else:
        # The request arguments never change between retries, so they are
        # computed once instead of on every loop iteration.
        if url:
            hostname = urlparse.urlparse(url).hostname
            # A URL without a scheme has no hostname; keep the given host
            # rather than replacing it with None.
            if hostname:
                host = hostname
        # Only text needs encoding. Calling .encode() on a non-ASCII byte
        # string raises on Python 2, which used to make this loop endless.
        if not isinstance(area, bytes):
            area = area.encode("UTF-8", "ignore")

        proxy = None
        while not proxy:
            try:
                proxy = get_server_proxy(host, area, port=port)
            except Exception as e:
                # Exception, not BaseException: KeyboardInterrupt and
                # SystemExit must be able to stop this endless retry loop.
                print(exceputil.traceinfo(e))
                time.sleep(10)
    proxy = proxy.strip()
    print("choice_proxy ret: %s" % proxy)
    return proxy
Esempio n. 2
0
def getredisQueuev2(key,
                    redis_host="master1",
                    redis_port=57819,
                    redis_password="******",
                    sleep_time=10,
                    retry=9,
                    email_list=functions.mailto_list_ourselves):
    """
    Connect to a redis queue, retrying until the connection succeeds.

    Every failure is printed. After ``retry + 1`` consecutive failures an
    alarm mail is sent, the function sleeps for one hour and the failure
    counter starts over; before that it sleeps ``sleep_time`` seconds
    between attempts.

    :param key: (str) queue name
    :param redis_host: forwarded to ``getredisQueue``
    :param redis_port: forwarded to ``getredisQueue``
    :param redis_password: forwarded to ``getredisQueue``
    :param sleep_time: (int) seconds to sleep between ordinary retries
    :param retry: (int) number of retries before the alarm mail is sent
    :param email_list: (list) recipients of the alarm mail
    :return: (RedisQueue) whatever ``getredisQueue`` returns
    """
    max_failures = retry + 1
    failures = 0
    while True:
        try:
            queue = getredisQueue(key,
                                  redis_host=redis_host,
                                  redis_port=redis_port,
                                  redis_password=redis_password)
        except Exception as err:
            print(str(err))
            failures += 1
            if failures < max_failures:
                time.sleep(sleep_time)
                continue
            # Retry budget exhausted: raise the alarm, back off for an hour
            # and start counting again.
            mail.send_mail(email_list, u"redis队列连接异常",
                           u"错误信息%s" % exceputil.traceinfo(err))
            time.sleep(3600)
            failures = 0
        else:
            return queue
Esempio n. 3
0
def getredisSetv2(key,
                  redis_host="master1",
                  redis_port=57819,
                  redis_password="******",
                  sleep_time=10,
                  retry=9,
                  email_list=functions.mailto_list_ourselves):
    """
    Connect to a redis set, retrying until the connection succeeds.

    After ``retry + 1`` consecutive failures an alarm mail is sent, the
    function sleeps for one hour and the failure counter starts over;
    before that it sleeps ``sleep_time`` seconds between attempts.

    :param key: (str) set name
    :param redis_host: forwarded to ``getredisSet``
    :param redis_port: forwarded to ``getredisSet``
    :param redis_password: forwarded to ``getredisSet``
    :param sleep_time: (int) seconds to sleep between ordinary retries
    :param retry: (int) number of retries before the alarm mail is sent
    :param email_list: (list) recipients of the alarm mail
    :return: whatever ``getredisSet`` returns
    """
    max_failures = retry + 1
    failures = 0
    while True:
        try:
            redis_set = getredisSet(key,
                                    redis_host=redis_host,
                                    redis_port=redis_port,
                                    redis_password=redis_password)
        except Exception as err:
            failures += 1
            if failures < max_failures:
                time.sleep(sleep_time)
                continue
            # Retry budget exhausted: raise the alarm, back off for an hour
            # and start counting again.
            functions.send_mail_old(email_list, u"redis集合连接异常",
                                    u"错误信息%s" % exceputil.traceinfo(err))
            time.sleep(3600)
            failures = 0
        else:
            return redis_set
Esempio n. 4
0
    def getv2(self, key="", block=True, timeout=30, sleep_time=10, retry=9,
              email_list=functions.mailto_list_ourselves):
        """
        Fetch one item from the queue, reconnecting and retrying on error.

        The first attempt uses ``self.get``. After any failure a new queue
        is obtained through ``getredisQueuev2`` (using this object's key,
        host, port and password) and later attempts go through it. After
        ``retry + 1`` consecutive failures an alarm mail is sent and the
        method sleeps for one hour before counting again.

        :param key: (str) queue name; not used by this method, ``self.key``
                    is used instead
        :param block: (bool) forwarded to ``get``
        :param timeout: (int) forwarded to ``get`` -> 30
        :param sleep_time: (int) seconds to sleep between ordinary retries -> 10
        :param retry: (int) number of retries before the alarm mail is sent
        :param email_list: (list) recipients of the alarm mail
        :return: (str) whatever ``get`` returns
        """
        redis_queue = None
        failures = 0
        max_failures = retry + 1
        while True:
            try:
                if redis_queue is not None:
                    return redis_queue.get(block=block, timeout=timeout)
                return self.get(block=block, timeout=timeout)
            except Exception as e:
                print(str(e))
                failures += 1
                if failures >= max_failures:
                    # Retry budget exhausted: alarm, long back-off, recount.
                    functions.send_mail_old(email_list, u"redis队列更新异常",
                                            u"错误信息%s" % exceputil.traceinfo(e))
                    time.sleep(3600)
                    failures = 0
                else:
                    time.sleep(sleep_time)

                # Every failure triggers a fresh connection for the next try.
                redis_queue = getredisQueuev2(self.key,
                                              redis_host=self.redis_host,
                                              redis_port=self.redis_port,
                                              redis_password=self.redis_password,
                                              email_list=email_list)
Esempio n. 5
0
    def putv2(self, item, key="", sleep_time=10, retry=9,
              email_list=functions.mailto_list_ourselves):
        """
        Put an item into the queue, reconnecting and retrying on error.

        The first attempt uses ``self.put``. After any failure a new queue
        is obtained through ``getredisQueuev2`` (using this object's key,
        host, port and password) and later attempts go through it. After
        ``retry + 1`` consecutive failures an alarm mail is sent and the
        method sleeps for one hour before counting again.

        :param item: (str) content to enqueue
        :param key: (str) queue name; not used by this method, ``self.key``
                    is used instead
        :param sleep_time: (int) seconds to sleep between ordinary retries
        :param retry: (int) number of retries before the alarm mail is sent
        :param email_list: (list) recipients of the alarm mail
        :return: whatever ``put`` returns (documented as None originally)
        """
        redis_queue = None
        failures = 0
        max_failures = retry + 1
        while True:
            try:
                if redis_queue is not None:
                    return redis_queue.put(item)
                return self.put(item)
            except Exception as e:
                failures += 1
                if failures >= max_failures:
                    # Retry budget exhausted: alarm, long back-off, recount.
                    functions.send_mail_old(email_list, u"redis队列更新异常",
                                            u"错误信息%s" % exceputil.traceinfo(e))
                    time.sleep(3600)
                    failures = 0
                else:
                    time.sleep(sleep_time)

                # Every failure triggers a fresh connection for the next try.
                redis_queue = getredisQueuev2(self.key,
                                              email_list=email_list,
                                              redis_host=self.redis_host,
                                              redis_port=self.redis_port,
                                              redis_password=self.redis_password)
Esempio n. 6
0
def getmondbbyhostv2(db,
                     table,
                     sleep_time=10,
                     retry=9,
                     email_list=functions.mailto_list_ourselves):
    """
    Return the object produced by ``getmondbbyhost(db, table)``, retrying
    until a non-None result is obtained.

    Both an exception and a None result count as a failed attempt. After
    ``retry + 1`` consecutive failures an alarm mail is sent, the function
    sleeps for one hour and the failure counter starts over; before that
    it sleeps ``sleep_time`` seconds between attempts.

    :param db: mongo database
    :param table: (str) document collection
    :param sleep_time: seconds to sleep between ordinary retries
    :param retry: number of retries before the alarm mail is sent
    :param email_list: (list) recipients of the alarm mail
    :return: (mongodb) the non-None result of ``getmondbbyhost``
    """
    max_failures = retry + 1
    failures = 0
    while True:
        try:
            result = getmondbbyhost(db, table)
            if result is not None:
                return result
            # A None result used to restart the loop immediately (no sleep)
            # and pushed the countdown below zero, which silenced the alarm
            # for good. Route it through the normal failure path instead.
            raise RuntimeError("getmondbbyhost returned None")
        except Exception as e:
            print(u"mongo数据库连接异常,错误信息%s" % str(e))
            failures += 1
            if failures >= max_failures:
                # Retry budget exhausted: alarm, long back-off, recount.
                functions.send_mail_old(email_list, u"mongo数据库连接异常",
                                        u"错误信息%s" % exceputil.traceinfo(e))
                time.sleep(3600)
                failures = 0
            else:
                time.sleep(sleep_time)
Esempio n. 7
0
def _connect_proxy_client(host, port):
    """Call ``init_proxy_client`` until it succeeds, pausing 10 s after each error."""
    while True:
        try:
            return init_proxy_client(host=host, port=port)
        except Exception as e:
            print(exceputil.traceinfo(e))
            time.sleep(10)


def get_server_proxy(host, area, port=8880):
    """
    Fetch a paid proxy address, retrying until a non-blank one is returned.

    The module-level ``proxyServerClient`` is created on first use. Whenever
    the client raises or returns None / a blank string, the function waits
    10 seconds, rebuilds the client and asks again.

    :param host: (str) domain name; passed both to ``init_proxy_client``
                 and to the client's ``getPorxy`` call
    :param area: (str) region
    :param port: (int) forwarded to ``init_proxy_client``
    :return: (unicode) proxy with surrounding whitespace removed,
             e.g. 231.29.67.145:8585
    """
    global proxyServerClient
    if proxyServerClient is None:
        # Lazy initialisation of the shared client.
        proxyServerClient = _connect_proxy_client(host, port)

    while True:
        try:
            proxy = proxyServerClient.getPorxy(host, area)
            if proxy is not None and proxy.strip():
                return proxy.strip()
        except Exception as e:
            print(exceputil.traceinfo(e))
        # Either the call failed or the answer was None/blank: back off,
        # reconnect and try again.
        time.sleep(10)
        proxyServerClient = _connect_proxy_client(host, port)
Esempio n. 8
0
def updatev2(db,
             id,
             valueset,
             dbname,
             table,
             flag=True,
             retry=9,
             sleep_time=10,
             email_list=functions.mailto_list_ourselves):
    """
    Update the database, retrying until the update succeeds.

    Every failure is printed, followed by a ``sleep_time`` pause and a
    reconnect through ``getmondbbyhostv2``. After ``retry + 1`` consecutive
    failures an alarm mail is sent and the function additionally sleeps
    for one hour before counting again.

    :param db: (mongodb) database object handed to ``update``
    :param id: (str) content of the mongo ``_id`` primary key
    :param valueset: (dict) values to write
    :param dbname: (str) database name, used when reconnecting
    :param table: (str) document collection name, used when reconnecting
    :param flag: forwarded to ``update`` as its fourth argument
    :param retry: (int) number of retries before the alarm mail is sent
    :param sleep_time: (int) seconds to sleep before each reconnect
    :param email_list: (list) recipients of the alarm mail
    :return: (None)
    """
    max_failures = retry + 1
    failures = 0
    while True:
        try:
            update(db, id, valueset, flag)
            return
        except Exception as e:
            print(u"更新mongo数据库异常,错误信息:%s" % str(e))
            failures += 1
            if failures >= max_failures:
                # Retry budget exhausted: alarm, long back-off, recount.
                functions.send_mail_old(email_list, u"mongo数据库更新异常",
                                        u"错误信息%s" % exceputil.traceinfo(e))
                time.sleep(3600)
                failures = 0

            # Reconnect to mongo before the next attempt. The caller's retry
            # settings and mail recipients are forwarded so reconnect alarms
            # reach the same people as update alarms.
            time.sleep(sleep_time)
            db = getmondbbyhostv2(dbname,
                                  table,
                                  sleep_time=sleep_time,
                                  retry=retry,
                                  email_list=email_list)