Python ErrorLogsFile Exemples, errorlogs.ErrorLogsFile Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : avcsearchcategory_ct.py Projet : wangganghua/AVCSearchCategory

def run_cjbrand():
    """Drain the ``redis_key_phone_w`` Redis list and run the brand lookup on each entry.

    The matching keys are printed first (or a "key not found" message when
    there are none).  Entries are then popped one at a time with ``lpop``;
    each one has its backslashes replaced by "、" and the whitespace
    character embedded in the second ``replace`` literal (apparently a tab)
    removed, is parsed as JSON, and is passed to ``search_CHUNTAO_brand``
    together with its ``"urls"`` value.

    The loop stops when ``lpop`` returns a falsy value.  Any exception raised
    while handling an entry is printed and recorded through
    ``ErrorLogsFile``; processing then continues with the next entry.
    """
    matching_keys = rconnection_test.keys(redis_key_phone_w)
    if not matching_keys:
        print("没有找到key: %s" % redis_key_phone_w)
    else:
        print(matching_keys)

    while True:
        raw_entry = rconnection_test.lpop(redis_key_phone_w)
        try:
            if not raw_entry:
                # Nothing left in the list: stop.
                print("没有找到key, break")
                break
            # Clean the payload before parsing it as JSON.
            cleaned = str(raw_entry).replace("\\", "、")
            cleaned = cleaned.replace("	", "")
            record = json.loads(cleaned)
            target_url = record["urls"]
            search_CHUNTAO_brand(target_url, record)
        except Exception as e:
            print("load url json error : %s" % e)
            log_entry = ErrorLogsFile(
                "load url json error : %s ,,,,, error : %s " % (raw_entry, e))
            log_entry.saveerrorlog()

Exemple #2

0

Afficher le fichier

Fichier : avcsearchcategory_ct.py Projet : wangganghua/AVCSearchCategory

def search_CHUNTAO_page(urlx, category):
    """Read a listing's total page count and crawl each of its pages.

    Args:
        urlx: listing URL; its ``s=0`` offset is replaced by a ``{0}``
            placeholder so ``search_CHUNTAO`` can format a page offset in.
        category: passed through unchanged to ``search_CHUNTAO``.

    The listing is fetched through a randomly chosen proxy.  The page count
    is taken from the ``data-totalPage="N"`` attribute and
    ``search_CHUNTAO`` is called for page indexes ``0 .. N-1``.  When the
    attribute is missing the problem is printed and logged, without a retry.

    Any exception inside the ``try`` block (including one raised while
    crawling the pages) counts as a failure: the function sleeps five
    seconds and retries, and once the failure count equals ``errorCount``
    it logs the error and returns.
    """
    print(urlx)
    url = str(urlx).replace("s=0", "s={0}")
    http = requests.session()
    failures = 0
    while True:
        if failures > errorCount:
            print("%s: CHUN TAO not find this url : %s" % (datetime.now(), url))
            break
        # Pick a random proxy entry; its JSON carries the address under "ip".
        proxy_ip = json.loads(rconnection_yz.srandmember(redis_key_proxy))["ip"]
        http.proxies = {
            'http': 'http://' + proxy_ip,
            'https': 'https://' + proxy_ip,
        }
        try:
            # NOTE(review): `url` is requested as-is, i.e. with the literal
            # "{0}" placeholder still in the query string - confirm intended.
            response = http.get(url, timeout=30)
            body = response.text
            response.close()
            if body:
                # Look up the total number of pages shown on the listing.
                pages = re.search('(?<=data-totalPage=")\d+(?=")', str(body))
                if not pages:
                    print("search the total page regular isvalid ?")
                    log_entry = ErrorLogsFile(
                        "search the total page regular isvalid ?: url:%s" % url)
                    log_entry.saveerrorlog()
                else:
                    print(pages.group())
                    for page_no in range(int(pages.group())):
                        search_CHUNTAO(page_no, url, category)
        except Exception as e:
            failures += 1
            print("errormessage: %s" % e)
            if failures == errorCount:
                print("search total page category error: %s , %s" % (failures, e))
                log_entry = ErrorLogsFile(
                    "search  total page category error: url:%s,errormessage:%s"
                    % (url, e))
                log_entry.saveerrorlog()
                return
            time.sleep(5)
        else:
            # The attempt finished without raising: no retry needed.
            break

Exemple #3

0

Afficher le fichier

Fichier : avcsearchcategory_ct.py Projet : wangganghua/AVCSearchCategory

def search_CHUNTAO(totalpage, urls, category):
    """Crawl one listing page and queue every "村淘优选" product found on it.

    Args:
        totalpage: page index; the offset formatted into ``urls`` is
            ``40 * totalpage``.
        urls: listing URL template, filled in with ``str.format``.
        category: value written to the ``"urlleibie"`` field of each record.

    The page is fetched through a randomly chosen proxy and split into
    product fragments by regular expression.  For each fragment tagged
    "村淘优选" the id, product name, monthly sales and price are extracted.
    Names containing one of ``invalid_keywords`` (and not "送") are skipped.
    Each remaining product is pushed as a hand-built JSON string onto the
    ``redis_key_phone_w`` list.  A pattern that fails to match is printed
    and logged through ``ErrorLogsFile``.

    Any exception inside the ``try`` block counts as a failure: the function
    sleeps five seconds and retries until the failure count exceeds
    ``errorCount``.
    """
    http = requests.session()
    failures = 0
    while True:
        if failures > errorCount:
            print("%s: CHUN TAO not find this url : %s" % (datetime.now(),
                                                           urls))
            break
        # Route the request through a randomly chosen proxy.
        proxy_ip = json.loads(rconnection_yz.srandmember(redis_key_proxy))["ip"]
        http.proxies = {
            'http': 'http://' + proxy_ip,
            'https': 'https://' + proxy_ip,
        }
        # Rebuild the URL for this page: the offset advances by 40 per page.
        url = urls.format(40 * totalpage)
        try:
            print("toale : %s ,url : %s" % (totalpage, url))
            response = http.get(url, timeout=30)
            page_html = response.text
            response.close()
            if page_html:
                # Primary pattern: one match per product block.
                items = re.findall(
                    r'<div class="item-info">[\s\S]*?</div>[\s\S]*?</div>[\s\S]*?</li>',
                    page_html)
                used_fallback = not items
                if used_fallback:
                    items = re.findall(r'<span class="volume">月销量(.*)</span>',
                                       page_html)
                if not items:
                    print("%s:CHUN TAO url---the first regular is valid %s?" % (
                        datetime.now(), url))
                    ErrorLogsFile(
                        "CHUN TAO url---the first regular is valid:%s?" %
                        url).saveerrorlog()
                for item in items:
                    # NOTE(review): the fallback pattern has a single capture
                    # group, so re.findall yields plain strings and item[1]
                    # is one character - confirm this is intended.
                    fragment = item[1] if used_fallback else item
                    if len(fragment) == 0:
                        continue

                    # Keep only products tagged "村淘优选".
                    if re.search('(?<=title=")村淘优选(?=")',
                                 str(fragment)) is None:
                        continue

                    # Product id.
                    id_match = re.search('(id=(?P<dd>.*?)")', fragment)
                    if not id_match:
                        print("%s:can not find CHUN TAO id,please search regular is valid" %
                              datetime.now())
                        ErrorLogsFile(
                            "can not find CHUN TAO id,please search regular is valid:%s"
                            % url).saveerrorlog()
                        continue
                    spid = id_match.group("dd")

                    # Product name.
                    if "title=" not in fragment:
                        continue
                    name_match = re.search(
                        '(?<=title=").*(?=".*target="_blank">)', fragment)
                    if not name_match:
                        print("%s:can not find CHUN TAO spname,please search regular is valid:%s" % (
                            datetime.now(), url))
                        ErrorLogsFile(
                            "can not find CHUN TAO spname,please search regular is valid:%s"
                            % url).saveerrorlog()
                        continue
                    spname = name_match.group()

                    # Drop the product when its name contains an invalid
                    # keyword, unless the name also contains "送".  Every
                    # keyword is evaluated, exactly as in a plain loop.
                    keyword_hits = [
                        kw in spname and "送" not in spname
                        for kw in invalid_keywords
                    ]
                    if any(keyword_hits):
                        continue

                    # Monthly sales volume.
                    volume_match = re.search(
                        '(?<=<span class="volume">月销量).*(?=</span>)',
                        str(fragment))
                    if not volume_match:
                        print("%s:can not find CHUN TAO yue xiao liang,please search regular is valid:%s" % (
                            datetime.now(), url))
                        ErrorLogsFile(
                            "can not find CHUN TAO yue xiao liang,please search regular is valid:%s"
                            % url).saveerrorlog()
                        continue
                    yxl = str(volume_match.group()).replace("&nbsp;", "")

                    # Unit price.
                    price_match = re.search(
                        '(?<=class="price-value">).*(?=<)', str(fragment))
                    if not price_match:
                        print("%s:can not find CHUN TAO price,please search regular is valid:%s" % (
                            datetime.now(), url))
                        ErrorLogsFile(
                            "can not find CHUN TAO price,please search regular is valid:%s"
                            % url).saveerrorlog()
                        continue
                    price = price_match.group()

                    # Build the JSON record by hand and queue it in Redis.
                    result_url = CHUNTAO_url.format(spid)
                    result = '{"urlweb":"cun","urls":"%s","urlleibie":"%s","price":"%s","yxl":"%s","spname": "%s"}' % (
                        result_url, category, price, yxl, spname)
                    rconnection_test.lpush(redis_key_phone_w, result)
        except Exception as e:
            failures += 1
            if failures == errorCount:
                print("connection redis error: %s , %s" % (failures, e))
                ErrorLogsFile(
                    "connection redis error: url:%s ,errormessage:%s" %
                    (url, e)).saveerrorlog()
            time.sleep(5)
        else:
            # The attempt finished without raising: no retry needed.
            break

Exemple #4

0

Afficher le fichier

Fichier : avcsearchcategory_ct.py Projet : wangganghua/AVCSearchCategory

def search_CHUNTAO_brand(urls, attributes):
    """Fetch a product page, extract brand and model, and queue the result.

    Args:
        urls: URL of the product page to fetch.
        attributes: mapping providing the ``"urls"``, ``"urlleibie"``,
            ``"price"``, ``"yxl"`` and ``"spname"`` values copied into the
            output record.

    The page is fetched through a randomly chosen proxy and HTML-unescaped.
    The model is read from the text after "型号:", falling back to "货号:".
    The brand is read from the "品牌...:" list item.  A missing value is
    printed and logged, and the record is still pushed with an empty string
    in its place.  The record is a hand-built JSON string pushed onto the
    ``redis_key_phone_result`` list.

    Any exception inside the ``try`` block counts as a failure: the function
    sleeps five seconds and retries until the failure count exceeds
    ``errorCount``.
    """
    http = requests.session()
    failures = 0
    while True:
        if failures > errorCount:
            print("%s: CHUN TAO not find this url : %s" % (datetime.now(),
                                                           urls))
            break
        # Route the request through a randomly chosen proxy.
        proxy_ip = json.loads(rconnection_yz.srandmember(redis_key_proxy))["ip"]
        http.proxies = {
            'http': 'http://' + proxy_ip,
            'https': 'https://' + proxy_ip,
        }
        try:
            response = http.get(urls, timeout=30)
            page = HTMLParser.HTMLParser().unescape(response.text)
            response.close()
            if page:
                # Model: try "型号:" first, then fall back to "货号:".
                model = ""
                model_match = re.search(r'(?<=型号:).*(?=<)', str(page))
                if model_match:
                    model = str(model_match.group())
                    model = model.replace("&nbsp;", "").replace(" ", "")
                if model == "":
                    sku_match = re.search(r'(?<=货号:).*(?=<)', str(page))
                    if sku_match:
                        model = str(sku_match.group())
                        model = model.replace("&nbsp;", "").replace(" ", "")
                    else:
                        print("not find model,please search regular is valued?: %s , %s" % (
                            datetime.now(), urls))
                        ErrorLogsFile(
                            "not find model,please search regular is valued?: %s , %s"
                            % (datetime.now(), urls)).saveerrorlog()

                # Brand.
                brand_match = re.search(r'品牌[^"]{0,5}:(?P<dd>.*?)</li>',
                                        str(page))
                brand = ""
                if brand_match:
                    brand = str(brand_match.group("dd"))
                    brand = brand.replace("&nbsp;", "").replace(" ", "")
                    brand = brand.replace(" ", "")
                else:
                    print("not find brand,please search regular is valued?: %s , %s" % (
                        datetime.now(), urls))
                    ErrorLogsFile(
                        "not find brand,please search regular is valued?: %s , %s"
                        % (datetime.now(), urls)).saveerrorlog()

                # Build the JSON record by hand and queue it in Redis.
                record_template = (
                    '{"urlweb":"cun","urls":"%s",'
                    '"urlleibie":"%s","price":"%s",'
                    '"yxl":"%s","spname": "%s",'
                    '"brand":"%s","model":"%s"}')
                result = record_template % (
                    attributes["urls"], attributes["urlleibie"],
                    attributes["price"], attributes["yxl"],
                    attributes["spname"], brand, model)
                rconnection_test.lpush(redis_key_phone_result, result)
        except Exception as e:
            # The first message shows the failure count before it is incremented.
            print("connection redis error: %s , %s" % (failures, e))
            failures += 1
            if failures == errorCount:
                print("connection redis error: %s , %s" % (failures, e))
                ErrorLogsFile(
                    "connection redis error: url:%s ,errormessage:%s" %
                    (urls, e)).saveerrorlog()
            time.sleep(5)
        else:
            # The attempt finished without raising: no retry needed.
            break

Exemple #5

0

Afficher le fichier

Fichier : avcsearchcategory_tm.py Projet : wangganghua/AVCSearchCategory

def search_TM(totalpage, urls, category):
    """Crawl one Tmall listing page and queue every valid product found on it.

    Args:
        totalpage: page index; the offset formatted into ``urls`` is
            ``60 * totalpage``.
        urls: listing URL template, filled in with ``str.format``.
        category: value written to the ``"Urlleibie"`` field of each record.

    The page is fetched through a randomly chosen proxy with a random
    User-Agent.  Product fragments are located by regular expression; for
    each one the id and title are extracted.  Names containing one of
    ``invalid_keywords`` (and not "送") are skipped.  Each remaining product
    is pushed as a hand-built JSON string onto the ``redis_key_phone_w``
    list.

    Retry policy:
      * An exception inside the ``try`` block counts as a failure; the
        function retries until the failure count exceeds ``errorCount``.
      * A page on which neither product pattern matches is retried at most
        ``errorCount`` times, after which the function returns.  This branch
        used to retry without any counter and could loop forever on a
        persistently empty page; the bound mirrors ``search_TM_page``.
    """
    sesson = requests.session()
    isValue = True
    index = 0          # failures caused by exceptions
    empty_retries = 0  # retries caused by pages without any product match
    while isValue:
        if errorCount < index:
            print("%s: TM not find this url : %s" % (datetime.now(), urls))
            break
        # Route the request through a randomly chosen proxy.
        redis_key_proxy = random.choice(proxykeys)
        proxy = rconnection_yz.srandmember(redis_key_proxy)
        proxiip = json.loads(proxy)["ip"]
        sesson.proxies = {'http': 'http://' + proxiip, 'https': 'https://' + proxiip}
        # A random Tmall cookie entry is fetched here.
        # NOTE(review): `tmcookies` is never used; the header below sends
        # `cookie`, which is not defined in this function (presumably a
        # module-level value) - confirm which one is intended.
        tmcookies = rconnection_test.srandmember(redis_key_tm_cookies)
        headers = {
            "User-Agent": "%s" % random.choice(user_agent_list),
            "Accept": "*/*",
            "Referer": "https://www.tmall.com/",
            "Cookie": cookie
        }
        print("begin aaaaaaaaaaaaaaaaaaaaaa")
        # Rebuild the URL for this page: the offset advances by 60 per page.
        url = urls.format(60 * totalpage)
        try:
            print(url)
            req = sesson.get(url, headers=headers, timeout=30)
            html = req.text
            req.close()
            isValue = False
            if html:
                tzurl = re.findall(r'<p class="productTitle">[\s\S]*?</p>', html)
                used_fallback = not tzurl
                if used_fallback:
                    # The fallback pattern has three groups, so each match is
                    # a tuple and element [1] is the text between the link
                    # prefix and "</a>".
                    tzurl = re.findall(r'(<a href="//detail.tmall.com/item.htm?)+(.*)(</a>)', html)
                    print(html)

                if not tzurl:
                    print("%s:TM url---the first regular is valid ?" % datetime.now())
                    wr = ErrorLogsFile("TM url---the first regular is valid:%s?" % (url))
                    wr.saveerrorlog()
                    if empty_retries == errorCount:
                        # Give up instead of retrying an empty page forever.
                        return
                    empty_retries += 1
                    time.sleep(5)
                    isValue = True

                for i in tzurl:
                    ix = i[1] if used_fallback else i
                    if not len(ix) > 0:
                        continue
                    # Product id.
                    search_id = re.search("(id=(?P<dd>.*?))+(&amp;skuId)", ix)
                    if not search_id:
                        print("%s:can not find TM id,please search regular is valid" % datetime.now())
                        wr = ErrorLogsFile("can not find TM id,please search regular is valid:%s" % (url))
                        wr.saveerrorlog()
                        continue
                    spid = search_id.group("dd")
                    # Product name.
                    if "title=" not in ix:
                        continue
                    search_spname = re.search("(title=.*)+(>.*)", ix)
                    if not search_spname:
                        print("%s:can not find TM spname,please search regular is valid:%s" % (datetime.now(), url))
                        wr = ErrorLogsFile(
                            "can not find TM spname,please search regular is valid:%s" % (url))
                        wr.saveerrorlog()
                        continue
                    spname = search_spname.group()
                    # Drop the product when its name contains an invalid
                    # keyword, unless the name also contains "送".
                    is_invalid = False
                    for ia in invalid_keywords:
                        if ia in spname and "送" not in spname:
                            is_invalid = True
                    if is_invalid:
                        continue
                    result_url = TM_url.format(spid)
                    result = '{"Urlweb":"TM","Urls":"%s","Urlleibie":"%s","spbjpinpai": "",' \
                             '"spbjjixing": "",' \
                             '"pc": ""}' % (result_url, category)
                    # Queue the hand-built JSON record in Redis.
                    rconnection_test.lpush(redis_key_phone_w, result)
        except Exception as e:
            isValue = True
            index += 1
            if index == errorCount:
                print("connection redis error: %s , %s" % (index, e))
                wr = ErrorLogsFile("connection redis error: url:%s ,errormessage:%s" % (url, e))
                wr.saveerrorlog()
            time.sleep(5)
        # Pause after every attempt, successful or not.
        time.sleep(5)

Exemple #6

0

Afficher le fichier

Fichier : avcsearchcategory_tm.py Projet : wangganghua/AVCSearchCategory

def search_TM_urllib2(urlx, category):
    """urllib2 variant: read a Tmall listing's page count and crawl its pages.

    Args:
        urlx: listing URL; its ``s=0`` offset is replaced by a ``{0}``
            placeholder so ``search_TM`` can format a page offset in.
        category: passed through unchanged to ``search_TM``.

    The listing is requested twice through a randomly chosen proxy, with a
    cookie-aware opener; the body of the second response is searched for
    "共N页" and ``search_TM`` is called for page indexes ``0 .. N-1``.

    Fixes relative to the previous version:
      * The body is read once and that text is what gets searched.  Before,
        the body was consumed by the debug print and the response object's
        ``str()`` was searched, so the page count could never be found.
      * Both responses are closed, the second even when reading fails.
      * The opener is built and installed once instead of twice, and the
        unused ``requests`` session is gone.

    Any exception inside the ``try`` block counts as a failure: the function
    sleeps five seconds and retries, and once the failure count equals
    ``errorCount`` it logs the error and returns.
    """
    print(urlx)
    url = str(urlx).replace("s=0", "s={0}")  # e.g. ".../search_product.htm?cat=50936015&s={0}"
    isValue = True
    index = 0
    # First find the number of pages shown on the listing.
    while isValue:
        if errorCount < index:
            print("%s: TM not find this url : %s" % (datetime.now(), url))
            break
        # Route the request through a randomly chosen proxy.
        proxy = rconnection_yz.srandmember(redis_key_proxy)
        proxiip = json.loads(proxy)["ip"]
        print(proxiip)
        prxyip = {'http': 'http://' + proxiip, 'https': 'https://' + proxiip}
        proxy_s = urllib2.ProxyHandler(prxyip)
        try:
            cj = cookielib.CookieJar()
            opener = urllib2.build_opener(proxy_s, urllib2.HTTPCookieProcessor(cj))
            # install_opener makes this opener the process-wide default
            # used by urllib2.urlopen.
            urllib2.install_opener(opener)
            # NOTE(review): `url` still contains the literal "{0}" here -
            # confirm intended.  The first request is presumably made only
            # to populate the cookie jar; its body is not used.
            resp = urllib2.urlopen(url)
            resp.close()
            print(cj)
            req = urllib2.urlopen(url)
            try:
                # Read the body once; a response can only be read a single time.
                html = req.read()
            finally:
                req.close()
            print("req : %s" % html)
            isValue = False
            if html:
                totalye = re.search('(?<=共)\d+(?=页)', str(html))
                if totalye:
                    print(totalye.group())
                    for int_page in range(int(totalye.group())):
                        print("intpage: %s" % int_page)
                        search_TM(int_page, url, category)
                else:
                    print("search the total page regular isvalid ?")
                    wr = ErrorLogsFile("search the total page regular isvalid ?: url:%s" % (url))
                    wr.saveerrorlog()
        except Exception as e:
            isValue = True
            index += 1
            print("errormessage: %s" % e)
            if index == errorCount:
                print("search total page category error: %s , %s" % (index, e))
                wr = ErrorLogsFile("search  total page category error: url:%s,errormessage:%s" % (url, e))
                wr.saveerrorlog()
                return
            time.sleep(5)

Exemple #7

0

Afficher le fichier

Fichier : avcsearchcategory_tm.py Projet : wangganghua/AVCSearchCategory

def search_TM_page(urlx, category):
    """Read a Tmall listing's total page count and crawl each of its pages.

    Args:
        urlx: listing URL; its ``s=0`` offset is replaced by a ``{0}``
            placeholder so ``search_TM`` can format a page offset in.
        category: passed through unchanged to ``search_TM``.

    The listing is fetched through a randomly chosen proxy with a random
    User-Agent.  The page count is read from the "共N页" text and
    ``search_TM`` is called for page indexes ``0 .. N-1``.

    Two independent retry counters are kept:
      * Pages without the "共N页" text are retried after two seconds; once
        ``errorCount`` such retries have been made, the function returns.
      * Exceptions inside the ``try`` block are retried after five seconds;
        once their count equals ``errorCount`` the error is logged and the
        function returns.
    """
    print(urlx)
    url = str(urlx).replace("s=0", "s={0}")  # e.g. ".../search_product.htm?cat=50936015&s={0}"
    http = requests.session()
    keep_trying = True
    failures = 0
    no_match_retries = 0
    # First find the number of pages shown on the listing.
    while keep_trying:
        if failures > errorCount:
            print("%s: TM not find this url : %s" % (datetime.now(), url))
            break
        # Route the request through a randomly chosen proxy.
        proxy_key = random.choice(proxykeys)
        proxy_ip = json.loads(rconnection_yz.srandmember(proxy_key))["ip"]
        print(proxy_ip)
        http.proxies = {'http': 'http://' + proxy_ip, 'https': 'https://' + proxy_ip}
        # A random Tmall cookie entry is fetched but not used below.  The
        # header sends `cookie`, which is not defined in this function
        # (presumably a module-level value) - TODO confirm.
        tm_cookie_entry = rconnection_test.srandmember(redis_key_tm_cookies)
        headers = {
            "User-Agent": "%s" % random.choice(user_agent_list),
            "Accept": "*/*",
            "Referer": "https://www.tmall.com/",
            "Cookie": cookie,
        }
        try:
            # NOTE(review): `url` is requested as-is, i.e. with the literal
            # "{0}" placeholder still in the query string - confirm intended.
            response = http.get(url, headers=headers, timeout=30)
            body = response.text
            response.close()
            keep_trying = False
            if body:
                pages = re.search('(?<=共)\d+(?=页)', str(body))
                if not pages:
                    print("search the total page regular isvalid ?")
                    log_entry = ErrorLogsFile(
                        "search the total page regular isvalid ?: url:%s" % url)
                    log_entry.saveerrorlog()
                    keep_trying = True
                    if no_match_retries == errorCount:
                        return
                    no_match_retries += 1
                    time.sleep(2)
                else:
                    print(pages.group())
                    page = pages.group()
                    print("page %s" % page)
                    for page_no in range(int(page)):
                        print("intpage: %s" % page_no)
                        search_TM(page_no, url, category)
        except Exception as e:
            keep_trying = True
            failures += 1
            print("errormessage: %s" % e)
            if failures == errorCount:
                print("search total page category error: %s , %s" % (failures, e))
                log_entry = ErrorLogsFile(
                    "search  total page category error: url:%s,errormessage:%s" % (url, e))
                log_entry.saveerrorlog()
                return
            time.sleep(5)