def get_account_info(driver):
    """
    Read the login email, bound phone and identity-verification status.

    Follows the link held by the second entry of ``#J_MtMainNav``, waits for
    ``dl.detail-info`` via ``refresh_page`` and then parses the page source.

    :param driver: browser driver instance
    :return: tuple ``(email, binding_phone, authentication)``. ``email`` and
        ``binding_phone`` are ``""`` when not found on the page;
        ``authentication`` is ``"未完成"`` when no "已完成" span is present.
    """
    settings_url = chrome_api.get_element_href(
        driver, "#J_MtMainNav > li:nth-child(2) > a")
    driver.get(settings_url)
    refresh_page(driver, "dl.detail-info")
    page = etree.HTML(driver.page_source.encode("utf-8").decode())

    # Login email: the value lives in the span following the label span.
    email_nodes = page.xpath(
        "//span[contains(text(),'登 录 邮 箱:')]/../span[2]/text()")
    email = email_nodes[0] if email_nodes else ""

    # Bound phone: same layout; newlines, tabs and spaces are removed.
    phone_nodes = page.xpath(
        "//span[contains(text(),'绑 定 手 机:')]/../span[2]/text()")
    binding_phone = ""
    if phone_nodes:
        binding_phone = phone_nodes[0]
        for unwanted in ("\n", "\t", " "):
            binding_phone = binding_phone.replace(unwanted, "")

    # Identity verification: any span containing "已完成" counts as done.
    auth_nodes = page.xpath("//span[contains(text(),'已完成')]/text()")
    authentication = auth_nodes[0] if auth_nodes else "未完成"

    return email, binding_phone, authentication
def get_account_type(driver):
    """
    Read the Alipay account category from the first table row of the page.

    :param driver: browser driver instance
    :return: text of the second cell in the first row with spaces, newlines
        and tabs removed, or ``""`` when the cell cannot be found
    """
    cell_selector = (
        "#main-content table > tbody > tr:nth-child(1) > td:nth-child(2)")
    refresh_page(driver, cell_selector)

    # Bail out early if the cell never shows up.
    if not wait_ele(driver, cell_selector):
        return ""

    cell = chrome_api.get_element(driver, cell_selector)
    if not cell:
        return ""

    cleaned = cell.text
    for unwanted in (" ", "\n", "\t"):
        cleaned = cleaned.replace(unwanted, "")
    return cleaned
def get_integral(driver):
    """
    Read the available Tmall points from the points-details page.

    Original author's note: navigating to this page can be buggy.

    :param driver: browser driver instance
    :return: text next to the "可用的积分" label, or ``""`` when it is absent
    """
    driver.get("https://pages.tmall.com/wow/jifen/act/point-details")
    refresh_page(driver, "div#pointContent")
    wait_ele(driver, "div#pointContent")

    page = etree.HTML(driver.page_source.encode("utf-8").decode())
    point_nodes = page.xpath(
        "//span[contains(text(),'可用的积分')]/../span[2]/text()")
    return point_nodes[0] if point_nodes else ""
def get_index_data(driver, current_mouse):
    """
    Read the buyer's Taoqi score (淘气值), membership level and login name
    from the home page.

    The mouse is moved to a random point inside the user link of the site
    navigation bar, after which the three values are read from the user
    info panel.

    :param driver: browser driver instance
    :param current_mouse: last known mouse position
    :return: tuple ``(score, vip_level, login_name, current_mouse)``. Each
        text value is ``""`` when its element does not appear; all three are
        ``""`` when any step raises. ``current_mouse`` is the position
        returned by ``chrome_api.move_to_position`` if the move succeeded,
        otherwise the value passed in.
    """
    import logging

    user_link = '#J_SiteNavLogin > div.site-nav-menu-hd > div.site-nav-user > a'

    def read_text(selector):
        # Wait for the element first; only read its text once it is present.
        if wait_ele(driver, selector):
            return chrome_api.get_element_text(driver, selector)
        return ""

    refresh_page(driver, user_link)
    try:
        rect = chrome_api.get_element_rect(driver, user_link)
        # Aim at a random point at least 2px inside the element's edges.
        offset_x = int(random.uniform(2, rect['width'] - 2))
        offset_y = int(random.uniform(2, rect['height'] - 2))
        current_mouse = chrome_api.move_to_position(
            driver, current_mouse,
            [rect['left'] + offset_x, rect['top'] + offset_y])
        # Taoqi score
        score = read_text("div.site-nav-user-info p:nth-child(2)")
        # Membership level
        vip_level = read_text("div.site-nav-user-info p:nth-child(3)")
        # Login name
        login_name = read_text("a.site-nav-login-info-nick")
    except Exception:
        # Best effort: the crawl continues with empty values, but the failure
        # is recorded instead of being swallowed silently.
        logging.getLogger(__name__).exception(
            "get_index_data: failed to read the user info panel")
        score = ""
        vip_level = ""
        login_name = ""
    return score, vip_level, login_name, current_mouse
def get_personal_deal_info(driver):
    """
    Read the account holder's real name and personal address.

    Follows the ``#newAccountProfile > a`` link and waits for ``h2.h2-single``
    via ``refresh_page`` before reading the form.

    :param driver: browser driver instance
    :return: tuple ``(name, address)``; either is ``""`` when unavailable
    """
    profile_url = chrome_api.get_element_href(driver,
                                              "#newAccountProfile > a")
    driver.get(profile_url)
    refresh_page(driver, "h2.h2-single")

    # Real name: first list item under the form heading.
    name_node = chrome_api.get_element(
        driver, "#main-content > form > h2 + ul > li:nth-child(1) > strong")
    name = "" if name_node is None else name_node.text

    # Address: value of the input in the sixth list item; falsy becomes "".
    address = chrome_api.get_element_value(
        driver,
        "#main-content > form > h2 + ul > li:nth-child(6) > input") or ""

    return name, address
# Example #6
# 0
def get_refund_number(driver, current_mouse):
    """
    Count the refund entries listed on the refund page.

    :param driver: browser driver instance
    :param current_mouse: last known mouse position
    :return: tuple ``(refund_number, current_mouse)``; the count is 0 when
        the ``#refundList > a`` link does not appear or no entry is found
    """
    if not wait_ele(driver, "#refundList > a"):
        return 0, current_mouse

    current_mouse = enter_next(driver, current_mouse, "#refundList > a")
    refresh_page(driver, "#topContainer_1")

    refund_number = 0
    first_entry = chrome_api.get_element(
        driver, '#bottomContainer_1 > div:nth-child(2)')
    if first_entry is None:
        return refund_number, current_mouse

    # Entries sit at every second child (2, 4, 6, ...); stop at the first gap.
    entry_selector = '#bottomContainer_1 > div:nth-child({})'
    child_index = 2
    while chrome_api.get_element(
            driver, entry_selector.format(child_index)) is not None:
        refund_number += 1
        child_index += 2
    return refund_number, current_mouse
def get_good_reputation(driver, user_id):
    """
    Read the buyer's cumulative credit, positive-rating rate and the
    per-period rating table from the rating page.

    The hrefs of ``#myRate > a`` and ``#bought`` are both read from the
    current page before navigating away; the latter is returned so the
    caller can open the bought-items list later.

    :param driver: browser driver instance
    :param user_id: user identifier, used only in the log message
    :return: tuple ``(cumulative_credit, rate_summary, one_week, one_month,
        list_bought_href)``. The two text values are ``""`` when absent.
        ``one_week`` / ``one_month`` map ``"good"``, ``"medium"``, ``"bad"``
        and ``"total"`` to the second / third text node of the matching
        table row; rows that are malformed or carry an unknown label are
        skipped.
    """
    logger.info("user {}: start crawl rate_summary".format(user_id))
    rate_summary_href = chrome_api.get_element_href(driver, "#myRate > a")
    list_bought_href = chrome_api.get_element_href(driver, "#bought")
    driver.get(rate_summary_href)
    refresh_page(driver, "table.seller-rate-info")
    html_str = driver.page_source.encode("utf-8").decode()
    html = etree.HTML(html_str)

    # Buyer cumulative credit
    cumulative_credit_list = html.xpath(
        "//h4[contains(text(),'买家累积信用')]/a/text()")
    cumulative_credit = (cumulative_credit_list[0]
                         if cumulative_credit_list else "")

    # Positive-rating rate
    rate_summary_list = html.xpath("//p[contains(text(),'好评率')]/strong/text()")
    rate_summary = rate_summary_list[0] if rate_summary_list else ""

    # Row label -> result key.
    row_keys = {
        "好评": "good",
        "中评": "medium",
        "差评": "bad",
        "总计": "total",
    }
    one_week = {}
    one_month = {}
    data_list = html.xpath(
        '//table[@class="tb-rate-table align-c thm-plain"]/tbody/tr')
    for tr in data_list:
        moment_list = tr.xpath('./td//text()')
        # Need the label plus the week and month values.
        if len(moment_list) < 3:
            continue
        key = row_keys.get(moment_list[0].strip())
        if key is None:
            # Unknown label: skip instead of reusing the previous row's key
            # (or failing on an unbound name for the first row).
            continue
        one_week[key] = moment_list[1]
        one_month[key] = moment_list[2]
    return cumulative_credit, rate_summary, one_week, one_month, list_bought_href
# Example #8
# 0
    def run(self, driver, taobao_total_data, size=(1366, 768)):
        """
        Crawl one logged-in Taobao account and store/publish the result.

        The pages are visited in a fixed order: home page user panel,
        "My Taobao", rating summary, refund list, after-sales list, account
        settings, personal info, delivery addresses, Alipay, Tmall points and
        finally the bought-items list.

        :param driver: browser driver instance; it is always quit before
            this method returns or raises
        :param taobao_total_data: dict that is filled in place with the
            crawled sections plus ``now_time``, ``status_code``,
            ``data_status_message`` and ``user_id``
        :param size: ``(width, height)`` used to pick the random initial
            mouse position within the top-left 75% of that area
        :return: ``taobao_total_data``. ``status_code`` is ``"2002"`` on
            success and ``"5002"`` when the crawl raised.
        """
        tb_user = {}
        tb_order = {}
        try:
            logger.info("user {}: start crawl login_name".format(self.user_id))
            current_mouse = [
                random.randint(1, int(size[0] * 0.75)),
                random.randint(2, int(size[1] * 0.75))
            ]
            ActionChains(driver).move_by_offset(
                xoffset=current_mouse[0], yoffset=current_mouse[1]).perform()
            score, vip_level, login_name, current_mouse = get_index_data(
                driver, current_mouse)
            tb_user["score"] = score
            tb_user["vip_level"] = vip_level
            tb_user["login_name"] = login_name
            # Enter the "My Taobao" page
            current_mouse = enter_next(
                driver, current_mouse,
                '#J_SiteNavMytaobao > div.site-nav-menu-hd > a')
            refresh_page(driver, "header.mt-header")
            logistics_information = get_logistics_information(driver)
            # Enter rating management: buyer cumulative credit and ratings
            cumulative_credit, rate_summary, one_week, one_month, list_bought_href = get_good_reputation(
                driver, self.user_id)
            tb_user["cumulative_credit"] = cumulative_credit
            tb_user["rate_summary"] = rate_summary
            tb_user["one_week"] = one_week
            tb_user["one_month"] = one_month
            # Enter the refund / rights-protection page
            refresh_page(driver, "a.J_MtIndicator")
            current_mouse = enter_next(driver, current_mouse,
                                       "a.J_MtIndicator")
            time.sleep(random.uniform(1.5, 2.5))
            # Enter the refund management page
            refund_number, current_mouse = get_refund_number(
                driver, current_mouse)
            # Enter the refund / rights-protection page again
            refresh_page(driver, "a.J_MtIndicator")
            current_mouse = enter_next(driver, current_mouse,
                                       "a.J_MtIndicator")
            time.sleep(random.uniform(1.5, 2.5))
            # Enter the complaint management page
            current_mouse = enter_next(driver, current_mouse,
                                       "#rulesManager > a")
            # Wait for the after-sales management link to load
            refresh_page(driver, "#rightManager > a")
            # Scroll down a little first
            v = abs(random.gauss(200, 20))
            time.sleep(random.uniform(0.2, 0.8))
            driver.execute_script("window.scrollBy(0,%d)" % v)
            # Enter the after-sales management page
            current_mouse = enter_next(driver, current_mouse,
                                       "#rightManager > a")
            refresh_page(driver, "div.table-hd")
            after_number = get_after_number(driver)
            # Enter the account settings page
            email, binding_phone, authentication = get_account_info(driver)
            tb_user["email"] = email
            tb_user["binding_phone"] = binding_phone
            tb_user["authentication"] = authentication
            # Enter the personal transaction info page
            logger.info("user {}: start crawl deal info".format(self.user_id))
            name, address = get_personal_deal_info(driver)
            tb_user["name"] = name
            tb_user["address"] = address
            tb_user["host_age"] = get_host_age(driver)
            # Enter the delivery address page
            logger.info("user {}: start crawl address_list".format(
                self.user_id))
            address_list = get_personal_address(driver)
            taobao_total_data["tb_deliver_addrs"] = address_list
            logger.info("user {}: start crawl zhifubao info".format(
                self.user_id))
            tb_zhifubao_binding, current_mouse = get_alipay_data(
                driver, current_mouse)
            # Enter "My points" to get the Tmall points
            tianmao_grade = get_integral(driver)
            tb_user["tianmao_grade"] = tianmao_grade
            taobao_total_data["tb_user"] = tb_user
            # Enter the bought-items list page
            logger.info("user {}: start crawl goods list".format(self.user_id))
            driver.get(list_bought_href)
            # Wait for the "bought items" tab to load
            refresh_page(driver, "#bought")
            html_0 = driver.page_source
            html_str0 = html_0.encode("utf-8").decode()
            logger.info("user {}: start crawl good detail".format(
                self.user_id))
            order_list = flip_over(driver, html_str0, address_list)
            tb_order["order_list"] = order_list
            tb_order["refund_number"] = refund_number
            tb_order["after_number"] = after_number
            tb_order["logistics_information"] = logistics_information
            taobao_total_data["tb_order"] = tb_order
            taobao_total_data["tb_order"]["tb_order_num"] = len(
                taobao_total_data["tb_order"]["order_list"])
            taobao_total_data["tb_zhifubao_binding"] = tb_zhifubao_binding

        except Exception as e:
            taobao_total_data["now_time"] = get_now_time()
            taobao_total_data["status_code"] = "5002"
            taobao_total_data[
                "data_status_message"] = "crawl server error so get data fail"
            taobao_total_data["user_id"] = self.user_id
            # Log first so the error is recorded even if the notification
            # call itself fails.
            logger.error('user {}:{}'.format(self.user_id, e))
            DingDing.send(json.dumps(taobao_total_data))

        else:
            taobao_total_data["now_time"] = get_now_time()
            taobao_total_data["status_code"] = "2002"
            taobao_total_data["data_status_message"] = "get data success"
            taobao_total_data["user_id"] = self.user_id
            InfoManager().save_userinfo(taobao_total_data)
            InfoManager().save_deliveraddrsinfo(taobao_total_data)
            InfoManager().save_orderinfo(taobao_total_data)
            InfoManager().save_productinfo(taobao_total_data)
            InfoManager().save_zhifubaoinfo(taobao_total_data)
            logger.info("user {}: crawl completed.".format(self.user_id))
        finally:
            # Nested so that a failure while publishing or decrementing the
            # counter can never skip the later cleanup steps; in particular
            # the browser is always quit.
            try:
                save_to_kafka(taobao_total_data)
            finally:
                try:
                    self.sr.decr(settings.SERVER_RANDOM_UUID)
                finally:
                    driver.quit()

        return taobao_total_data
def _xpath_text(tree, expression):
    """Return all text nodes matched by *expression*, joined into one string."""
    return ''.join(tree.xpath(expression))


def _reveal_hidden_amounts(driver, hide_label_list):
    """
    Click the "显示金额" links until the masked amounts no longer contain "*".

    The ``<strong>`` elements are re-read on every pass. Index 0 is paired
    with the first link, indexes 1 and 2 with the second link and index 3
    with the third link; the last two groups are only handled when more than
    three ``<strong>`` elements exist. The loop gives up after 19 passes.
    """
    first_masked = middle_masked = last_masked = True
    attempts = 0
    while first_masked or middle_masked or last_masked:
        attempts += 1
        if attempts >= 20:
            break
        star_list = driver.find_elements_by_xpath("//strong")
        has_all_groups = len(star_list) > 3
        # Only the first four elements are ever inspected.
        for i, star in enumerate(star_list[:4]):
            try:
                if i == 0:
                    if "*" in star.text:
                        hide_label_list[0].click()
                    else:
                        first_masked = False
                elif has_all_groups and i == 3:
                    if "*" in star.text:
                        hide_label_list[2].click()
                    else:
                        last_masked = False
                elif has_all_groups and i in (1, 2):
                    if "*" in star.text:
                        hide_label_list[1].click()
                    else:
                        middle_masked = False
            except Exception:
                # Best effort: a missing link or a failed read/click is
                # retried on the next pass. Unlike a bare ``except`` this
                # does not swallow KeyboardInterrupt / SystemExit.
                continue


def get_alipay_data(driver, current_mouse):
    """
    Collect the Alipay data bound to the Taobao account.

    Navigates from the ``#newAccountManagement > a`` link to the Alipay
    personal home page, reveals masked amounts, reads balances and credit
    figures, then opens the account page for phone / real-name details.

    :param driver: browser driver instance
    :param current_mouse: last known mouse position
    :return: tuple ``(tb_zhifubao_binding, current_mouse)``.
        ``tb_zhifubao_binding`` is a dict of strings on success. If anything
        raises after the Alipay page is entered it is replaced by ``""``
        and the error is logged.
    """
    # Enter the Alipay guide page
    tb_zhifubao_binding = {}
    current_mouse = enter_next(driver, current_mouse,
                               '#newAccountManagement > a')
    tb_zhifubao_binding["account_type"] = get_account_type(driver)
    # Wait for the page to load
    refresh_page(driver, "p.ui-tipbox-explain > a:nth-child(1)")
    # Enter the Alipay page. Keep the value returned by enter_next, as every
    # other call site does, so the returned mouse position is not stale.
    current_mouse = enter_next(driver, current_mouse,
                               "p.ui-tipbox-explain > a:nth-child(1)")
    refresh_page(driver, "td.i-assets-balance")
    try:
        # Any host other than my.alipay.com (e.g. the merchant portal
        # mrchportalweb.alipay.com) is redirected to the personal portal.
        c_url = driver.current_url.split("/")[2]
        if c_url != "my.alipay.com":
            driver.get("https://personalweb.alipay.com/portal/i.htm")
        # Reveal the amounts Alipay hides behind "显示金额" links
        hide_label_list = driver.find_elements_by_xpath('//a[text()="显示金额"]')
        if hide_label_list:
            _reveal_hidden_amounts(driver, hide_label_list)
        html_str = driver.page_source.encode("utf-8").decode()
        alipay_html = etree.HTML(html_str)
        # Alipay account
        tb_zhifubao_binding["zhifubao_account"] = _xpath_text(
            alipay_html, '//*[@id="J-userInfo-account-userEmail"]//text()')
        # Huabei total credit limit (with a fallback page layout)
        tb_zhifubao_binding["huabei_total_credit_amount"] = (
            _xpath_text(alipay_html, '//p[text()="总额度:"]//strong//text()')
            or _xpath_text(alipay_html,
                           '//h3[text()="花呗"]/../..//strong[2]//text()'))
        # Yu'e Bao historical cumulative income
        tb_zhifubao_binding["total_profit"] = _xpath_text(
            alipay_html, '//*[@id="J-income-num"]//text()')
        # Alipay balance
        tb_zhifubao_binding["balance"] = _xpath_text(
            alipay_html, '//a[text()="充 值"]/../../../div/strong//text()')
        # Yu'e Bao account balance (with a fallback page layout)
        tb_zhifubao_binding["total_quotient"] = (
            _xpath_text(
                alipay_html,
                '//tbody[1]//h3[text()="余额宝"]/../../div[2]/p[1]/strong//text()')
            or _xpath_text(alipay_html,
                           '//a[text()="转出"][1]/../..//strong//text()'))
        # Huabei available credit (with a fallback page layout)
        tb_zhifubao_binding["huabei_credit_amount"] = (
            _xpath_text(alipay_html, '//p[text()="可用额度"]//strong//text()')
            or _xpath_text(alipay_html,
                           '//h3[text()="花呗"]/../..//strong[1]//text()'))
        # Enter account management by clicking the user portrait
        wait_ele(driver, ".userInfo-portrait")
        hua_bei = driver.find_element_by_css_selector(".userInfo-portrait")
        ActionChains(driver).move_to_element(hua_bei).click().perform()
        wait_ele(driver, ".table-list")
        html_str = driver.page_source.encode("utf-8").decode()
        basic_info_html = etree.HTML(html_str)
        # Bound phone number
        tb_zhifubao_binding["binding_phone"] = _xpath_text(
            basic_info_html, '//tbody/tr[3]/td[1]/span/text()')
        # Alipay account type
        # NOTE(review): this overwrites the value read by get_account_type
        # above with a fixed string; kept as in the original, confirm intent.
        tb_zhifubao_binding["account_type"] = "个人账户"
        # Real name used for Alipay verification
        tb_zhifubao_binding["verified_name"] = _xpath_text(
            basic_info_html, '//*[@id="username"]/text()')
        # ID card number used for Alipay verification
        tb_zhifubao_binding["verified_id_card"] = _xpath_text(
            basic_info_html, '//tbody/tr[1]/td[1]/span[3]/text()')
    except Exception as e:
        tb_zhifubao_binding = ""
        logger.error(e)
    return tb_zhifubao_binding, current_mouse