Ejemplo n.º 1
0
    def parse(self, response):
        self.number += 1
        # delivery_id = 'F617B115D6F3447983E94BB781231244'
        delivery_id = 'DDA1001010'
        self.crm_cursor.execute(
            "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'".format(
                delivery_id))

        data_tupl = self.crm_cursor.fetchall()
        for data_info in data_tupl:
            company_id = data_info[0]
            enterprise_name = data_info[1]
            get_account = data_info[2]
            get_pwd = data_info[3]
            is_enable = data_info[4]

        if is_enable == 1:
            sell_time = 0
            html = self.sess_xiaoshanyiyuan.get(url=self.start_urls[0], headers=self.headers, verify=False)
            resp = etree.HTML(html.content.decode('utf-8'))
            __VIEWSTATE = resp.xpath('//*[@id="__VIEWSTATE"]/@value')[0]
            image = self.sess_xiaoshanyiyuan.get(
                url="http://www.hzxsyy.com.cn:8080/verifyimage.aspx?time=%s" % (random.random()),
                headers=self.headers,
                verify=False)
            # print('image', image.url)
            # print('dict_from_cookiejar(image.cookies)', dict_from_cookiejar(image.cookies))
            if SCRAPYD_TYPE == 1:
                if 'indow' in platform.system():
                    symbol = r'\\'
                else:
                    symbol = r'/'
                path = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
                # print('path', path)
                files = r'{}{}static{}20-xiaoshanyiyuan'.format(path, symbol, symbol)
                if not os.path.exists(files):
                    os.makedirs(files)
                with open(r'{}{}static{}20-xiaoshanyiyuan{}yzm.jpg'.format(path, symbol, symbol, symbol), 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'{}{}static{}20-xiaoshanyiyuan{}yzm.jpg'.format(path, symbol, symbol, symbol)
            else:
                with open(r'./20-xiaoshanyiyuanyzm.jpg', 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'./20-xiaoshanyiyuanyzm.jpg'

            codetype = 4004
            # 超时时间,秒
            timeout = 60
            ydm = YDMHttp()
            cid, code_result = ydm.run(filename, codetype, timeout)
            # yzm = input('请输入验证码:')
            # print('cid:%s   code_result:%s' % (cid, code_result))
            yzm = code_result
            # yzm = input('请输入验证码:')
            data = {
                "__VIEWSTATE": __VIEWSTATE,
                "UserName": get_account,
                "PassWord": get_pwd,
                "vcode": yzm,
            }
            self.sess_xiaoshanyiyuan.post(
                url="http://www.hzxsyy.com.cn:8080/syslogin.aspx?result=3&username=10690&txtCompanyID=22",
                data=data, headers=self.headers, verify=False)
            psot_data_html = self.sess_xiaoshanyiyuan.get(
                url='http://app1.yy5u.com:8080/ReportServer/Pages/ReportViewer.aspx?/BNumberTrafficFlowQuery&rs:Command=Render&rc:Parameters=false&SysCompanyID=22&UserID=CUSR201709041352&BizType=&GoodsID=&BeginDate={}&EndDate={}&serverNames=192.168.18.1&sqlName=NetSrv_App3&userName=mztZ8O0gn1HUBnbz9wW68Q%3d%3d&pass=HYg8AE7GMeP2W2YGWaIEpg%3d%3d&CustomerName=&BatchNo=&DepartName='.format(
                    self.fist, self.last),
                # url='http://app1.yy5u.com:8080/ReportServer/Pages/ReportViewer.aspx?/BNumberTrafficFlowQuery&rs:Command=Render&rc:Parameters=false&SysCompanyID=1&UserID=CUSR201703151777&BizType=&GoodsID=&BeginDate=2017-10-01&EndDate=2018-10-20&serverNames=192.168.18.1&sqlName=NetSrv_App1&userName=mztZ8O0gn1HUBnbz9wW68Q%3d%3d&pass=HYg8AE7GMeP2W2YGWaIEpg%3d%3d&CustomerName=&BatchNo=&DepartName=',
                headers=self.headers,
                verify=False)
            # print('psot_data_html', psot_data_html.content.decode('utf-8'))
            # print('*' * 1000)
            psot_data_html = etree.HTML(psot_data_html.content.decode('utf-8'))
            __VIEWSTATE = psot_data_html.xpath('//*[@id="__VIEWSTATE"]/@value')[0]
            __EVENTVALIDATION = psot_data_html.xpath('//*[@id="__EVENTVALIDATION"]/@value')[0]
            data_data = {
                "AjaxScriptManager": "AjaxScriptManager|ReportViewerControl$ctl09$Reserved_AsyncLoadTarget",
                "__EVENTTARGET": "ReportViewerControl$ctl09$Reserved_AsyncLoadTarget",
                "__EVENTARGUMENT": "",
                "__VIEWSTATE": __VIEWSTATE,
                "__EVENTVALIDATION": __EVENTVALIDATION,
                "NavigationCorrector$ScrollPosition": "",
                "NavigationCorrector$ViewState": "",
                "NavigationCorrector$PageState": "",
                "NavigationCorrector$NewViewState": "",
                "ReportViewerControl$ctl03$ctl00": "",
                "ReportViewerControl$ctl03$ctl01": "",
                "ReportViewerControl$ctl10": "ltr",
                "ReportViewerControl$ctl11": "quirks",
                "ReportViewerControl$AsyncWait$HiddenCancelField": "False",
                "ReportViewerControl$ctl04$ctl03$txtValue": "NetSrv_App3",
                "ReportViewerControl$ctl04$ctl05$txtValue": "192.168.18.1",
                "ReportViewerControl$ctl04$ctl07$txtValue": "HYg8AE7GMeP2W2YGWaIEpg==",
                "ReportViewerControl$ctl04$ctl09$txtValue": "mztZ8O0gn1HUBnbz9wW68Q==",
                "ReportViewerControl$ctl04$ctl11$txtValue": "22",
                "ReportViewerControl$ctl04$ctl13$txtValue": "CUSR201709041352",
                "ReportViewerControl$ctl04$ctl15$txtValue": "",
                "ReportViewerControl$ctl04$ctl17$txtValue": "",
                # "ReportViewerControl$ctl04$ctl19$txtValue": "2018-07-25",
                "ReportViewerControl$ctl04$ctl19$txtValue": self.fist,
                # "ReportViewerControl$ctl04$ctl21$txtValue": "2018-10-25",
                "ReportViewerControl$ctl04$ctl21$txtValue": self.last,
                "ReportViewerControl$ctl04$ctl23$txtValue": "",
                "ReportViewerControl$ctl04$ctl25$txtValue": "",
                "ReportViewerControl$ctl04$ctl27$txtValue": "",
                "ReportViewerControl$ToggleParam$store": "",
                "ReportViewerControl$ToggleParam$collapse": "true",
                "ReportViewerControl$ctl05$ctl00$CurrentPage": "",
                "ReportViewerControl$ctl05$ctl03$ctl00": "",
                "ReportViewerControl$ctl08$ClientClickedId": "",
                "ReportViewerControl$ctl07$store": "",
                "ReportViewerControl$ctl07$collapse": "false",
                "ReportViewerControl$ctl09$VisibilityState$ctl00": "None",
                "ReportViewerControl$ctl09$ScrollPosition": "",
                "ReportViewerControl$ctl09$ReportControl$ctl02": "",
                "ReportViewerControl$ctl09$ReportControl$ctl03": "",
                "ReportViewerControl$ctl09$ReportControl$ctl04": "100",
                "__ASYNCPOST": "true",
            }
            # print('data_data', data_data)
            # print('-' * 1000)
            data_html = self.sess_xiaoshanyiyuan.post(
                url='http://app1.yy5u.com:8080/ReportServer/Pages/ReportViewer.aspx?%2fBNumberTrafficFlowQuery&rs%3aCommand=Render&rc%3aParameters=false&SysCompanyID=22&UserID=CUSR201709041352&BizType=&GoodsID=&BeginDate={}&EndDate={}&serverNames=192.168.18.1&sqlName=NetSrv_App3&userName=mztZ8O0gn1HUBnbz9wW68Q%3d%3d&pass=HYg8AE7GMeP2W2YGWaIEpg%3d%3d&CustomerName=&BatchNo=&DepartName='.format(
                    self.fist, self.last),
                # url='http://app1.yy5u.com:8080/ReportServer/Pages/ReportViewer.aspx?%2fBNumberTrafficFlowQuery&rs%3aCommand=Render&rc%3aParameters=false&SysCompanyID=1&UserID=CUSR201703151777&BizType=&GoodsID=&BeginDate=2017-10-01&EndDate=2018-10-20&serverNames=192.168.18.1&sqlName=NetSrv_App1&userName=mztZ8O0gn1HUBnbz9wW68Q%3d%3d&pass=HYg8AE7GMeP2W2YGWaIEpg%3d%3d&CustomerName=&BatchNo=&DepartName=',
                data=data_data, headers=self.headers,
                verify=False)
            # print('data_html', data_html.content.decode('utf-8'))
            # print('*' * 1000)
            self.__VIEWSTATEs = re.findall(r'__VIEWSTATE\|(.+?)\|', data_html.content.decode('utf-8'))[0]
            self.__EVENTVALIDATIONs = re.findall(r'__EVENTVALIDATION\|(.+?)\|', data_html.content.decode('utf-8'))[0]
            data_html = etree.HTML(data_html.content.decode('utf-8'))
            # ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
            try:
                self.NavigationCorrector = data_html.xpath('//input[@id="NavigationCorrector_NewViewState"]/@value')[0]
                data_len = int(len(data_html.xpath('//tr[@valign="top"]'))) - 1
                # print('data_len', data_len)
                md5 = hashlib.md5()
                for i in range(data_len):
                    # try:
                    # 入驻企业id
                    company_id = company_id
                    # 配送公司id
                    delivery_id = delivery_id
                    # 配送公司名称
                    delivery_name = enterprise_name
                    # 数据版本号
                    data_version = delivery_id + "-" + self.time_stamp
                    # 数据类型:1,phython 2,导入
                    data_type = 1
                    # 单据类型:1进货,2退货,3销售,4销售退货
                    bill_type = 3
                    # 表的名称
                    table_name = 'order_metadata_xiaoshanyiyuan'
                    try:
                        drug_name = data_html.xpath(
                            '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[9]/div/text()' % (i + 3))[
                            0].strip().split(
                            ' ')[
                            0]
                    except:
                        drug_name = 0

                    if drug_name != 0:
                        try:
                            # 金额
                            drug_price_sum = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[8]/div/text()' % (i + 3))[
                                0].strip()
                        except Exception as e:
                            drug_price_sum = ''
                            # print('drug_price_sum e:', e)

                        try:
                            # 药品规格
                            drug_specification = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[9]/div/text()' % (i + 3))[
                                0].strip().split(
                                ' ')[1]
                        except:
                            drug_specification = ''

                        try:
                            # 生产企业
                            supplier_name = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[9]/div/text()' % (i + 3))[
                                0].strip().split(
                                ' ')[2]
                        except:
                            supplier_name = ''

                        try:
                            # 计量单位(瓶,盒等)
                            drug_unit = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[6]/div/text()' % (i + 3))[
                                0].strip()
                        except:
                            drug_unit = ''

                        try:
                            # 部门
                            department = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[12]/div/text()' % (i + 3))[
                                0].strip()
                        except:
                            department = ''

                        try:
                            # 类型
                            bill_types = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[2]/div/text()' % (i + 3))[
                                0].strip()

                            if bill_types == '进货':
                                bill_type = 1
                            else:
                                bill_type = 3
                        except:
                            bill_type = 3

                        try:
                            # 出库数量
                            drug_number = round(float(data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[5]/div/text()' % (i + 3))[
                                                          0].strip()))
                            if drug_number < 0:
                                if bill_type == 1:
                                    bill_type = 2
                                if bill_type == 3:
                                    bill_type = 4

                        except:
                            drug_number = 0

                        try:
                            # 批号
                            drug_batch = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[4]/div/text()' % (i + 3))[
                                0].strip()
                        except:
                            drug_batch = ''

                        try:
                            # 有效期至
                            valid_till = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[10]/div/text()' % (i + 3))[
                                0].strip()
                        except:
                            valid_till = '2000-01-01'

                        try:
                            # 医院(终端)名称
                            if bill_type == 1 or bill_type == 2:
                                hospital_name = ''
                            else:
                                hospital_name = data_html.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[3]/div/text()' % (
                                                i + 3))[0].strip()
                        except:
                            hospital_name = ''

                        try:
                            # 医院(终端)地址
                            if bill_type == 1 or bill_type == 2:
                                hospital_address = ''
                            else:
                                hospital_address = data_html.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[13]/div/text()' % (
                                                i + 3))[0].strip()
                        except:
                            hospital_address = ''

                        try:
                            # 销售(制单)时间
                            sell_time = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[1]/div/text()' % (i + 3))[
                                0].strip()

                            if sell_time == '汇  总':
                                sell_time = 0
                        except:
                            sell_time = '2000-01-01'

                        try:
                            # 价格
                            drug_price = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[7]/div/text()' % (i + 3))[
                                0].strip()
                            # '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[8]/div/text()' % (i + 3))[0]
                            # print('*'*1000)
                            # print('drug_price_sum', drug_price_sum)
                            # print('drug_price', drug_price)
                            # print('*'*1000)
                        except Exception as e:
                            drug_price = ''
                            # print('drug_price e:', e)

                        try:
                            # 业务编号
                            business_number = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[10]/div/text()' % (i + 3))[
                                0].strip()
                        except:
                            business_number = ''

                        try:
                            # 客户所属地区
                            customer_area = data_html.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[14]/div/text()' % (i + 3))[
                                0].strip()
                        except:
                            customer_area = ''

                        # 创建时间
                        create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

                        update_time = create_time

                        drug_hashs = "%s %s %s %s" % (drug_name, drug_specification, delivery_id, supplier_name)
                        md5 = hashlib.md5()
                        md5.update(bytes(drug_hashs, encoding="utf-8"))
                        drug_hash = md5.hexdigest()
                        hospital_hashs = "%s %s %s" % (delivery_id, hospital_name, hospital_address)
                        md5 = hashlib.md5()
                        md5.update(bytes(hospital_hashs, encoding="utf-8"))
                        hospital_hash = md5.hexdigest()
                        stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                            company_id, delivery_id, bill_type, drug_hash, drug_unit, abs(drug_number), drug_batch,
                            valid_till,
                            hospital_hash, sell_time)
                        md5 = hashlib.md5()
                        md5.update(bytes(stream_hashs, encoding="utf-8"))
                        stream_hash = md5.hexdigest()
                        month = int(str(self.fist).replace('-', '')[0: 6])

                        sql_crm = "insert into order_metadata_xiaoshanyiyuan(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_price_sum, department, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, drug_price, sell_time, create_time, update_time, business_number, customer_area, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                        sql_data_crm = sql_crm.format(company_id, delivery_id, delivery_name, data_version, data_type,
                                                      bill_type, drug_name, drug_price_sum, department,
                                                      drug_specification, supplier_name, drug_unit,
                                                      abs(drug_number), drug_batch, valid_till, hospital_name,
                                                      hospital_address, drug_price, sell_time, create_time, update_time,
                                                      business_number,
                                                      customer_area,
                                                      drug_hash, hospital_hash, stream_hash, month)
                        # print('sql_data', sql_data_crm)
                        try:
                            self.db.ping()
                        except pymysql.MySQLError:
                            self.db.connect()

                        try:
                            if sell_time != 0:
                                self.cursor.execute(sql_data_crm)
                                self.db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm:%s' % (e, sql_data_crm))

                        self.cursor.execute('select max(id) from order_metadata_xiaoshanyiyuan')
                        foreign_id = self.cursor.fetchone()[0]

                        sql_crm_data = SQL_CRM_DATA
                        sql_data_crm_data = sql_crm_data.format(company_id, delivery_id, delivery_name, table_name,
                                                                foreign_id,
                                                                data_version,
                                                                data_type, bill_type, drug_name, drug_specification,
                                                                supplier_name,
                                                                drug_hash, drug_unit, abs(drug_number), drug_batch,
                                                                valid_till,
                                                                hospital_name,
                                                                hospital_address, hospital_hash, month, sell_time,
                                                                stream_hash,
                                                                create_time, update_time)

                        try:
                            if sell_time != 0:
                                self.cursor.execute(sql_data_crm_data)
                                self.db.commit()
                                # self.crm_cursor.execute(sql_data_crm_data)
                                # self.crm_db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm_data:%s' % (e, sql_data_crm_data))

                        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

                self.NavigationCorrector = data_html.xpath('//input[@id="NavigationCorrector_NewViewState"]/@value')[0]
                for i in range(2, 1000):
                    data_datas = {
                        "AjaxScriptManager": "AjaxScriptManager|ReportViewerControl$ctl05$ctl00$CurrentPage",
                        "NavigationCorrector$ScrollPosition": "",
                        "NavigationCorrector$ViewState": "",
                        "NavigationCorrector$PageState": "",
                        "NavigationCorrector$NewViewState": self.NavigationCorrector,
                        "ReportViewerControl$ctl03$ctl00": "",
                        "ReportViewerControl$ctl03$ctl01": "",
                        "ReportViewerControl$ctl10": "ltr",
                        "ReportViewerControl$ctl11": "quirks",
                        "ReportViewerControl$AsyncWait$HiddenCancelField": "False",
                        "ReportViewerControl$ctl04$ctl03$txtValue": "NetSrv_App3",
                        "ReportViewerControl$ctl04$ctl05$txtValue": "192.168.18.1",
                        "ReportViewerControl$ctl04$ctl07$txtValue": "HYg8AE7GMeP2W2YGWaIEpg==",
                        "ReportViewerControl$ctl04$ctl09$txtValue": "mztZ8O0gn1HUBnbz9wW68Q==",
                        "ReportViewerControl$ctl04$ctl11$txtValue": "22",
                        "ReportViewerControl$ctl04$ctl13$txtValue": "CUSR201709041352",
                        "ReportViewerControl$ctl04$ctl15$txtValue": "",
                        "ReportViewerControl$ctl04$ctl17$txtValue": "",
                        # "ReportViewerControl$ctl04$ctl19$txtValue": "2018-07-25",
                        "ReportViewerControl$ctl04$ctl19$txtValue": self.fist,
                        # "ReportViewerControl$ctl04$ctl21$txtValue": "2018-10-25",
                        "ReportViewerControl$ctl04$ctl21$txtValue": self.last,
                        "ReportViewerControl$ctl04$ctl23$txtValue": "",
                        "ReportViewerControl$ctl04$ctl25$txtValue": "",
                        "ReportViewerControl$ctl04$ctl27$txtValue": "",
                        "ReportViewerControl$ToggleParam$store": "",
                        "ReportViewerControl$ToggleParam$collapse": "true",
                        "ReportViewerControl$ctl05$ctl00$CurrentPage": i,
                        "ReportViewerControl$ctl05$ctl03$ctl00": "",
                        "ReportViewerControl$ctl08$ClientClickedId": "",
                        "ReportViewerControl$ctl07$store": "",
                        "ReportViewerControl$ctl07$collapse": "false",
                        "ReportViewerControl$ctl09$VisibilityState$ctl00": "ReportPage",
                        "ReportViewerControl$ctl09$ScrollPosition": "0 0",
                        "ReportViewerControl$ctl09$ReportControl$ctl02": "",
                        "ReportViewerControl$ctl09$ReportControl$ctl03": "",
                        "ReportViewerControl$ctl09$ReportControl$ctl04": "100",
                        "__EVENTTARGET": "ReportViewerControl$ctl05$ctl00$CurrentPage",
                        "__EVENTARGUMENT": "",
                        "__VIEWSTATE": self.__VIEWSTATEs,
                        "__EVENTVALIDATION": self.__EVENTVALIDATIONs,
                        "__ASYNCPOST": "true",
                    }
                    # print('data_datas', data_datas)
                    data_htmls = self.sess_xiaoshanyiyuan.post(
                        url='http://app1.yy5u.com:8080/ReportServer/Pages/ReportViewer.aspx?%2fBNumberTrafficFlowQuery&rs%3aCommand=Render&rc%3aParameters=false&SysCompanyID=22&UserID=CUSR201709041352&BizType=&GoodsID=&BeginDate={}&EndDate={}&serverNames=192.168.18.1&sqlName=NetSrv_App3&userName=mztZ8O0gn1HUBnbz9wW68Q%3d%3d&pass=HYg8AE7GMeP2W2YGWaIEpg%3d%3d&CustomerName=&BatchNo=&DepartName='.format(
                            self.fist, self.last),
                        # url='http://app1.yy5u.com:8080/ReportServer/Pages/ReportViewer.aspx?%2fBNumberTrafficFlowQuery&rs%3aCommand=Render&rc%3aParameters=false&SysCompanyID=1&UserID=CUSR201703151777&BizType=&GoodsID=&BeginDate=2017-10-01&EndDate=2018-10-20&serverNames=192.168.18.1&sqlName=NetSrv_App1&userName=mztZ8O0gn1HUBnbz9wW68Q%3d%3d&pass=HYg8AE7GMeP2W2YGWaIEpg%3d%3d&CustomerName=&BatchNo=&DepartName=',
                        data=data_datas, headers=self.headers,
                        verify=False)
                    self.__VIEWSTATEs = re.findall(r'__VIEWSTATE\|(.+?)\|', data_htmls.content.decode('utf-8'))[0]
                    self.__EVENTVALIDATIONs = \
                    re.findall(r'__EVENTVALIDATION\|(.+?)\|', data_htmls.content.decode('utf-8'))[0]
                    # print('-' * 1000)
                    # print("data_htmls.content.decode('utf-8')", data_htmls.content.decode('utf-8'))
                    # print('-' * 1000)
                    # try:
                    data_htmls = etree.HTML(data_htmls.content.decode('utf-8'))
                    self.NavigationCorrector = \
                    data_htmls.xpath('//input[@id="NavigationCorrector_NewViewState"]/@value')[0]
                    data_len = int(len(data_htmls.xpath('//tr[@valign="top"]'))) - 1
                    # print('data_len', data_len)
                    md5 = hashlib.md5()
                    for i in range(data_len):
                        # try:
                        # 入驻企业id
                        company_id = company_id
                        # 配送公司id
                        delivery_id = delivery_id
                        # 配送公司名称
                        delivery_name = enterprise_name
                        # 数据版本号
                        data_version = delivery_id + "-" + self.time_stamp
                        # 数据类型:1,phython 2,导入
                        data_type = 1
                        # 单据类型:1进货,2退货,3销售,4销售退货
                        bill_type = 3
                        # 表的名称
                        table_name = 'order_metadata_xiaoshanyiyuan'
                        try:
                            drug_name = data_htmls.xpath(
                                '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[9]/div/text()' % (
                                        i + 3))[
                                0].strip().split(' ')[
                                0]
                            if not drug_name:
                                drug_name = 0
                        except:
                            drug_name = 0

                        if drug_name != 0:

                            try:
                                # 金额
                                drug_price_sum = data_htmls.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[8]/div/text()' % (
                                            i + 3))[0].strip()
                            except Exception as e:
                                drug_price_sum = ''
                                # print('drug_price_sum e:', e)

                            try:
                                # 药品规格
                                drug_specification = data_htmls.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[9]/div/text()' % (
                                            i + 3))[0].strip().split(
                                    ' ')[1]
                            except:
                                drug_specification = ''

                            try:
                                # 生产企业
                                supplier_name = data_htmls.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[9]/div/text()' % (
                                            i + 3))[0].strip().split(
                                    ' ')[2]
                            except:
                                supplier_name = ''

                            try:
                                # 计量单位(瓶,盒等)
                                drug_unit = data_htmls.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[6]/div/text()' % (
                                            i + 3))[0].strip()
                            except:
                                drug_unit = ''

                            try:
                                # 部门
                                department = data_htmls.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[12]/div/text()' % (
                                            i + 3))[0].strip()
                            except:
                                department = ''

                            try:
                                # 类型
                                bill_types = data_html.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[2]/div/text()' % (
                                                i + 3))[0].strip()

                                if bill_types == '进货':
                                    bill_type = 1
                                else:
                                    bill_type = 3
                            except:
                                bill_type = 3

                            try:
                                # 出库数量
                                drug_number = round(float(data_html.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[5]/div/text()' % (
                                                i + 3))[
                                                              0].strip()))
                                if drug_number < 0:
                                    if bill_type == 1:
                                        bill_type = 2
                                    if bill_type == 3:
                                        bill_type = 4

                            except:
                                drug_number = 0

                            try:
                                # 批号
                                drug_batch = data_htmls.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[4]/div/text()' % (
                                            i + 3))[0].strip()
                            except:
                                drug_batch = ''

                            try:
                                # 有效期至
                                valid_till = data_htmls.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[10]/div/text()' % (
                                            i + 3))[0].strip()
                            except:
                                valid_till = '2000-01-01'

                            try:
                                # 医院(终端)名称
                                if bill_type == 1 or bill_type == 2:
                                    hospital_name = ''
                                else:
                                    hospital_name = data_htmls.xpath(
                                        '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[3]/div/text()' % (
                                                i + 3))[0].strip()
                            except:
                                hospital_name = ''

                            try:
                                # 医院(终端)地址
                                if bill_type == 1 or bill_type == 2:
                                    hospital_address = ''
                                else:
                                    hospital_address = data_htmls.xpath(
                                        '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[13]/div/text()' % (
                                                i + 3))[0].strip()
                            except:
                                hospital_address = ''

                            try:
                                # 销售(制单)时间
                                sell_time = data_htmls.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[1]/div/text()' % (
                                            i + 3))[0].strip()

                                if sell_time == '汇  总':
                                    sell_time = 0
                            except:
                                sell_time = '2000-01-01'

                            try:
                                # 价格
                                drug_price = data_html.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[7]/div/text()' % (
                                            i + 3))[0].strip()
                                # '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[8]/div/text()' % (i + 3))[0]
                                # print('*'*1000)
                                # print('drug_price_sum', drug_price_sum)
                                # print('drug_price', drug_price)
                                # print('*'*1000)
                            except Exception as e:
                                drug_price = ''
                                # print('drug_price e:', e)

                            try:
                                # 业务编号
                                business_number = data_html.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[10]/div/text()' % (
                                            i + 3))[0].strip()
                            except:
                                business_number = ''

                            try:
                                # 客户所属地区
                                customer_area = data_html.xpath(
                                    '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[14]/div/text()' % (
                                            i + 3))[0].strip()
                            except:
                                customer_area = ''

                            # 创建时间
                            create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

                            update_time = create_time

                            drug_hashs = "%s %s %s %s" % (drug_name, drug_specification, delivery_id, supplier_name)
                            md5 = hashlib.md5()
                            md5.update(bytes(drug_hashs, encoding="utf-8"))
                            drug_hash = md5.hexdigest()
                            hospital_hashs = "%s %s %s" % (delivery_id, hospital_name, hospital_address)
                            md5 = hashlib.md5()
                            md5.update(bytes(hospital_hashs, encoding="utf-8"))
                            hospital_hash = md5.hexdigest()
                            stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                                company_id, delivery_id, bill_type, drug_hash, drug_unit, abs(drug_number), drug_batch,
                                valid_till,
                                hospital_hash, sell_time)
                            md5 = hashlib.md5()
                            md5.update(bytes(stream_hashs, encoding="utf-8"))
                            stream_hash = md5.hexdigest()
                            month = int(str(self.fist).replace('-', '')[0: 6])

                            sql_crm = "insert into order_metadata_xiaoshanyiyuan(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_price_sum, department, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, drug_price, sell_time, create_time, update_time, business_number, customer_area, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                            sql_data_crm = sql_crm.format(company_id, delivery_id, delivery_name, data_version,
                                                          data_type,
                                                          bill_type, drug_name, drug_price_sum, department,
                                                          drug_specification, supplier_name, drug_unit,
                                                          abs(drug_number), drug_batch, valid_till, hospital_name,
                                                          hospital_address, drug_price, sell_time, create_time,
                                                          update_time, business_number,
                                                          customer_area,
                                                          drug_hash, hospital_hash, stream_hash, month)
                            # print('sql_data', sql_data_crm)
                            try:
                                self.db.ping()
                            except pymysql.MySQLError:
                                self.db.connect()

                            try:
                                if sell_time != 0:
                                    self.cursor.execute(sql_data_crm)
                                    self.db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm:%s' % (e, sql_data_crm))

                            self.cursor.execute('select max(id) from order_metadata_xiaoshanyiyuan')
                            foreign_id = self.cursor.fetchone()[0]

                            sql_crm_data = SQL_CRM_DATA
                            sql_data_crm_data = sql_crm_data.format(company_id, delivery_id, delivery_name,
                                                                    table_name,
                                                                    foreign_id,
                                                                    data_version,
                                                                    data_type, bill_type, drug_name,
                                                                    drug_specification,
                                                                    supplier_name,
                                                                    drug_hash, drug_unit, abs(drug_number), drug_batch,
                                                                    valid_till,
                                                                    hospital_name,
                                                                    hospital_address, hospital_hash, month,
                                                                    sell_time,
                                                                    stream_hash,
                                                                    create_time, update_time)

                            try:
                                if sell_time != 0:
                                    self.cursor.execute(sql_data_crm_data)
                                    self.db.commit()
                                    self.crm_cursor.execute(sql_data_crm_data)
                                    self.crm_db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm_data:%s' % (e, sql_data_crm_data))

                    if sell_time == 0:

                        try:
                            crm_request_data = {
                                'version': delivery_id + "-" + self.time_stamp,
                                'streamType': streamType,
                            }
                            html = requests.post(url=CRM_REQUEST_URL, data=crm_request_data, headers=self.headers,
                                                 verify=False)
                            self.classify_success = json.loads(html.content.decode('utf-8'))['success']
                        except:
                            print('爬虫调取后端接口错误')

                        get_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                        get_date = int(time.strftime("%Y%m%d", time.localtime()))
                        get_status = 1
                        if MONTHS == 0:
                            self.cursor.execute(
                                "SELECT count(*) from order_metadata_xiaoshanyiyuan WHERE sell_time='{}' and delivery_name='{}'".format(
                                    self.yesterday, enterprise_name))
                        else:
                            month = int(str(self.fist).replace('-', '')[0: 6])
                            self.cursor.execute(
                                "SELECT count(*) from order_metadata_xiaoshanyiyuan WHERE month='{}' and delivery_name='{}'".format(
                                    month, enterprise_name))
                        data_num = self.cursor.fetchone()[0]
                        remark = ''
                        create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                        update_time = create_time
                        sql_crm_record = SQL_CRM_RECORD
                        sql_data_crm_record = sql_crm_record.format(company_id, delivery_id, enterprise_name,
                                                                    get_account, '20-xiaoshanyiyuan',
                                                                    delivery_id + "-" + self.time_stamp, get_time,
                                                                    get_date,
                                                                    get_status, data_num, self.classify_success, remark,
                                                                    create_time,
                                                                    update_time)

                        try:
                            self.cursor.execute(sql_data_crm_record)
                            self.db.commit()
                            self.crm_cursor.execute(sql_data_crm_record)
                            self.crm_db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm_record:%s' % (e, sql_data_crm_record))

                        sql_crm_version = SQL_CRM_VERSION
                        sql_data_crm_version = sql_crm_version.format(delivery_id + "-" + self.time_stamp,
                                                                      enterprise_name,
                                                                      company_id, create_time, update_time,
                                                                      data_num,
                                                                      remark)

                        try:
                            self.cursor.execute(sql_data_crm_version)
                            self.db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm_version:%s' % (e, sql_data_crm_version))
                        break

            except Exception as e:
                print('xiaoshanyiyuan-登入失败:%s' % e)
                print('self.number', self.number)
                if self.number < 4:
                    self.parse('aa')
                else:
                    create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    get_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    get_date = int(time.strftime("%Y%m%d", time.localtime()))
                    get_status = 2
                    if MONTHS == 0:
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_xiaoshanyiyuan WHERE sell_time='{}' and delivery_name='{}'".format(
                                self.yesterday, enterprise_name))
                    else:
                        month = int(str(self.fist).replace('-', '')[0: 6])
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_xiaoshanyiyuan WHERE month='{}' and delivery_name='{}'".format(
                                month, enterprise_name))
                    data_num = self.cursor.fetchone()[0]
                    remark = '账号或密码错了'
                    update_time = create_time
                    sql_crm_record = SQL_CRM_RECORD
                    sql_data_crm_record = sql_crm_record.format(company_id, delivery_id, enterprise_name, get_account,
                                                                '20-xiaoshanyiyuan',
                                                                delivery_id + "-" + self.time_stamp, get_time, get_date,
                                                                get_status, data_num, self.classify_success, remark,
                                                                create_time, update_time)

                    try:
                        self.cursor.execute(sql_data_crm_record)
                        self.db.commit()
                        self.crm_cursor.execute(sql_data_crm_record)
                        self.crm_db.commit()
                    except Exception as e:
                        print('插入失败:%s  sql_data_crm_record:%s' % (e, sql_data_crm_record))
                    print('账号密码或者验证码错误')
Ejemplo n.º 2
0
    def parse(self, response):
        # delivery_id = 'F617B115D6F3447983E94BB781231258'
        delivery_id = 'DDA100100R'
        self.crm_cursor.execute(
            "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
            .format(delivery_id))

        data_tupl = self.crm_cursor.fetchall()
        for data_info in data_tupl:
            company_id = data_info[0]
            enterprise_name = data_info[1]
            get_account = data_info[2]
            get_pwd = data_info[3]
            is_enable = data_info[4]

        if is_enable == 1:
            self.number += 1
            html = self.sess_zhongan.get(url=self.start_urls[0],
                                         headers=self.headers,
                                         verify=False)
            # print('html', html.content.decode('gb2312'))
            resp = etree.HTML(html.content.decode('utf-8', 'ignorg'))
            __VIEWSTATE = resp.xpath('//*[@id="__VIEWSTATE"]/@value')[0]
            __VIEWSTATEGENERATOR = resp.xpath(
                '//*[@id="__VIEWSTATEGENERATOR"]/@value')[0]
            image = self.sess_zhongan.get(
                url='http://www.zayy.cn/os/tools/VerifyCode1.aspx?',
                headers=self.headers,
                verify=False)
            # print('dict_from_cookiejar(image.cookies)', dict_from_cookiejar(image.cookies))
            if SCRAPYD_TYPE == 1:
                if 'indow' in platform.system():
                    symbol = r'\\'
                else:
                    symbol = r'/'
                path = os.path.dirname(
                    os.path.dirname(os.path.dirname(__file__)))
                # print('path', path)
                files = r'{}{}static{}16-zhongan'.format(path, symbol, symbol)
                if not os.path.exists(files):
                    os.makedirs(files)
                with open(
                        r'{}{}static{}16-zhongan{}yzm.jpg'.format(
                            path, symbol, symbol, symbol), 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'{}{}static{}16-zhongan{}yzm.jpg'.format(
                    path, symbol, symbol, symbol)
            else:
                with open(r'./16-zhonganyzm.jpg', 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'./16-zhonganyzm.jpg'

            codetype = 4005
            # 超时时间,秒
            timeout = 60
            ydm = YDMHttp()
            cid, code_result = ydm.run(filename, codetype, timeout)
            # yzm = input('请输入验证码:')
            # print('cid:%s   code_result:%s' % (cid, code_result))
            yzm = code_result
            # yzm = input('请输入验证码:')

            data = {
                "__VIEWSTATE": __VIEWSTATE,
                "__VIEWSTATEGENERATOR": __VIEWSTATEGENERATOR,
                "username": get_account,
                "userpwd": get_pwd,
                "verifycode": yzm,
                "ImgBtnLogin.x": "0",
                "ImgBtnLogin.y": "0",
            }
            post_html = self.sess_zhongan.post(
                url='http://www.zayy.cn/os/Default.aspx',
                data=data,
                headers=self.headers,
                verify=False)
            # print('data', data)
            # print('*' * 1000)
            # print('post_html', post_html.content.decode('utf-8', 'ignore'))
            # post_html = self.sess_zhongan.get(url='http://www.zayy.cn/os/UserLiuxiang.aspx?time1=2017-11-01&time2=2018-11-01&titlename=&pihao=&page=2',
            post_html = self.sess_zhongan.get(
                url=
                'http://www.zayy.cn/os/UserLiuxiang.aspx?time1={}&time2={}&titlename=&pihao='
                .format(self.fist, self.last),
                headers=self.headers,
                verify=False)
            try:
                re.findall(r'出库时间',
                           post_html.content.decode('utf-8', 'ignore'))[0]
                # print('*' * 1000)
                # print('post_html', post_html.content.decode('utf-8', 'ignore'))
                data_get = etree.HTML(
                    post_html.content.decode('utf-8', 'ignore'))
                try:
                    page_num = int(
                        data_get.xpath(
                            '//*[@id="ctl00_ContentPlaceHolder1_AspNetPager1"]/a/text()'
                        )[-3]) + 1
                except:
                    page_num = 2
                for i in range(1, page_num):
                    data_html = self.sess_zhongan.get(
                        url=
                        'http://www.zayy.cn/os/UserLiuxiang.aspx?time1={}&time2={}&titlename=&pihao=&page={}'
                        .format(self.fist, self.last, i),
                        headers=self.headers,
                        verify=False)
                    # print('*' * 1000)
                    # print('data_html', data_html.content.decode('utf-8', 'ignore'))
                    data_resps = etree.HTML(
                        data_html.content.decode('utf-8', 'ignore'))
                    # //*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tbody/tr[1]
                    data_len = int(
                        len(
                            data_resps.xpath(
                                '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr'
                            ))) - 1
                    # print(data_len)
                    md5 = hashlib.md5()
                    for i in range(data_len):
                        # 入驻企业id
                        company_id = company_id
                        # 配送公司id
                        delivery_id = delivery_id
                        # 配送公司名称
                        delivery_name = enterprise_name
                        # 数据版本号
                        data_version = delivery_id + "-" + self.time_stamp
                        # Data type: 1 = Python, 2 = import
                        data_type = 1
                        # 单据类型:1进货,2退货,3销售,4销售退货
                        bill_type = 3
                        try:
                            drug_name = data_resps.xpath(
                                '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[4]/span/text()'
                                % (i + 2))[0].strip()
                            if not drug_name:
                                drug_name = 1
                        except:
                            drug_name = 1

                        if drug_name != 1:
                            try:
                                # 药品规格
                                drug_specification = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[5]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_specification = ''

                            try:
                                # 生产企业
                                supplier_name = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[7]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                supplier_name = ''

                            try:
                                # 计量单位(瓶,盒等)
                                drug_unit = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[6]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_unit = ''

                            try:
                                # 出库数量
                                drug_number = int(
                                    data_resps.xpath(
                                        '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[10]/span/text()'
                                        % (i + 2))[0].strip())

                            except:
                                drug_number = 0

                            try:
                                # 批号
                                drug_batch = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[8]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_batch = ''

                            try:
                                # 有效期至
                                valid_till = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[9]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                valid_till = '2000-01-01'

                            try:
                                # 医院(终端)名称
                                hospital_name = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[3]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                hospital_name = ''

                            try:
                                # 医院(终端)地址
                                hospital_address = ''
                            except:
                                hospital_address = ''

                            try:
                                # 销售(制单)时间
                                sell_time = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[2]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                sell_time = '2000-01-01'

                            # 创建时间
                            create_time = time.strftime(
                                "%Y-%m-%d %H:%M:%S", time.localtime())
                            update_time = create_time

                            table_name = 'order_metadata_zhongan'

                            try:
                                # 单价
                                drug_price = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[11]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_price = ''

                            drug_hashs = "%s %s %s %s" % (
                                drug_name, drug_specification, delivery_id,
                                supplier_name)
                            md5 = hashlib.md5()
                            md5.update(bytes(drug_hashs, encoding="utf-8"))
                            drug_hash = md5.hexdigest()
                            hospital_hashs = "%s %s %s" % (
                                delivery_id, hospital_name, hospital_address)
                            md5 = hashlib.md5()
                            md5.update(bytes(hospital_hashs, encoding="utf-8"))
                            hospital_hash = md5.hexdigest()
                            stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                                company_id, delivery_id, bill_type, drug_hash,
                                drug_unit, abs(drug_number), drug_batch,
                                valid_till, hospital_hash, sell_time)
                            md5 = hashlib.md5()
                            md5.update(bytes(stream_hashs, encoding="utf-8"))
                            stream_hash = md5.hexdigest()
                            month = int(str(self.fist).replace('-', '')[0:6])

                            sql_crm = "insert into order_metadata_zhongan(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, create_time, update_time, drug_price, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                            sql_data_crm = sql_crm.format(
                                company_id, delivery_id, delivery_name,
                                data_version, data_type, bill_type, drug_name,
                                drug_specification, supplier_name, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, sell_time,
                                create_time, update_time, drug_price,
                                drug_hash, hospital_hash, stream_hash, month)
                            # print('sql_data', sql_data_crm)
                            try:
                                self.db.ping()
                            except pymysql.MySQLError:
                                self.db.connect()

                            try:
                                self.cursor.execute(sql_data_crm)
                                self.db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm:%s' %
                                      (e, sql_data_crm))

                            self.cursor.execute(
                                'select max(id) from order_metadata_zhongan')
                            foreign_id = self.cursor.fetchone()[0]

                            sql_crm_data = SQL_CRM_DATA
                            sql_data_crm_data = sql_crm_data.format(
                                company_id, delivery_id, delivery_name,
                                table_name, foreign_id, data_version,
                                data_type, bill_type, drug_name,
                                drug_specification,
                                supplier_name, drug_hash, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, hospital_hash,
                                month, sell_time, stream_hash, create_time,
                                update_time)

                            try:
                                if bill_type != 5:
                                    self.cursor.execute(sql_data_crm_data)
                                    self.db.commit()
                                    self.crm_cursor.execute(sql_data_crm_data)
                                    self.crm_db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm_data:%s' %
                                      (e, sql_data_crm_data))


                    data_html = self.sess_zhongan.get(
                        url=
                        'http://www.zayy.cn/os/UserDataquery.aspx?time1={}&time2={}&titlename=&pihaotxt='
                        .format(self.fist, self.last),
                        headers=self.headers,
                        verify=False)
                    # print('*' * 1000)
                    # print('data_html', data_html.content.decode('utf-8', 'ignore'))
                    data_resps = etree.HTML(
                        data_html.content.decode('utf-8', 'ignore'))
                    # //*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tbody/tr[1]
                    data_len = int(
                        len(
                            data_resps.xpath(
                                '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr'
                            ))) - 1
                    # print(data_len)
                    md5 = hashlib.md5()
                    for i in range(data_len):
                        # 入驻企业id
                        company_id = company_id
                        # 配送公司id
                        delivery_id = delivery_id
                        # 配送公司名称
                        delivery_name = enterprise_name
                        # 数据版本号
                        data_version = delivery_id + "-" + self.time_stamp
                        # 数据类型:1,phython 2,导入
                        data_type = 1
                        # 单据类型:1进货,2退货,3销售,4销售退货
                        bill_type = 1
                        try:
                            drug_name = data_resps.xpath(
                                '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[4]/span/text()'
                                % (i + 2))[0].strip()
                            if not drug_name:
                                drug_name = 1
                        except:
                            drug_name = 1

                        if drug_name != 1:
                            try:
                                # 药品规格
                                drug_specification = ''
                            except:
                                drug_specification = ''

                            try:
                                # 生产企业
                                supplier_name = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[6]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                supplier_name = ''

                            try:
                                # 计量单位(瓶,盒等)
                                drug_unit = ''
                            except:
                                drug_unit = ''

                            try:
                                # 出库数量
                                drug_number = int(
                                    data_resps.xpath(
                                        '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[8]/text()'
                                        % (i + 2))[0].strip())

                            except:
                                drug_number = 0

                            try:
                                # 批号
                                drug_batch = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[5]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_batch = ''

                            try:
                                # 有效期至
                                valid_till = '2000-01-01'
                            except:
                                valid_till = '2000-01-01'

                            try:
                                # 医院(终端)名称
                                hospital_name = ''
                            except:
                                hospital_name = ''

                            try:
                                # 医院(终端)地址
                                hospital_address = ''
                            except:
                                hospital_address = ''

                            try:
                                # 销售(制单)时间
                                sell_time = data_resps.xpath(
                                    '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr[%s]/td[9]/span/text()'
                                    % (i + 2))[0].strip()
                            except:
                                sell_time = '2000-01-01'

                            # 创建时间
                            create_time = time.strftime(
                                "%Y-%m-%d %H:%M:%S", time.localtime())
                            update_time = create_time

                            table_name = 'order_metadata_zhongan'

                            try:
                                # 单价
                                drug_price = ''
                            except:
                                drug_price = ''

                            drug_hashs = "%s %s %s %s" % (
                                drug_name, drug_specification, delivery_id,
                                supplier_name)
                            md5 = hashlib.md5()
                            md5.update(bytes(drug_hashs, encoding="utf-8"))
                            drug_hash = md5.hexdigest()
                            hospital_hashs = "%s %s %s" % (
                                delivery_id, hospital_name, hospital_address)
                            md5 = hashlib.md5()
                            md5.update(bytes(hospital_hashs, encoding="utf-8"))
                            hospital_hash = md5.hexdigest()
                            stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                                company_id, delivery_id, bill_type, drug_hash,
                                drug_unit, abs(drug_number), drug_batch,
                                valid_till, hospital_hash, sell_time)
                            md5 = hashlib.md5()
                            md5.update(bytes(stream_hashs, encoding="utf-8"))
                            stream_hash = md5.hexdigest()
                            month = int(str(self.fist).replace('-', '')[0:6])

                            sql_crm = "insert into order_metadata_zhongan(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, create_time, update_time, drug_price, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                            sql_data_crm = sql_crm.format(
                                company_id, delivery_id, delivery_name,
                                data_version, data_type, bill_type, drug_name,
                                drug_specification, supplier_name, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, sell_time,
                                create_time, update_time, drug_price,
                                drug_hash, hospital_hash, stream_hash, month)
                            # print('sql_data', sql_data_crm)
                            try:
                                self.db.ping()
                            except pymysql.MySQLError:
                                self.db.connect()

                            try:
                                self.cursor.execute(sql_data_crm)
                                self.db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm:%s' %
                                      (e, sql_data_crm))

                            self.cursor.execute(
                                'select max(id) from order_metadata_zhongan')
                            foreign_id = self.cursor.fetchone()[0]

                            sql_crm_data = SQL_CRM_DATA
                            sql_data_crm_data = sql_crm_data.format(
                                company_id, delivery_id, delivery_name,
                                table_name, foreign_id, data_version,
                                data_type, bill_type, drug_name,
                                drug_specification,
                                supplier_name, drug_hash, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, hospital_hash,
                                month, sell_time, stream_hash, create_time,
                                update_time)

                            try:
                                if bill_type != 5:
                                    self.cursor.execute(sql_data_crm_data)
                                    self.db.commit()
                                    self.crm_cursor.execute(sql_data_crm_data)
                                    self.crm_db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm_data:%s' %
                                      (e, sql_data_crm_data))

                try:
                    crm_request_data = {
                        'version': delivery_id + "-" + self.time_stamp,
                        'streamType': streamType,
                    }
                    html = requests.post(url=CRM_REQUEST_URL,
                                         data=crm_request_data,
                                         headers=self.headers,
                                         verify=False)
                    self.classify_success = json.loads(
                        html.content.decode('utf-8'))['success']
                except:
                    print('爬虫调取后端接口错误')

                get_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                get_status = 1
                if MONTHS == 0:
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_zhongan WHERE sell_time='{}' and delivery_name='{}'"
                        .format(self.yesterday, enterprise_name))
                else:
                    month = int(str(self.fist).replace('-', '')[0:6])
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_zhongan WHERE month='{}' and delivery_name='{}'"
                        .format(month, enterprise_name))
                data_num = self.cursor.fetchone()[0]
                remark = ''
                create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                            time.localtime())
                update_time = create_time
                sql_crm_record = SQL_CRM_RECORD
                sql_data_crm_record = sql_crm_record.format(
                    company_id, delivery_id, enterprise_name, get_account,
                    '16-zhongan', delivery_id + "-" + self.time_stamp,
                    get_time, get_date, get_status, data_num,
                    self.classify_success, remark, create_time, update_time)

                try:
                    self.cursor.execute(sql_data_crm_record)
                    self.db.commit()
                    self.crm_cursor.execute(sql_data_crm_record)
                    self.crm_db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_record:%s' %
                          (e, sql_data_crm_record))

                sql_crm_version = SQL_CRM_VERSION
                sql_data_crm_version = sql_crm_version.format(
                    delivery_id + "-" + self.time_stamp, enterprise_name,
                    company_id, create_time, update_time, data_num, remark)

                try:
                    self.cursor.execute(sql_data_crm_version)
                    self.db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_version:%s' %
                          (e, sql_data_crm_version))

            except Exception as e:
                print('zhongan-登入失败:%s' % e)
                print('self.number', self.number)
                if self.number < 4:
                    self.parse('aa')
                else:
                    create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())
                    get_time = create_time
                    get_date = int(time.strftime("%Y%m%d", time.localtime()))
                    get_status = 2
                    if MONTHS == 0:
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_zhongan WHERE sell_time='{}' and delivery_name='{}'"
                            .format(self.yesterday, enterprise_name))
                    else:
                        month = int(str(self.fist).replace('-', '')[0:6])
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_zhongan WHERE month='{}' and delivery_name='{}'"
                            .format(month, enterprise_name))
                    data_num = self.cursor.fetchone()[0]
                    remark = '账号或密码错了'
                    update_time = create_time
                    sql_crm_record = SQL_CRM_RECORD
                    sql_data_crm_record = sql_crm_record.format(
                        company_id, delivery_id, enterprise_name, get_account,
                        '16-zhongan', delivery_id + "-" + self.time_stamp,
                        get_time, get_date, get_status, data_num,
                        self.classify_success, remark, create_time,
                        update_time)

                    try:
                        self.cursor.execute(sql_data_crm_record)
                        self.db.commit()
                        self.crm_cursor.execute(sql_data_crm_record)
                        self.crm_db.commit()
                    except Exception as e:
                        print('插入失败:%s  sql_data_crm_record:%s' %
                              (e, sql_data_crm_record))
                    print('账号密码或者验证码错误')
Ejemplo n.º 3
0
    def parse(self, response):
        # delivery_id = 'F617B115D6F3447983E94BB781231235'
        delivery_id = 'DDA100100K'
        self.crm_cursor.execute(
            "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
            .format(delivery_id))

        data_tupl = self.crm_cursor.fetchall()
        for data_info in data_tupl:
            company_id = data_info[0]
            enterprise_name = data_info[1]
            get_account = data_info[2]
            get_pwd = data_info[3]
            is_enable = data_info[4]

        if is_enable == 1:
            self.number += 1
            html = self.sess_zhejianglaiyi.get(url=self.start_urls[0],
                                               headers=self.headers,
                                               verify=False)
            image = self.sess_zhejianglaiyi.get(
                url='http://www.600216.com/lx/bmp.asp?flg=login',
                headers=self.headers,
                verify=False)
            # print('dict_from_cookiejar(image.cookies)', dict_from_cookiejar(image.cookies))
            if SCRAPYD_TYPE == 1:
                if 'indow' in platform.system():
                    symbol = r'\\'
                else:
                    symbol = r'/'
                path = os.path.dirname(
                    os.path.dirname(os.path.dirname(__file__)))
                files = r'{}{}static{}10-zhejianglaiyi'.format(
                    path, symbol, symbol)
                if not os.path.exists(files):
                    os.makedirs(files)
                # print('path', path)
                with open(
                        r'{}{}static{}10-zhejianglaiyi{}yzm.jpg'.format(
                            path, symbol, symbol, symbol), 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'{}{}static{}10-zhejianglaiyi{}yzm.jpg'.format(
                    path, symbol, symbol, symbol)
            else:
                with open(r'./10-zhejianglaiyiyzm.jpg', 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'./10-zhejianglaiyiyzm.jpg'

            codetype = 4004
            # 超时时间,秒
            timeout = 60
            ydm = YDMHttp()
            cid, code_result = ydm.run(filename, codetype, timeout)
            # yzm = input('请输入验证码:')
            # print('cid:%s   code_result:%s' % (cid, code_result))
            yzm = code_result
            # yzm = input('请输入验证码:')
            data = {
                "loginName": get_account,
                "LoginPwd": get_pwd,
                "imgcodes": yzm,
                "login": "******",
            }
            self.sess_zhejianglaiyi.post(
                url='http://www.600216.com/lx/index.asp',
                data=data,
                headers=self.headers,
                verify=False)
            data_html = self.sess_zhejianglaiyi.get(
                url='http://www.600216.com/lx/sa.asp?fir=1',
                headers=self.headers,
                verify=False)
            try:
                re.findall(r'销售明细查询', data_html.content.decode('gb2312'))[0]
                # print('data_html.content.decode', data_html.content.decode('gb2312'))
                for i in range(1, 1000):
                    data_html = self.sess_zhejianglaiyi.get(
                        # url='http://www.600216.com/lx/sa.asp?mypage={}&searchcustom=&searchgoodsid=&searchDateBegin=2012-01-01&searchDateEnd=2018-10-19'.format(
                        url=
                        'http://www.600216.com/lx/sa.asp?mypage={}&searchcustom=&searchgoodsid=&searchDateBegin={}&searchDateEnd={}'
                        .format(i, self.fist, self.last),
                        headers=self.headers,
                        verify=False)
                    # print('data_html', data_html.content.decode('gb2312'))
                    # print('*' * 1000)
                    data_html = etree.HTML(data_html.content.decode('gb2312'))
                    data_len = int(
                        len(data_html.xpath('//table/tr/td/table[1]/tr'))) - 2
                    # print(data_len)
                    md5 = hashlib.md5()
                    for i in range(data_len):
                        # try:
                        # 入驻企业id
                        company_id = company_id
                        # 配送公司id
                        delivery_id = delivery_id
                        # 配送公司名称
                        delivery_name = enterprise_name
                        # 数据版本号
                        data_version = delivery_id + "-" + self.time_stamp
                        # 数据类型:1,phython 2,导入
                        data_type = 1
                        # 单据类型:1进货,2退货,3销售,4销售退货
                        bill_type = 3
                        try:
                            drug_name = data_html.xpath(
                                '//table/tr/td/table[1]/tr[%s]/td[5]/text()' %
                                (i + 2))[0].strip()
                        except:
                            drug_name = 1
                        if drug_name != 1:
                            try:
                                # 商品名称
                                trade_name = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[6]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                trade_name = ''

                            try:
                                # 药品规格
                                drug_specification = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[7]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_specification = ''

                            try:
                                # 生产企业
                                supplier_name = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[8]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                supplier_name = ''

                            try:
                                # 计量单位(瓶,盒等)
                                drug_unit = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[12]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_unit = ''

                            try:
                                # 医共体成员
                                medical_community_member = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[3]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                medical_community_member = ''

                            try:
                                # 出库数量
                                drug_number = round(
                                    float(
                                        data_html.xpath(
                                            '//table/tr/td/table[1]/tr[%s]/td[11]/b/text()'
                                            % (i + 2))[0].strip()))
                                if drug_number < 0:
                                    bill_type = 4

                            except:
                                drug_number = 0

                            try:
                                # 批号
                                drug_batch = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[9]/b/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_batch = ''

                            try:
                                # 有效期至
                                valid_till = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[10]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                valid_till = '2000-01-01'

                            try:
                                # 医院(终端)名称
                                hospital_name = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[2]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                hospital_name = ''

                            try:
                                # 医院(终端)地址
                                hospital_address = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[4]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                hospital_address = ''

                            try:
                                # 销售(制单)时间
                                sell_time = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[1]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                sell_time = '2000-01-01'

                            try:
                                # 价格
                                drug_price = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[13]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_price = ''

                            # 创建时间
                            create_time = time.strftime(
                                "%Y-%m-%d %H:%M:%S", time.localtime())
                            update_time = create_time

                            table_name = 'order_metadata_zhejianglaiyi'

                            drug_hashs = "%s %s %s %s" % (
                                drug_name, drug_specification, delivery_id,
                                supplier_name)
                            md5 = hashlib.md5()
                            md5.update(bytes(drug_hashs, encoding="utf-8"))
                            drug_hash = md5.hexdigest()
                            hospital_hashs = "%s %s %s" % (
                                delivery_id, hospital_name, hospital_address)
                            md5 = hashlib.md5()
                            md5.update(bytes(hospital_hashs, encoding="utf-8"))
                            hospital_hash = md5.hexdigest()
                            stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                                company_id, delivery_id, bill_type, drug_hash,
                                drug_unit, abs(drug_number), drug_batch,
                                valid_till, hospital_hash, sell_time)
                            md5 = hashlib.md5()
                            md5.update(bytes(stream_hashs, encoding="utf-8"))
                            stream_hash = md5.hexdigest()
                            month = int(str(self.fist).replace('-', '')[0:6])

                            sql_crm = "insert into order_metadata_zhejianglaiyi(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, trade_name, medical_community_member, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, drug_price, sell_time, create_time, update_time, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                            sql_data_crm = sql_crm.format(
                                company_id, delivery_id, delivery_name,
                                data_version, data_type, bill_type, drug_name,
                                trade_name, medical_community_member,
                                drug_specification, supplier_name, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, drug_price,
                                sell_time, create_time, update_time, drug_hash,
                                hospital_hash, stream_hash, month)
                            # print('sql_data', sql_data_crm)
                            try:
                                self.db.ping()
                            except pymysql.MySQLError:
                                self.db.connect()

                            try:
                                self.cursor.execute(sql_data_crm)
                                self.db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm:%s' %
                                      (e, sql_data_crm))

                            self.cursor.execute(
                                'select max(id) from order_metadata_zhejianglaiyi'
                            )
                            foreign_id = self.cursor.fetchone()[0]

                            sql_crm_data = SQL_CRM_DATA
                            sql_data_crm_data = sql_crm_data.format(
                                company_id, delivery_id, delivery_name,
                                table_name, foreign_id, data_version,
                                data_type, bill_type, drug_name,
                                drug_specification,
                                supplier_name, drug_hash, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, hospital_hash,
                                month, sell_time, stream_hash, create_time,
                                update_time)

                            try:
                                if bill_type != 5:
                                    self.cursor.execute(sql_data_crm_data)
                                    self.db.commit()
                                    self.crm_cursor.execute(sql_data_crm_data)
                                    self.crm_db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm_data:%s' %
                                      (e, sql_data_crm_data))

                    try:
                        next_page = data_html.xpath(
                            '//*[@id="page"]/a[last()]/text()')[0]
                    except:
                        next_page = ''
                    # print('next_page', next_page)
                    if next_page != '下一页>':
                        break
                # ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

                for i in range(1, 1000):
                    data_html_cai = self.sess_zhejianglaiyi.get(
                        # url='http://www.600216.com/lx/su.asp?mypage={}&searchSupply=&searchgoodsid=&searchDateBegin=2012-01-01&searchDateEnd=2018-10-19'.format(
                        url=
                        'http://www.600216.com/lx/su.asp?mypage={}&searchSupply=&searchgoodsid=&searchDateBegin={}&searchDateEnd={}'
                        .format(i, self.fist, self.last),
                        headers=self.headers,
                        verify=False)
                    # print('data_html', data_html.content.decode('gb2312'))
                    # print('*' * 1000)
                    data_html_cai = etree.HTML(
                        data_html_cai.content.decode('gb2312'))
                    data_len = int(
                        len(data_html_cai.xpath(
                            '//table/tr/td/table[1]/tr'))) - 2
                    # print(data_len)
                    md5 = hashlib.md5()
                    for i in range(data_len):
                        # try:
                        # 入驻企业id
                        company_id = company_id
                        # 配送公司id
                        delivery_id = delivery_id
                        # 配送公司名称
                        delivery_name = enterprise_name
                        # 数据版本号
                        data_version = delivery_id + "-" + self.time_stamp
                        # 数据类型:1,phython 2,导入
                        data_type = 1
                        # 单据类型:1进货,2退货,3销售,4销售退货
                        bill_type = 1
                        try:
                            drug_name = data_html_cai.xpath(
                                '//table/tr/td/table[1]/tr[%s]/td[3]/text()' %
                                (i + 2))[0].strip()
                        except:
                            drug_name = 1
                        if drug_name != 1:
                            try:
                                # 商品名称
                                trade_name = data_html_cai.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[4]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                trade_name = ''

                            try:
                                # 药品规格
                                drug_specification = data_html_cai.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[5]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_specification = ''

                            try:
                                # 生产企业
                                supplier_name = data_html_cai.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[2]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                supplier_name = ''

                            try:
                                # 计量单位(瓶,盒等)
                                drug_unit = data_html_cai.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[10]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_unit = ''

                            try:
                                # 医共体成员
                                medical_community_member = ''
                            except:
                                medical_community_member = ''

                            try:
                                # 出库数量
                                drug_number = round(
                                    float(
                                        data_html_cai.xpath(
                                            '//table/tr/td/table[1]/tr[%s]/td[9]/b/text()'
                                            % (i + 2))[0].strip()))
                                if drug_number < 0:
                                    bill_type = 2

                            except:
                                drug_number = 0

                            try:
                                # 批号
                                drug_batch = data_html_cai.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[7]/b/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_batch = ''

                            try:
                                # 有效期至
                                valid_till = data_html_cai.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[8]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                valid_till = '2000-01-01'

                            try:
                                # 医院(终端)名称
                                hospital_name = ''
                            except:
                                hospital_name = ''

                            try:
                                # 医院(终端)地址
                                hospital_address = ''
                            except:
                                hospital_address = ''

                            try:
                                # 销售(制单)时间
                                sell_time = data_html_cai.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[1]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                sell_time = '2000-01-01'

                            try:
                                # 价格
                                drug_price = data_html.xpath(
                                    '//table/tr/td/table[1]/tr[%s]/td[13]/text()'
                                    % (i + 2))[0].strip()
                            except:
                                drug_price = ''

                            # 创建时间
                            create_time = time.strftime(
                                "%Y-%m-%d %H:%M:%S", time.localtime())

                            update_time = create_time

                            table_name = 'order_metadata_zhejianglaiyi'

                            drug_hashs = "%s %s %s %s" % (
                                drug_name, drug_specification, delivery_id,
                                supplier_name)
                            md5 = hashlib.md5()
                            md5.update(bytes(drug_hashs, encoding="utf-8"))
                            drug_hash = md5.hexdigest()
                            hospital_hashs = "%s %s %s" % (
                                delivery_id, hospital_name, hospital_address)
                            md5 = hashlib.md5()
                            md5.update(bytes(hospital_hashs, encoding="utf-8"))
                            hospital_hash = md5.hexdigest()
                            stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                                company_id, delivery_id, bill_type, drug_hash,
                                drug_unit, abs(drug_number), drug_batch,
                                valid_till, hospital_hash, sell_time)
                            md5 = hashlib.md5()
                            md5.update(bytes(stream_hashs, encoding="utf-8"))
                            stream_hash = md5.hexdigest()
                            month = int(str(self.fist).replace('-', '')[0:6])

                            sql_crm = "insert into order_metadata_zhejianglaiyi(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, trade_name, medical_community_member, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, drug_price, sell_time, create_time, update_time, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                            sql_data_crm = sql_crm.format(
                                company_id, delivery_id, delivery_name,
                                data_version, data_type, bill_type, drug_name,
                                trade_name, medical_community_member,
                                drug_specification, supplier_name, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, drug_price,
                                sell_time, create_time, update_time, drug_hash,
                                hospital_hash, stream_hash, month)
                            # print('sql_data', sql_data_crm)
                            try:
                                self.db.ping()
                            except pymysql.MySQLError:
                                self.db.connect()

                            try:
                                self.cursor.execute(sql_data_crm)
                                self.db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm:%s' %
                                      (e, sql_data_crm))

                            self.cursor.execute(
                                'select max(id) from order_metadata_zhejianglaiyi'
                            )
                            foreign_id = self.cursor.fetchone()[0]

                            sql_crm_data = SQL_CRM_DATA
                            sql_data_crm_data = sql_crm_data.format(
                                company_id, delivery_id, delivery_name,
                                table_name, foreign_id, data_version,
                                data_type, bill_type, drug_name,
                                drug_specification,
                                supplier_name, drug_hash, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, hospital_hash,
                                month, sell_time, stream_hash, create_time,
                                update_time)

                            try:
                                if bill_type != 5:
                                    self.cursor.execute(sql_data_crm_data)
                                    self.db.commit()
                                    self.crm_cursor.execute(sql_data_crm_data)
                                    self.crm_db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm_data:%s' %
                                      (e, sql_data_crm_data))
                    try:
                        next_page = data_html_cai.xpath(
                            '//*[@id="page"]/a[last()]/text()')[0]
                    except:
                        next_page = ''
                    # print('next_page', next_page)
                    if next_page != '下一页>':
                        break

                try:
                    crm_request_data = {
                        'version': delivery_id + "-" + self.time_stamp,
                        'streamType': streamType,
                    }
                    html = requests.post(url=CRM_REQUEST_URL,
                                         data=crm_request_data,
                                         headers=self.headers,
                                         verify=False)
                    self.classify_success = json.loads(
                        html.content.decode('utf-8'))['success']
                except:
                    print('爬虫调取后端接口错误')

                get_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                get_status = 1
                if MONTHS == 0:
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_zhejianglaiyi WHERE sell_time='{}' and delivery_name='{}'"
                        .format(self.yesterday, enterprise_name))
                else:
                    month = int(str(self.fist).replace('-', '')[0:6])
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_zhejianglaiyi WHERE month='{}' and delivery_name='{}'"
                        .format(month, enterprise_name))
                data_num = self.cursor.fetchone()[0]
                remark = ''
                create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                            time.localtime())
                update_time = create_time
                sql_crm_record = SQL_CRM_RECORD
                sql_data_crm_record = sql_crm_record.format(
                    company_id, delivery_id, enterprise_name, get_account,
                    '10-zhejianglaiyi', delivery_id + "-" + self.time_stamp,
                    get_time, get_date, get_status, data_num,
                    self.classify_success, remark, create_time, update_time)

                try:
                    self.cursor.execute(sql_data_crm_record)
                    self.db.commit()
                    self.crm_cursor.execute(sql_data_crm_record)
                    self.crm_db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_record:%s' %
                          (e, sql_data_crm_record))

                sql_crm_version = SQL_CRM_VERSION
                sql_data_crm_version = sql_crm_version.format(
                    delivery_id + "-" + self.time_stamp, enterprise_name,
                    company_id, create_time, update_time, data_num, remark)

                try:
                    self.cursor.execute(sql_data_crm_version)
                    self.db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_version:%s' %
                          (e, sql_data_crm_version))

            except Exception as e:
                print('zhejianglaiyi-登入失败:%s' % e)
                if self.number < 4:
                    self.parse('aa')
                else:
                    create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())
                    get_time = create_time
                    get_date = int(time.strftime("%Y%m%d", time.localtime()))
                    get_status = 2
                    if MONTHS == 0:
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_zhejianglaiyi WHERE sell_time='{}' and delivery_name='{}'"
                            .format(self.yesterday, enterprise_name))
                    else:
                        month = int(str(self.fist).replace('-', '')[0:6])
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_zhejianglaiyi WHERE month='{}' and delivery_name='{}'"
                            .format(month, enterprise_name))
                    data_num = self.cursor.fetchone()[0]
                    remark = '账号或密码错了'
                    update_time = create_time
                    sql_crm_record = SQL_CRM_RECORD
                    sql_data_crm_record = sql_crm_record.format(
                        company_id, delivery_id, enterprise_name, get_account,
                        '10-zhejianglaiyi',
                        delivery_id + "-" + self.time_stamp, get_time,
                        get_date, get_status, data_num, self.classify_success,
                        remark, create_time, update_time)

                    try:
                        self.cursor.execute(sql_data_crm_record)
                        self.db.commit()
                        self.crm_cursor.execute(sql_data_crm_record)
                        self.crm_db.commit()
                    except Exception as e:
                        print('插入失败:%s  sql_data_crm_record:%s' %
                              (e, sql_data_crm_record))
                    print('账号密码或者验证码错误')
Ejemplo n.º 4
0
    def parse(self, response):
        delivery_id = 'F617B115D6F3447983E94BB781231271'
        self.crm_cursor.execute(
            "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
            .format(delivery_id))

        data_tupl = self.crm_cursor.fetchall()
        for data_info in data_tupl:
            company_id = data_info[0]
            enterprise_name = data_info[1]
            get_account = data_info[2]
            get_pwd = data_info[3]
            is_enable = data_info[4]

        if is_enable == 1:
            self.number += 1
            html = self.sess_jiaxingyinte.get(url=self.start_urls[0],
                                              headers=self.headers,
                                              verify=False)
            image = self.sess_jiaxingyinte.get(
                url="http://www.drugoogle.com/verifyCode/verifyCode.jsp?%s" %
                (int(time.time() * 1000)),
                headers=self.headers,
                verify=False)
            # print('image', image.url)
            # print('dict_from_cookiejar(image.cookies)', dict_from_cookiejar(image.cookies))
            if SCRAPYD_TYPE == 1:
                if 'indow' in platform.system():
                    symbol = r'\\'
                else:
                    symbol = r'/'
                path = os.path.dirname(
                    os.path.dirname(os.path.dirname(__file__)))
                # print('path', path)
                files = r'{}{}static{}21-jiaxingyinte'.format(
                    path, symbol, symbol)
                if not os.path.exists(files):
                    os.makedirs(files)
                with open(
                        r'{}{}static{}21-jiaxingyinte{}yzm.jpg'.format(
                            path, symbol, symbol, symbol), 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'{}{}static{}21-jiaxingyinte{}yzm.jpg'.format(
                    path, symbol, symbol, symbol)
            else:
                with open(r'./21-jiaxingyinteyzm.jpg', 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'./21-jiaxingyinteyzm.jpg'

            codetype = 4004
            # 超时时间,秒
            timeout = 60
            ydm = YDMHttp()
            cid, code_result = ydm.run(filename, codetype, timeout)
            # yzm = input('请输入验证码:')
            # print('cid:%s   code_result:%s' % (cid, code_result))
            yzm = code_result
            # yzm = input('请输入验证码:')
            data = {
                "username": get_account,
                "password": get_pwd,
                "verifyCode": yzm
            }
            # print('data', data)
            self.sess_jiaxingyinte.post(
                url="http://www.drugoogle.com/index/registerloginjson.jspx?%s"
                % (int(time.time() * 1000)),
                data=data,
                headers=self.headers,
                verify=False)
            data_html = self.sess_jiaxingyinte.get(
                url='http://www.drugoogle.com/member/index.jspx?catlog=4',
                headers=self.headers,
                verify=False)
            try:
                # print("data_html.content.decode('utf-8')", data_html.content.decode('utf-8'))
                re.findall(r'药品流向查询', data_html.content.decode('utf-8'))[0]
                data_html = self.sess_jiaxingyinte.get(
                    url=
                    'http://www.drugoogle.com/member/agentman/medicineGoto/medicinegototab4.jspx?entryId=16&medicineId=0&company_name=&timeType=1&startTime={}%2000:00:00&endTime={}%2023:59:59&buyerType=0'
                    .format(self.fist, self.last),
                    # http://www.drugoogle.com/member/agentman/medicineGoto/medicinegototab4.jspx?entryId=16&medicineId=0&company_name=&timeType=1&startTime=2018-10-30%2000:00:00&endTime=2018-10-30%2023:59:59&buyerType=0
                    headers=self.headers,
                    verify=False)
                # print('data_html', data_html.content.decode('utf-8'))
                # print('*' * 1000)
                data_html = etree.HTML(data_html.content.decode('utf-8'))
                data_len = int(
                    len(data_html.xpath(
                        '/html/body/table/tr/td/table[1]/tr'))) - 3
                # print('data_len', data_len)
                md5 = hashlib.md5()
                for i in range(data_len):
                    # try:
                    # 入驻企业id
                    company_id = company_id
                    # 配送公司id
                    delivery_id = delivery_id
                    # 配送公司名称
                    delivery_name = enterprise_name
                    # 数据版本号
                    data_version = delivery_id + "-" + self.time_stamp
                    # 数据类型:1,phython 2,导入
                    data_type = 1
                    # 单据类型:1进货,2退货,3销售,4销售退货
                    bill_type = 3
                    try:
                        drug_name = data_html.xpath(
                            '/html/body/table/tr/td/table[1]/tr[%s]/td[5]/text()'
                            % (i + 2))[0].strip()
                        if not drug_name:
                            drug_name = 1
                    except:
                        drug_name = 1

                    if drug_name != 1:
                        try:
                            # 药品id
                            trade_id = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[4]/text()'
                                % (i + 2))[0].strip()
                        except:
                            trade_id = ''

                        try:
                            # 药品规格
                            drug_specification = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[6]/text()'
                                % (i + 2))[0].strip()
                        except:
                            drug_specification = ''

                        try:
                            # 生产企业
                            supplier_name = ''
                        except:
                            supplier_name = ''

                        try:
                            # 计量单位(瓶,盒等)
                            drug_unit = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[7]/text()'
                                % (i + 2))[0].strip()
                        except:
                            drug_unit = ''

                        try:
                            # 销售单id
                            sales_ticket_id = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[1]/text()'
                                % (i + 2))[0].strip()
                        except:
                            sales_ticket_id = ''

                        try:
                            # 出库数量
                            drug_number = round(
                                float(
                                    data_html.xpath(
                                        '/html/body/table/tr/td/table[1]/tr[%s]/td[10]/text()'
                                        % (i + 2))[0].strip()))
                            if drug_number < 0:
                                bill_type = 4

                        except:
                            drug_number = 0

                        try:
                            # 批号
                            drug_batch = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[8]/text()'
                                % (i + 2))[0].strip()
                        except:
                            drug_batch = ''

                        try:
                            # 有效期至
                            valid_till = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[9]/text()'
                                % (i + 2))[0].strip()
                        except:
                            valid_till = '2000-01-01'

                        try:
                            # 医院(终端)名称
                            hospital_name = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[12]/text()'
                                % (i + 2))[0].strip()
                        except:
                            hospital_name = ''

                        try:
                            # 医院(终端)地址
                            hospital_address = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[14]/text()'
                                % (i + 2))[0].strip()
                        except:
                            hospital_address = ''

                        try:
                            # 销售(制单)时间
                            sell_time = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[2]/text()'
                                % (i + 2))[0].strip()
                        except:
                            sell_time = '2000-01-01'

                        try:
                            # 出库帐时间
                            warehouse_time = data_html.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[3]/text()'
                                % (i + 2))[0].strip()
                        except:
                            warehouse_time = '2000-01-01'

                        try:
                            # 价格
                            drug_price = ''
                        except:
                            drug_price = ''

                        # 创建时间
                        create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                    time.localtime())
                        update_time = create_time

                        table_name = 'order_metadata_jiaxingyinte'

                        drug_hashs = "%s %s %s %s" % (
                            drug_name, drug_specification, delivery_id,
                            supplier_name)
                        md5 = hashlib.md5()
                        md5.update(bytes(drug_hashs, encoding="utf-8"))
                        drug_hash = md5.hexdigest()
                        hospital_hashs = "%s %s %s" % (
                            delivery_id, hospital_name, hospital_address)
                        md5 = hashlib.md5()
                        md5.update(bytes(hospital_hashs, encoding="utf-8"))
                        hospital_hash = md5.hexdigest()
                        stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                            company_id, delivery_id, bill_type, drug_hash,
                            drug_unit, abs(drug_number), drug_batch,
                            valid_till, hospital_hash, sell_time)
                        md5 = hashlib.md5()
                        md5.update(bytes(stream_hashs, encoding="utf-8"))
                        stream_hash = md5.hexdigest()
                        month = int(str(self.fist).replace('-', '')[0:6])

                        sql_crm = "insert into order_metadata_jiaxingyinte(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, trade_id, sales_ticket_id, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, warehouse_time, create_time, update_time, drug_hash, hospital_hash, stream_hash, drug_price, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                        sql_data_crm = sql_crm.format(
                            company_id, delivery_id, delivery_name,
                            data_version, data_type, bill_type, drug_name,
                            trade_id, sales_ticket_id,
                            drug_specification, supplier_name, drug_unit,
                            abs(drug_number), drug_batch, valid_till,
                            hospital_name, hospital_address, sell_time,
                            warehouse_time, create_time, update_time,
                            drug_hash, hospital_hash, stream_hash, drug_price,
                            month)
                        # print('sql_data', sql_data_crm)
                        try:
                            self.db.ping()
                        except pymysql.MySQLError:
                            self.db.connect()

                        try:
                            self.cursor.execute(sql_data_crm)
                            self.db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm:%s' %
                                  (e, sql_data_crm))

                        self.cursor.execute(
                            'select max(id) from order_metadata_jiaxingyinte')
                        foreign_id = self.cursor.fetchone()[0]

                        sql_crm_data = SQL_CRM_DATA
                        sql_data_crm_data = sql_crm_data.format(
                            company_id, delivery_id, delivery_name, table_name,
                            foreign_id, data_version, data_type, bill_type,
                            drug_name, drug_specification,
                            supplier_name, drug_hash, drug_unit,
                            abs(drug_number), drug_batch, valid_till,
                            hospital_name, hospital_address, hospital_hash,
                            month, sell_time, stream_hash, create_time,
                            update_time)

                        try:
                            if bill_type != 5:
                                self.cursor.execute(sql_data_crm_data)
                                self.db.commit()
                                self.crm_cursor.execute(sql_data_crm_data)
                                self.crm_db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm_data:%s' %
                                  (e, sql_data_crm_data))



                data_html_cai = self.sess_jiaxingyinte.get(
                    url=
                    'http://www.drugoogle.com/member/agentman/medicineGoto/medicinegototab3.jspx?entryId=16&medicineId=0&timeType=1&startTime={}%2000:00:00&endTime={}%2023:59:59'
                    .format(self.fist, self.last),
                    # http://www.drugoogle.com/member/agentman/medicineGoto/medicinegototab3.jspx?entryId=16&medicineId=0&timeType=1&startTime=2018-10-30%2000:00:00&endTime=2018-10-30%2023:59:59
                    headers=self.headers,
                    verify=False)
                # print('data_html_cai', data_html_cai.content.decode('utf-8'))
                # print('*' * 1000)
                data_html_cai = etree.HTML(
                    data_html_cai.content.decode('utf-8'))
                data_len = int(
                    len(
                        data_html_cai.xpath(
                            '/html/body/table/tr/td/table[1]/tr'))) - 3
                # print('data_len', data_len)
                md5 = hashlib.md5()
                for i in range(data_len):
                    # try:
                    # 入驻企业id
                    company_id = company_id
                    # 配送公司id
                    delivery_id = delivery_id
                    # 配送公司名称
                    delivery_name = enterprise_name
                    # 数据版本号
                    data_version = delivery_id + "-" + self.time_stamp
                    # 数据类型:1,phython 2,导入
                    data_type = 1
                    # 单据类型:1进货,2退货,3销售,4销售退货
                    bill_type = 1
                    try:
                        drug_name = data_html_cai.xpath(
                            '/html/body/table/tr/td/table[1]/tr[%s]/td[5]/text()'
                            % (i + 2))[0].strip()
                    except:
                        drug_name = 1

                    if drug_name != 1:
                        try:
                            # 药品id
                            trade_id = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[4]/text()'
                                % (i + 2))[0].strip()
                        except:
                            trade_id = ''

                        try:
                            # 药品规格
                            drug_specification = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[6]/text()'
                                % (i + 2))[0].strip()
                        except:
                            drug_specification = ''

                        try:
                            # 生产企业
                            supplier_name = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[9]/text()'
                                % (i + 2))[0].strip()
                        except:
                            supplier_name = ''

                        try:
                            # 计量单位(瓶,盒等)
                            drug_unit = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[7]/text()'
                                % (i + 2))[0].strip()
                        except:
                            drug_unit = ''

                        try:
                            # 销售单id
                            sales_ticket_id = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[1]/text()'
                                % (i + 2))[0].strip()
                        except:
                            sales_ticket_id = ''

                        try:
                            # 出库数量
                            drug_number = round(
                                float(
                                    data_html_cai.xpath(
                                        '/html/body/table/tr/td/table[1]/tr[%s]/td[11]/text()'
                                        % (i + 2))[0].strip()))
                            if drug_number < 0:
                                bill_type = 2

                        except:
                            drug_number = 0

                        try:
                            # 批号
                            drug_batch = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[8]/text()'
                                % (i + 2))[0].strip()
                        except:
                            drug_batch = ''

                        try:
                            # 有效期至
                            valid_till = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[10]/text()'
                                % (i + 2))[0].strip()
                        except:
                            valid_till = '2000-01-01'

                        try:
                            # 医院(终端)名称
                            hospital_name = ''
                        except:
                            hospital_name = ''

                        try:
                            # 医院(终端)地址
                            hospital_address = ''
                        except:
                            hospital_address = ''

                        try:
                            # 销售(制单)时间
                            sell_time = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[2]/text()'
                                % (i + 2))[0].strip()
                        except:
                            sell_time = '2000-01-01'

                        try:
                            # 出库帐时间
                            warehouse_time = data_html_cai.xpath(
                                '/html/body/table/tr/td/table[1]/tr[%s]/td[3]/text()'
                                % (i + 2))[0].strip()
                        except:
                            warehouse_time = '2000-01-01'

                        try:
                            # 价格
                            drug_price = ''
                        except:
                            drug_price = ''

                        # 创建时间
                        create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                    time.localtime())

                        update_time = create_time

                        table_name = 'order_metadata_jiaxingyinte'

                        drug_hashs = "%s %s %s %s" % (
                            drug_name, drug_specification, delivery_id,
                            supplier_name)
                        md5 = hashlib.md5()
                        md5.update(bytes(drug_hashs, encoding="utf-8"))
                        drug_hash = md5.hexdigest()
                        hospital_hashs = "%s %s %s" % (
                            delivery_id, hospital_name, hospital_address)
                        md5 = hashlib.md5()
                        md5.update(bytes(hospital_hashs, encoding="utf-8"))
                        hospital_hash = md5.hexdigest()
                        stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                            company_id, delivery_id, bill_type, drug_hash,
                            drug_unit, abs(drug_number), drug_batch,
                            valid_till, hospital_hash, sell_time)
                        md5 = hashlib.md5()
                        md5.update(bytes(stream_hashs, encoding="utf-8"))
                        stream_hash = md5.hexdigest()
                        month = int(str(self.fist).replace('-', '')[0:6])

                        sql_crm = "insert into order_metadata_jiaxingyinte(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, trade_id, sales_ticket_id, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, warehouse_time, create_time, update_time, drug_hash, hospital_hash, stream_hash, drug_price, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                        sql_data_crm = sql_crm.format(
                            company_id, delivery_id, delivery_name,
                            data_version, data_type, bill_type, drug_name,
                            trade_id, sales_ticket_id,
                            drug_specification, supplier_name, drug_unit,
                            abs(drug_number), drug_batch, valid_till,
                            hospital_name, hospital_address, sell_time,
                            warehouse_time, create_time, update_time,
                            drug_hash, hospital_hash, stream_hash, drug_price,
                            month)
                        # print('sql_data', sql_data_crm)
                        try:
                            self.db.ping()
                        except pymysql.MySQLError:
                            self.db.connect()

                        try:
                            self.cursor.execute(sql_data_crm)
                            self.db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm:%s' %
                                  (e, sql_data_crm))

                        self.cursor.execute(
                            'select max(id) from order_metadata_jiaxingyinte')
                        foreign_id = self.cursor.fetchone()[0]

                        sql_crm_data = SQL_CRM_DATA
                        sql_data_crm_data = sql_crm_data.format(
                            company_id, delivery_id, delivery_name, table_name,
                            foreign_id, data_version, data_type, bill_type,
                            drug_name, drug_specification,
                            supplier_name, drug_hash, drug_unit,
                            abs(drug_number), drug_batch, valid_till,
                            hospital_name, hospital_address, hospital_hash,
                            month, sell_time, stream_hash, create_time,
                            update_time)

                        try:
                            if bill_type != 5:
                                self.cursor.execute(sql_data_crm_data)
                                self.db.commit()
                                self.crm_cursor.execute(sql_data_crm_data)
                                self.crm_db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm_data:%s' %
                                  (e, sql_data_crm_data))

                try:
                    crm_request_data = {
                        'version': delivery_id + "-" + self.time_stamp,
                        'streamType': streamType,
                    }
                    html = requests.post(url=CRM_REQUEST_URL,
                                         data=crm_request_data,
                                         headers=self.headers,
                                         verify=False)
                    self.classify_success = json.loads(
                        html.content.decode('utf-8'))['success']
                except:
                    print('爬虫调取后端接口错误')

                get_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                get_status = 1
                if MONTHS == 0:
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_jiaxingyinte WHERE sell_time='{}' and delivery_name='{}'"
                        .format(self.yesterday, enterprise_name))
                else:
                    month = int(str(self.fist).replace('-', '')[0:6])
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_jiaxingyinte WHERE month='{}' and delivery_name='{}'"
                        .format(month, enterprise_name))
                data_num = self.cursor.fetchone()[0]
                remark = ''
                create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                            time.localtime())
                update_time = create_time
                sql_crm_record = SQL_CRM_RECORD
                sql_data_crm_record = sql_crm_record.format(
                    company_id, delivery_id, enterprise_name, get_account,
                    '21-jiaxingyinte', delivery_id + "-" + self.time_stamp,
                    get_time, get_date, get_status, data_num,
                    self.classify_success, remark, create_time, update_time)

                try:
                    self.cursor.execute(sql_data_crm_record)
                    self.db.commit()
                    self.crm_cursor.execute(sql_data_crm_record)
                    self.crm_db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_record:%s' %
                          (e, sql_data_crm_record))

                sql_crm_version = SQL_CRM_VERSION
                sql_data_crm_version = sql_crm_version.format(
                    delivery_id + "-" + self.time_stamp, enterprise_name,
                    company_id, create_time, update_time, data_num, remark)

                try:
                    self.cursor.execute(sql_data_crm_version)
                    self.db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_version:%s' %
                          (e, sql_data_crm_version))

            except Exception as e:
                print('jiaxingyinte-登入失败:%s' % e)
                print('self.number', self.number)
                if self.number < 4:
                    self.parse('aa')
                else:
                    create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())
                    get_time = create_time
                    get_date = int(time.strftime("%Y%m%d", time.localtime()))
                    get_status = 2
                    if MONTHS == 0:
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_jiaxingyinte WHERE sell_time='{}' and delivery_name='{}'"
                            .format(self.yesterday, enterprise_name))
                    else:
                        month = int(str(self.fist).replace('-', '')[0:6])
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_jiaxingyinte WHERE month='{}' and delivery_name='{}'"
                            .format(month, enterprise_name))
                    data_num = self.cursor.fetchone()[0]
                    remark = '账号或密码错了'
                    update_time = create_time
                    sql_crm_record = SQL_CRM_RECORD
                    sql_data_crm_record = sql_crm_record.format(
                        company_id, delivery_id, enterprise_name, get_account,
                        '21-jiaxingyinte', delivery_id + "-" + self.time_stamp,
                        get_time, get_date, get_status, data_num,
                        self.classify_success, remark, create_time,
                        update_time)

                    try:
                        self.cursor.execute(sql_data_crm_record)
                        self.db.commit()
                        self.crm_cursor.execute(sql_data_crm_record)
                        self.crm_db.commit()
                    except Exception as e:
                        print('插入失败:%s  sql_data_crm_record:%s' %
                              (e, sql_data_crm_record))
                    print('账号密码或者验证码错误')
Ejemplo n.º 5
0
    def parse(self, response):
        delivery_id = 'DDA1001003'
        self.crm_cursor.execute(
            "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
            .format(delivery_id))

        data_tupl = self.crm_cursor.fetchall()
        for data_info in data_tupl:
            company_id = data_info[0]
            enterprise_name = data_info[1]
            get_account = data_info[2]
            get_pwd = data_info[3]
            is_enable = data_info[4]

        if is_enable == 1:
            self.number += 1
            # ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
            login_url = "http://www.zjhuiren.com/login.asp"
            post_url = "http://www.zjhuiren.com/login.asp?action=loginsub"

            res1 = self.sess_huiren.get(login_url, headers=self.headers)
            selector = Selector(text=res1.text)
            # print(dict_from_cookiejar(res1.cookies))
            random_value = random.randint(1, 9)
            k = selector.css(
                "input[name='codeKey']::attr(value)").extract_first()
            # print(k)

            code_url = "http://www.zjhuiren.com/DvCode.asp?k=%s&" % (k)
            headers = {
                "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
                "Host": "www.zjhuiren.com",
                "Referer": "http://www.zjhuiren.com/login.asp"
            }
            res2 = self.sess_huiren.get(code_url, headers=headers)
            # print(res2.text)
            if SCRAPYD_TYPE == 1:
                if 'indow' in platform.system():
                    symbol = r'\\'
                else:
                    symbol = r'/'
                path = os.path.dirname(
                    os.path.dirname(os.path.dirname(__file__)))
                files = r'{}{}static{}03-huiren'.format(path, symbol, symbol)
                if not os.path.exists(files):
                    os.makedirs(files)
                with open(
                        r'{}{}static{}03-huiren{}yzm.jpg'.format(
                            path, symbol, symbol, symbol), 'wb') as f:
                    f.write(res2.content)
                filename = r'{}{}static{}03-huiren{}yzm.jpg'.format(
                    path, symbol, symbol, symbol)
            else:
                with open(r'./03-huirenyzm.jpg', 'wb') as f:
                    f.write(res2.content)
                filename = r'./03-huirenyzm.jpg'
            codetype = 4000
            # 超时时间,秒
            timeout = 60
            ydm = YDMHttp()
            result = ydm.run(filename, codetype, timeout)
            # print(result)
            code = result[1]
            # code = input("请输入code")

            post_data = {
                "UserID": get_account,
                "UserPass": get_pwd,
                "codeKey": k,
                "code": code,
                "B1": "提交"
            }
            res3 = self.sess_huiren.post(post_url,
                                         data=post_data,
                                         headers=headers)
            res4 = self.sess_huiren.get(
                "http://www.zjhuiren.com/manager.asp?imark=1&ID=1888",
                headers=headers)

            data = {
                # "bgtime": "2018-10-01",
                "bgtime": self.fist,
                # "ovtime": "2018-10-15",
                "ovtime": self.last,
                "oldshow": "purchase",
                "px": "rq",
                "spbm": "",
                "tym": "",
                "dwmch": "",
            }

            data_resp = self.sess_huiren.post(
                url='http://www.zjhuiren.com/gjmx.asp?action=goselect&ID=1888',
                data=data,
                headers=self.headers,
                verify=False)
            # print('11' * 1000)
            # print(data_resp.content.decode('utf-8'))
            # print('11' * 1000)
            try:
                re.findall(r'日期', data_resp.content.decode('utf-8'))[0]
                try:
                    page = int(
                        re.findall(r'【页次:1/(.+?)页】',
                                   data_resp.content.decode('utf-8'))[0]) + 1
                except:
                    page = 0
                # print('page', page)
                # time.sleep(10)
                for i in range(1, page):
                    data_resp = self.sess_huiren.get(
                        url=
                        'http://www.zjhuiren.com/gjmx.asp?Page={}&ID=1888&bgtime={}&ovtime={}&spbm=&tym=&dwmch=&px=rq&oldshow=purchase'
                        .format(i, self.fist, self.last),
                        # data_resp = self.sess_huiren.get(url='http://www.zjhuiren.com/gjmx.asp?Page={}&ID=1888&bgtime={}&ovtime={}&spbm=&tym=&dwmch=&px=rq&oldshow=purchase'.format(i, '2018-10-01', '2018-10-15'),
                        headers=self.headers,
                        verify=False)
                    # print('*' * 1000)
                    data_resps = etree.HTML(data_resp.content.decode('utf-8'))
                    data_len = data_resps.xpath('/html/body/table[2]/tr')
                    # print(data_len)
                    # print(len(data_len))
                    md5 = hashlib.md5()
                    for i in range(2, int(len(data_len))):
                        company_id = company_id
                        delivery_id = delivery_id
                        delivery_name = enterprise_name
                        data_version = delivery_id + "-" + self.time_stamp
                        data_type = 1
                        bill_type = 1
                        try:
                            drug_name = data_resps.xpath(
                                '/html/body/table[2]/tr[%s]/td[3]/text()' %
                                i)[0].strip()
                        except:
                            drug_name = 1

                        if drug_name != 1:
                            try:
                                drug_specification = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[5]/text()' %
                                    i)[0].strip()
                            except:
                                drug_specification = ''

                            try:
                                supplier_name = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[6]/text()' %
                                    i)[0].strip()
                            except:
                                supplier_name = ''

                            try:
                                drug_unit = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[7]/text()' %
                                    i)[0].strip()
                            except:
                                drug_unit

                            try:
                                drug_number = int(
                                    data_resps.xpath(
                                        '/html/body/table[2]/tr[%s]/td[8]/text()'
                                        % i)[0].strip())
                                if drug_number < 0:
                                    bill_type = 2
                            except:
                                drug_number = 0

                            try:
                                drug_batch = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[10]/text()'
                                    % i)[0].strip()
                            except:
                                drug_batch = ''
                            try:
                                valid_till = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[9]/text()' %
                                    i)[0].strip()
                                if not valid_till:
                                    valid_till = '2000-01-01'
                            except:
                                valid_till = '2000-01-01'

                            try:
                                if bill_type == 1 or bill_type == 2:
                                    hospital_name = ''
                                else:
                                    hospital_name = data_resps.xpath(
                                        '/html/body/table[2]/tr[%s]/td[13]/text()'
                                        % i)[0].strip()
                            except:
                                hospital_name = ''

                            try:
                                hospital_address = ''
                            except:
                                hospital_address = ''

                            try:
                                sell_time = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[1]/text()' %
                                    i)[0].strip()
                            except:
                                sell_time = ''
                            create_time = time.strftime(
                                "%Y-%m-%d %H:%M:%S", time.localtime())
                            update_time = create_time

                            table_name = 'order_metadata_huiren'
                            try:
                                drug_price = ''
                            except:
                                drug_price = ''
                            try:
                                drug_price_sum = ''
                            except:
                                drug_price_sum
                            try:
                                goods_id = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[2]/text()' %
                                    i)[0].strip()
                            except:
                                goods_id = ''
                            try:
                                trade_name = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[4]/text()' %
                                    i)[0].strip()
                            except:
                                trade_name = ''
                            drug_hashs = "%s %s %s %s" % (
                                drug_name, drug_specification, delivery_id,
                                supplier_name)
                            md5 = hashlib.md5()
                            md5.update(bytes(drug_hashs, encoding="utf-8"))
                            drug_hash = md5.hexdigest()
                            hospital_hashs = "%s %s %s" % (
                                delivery_id, hospital_name, hospital_address)
                            md5 = hashlib.md5()
                            md5.update(bytes(hospital_hashs, encoding="utf-8"))
                            hospital_hash = md5.hexdigest()
                            stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                                company_id, delivery_id, bill_type, drug_hash,
                                drug_unit, abs(drug_number), drug_batch,
                                valid_till, hospital_hash, sell_time)
                            md5 = hashlib.md5()
                            md5.update(bytes(stream_hashs, encoding="utf-8"))
                            stream_hash = md5.hexdigest()
                            month = int(str(self.fist).replace('-', '')[0:6])

                            sql_crm = "insert into order_metadata_huiren(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, create_time, update_time, drug_price, drug_price_sum, goods_id, trade_name, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                            sql_data_crm = sql_crm.format(
                                company_id, delivery_id, delivery_name,
                                data_version, data_type, bill_type, drug_name,
                                drug_specification, supplier_name, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, sell_time,
                                create_time, update_time, drug_price,
                                drug_price_sum, goods_id, trade_name,
                                drug_hash, hospital_hash, stream_hash, month)
                            # print('sql_data', sql_data_crm)
                            try:
                                self.db.ping()
                            except pymysql.MySQLError:
                                self.db.connect()

                            try:
                                self.cursor.execute(sql_data_crm)
                                self.db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm:%s' %
                                      (e, sql_data_crm))

                            self.cursor.execute(
                                'select max(id) from order_metadata_huiren')
                            foreign_id = self.cursor.fetchone()[0]

                            sql_crm_data = SQL_CRM_DATA
                            sql_data_crm_data = sql_crm_data.format(
                                company_id, delivery_id, delivery_name,
                                table_name, foreign_id, data_version,
                                data_type, bill_type, drug_name,
                                drug_specification,
                                supplier_name, drug_hash, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, hospital_hash,
                                month, sell_time, stream_hash, create_time,
                                update_time)

                            try:
                                self.crm_db.ping()
                            except pymysql.MySQLError:
                                self.crm_db.connect()
                            try:
                                self.cursor.execute(sql_data_crm_data)
                                self.db.commit()
                                self.crm_cursor.execute(sql_data_crm_data)
                                self.crm_db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm_data:%s' %
                                      (e, sql_data_crm_data))


                data = {
                    # "bgtime": "2018-10-01",
                    "bgtime": self.fist,
                    # "ovtime": "2018-10-15",
                    "ovtime": self.last,
                    "oldshow": "sale",
                    "px": "rq",
                    "spbm": "",
                    "tym": "",
                    "dwmch": "",
                }

                data_resp = self.sess_huiren.post(
                    url=
                    'http://www.zjhuiren.com/xsmx.asp?action=goselect&ID=1888',
                    data=data,
                    headers=self.headers,
                    verify=False)
                try:
                    page = int(
                        re.findall(r'【页次:1/(.+?)页】',
                                   data_resp.content.decode('utf-8'))[0]) + 1
                except:
                    page = 1
                # print(page)
                # time.sleep(10)
                for i in range(1, page):
                    # url = 'http://www.zjhuiren.com/xsmx.asp?Page=%s&ID=1888&bgtime=2018-10-01&ovtime=2018-10-15&spbm=&tym=&dwmch=&px=rq&oldshow=sale' % i
                    url = 'http://www.zjhuiren.com/xsmx.asp?Page={}&ID=1888&bgtime={}&ovtime={}&spbm=&tym=&dwmch=&px=rq&oldshow=sale'.format(
                        i, self.fist, self.last)
                    # print(url)
                    data_resp = self.sess_huiren.get(url=url,
                                                     headers=self.headers,
                                                     verify=False)
                    # print('*' * 1000)
                    data_resps = etree.HTML(data_resp.content.decode('utf-8'))
                    data_len = data_resps.xpath('/html/body/table[2]/tr')
                    # print(data_len)
                    # print(len(data_len))
                    md5 = hashlib.md5()
                    for i in range(2, int(len(data_len))):
                        company_id = company_id
                        delivery_id = delivery_id
                        delivery_name = enterprise_name
                        data_version = delivery_id + "-" + self.time_stamp
                        data_type = 1
                        bill_type = 3
                        try:
                            drug_name = data_resps.xpath(
                                '/html/body/table[2]/tr[%s]/td[3]/text()' %
                                i)[0].strip()
                        except:
                            drug_name = 1

                        if drug_name != 1:
                            try:
                                drug_specification = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[5]/text()' %
                                    i)[0].strip()
                            except:
                                drug_specification = ''

                            try:
                                supplier_name = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[6]/text()' %
                                    i)[0].strip()
                            except:
                                supplier_name = ''

                            try:
                                drug_unit = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[7]/text()' %
                                    i)[0].strip()
                            except:
                                drug_unit = ''

                            try:
                                drug_number = int(
                                    data_resps.xpath(
                                        '/html/body/table[2]/tr[%s]/td[9]/text()'
                                        % i)[0].strip())
                                if drug_number < 0:
                                    bill_type = 4
                            except:
                                drug_number = 0

                            try:
                                drug_batch = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[8]/text()' %
                                    i)[0].strip()
                            except:
                                drug_batch = ''
                            try:
                                valid_till = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[12]/text()'
                                    % i)[0].strip()
                                if not valid_till:
                                    valid_till = '2000-01-01'
                            except:
                                valid_till = '2000-01-01'

                            try:
                                if bill_type == 1 or bill_type == 2:
                                    hospital_name = ''
                                else:
                                    hospital_name = data_resps.xpath(
                                        '/html/body/table[2]/tr[%s]/td[13]/text()'
                                        % i)[0].strip()
                            except:
                                hospital_name = ''

                            try:
                                hospital_address = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[14]/text()'
                                    % i)[0].strip()
                            except:
                                hospital_address = ''

                            try:
                                sell_time = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[1]/text()' %
                                    i)[0].strip()
                            except:
                                sell_time = ''
                            create_time = time.strftime(
                                "%Y-%m-%d %H:%M:%S", time.localtime())
                            update_time = create_time

                            table_name = 'order_metadata_huiren'
                            try:
                                drug_price = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[10]/text()'
                                    % i)[0].strip()
                            except:
                                drug_price = ''
                            try:
                                drug_price_sum = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[11]/text()'
                                    % i)[0].strip()
                            except:
                                drug_price_sum
                            try:
                                goods_id = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[2]/text()' %
                                    i)[0].strip()
                            except:
                                goods_id = ''
                            try:
                                trade_name = data_resps.xpath(
                                    '/html/body/table[2]/tr[%s]/td[4]/text()' %
                                    i)[0].strip()
                            except:
                                trade_name = ''
                            drug_hashs = "%s %s %s %s" % (
                                drug_name, drug_specification, delivery_id,
                                supplier_name)
                            md5 = hashlib.md5()
                            md5.update(bytes(drug_hashs, encoding="utf-8"))
                            drug_hash = md5.hexdigest()
                            hospital_hashs = "%s %s %s" % (
                                delivery_id, hospital_name, hospital_address)
                            md5 = hashlib.md5()
                            md5.update(bytes(hospital_hashs, encoding="utf-8"))
                            hospital_hash = md5.hexdigest()
                            stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                                company_id, delivery_id, bill_type, drug_hash,
                                drug_unit, abs(drug_number), drug_batch,
                                valid_till, hospital_hash, sell_time)
                            md5 = hashlib.md5()
                            md5.update(bytes(stream_hashs, encoding="utf-8"))
                            stream_hash = md5.hexdigest()
                            month = int(str(self.fist).replace('-', '')[0:6])

                            sql_crm = "insert into order_metadata_huiren(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, create_time, update_time, drug_price, drug_price_sum, goods_id, trade_name, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                            sql_data_crm = sql_crm.format(
                                company_id, delivery_id, delivery_name,
                                data_version, data_type, bill_type, drug_name,
                                drug_specification, supplier_name, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, sell_time,
                                create_time, update_time, drug_price,
                                drug_price_sum, goods_id, trade_name,
                                drug_hash, hospital_hash, stream_hash, month)
                            # print('sql_data', sql_data_crm)
                            try:
                                self.db.ping()
                            except pymysql.MySQLError:
                                self.db.connect()

                            try:
                                self.cursor.execute(sql_data_crm)
                                self.db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm:%s' %
                                      (e, sql_data_crm))

                            self.cursor.execute(
                                'select max(id) from order_metadata_huiren')
                            foreign_id = self.cursor.fetchone()[0]

                            sql_crm_data = SQL_CRM_DATA
                            sql_data_crm_data = sql_crm_data.format(
                                company_id, delivery_id, delivery_name,
                                table_name, foreign_id, data_version,
                                data_type, bill_type, drug_name,
                                drug_specification,
                                supplier_name, drug_hash, drug_unit,
                                abs(drug_number), drug_batch, valid_till,
                                hospital_name, hospital_address, hospital_hash,
                                month, sell_time, stream_hash, create_time,
                                update_time)
                            try:
                                self.crm_db.ping()
                            except pymysql.MySQLError:
                                self.crm_db.connect()

                            try:
                                self.cursor.execute(sql_data_crm_data)
                                self.db.commit()
                                self.crm_cursor.execute(sql_data_crm_data)
                                self.crm_db.commit()
                            except Exception as e:
                                print('插入失败:%s  sql_data_crm_data:%s' %
                                      (e, sql_data_crm_data))

                try:
                    crm_request_data = {
                        'version': delivery_id + "-" + self.time_stamp,
                        'streamType': streamType,
                    }
                    html = requests.post(url=CRM_REQUEST_URL,
                                         data=crm_request_data,
                                         headers=self.headers,
                                         verify=False)
                    self.classify_success = json.loads(
                        html.content.decode('utf-8'))['success']
                except:
                    print('爬虫调取后端接口错误')

                get_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                get_status = 1
                if MONTHS == 0:
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_huiren WHERE sell_time='{}' and delivery_name='{}'"
                        .format(self.yesterday, enterprise_name))
                else:
                    month = int(str(self.fist).replace('-', '')[0:6])
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_huiren WHERE month='{}' and delivery_name='{}'"
                        .format(month, enterprise_name))
                data_num = self.cursor.fetchone()[0]
                remark = ''
                create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                            time.localtime())
                update_time = create_time
                sql_crm_record = SQL_CRM_RECORD
                sql_data_crm_record = sql_crm_record.format(
                    company_id, delivery_id, enterprise_name, get_account,
                    '03-huiren', delivery_id + "-" + self.time_stamp, get_time,
                    get_date, get_status, data_num, self.classify_success,
                    remark, create_time, update_time)

                try:
                    self.cursor.execute(sql_data_crm_record)
                    self.db.commit()
                    self.crm_cursor.execute(sql_data_crm_record)
                    self.crm_db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_record:%s' %
                          (e, sql_data_crm_record))

                sql_crm_version = SQL_CRM_VERSION
                sql_data_crm_version = sql_crm_version.format(
                    delivery_id + "-" + self.time_stamp, enterprise_name,
                    company_id, create_time, update_time, data_num, remark)

                try:
                    self.cursor.execute(sql_data_crm_version)
                    self.db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_version:%s' %
                          (e, sql_data_crm_version))

            except Exception as e:
                print('huiren-登入失败:%s' % e)
                print('self.number', self.number)
                if self.number < 8:
                    self.parse('aa')
                else:
                    create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())
                    get_time = create_time
                    get_date = int(time.strftime("%Y%m%d", time.localtime()))
                    get_status = 2
                    if MONTHS == 0:
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_huiren WHERE sell_time='{}' and delivery_name='{}'"
                            .format(self.yesterday, enterprise_name))
                    else:
                        month = int(str(self.fist).replace('-', '')[0:6])
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_huiren WHERE month='{}' and delivery_name='{}'"
                            .format(month, enterprise_name))
                    data_num = self.cursor.fetchone()[0]
                    remark = '账号或密码错了'
                    update_time = create_time
                    sql_crm_record = SQL_CRM_RECORD
                    sql_data_crm_record = sql_crm_record.format(
                        company_id, delivery_id, enterprise_name, get_account,
                        '03-huiren', delivery_id + "-" + self.time_stamp,
                        get_time, get_date, get_status, data_num,
                        self.classify_success, remark, create_time,
                        update_time)

                    try:
                        self.cursor.execute(sql_data_crm_record)
                        self.db.commit()
                        self.crm_cursor.execute(sql_data_crm_record)
                        self.crm_db.commit()
                    except Exception as e:
                        print('插入失败:%s  sql_data_crm_record:%s' %
                              (e, sql_data_crm_record))
                    print('账号密码或者验证码错误')
Ejemplo n.º 6
0
    def parse(self, response):
        # delivery_id = 'F617B115D6F3447983E94BB781231231'
        delivery_id = 'DDA1001009'
        self.crm_cursor.execute(
            "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
            .format(delivery_id))

        data_tupl = self.crm_cursor.fetchall()
        for data_info in data_tupl:
            company_id = data_info[0]
            enterprise_name = data_info[1]
            get_account = data_info[2]
            get_pwd = data_info[3]
            is_enable = data_info[4]

        if is_enable == 1:
            self.number += 1
            html = self.sess_shangyao.get(url=self.start_urls[0],
                                          headers=self.headers,
                                          verify=False)
            resp = etree.HTML(html.content.decode('utf-8'))
            post_url = 'http://passport.shaphar.com/' + resp.xpath(
                '//*[@id="form1"]/@action')[0]
            lt = resp.xpath('//*[@name="lt"]/@value')[0]
            image = self.sess_shangyao.get(
                url='http://passport.shaphar.com/cas-webapp-server/kaptcha.jpg',
                headers=self.headers,
                verify=False)
            # print('dict_from_cookiejar(image.cookies)', dict_from_cookiejar(image.cookies))
            if SCRAPYD_TYPE == 1:
                if 'indow' in platform.system():
                    symbol = r'\\'
                else:
                    symbol = r'/'
                path = os.path.dirname(
                    os.path.dirname(os.path.dirname(__file__)))
                files = r'{}{}static{}08-shangyao'.format(path, symbol, symbol)
                if not os.path.exists(files):
                    os.makedirs(files)
                with open(
                        r'{}{}static{}08-shangyao{}yzm.jpg'.format(
                            path, symbol, symbol, symbol), 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'{}{}static{}08-shangyao{}yzm.jpg'.format(
                    path, symbol, symbol, symbol)
            else:
                with open(r'./08-shangyaoyzm.jpg', 'wb') as f:
                    f.write(image.content)
                # 图片文件
                filename = r'./08-shangyaoyzm.jpg'

            codetype = 1004
            # 超时时间,秒
            timeout = 60
            ydm = YDMHttp()
            cid, code_result = ydm.run(filename, codetype, timeout)
            # yzm = input('请输入验证码:')
            # print('cid:%s   code_result:%s' % (cid, code_result))
            yzm = code_result
            # yzm = input('请输入验证码:')
            data = {
                "username": get_account,
                "password": get_pwd,
                "captcha": yzm,
                "lt": lt,
                "_eventId": "submit",
                "submit": "登录",
            }
            self.sess_shangyao.post(url=post_url,
                                    data=data,
                                    headers=self.headers,
                                    verify=False)
            self.sess_shangyao.get(
                url=
                'http://applyreport.shaphar.com/WebReport1/ReportServer?op=fs&portalname=FE8EC3D50BFD98BBC9C1D07E55C9E019',
                headers=self.headers,
                verify=False)

            # '''
            try:
                js_html = self.sess_shangyao.get(
                    url=
                    'http://applyreport.shaphar.com/WebReport1/ReportServer?op=fs_main&cmd=entry_report&id=22078',
                    headers=self.headers,
                    verify=False)
                sessionID = re.findall(r'sessionID=(.+?)"',
                                       js_html.content.decode('gbk'))[0]

                time_data = {
                    '__parameters__':
                    '{"AS_CLIENT":"","ENDDATE":"%s","COM_GOODS":"","AS_SALE_TYPE":false,"TCXT":false,"INV_OWNER":"","LABEL0":"[5e93][5b58][62e5][6709][8005][ff1a]","AS_DATE_TYPE":"SEND","SALE_ORG":"","SORT5":"asc","COLUMN5":"","SORT4":"asc","COLUMN4":"","SORT3":"asc","COLUMN3":"","SORT2":"asc","COLUMN2":"","SORT1":"asc","COLUMN1":"","LABEL0_C_C_C_C_C":"[4ea7][54c1][ff1a]","LABEL1":"[6392][5e8f][ff1a]","LABEL0_C_C_C_C":"[5ba2][6237][ff1a]","LABEL0_C_C_C":"[9500][552e][90e8][95e8][ff1a]","UPDATE":"%s","STARTDATE":"%s"}'
                    % (self.last, self.last, self.fist),
                }
                time_url = 'http://applyreport.shaphar.com/WebReport1/ReportServer?op=fr_dialog&cmd=parameters_d&sessionID={}'.format(
                    sessionID)
                self.sess_shangyao.post(url=time_url,
                                        data=time_data,
                                        headers=self.headers,
                                        verify=False)
                time_time = int(time.time() * 1000)
                data_url = 'http://applyreport.shaphar.com/WebReport1/ReportServer?_={}&__boxModel__=true&op=fr_write&cmd=read_w_content&sessionID={}&reportIndex=0&browserWidth=1690&__cutpage__=v&pn=1'.format(
                    time_time, sessionID)
                data_htmls = self.sess_shangyao.get(url=data_url,
                                                    headers=self.headers,
                                                    verify=False)
                data_resps = etree.HTML(data_htmls.content.decode('gbk'))

                # --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
                js_html = self.sess_shangyao.get(
                    url=
                    'http://applyreport.shaphar.com/WebReport1/ReportServer?op=fs_main&cmd=entry_report&id=22079',
                    headers=self.headers,
                    verify=False)
                sessionID = re.findall(r'sessionID=(.+?)"',
                                       js_html.content.decode('gbk'))[0]
                time_data_bian = {
                    '__parameters__':
                    '{"AS_CLIENT":"","ENDDATE":"%s","AS_LOT":"","LABEL0_C_C_C_C_C_C_C_C":"[6279][53f7][ff1a]","AS_COM_GOODS":"","AS_SALE_TYPE":false,"TCXT":false,"AS_INV_OWNER":"","LABEL0":"[5e93][5b58][62e5][6709][8005][ff1a]","AS_DATE_TYPE":"SEND","AS_SALE_ORG":"","SORT5":"asc","COLUMN5":"","SORT4":"asc","COLUMN4":"","SORT3":"asc","COLUMN3":"","SORT2":"asc","COLUMN2":"","SORT1":"asc","COLUMN1":"","LABEL0_C_C_C_C_C":"[4ea7][54c1][ff1a]","LABEL1":"[6392][5e8f][ff1a]","LABEL0_C_C_C_C":"[5ba2][6237][ff1a]","LABEL0_C_C_C":"[9500][552e][90e8][95e8][ff1a]","UPDATE":"%s","STARTDATE":"%s"}'
                    % (self.last, self.last, self.fist)
                }
                time_url_bian = 'http://applyreport.shaphar.com/WebReport1/ReportServer?op=fr_dialog&cmd=parameters_d&sessionID={}'.format(
                    sessionID)
                self.sess_shangyao.post(url=time_url_bian,
                                        data=time_data_bian,
                                        headers=self.headers,
                                        verify=False)
                time_times = int(time.time() * 1000)
                data_url_bian = 'http://applyreport.shaphar.com/WebReport1/ReportServer?_={}&__boxModel__=true&op=fr_write&cmd=read_w_content&sessionID={}&reportIndex=0&browserWidth=1690&__cutpage__=v&pn=1'.format(
                    time_times, sessionID)
                data_htmls_bian = self.sess_shangyao.get(url=data_url_bian,
                                                         headers=self.headers,
                                                         verify=False)
                data_resps_bian = etree.HTML(
                    data_htmls_bian.content.decode('gbk'))
                data_len = len(
                    data_resps_bian.xpath(
                        '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr'
                    ))
                # print(data_len)
                md5 = hashlib.md5()
                for i in range(data_len):
                    # 入驻企业id
                    company_id = company_id
                    # 配送公司id
                    delivery_id = delivery_id
                    # 配送公司名称
                    delivery_name = enterprise_name
                    # 数据版本号
                    data_version = delivery_id + "-" + self.time_stamp
                    # 数据类型:1,phython 2,导入
                    data_type = 1
                    # 单据类型:1进货,2退货,3销售,4销售退货
                    bill_type = 3
                    try:
                        drug_name = data_resps.xpath(
                            '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[3]/div/text()'
                            % (i + 1))[0].strip().split('-')[0]
                    except:
                        drug_name = 1

                    if drug_name != 1:
                        try:
                            # 药品规格
                            drug_specification = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[3]/div/text()'
                                % (i + 1))[0].strip().split('-')[1]
                        except:
                            drug_specification = ''

                        try:
                            # 生产企业
                            supplier_name = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[3]/div/text()'
                                % (i + 1))[0].strip().split('-')[2]
                        except:
                            supplier_name = ''

                        try:
                            # 计量单位(瓶,盒等)
                            drug_unit = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[4]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            drug_unit = ''

                        try:
                            # 出库数量
                            drug_number = int(
                                data_resps.xpath(
                                    '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[8]/div/text()'
                                    % (i + 1))[0].strip())
                            if drug_number < 0:
                                bill_type = 4
                        except:
                            drug_number = 0

                        try:
                            # 订单数量
                            indent_number = int(
                                data_resps.xpath(
                                    '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[7]/div/text()'
                                    % (i + 1))[0].strip())
                        except:
                            indent_number = 0

                        try:
                            # 批号
                            drug_batch = data_resps_bian.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[8]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            drug_batch = ''

                        try:
                            # 有效期至
                            valid_till = data_resps_bian.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[9]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            valid_till = '2000-01-01'

                        try:
                            # 医院(终端)名称
                            hospital_name = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[6]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            hospital_name = ''

                        try:
                            # 医院(终端)地址
                            hospital_address = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[11]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            hospital_address = ''

                        try:
                            # 销售(制单)时间
                            sell_time = data_resps.xpath(
                                '//div[@id="frozen-west"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[2]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            sell_time = '2000-01-01'

                        try:
                            # 出库日期
                            out_put_time = data_resps.xpath(
                                '//div[@id="frozen-west"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[3]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            out_put_time = '2000-01-01'

                        # 创建时间
                        create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                    time.localtime())
                        update_time = create_time

                        table_name = 'order_metadata_shangyao'

                        try:
                            # 单价
                            drug_price = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[9]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            drug_price = ''
                        try:
                            # 单次总价
                            drug_price_sum = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[10]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            drug_price_sum = ''

                        try:
                            # 商品id
                            goods_id = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[2]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            goods_id = ''

                        try:
                            # 订单号
                            order_number = data_resps.xpath(
                                '//div[@id="frozen-west"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[4]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            order_number = ''

                        try:
                            # 销售部门
                            sales_departments = data_resps.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[1]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            sales_departments = '西药销售部'

                        drug_hashs = "%s %s %s %s" % (
                            drug_name, drug_specification, delivery_id,
                            supplier_name)
                        md5 = hashlib.md5()
                        md5.update(bytes(drug_hashs, encoding="utf-8"))
                        drug_hash = md5.hexdigest()
                        hospital_hashs = "%s %s %s" % (
                            delivery_id, hospital_name, hospital_address)
                        md5 = hashlib.md5()
                        md5.update(bytes(hospital_hashs, encoding="utf-8"))
                        hospital_hash = md5.hexdigest()
                        stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                            company_id, delivery_id, bill_type, drug_hash,
                            drug_unit, abs(drug_number), drug_batch,
                            valid_till, hospital_hash, sell_time)
                        md5 = hashlib.md5()
                        md5.update(bytes(stream_hashs, encoding="utf-8"))
                        stream_hash = md5.hexdigest()
                        month = int(str(self.fist).replace('-', '')[0:6])

                        sql_crm = "insert into order_metadata_shangyao(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_specification, supplier_name, drug_unit, drug_number, indent_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, out_put_time, create_time, update_time, drug_price, drug_price_sum, goods_id, order_number, sales_departments, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', {}, {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                        sql_data_crm = sql_crm.format(
                            company_id, delivery_id, delivery_name,
                            data_version, data_type, bill_type, drug_name,
                            drug_specification, supplier_name, drug_unit,
                            abs(drug_number), indent_number, drug_batch,
                            valid_till, hospital_name, hospital_address,
                            sell_time, out_put_time, create_time, update_time,
                            drug_price, drug_price_sum, goods_id, order_number,
                            sales_departments, drug_hash, hospital_hash,
                            stream_hash, month)
                        # print('sql_data', sql_data_crm)
                        try:
                            self.db.ping()
                        except pymysql.MySQLError:
                            self.db.connect()

                        try:
                            self.cursor.execute(sql_data_crm)
                            self.db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm:%s' %
                                  (e, sql_data_crm))

                        self.cursor.execute(
                            'select max(id) from order_metadata_shangyao')
                        foreign_id = self.cursor.fetchone()[0]

                        sql_crm_data = SQL_CRM_DATA
                        sql_data_crm_data = sql_crm_data.format(
                            company_id, delivery_id, delivery_name, table_name,
                            foreign_id, data_version, data_type, bill_type,
                            drug_name, drug_specification,
                            supplier_name, drug_hash, drug_unit,
                            abs(drug_number), drug_batch, valid_till,
                            hospital_name, hospital_address, hospital_hash,
                            month, sell_time, stream_hash, create_time,
                            update_time)

                        try:
                            self.cursor.execute(sql_data_crm_data)
                            self.db.commit()
                            self.crm_cursor.execute(sql_data_crm_data)
                            self.crm_db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm_data:%s' %
                                  (e, sql_data_crm_data))

                # '''
                # -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
                jin_html = self.sess_shangyao.get(
                    url=
                    'http://applyreport.shaphar.com/WebReport1/ReportServer?op=fs_main&cmd=entry_report&id=22077',
                    headers=self.headers,
                    verify=False)
                sessionID = re.findall(r'sessionID=(.+?)"',
                                       jin_html.content.decode('gbk'))[0]
                time_data_jin = {
                    '__parameters__':
                    '{"AS_COM_GOODS":"","ENDDATE":"%s","LABEL5":"[4ed3][5e93][ff1a]","LABEL3":"[5e93][5b58][62e5][6709][8005][ff1a]","LABEL2":"[4ea7][54c1][ff1a]","LABEL0":"[91c7][8d2d][65e5][671f][ff1a]","AS_INV_STORAGE":"","AS_INV_OWNER":"","SORT5":"asc","COLUMN5":"","SORT4":"asc","COLUMN4":"","SORT3":"asc","COLUMN3":"","SORT2":"asc","COLUMN2":"","SORT1":"asc","COLUMN1":"","LABEL1":"[6392][5e8f][ff1a]","LABEL0_C_C":"[2014]","UPDATE":"%s","STARTDATE":"%s"}'
                    % (self.last, self.last, self.fist)
                }
                time_url_bian = 'http://applyreport.shaphar.com/WebReport1/ReportServer?op=fr_dialog&cmd=parameters_d&sessionID={}'.format(
                    sessionID)
                self.sess_shangyao.post(url=time_url_bian,
                                        data=time_data_jin,
                                        headers=self.headers,
                                        verify=False)
                time_times = int(time.time() * 1000)
                data_url_jin = 'http://applyreport.shaphar.com/WebReport1/ReportServer?_={}&__boxModel__=true&op=fr_write&cmd=read_w_content&sessionID={}&reportIndex=0&browserWidth=1690&__cutpage__=v&pn=1'.format(
                    time_times, sessionID)
                data_htmls_jin = self.sess_shangyao.get(url=data_url_jin,
                                                        headers=self.headers,
                                                        verify=False)
                data_resps_jin = etree.HTML(
                    data_htmls_jin.content.decode('gbk'))
                data_len = len(
                    data_resps_jin.xpath(
                        '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr'
                    ))
                # print(data_len)
                md5 = hashlib.md5()
                for i in range(data_len):
                    # 入驻企业id
                    company_id = company_id
                    # 配送公司id
                    delivery_id = delivery_id
                    # 配送公司名称
                    delivery_name = enterprise_name
                    # 数据版本号
                    data_version = delivery_id + "-" + self.time_stamp
                    # Data type: 1 = python, 2 = import
                    data_type = 1
                    # 单据类型:1进货,2退货,3销售,4销售退货
                    bill_type = 1
                    try:
                        drug_name = data_resps_jin.xpath(
                            '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[1]/div/text()'
                            % (i + 1))[0].strip().split('-')[0]
                    except:
                        drug_name = 1

                    if drug_name != 1:
                        try:
                            # 药品规格
                            drug_specification = data_resps_jin.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[1]/div/text()'
                                % (i + 1))[0].strip().split('-')[1]
                        except:
                            drug_specification = ''

                        try:
                            # 生产企业
                            supplier_name = data_resps_jin.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[1]/div/text()'
                                % (i + 1))[0].strip().split('-')[2]
                        except:
                            supplier_name = ''

                        try:
                            # 计量单位(瓶,盒等)
                            drug_unit = data_resps_jin.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[2]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            drug_unit = ''

                        try:
                            # Item quantity (column 3); a negative value marks the row as a return (bill_type = 2)
                            drug_number = int(
                                data_resps_jin.xpath(
                                    '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[3]/div/text()'
                                    % (i + 1))[0].strip())
                            if drug_number < 0:
                                bill_type = 2
                        except:
                            drug_number = 0

                        try:
                            # 订单数量
                            indent_number = int(0)
                        except:
                            indent_number = 0

                        try:
                            # 批号
                            drug_batch = data_resps_jin.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[4]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            drug_batch = ''

                        try:
                            # 有效期至
                            valid_till = data_resps_jin.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[5]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            valid_till = ''

                        try:
                            # 医院(终端)名称
                            hospital_name = ''
                        except:
                            hospital_name = ''

                        try:
                            # 医院(终端)地址
                            hospital_address = ''
                        except:
                            hospital_address = ''

                        try:
                            # 销售(制单)时间
                            sell_time = data_resps_jin.xpath(
                                '//div[@id="frozen-west"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[2]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            sell_time = '2000-01-01'

                        try:
                            # Outbound date; read from the same cell (frozen-west td[2]) as sell_time above
                            out_put_time = data_resps_jin.xpath(
                                '//div[@id="frozen-west"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[2]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            out_put_time = '2000-01-01'

                        # 创建时间
                        create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                    time.localtime())
                        update_time = create_time

                        table_name = 'order_metadata_shangyao'

                        try:
                            # 单价
                            drug_price = data_resps_jin.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[6]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            drug_price = ''
                        try:
                            # 单次总价
                            drug_price_sum = data_resps_jin.xpath(
                                '//div[@id="frozen-center"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[7]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            drug_price_sum = ''

                        try:
                            # 商品id
                            goods_id = data_resps_jin.xpath(
                                '//div[@id="frozen-west"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[5]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            goods_id = ''

                        try:
                            # 订单号
                            order_number = data_resps_jin.xpath(
                                '//div[@id="frozen-west"]/table[@class="x-table"]/tbody[@class="rows-height-counter"]/tr[%s]/td[3]/div/text()'
                                % (i + 1))[0].strip()
                        except:
                            order_number = ''

                        try:
                            # 销售部门
                            sales_departments = ''
                        except:
                            sales_departments = ''

                        drug_hashs = "%s %s %s %s" % (
                            drug_name, drug_specification, delivery_id,
                            supplier_name)
                        md5 = hashlib.md5()
                        md5.update(bytes(drug_hashs, encoding="utf-8"))
                        drug_hash = md5.hexdigest()
                        hospital_hashs = "%s %s %s" % (
                            delivery_id, hospital_name, hospital_address)
                        md5 = hashlib.md5()
                        md5.update(bytes(hospital_hashs, encoding="utf-8"))
                        hospital_hash = md5.hexdigest()
                        stream_hashs = "%s %s %s %s %s %s %s %s %s %s" % (
                            company_id, delivery_id, bill_type, drug_hash,
                            drug_unit, abs(drug_number), drug_batch,
                            valid_till, hospital_hash, sell_time)
                        md5 = hashlib.md5()
                        md5.update(bytes(stream_hashs, encoding="utf-8"))
                        stream_hash = md5.hexdigest()
                        month = int(str(self.fist).replace('-', '')[0:6])

                        sql_crm = "insert into order_metadata_shangyao(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_specification, supplier_name, drug_unit, drug_number, indent_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, out_put_time, create_time, update_time, drug_price, drug_price_sum, goods_id, order_number, sales_departments, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', {}, {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
                        sql_data_crm = sql_crm.format(
                            company_id, delivery_id, delivery_name,
                            data_version, data_type, bill_type, drug_name,
                            drug_specification, supplier_name, drug_unit,
                            abs(drug_number), indent_number, drug_batch,
                            valid_till, hospital_name, hospital_address,
                            sell_time, out_put_time, create_time, update_time,
                            drug_price, drug_price_sum, goods_id, order_number,
                            sales_departments, drug_hash, hospital_hash,
                            stream_hash, month)
                        # print('sql_data', sql_data_crm)
                        try:
                            self.db.ping()
                        except pymysql.MySQLError:
                            self.db.connect()

                        try:
                            self.cursor.execute(sql_data_crm)
                            self.db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm:%s' %
                                  (e, sql_data_crm))

                        self.cursor.execute(
                            'select max(id) from order_metadata_shangyao')
                        foreign_id = self.cursor.fetchone()[0]

                        sql_crm_data = SQL_CRM_DATA
                        sql_data_crm_data = sql_crm_data.format(
                            company_id, delivery_id, delivery_name, table_name,
                            foreign_id, data_version, data_type, bill_type,
                            drug_name, drug_specification,
                            supplier_name, drug_hash, drug_unit,
                            abs(drug_number), drug_batch, valid_till,
                            hospital_name, hospital_address, hospital_hash,
                            month, sell_time, stream_hash, create_time,
                            update_time)

                        try:
                            self.cursor.execute(sql_data_crm_data)
                            self.db.commit()
                            self.crm_cursor.execute(sql_data_crm_data)
                            self.crm_db.commit()
                        except Exception as e:
                            print('插入失败:%s  sql_data_crm_data:%s' %
                                  (e, sql_data_crm_data))

                try:
                    crm_request_data = {
                        'version': delivery_id + "-" + self.time_stamp,
                        'streamType': streamType,
                    }
                    html = requests.post(url=CRM_REQUEST_URL,
                                         data=crm_request_data,
                                         headers=self.headers,
                                         verify=False)
                    self.classify_success = json.loads(
                        html.content.decode('utf-8'))['success']
                except:
                    print('爬虫调取后端接口错误')

                get_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                get_status = 1
                if MONTHS == 0:
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_shangyao WHERE sell_time='{}' and delivery_name='{}'"
                        .format(self.yesterday, enterprise_name))
                else:
                    month = int(str(self.fist).replace('-', '')[0:6])
                    self.cursor.execute(
                        "SELECT count(*) from order_metadata_shangyao WHERE month='{}' and delivery_name='{}'"
                        .format(month, enterprise_name))
                data_num = self.cursor.fetchone()[0]
                remark = ''
                create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                            time.localtime())
                update_time = create_time
                sql_crm_record = SQL_CRM_RECORD
                sql_data_crm_record = sql_crm_record.format(
                    company_id, delivery_id, enterprise_name, get_account,
                    '08-shangyao', delivery_id + "-" + self.time_stamp,
                    get_time, get_date, get_status, data_num,
                    self.classify_success, remark, create_time, update_time)

                try:
                    self.cursor.execute(sql_data_crm_record)
                    self.db.commit()
                    self.crm_cursor.execute(sql_data_crm_record)
                    self.crm_db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_record:%s' %
                          (e, sql_data_crm_record))

                sql_crm_version = SQL_CRM_VERSION
                sql_data_crm_version = sql_crm_version.format(
                    delivery_id + "-" + self.time_stamp, enterprise_name,
                    company_id, create_time, update_time, data_num, remark)

                try:
                    self.cursor.execute(sql_data_crm_version)
                    self.db.commit()
                except Exception as e:
                    print('插入失败:%s  sql_data_crm_version:%s' %
                          (e, sql_data_crm_version))

            except Exception as e:
                print('shangyao-登入失败:%s' % e)
                print('self.number', self.number)
                if self.number < 4:
                    self.parse('aa')
                else:
                    create_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())
                    get_time = create_time
                    get_date = int(time.strftime("%Y%m%d", time.localtime()))
                    get_status = 2
                    if MONTHS == 0:
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_shangyao WHERE sell_time='{}' and delivery_name='{}'"
                            .format(self.yesterday, enterprise_name))
                    else:
                        month = int(str(self.fist).replace('-', '')[0:6])
                        self.cursor.execute(
                            "SELECT count(*) from order_metadata_shangyao WHERE month='{}' and delivery_name='{}'"
                            .format(month, enterprise_name))
                    data_num = self.cursor.fetchone()[0]
                    remark = '账号或密码错了'
                    update_time = create_time
                    sql_crm_record = SQL_CRM_RECORD
                    sql_data_crm_record = sql_crm_record.format(
                        company_id, delivery_id, enterprise_name, get_account,
                        '08-shangyao', delivery_id + "-" + self.time_stamp,
                        get_time, get_date, get_status, data_num,
                        self.classify_success, remark, create_time,
                        update_time)

                    try:
                        self.cursor.execute(sql_data_crm_record)
                        self.db.commit()
                        self.crm_cursor.execute(sql_data_crm_record)
                        self.crm_db.commit()
                    except Exception as e:
                        print('插入失败:%s  sql_data_crm_record:%s' %
                              (e, sql_data_crm_record))
                    print('账号密码或者验证码错误')