def parse(self, response):
    """Log in to the Xiaoshan Hospital portal and store its drug-flow report.

    For every enabled row of ``base_delivery_enterprise`` matching the
    hard-coded delivery id, the method:

    1. fetches the login page and the captcha image, has the captcha
       solved through ``YDMHttp`` and posts the login form;
    2. opens the ReportViewer page for ``self.fist`` .. ``self.last`` and
       triggers the asynchronous report load;
    3. stores every report row of page 1, then requests pages 2, 3, ...
       and stores their rows as well;
    4. once the last stored row of a page is the summary row
       (``'汇 总'``), calls the backend classification endpoint, writes the
       crawl record and data-version rows, and stops paging.

    Any exception raised while reading or storing the report is treated
    as a failed login: the whole method is re-run until ``self.number``
    reaches 4, after which a failure record (status 2) is written.

    Args:
        response: Unused; present for the spider callback signature.

    Returns:
        None. All results are written through ``self.cursor`` /
        ``self.crm_cursor``.
    """
    self.number += 1  # attempt counter; bounds the retry recursion below
    delivery_id = 'DDA1001010'
    table_name = 'order_metadata_xiaoshanyiyuan'
    cell_xpath = '//div[@dir="LTR"]/table/tr/td/table/tr/td/table/tr[%s]/td[%s]/div/text()'

    def now_text():
        """Return the local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    def md5_hex(text):
        """Return the hex MD5 digest of *text* encoded as UTF-8."""
        return hashlib.md5(text.encode("utf-8")).hexdigest()

    def cell(tree, row, col, default):
        """Return the stripped text of report cell (row, col), or *default*."""
        try:
            return tree.xpath(cell_xpath % (row, col))[0].strip()
        except Exception:
            # Typically IndexError: the cell is missing or holds no text.
            return default

    def count_stored_rows(delivery_name):
        """Count rows already stored for this crawl window and company."""
        if MONTHS == 0:
            self.cursor.execute(
                "SELECT count(*) from order_metadata_xiaoshanyiyuan WHERE sell_time='{}' and delivery_name='{}'".format(
                    self.yesterday, delivery_name))
        else:
            month = int(str(self.fist).replace('-', '')[0: 6])
            self.cursor.execute(
                "SELECT count(*) from order_metadata_xiaoshanyiyuan WHERE month='{}' and delivery_name='{}'".format(
                    month, delivery_name))
        return self.cursor.fetchone()[0]

    def store_rows(tree, company_id, delivery_name, sell_time,
                   skip_blank_name, mirror_to_crm):
        """Parse every data row of one report page and insert it.

        Every field is read from *tree*, the page being processed.  The
        previous pagination code read bill type, quantity, unit price,
        business number and customer area from the first page's tree.

        Returns the ``sell_time`` of the last row that was processed
        (0 for the summary row), or the *sell_time* passed in when no row
        was processed.
        """
        row_total = len(tree.xpath('//tr[@valign="top"]')) - 1
        for offset in range(row_total):
            row = offset + 3  # row lookups start at tr[3]

            # Column 9 holds "name spec manufacturer" separated by spaces.
            name_cell = cell(tree, row, 9, None)
            if name_cell is None:
                continue
            name_parts = name_cell.split(' ')
            drug_name = name_parts[0]
            if skip_blank_name and not drug_name:
                continue
            drug_specification = name_parts[1] if len(name_parts) > 1 else ''
            supplier_name = name_parts[2] if len(name_parts) > 2 else ''

            data_version = delivery_id + "-" + self.time_stamp
            data_type = 1  # 1 = crawled by python, 2 = imported

            drug_price_sum = cell(tree, row, 8, '')   # amount
            drug_unit = cell(tree, row, 6, '')        # unit (bottle, box, ...)
            department = cell(tree, row, 12, '')      # department

            # Bill type: 1 purchase, 2 purchase return, 3 sale, 4 sales return.
            bill_type = 1 if cell(tree, row, 2, None) == '进货' else 3

            # Outbound quantity; a negative quantity marks a return.
            try:
                drug_number = round(float(cell(tree, row, 5, None)))
            except Exception:
                drug_number = 0
            if drug_number < 0:
                bill_type = 2 if bill_type == 1 else 4

            drug_batch = cell(tree, row, 4, '')                # batch number
            valid_till = cell(tree, row, 10, '2000-01-01')     # valid until

            # Purchases carry no hospital (terminal) name or address.
            if bill_type in (1, 2):
                hospital_name = ''
                hospital_address = ''
            else:
                hospital_name = cell(tree, row, 3, '')
                hospital_address = cell(tree, row, 13, '')

            # Sale (document) time; the summary row is flagged with 0.
            sell_time = cell(tree, row, 1, '2000-01-01')
            if sell_time == '汇 总':
                sell_time = 0

            drug_price = cell(tree, row, 7, '')         # unit price
            # NOTE(review): business number reads column 10, the same
            # column as valid_till - confirm the intended column.
            business_number = cell(tree, row, 10, '')
            customer_area = cell(tree, row, 14, '')     # customer region

            create_time = now_text()
            update_time = create_time

            drug_hash = md5_hex("%s %s %s %s" % (
                drug_name, drug_specification, delivery_id, supplier_name))
            hospital_hash = md5_hex("%s %s %s" % (
                delivery_id, hospital_name, hospital_address))
            stream_hash = md5_hex("%s %s %s %s %s %s %s %s %s %s" % (
                company_id, delivery_id, bill_type, drug_hash, drug_unit,
                abs(drug_number), drug_batch, valid_till, hospital_hash,
                sell_time))
            month = int(str(self.fist).replace('-', '')[0: 6])

            # NOTE(review): SQL is assembled by string formatting from
            # scraped text; a value containing a quote breaks the
            # statement.  Parameterised queries would need the external
            # SQL_CRM_* templates changed as well.
            sql_crm = (
                "insert into order_metadata_xiaoshanyiyuan(company_id, delivery_id, delivery_name, data_version, "
                "data_type, bill_type, drug_name, drug_price_sum, department, drug_specification, supplier_name, "
                "drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, drug_price, "
                "sell_time, create_time, update_time, business_number, customer_area, drug_hash, hospital_hash, "
                "stream_hash, month) "
                "values('{}', '{}', '{}', '{}', {}, "
                "'{}', '{}', '{}', '{}', '{}', '{}', '{}', "
                "'{}', '{}', '{}', '{}', '{}', '{}', '{}', "
                "'{}', '{}', '{}', '{}', '{}', '{}', '{}', "
                "{})")
            sql_data_crm = sql_crm.format(
                company_id, delivery_id, delivery_name, data_version,
                data_type, bill_type, drug_name, drug_price_sum, department,
                drug_specification, supplier_name, drug_unit,
                abs(drug_number), drug_batch, valid_till, hospital_name,
                hospital_address, drug_price, sell_time, create_time,
                update_time, business_number, customer_area, drug_hash,
                hospital_hash, stream_hash, month)

            try:
                self.db.ping()
            except pymysql.MySQLError:
                self.db.connect()

            try:
                if sell_time != 0:  # the summary row is never stored
                    self.cursor.execute(sql_data_crm)
                    self.db.commit()
            except Exception as err:
                print('插入失败:%s sql_data_crm:%s' % (err, sql_data_crm))

            self.cursor.execute('select max(id) from order_metadata_xiaoshanyiyuan')
            foreign_id = self.cursor.fetchone()[0]
            sql_data_crm_data = SQL_CRM_DATA.format(
                company_id, delivery_id, delivery_name, table_name,
                foreign_id, data_version, data_type, bill_type, drug_name,
                drug_specification, supplier_name, drug_hash, drug_unit,
                abs(drug_number), drug_batch, valid_till, hospital_name,
                hospital_address, hospital_hash, month, sell_time,
                stream_hash, create_time, update_time)
            try:
                if sell_time != 0:
                    self.cursor.execute(sql_data_crm_data)
                    self.db.commit()
                    if mirror_to_crm:
                        self.crm_cursor.execute(sql_data_crm_data)
                        self.crm_db.commit()
            except Exception as err:
                print('插入失败:%s sql_data_crm_data:%s' % (err, sql_data_crm_data))
        return sell_time

    self.crm_cursor.execute(
        "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'".format(
            delivery_id))
    for data_info in self.crm_cursor.fetchall():
        company_id = data_info[0]
        enterprise_name = data_info[1]
        get_account = data_info[2]
        get_pwd = data_info[3]
        is_enable = data_info[4]
        if is_enable != 1:
            continue

        sell_time = 0

        # --- login -------------------------------------------------------
        html = self.sess_xiaoshanyiyuan.get(url=self.start_urls[0], headers=self.headers, verify=False)
        resp = etree.HTML(html.content.decode('utf-8'))
        login_viewstate = resp.xpath('//*[@id="__VIEWSTATE"]/@value')[0]
        image = self.sess_xiaoshanyiyuan.get(
            url="http://www.hzxsyy.com.cn:8080/verifyimage.aspx?time=%s" % (random.random()),
            headers=self.headers, verify=False)

        # Save the captcha image where the solver can read it.
        if SCRAPYD_TYPE == 1:
            symbol = r'\\' if 'indow' in platform.system() else r'/'
            path = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
            files = r'{}{}static{}20-xiaoshanyiyuan'.format(path, symbol, symbol)
            if not os.path.exists(files):
                os.makedirs(files)
            filename = r'{}{}static{}20-xiaoshanyiyuan{}yzm.jpg'.format(path, symbol, symbol, symbol)
        else:
            filename = r'./20-xiaoshanyiyuanyzm.jpg'
        with open(filename, 'wb') as f:
            f.write(image.content)

        codetype = 4004
        timeout = 60  # solver timeout, in seconds
        ydm = YDMHttp()
        _cid, code_result = ydm.run(filename, codetype, timeout)

        data = {
            "__VIEWSTATE": login_viewstate,
            "UserName": get_account,
            "PassWord": get_pwd,
            "vcode": code_result,
        }
        self.sess_xiaoshanyiyuan.post(
            url="http://www.hzxsyy.com.cn:8080/syslogin.aspx?result=3&username=10690&txtCompanyID=22",
            data=data, headers=self.headers, verify=False)

        # --- open the report viewer and trigger the async load -----------
        viewer_resp = self.sess_xiaoshanyiyuan.get(
            url=('http://app1.yy5u.com:8080/ReportServer/Pages/ReportViewer.aspx?/BNumberTrafficFlowQuery'
                 '&rs:Command=Render&rc:Parameters=false&SysCompanyID=22&UserID=CUSR201709041352'
                 '&BizType=&GoodsID=&BeginDate={}&EndDate={}&serverNames=192.168.18.1&sqlName=NetSrv_App3'
                 '&userName=mztZ8O0gn1HUBnbz9wW68Q%3d%3d&pass=HYg8AE7GMeP2W2YGWaIEpg%3d%3d'
                 '&CustomerName=&BatchNo=&DepartName=').format(self.fist, self.last),
            headers=self.headers, verify=False)
        viewer_tree = etree.HTML(viewer_resp.content.decode('utf-8'))
        viewer_viewstate = viewer_tree.xpath('//*[@id="__VIEWSTATE"]/@value')[0]
        viewer_validation = viewer_tree.xpath('//*[@id="__EVENTVALIDATION"]/@value')[0]

        report_post_url = (
            'http://app1.yy5u.com:8080/ReportServer/Pages/ReportViewer.aspx?%2fBNumberTrafficFlowQuery'
            '&rs%3aCommand=Render&rc%3aParameters=false&SysCompanyID=22&UserID=CUSR201709041352'
            '&BizType=&GoodsID=&BeginDate={}&EndDate={}&serverNames=192.168.18.1&sqlName=NetSrv_App3'
            '&userName=mztZ8O0gn1HUBnbz9wW68Q%3d%3d&pass=HYg8AE7GMeP2W2YGWaIEpg%3d%3d'
            '&CustomerName=&BatchNo=&DepartName=').format(self.fist, self.last)

        data_data = {
            "AjaxScriptManager": "AjaxScriptManager|ReportViewerControl$ctl09$Reserved_AsyncLoadTarget",
            "__EVENTTARGET": "ReportViewerControl$ctl09$Reserved_AsyncLoadTarget",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": viewer_viewstate,
            "__EVENTVALIDATION": viewer_validation,
            "NavigationCorrector$ScrollPosition": "",
            "NavigationCorrector$ViewState": "",
            "NavigationCorrector$PageState": "",
            "NavigationCorrector$NewViewState": "",
            "ReportViewerControl$ctl03$ctl00": "",
            "ReportViewerControl$ctl03$ctl01": "",
            "ReportViewerControl$ctl10": "ltr",
            "ReportViewerControl$ctl11": "quirks",
            "ReportViewerControl$AsyncWait$HiddenCancelField": "False",
            "ReportViewerControl$ctl04$ctl03$txtValue": "NetSrv_App3",
            "ReportViewerControl$ctl04$ctl05$txtValue": "192.168.18.1",
            "ReportViewerControl$ctl04$ctl07$txtValue": "HYg8AE7GMeP2W2YGWaIEpg==",
            "ReportViewerControl$ctl04$ctl09$txtValue": "mztZ8O0gn1HUBnbz9wW68Q==",
            "ReportViewerControl$ctl04$ctl11$txtValue": "22",
            "ReportViewerControl$ctl04$ctl13$txtValue": "CUSR201709041352",
            "ReportViewerControl$ctl04$ctl15$txtValue": "",
            "ReportViewerControl$ctl04$ctl17$txtValue": "",
            "ReportViewerControl$ctl04$ctl19$txtValue": self.fist,
            "ReportViewerControl$ctl04$ctl21$txtValue": self.last,
            "ReportViewerControl$ctl04$ctl23$txtValue": "",
            "ReportViewerControl$ctl04$ctl25$txtValue": "",
            "ReportViewerControl$ctl04$ctl27$txtValue": "",
            "ReportViewerControl$ToggleParam$store": "",
            "ReportViewerControl$ToggleParam$collapse": "true",
            "ReportViewerControl$ctl05$ctl00$CurrentPage": "",
            "ReportViewerControl$ctl05$ctl03$ctl00": "",
            "ReportViewerControl$ctl08$ClientClickedId": "",
            "ReportViewerControl$ctl07$store": "",
            "ReportViewerControl$ctl07$collapse": "false",
            "ReportViewerControl$ctl09$VisibilityState$ctl00": "None",
            "ReportViewerControl$ctl09$ScrollPosition": "",
            "ReportViewerControl$ctl09$ReportControl$ctl02": "",
            "ReportViewerControl$ctl09$ReportControl$ctl03": "",
            "ReportViewerControl$ctl09$ReportControl$ctl04": "100",
            "__ASYNCPOST": "true",
        }
        first_resp = self.sess_xiaoshanyiyuan.post(
            url=report_post_url, data=data_data, headers=self.headers, verify=False)
        first_text = first_resp.content.decode('utf-8')
        self.__VIEWSTATEs = re.findall(r'__VIEWSTATE\|(.+?)\|', first_text)[0]
        self.__EVENTVALIDATIONs = re.findall(r'__EVENTVALIDATION\|(.+?)\|', first_text)[0]
        first_tree = etree.HTML(first_text)

        try:
            # Missing when the report did not render; the IndexError then
            # routes into the retry / failure handling below.
            self.NavigationCorrector = first_tree.xpath(
                '//input[@id="NavigationCorrector_NewViewState"]/@value')[0]

            # NOTE(review): page 1 historically neither skipped blank drug
            # names nor mirrored rows to the CRM database; both quirks are
            # kept as-is - confirm whether they are intentional.
            sell_time = store_rows(first_tree, company_id, enterprise_name, sell_time,
                                   skip_blank_name=False, mirror_to_crm=False)

            for page in range(2, 1000):
                data_datas = {
                    "AjaxScriptManager": "AjaxScriptManager|ReportViewerControl$ctl05$ctl00$CurrentPage",
                    "NavigationCorrector$ScrollPosition": "",
                    "NavigationCorrector$ViewState": "",
                    "NavigationCorrector$PageState": "",
                    "NavigationCorrector$NewViewState": self.NavigationCorrector,
                    "ReportViewerControl$ctl03$ctl00": "",
                    "ReportViewerControl$ctl03$ctl01": "",
                    "ReportViewerControl$ctl10": "ltr",
                    "ReportViewerControl$ctl11": "quirks",
                    "ReportViewerControl$AsyncWait$HiddenCancelField": "False",
                    "ReportViewerControl$ctl04$ctl03$txtValue": "NetSrv_App3",
                    "ReportViewerControl$ctl04$ctl05$txtValue": "192.168.18.1",
                    "ReportViewerControl$ctl04$ctl07$txtValue": "HYg8AE7GMeP2W2YGWaIEpg==",
                    "ReportViewerControl$ctl04$ctl09$txtValue": "mztZ8O0gn1HUBnbz9wW68Q==",
                    "ReportViewerControl$ctl04$ctl11$txtValue": "22",
                    "ReportViewerControl$ctl04$ctl13$txtValue": "CUSR201709041352",
                    "ReportViewerControl$ctl04$ctl15$txtValue": "",
                    "ReportViewerControl$ctl04$ctl17$txtValue": "",
                    "ReportViewerControl$ctl04$ctl19$txtValue": self.fist,
                    "ReportViewerControl$ctl04$ctl21$txtValue": self.last,
                    "ReportViewerControl$ctl04$ctl23$txtValue": "",
                    "ReportViewerControl$ctl04$ctl25$txtValue": "",
                    "ReportViewerControl$ctl04$ctl27$txtValue": "",
                    "ReportViewerControl$ToggleParam$store": "",
                    "ReportViewerControl$ToggleParam$collapse": "true",
                    "ReportViewerControl$ctl05$ctl00$CurrentPage": page,
                    "ReportViewerControl$ctl05$ctl03$ctl00": "",
                    "ReportViewerControl$ctl08$ClientClickedId": "",
                    "ReportViewerControl$ctl07$store": "",
                    "ReportViewerControl$ctl07$collapse": "false",
                    "ReportViewerControl$ctl09$VisibilityState$ctl00": "ReportPage",
                    "ReportViewerControl$ctl09$ScrollPosition": "0 0",
                    "ReportViewerControl$ctl09$ReportControl$ctl02": "",
                    "ReportViewerControl$ctl09$ReportControl$ctl03": "",
                    "ReportViewerControl$ctl09$ReportControl$ctl04": "100",
                    "__EVENTTARGET": "ReportViewerControl$ctl05$ctl00$CurrentPage",
                    "__EVENTARGUMENT": "",
                    "__VIEWSTATE": self.__VIEWSTATEs,
                    "__EVENTVALIDATION": self.__EVENTVALIDATIONs,
                    "__ASYNCPOST": "true",
                }
                page_resp = self.sess_xiaoshanyiyuan.post(
                    url=report_post_url, data=data_datas, headers=self.headers, verify=False)
                page_text = page_resp.content.decode('utf-8')
                # Each response carries the state tokens for the next request.
                self.__VIEWSTATEs = re.findall(r'__VIEWSTATE\|(.+?)\|', page_text)[0]
                self.__EVENTVALIDATIONs = re.findall(r'__EVENTVALIDATION\|(.+?)\|', page_text)[0]
                page_tree = etree.HTML(page_text)
                self.NavigationCorrector = page_tree.xpath(
                    '//input[@id="NavigationCorrector_NewViewState"]/@value')[0]

                sell_time = store_rows(page_tree, company_id, enterprise_name, sell_time,
                                       skip_blank_name=True, mirror_to_crm=True)

                if sell_time != 0:
                    continue

                # Summary row reached: classify, record the crawl, stop paging.
                try:
                    crm_request_data = {
                        'version': delivery_id + "-" + self.time_stamp,
                        'streamType': streamType,
                    }
                    html = requests.post(url=CRM_REQUEST_URL, data=crm_request_data,
                                         headers=self.headers, verify=False)
                    self.classify_success = json.loads(html.content.decode('utf-8'))['success']
                except Exception:
                    print('爬虫调取后端接口错误')

                get_time = now_text()
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                get_status = 1
                data_num = count_stored_rows(enterprise_name)
                remark = ''
                create_time = now_text()
                update_time = create_time

                sql_data_crm_record = SQL_CRM_RECORD.format(
                    company_id, delivery_id, enterprise_name, get_account,
                    '20-xiaoshanyiyuan', delivery_id + "-" + self.time_stamp,
                    get_time, get_date, get_status, data_num,
                    self.classify_success, remark, create_time, update_time)
                try:
                    self.cursor.execute(sql_data_crm_record)
                    self.db.commit()
                    self.crm_cursor.execute(sql_data_crm_record)
                    self.crm_db.commit()
                except Exception as err:
                    print('插入失败:%s sql_data_crm_record:%s' % (err, sql_data_crm_record))

                sql_data_crm_version = SQL_CRM_VERSION.format(
                    delivery_id + "-" + self.time_stamp, enterprise_name,
                    company_id, create_time, update_time, data_num, remark)
                try:
                    self.cursor.execute(sql_data_crm_version)
                    self.db.commit()
                except Exception as err:
                    print('插入失败:%s sql_data_crm_version:%s' % (err, sql_data_crm_version))
                break
        except Exception as e:
            print('xiaoshanyiyuan-登入失败:%s' % e)
            print('self.number', self.number)
            if self.number < 4:
                # Retry the whole crawl; self.number caps the recursion depth.
                self.parse('aa')
            else:
                create_time = now_text()
                get_time = now_text()
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                get_status = 2
                data_num = count_stored_rows(enterprise_name)
                remark = '账号或密码错了'
                update_time = create_time
                sql_data_crm_record = SQL_CRM_RECORD.format(
                    company_id, delivery_id, enterprise_name, get_account,
                    '20-xiaoshanyiyuan', delivery_id + "-" + self.time_stamp,
                    get_time, get_date, get_status, data_num,
                    self.classify_success, remark, create_time, update_time)
                try:
                    self.cursor.execute(sql_data_crm_record)
                    self.db.commit()
                    self.crm_cursor.execute(sql_data_crm_record)
                    self.crm_db.commit()
                except Exception as err:
                    print('插入失败:%s sql_data_crm_record:%s' % (err, sql_data_crm_record))
                print('账号密码或者验证码错误')
def parse(self, response):
    """Crawl the zhongan portal (www.zayy.cn) and store its drug flow rows.

    For every enabled account registered under this delivery id in
    ``base_delivery_enterprise`` the method logs in (the captcha is solved
    through ``YDMHttp``), reads the paged sales list (bill type 3) and the
    purchase list (bill type 1), and writes each row into
    ``order_metadata_zhongan`` plus, through ``SQL_CRM_DATA``, into both the
    local and the CRM database.  It then posts the data version to
    ``CRM_REQUEST_URL`` and stores a run record and a version record.

    Any exception raised while reading or storing the lists is reported as
    a failed login: the method calls itself again while ``self.number`` is
    below 4 and stores a failure record otherwise.

    Args:
        response: Not used; retries pass a placeholder string.
    """
    delivery_id = 'DDA100100R'
    spider_name = '16-zhongan'
    table_name = 'order_metadata_zhongan'
    rows_path = '//*[@id="ctl00_ContentPlaceHolder1_DGProduct"]/tr'
    sales_url = ('http://www.zayy.cn/os/UserLiuxiang.aspx'
                 '?time1={}&time2={}&titlename=&pihao=')
    purchase_url = ('http://www.zayy.cn/os/UserDataquery.aspx'
                    '?time1={}&time2={}&titlename=&pihaotxt=')
    columns = (
        'company_id', 'delivery_id', 'delivery_name', 'data_version',
        'data_type', 'bill_type', 'drug_name', 'drug_specification',
        'supplier_name', 'drug_unit', 'drug_number', 'drug_batch',
        'valid_till', 'hospital_name', 'hospital_address', 'sell_time',
        'create_time', 'update_time', 'drug_price', 'drug_hash',
        'hospital_hash', 'stream_hash', 'month',
    )
    # Scraped text goes through driver-side parameters so that a quote in
    # a cell can neither break the statement nor inject SQL.
    insert_sql = 'insert into {}({}) values({})'.format(
        table_name, ', '.join(columns), ', '.join(['%s'] * len(columns)))

    def stamp():
        """Return the current local time as 'YYYY-mm-dd HH:MM:SS'."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    def md5_hex(text):
        """Return the hex MD5 digest of *text* encoded as UTF-8."""
        return hashlib.md5(bytes(text, encoding="utf-8")).hexdigest()

    def cell(tree, row, col, default='', inner='/span'):
        """Return the stripped text of one grid cell, or *default* if absent."""
        found = tree.xpath(
            '%s[%s]/td[%s]%s/text()' % (rows_path, row, col, inner))
        return found[0].strip() if found else default

    def to_int(text):
        """Parse *text* as an int; unparsable or empty text counts as 0."""
        try:
            return int(text)
        except ValueError:
            return 0

    def fetch_tree(url):
        """GET *url* with the logged-in session and parse it as HTML."""
        page = self.sess_zhongan.get(
            url=url, headers=self.headers, verify=False)
        return etree.HTML(page.content.decode('utf-8', 'ignore'))

    def save_captcha(content):
        """Write the captcha image to disk and return its file name."""
        if SCRAPYD_TYPE == 1:
            base = os.path.dirname(
                os.path.dirname(os.path.dirname(__file__)))
            folder = os.path.join(base, 'static', spider_name)
            os.makedirs(folder, exist_ok=True)
            filename = os.path.join(folder, 'yzm.jpg')
        else:
            filename = r'./16-zhonganyzm.jpg'
        with open(filename, 'wb') as f:
            f.write(content)
        return filename

    def store_row(company_id, delivery_name, row):
        """Hash one scraped row and insert it into the local and CRM tables."""
        data_version = delivery_id + "-" + self.time_stamp
        data_type = 1  # data type: 1 = python crawler, 2 = import
        bill_type = row['bill_type']
        quantity = abs(row['drug_number'])
        create_time = stamp()
        update_time = create_time
        drug_hash = md5_hex("%s %s %s %s" % (
            row['drug_name'], row['drug_specification'], delivery_id,
            row['supplier_name']))
        hospital_hash = md5_hex("%s %s %s" % (
            delivery_id, row['hospital_name'], row['hospital_address']))
        stream_hash = md5_hex("%s %s %s %s %s %s %s %s %s %s" % (
            company_id, delivery_id, bill_type, drug_hash,
            row['drug_unit'], quantity, row['drug_batch'],
            row['valid_till'], hospital_hash, row['sell_time']))
        month = int(str(self.fist).replace('-', '')[0:6])
        params = (
            company_id, delivery_id, delivery_name, data_version,
            data_type, bill_type, row['drug_name'],
            row['drug_specification'], row['supplier_name'],
            row['drug_unit'], quantity, row['drug_batch'],
            row['valid_till'], row['hospital_name'],
            row['hospital_address'], row['sell_time'], create_time,
            update_time, row['drug_price'], drug_hash, hospital_hash,
            stream_hash, month,
        )
        # Reconnect first: the crawl can outlive the server's idle timeout.
        try:
            self.db.ping()
        except pymysql.MySQLError:
            self.db.connect()
        try:
            self.cursor.execute(insert_sql, params)
            self.db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm:%s' % (e, (insert_sql, params)))
        self.cursor.execute('select max(id) from order_metadata_zhongan')
        foreign_id = self.cursor.fetchone()[0]
        # NOTE(review): SQL_CRM_DATA is a str.format template defined
        # outside this block, so scraped text is still spliced into this
        # statement; parameterize it where the template is defined.
        sql_data_crm_data = SQL_CRM_DATA.format(
            company_id, delivery_id, delivery_name, table_name, foreign_id,
            data_version, data_type, bill_type, row['drug_name'],
            row['drug_specification'], row['supplier_name'], drug_hash,
            row['drug_unit'], quantity, row['drug_batch'],
            row['valid_till'], row['hospital_name'],
            row['hospital_address'], hospital_hash, month,
            row['sell_time'], stream_hash, create_time, update_time)
        try:
            self.cursor.execute(sql_data_crm_data)
            self.db.commit()
            self.crm_cursor.execute(sql_data_crm_data)
            self.crm_db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm_data:%s' %
                  (e, sql_data_crm_data))

    def read_sale(tree, row):
        """Read one row of the sales grid; return None when it has no drug name."""
        drug_name = cell(tree, row, 4)
        if not drug_name:
            return None
        return {
            # bill type: 1 purchase, 2 purchase return, 3 sale, 4 sale return
            'bill_type': 3,
            'drug_name': drug_name,
            'drug_specification': cell(tree, row, 5),
            'supplier_name': cell(tree, row, 7),
            'drug_unit': cell(tree, row, 6),
            'drug_number': to_int(cell(tree, row, 10)),
            'drug_batch': cell(tree, row, 8),
            'valid_till': cell(tree, row, 9, default='2000-01-01'),
            'hospital_name': cell(tree, row, 3),
            'hospital_address': '',
            'sell_time': cell(tree, row, 2, default='2000-01-01'),
            'drug_price': cell(tree, row, 11),
        }

    def read_purchase(tree, row):
        """Read one row of the purchase grid; return None when it has no drug name."""
        drug_name = cell(tree, row, 4)
        if not drug_name:
            return None
        return {
            'bill_type': 1,
            'drug_name': drug_name,
            'drug_specification': '',
            'supplier_name': cell(tree, row, 6),
            'drug_unit': '',
            # The quantity cell holds bare text, not a <span>.
            'drug_number': to_int(cell(tree, row, 8, inner='')),
            'drug_batch': cell(tree, row, 5),
            'valid_till': '2000-01-01',
            'hospital_name': '',
            'hospital_address': '',
            'sell_time': cell(tree, row, 9, default='2000-01-01'),
            'drug_price': '',
        }

    def store_grid(company_id, delivery_name, tree, read_row):
        """Store every usable data row of the grid found in *tree*."""
        # The first <tr> is skipped (presumably the header row); XPath
        # positions are 1-based, hence the offset of 2.
        for offset in range(len(tree.xpath(rows_path)) - 1):
            record = read_row(tree, offset + 2)
            if record is not None:
                store_row(company_id, delivery_name, record)

    def count_stored(enterprise_name):
        """Count the rows stored for *enterprise_name* in the crawled period."""
        if MONTHS == 0:
            self.cursor.execute(
                "SELECT count(*) from order_metadata_zhongan WHERE sell_time='{}' and delivery_name='{}'"
                .format(self.yesterday, enterprise_name))
        else:
            month = int(str(self.fist).replace('-', '')[0:6])
            self.cursor.execute(
                "SELECT count(*) from order_metadata_zhongan WHERE month='{}' and delivery_name='{}'"
                .format(month, enterprise_name))
        return self.cursor.fetchone()[0]

    def write_run_record(account, get_status, remark, get_time, get_date,
                         data_num, create_time):
        """Insert the crawl run record into the local and the CRM database."""
        company_id, enterprise_name, get_account = account
        sql_data_crm_record = SQL_CRM_RECORD.format(
            company_id, delivery_id, enterprise_name, get_account,
            spider_name, delivery_id + "-" + self.time_stamp, get_time,
            get_date, get_status, data_num, self.classify_success, remark,
            create_time, create_time)
        try:
            self.cursor.execute(sql_data_crm_record)
            self.db.commit()
            self.crm_cursor.execute(sql_data_crm_record)
            self.crm_db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm_record:%s' %
                  (e, sql_data_crm_record))

    self.crm_cursor.execute(
        "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
        .format(delivery_id))
    for (company_id, enterprise_name, get_account, get_pwd,
         is_enable) in self.crm_cursor.fetchall():
        if is_enable != 1:
            continue
        account = (company_id, enterprise_name, get_account)
        self.number += 1

        # Load the login form to obtain the ASP.NET hidden fields.
        login_page = self.sess_zhongan.get(
            url=self.start_urls[0], headers=self.headers, verify=False)
        login_tree = etree.HTML(login_page.content.decode('utf-8', 'ignore'))
        viewstate = login_tree.xpath('//*[@id="__VIEWSTATE"]/@value')[0]
        viewstate_generator = login_tree.xpath(
            '//*[@id="__VIEWSTATEGENERATOR"]/@value')[0]

        image = self.sess_zhongan.get(
            url='http://www.zayy.cn/os/tools/VerifyCode1.aspx?',
            headers=self.headers, verify=False)
        filename = save_captcha(image.content)
        # Arguments: image file, captcha type code, timeout in seconds.
        _cid, code_result = YDMHttp().run(filename, 4005, 60)

        self.sess_zhongan.post(
            url='http://www.zayy.cn/os/Default.aspx',
            data={
                "__VIEWSTATE": viewstate,
                "__VIEWSTATEGENERATOR": viewstate_generator,
                "username": get_account,
                "userpwd": get_pwd,
                "verifycode": code_result,
                "ImgBtnLogin.x": "0",
                "ImgBtnLogin.y": "0",
            },
            headers=self.headers, verify=False)
        first_page = self.sess_zhongan.get(
            url=sales_url.format(self.fist, self.last),
            headers=self.headers, verify=False)
        try:
            listing = first_page.content.decode('utf-8', 'ignore')
            # This column header is used as the sign that login succeeded.
            if '出库时间' not in listing:
                raise ValueError('sales list marker not found after login')
            pager = etree.HTML(listing).xpath(
                '//*[@id="ctl00_ContentPlaceHolder1_AspNetPager1"]/a/text()')
            try:
                page_num = int(pager[-3]) + 1
            except (IndexError, ValueError):
                # No usable pager: read a single page.
                page_num = 2

            for page in range(1, page_num):
                tree = fetch_tree(
                    (sales_url + '&page={}').format(
                        self.fist, self.last, page))
                store_grid(company_id, enterprise_name, tree, read_sale)

            tree = fetch_tree(purchase_url.format(self.fist, self.last))
            store_grid(company_id, enterprise_name, tree, read_purchase)

            try:
                crm_request_data = {
                    'version': delivery_id + "-" + self.time_stamp,
                    'streamType': streamType,
                }
                reply = requests.post(url=CRM_REQUEST_URL,
                                      data=crm_request_data,
                                      headers=self.headers, verify=False)
                self.classify_success = json.loads(
                    reply.content.decode('utf-8'))['success']
            except Exception:
                print('爬虫调取后端接口错误')

            get_time = stamp()
            get_date = int(time.strftime("%Y%m%d", time.localtime()))
            data_num = count_stored(enterprise_name)
            remark = ''
            create_time = stamp()
            write_run_record(account, 1, remark, get_time, get_date,
                             data_num, create_time)
            sql_data_crm_version = SQL_CRM_VERSION.format(
                delivery_id + "-" + self.time_stamp, enterprise_name,
                company_id, create_time, create_time, data_num, remark)
            try:
                self.cursor.execute(sql_data_crm_version)
                self.db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm_version:%s' %
                      (e, sql_data_crm_version))
        except Exception as e:
            print('zhongan-登入失败:%s' % e)
            print('self.number', self.number)
            if self.number < 4:
                self.parse('aa')
            else:
                create_time = stamp()
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                data_num = count_stored(enterprise_name)
                write_run_record(account, 2, '账号或密码错了', create_time,
                                 get_date, data_num, create_time)
                print('账号密码或者验证码错误')
def parse(self, response):
    """Crawl the zhejianglaiyi portal (www.600216.com) and store its drug flow rows.

    For every enabled account registered under this delivery id in
    ``base_delivery_enterprise`` the method logs in (the captcha is solved
    through ``YDMHttp``), pages through the sales list (bill type 3, or 4
    when the quantity is negative) and the purchase list (bill type 1, or 2
    when the quantity is negative), and writes each row into
    ``order_metadata_zhejianglaiyi`` plus, through ``SQL_CRM_DATA``, into
    both the local and the CRM database.  It then posts the data version to
    ``CRM_REQUEST_URL`` and stores a run record and a version record.

    Any exception raised while reading or storing the lists is reported as
    a failed login: the method calls itself again while ``self.number`` is
    below 4 and stores a failure record otherwise.

    Args:
        response: Not used; retries pass a placeholder string.
    """
    delivery_id = 'DDA100100K'
    spider_name = '10-zhejianglaiyi'
    table_name = 'order_metadata_zhejianglaiyi'
    rows_path = '//table/tr/td/table[1]/tr'
    sales_url = ('http://www.600216.com/lx/sa.asp?mypage={}&searchcustom='
                 '&searchgoodsid=&searchDateBegin={}&searchDateEnd={}')
    purchase_url = ('http://www.600216.com/lx/su.asp?mypage={}&searchSupply='
                    '&searchgoodsid=&searchDateBegin={}&searchDateEnd={}')
    columns = (
        'company_id', 'delivery_id', 'delivery_name', 'data_version',
        'data_type', 'bill_type', 'drug_name', 'trade_name',
        'medical_community_member', 'drug_specification', 'supplier_name',
        'drug_unit', 'drug_number', 'drug_batch', 'valid_till',
        'hospital_name', 'hospital_address', 'drug_price', 'sell_time',
        'create_time', 'update_time', 'drug_hash', 'hospital_hash',
        'stream_hash', 'month',
    )
    # Scraped text goes through driver-side parameters so that a quote in
    # a cell can neither break the statement nor inject SQL.
    insert_sql = 'insert into {}({}) values({})'.format(
        table_name, ', '.join(columns), ', '.join(['%s'] * len(columns)))

    def stamp():
        """Return the current local time as 'YYYY-mm-dd HH:MM:SS'."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    def md5_hex(text):
        """Return the hex MD5 digest of *text* encoded as UTF-8."""
        return hashlib.md5(bytes(text, encoding="utf-8")).hexdigest()

    def cell(tree, row, col, default='', inner=''):
        """Return the stripped text of one table cell, or *default* if absent."""
        found = tree.xpath(
            '%s[%s]/td[%s]%s/text()' % (rows_path, row, col, inner))
        return found[0].strip() if found else default

    def to_quantity(text):
        """Parse *text* as a rounded number; unparsable text counts as 0."""
        try:
            return round(float(text))
        except (ValueError, OverflowError):
            return 0

    def save_captcha(content):
        """Write the captcha image to disk and return its file name."""
        if SCRAPYD_TYPE == 1:
            base = os.path.dirname(
                os.path.dirname(os.path.dirname(__file__)))
            folder = os.path.join(base, 'static', spider_name)
            os.makedirs(folder, exist_ok=True)
            filename = os.path.join(folder, 'yzm.jpg')
        else:
            filename = r'./10-zhejianglaiyiyzm.jpg'
        with open(filename, 'wb') as f:
            f.write(content)
        return filename

    def store_row(company_id, delivery_name, row):
        """Hash one scraped row and insert it into the local and CRM tables."""
        data_version = delivery_id + "-" + self.time_stamp
        data_type = 1  # data type: 1 = python crawler, 2 = import
        bill_type = row['bill_type']
        quantity = abs(row['drug_number'])
        create_time = stamp()
        update_time = create_time
        drug_hash = md5_hex("%s %s %s %s" % (
            row['drug_name'], row['drug_specification'], delivery_id,
            row['supplier_name']))
        hospital_hash = md5_hex("%s %s %s" % (
            delivery_id, row['hospital_name'], row['hospital_address']))
        stream_hash = md5_hex("%s %s %s %s %s %s %s %s %s %s" % (
            company_id, delivery_id, bill_type, drug_hash,
            row['drug_unit'], quantity, row['drug_batch'],
            row['valid_till'], hospital_hash, row['sell_time']))
        month = int(str(self.fist).replace('-', '')[0:6])
        params = (
            company_id, delivery_id, delivery_name, data_version,
            data_type, bill_type, row['drug_name'], row['trade_name'],
            row['medical_community_member'], row['drug_specification'],
            row['supplier_name'], row['drug_unit'], quantity,
            row['drug_batch'], row['valid_till'], row['hospital_name'],
            row['hospital_address'], row['drug_price'], row['sell_time'],
            create_time, update_time, drug_hash, hospital_hash,
            stream_hash, month,
        )
        # Reconnect first: the crawl can outlive the server's idle timeout.
        try:
            self.db.ping()
        except pymysql.MySQLError:
            self.db.connect()
        try:
            self.cursor.execute(insert_sql, params)
            self.db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm:%s' % (e, (insert_sql, params)))
        self.cursor.execute(
            'select max(id) from order_metadata_zhejianglaiyi')
        foreign_id = self.cursor.fetchone()[0]
        # NOTE(review): SQL_CRM_DATA is a str.format template defined
        # outside this block, so scraped text is still spliced into this
        # statement; parameterize it where the template is defined.
        sql_data_crm_data = SQL_CRM_DATA.format(
            company_id, delivery_id, delivery_name, table_name, foreign_id,
            data_version, data_type, bill_type, row['drug_name'],
            row['drug_specification'], row['supplier_name'], drug_hash,
            row['drug_unit'], quantity, row['drug_batch'],
            row['valid_till'], row['hospital_name'],
            row['hospital_address'], hospital_hash, month,
            row['sell_time'], stream_hash, create_time, update_time)
        try:
            self.cursor.execute(sql_data_crm_data)
            self.db.commit()
            self.crm_cursor.execute(sql_data_crm_data)
            self.crm_db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm_data:%s' %
                  (e, sql_data_crm_data))

    def read_sale(tree, row):
        """Read one row of the sales table; return None when the name cell is missing."""
        drug_name = cell(tree, row, 5, default=None)
        if drug_name is None:
            return None
        quantity = to_quantity(cell(tree, row, 11, inner='/b'))
        return {
            # bill type: 1 purchase, 2 purchase return, 3 sale, 4 sale return
            'bill_type': 4 if quantity < 0 else 3,
            'drug_name': drug_name,
            'trade_name': cell(tree, row, 6),
            'drug_specification': cell(tree, row, 7),
            'supplier_name': cell(tree, row, 8),
            'drug_unit': cell(tree, row, 12),
            'medical_community_member': cell(tree, row, 3),
            'drug_number': quantity,
            'drug_batch': cell(tree, row, 9, inner='/b'),
            'valid_till': cell(tree, row, 10, default='2000-01-01'),
            'hospital_name': cell(tree, row, 2),
            'hospital_address': cell(tree, row, 4),
            'sell_time': cell(tree, row, 1, default='2000-01-01'),
            'drug_price': cell(tree, row, 13),
        }

    def read_purchase(tree, row):
        """Read one row of the purchase table; return None when the name cell is missing."""
        drug_name = cell(tree, row, 3, default=None)
        if drug_name is None:
            return None
        quantity = to_quantity(cell(tree, row, 9, inner='/b'))
        return {
            'bill_type': 2 if quantity < 0 else 1,
            'drug_name': drug_name,
            'trade_name': cell(tree, row, 4),
            'drug_specification': cell(tree, row, 5),
            'supplier_name': cell(tree, row, 2),
            'drug_unit': cell(tree, row, 10),
            'medical_community_member': '',
            'drug_number': quantity,
            'drug_batch': cell(tree, row, 7, inner='/b'),
            'valid_till': cell(tree, row, 8, default='2000-01-01'),
            'hospital_name': '',
            'hospital_address': '',
            'sell_time': cell(tree, row, 1, default='2000-01-01'),
            # Read from the purchase page itself; the price used to be
            # taken from the last sales page by mistake.
            # NOTE(review): column 13 is carried over from the sales
            # layout - confirm the price column of su.asp.
            'drug_price': cell(tree, row, 13),
        }

    def crawl(company_id, delivery_name, url_template, read_row):
        """Walk the paged list at *url_template* and store every usable row."""
        for page in range(1, 1000):
            reply = self.sess_zhejianglaiyi.get(
                url=url_template.format(page, self.fist, self.last),
                headers=self.headers, verify=False)
            tree = etree.HTML(reply.content.decode('gb2312'))
            # Two of the matched <tr> elements are not data rows; data
            # starts at XPath position 2.
            for offset in range(len(tree.xpath(rows_path)) - 2):
                record = read_row(tree, offset + 2)
                if record is not None:
                    store_row(company_id, delivery_name, record)
            last_link = tree.xpath('//*[@id="page"]/a[last()]/text()')
            # Stop once the pager no longer ends with a "next page" link.
            if not last_link or last_link[0] != '下一页>':
                break

    def count_stored(enterprise_name):
        """Count the rows stored for *enterprise_name* in the crawled period."""
        if MONTHS == 0:
            self.cursor.execute(
                "SELECT count(*) from order_metadata_zhejianglaiyi WHERE sell_time='{}' and delivery_name='{}'"
                .format(self.yesterday, enterprise_name))
        else:
            month = int(str(self.fist).replace('-', '')[0:6])
            self.cursor.execute(
                "SELECT count(*) from order_metadata_zhejianglaiyi WHERE month='{}' and delivery_name='{}'"
                .format(month, enterprise_name))
        return self.cursor.fetchone()[0]

    def write_run_record(account, get_status, remark, get_time, get_date,
                         data_num, create_time):
        """Insert the crawl run record into the local and the CRM database."""
        company_id, enterprise_name, get_account = account
        sql_data_crm_record = SQL_CRM_RECORD.format(
            company_id, delivery_id, enterprise_name, get_account,
            spider_name, delivery_id + "-" + self.time_stamp, get_time,
            get_date, get_status, data_num, self.classify_success, remark,
            create_time, create_time)
        try:
            self.cursor.execute(sql_data_crm_record)
            self.db.commit()
            self.crm_cursor.execute(sql_data_crm_record)
            self.crm_db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm_record:%s' %
                  (e, sql_data_crm_record))

    self.crm_cursor.execute(
        "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
        .format(delivery_id))
    for (company_id, enterprise_name, get_account, get_pwd,
         is_enable) in self.crm_cursor.fetchall():
        if is_enable != 1:
            continue
        account = (company_id, enterprise_name, get_account)
        self.number += 1

        # The body is not needed; the request is kept because the session
        # stores whatever cookies the site sets before login.
        self.sess_zhejianglaiyi.get(
            url=self.start_urls[0], headers=self.headers, verify=False)
        image = self.sess_zhejianglaiyi.get(
            url='http://www.600216.com/lx/bmp.asp?flg=login',
            headers=self.headers, verify=False)
        filename = save_captcha(image.content)
        # Arguments: image file, captcha type code, timeout in seconds.
        _cid, code_result = YDMHttp().run(filename, 4004, 60)

        self.sess_zhejianglaiyi.post(
            url='http://www.600216.com/lx/index.asp',
            data={
                "loginName": get_account,
                "LoginPwd": get_pwd,
                "imgcodes": code_result,
                "login": "******",
            },
            headers=self.headers, verify=False)
        landing = self.sess_zhejianglaiyi.get(
            url='http://www.600216.com/lx/sa.asp?fir=1',
            headers=self.headers, verify=False)
        try:
            # This page title is used as the sign that login succeeded.
            if '销售明细查询' not in landing.content.decode('gb2312'):
                raise ValueError('sales query marker not found after login')

            crawl(company_id, enterprise_name, sales_url, read_sale)
            crawl(company_id, enterprise_name, purchase_url, read_purchase)

            try:
                crm_request_data = {
                    'version': delivery_id + "-" + self.time_stamp,
                    'streamType': streamType,
                }
                reply = requests.post(url=CRM_REQUEST_URL,
                                      data=crm_request_data,
                                      headers=self.headers, verify=False)
                self.classify_success = json.loads(
                    reply.content.decode('utf-8'))['success']
            except Exception:
                print('爬虫调取后端接口错误')

            get_time = stamp()
            get_date = int(time.strftime("%Y%m%d", time.localtime()))
            data_num = count_stored(enterprise_name)
            remark = ''
            create_time = stamp()
            write_run_record(account, 1, remark, get_time, get_date,
                             data_num, create_time)
            sql_data_crm_version = SQL_CRM_VERSION.format(
                delivery_id + "-" + self.time_stamp, enterprise_name,
                company_id, create_time, create_time, data_num, remark)
            try:
                self.cursor.execute(sql_data_crm_version)
                self.db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm_version:%s' %
                      (e, sql_data_crm_version))
        except Exception as e:
            print('zhejianglaiyi-登入失败:%s' % e)
            if self.number < 4:
                self.parse('aa')
            else:
                create_time = stamp()
                get_date = int(time.strftime("%Y%m%d", time.localtime()))
                data_num = count_stored(enterprise_name)
                write_run_record(account, 2, '账号或密码错了', create_time,
                                 get_date, data_num, create_time)
                print('账号密码或者验证码错误')
def parse(self, response):
    """Log in to drugoogle.com and import the sale and purchase flow tables.

    For every enabled account stored under this delivery id the method:

    1. opens the start page, downloads the captcha image, has ``YDMHttp``
       read it and posts the login form;
    2. downloads the sale report (``medicinegototab4``) and the purchase
       report (``medicinegototab3``) for ``self.fist`` .. ``self.last`` and
       stores every table row in ``order_metadata_jiaxingyinte`` plus the
       shared table addressed by ``SQL_CRM_DATA``;
    3. calls the backend at ``CRM_REQUEST_URL`` and writes one record row and
       one version row describing the run.

    Any exception raised during steps 2-3 is treated as a failed login: the
    method calls itself again while ``self.number`` is below 4, otherwise it
    writes a record row with status 2.

    Args:
        response: Never read; the retry passes the placeholder ``'aa'``.

    Returns:
        None. All results are written to the two databases.
    """
    delivery_id = 'F617B115D6F3447983E94BB781231271'
    spider_tag = '21-jiaxingyinte'
    table_name = 'order_metadata_jiaxingyinte'
    rows_xpath = '/html/body/table/tr/td/table[1]/tr'
    # Data type written with every row: 1 = scraped by Python, 2 = imported.
    data_type = 1
    # NOTE(review): every statement below is built with str.format from scraped
    # text, so a quote inside a cell breaks (or alters) the statement. The
    # shared SQL_CRM_* templates are defined elsewhere, so this is left as is.
    sql_crm = "insert into order_metadata_jiaxingyinte(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, trade_id, sales_ticket_id, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, warehouse_time, create_time, update_time, drug_hash, hospital_hash, stream_hash, drug_price, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"

    # Bill types: 1 purchase, 2 purchase return, 3 sale, 4 sale return.
    # A column set to None does not exist in that report and is stored as ''.
    sale_layout = {
        'bill_type': 3,
        'return_bill_type': 4,
        'number_col': 10,
        'valid_till_col': 9,
        'supplier_col': None,
        'hospital_name_col': 12,
        'hospital_address_col': 14,
        # Only the sale report skips rows whose drug-name cell is blank.
        'skip_blank_name': True,
    }
    purchase_layout = {
        'bill_type': 1,
        'return_bill_type': 2,
        'number_col': 11,
        'valid_till_col': 10,
        'supplier_col': 9,
        'hospital_name_col': None,
        'hospital_address_col': None,
        'skip_blank_name': False,
    }

    def now_text():
        # Current local time in the format used by all timestamp columns.
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    def md5_hex(text):
        # Hex MD5 digest of the UTF-8 encoded text.
        return hashlib.md5(text.encode('utf-8')).hexdigest()

    def cell(doc, row, col, default=''):
        # Stripped text of one table cell, or `default` when it is missing.
        if col is None:
            return default
        try:
            return doc.xpath(
                '%s[%s]/td[%s]/text()' % (rows_xpath, row, col))[0].strip()
        except IndexError:
            return default

    def save_captcha(content):
        # Write the captcha image to disk and return the file name.
        if SCRAPYD_TYPE == 1:
            root = os.path.dirname(
                os.path.dirname(os.path.dirname(__file__)))
            folder = os.path.join(root, 'static', spider_tag)
            os.makedirs(folder, exist_ok=True)
            filename = os.path.join(folder, 'yzm.jpg')
        else:
            filename = r'./21-jiaxingyinteyzm.jpg'
        with open(filename, 'wb') as f:
            f.write(content)
        return filename

    def fetch_doc(url):
        # GET a report page with the logged-in session and parse it.
        page = self.sess_jiaxingyinte.get(
            url=url, headers=self.headers, verify=False)
        return etree.HTML(page.content.decode('utf-8'))

    def import_rows(doc, layout, company_id, enterprise_name):
        # Store every data row of one report table in both databases.
        delivery_name = enterprise_name
        data_version = delivery_id + "-" + self.time_stamp
        month = int(str(self.fist).replace('-', '')[0:6])
        # Data rows start at tr[2]; three rows of the table are not data.
        for offset in range(len(doc.xpath(rows_xpath)) - 3):
            row = offset + 2
            drug_name = cell(doc, row, 5, None)
            if drug_name is None:
                continue
            if layout['skip_blank_name'] and not drug_name:
                continue

            trade_id = cell(doc, row, 4)
            drug_specification = cell(doc, row, 6)
            supplier_name = cell(doc, row, layout['supplier_col'])
            drug_unit = cell(doc, row, 7)
            sales_ticket_id = cell(doc, row, 1)
            bill_type = layout['bill_type']
            try:
                drug_number = round(
                    float(cell(doc, row, layout['number_col'], None)))
            except (TypeError, ValueError, OverflowError):
                # Missing or non-numeric quantity is stored as 0.
                drug_number = 0
            if drug_number < 0:
                # A negative quantity marks the row as a return.
                bill_type = layout['return_bill_type']
            drug_batch = cell(doc, row, 8)
            valid_till = cell(doc, row, layout['valid_till_col'],
                              '2000-01-01')
            hospital_name = cell(doc, row, layout['hospital_name_col'])
            hospital_address = cell(doc, row,
                                    layout['hospital_address_col'])
            sell_time = cell(doc, row, 2, '2000-01-01')
            warehouse_time = cell(doc, row, 3, '2000-01-01')
            drug_price = ''
            create_time = now_text()
            update_time = create_time

            drug_hash = md5_hex("%s %s %s %s" % (
                drug_name, drug_specification, delivery_id, supplier_name))
            hospital_hash = md5_hex("%s %s %s" % (
                delivery_id, hospital_name, hospital_address))
            stream_hash = md5_hex("%s %s %s %s %s %s %s %s %s %s" % (
                company_id, delivery_id, bill_type, drug_hash, drug_unit,
                abs(drug_number), drug_batch, valid_till, hospital_hash,
                sell_time))

            sql_data_crm = sql_crm.format(
                company_id, delivery_id, delivery_name, data_version,
                data_type, bill_type, drug_name, trade_id, sales_ticket_id,
                drug_specification, supplier_name, drug_unit,
                abs(drug_number), drug_batch, valid_till, hospital_name,
                hospital_address, sell_time, warehouse_time, create_time,
                update_time, drug_hash, hospital_hash, stream_hash,
                drug_price, month)
            try:
                self.db.ping()
            except pymysql.MySQLError:
                self.db.connect()
            try:
                self.cursor.execute(sql_data_crm)
                self.db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm:%s' % (e, sql_data_crm))

            # NOTE(review): max(id) is the row just inserted only when the
            # insert above succeeded and nothing else writes to the table.
            self.cursor.execute(
                'select max(id) from order_metadata_jiaxingyinte')
            foreign_id = self.cursor.fetchone()[0]
            sql_data_crm_data = SQL_CRM_DATA.format(
                company_id, delivery_id, delivery_name, table_name,
                foreign_id, data_version, data_type, bill_type, drug_name,
                drug_specification, supplier_name, drug_hash, drug_unit,
                abs(drug_number), drug_batch, valid_till, hospital_name,
                hospital_address, hospital_hash, month, sell_time,
                stream_hash, create_time, update_time)
            # The CRM connection may have gone stale during a long import.
            try:
                self.crm_db.ping()
            except pymysql.MySQLError:
                self.crm_db.connect()
            try:
                self.cursor.execute(sql_data_crm_data)
                self.db.commit()
                self.crm_cursor.execute(sql_data_crm_data)
                self.crm_db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm_data:%s' %
                      (e, sql_data_crm_data))

    def count_rows(enterprise_name):
        # Number of stored rows for yesterday (MONTHS == 0) or for the month.
        if MONTHS == 0:
            self.cursor.execute(
                "SELECT count(*) from order_metadata_jiaxingyinte WHERE sell_time='{}' and delivery_name='{}'"
                .format(self.yesterday, enterprise_name))
        else:
            month = int(str(self.fist).replace('-', '')[0:6])
            self.cursor.execute(
                "SELECT count(*) from order_metadata_jiaxingyinte WHERE month='{}' and delivery_name='{}'"
                .format(month, enterprise_name))
        return self.cursor.fetchone()[0]

    def write_record(company_id, enterprise_name, get_account, get_status,
                     remark):
        # Insert the run record into both databases.
        # Returns (data_num, create_time) for the version row.
        get_time = now_text()
        get_date = int(time.strftime("%Y%m%d", time.localtime()))
        data_num = count_rows(enterprise_name)
        # A successful run stamps create_time after the count query; a failed
        # run reuses the time taken before it.
        create_time = now_text() if get_status == 1 else get_time
        update_time = create_time
        sql_data_crm_record = SQL_CRM_RECORD.format(
            company_id, delivery_id, enterprise_name, get_account,
            spider_tag, delivery_id + "-" + self.time_stamp, get_time,
            get_date, get_status, data_num, self.classify_success, remark,
            create_time, update_time)
        try:
            self.cursor.execute(sql_data_crm_record)
            self.db.commit()
            self.crm_cursor.execute(sql_data_crm_record)
            self.crm_db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm_record:%s' %
                  (e, sql_data_crm_record))
        return data_num, create_time

    self.crm_cursor.execute(
        "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
        .format(delivery_id))
    for data_info in self.crm_cursor.fetchall():
        company_id = data_info[0]
        enterprise_name = data_info[1]
        get_account = data_info[2]
        get_pwd = data_info[3]
        is_enable = data_info[4]
        if is_enable != 1:
            continue

        # Counts login attempts across the recursive retries below.
        self.number += 1
        # The start page is requested only for its effect on the session.
        self.sess_jiaxingyinte.get(url=self.start_urls[0],
                                   headers=self.headers,
                                   verify=False)
        image = self.sess_jiaxingyinte.get(
            url="http://www.drugoogle.com/verifyCode/verifyCode.jsp?%s" %
            (int(time.time() * 1000)),
            headers=self.headers,
            verify=False)
        filename = save_captcha(image.content)

        codetype = 4004
        # Timeout in seconds.
        timeout = 60
        ydm = YDMHttp()
        # Only the second element of the result is used, as the captcha text.
        _, yzm = ydm.run(filename, codetype, timeout)
        data = {
            "username": get_account,
            "password": get_pwd,
            "verifyCode": yzm
        }
        self.sess_jiaxingyinte.post(
            url="http://www.drugoogle.com/index/registerloginjson.jspx?%s" %
            (int(time.time() * 1000)),
            data=data,
            headers=self.headers,
            verify=False)
        data_html = self.sess_jiaxingyinte.get(
            url='http://www.drugoogle.com/member/index.jspx?catlog=4',
            headers=self.headers,
            verify=False)
        try:
            # Raises IndexError when the member page lacks this menu entry,
            # which sends control to the retry handler below.
            re.findall(r'药品流向查询', data_html.content.decode('utf-8'))[0]

            sale_doc = fetch_doc(
                'http://www.drugoogle.com/member/agentman/medicineGoto/medicinegototab4.jspx?entryId=16&medicineId=0&company_name=&timeType=1&startTime={}%2000:00:00&endTime={}%2023:59:59&buyerType=0'
                .format(self.fist, self.last))
            import_rows(sale_doc, sale_layout, company_id, enterprise_name)

            purchase_doc = fetch_doc(
                'http://www.drugoogle.com/member/agentman/medicineGoto/medicinegototab3.jspx?entryId=16&medicineId=0&timeType=1&startTime={}%2000:00:00&endTime={}%2023:59:59'
                .format(self.fist, self.last))
            import_rows(purchase_doc, purchase_layout, company_id,
                        enterprise_name)

            try:
                crm_request_data = {
                    'version': delivery_id + "-" + self.time_stamp,
                    'streamType': streamType,
                }
                html = requests.post(url=CRM_REQUEST_URL,
                                     data=crm_request_data,
                                     headers=self.headers,
                                     verify=False)
                self.classify_success = json.loads(
                    html.content.decode('utf-8'))['success']
            except Exception:
                # Best effort: the record is still written when this fails.
                print('爬虫调取后端接口错误')

            remark = ''
            data_num, create_time = write_record(
                company_id, enterprise_name, get_account, 1, remark)
            sql_data_crm_version = SQL_CRM_VERSION.format(
                delivery_id + "-" + self.time_stamp, enterprise_name,
                company_id, create_time, create_time, data_num, remark)
            try:
                self.cursor.execute(sql_data_crm_version)
                self.db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm_version:%s' %
                      (e, sql_data_crm_version))
        except Exception as e:
            print('jiaxingyinte-登入失败:%s' % e)
            print('self.number', self.number)
            if self.number < 4:
                # Retry the whole run with a fresh captcha.
                self.parse('aa')
            else:
                write_record(company_id, enterprise_name, get_account, 2,
                             '账号或密码错了')
                print('账号密码或者验证码错误')
def parse(self, response):
    """Log in to zjhuiren.com and import the purchase and sale detail pages.

    For every enabled account stored under this delivery id the method:

    1. loads the login page, downloads the captcha image for its
       ``codeKey``, has ``YDMHttp`` read it and posts the login form;
    2. queries the purchase report (``gjmx.asp``) and the sale report
       (``xsmx.asp``) for ``self.fist`` .. ``self.last``, walks every result
       page and stores each table row in ``order_metadata_huiren`` plus the
       shared table addressed by ``SQL_CRM_DATA``;
    3. calls the backend at ``CRM_REQUEST_URL`` and writes one record row and
       one version row describing the run.

    Any exception raised during steps 2-3 is treated as a failed login: the
    method calls itself again while ``self.number`` is below 8, otherwise it
    writes a record row with status 2.

    A cell that is missing from a row is stored as an empty string. This
    includes the unit and the price-sum columns, which previously raised
    ``NameError`` or silently reused the preceding row's value.

    Args:
        response: Never read; the retry passes the placeholder ``'aa'``.

    Returns:
        None. All results are written to the two databases.
    """
    delivery_id = 'DDA1001003'
    spider_tag = '03-huiren'
    table_name = 'order_metadata_huiren'
    data_type = 1
    # NOTE(review): every statement below is built with str.format from scraped
    # text, so a quote inside a cell breaks (or alters) the statement. The
    # shared SQL_CRM_* templates are defined elsewhere, so this is left as is.
    sql_crm = "insert into order_metadata_huiren(company_id, delivery_id, delivery_name, data_version, data_type, bill_type, drug_name, drug_specification, supplier_name, drug_unit, drug_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, create_time, update_time, drug_price, drug_price_sum, goods_id, trade_name, drug_hash, hospital_hash, stream_hash, month) values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"

    # Bill types 1/2 are used for the purchase report and 3/4 for the sale
    # report; the second value of each pair is chosen for negative quantities.
    # A column set to None is not read for that report and is stored as ''.
    purchase_layout = {
        'page_url': 'http://www.zjhuiren.com/gjmx.asp?Page={}&ID=1888&bgtime={}&ovtime={}&spbm=&tym=&dwmch=&px=rq&oldshow=purchase',
        'bill_type': 1,
        'return_bill_type': 2,
        'number_col': 8,
        'batch_col': 10,
        'valid_till_col': 9,
        'hospital_name_col': None,
        'hospital_address_col': None,
        'price_col': None,
        'price_sum_col': None,
    }
    sale_layout = {
        'page_url': 'http://www.zjhuiren.com/xsmx.asp?Page={}&ID=1888&bgtime={}&ovtime={}&spbm=&tym=&dwmch=&px=rq&oldshow=sale',
        'bill_type': 3,
        'return_bill_type': 4,
        'number_col': 9,
        'batch_col': 8,
        'valid_till_col': 12,
        'hospital_name_col': 13,
        'hospital_address_col': 14,
        'price_col': 10,
        'price_sum_col': 11,
    }

    def now_text():
        # Current local time in the format used by all timestamp columns.
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    def md5_hex(text):
        # Hex MD5 digest of the UTF-8 encoded text.
        return hashlib.md5(text.encode('utf-8')).hexdigest()

    def cell(doc, row, col, default=''):
        # Stripped text of one table cell, or `default` when it is missing.
        if col is None:
            return default
        try:
            return doc.xpath(
                '/html/body/table[2]/tr[%s]/td[%s]/text()' %
                (row, col))[0].strip()
        except IndexError:
            return default

    def save_captcha(content):
        # Write the captcha image to disk and return the file name.
        if SCRAPYD_TYPE == 1:
            root = os.path.dirname(
                os.path.dirname(os.path.dirname(__file__)))
            folder = os.path.join(root, 'static', spider_tag)
            os.makedirs(folder, exist_ok=True)
            filename = os.path.join(folder, 'yzm.jpg')
        else:
            filename = r'./03-huirenyzm.jpg'
        with open(filename, 'wb') as f:
            f.write(content)
        return filename

    def query_form(kind):
        # Form body of the report query; `kind` is 'purchase' or 'sale'.
        return {
            "bgtime": self.fist,
            "ovtime": self.last,
            "oldshow": kind,
            "px": "rq",
            "spbm": "",
            "tym": "",
            "dwmch": "",
        }

    def page_stop(resp):
        # One past the page count shown in the pager of the query result;
        # 1 (no pages) when the pager is absent or unreadable.
        try:
            return int(
                re.findall(r'【页次:1/(.+?)页】',
                           resp.content.decode('utf-8'))[0]) + 1
        except (IndexError, ValueError):
            return 1

    def import_rows(doc, layout, company_id, enterprise_name):
        # Store every data row of one result page in both databases.
        delivery_name = enterprise_name
        data_version = delivery_id + "-" + self.time_stamp
        month = int(str(self.fist).replace('-', '')[0:6])
        row_count = len(doc.xpath('/html/body/table[2]/tr'))
        # Data rows start at tr[2]; the last tr is not read.
        for row in range(2, row_count):
            drug_name = cell(doc, row, 3, None)
            if drug_name is None:
                continue

            drug_specification = cell(doc, row, 5)
            supplier_name = cell(doc, row, 6)
            drug_unit = cell(doc, row, 7)
            bill_type = layout['bill_type']
            try:
                drug_number = int(cell(doc, row, layout['number_col'], None))
            except (TypeError, ValueError):
                # Missing or non-integer quantity is stored as 0.
                drug_number = 0
            if drug_number < 0:
                # A negative quantity marks the row as a return.
                bill_type = layout['return_bill_type']
            drug_batch = cell(doc, row, layout['batch_col'])
            # A blank expiry date gets the same placeholder as a missing one.
            valid_till = cell(doc, row, layout['valid_till_col']) or '2000-01-01'
            hospital_name = cell(doc, row, layout['hospital_name_col'])
            hospital_address = cell(doc, row,
                                    layout['hospital_address_col'])
            sell_time = cell(doc, row, 1)
            create_time = now_text()
            update_time = create_time
            drug_price = cell(doc, row, layout['price_col'])
            drug_price_sum = cell(doc, row, layout['price_sum_col'])
            goods_id = cell(doc, row, 2)
            trade_name = cell(doc, row, 4)

            drug_hash = md5_hex("%s %s %s %s" % (
                drug_name, drug_specification, delivery_id, supplier_name))
            hospital_hash = md5_hex("%s %s %s" % (
                delivery_id, hospital_name, hospital_address))
            stream_hash = md5_hex("%s %s %s %s %s %s %s %s %s %s" % (
                company_id, delivery_id, bill_type, drug_hash, drug_unit,
                abs(drug_number), drug_batch, valid_till, hospital_hash,
                sell_time))

            sql_data_crm = sql_crm.format(
                company_id, delivery_id, delivery_name, data_version,
                data_type, bill_type, drug_name, drug_specification,
                supplier_name, drug_unit, abs(drug_number), drug_batch,
                valid_till, hospital_name, hospital_address, sell_time,
                create_time, update_time, drug_price, drug_price_sum,
                goods_id, trade_name, drug_hash, hospital_hash, stream_hash,
                month)
            try:
                self.db.ping()
            except pymysql.MySQLError:
                self.db.connect()
            try:
                self.cursor.execute(sql_data_crm)
                self.db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm:%s' % (e, sql_data_crm))

            # NOTE(review): max(id) is the row just inserted only when the
            # insert above succeeded and nothing else writes to the table.
            self.cursor.execute('select max(id) from order_metadata_huiren')
            foreign_id = self.cursor.fetchone()[0]
            sql_data_crm_data = SQL_CRM_DATA.format(
                company_id, delivery_id, delivery_name, table_name,
                foreign_id, data_version, data_type, bill_type, drug_name,
                drug_specification, supplier_name, drug_hash, drug_unit,
                abs(drug_number), drug_batch, valid_till, hospital_name,
                hospital_address, hospital_hash, month, sell_time,
                stream_hash, create_time, update_time)
            try:
                self.crm_db.ping()
            except pymysql.MySQLError:
                self.crm_db.connect()
            try:
                self.cursor.execute(sql_data_crm_data)
                self.db.commit()
                self.crm_cursor.execute(sql_data_crm_data)
                self.crm_db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm_data:%s' %
                      (e, sql_data_crm_data))

    def import_report(first_resp, layout, company_id, enterprise_name):
        # Walk every result page announced by the query response.
        for page_no in range(1, page_stop(first_resp)):
            page = self.sess_huiren.get(
                url=layout['page_url'].format(page_no, self.fist, self.last),
                headers=self.headers,
                verify=False)
            doc = etree.HTML(page.content.decode('utf-8'))
            import_rows(doc, layout, company_id, enterprise_name)

    def count_rows(enterprise_name):
        # Number of stored rows for yesterday (MONTHS == 0) or for the month.
        if MONTHS == 0:
            self.cursor.execute(
                "SELECT count(*) from order_metadata_huiren WHERE sell_time='{}' and delivery_name='{}'"
                .format(self.yesterday, enterprise_name))
        else:
            month = int(str(self.fist).replace('-', '')[0:6])
            self.cursor.execute(
                "SELECT count(*) from order_metadata_huiren WHERE month='{}' and delivery_name='{}'"
                .format(month, enterprise_name))
        return self.cursor.fetchone()[0]

    def write_record(company_id, enterprise_name, get_account, get_status,
                     remark):
        # Insert the run record into both databases.
        # Returns (data_num, create_time) for the version row.
        get_time = now_text()
        get_date = int(time.strftime("%Y%m%d", time.localtime()))
        data_num = count_rows(enterprise_name)
        # A successful run stamps create_time after the count query; a failed
        # run reuses the time taken before it.
        create_time = now_text() if get_status == 1 else get_time
        update_time = create_time
        sql_data_crm_record = SQL_CRM_RECORD.format(
            company_id, delivery_id, enterprise_name, get_account,
            spider_tag, delivery_id + "-" + self.time_stamp, get_time,
            get_date, get_status, data_num, self.classify_success, remark,
            create_time, update_time)
        try:
            self.cursor.execute(sql_data_crm_record)
            self.db.commit()
            self.crm_cursor.execute(sql_data_crm_record)
            self.crm_db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm_record:%s' %
                  (e, sql_data_crm_record))
        return data_num, create_time

    self.crm_cursor.execute(
        "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
        .format(delivery_id))
    for data_info in self.crm_cursor.fetchall():
        company_id = data_info[0]
        enterprise_name = data_info[1]
        get_account = data_info[2]
        get_pwd = data_info[3]
        is_enable = data_info[4]
        if is_enable != 1:
            continue

        # Counts login attempts across the recursive retries below.
        self.number += 1
        login_url = "http://www.zjhuiren.com/login.asp"
        post_url = "http://www.zjhuiren.com/login.asp?action=loginsub"
        login_page = self.sess_huiren.get(login_url, headers=self.headers)
        # The hidden codeKey field selects the captcha image and is sent
        # back with the login form.
        k = Selector(text=login_page.text).css(
            "input[name='codeKey']::attr(value)").extract_first()
        code_url = "http://www.zjhuiren.com/DvCode.asp?k=%s&" % (k)
        headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
            "Host": "www.zjhuiren.com",
            "Referer": "http://www.zjhuiren.com/login.asp"
        }
        captcha = self.sess_huiren.get(code_url, headers=headers)
        filename = save_captcha(captcha.content)

        codetype = 4000
        # Timeout in seconds.
        timeout = 60
        ydm = YDMHttp()
        # Only the second element of the result is used, as the captcha text.
        code = ydm.run(filename, codetype, timeout)[1]
        post_data = {
            "UserID": get_account,
            "UserPass": get_pwd,
            "codeKey": k,
            "code": code,
            "B1": "提交"
        }
        self.sess_huiren.post(post_url, data=post_data, headers=headers)
        # Response unused; the request is kept because the original flow
        # always opens this page before querying the reports.
        self.sess_huiren.get(
            "http://www.zjhuiren.com/manager.asp?imark=1&ID=1888",
            headers=headers)
        purchase_resp = self.sess_huiren.post(
            url='http://www.zjhuiren.com/gjmx.asp?action=goselect&ID=1888',
            data=query_form("purchase"),
            headers=self.headers,
            verify=False)
        try:
            # Raises IndexError when the result page lacks this text, which
            # sends control to the retry handler below.
            re.findall(r'日期', purchase_resp.content.decode('utf-8'))[0]
            import_report(purchase_resp, purchase_layout, company_id,
                          enterprise_name)

            sale_resp = self.sess_huiren.post(
                url='http://www.zjhuiren.com/xsmx.asp?action=goselect&ID=1888',
                data=query_form("sale"),
                headers=self.headers,
                verify=False)
            import_report(sale_resp, sale_layout, company_id,
                          enterprise_name)

            try:
                crm_request_data = {
                    'version': delivery_id + "-" + self.time_stamp,
                    'streamType': streamType,
                }
                html = requests.post(url=CRM_REQUEST_URL,
                                     data=crm_request_data,
                                     headers=self.headers,
                                     verify=False)
                self.classify_success = json.loads(
                    html.content.decode('utf-8'))['success']
            except Exception:
                # Best effort: the record is still written when this fails.
                print('爬虫调取后端接口错误')

            remark = ''
            data_num, create_time = write_record(
                company_id, enterprise_name, get_account, 1, remark)
            sql_data_crm_version = SQL_CRM_VERSION.format(
                delivery_id + "-" + self.time_stamp, enterprise_name,
                company_id, create_time, create_time, data_num, remark)
            try:
                self.cursor.execute(sql_data_crm_version)
                self.db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm_version:%s' %
                      (e, sql_data_crm_version))
        except Exception as e:
            print('huiren-登入失败:%s' % e)
            print('self.number', self.number)
            if self.number < 8:
                # Retry the whole run with a fresh captcha.
                self.parse('aa')
            else:
                write_record(company_id, enterprise_name, get_account, 2,
                             '账号或密码错了')
                print('账号密码或者验证码错误')
def parse(self, response):
    """Scrape the Shaphar ("08-shangyao") sales and purchase reports into the order tables.

    For every enabled row of ``base_delivery_enterprise`` with delivery id
    ``DDA1001009`` the method:

    1. logs in to passport.shaphar.com, solving the captcha through ``YDMHttp``;
    2. downloads report 22078 (sales), 22079 (batch / expiry data for the
       sales rows) and 22077 (purchases) for ``self.fist`` .. ``self.last``;
    3. inserts one row per report line into ``order_metadata_shangyao`` and
       one row built from ``SQL_CRM_DATA`` into both databases;
    4. posts the data version to ``CRM_REQUEST_URL`` and stores the returned
       ``success`` value in ``self.classify_success``;
    5. writes a crawl record (``SQL_CRM_RECORD``) and a version row
       (``SQL_CRM_VERSION``).

    If anything from step 2 onwards raises, the method calls itself again
    while ``self.number`` is below 4; afterwards a failure record with
    status 2 is written instead.

    Args:
        response: Unused. Retries pass a dummy value.
    """
    delivery_id = 'DDA1001009'
    table_name = 'order_metadata_shangyao'
    source_name = '08-shangyao'
    report_url = 'http://applyreport.shaphar.com/WebReport1/ReportServer'
    # Rows of one pane ("center" or "west") of the rendered report table.
    rows_xpath = ('//div[@id="frozen-{}"]/table[@class="x-table"]'
                  '/tbody[@class="rows-height-counter"]/tr')
    # Cell lookups are best effort. IndexError: the cell (or name part) is
    # absent. AttributeError: the tree is not an element (e.g. None).
    cell_missing = (IndexError, AttributeError)
    cell_not_a_number = cell_missing + (ValueError,)

    sales_parameters = '{"AS_CLIENT":"","ENDDATE":"%s","COM_GOODS":"","AS_SALE_TYPE":false,"TCXT":false,"INV_OWNER":"","LABEL0":"[5e93][5b58][62e5][6709][8005][ff1a]","AS_DATE_TYPE":"SEND","SALE_ORG":"","SORT5":"asc","COLUMN5":"","SORT4":"asc","COLUMN4":"","SORT3":"asc","COLUMN3":"","SORT2":"asc","COLUMN2":"","SORT1":"asc","COLUMN1":"","LABEL0_C_C_C_C_C":"[4ea7][54c1][ff1a]","LABEL1":"[6392][5e8f][ff1a]","LABEL0_C_C_C_C":"[5ba2][6237][ff1a]","LABEL0_C_C_C":"[9500][552e][90e8][95e8][ff1a]","UPDATE":"%s","STARTDATE":"%s"}'
    batch_parameters = '{"AS_CLIENT":"","ENDDATE":"%s","AS_LOT":"","LABEL0_C_C_C_C_C_C_C_C":"[6279][53f7][ff1a]","AS_COM_GOODS":"","AS_SALE_TYPE":false,"TCXT":false,"AS_INV_OWNER":"","LABEL0":"[5e93][5b58][62e5][6709][8005][ff1a]","AS_DATE_TYPE":"SEND","AS_SALE_ORG":"","SORT5":"asc","COLUMN5":"","SORT4":"asc","COLUMN4":"","SORT3":"asc","COLUMN3":"","SORT2":"asc","COLUMN2":"","SORT1":"asc","COLUMN1":"","LABEL0_C_C_C_C_C":"[4ea7][54c1][ff1a]","LABEL1":"[6392][5e8f][ff1a]","LABEL0_C_C_C_C":"[5ba2][6237][ff1a]","LABEL0_C_C_C":"[9500][552e][90e8][95e8][ff1a]","UPDATE":"%s","STARTDATE":"%s"}'
    purchase_parameters = '{"AS_COM_GOODS":"","ENDDATE":"%s","LABEL5":"[4ed3][5e93][ff1a]","LABEL3":"[5e93][5b58][62e5][6709][8005][ff1a]","LABEL2":"[4ea7][54c1][ff1a]","LABEL0":"[91c7][8d2d][65e5][671f][ff1a]","AS_INV_STORAGE":"","AS_INV_OWNER":"","SORT5":"asc","COLUMN5":"","SORT4":"asc","COLUMN4":"","SORT3":"asc","COLUMN3":"","SORT2":"asc","COLUMN2":"","SORT1":"asc","COLUMN1":"","LABEL1":"[6392][5e8f][ff1a]","LABEL0_C_C":"[2014]","UPDATE":"%s","STARTDATE":"%s"}'

    insert_sql = (
        "insert into order_metadata_shangyao(company_id, delivery_id, delivery_name, data_version, "
        "data_type, bill_type, drug_name, drug_specification, supplier_name, drug_unit, drug_number, "
        "indent_number, drug_batch, valid_till, hospital_name, hospital_address, sell_time, "
        "out_put_time, create_time, update_time, drug_price, drug_price_sum, goods_id, order_number, "
        "sales_departments, drug_hash, hospital_hash, stream_hash, month) "
        "values('{}', '{}', '{}', '{}', {}, '{}', '{}', '{}', '{}', '{}', {}, {}, "
        "'{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', "
        "'{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', {})"
    )
    # Scraped columns that end up inside quoted SQL literals.
    text_fields = (
        'drug_name', 'drug_specification', 'supplier_name', 'drug_unit',
        'drug_batch', 'valid_till', 'hospital_name', 'hospital_address',
        'sell_time', 'out_put_time', 'drug_price', 'drug_price_sum',
        'goods_id', 'order_number', 'sales_departments',
    )

    def timestamp():
        """Return the local time as ``YYYY-mm-dd HH:MM:SS``."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    def today_number():
        """Return the local date as the integer ``YYYYmmdd``."""
        return int(time.strftime("%Y%m%d", time.localtime()))

    def report_month():
        """Return ``self.fist`` reduced to the integer ``YYYYmm``."""
        return int(str(self.fist).replace('-', '')[0:6])

    def md5_hex(text):
        """Return the hex MD5 digest of the UTF-8 encoded text."""
        return hashlib.md5(bytes(text, encoding="utf-8")).hexdigest()

    def sql_text(value):
        """Escape a value for use inside a quoted SQL string literal."""
        return self.db.escape_string(str(value))

    def fetch_report(report_id, parameters):
        """Open a report, submit the date parameters and return page 1 as a tree."""
        entry = self.sess_shangyao.get(
            url='{}?op=fs_main&cmd=entry_report&id={}'.format(report_url, report_id),
            headers=self.headers,
            verify=False)
        session_id = re.findall(r'sessionID=(.+?)"', entry.content.decode('gbk'))[0]
        self.sess_shangyao.post(
            url='{}?op=fr_dialog&cmd=parameters_d&sessionID={}'.format(report_url, session_id),
            data={'__parameters__': parameters % (self.last, self.last, self.fist)},
            headers=self.headers,
            verify=False)
        page = self.sess_shangyao.get(
            url='{}?_={}&__boxModel__=true&op=fr_write&cmd=read_w_content&sessionID={}&reportIndex=0&browserWidth=1690&__cutpage__=v&pn=1'.format(
                report_url, int(time.time() * 1000), session_id),
            headers=self.headers,
            verify=False)
        return etree.HTML(page.content.decode('gbk'))

    def row_count(tree):
        """Return the number of rows in the centre pane of a report."""
        return len(tree.xpath(rows_xpath.format('center')))

    def cell(tree, pane, row, col):
        """Return the stripped text of one cell (1-based row/col); raise if absent."""
        expr = '{}[{}]/td[{}]/div/text()'.format(rows_xpath.format(pane), row, col)
        return tree.xpath(expr)[0].strip()

    def text_cell(tree, pane, row, col, default=''):
        """Return the cell text, or ``default`` when the cell is missing."""
        try:
            return cell(tree, pane, row, col)
        except cell_missing:
            return default

    def int_cell(tree, pane, row, col):
        """Return the cell as an int, or 0 when it is missing or not numeric."""
        try:
            return int(cell(tree, pane, row, col))
        except cell_not_a_number:
            return 0

    def name_parts(tree, row, col):
        """Split a "name-specification-supplier" cell; return None if the cell is missing."""
        try:
            parts = cell(tree, 'center', row, col).split('-')
        except cell_missing:
            return None
        # Pad so that specification and supplier default to ''.
        return (parts + ['', ''])[:3]

    def store_row(company_id, delivery_name, data_version, rec):
        """Insert one scraped row into the metadata table and the CRM data table."""
        created = timestamp()
        data_type = 1  # 1 = crawled by Python, 2 = imported
        quantity = abs(rec['drug_number'])
        month = report_month()

        # Hashes are computed from the raw scraped text, before SQL escaping,
        # so they keep the values they had before escaping was introduced.
        drug_hash = md5_hex("%s %s %s %s" % (
            rec['drug_name'], rec['drug_specification'], delivery_id,
            rec['supplier_name']))
        hospital_hash = md5_hex("%s %s %s" % (
            delivery_id, rec['hospital_name'], rec['hospital_address']))
        stream_hash = md5_hex("%s %s %s %s %s %s %s %s %s %s" % (
            company_id, delivery_id, rec['bill_type'], drug_hash,
            rec['drug_unit'], quantity, rec['drug_batch'], rec['valid_till'],
            hospital_hash, rec['sell_time']))

        # Scraped text is untrusted: escape it so a quote or backslash in a
        # name cannot break (or alter) the statements below.
        txt = {key: sql_text(rec[key]) for key in text_fields}
        safe_delivery_name = sql_text(delivery_name)

        insert_stmt = insert_sql.format(
            company_id, delivery_id, safe_delivery_name, data_version,
            data_type, rec['bill_type'], txt['drug_name'],
            txt['drug_specification'], txt['supplier_name'], txt['drug_unit'],
            quantity, rec['indent_number'], txt['drug_batch'],
            txt['valid_till'], txt['hospital_name'], txt['hospital_address'],
            txt['sell_time'], txt['out_put_time'], created, created,
            txt['drug_price'], txt['drug_price_sum'], txt['goods_id'],
            txt['order_number'], txt['sales_departments'], drug_hash,
            hospital_hash, stream_hash, month)

        # Re-open the connection if it went away during the HTTP requests.
        try:
            self.db.ping()
        except pymysql.MySQLError:
            self.db.connect()

        try:
            self.cursor.execute(insert_stmt)
            self.db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm:%s' % (e, insert_stmt))

        # NOTE(review): the id is looked up with max(id) even when the insert
        # above failed, and it is not safe against concurrent writers;
        # cursor.lastrowid may be a better source - confirm before changing.
        self.cursor.execute('select max(id) from order_metadata_shangyao')
        foreign_id = self.cursor.fetchone()[0]

        # NOTE(review): SQL_CRM_DATA is a str.format template defined
        # elsewhere; it is assumed to wrap text values in single quotes.
        crm_stmt = SQL_CRM_DATA.format(
            company_id, delivery_id, safe_delivery_name, table_name,
            foreign_id, data_version, data_type, rec['bill_type'],
            txt['drug_name'], txt['drug_specification'], txt['supplier_name'],
            drug_hash, txt['drug_unit'], quantity, txt['drug_batch'],
            txt['valid_till'], txt['hospital_name'], txt['hospital_address'],
            hospital_hash, month, txt['sell_time'], stream_hash, created,
            created)
        try:
            self.cursor.execute(crm_stmt)
            self.db.commit()
            self.crm_cursor.execute(crm_stmt)
            self.crm_db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm_data:%s' % (e, crm_stmt))

    def count_rows(enterprise_name):
        """Count stored rows for yesterday (MONTHS == 0) or for the report month."""
        safe_name = sql_text(enterprise_name)
        if MONTHS == 0:
            self.cursor.execute(
                "SELECT count(*) from order_metadata_shangyao WHERE sell_time='{}' and delivery_name='{}'"
                .format(self.yesterday, safe_name))
        else:
            self.cursor.execute(
                "SELECT count(*) from order_metadata_shangyao WHERE month='{}' and delivery_name='{}'"
                .format(report_month(), safe_name))
        return self.cursor.fetchone()[0]

    def write_crawl_record(company_id, enterprise_name, get_account,
                           data_version, get_time, get_date, get_status,
                           data_num, remark, create_time):
        """Insert the crawl record into both databases (best effort)."""
        record_stmt = SQL_CRM_RECORD.format(
            company_id, delivery_id, sql_text(enterprise_name),
            sql_text(get_account), source_name, data_version, get_time,
            get_date, get_status, data_num, self.classify_success, remark,
            create_time, create_time)
        try:
            self.cursor.execute(record_stmt)
            self.db.commit()
            self.crm_cursor.execute(record_stmt)
            self.crm_db.commit()
        except Exception as e:
            print('插入失败:%s sql_data_crm_record:%s' % (e, record_stmt))

    self.crm_cursor.execute(
        "select company_id, enterprise_name, get_account, get_pwd, is_enable from base_delivery_enterprise where delivery_id = '{}'"
        .format(delivery_id))

    for account_row in self.crm_cursor.fetchall():
        company_id, enterprise_name, get_account, get_pwd, is_enable = account_row[:5]
        if is_enable != 1:
            continue

        # Counts login attempts; it bounds the retry recursion below.
        self.number += 1

        # --- login ---------------------------------------------------------
        login_page = self.sess_shangyao.get(url=self.start_urls[0],
                                            headers=self.headers,
                                            verify=False)
        login_tree = etree.HTML(login_page.content.decode('utf-8'))
        post_url = 'http://passport.shaphar.com/' + login_tree.xpath(
            '//*[@id="form1"]/@action')[0]
        lt = login_tree.xpath('//*[@name="lt"]/@value')[0]
        image = self.sess_shangyao.get(
            url='http://passport.shaphar.com/cas-webapp-server/kaptcha.jpg',
            headers=self.headers,
            verify=False)

        # Save the captcha image so the recognition service can read it.
        if SCRAPYD_TYPE == 1:
            project_root = os.path.dirname(
                os.path.dirname(os.path.dirname(__file__)))
            captcha_dir = os.path.join(project_root, 'static', '08-shangyao')
            os.makedirs(captcha_dir, exist_ok=True)
            filename = os.path.join(captcha_dir, 'yzm.jpg')
        else:
            filename = r'./08-shangyaoyzm.jpg'
        with open(filename, 'wb') as f:
            f.write(image.content)

        codetype = 1004
        timeout = 60  # seconds
        _, captcha_text = YDMHttp().run(filename, codetype, timeout)

        login_form = {
            "username": get_account,
            "password": get_pwd,
            "captcha": captcha_text,
            "lt": lt,
            "_eventId": "submit",
            "submit": "登录",
        }
        self.sess_shangyao.post(url=post_url,
                                data=login_form,
                                headers=self.headers,
                                verify=False)
        self.sess_shangyao.get(
            url='http://applyreport.shaphar.com/WebReport1/ReportServer?op=fs&portalname=FE8EC3D50BFD98BBC9C1D07E55C9E019',
            headers=self.headers,
            verify=False)

        try:
            data_version = delivery_id + "-" + self.time_stamp

            # --- sales (bill type 3, or 4 for a negative quantity) ----------
            sales_tree = fetch_report(22078, sales_parameters)
            batch_tree = fetch_report(22079, batch_parameters)
            # NOTE(review): the row count comes from the batch report while
            # most cells are read from the sales report; the two reports are
            # presumably row-aligned - confirm.
            for row in range(1, row_count(batch_tree) + 1):
                parts = name_parts(sales_tree, row, 3)
                if parts is None:
                    continue  # no drug name cell: nothing to store
                drug_number = int_cell(sales_tree, 'center', row, 8)
                store_row(company_id, enterprise_name, data_version, {
                    'bill_type': 4 if drug_number < 0 else 3,
                    'drug_name': parts[0],
                    'drug_specification': parts[1],
                    'supplier_name': parts[2],
                    'drug_unit': text_cell(sales_tree, 'center', row, 4),
                    'drug_number': drug_number,
                    'indent_number': int_cell(sales_tree, 'center', row, 7),
                    'drug_batch': text_cell(batch_tree, 'center', row, 8),
                    'valid_till': text_cell(batch_tree, 'center', row, 9, '2000-01-01'),
                    'hospital_name': text_cell(sales_tree, 'center', row, 6),
                    'hospital_address': text_cell(sales_tree, 'center', row, 11),
                    'sell_time': text_cell(sales_tree, 'west', row, 2, '2000-01-01'),
                    'out_put_time': text_cell(sales_tree, 'west', row, 3, '2000-01-01'),
                    'drug_price': text_cell(sales_tree, 'center', row, 9),
                    'drug_price_sum': text_cell(sales_tree, 'center', row, 10),
                    'goods_id': text_cell(sales_tree, 'center', row, 2),
                    'order_number': text_cell(sales_tree, 'west', row, 4),
                    'sales_departments': text_cell(sales_tree, 'center', row, 1, '西药销售部'),
                })

            # --- purchases (bill type 1, or 2 for a negative quantity) ------
            purchase_tree = fetch_report(22077, purchase_parameters)
            for row in range(1, row_count(purchase_tree) + 1):
                parts = name_parts(purchase_tree, row, 1)
                if parts is None:
                    continue
                drug_number = int_cell(purchase_tree, 'center', row, 3)
                purchase_date = text_cell(purchase_tree, 'west', row, 2, '2000-01-01')
                store_row(company_id, enterprise_name, data_version, {
                    'bill_type': 2 if drug_number < 0 else 1,
                    'drug_name': parts[0],
                    'drug_specification': parts[1],
                    'supplier_name': parts[2],
                    'drug_unit': text_cell(purchase_tree, 'center', row, 2),
                    'drug_number': drug_number,
                    'indent_number': 0,
                    'drug_batch': text_cell(purchase_tree, 'center', row, 4),
                    'valid_till': text_cell(purchase_tree, 'center', row, 5),
                    'hospital_name': '',
                    'hospital_address': '',
                    # The purchase report uses the same cell for both dates.
                    'sell_time': purchase_date,
                    'out_put_time': purchase_date,
                    'drug_price': text_cell(purchase_tree, 'center', row, 6),
                    'drug_price_sum': text_cell(purchase_tree, 'center', row, 7),
                    'goods_id': text_cell(purchase_tree, 'west', row, 5),
                    'order_number': text_cell(purchase_tree, 'west', row, 3),
                    'sales_departments': '',
                })

            # --- notify the CRM backend (best effort) ----------------------
            try:
                crm_request_data = {
                    'version': data_version,
                    'streamType': streamType,
                }
                crm_reply = requests.post(url=CRM_REQUEST_URL,
                                          data=crm_request_data,
                                          headers=self.headers,
                                          verify=False)
                self.classify_success = json.loads(
                    crm_reply.content.decode('utf-8'))['success']
            except Exception as e:
                print('爬虫调取后端接口错误', e)

            # --- success record and version row ----------------------------
            get_time = timestamp()
            get_date = today_number()
            data_num = count_rows(enterprise_name)
            remark = ''
            create_time = timestamp()
            write_crawl_record(company_id, enterprise_name, get_account,
                               data_version, get_time, get_date, 1, data_num,
                               remark, create_time)

            version_stmt = SQL_CRM_VERSION.format(
                data_version, sql_text(enterprise_name), company_id,
                create_time, create_time, data_num, remark)
            try:
                self.cursor.execute(version_stmt)
                self.db.commit()
            except Exception as e:
                print('插入失败:%s sql_data_crm_version:%s' %
                      (e, version_stmt))
        except Exception as e:
            # Any failure after the login requests is treated as a failed
            # login (typically a wrong captcha) and the whole run is retried.
            print('shangyao-登入失败:%s' % e)
            print('self.number', self.number)
            if self.number < 4:
                self.parse('aa')
            else:
                create_time = timestamp()
                get_date = today_number()
                data_num = count_rows(enterprise_name)
                write_crawl_record(company_id, enterprise_name, get_account,
                                   delivery_id + "-" + self.time_stamp,
                                   create_time, get_date, 2, data_num,
                                   '账号或密码错了', create_time)
                print('账号密码或者验证码错误')