class CustomsUpdate: DATABASES = { 'host': '111.230.242.51', 'port': 3306, 'user': '******', 'password': '******', 'db': 'goldtwo8.1', 'charset': 'utf8', } def __init__(self, file_path): self.file_path = file_path self.sql = Sql(self.DATABASES) def parse_excel(self): data = pd.read_excel(self.file_path) data.columns = ['code', 'name'] for row in data.itertuples(index=False, name='Pandas'): yield {'code': row[0], 'name': row[1], 'NameAbbr': ''} def write_2_sql(self, data): print('data = ', data) self.sql.insert('CustomsCode', **data) def run(self): with aniu_atomic(self.sql): for index, data in enumerate(self.parse_excel()): self.write_2_sql(data) print('第{}条数据已插入'.format(index + 1)) print('插入完成')
def __init__(self, file_path, name): self.spider = self.SPIDER_MAP.get(name)() self.sql = Sql(settings.DATABASES_GOLD_8_3) self.timer = TimeHelper() self.file_path = file_path os.makedirs(self.GENERATE_FILE_DIR, exist_ok=True) self.generate_file_path = os.path.join(self.GENERATE_FILE_DIR, name+self.timer.get_str_now_time) + '.xlsx' self.error = []
def __init__(self, name): self.name = name self.timer = TimeHelper() self.spider = self.SPIDER_MAP.get(self.name)() self.sql = Sql(settings.DATABASES_GOLD_8_3) self.file_path = os.path.join(self.GENERATE_FILE_DIR, self.timer.get_today_str) os.makedirs(self.file_path, exist_ok=True) self.error = []
def to_excel(): sql = Sql(settings.DATABASES_SERVER) dic = collections.OrderedDict() a = sql.select('DecList', '*', where={'id': 999}, first=True) ret = sql.raw_sql("SELECT * from DecList where GName LIKE '%%集成电路%%'") d = pd.DataFrame([pd.Series(i) for i in ret.get('ret_tuples')]) d.columns = a.keys() d.to_excel(r'C:\Financesystem\data\declist.xlsx', index_label='序号') print(d)
def __init__(self): self.resister_singal() self.sql = Sql(self.DATABASES) self.errors = [] self.rate_0 = [] self.file_path = os.path.join(GetExcelData.BASE_PATH, '退税记录.xls') # self.data = GetExcelData().parse_excel() self.data = GetExcelData().new_parse_excel() self.init_app()
class GenerateExcel(object): SPIDER_MAP = { "机保": NemsSpider, "前海": SpecialNemsSpider, } FIELDS_MAP = { 'bondInvtNo': '清单编号', 'listStatName': '数据状态', 'vrfdedMarkcdName': '核扣标志', 'inputTime': '录入时间', 'invtIochkptStucdName': '清单进出卡口状态', 'rltEntryBizopEtpsNm': '关联报关单境内收发货人名称', 'gdsSeqno': '备案序号/自动备案序号', 'gdsMtno': '商品料号', 'gdecd': '商品编码', 'gdsNm': '商品名称', 'dclCurrcdName': '币制', 'dclUnitcdName': '申报计量单位', 'lawfUnitcdName': '法定计量单位', 'dclQty': '申报数量', 'lawfQty': '法定数量', 'dclTotalAmt': '企业申报总价', 'putrecNo': '手账册编号', } IE_MAP = { 'I': '进口', 'E': '出口', } STATUS_MAP = { 'B': '海关终审通过', 'P': '预审批通过', } GENERATE_FILE_DIR = r"d:\周晓琳" def __init__(self, name): self.name = name self.timer = TimeHelper() self.spider = self.SPIDER_MAP.get(self.name)() self.sql = Sql(settings.DATABASES_GOLD_8_3) self.file_path = os.path.join(self.GENERATE_FILE_DIR, self.timer.get_today_str) os.makedirs(self.file_path, exist_ok=True) self.error = [] def generate_file_path(self, ie_flag, status, start_date, end_date): return os.path.join( self.file_path, self.name + ie_flag + status + self.date_2_str(start_date) + '-' + self.date_2_str(end_date)) + '.xlsx' def date_2_str(self, date): if date: return date.strftime("%Y%m%d") else: return "" def get_eid_ieflag(self, bondInvtNo): return bondInvtNo, bondInvtNo[8:9] def write_eid_local(self, rets, ie_flag, status, start_date, end_date): file_path = os.path.join( self.file_path, self.name + ie_flag + status + self.date_2_str(start_date) + '-' + self.date_2_str(end_date)) with open(file_path, 'wb') as f: pickle.dump(rets, f) def load_eid_local(self, ie_flag, status, start_date, end_date): file_path = os.path.join( self.file_path, self.name + ie_flag + status + self.date_2_str(start_date) + '-' + self.date_2_str(end_date)) print('file_path = ', file_path) try: with open(file_path, 'rb') as f: # f 是一个迭代器,只能迭代一次 return pickle.load(f) or [] except: return [] def get_eid_by_single(self, ie_flag, status=None, start_date="", end_date="", i=0): try: rets = self.spider.get_search_post_info_all( ie_flag, status=status, start_date=self.date_2_str(start_date), end_date=self.date_2_str(end_date)) except: print(traceback.print_exc()) rets = [] return rets def get_eids(self, ie_flag, status, start_date, end_date): rets = self.load_eid_local(ie_flag, status, start_date, end_date) if not rets: # 本地没有,就去线上取 rets = self.get_eid_by_single(ie_flag, status, start_date, end_date) if rets: self.write_eid_local(rets, ie_flag, status, start_date, end_date) # 从线上取的序列化到本地 if rets: log.info('共有数据{!r}条'.format(len(rets))) print(list(map(lambda x: x.get('bondInvtNo'), rets))) ret_database = self.sql.select('nems_info_copy', 'bondInvtNo') make_datas = {} for data in rets: make_datas[data['bondInvtNo']] = data tmp = list(map(lambda x: x['bondInvtNo'], rets)) eids = set(tmp) - set( list(map(lambda x: x['bondInvtNo'], ret_database))) if eids != set(tmp): setattr(self, 'duandian', True) else: setattr(self, 'duandian', False) eids = list(map(lambda x: make_datas.get(x), eids)) log.info('还需要下载数据{!r}条'.format(len(eids))) else: # 线上没取到,就赋为空列表 eids = [] log.info('共有数据0条') return eids def get_data_2_sql(self, ie_flag, rets=None, status=None, start_date="", end_date="", retry=0): """从单一窗口拿数据""" self.error.clear() # 清空错误 if not rets: rets = self.get_eids(ie_flag, status, start_date, end_date) if rets and not getattr(self, 'duandian', False): # 如果不是断点传输的话,清空数据库 self.sql.delete('nems_info_copy', where={'id__gt': 0}) print('数据库已清空') for index, ret in enumerate(rets): bondInvtNo = ret.get('bondInvtNo') try: with aniu_atomic(self.sql): seqno = ret.get('seqNo') listStatName = ret.get('listStatName') vrfdedMarkcdName = ret.get('vrfdedMarkcdName') head, lists = self.spider.get_head_and_list_detail(seqno) inputTime = head.get('inputTime') invtIochkptStucdName = head.get('invtIochkptStucdName') rltEntryBizopEtpsNm = head.get('rltEntryBizopEtpsNm') putrecNo = head.get('putrecNo') for list_info in lists: gdsSeqno = list_info.get( 'putrecSeqno') or list_info.get('param3') or '' gdsMtno = list_info.get('gdsMtno') gdecd = list_info.get('gdecd') gdsNm = list_info.get('gdsNm') dclCurrcdName = list_info.get('dclCurrcdName') dclUnitcdName = list_info.get('dclUnitcdName') lawfUnitcdName = list_info.get('lawfUnitcdName') dclQty = list_info.get('dclQty') lawfQty = list_info.get('lawfQty') dclTotalAmt = list_info.get('dclTotalAmt') _dict = { 'bondInvtNo': bondInvtNo, 'listStatName': listStatName, 'vrfdedMarkcdName': vrfdedMarkcdName, 'inputTime': inputTime, 'invtIochkptStucdName': invtIochkptStucdName, 'rltEntryBizopEtpsNm': rltEntryBizopEtpsNm, 'gdsSeqno': gdsSeqno, 'gdsMtno': gdsMtno, 'gdecd': gdecd, 'gdsNm': gdsNm, 'dclCurrcdName': dclCurrcdName, 'dclUnitcdName': dclUnitcdName, 'lawfUnitcdName': lawfUnitcdName, 'dclQty': dclQty, 'lawfQty': lawfQty, 'dclTotalAmt': dclTotalAmt, 'putrecNo': putrecNo, } self.sql.insert('nems_info_copy', **_dict) log.info('第{0}条数据{1}插入成功'.format(index + 1, bondInvtNo)) except: log.info(traceback.format_exc()) self.error.append(ret) else: if not rets and retry < 2: # # 只有当rets获取为空才会走这个分支 重试三次 self.get_data_2_sql(ie_flag, rets=rets, status=status, start_date=start_date, end_date=end_date, retry=retry + 1) if rets: # 只有获取到ret不为空的时候才会走这个分支,所以下面的最多只会打一次 log.info('数据已全部插入') if self.error: log.info('错误记录: {}'.format(self.error)) def generate_excel(self, ie_flg, status, start_date, end_date): conn = pymysql.connect(**settings.DATABASES_GOLD_8_3) sql = 'select * from nems_info_copy' data = pd.read_sql(sql, con=conn) data.drop('id', axis=1, inplace=True) data.columns = list(map(lambda x: self.FIELDS_MAP.get(x), data.columns)) generate_file_path = self.generate_file_path(ie_flg, status, start_date, end_date) if any([not data.empty, self.error]): try: with pd.ExcelWriter(generate_file_path) as writer: if not data.empty: data.to_excel(writer, sheet_name='数据统计', index=None) log.warning('self.error = {}'.format(self.error)) if self.error: print("pd.DataFrame({'海关编号': self.error}) = ", pd.DataFrame({'海关编号': self.error})) pd.DataFrame({ '海关编号': self.error }).to_excel(writer, sheet_name='错误记录', index=None) # 写入报错的记录 except: log.info(traceback.format_exc()) log.info("data = {0}, self.error = {1}".format( data, self.error)) self.sql.delete('nems_info_copy', where={'id__gt': 0}) @used_time def run(self, *args): if len(args): rets = self._get_search_conditions(*args) else: rets = self.get_search_conditions() # for ie_flag, status, start_date, end_date in rets: self.get_data_2_sql(ie_flag, status=status, start_date=start_date, end_date=end_date) rety_count = 3 while rety_count: rety_count -= 1 rets = self.get_eids(ie_flag, status, start_date, end_date) if rets: self.get_data_2_sql(ie_flag, rets=rets, status=status, start_date=start_date, end_date=end_date) else: break if rety_count == 0: ret = self.get_eids(ie_flag, status, start_date, end_date) if ret: log.warning(f'还是有漏网之鱼{ret}') self.generate_excel(self.IE_MAP.get(ie_flag), self.STATUS_MAP.get(status), start_date, end_date) def get_month_first_last_day(self, year=None, month=None): if year: assert isinstance( year, int) and year <= self.timer.year, '年份输入有误,年份应是小于{}的正整数'.format( self.timer.year) if month: assert isinstance(month, int) and month in range( 1, 13), '月份输入有误,应是大于0小于13的正整数' return (self.timer.get_month_first_day(year=year, month=month), self.timer.get_month_last_day(year=year, month=month)) def get_search_conditions(self, today=True): ret = [] rets = [] data = [] ie_list = ['I', 'E'] status_list = ['B', 'P'] for i in ie_list: for k in status_list: ret.append([i, k]) if today: rets.append((self.timer.get_today, self.timer.get_today)) else: for month in range(1, 3): rets.append(self.get_month_first_last_day(month=month)) rets.append((self.timer.get_month_first_day(month=3), "")) for i in ret: for start, end in rets: tmp = copy.deepcopy(i) tmp.extend([start, end]) data.append(tmp) return data def _get_search_conditions(self, start_time, end_time): ret = [] rets = [] data = [] ie_list = ['I', 'E'] status_list = ['B', 'P'] from itertools import product for i, k in product(ie_list, status_list): ret.append([i, k]) rets.append((self.timer.strptime(start_time), self.timer.strptime(end_time) if end_time else "")) for i in ret: for start, end in rets: tmp = copy.deepcopy(i) tmp.extend([start, end]) data.append(tmp) return data
def __init__(self, file_path): self.file_path = file_path self.sql = Sql(self.DATABASES)
class SyncSqlGold(object): """从线上数据库同步gold_8.1数据库""" def __init__(self): self.sql_gold = Sql(settings.DATABASES_GOLD_8_1) self.__sql_online = Sql(settings.DATABASES) # 双下划綫开头,表示为私有对象,只可被类内访问,不可继承 def query_online(self, sql, *args): return self.__sql_online.raw_sql(sql, *args) def run_sync(self): self.sync_npts_db() self.sync_nems_db() self.sync_bwl_db() self.sync_special_bwl_db() def sync_npts_db(self): self.sync_npts_head() self.sync_npts_img() self.sync_npts_exg() self.sync_npts_cm() def sync_nems_db(self): self.sync_nems_head() self.sync_nems_img() self.sync_nems_exg() self.sync_nems_cm() def sync_bwl_db(self): self.sync_bwl_head() self.sync_bwl_list() def sync_special_bwl_db(self): self.sync_special_bwl_head() self.sync_special_bwl_list() def get_distinct_seqno(self, sql, table_name): """获取表中所有不重复的SEQNO""" ret = [] sql_str = "SELECT DISTINCT SEQNO from {}".format(table_name) results = sql(sql_str) if results.get('status'): for seqno in results.get('ret_tuples'): ret.append(seqno[0]) # 列表的方式就是数据大了这个方式就行不通了,会太耗内存 return ret def take_the_difference_set(self, list_online, list_gold): """取线上与GOLD的差集""" return list(set(list_online)-set(list_gold)) def sync_tatm_seq(self, miss_seqno_list, table_name): for seqno in miss_seqno_list: rets = self.__sql_online.select(table_name, where={'SEQNO': seqno}) for ret in rets: ret.pop('id') self.sql_gold.insert(table_name, **ret) def get_miss_seqno_list(self, table_name): seq_online_list = self.get_distinct_seqno(self.query_online, table_name) seq_gold_list = self.get_distinct_seqno(self.sql_gold.raw_sql, table_name) return self.take_the_difference_set(seq_online_list, seq_gold_list) def sync_npts_head(self): miss_seqno_list = self.get_miss_seqno_list('NptsEmlHead') self.sync_tatm_seq(miss_seqno_list, 'NptsEmlHead') def sync_npts_img(self): miss_seqno_list = self.get_miss_seqno_list('NptsEmlImgType') self.sync_tatm_seq(miss_seqno_list, 'NptsEmlImgType') self.sync_update_tatm_list('NptsEmlImgType', 'GDSSEQNO') def sync_npts_exg(self): miss_seqno_list = self.get_miss_seqno_list('NptsEmlExgType') self.sync_tatm_seq(miss_seqno_list, 'NptsEmlExgType') self.sync_update_tatm_list('NptsEmlExgType', 'GDSSEQNO') def sync_npts_cm(self): miss_seqno_list = self.get_miss_seqno_list('NptsEmlConsumeType') self.sync_tatm_seq(miss_seqno_list, 'NptsEmlConsumeType') self.sync_update_tatm_list('NptsEmlConsumeType', 'GSEQNO') def sync_nems_head(self): miss_seqno_list = self.get_miss_seqno_list('NemsHead') self.sync_tatm_seq(miss_seqno_list, 'NemsHead') def sync_nems_img(self): miss_seqno_list = self.get_miss_seqno_list('NemsImgList') self.sync_tatm_seq(miss_seqno_list, 'NemsImgList') self.sync_update_tatm_list('NemsImgList', 'GDSSEQNO') def sync_nems_exg(self): miss_seqno_list = self.get_miss_seqno_list('NemsExgList') self.sync_tatm_seq(miss_seqno_list, 'NemsExgList') self.sync_update_tatm_list('NemsExgList', 'GDSSEQNO') def sync_nems_cm(self): miss_seqno_list = self.get_miss_seqno_list('NemsCmList') self.sync_tatm_seq(miss_seqno_list, 'NemsCmList') self.sync_update_tatm_list('NemsCmList', 'GSEQNO') def sync_bwl_seqno(self, miss_seqno_list, table_name): for seqno in miss_seqno_list: rets = self.__sql_online.select(table_name, where={'SeqNo': seqno}) for ret in rets: ret.pop('Id') self.sql_gold.insert(table_name, **ret) def get_bwl_miss_seqno_list(self, table_name): seq_online_list = self.get_distinct_seqno(self.query_online, table_name) seq_gold_list = self.get_distinct_seqno(self.sql_gold.raw_sql, table_name) return self.take_the_difference_set(seq_online_list, seq_gold_list) def sync_bwl_head(self): miss_seqno_list = self.get_bwl_miss_seqno_list('BwlHeadType') self.sync_bwl_seqno(miss_seqno_list, 'SpecialBwlHeadType') def sync_special_bwl_head(self): miss_seqno_list = self.get_bwl_miss_seqno_list('SpecialBwlHeadType') self.sync_bwl_seqno(miss_seqno_list, 'BwlHeadType') def sync_update_2_max(self, table_name, field, seqno): local_max_gdsseqno = self.get_local_db_max_or_min_gdsseqno(self.sql_gold, table_name, field, seqno) online_max_gdsseqno = self.get_local_db_max_or_min_gdsseqno(self.__sql_online, table_name, field, seqno) if local_max_gdsseqno < online_max_gdsseqno: rets = self.__sql_online.select(table_name, where={'SeqNo': seqno, '{}__gt'.format(field): local_max_gdsseqno}) for ret in rets: if ret.__contains__('Id'): ret.pop('Id') elif ret.__contains__('id'): ret.pop('id') else: pass self.sql_gold.insert(table_name, **ret) def sync_update_2_min(self, table_name, field, seqno): local_min_gdsseqno = self.get_local_db_max_or_min_gdsseqno(self.sql_gold, table_name, field, seqno, max=False) online_min_gdsseqno = self.get_local_db_max_or_min_gdsseqno(self.__sql_online, table_name, field, seqno, max=False) if local_min_gdsseqno > online_min_gdsseqno: rets = self.__sql_online.select(table_name, where={'SeqNo': seqno, '{}__lt'.format(field): local_min_gdsseqno}) for ret in rets: if ret.__contains__('Id'): ret.pop('Id') elif ret.__contains__('id'): ret.pop('id') else: pass self.sql_gold.insert(table_name, **ret)\ @error_2_send_email def sync_update_tatm_list(self, table_name, field): assert not self.get_miss_seqno_list(table_name), '数据库{}的SeqNo同步未完成'.format(table_name) for seqno in self.get_distinct_seqno(self.query_online, table_name): self.sync_update_2_max(table_name, field, seqno) self.sync_update_2_min(table_name, field, seqno) @error_2_send_email def sync_update_bwl_list(self, table_name, field): assert not self.get_bwl_miss_seqno_list(table_name), '数据库{}的SeqNo同步未完成'.format(table_name) for seqno in self.get_distinct_seqno(self.query_online, table_name): self.sync_update_2_max(table_name, field, seqno) self.sync_update_2_min(table_name, field, seqno) def sync_bwl_list(self): miss_seqno_list = self.get_bwl_miss_seqno_list('BwlListType') self.sync_bwl_seqno(miss_seqno_list, 'BwlListType') self.sync_update_bwl_list('BwlListType', 'GdsSeqno') def sync_special_bwl_list(self): miss_seqno_list = self.get_bwl_miss_seqno_list('SpecialBwlListType') self.sync_bwl_seqno(miss_seqno_list, 'SpecialBwlListType') self.sync_update_bwl_list('SpecialBwlListType', 'GdsSeqno') def get_local_db_max_or_min_gdsseqno(self, sql, tabname, field, seqNo, max=True): if max: _sql = 'SELECT max({}) as gdsSeqno FROM {} WHERE SEQNO = '.format(field, tabname) + "%s" else: _sql = 'SELECT min({}) as gdsSeqno FROM {} WHERE SEQNO = '.format(field, tabname) + "%s" ret = sql.raw_sql(_sql, seqNo) if ret.get('status'): gdsSeqno = ret['ret_tuples'][0][0] return gdsSeqno def sync_spec_bwl_list(self): """此函數無用,是因為爬錯表,所以用來更新另外一張表的""" seqno_list = ['201800000000030528', '201800000000030527', '201800000000030485', '201800000000005604'] for seqno in seqno_list: ret_list = self.__sql_online.select('SpecialBwlHeadType', '*', where={'SeqNo': seqno}) for ret in ret_list: self.sql_gold.insert('SpecialBwlHeadType', **ret) for seqno in seqno_list: ret_list = self.__sql_online.select('SpecialBwlListType', '*', where={'SeqNo': seqno}) for ret in ret_list: ret.pop('Id') self.sql_gold.insert('SpecialBwlListType', **ret) def update_spec_bwl_list(self): """此函數無用,更新BwlList2Head字段""" seqno_list = ['201800000000030528', '201800000000030527', '201800000000030485', '201800000000005604'] for seqno in seqno_list: id = self.sql_gold.select('SpecialBwlHeadType', 'Id', where={'SeqNo': seqno}) print('id = ', id) if id: self.sql_gold.update('SpecialBwlListType', where={'SeqNo': seqno}, BwlList2Head=id[0].get('Id')) def update_bwl_list2head(self): self.__sql_online.update('BwlListType', no_where=True, BwlList2Head=2)
def __init__(self): self.sql_gold = Sql(settings.DATABASES_GOLD_8_1) self.__sql_online = Sql(settings.DATABASES) # 双下划綫开头,表示为私有对象,只可被类内访问,不可继承
class AutoWin(BaseCls): """自动录入数据""" DATABASES = { 'host': 'gz-cdb-ld4ka6l5.sql.tencentcdb.com', 'port': 63482, 'user': '******', 'password': '******', 'db': 'GMBGTEO', 'charset': 'utf8', } CLASS_NAME = ['WindowsForms10.Window.8.app.0.201d787_r14_ad1', 'WindowsForms10.Window.8.app.0.21093c0_r15_ad1', 'WindowsForms10.Window.8.app.0.33ec00f_r14_ad1', 'WindowsForms10.Window.8.app.0.134c08f_r15_ad1', 'WindowsForms10.Window.8.app.0.201d787_r9_ad1'] def __init__(self): self.resister_singal() self.sql = Sql(self.DATABASES) self.errors = [] self.rate_0 = [] self.file_path = os.path.join(GetExcelData.BASE_PATH, '退税记录.xls') # self.data = GetExcelData().parse_excel() self.data = GetExcelData().new_parse_excel() self.init_app() def handle(self, signalnum, *args, **kwargs): if signalnum == signal.SIGINT: self.print_error(" --- 检测到 Ctrl + C, 将暂停程序 ---") # self.print_error(' --- 程序退出 ---') # raise SystemExit() os.system("pause") self.dlg.click_input() def init_app(self, index=0): """实例化app""" try: app = application.Application().connect(title_re='外贸企业出口退税申报系统2.1版', class_name=self.CLASS_NAME[index]) # 实例化 dlg = app['外贸企业出口退税申报系统2.1版'] # print('dlg = ', dir(dlg)) # print('1 = ', dlg.PrintControlIdentifiers) # print('2 = ', dlg.print_control_identifiers()) setattr(self, 'app', app) setattr(self, 'dlg', dlg) setattr(self, 'class_name', self.CLASS_NAME[index]) except: if index >= len(self.CLASS_NAME): self.print_error(' ----- 请打开退税系统 !!! -----') index = -1 self.healper() index += 1 self.init_app(index) @staticmethod def get_total(datas): total = 0 for data in datas: total += int(data.get('数量')) return total def auto_operate(self, datas): for data in datas: try: # TODO 程序出错处理,比如点击提示框或者点击确认框 self.dlg.click_input() # os.system("pause") # self.give_add() self.dlg.Edit0.set_text(data.get('关联号')) self.dlg.Edit4.set_text(data.get('申报年月')) self.dlg.Edit5.set_text(data.get('申报批次')) self.dlg.Edit6.set_text(data.get('序号')) self.dlg.Edit9.set_text(data.get('进货凭证号')) self.dlg.Edit11.set_text(data.get('供货方纳税号')) self.dlg.Edit12.set_text(str(data.get('开票日期'))) self.dlg.Edit13.set_text(data.get('商品编码')[:8]) self.dlg.Edit13.type_keys('~') # 商品代码按回车 unit = self.dlg.Edit15.texts()[0] # 单位 assert unit, '单位不可为空' rate = self.dlg.Edit20.texts()[0] # 退税率 assert self.is_float_or_number(rate), '退税率不可为空' if self.is_rate_0(rate, data): continue self.dlg.Edit16.set_text(self.deal_unit(unit, datas, data)) # 数量 # self.dlg.Edit16.set_text(50) # 数量 self.dlg.Edit17.set_text(data.get('调整后人民币不含税总价')) # 计税金额 self.dlg.Edit17.type_keys('~') # 计税金额按回车 self.dlg.Edit25.type_keys('~') # 备注按回车 time.sleep(1) self.click_sys_prompt('保存') time.sleep(1) except Exception as e: log.error(traceback.format_exc()) self.click_sys_ask('确认') self.click_sys_prompt('确认') self.give_up() # 放弃 self.print_error('->'.join((str(e), str(data)))) log.error('->'.join((str(e), str(data)))) self.errors.append(data) time.sleep(1) def give_add(self): """增加""" print("self.dlg['增加'] = ", self.dlg['增加']) if self.dlg['放弃'].is_enabled(): self.give_up() self.click_button('增加') def give_up(self): """放弃""" self.click_button('放弃') self.click_sys_ask('确认') def is_rate_0(self, rate, data): """处理退税率为0的情况""" if rate and int(rate.split('.')[0]) == 0: self.rate_0.append(data) return True return False def deal_unit(self, unit, datas, data): """处理单位问题""" if pd.isnull(data.get('报关单号')): return data.get('数量') declist_keys = ['CodeTs', 'GName', 'GUnit', 'FirstUnit', 'GQty', 'FirstQty', 'DeclPrice'] msgs = {'DecMsg': 'DecList', 'SpecialDecmsg': 'SpecialDeclist', 'SpiderDecMsg': 'SpiderDeclist', 'HisDecMsg': 'HisDeclist'} for msg in msgs: ret = self.sql.select(msg, 'DecId', where={'QpEntryId': data.get('报关单号'), 'DeleteFlag': 0}, first=True) if ret: dec_lists = self.sql.select(msgs.get(msg), *declist_keys, where={'DecId': ret.get('DecId'), 'DeleteFlag': 0}) log.debug('报关单号:{0!r}, msg: {1!r}, list: {2!r}'.format(data.get('报关单号'), msg, msgs.get(msg))) break else: raise Exception('报关单号{0!r}不存在,请检查'.format(data.get('报关单号'))) total = self.get_total(datas) # 应该是总量要相等 declist = self.confirm_declist(data, dec_lists) assert declist, '未找到对应表体记录,关联号:{0!r},商品编码:{1!r},商品名称:{2!r},数量:{3!r},报关单号:{4!r}'.format( data['关联号'], data['商品编码'], data['商品名称'], data['数量'], data['报关单号']) _map = { self.unit_code_2_name(declist['GUnit']): declist['GQty'], self.unit_code_2_name(declist['FirstUnit']): declist['FirstQty'], } log.debug('_map.get(unit) = {}'.format(_map.get(unit))) log.debug("Decimal(data['数量']).quantize(Decimal('0.0000')) = {}".format(Decimal(data['数量']).quantize(Decimal('0.0000')))) log.debug("declist['GQty'] = {}".format(declist['GQty'])) return str(_map.get(unit)*Decimal(data['数量']).quantize(Decimal('0.0000'))/declist['GQty']) # 报关单数量(个/千克)*模板数量(个)/申报数量(个) def unit_code_2_name(self, unit): """单位code转名称""" unit_name = self.sql.select('UnitCode', 'Name', where={'Code': unit}, first=True) assert unit_name, '转换单位失败,在UnitCode表中不存在Code等于{0!r}的记录'.format(unit) return unit_name.get('Name') def confirm_declist(self, data, declists): """确认表体""" for declist in declists: if declist['CodeTs'] == data['商品编码'] and declist['GName'] == data['商品名称'] and ( declist['GQty'] == Decimal(data['数量']).quantize(Decimal('0.0000')) or declist['FirstQty'] == Decimal(data['数量']).quantize(Decimal('0.0000'))): # 商品编码+品名确定报关单表体 return declist def write_error_and_rate0_2_file(self): """将错误和rate为0的记录下来并写入excel""" log.debug('发生错误没录入的记录 = {}'.format(self.errors)) log.debug('退税率为零的记录 = {}'.format(self.rate_0)) with pd.ExcelWriter(self.file_path) as writer: if self.errors: pd.DataFrame(self.errors).to_excel(writer, sheet_name='错误记录', index=None) # 写入报错的记录 if self.rate_0: pd.DataFrame(self.rate_0).to_excel(writer, sheet_name='退税率为0的记录', index=None) # 写入退税率为0的记录 def click_sys_ask(self, value): """系统询问点确认""" try: self.app.window(title_re='系统询问', class_name=self.class_name)[value].click() # 确认 except: pass def click_sys_prompt(self, value): """系统提示点确认""" try: self.app.window(title_re='系统提示', class_name=self.class_name)[value].click() # 确认 except: pass def click_button(self, button_name): if self.dlg[button_name].is_enabled(): self.dlg[button_name].click() self.dlg[button_name].wait('visible') def is_float_or_number(self, v): regex = re.compile(r'(^-?\d+?.\d+?$)|(^-?\d+$)') if re.match(regex, v): return True return False def retry_errors(self, retry=0): if self.errors: retry_errors = self.errors[:] self.errors.clear() self.auto_operate(retry_errors) else: return if retry > 0: self.retry_errors(retry-1) def run(self): for data in self.data: if not isinstance(data, list): data = [data] self.auto_operate(data) self.retry_errors(RETRY_ERROR_COUNT) self.write_error_and_rate0_2_file() print(' 谢谢您的使用,祝您生活愉快,再见') print(' 程序执行完毕') time.sleep(1)
class GenerateExcel(object): SPIDER_MAP = { "机保": NemsSpider, "前海": SpecialNemsSpider, } FIELDS_MAP = { 'bondInvtNo': '清单编号', 'listStatName': '数据状态', 'vrfdedMarkcdName': '核扣标志', 'inputTime': '录入时间', 'invtIochkptStucdName': '清单进出卡口状态', 'rltEntryBizopEtpsNm': '关联报关单境内收发货人名称', 'gdsSeqno': '备案序号/自动备案序号', 'gdsMtno': '商品料号', 'gdecd': '商品编码', 'gdsNm': '商品名称', 'dclCurrcdName': '币制', 'dclUnitcdName': '申报计量单位', 'lawfUnitcdName': '法定计量单位', 'dclQty': '申报数量', 'lawfQty': '法定数量', 'dclTotalAmt': '企业申报总价', 'putrecNo': '手账册编号', } GENERATE_FILE_DIR = r"d:\周晓琳" def __init__(self, file_path, name): self.spider = self.SPIDER_MAP.get(name)() self.sql = Sql(settings.DATABASES_GOLD_8_3) self.timer = TimeHelper() self.file_path = file_path os.makedirs(self.GENERATE_FILE_DIR, exist_ok=True) self.generate_file_path = os.path.join(self.GENERATE_FILE_DIR, name+self.timer.get_str_now_time) + '.xlsx' self.error = [] def get_eid_ieflag(self, bondInvtNo): return bondInvtNo, bondInvtNo[8:9] def get_bondInvtNos(self): data = pd.read_excel(self.file_path, skiprows=0, sheet_name='核注清单', converters={'海关编号': str}) data = data.drop_duplicates() # 列去重,只有一列,所以不需要制定那一列了 print('共有数据{}条'.format(data.ix[:, 0].size)) setattr(self, 'datas', data.loc[:, ['海关编号']]) # print('self.datas = ', self.datas, ', type = ', type(self.datas)) # for index, _data in enumerate(data.ix[:, 0]): # yield _data # print('已处理{0}条数据, sqeno = {1}'.format(index+1, _data)) # def generate_excel(self, datas): # f = pd.concat(datas, ignore_index=True) # 将DataFrame纵向连接起来 # f.to_excel('D://单一窗口核注清单.xlsx', index_label='序号') def get_data_2_sql(self): """从单一窗口拿数据""" self.get_bondInvtNos() for index, bondInvtNo in enumerate(self.datas.ix[:, 0]): try: with aniu_atomic(self.sql): if len(str(bondInvtNo)) < 18 or self.sql.select( 'nems_info_20190313', 'id', where={'bondInvtNo': bondInvtNo}, first=True): print('bondInvtNo = ', bondInvtNo, len(str(bondInvtNo)), ', 数据库可能已存在此数据') continue ret = self.spider._get_search_post_info('', *self.get_eid_ieflag(bondInvtNo)) seqno = ret.get('seqNo') listStatName = ret.get('listStatName') regex = re.compile(r'(删除)|(退单)|(暂存)') if re.search(regex, listStatName): raise Exception('状态为{}的单不处理,直接跳过') vrfdedMarkcdName = ret.get('vrfdedMarkcdName') head, lists = self.spider.get_head_and_list_detail(seqno) inputTime = head.get('inputTime') invtIochkptStucdName = head.get('invtIochkptStucdName') rltEntryBizopEtpsNm = head.get('rltEntryBizopEtpsNm') putrecNo = head.get('putrecNo') for list_info in lists: gdsSeqno = list_info.get('putrecSeqno') or list_info.get('param3') or '' gdsMtno = list_info.get('gdsMtno') gdecd = list_info.get('gdecd') gdsNm = list_info.get('gdsNm') dclCurrcdName = list_info.get('dclCurrcdName') dclUnitcdName = list_info.get('dclUnitcdName') lawfUnitcdName = list_info.get('lawfUnitcdName') dclQty = list_info.get('dclQty') lawfQty = list_info.get('lawfQty') dclTotalAmt = list_info.get('dclTotalAmt') _dict = { 'bondInvtNo': bondInvtNo, 'listStatName': listStatName, 'vrfdedMarkcdName': vrfdedMarkcdName, 'inputTime': inputTime, 'invtIochkptStucdName': invtIochkptStucdName, 'rltEntryBizopEtpsNm': rltEntryBizopEtpsNm, 'gdsSeqno': gdsSeqno, 'gdsMtno': gdsMtno, 'gdecd': gdecd, 'gdsNm': gdsNm, 'dclCurrcdName': dclCurrcdName, 'dclUnitcdName': dclUnitcdName, 'lawfUnitcdName': lawfUnitcdName, 'dclQty': dclQty, 'lawfQty': lawfQty, 'dclTotalAmt': dclTotalAmt, 'putrecNo': putrecNo, } self.sql.insert('nems_info_20190313', **_dict) print('第{0}条数据{1}插入成功'.format(index+1, bondInvtNo)) except Exception as e: if str(e).find('直接跳过') > -1: print(bondInvtNo, str(e)) continue print(traceback.print_exc()) self.error.append(bondInvtNo) if self.error: print('错误记录: {}'.format(','.join(self.error))) print('数据已全部插入') def get_data(self): """帮晓林哥统计核注清单清单编号对应的备案序号/自动备案序号对应申报数量""" for bondInvtNo in self.get_bondInvtNos(): _dict = {'清单编号': [], '备案序号/自动备案序号': [], '申报数量': [], '数据状态': [], '核扣标志': [], '录入时间': [], '清单进出卡口状态': [], '关联报关单境内收发货人名称': [], '商品料号': [], '商品编码': [], '商品名称': [], '币制': [], '申报计量单位': [], '法定计量单位': [], '法定数量': [], '企业申报总价': [],} ret = self.spider._get_search_post_info('', *self.get_eid_ieflag(bondInvtNo)) seqno = ret.get('seqNo') listStatName = ret.get('listStatName') vrfdedMarkcdName = ret.get('vrfdedMarkcdName') head, lists = self.spider.get_head_and_list_detail(seqno) inputTime = head.get('inputTime') invtIochkptStucdName = head.get('invtIochkptStucdName') rltEntryBizopEtpsNm = head.get('rltEntryBizopEtpsNm') for list_info in lists: gdsSeqno = list_info.get('putrecSeqno') or list_info.get('param3') or '' gdsMtno = list_info.get('gdsMtno') gdecd = list_info.get('gdecd') gdsNm = list_info.get('gdsNm') dclCurrcdName = list_info.get('dclCurrcdName') dclUnitcdName = list_info.get('dclUnitcdName') lawfUnitcdName = list_info.get('lawfUnitcdName') dclQty = list_info.get('dclQty') lawfQty = list_info.get('lawfQty') dclTotalAmt = list_info.get('dclTotalAmt') _dict['清单编号'].append(bondInvtNo) _dict['数据状态'].append(listStatName) _dict['核扣标志'].append(vrfdedMarkcdName) _dict['录入时间'].append(inputTime) _dict['清单进出卡口状态'].append(invtIochkptStucdName) _dict['关联报关单境内收发货人名称'].append(rltEntryBizopEtpsNm) _dict['备案序号/自动备案序号'].append(gdsSeqno) _dict['商品料号'].append(gdsMtno) _dict['商品编码'].append(gdecd) _dict['商品名称'].append(gdsNm) _dict['币制'].append(dclCurrcdName) _dict['申报计量单位'].append(dclUnitcdName) _dict['法定计量单位'].append(lawfUnitcdName) _dict['申报数量'].append(dclQty) _dict['法定数量'].append(lawfQty) _dict['企业申报总价'].append(dclTotalAmt) f = pd.DataFrame(_dict) yield f def generate_excel(self): conn = pymysql.connect(**settings.DATABASES_GOLD_8_3) sql = 'select * from nems_info_20190313' data = pd.read_sql(sql, con=conn) data.drop('id', axis=1, inplace=True) data.columns = list(map(lambda x: self.FIELDS_MAP.get(x), data.columns)) print('self.generate_file_path = ', self.generate_file_path) with pd.ExcelWriter(self.generate_file_path) as writer: if hasattr(self, 'datas') and not self.datas.empty: self.datas.to_excel(writer, sheet_name='原始数据', index=None) if not data.empty: data.to_excel(writer, sheet_name='数据统计', index=None) print('self.error = ', self.error) if self.error: pd.DataFrame({'海关编号': self.error}).to_excel(writer, sheet_name='错误记录', index=None) # 写入报错的记录 @used_time def run(self): self.sql.delete('nems_info_20190313', where={'id__gt': 0}) self.get_data_2_sql() self.generate_excel()