コード例 #1
0
class CustomsUpdate:
    DATABASES = {
        'host': '111.230.242.51',
        'port': 3306,
        'user': '******',
        'password': '******',
        'db': 'goldtwo8.1',
        'charset': 'utf8',
    }

    def __init__(self, file_path):
        self.file_path = file_path
        self.sql = Sql(self.DATABASES)

    def parse_excel(self):
        data = pd.read_excel(self.file_path)
        data.columns = ['code', 'name']
        for row in data.itertuples(index=False, name='Pandas'):
            yield {'code': row[0], 'name': row[1], 'NameAbbr': ''}

    def write_2_sql(self, data):
        print('data = ', data)
        self.sql.insert('CustomsCode', **data)

    def run(self):
        with aniu_atomic(self.sql):
            for index, data in enumerate(self.parse_excel()):
                self.write_2_sql(data)
                print('第{}条数据已插入'.format(index + 1))
        print('插入完成')
コード例 #2
0
ファイル: generate_excel.py プロジェクト: Zzaniu/nothing
 def __init__(self, file_path, name):
     self.spider = self.SPIDER_MAP.get(name)()
     self.sql = Sql(settings.DATABASES_GOLD_8_3)
     self.timer = TimeHelper()
     self.file_path = file_path
     os.makedirs(self.GENERATE_FILE_DIR, exist_ok=True)
     self.generate_file_path = os.path.join(self.GENERATE_FILE_DIR, name+self.timer.get_str_now_time) + '.xlsx'
     self.error = []
コード例 #3
0
 def __init__(self, name):
     self.name = name
     self.timer = TimeHelper()
     self.spider = self.SPIDER_MAP.get(self.name)()
     self.sql = Sql(settings.DATABASES_GOLD_8_3)
     self.file_path = os.path.join(self.GENERATE_FILE_DIR,
                                   self.timer.get_today_str)
     os.makedirs(self.file_path, exist_ok=True)
     self.error = []
コード例 #4
0
ファイル: test_excel.py プロジェクト: Zzaniu/HGSpiders
def to_excel():
    sql = Sql(settings.DATABASES_SERVER)
    dic = collections.OrderedDict()
    a = sql.select('DecList', '*', where={'id': 999}, first=True)
    ret = sql.raw_sql("SELECT * from DecList where GName LIKE '%%集成电路%%'")
    d = pd.DataFrame([pd.Series(i) for i in ret.get('ret_tuples')])
    d.columns = a.keys()
    d.to_excel(r'C:\Financesystem\data\declist.xlsx', index_label='序号')
    print(d)
コード例 #5
0
 def __init__(self):
     self.resister_singal()
     self.sql = Sql(self.DATABASES)
     self.errors = []
     self.rate_0 = []
     self.file_path = os.path.join(GetExcelData.BASE_PATH, '退税记录.xls')
     # self.data = GetExcelData().parse_excel()
     self.data = GetExcelData().new_parse_excel()
     self.init_app()
コード例 #6
0
class GenerateExcel(object):
    SPIDER_MAP = {
        "机保": NemsSpider,
        "前海": SpecialNemsSpider,
    }
    FIELDS_MAP = {
        'bondInvtNo': '清单编号',
        'listStatName': '数据状态',
        'vrfdedMarkcdName': '核扣标志',
        'inputTime': '录入时间',
        'invtIochkptStucdName': '清单进出卡口状态',
        'rltEntryBizopEtpsNm': '关联报关单境内收发货人名称',
        'gdsSeqno': '备案序号/自动备案序号',
        'gdsMtno': '商品料号',
        'gdecd': '商品编码',
        'gdsNm': '商品名称',
        'dclCurrcdName': '币制',
        'dclUnitcdName': '申报计量单位',
        'lawfUnitcdName': '法定计量单位',
        'dclQty': '申报数量',
        'lawfQty': '法定数量',
        'dclTotalAmt': '企业申报总价',
        'putrecNo': '手账册编号',
    }
    IE_MAP = {
        'I': '进口',
        'E': '出口',
    }
    STATUS_MAP = {
        'B': '海关终审通过',
        'P': '预审批通过',
    }
    GENERATE_FILE_DIR = r"d:\周晓琳"

    def __init__(self, name):
        self.name = name
        self.timer = TimeHelper()
        self.spider = self.SPIDER_MAP.get(self.name)()
        self.sql = Sql(settings.DATABASES_GOLD_8_3)
        self.file_path = os.path.join(self.GENERATE_FILE_DIR,
                                      self.timer.get_today_str)
        os.makedirs(self.file_path, exist_ok=True)
        self.error = []

    def generate_file_path(self, ie_flag, status, start_date, end_date):
        return os.path.join(
            self.file_path,
            self.name + ie_flag + status + self.date_2_str(start_date) + '-' +
            self.date_2_str(end_date)) + '.xlsx'

    def date_2_str(self, date):
        if date:
            return date.strftime("%Y%m%d")
        else:
            return ""

    def get_eid_ieflag(self, bondInvtNo):
        return bondInvtNo, bondInvtNo[8:9]

    def write_eid_local(self, rets, ie_flag, status, start_date, end_date):
        file_path = os.path.join(
            self.file_path, self.name + ie_flag + status +
            self.date_2_str(start_date) + '-' + self.date_2_str(end_date))
        with open(file_path, 'wb') as f:
            pickle.dump(rets, f)

    def load_eid_local(self, ie_flag, status, start_date, end_date):
        file_path = os.path.join(
            self.file_path, self.name + ie_flag + status +
            self.date_2_str(start_date) + '-' + self.date_2_str(end_date))
        print('file_path = ', file_path)
        try:
            with open(file_path, 'rb') as f:  # f 是一个迭代器,只能迭代一次
                return pickle.load(f) or []
        except:
            return []

    def get_eid_by_single(self,
                          ie_flag,
                          status=None,
                          start_date="",
                          end_date="",
                          i=0):
        try:
            rets = self.spider.get_search_post_info_all(
                ie_flag,
                status=status,
                start_date=self.date_2_str(start_date),
                end_date=self.date_2_str(end_date))
        except:
            print(traceback.print_exc())
            rets = []
        return rets

    def get_eids(self, ie_flag, status, start_date, end_date):
        rets = self.load_eid_local(ie_flag, status, start_date, end_date)
        if not rets:  # 本地没有,就去线上取
            rets = self.get_eid_by_single(ie_flag, status, start_date,
                                          end_date)
            if rets:
                self.write_eid_local(rets, ie_flag, status, start_date,
                                     end_date)  # 从线上取的序列化到本地
        if rets:
            log.info('共有数据{!r}条'.format(len(rets)))
            print(list(map(lambda x: x.get('bondInvtNo'), rets)))
            ret_database = self.sql.select('nems_info_copy', 'bondInvtNo')
            make_datas = {}
            for data in rets:
                make_datas[data['bondInvtNo']] = data
            tmp = list(map(lambda x: x['bondInvtNo'], rets))
            eids = set(tmp) - set(
                list(map(lambda x: x['bondInvtNo'], ret_database)))
            if eids != set(tmp): setattr(self, 'duandian', True)
            else: setattr(self, 'duandian', False)
            eids = list(map(lambda x: make_datas.get(x), eids))
            log.info('还需要下载数据{!r}条'.format(len(eids)))
        else:  # 线上没取到,就赋为空列表
            eids = []
            log.info('共有数据0条')
        return eids

    def get_data_2_sql(self,
                       ie_flag,
                       rets=None,
                       status=None,
                       start_date="",
                       end_date="",
                       retry=0):
        """从单一窗口拿数据"""
        self.error.clear()  # 清空错误
        if not rets:
            rets = self.get_eids(ie_flag, status, start_date, end_date)
        if rets and not getattr(self, 'duandian', False):  # 如果不是断点传输的话,清空数据库
            self.sql.delete('nems_info_copy', where={'id__gt': 0})
            print('数据库已清空')
        for index, ret in enumerate(rets):
            bondInvtNo = ret.get('bondInvtNo')
            try:
                with aniu_atomic(self.sql):
                    seqno = ret.get('seqNo')
                    listStatName = ret.get('listStatName')
                    vrfdedMarkcdName = ret.get('vrfdedMarkcdName')
                    head, lists = self.spider.get_head_and_list_detail(seqno)
                    inputTime = head.get('inputTime')
                    invtIochkptStucdName = head.get('invtIochkptStucdName')
                    rltEntryBizopEtpsNm = head.get('rltEntryBizopEtpsNm')
                    putrecNo = head.get('putrecNo')
                    for list_info in lists:
                        gdsSeqno = list_info.get(
                            'putrecSeqno') or list_info.get('param3') or ''
                        gdsMtno = list_info.get('gdsMtno')
                        gdecd = list_info.get('gdecd')
                        gdsNm = list_info.get('gdsNm')
                        dclCurrcdName = list_info.get('dclCurrcdName')
                        dclUnitcdName = list_info.get('dclUnitcdName')
                        lawfUnitcdName = list_info.get('lawfUnitcdName')
                        dclQty = list_info.get('dclQty')
                        lawfQty = list_info.get('lawfQty')
                        dclTotalAmt = list_info.get('dclTotalAmt')
                        _dict = {
                            'bondInvtNo': bondInvtNo,
                            'listStatName': listStatName,
                            'vrfdedMarkcdName': vrfdedMarkcdName,
                            'inputTime': inputTime,
                            'invtIochkptStucdName': invtIochkptStucdName,
                            'rltEntryBizopEtpsNm': rltEntryBizopEtpsNm,
                            'gdsSeqno': gdsSeqno,
                            'gdsMtno': gdsMtno,
                            'gdecd': gdecd,
                            'gdsNm': gdsNm,
                            'dclCurrcdName': dclCurrcdName,
                            'dclUnitcdName': dclUnitcdName,
                            'lawfUnitcdName': lawfUnitcdName,
                            'dclQty': dclQty,
                            'lawfQty': lawfQty,
                            'dclTotalAmt': dclTotalAmt,
                            'putrecNo': putrecNo,
                        }
                        self.sql.insert('nems_info_copy', **_dict)
                    log.info('第{0}条数据{1}插入成功'.format(index + 1, bondInvtNo))
            except:
                log.info(traceback.format_exc())
                self.error.append(ret)
        else:
            if not rets and retry < 2:  # # 只有当rets获取为空才会走这个分支 重试三次
                self.get_data_2_sql(ie_flag,
                                    rets=rets,
                                    status=status,
                                    start_date=start_date,
                                    end_date=end_date,
                                    retry=retry + 1)
        if rets:  # 只有获取到ret不为空的时候才会走这个分支,所以下面的最多只会打一次
            log.info('数据已全部插入')
            if self.error: log.info('错误记录: {}'.format(self.error))

    def generate_excel(self, ie_flg, status, start_date, end_date):
        conn = pymysql.connect(**settings.DATABASES_GOLD_8_3)
        sql = 'select * from nems_info_copy'
        data = pd.read_sql(sql, con=conn)
        data.drop('id', axis=1, inplace=True)
        data.columns = list(map(lambda x: self.FIELDS_MAP.get(x),
                                data.columns))
        generate_file_path = self.generate_file_path(ie_flg, status,
                                                     start_date, end_date)
        if any([not data.empty, self.error]):
            try:
                with pd.ExcelWriter(generate_file_path) as writer:
                    if not data.empty:
                        data.to_excel(writer, sheet_name='数据统计', index=None)
                    log.warning('self.error = {}'.format(self.error))
                    if self.error:
                        print("pd.DataFrame({'海关编号': self.error}) = ",
                              pd.DataFrame({'海关编号': self.error}))
                        pd.DataFrame({
                            '海关编号': self.error
                        }).to_excel(writer, sheet_name='错误记录',
                                    index=None)  # 写入报错的记录
            except:
                log.info(traceback.format_exc())
                log.info("data = {0}, self.error = {1}".format(
                    data, self.error))
            self.sql.delete('nems_info_copy', where={'id__gt': 0})

    @used_time
    def run(self, *args):
        if len(args):
            rets = self._get_search_conditions(*args)
        else:
            rets = self.get_search_conditions()  #
        for ie_flag, status, start_date, end_date in rets:
            self.get_data_2_sql(ie_flag,
                                status=status,
                                start_date=start_date,
                                end_date=end_date)
            rety_count = 3
            while rety_count:
                rety_count -= 1
                rets = self.get_eids(ie_flag, status, start_date, end_date)
                if rets:
                    self.get_data_2_sql(ie_flag,
                                        rets=rets,
                                        status=status,
                                        start_date=start_date,
                                        end_date=end_date)
                else:
                    break
            if rety_count == 0:
                ret = self.get_eids(ie_flag, status, start_date, end_date)
                if ret: log.warning(f'还是有漏网之鱼{ret}')
            self.generate_excel(self.IE_MAP.get(ie_flag),
                                self.STATUS_MAP.get(status), start_date,
                                end_date)

    def get_month_first_last_day(self, year=None, month=None):
        if year:
            assert isinstance(
                year,
                int) and year <= self.timer.year, '年份输入有误,年份应是小于{}的正整数'.format(
                    self.timer.year)
        if month:
            assert isinstance(month, int) and month in range(
                1, 13), '月份输入有误,应是大于0小于13的正整数'
        return (self.timer.get_month_first_day(year=year, month=month),
                self.timer.get_month_last_day(year=year, month=month))

    def get_search_conditions(self, today=True):
        ret = []
        rets = []
        data = []
        ie_list = ['I', 'E']
        status_list = ['B', 'P']
        for i in ie_list:
            for k in status_list:
                ret.append([i, k])
        if today:
            rets.append((self.timer.get_today, self.timer.get_today))
        else:
            for month in range(1, 3):
                rets.append(self.get_month_first_last_day(month=month))
            rets.append((self.timer.get_month_first_day(month=3), ""))
        for i in ret:
            for start, end in rets:
                tmp = copy.deepcopy(i)
                tmp.extend([start, end])
                data.append(tmp)
        return data

    def _get_search_conditions(self, start_time, end_time):
        ret = []
        rets = []
        data = []
        ie_list = ['I', 'E']
        status_list = ['B', 'P']
        from itertools import product
        for i, k in product(ie_list, status_list):
            ret.append([i, k])
        rets.append((self.timer.strptime(start_time),
                     self.timer.strptime(end_time) if end_time else ""))
        for i in ret:
            for start, end in rets:
                tmp = copy.deepcopy(i)
                tmp.extend([start, end])
                data.append(tmp)
        return data
コード例 #7
0
 def __init__(self, file_path):
     self.file_path = file_path
     self.sql = Sql(self.DATABASES)
コード例 #8
0
ファイル: sync_sql.py プロジェクト: Zzaniu/HGSpiders
class SyncSqlGold(object):
    """从线上数据库同步gold_8.1数据库"""

    def __init__(self):
        self.sql_gold = Sql(settings.DATABASES_GOLD_8_1)
        self.__sql_online = Sql(settings.DATABASES)  # 双下划綫开头,表示为私有对象,只可被类内访问,不可继承

    def query_online(self, sql, *args):
        return self.__sql_online.raw_sql(sql, *args)

    def run_sync(self):
        self.sync_npts_db()
        self.sync_nems_db()
        self.sync_bwl_db()
        self.sync_special_bwl_db()

    def sync_npts_db(self):
        self.sync_npts_head()
        self.sync_npts_img()
        self.sync_npts_exg()
        self.sync_npts_cm()

    def sync_nems_db(self):
        self.sync_nems_head()
        self.sync_nems_img()
        self.sync_nems_exg()
        self.sync_nems_cm()

    def sync_bwl_db(self):
        self.sync_bwl_head()
        self.sync_bwl_list()

    def sync_special_bwl_db(self):
        self.sync_special_bwl_head()
        self.sync_special_bwl_list()

    def get_distinct_seqno(self, sql, table_name):
        """获取表中所有不重复的SEQNO"""
        ret = []
        sql_str = "SELECT DISTINCT SEQNO from {}".format(table_name)
        results = sql(sql_str)
        if results.get('status'):
            for seqno in results.get('ret_tuples'):
                ret.append(seqno[0])  # 列表的方式就是数据大了这个方式就行不通了,会太耗内存
        return ret

    def take_the_difference_set(self, list_online, list_gold):
        """取线上与GOLD的差集"""
        return list(set(list_online)-set(list_gold))

    def sync_tatm_seq(self, miss_seqno_list, table_name):
        for seqno in miss_seqno_list:
            rets = self.__sql_online.select(table_name, where={'SEQNO': seqno})
            for ret in rets:
                ret.pop('id')
                self.sql_gold.insert(table_name, **ret)

    def get_miss_seqno_list(self, table_name):
        seq_online_list = self.get_distinct_seqno(self.query_online, table_name)
        seq_gold_list = self.get_distinct_seqno(self.sql_gold.raw_sql, table_name)
        return self.take_the_difference_set(seq_online_list, seq_gold_list)

    def sync_npts_head(self):
        miss_seqno_list = self.get_miss_seqno_list('NptsEmlHead')
        self.sync_tatm_seq(miss_seqno_list, 'NptsEmlHead')

    def sync_npts_img(self):
        miss_seqno_list = self.get_miss_seqno_list('NptsEmlImgType')
        self.sync_tatm_seq(miss_seqno_list, 'NptsEmlImgType')
        self.sync_update_tatm_list('NptsEmlImgType', 'GDSSEQNO')

    def sync_npts_exg(self):
        miss_seqno_list = self.get_miss_seqno_list('NptsEmlExgType')
        self.sync_tatm_seq(miss_seqno_list, 'NptsEmlExgType')
        self.sync_update_tatm_list('NptsEmlExgType', 'GDSSEQNO')

    def sync_npts_cm(self):
        miss_seqno_list = self.get_miss_seqno_list('NptsEmlConsumeType')
        self.sync_tatm_seq(miss_seqno_list, 'NptsEmlConsumeType')
        self.sync_update_tatm_list('NptsEmlConsumeType', 'GSEQNO')

    def sync_nems_head(self):
        miss_seqno_list = self.get_miss_seqno_list('NemsHead')
        self.sync_tatm_seq(miss_seqno_list, 'NemsHead')

    def sync_nems_img(self):
        miss_seqno_list = self.get_miss_seqno_list('NemsImgList')
        self.sync_tatm_seq(miss_seqno_list, 'NemsImgList')
        self.sync_update_tatm_list('NemsImgList', 'GDSSEQNO')

    def sync_nems_exg(self):
        miss_seqno_list = self.get_miss_seqno_list('NemsExgList')
        self.sync_tatm_seq(miss_seqno_list, 'NemsExgList')
        self.sync_update_tatm_list('NemsExgList', 'GDSSEQNO')

    def sync_nems_cm(self):
        miss_seqno_list = self.get_miss_seqno_list('NemsCmList')
        self.sync_tatm_seq(miss_seqno_list, 'NemsCmList')
        self.sync_update_tatm_list('NemsCmList', 'GSEQNO')

    def sync_bwl_seqno(self, miss_seqno_list, table_name):
        for seqno in miss_seqno_list:
            rets = self.__sql_online.select(table_name, where={'SeqNo': seqno})
            for ret in rets:
                ret.pop('Id')
                self.sql_gold.insert(table_name, **ret)

    def get_bwl_miss_seqno_list(self, table_name):
        seq_online_list = self.get_distinct_seqno(self.query_online, table_name)
        seq_gold_list = self.get_distinct_seqno(self.sql_gold.raw_sql, table_name)
        return self.take_the_difference_set(seq_online_list, seq_gold_list)

    def sync_bwl_head(self):
        miss_seqno_list = self.get_bwl_miss_seqno_list('BwlHeadType')
        self.sync_bwl_seqno(miss_seqno_list, 'SpecialBwlHeadType')

    def sync_special_bwl_head(self):
        miss_seqno_list = self.get_bwl_miss_seqno_list('SpecialBwlHeadType')
        self.sync_bwl_seqno(miss_seqno_list, 'BwlHeadType')

    def sync_update_2_max(self, table_name, field, seqno):
        local_max_gdsseqno = self.get_local_db_max_or_min_gdsseqno(self.sql_gold, table_name, field, seqno)
        online_max_gdsseqno = self.get_local_db_max_or_min_gdsseqno(self.__sql_online, table_name, field, seqno)
        if local_max_gdsseqno < online_max_gdsseqno:
            rets = self.__sql_online.select(table_name, where={'SeqNo': seqno, '{}__gt'.format(field): local_max_gdsseqno})
            for ret in rets:
                if ret.__contains__('Id'):
                    ret.pop('Id')
                elif ret.__contains__('id'):
                    ret.pop('id')
                else:
                    pass
                self.sql_gold.insert(table_name, **ret)

    def sync_update_2_min(self, table_name, field, seqno):
        local_min_gdsseqno = self.get_local_db_max_or_min_gdsseqno(self.sql_gold, table_name, field, seqno, max=False)
        online_min_gdsseqno = self.get_local_db_max_or_min_gdsseqno(self.__sql_online, table_name, field, seqno, max=False)
        if local_min_gdsseqno > online_min_gdsseqno:
            rets = self.__sql_online.select(table_name, where={'SeqNo': seqno, '{}__lt'.format(field): local_min_gdsseqno})
            for ret in rets:
                if ret.__contains__('Id'):
                    ret.pop('Id')
                elif ret.__contains__('id'):
                    ret.pop('id')
                else:
                    pass
                self.sql_gold.insert(table_name, **ret)\

    @error_2_send_email
    def sync_update_tatm_list(self, table_name, field):
        assert not self.get_miss_seqno_list(table_name), '数据库{}的SeqNo同步未完成'.format(table_name)
        for seqno in self.get_distinct_seqno(self.query_online, table_name):
            self.sync_update_2_max(table_name, field, seqno)
            self.sync_update_2_min(table_name, field, seqno)

    @error_2_send_email
    def sync_update_bwl_list(self, table_name, field):
        assert not self.get_bwl_miss_seqno_list(table_name), '数据库{}的SeqNo同步未完成'.format(table_name)
        for seqno in self.get_distinct_seqno(self.query_online, table_name):
            self.sync_update_2_max(table_name, field, seqno)
            self.sync_update_2_min(table_name, field, seqno)

    def sync_bwl_list(self):
        miss_seqno_list = self.get_bwl_miss_seqno_list('BwlListType')
        self.sync_bwl_seqno(miss_seqno_list, 'BwlListType')
        self.sync_update_bwl_list('BwlListType', 'GdsSeqno')

    def sync_special_bwl_list(self):
        miss_seqno_list = self.get_bwl_miss_seqno_list('SpecialBwlListType')
        self.sync_bwl_seqno(miss_seqno_list, 'SpecialBwlListType')
        self.sync_update_bwl_list('SpecialBwlListType', 'GdsSeqno')

    def get_local_db_max_or_min_gdsseqno(self, sql, tabname, field, seqNo, max=True):
        if max:
            _sql = 'SELECT max({}) as gdsSeqno FROM {} WHERE SEQNO = '.format(field, tabname) + "%s"
        else:
            _sql = 'SELECT min({}) as gdsSeqno FROM {} WHERE SEQNO = '.format(field, tabname) + "%s"
        ret = sql.raw_sql(_sql, seqNo)
        if ret.get('status'):
            gdsSeqno = ret['ret_tuples'][0][0]
            return gdsSeqno

    def sync_spec_bwl_list(self):
        """此函數無用,是因為爬錯表,所以用來更新另外一張表的"""
        seqno_list = ['201800000000030528', '201800000000030527', '201800000000030485', '201800000000005604']
        for seqno in seqno_list:
            ret_list = self.__sql_online.select('SpecialBwlHeadType', '*', where={'SeqNo': seqno})
            for ret in ret_list:
                self.sql_gold.insert('SpecialBwlHeadType', **ret)
        for seqno in seqno_list:
            ret_list = self.__sql_online.select('SpecialBwlListType', '*', where={'SeqNo': seqno})
            for ret in ret_list:
                ret.pop('Id')
                self.sql_gold.insert('SpecialBwlListType', **ret)

    def update_spec_bwl_list(self):
        """此函數無用,更新BwlList2Head字段"""
        seqno_list = ['201800000000030528', '201800000000030527', '201800000000030485', '201800000000005604']
        for seqno in seqno_list:
            id = self.sql_gold.select('SpecialBwlHeadType', 'Id', where={'SeqNo': seqno})
            print('id = ', id)
            if id:
                self.sql_gold.update('SpecialBwlListType', where={'SeqNo': seqno}, BwlList2Head=id[0].get('Id'))

    def update_bwl_list2head(self):
        self.__sql_online.update('BwlListType', no_where=True, BwlList2Head=2)
コード例 #9
0
ファイル: sync_sql.py プロジェクト: Zzaniu/HGSpiders
 def __init__(self):
     self.sql_gold = Sql(settings.DATABASES_GOLD_8_1)
     self.__sql_online = Sql(settings.DATABASES)  # 双下划綫开头,表示为私有对象,只可被类内访问,不可继承
コード例 #10
0
class AutoWin(BaseCls):
    """自动录入数据"""
    DATABASES = {
        'host': 'gz-cdb-ld4ka6l5.sql.tencentcdb.com',
        'port': 63482,
        'user': '******',
        'password': '******',
        'db': 'GMBGTEO',
        'charset': 'utf8',
    }
    CLASS_NAME = ['WindowsForms10.Window.8.app.0.201d787_r14_ad1', 'WindowsForms10.Window.8.app.0.21093c0_r15_ad1',
                  'WindowsForms10.Window.8.app.0.33ec00f_r14_ad1', 'WindowsForms10.Window.8.app.0.134c08f_r15_ad1',
                  'WindowsForms10.Window.8.app.0.201d787_r9_ad1']

    def __init__(self):
        self.resister_singal()
        self.sql = Sql(self.DATABASES)
        self.errors = []
        self.rate_0 = []
        self.file_path = os.path.join(GetExcelData.BASE_PATH, '退税记录.xls')
        # self.data = GetExcelData().parse_excel()
        self.data = GetExcelData().new_parse_excel()
        self.init_app()

    def handle(self, signalnum, *args, **kwargs):
        if signalnum == signal.SIGINT:
            self.print_error("  --- 检测到 Ctrl + C, 将暂停程序 ---")
        # self.print_error('  --- 程序退出 ---')
        # raise SystemExit()
        os.system("pause")
        self.dlg.click_input()

    def init_app(self, index=0):
        """实例化app"""
        try:
            app = application.Application().connect(title_re='外贸企业出口退税申报系统2.1版',
                                                    class_name=self.CLASS_NAME[index])  # 实例化
            dlg = app['外贸企业出口退税申报系统2.1版']
            # print('dlg = ', dir(dlg))
            # print('1 = ', dlg.PrintControlIdentifiers)
            # print('2 = ', dlg.print_control_identifiers())
            setattr(self, 'app', app)
            setattr(self, 'dlg', dlg)
            setattr(self, 'class_name', self.CLASS_NAME[index])
        except:
            if index >= len(self.CLASS_NAME):
                self.print_error('  ----- 请打开退税系统 !!! -----')
                index = -1
                self.healper()
            index += 1
            self.init_app(index)

    @staticmethod
    def get_total(datas):
        total = 0
        for data in datas:
            total += int(data.get('数量'))
        return total

    def auto_operate(self, datas):
        for data in datas:
            try:  # TODO 程序出错处理,比如点击提示框或者点击确认框
                self.dlg.click_input()
                # os.system("pause")
                # self.give_add()
                self.dlg.Edit0.set_text(data.get('关联号'))
                self.dlg.Edit4.set_text(data.get('申报年月'))
                self.dlg.Edit5.set_text(data.get('申报批次'))
                self.dlg.Edit6.set_text(data.get('序号'))
                self.dlg.Edit9.set_text(data.get('进货凭证号'))
                self.dlg.Edit11.set_text(data.get('供货方纳税号'))
                self.dlg.Edit12.set_text(str(data.get('开票日期')))
                self.dlg.Edit13.set_text(data.get('商品编码')[:8])
                self.dlg.Edit13.type_keys('~')  # 商品代码按回车
                unit = self.dlg.Edit15.texts()[0]  # 单位
                assert unit, '单位不可为空'
                rate = self.dlg.Edit20.texts()[0]  # 退税率
                assert self.is_float_or_number(rate), '退税率不可为空'
                if self.is_rate_0(rate, data):
                    continue
                self.dlg.Edit16.set_text(self.deal_unit(unit, datas, data))  # 数量
                # self.dlg.Edit16.set_text(50)  # 数量
                self.dlg.Edit17.set_text(data.get('调整后人民币不含税总价'))  # 计税金额
                self.dlg.Edit17.type_keys('~')  # 计税金额按回车
                self.dlg.Edit25.type_keys('~')  # 备注按回车
                time.sleep(1)
                self.click_sys_prompt('保存')
                time.sleep(1)
            except Exception as e:
                log.error(traceback.format_exc())
                self.click_sys_ask('确认')
                self.click_sys_prompt('确认')
                self.give_up()  # 放弃
                self.print_error('->'.join((str(e), str(data))))
                log.error('->'.join((str(e), str(data))))
                self.errors.append(data)
                time.sleep(1)

    def give_add(self):
        """增加"""
        print("self.dlg['增加'] = ", self.dlg['增加'])
        if self.dlg['放弃'].is_enabled():
            self.give_up()
        self.click_button('增加')

    def give_up(self):
        """放弃"""
        self.click_button('放弃')
        self.click_sys_ask('确认')

    def is_rate_0(self, rate, data):
        """处理退税率为0的情况"""
        if rate and int(rate.split('.')[0]) == 0:
            self.rate_0.append(data)
            return True
        return False

    def deal_unit(self, unit, datas, data):
        """处理单位问题"""
        if pd.isnull(data.get('报关单号')):
            return data.get('数量')
        declist_keys = ['CodeTs', 'GName', 'GUnit', 'FirstUnit', 'GQty', 'FirstQty', 'DeclPrice']
        msgs = {'DecMsg': 'DecList', 'SpecialDecmsg': 'SpecialDeclist', 'SpiderDecMsg': 'SpiderDeclist',
                'HisDecMsg': 'HisDeclist'}
        for msg in msgs:
            ret = self.sql.select(msg, 'DecId', where={'QpEntryId': data.get('报关单号'), 'DeleteFlag': 0}, first=True)
            if ret:
                dec_lists = self.sql.select(msgs.get(msg), *declist_keys, where={'DecId': ret.get('DecId'), 'DeleteFlag': 0})
                log.debug('报关单号:{0!r}, msg: {1!r}, list: {2!r}'.format(data.get('报关单号'), msg, msgs.get(msg)))
                break
        else:
            raise Exception('报关单号{0!r}不存在,请检查'.format(data.get('报关单号')))
        total = self.get_total(datas)  # 应该是总量要相等
        declist = self.confirm_declist(data, dec_lists)
        assert declist, '未找到对应表体记录,关联号:{0!r},商品编码:{1!r},商品名称:{2!r},数量:{3!r},报关单号:{4!r}'.format(
                                        data['关联号'], data['商品编码'], data['商品名称'], data['数量'], data['报关单号'])
        _map = {
            self.unit_code_2_name(declist['GUnit']): declist['GQty'],
            self.unit_code_2_name(declist['FirstUnit']): declist['FirstQty'],
        }
        log.debug('_map.get(unit) = {}'.format(_map.get(unit)))
        log.debug("Decimal(data['数量']).quantize(Decimal('0.0000')) = {}".format(Decimal(data['数量']).quantize(Decimal('0.0000'))))
        log.debug("declist['GQty'] = {}".format(declist['GQty']))
        return str(_map.get(unit)*Decimal(data['数量']).quantize(Decimal('0.0000'))/declist['GQty'])  # 报关单数量(个/千克)*模板数量(个)/申报数量(个)

    def unit_code_2_name(self, unit):
        """单位code转名称"""
        unit_name = self.sql.select('UnitCode', 'Name', where={'Code': unit}, first=True)
        assert unit_name, '转换单位失败,在UnitCode表中不存在Code等于{0!r}的记录'.format(unit)
        return unit_name.get('Name')

    def confirm_declist(self, data, declists):
        """确认表体"""
        for declist in declists:
            if declist['CodeTs'] == data['商品编码'] and declist['GName'] == data['商品名称'] and (
                    declist['GQty'] == Decimal(data['数量']).quantize(Decimal('0.0000'))
                        or declist['FirstQty'] == Decimal(data['数量']).quantize(Decimal('0.0000'))):  # 商品编码+品名确定报关单表体
                return declist

    def write_error_and_rate0_2_file(self):
        """将错误和rate为0的记录下来并写入excel"""
        log.debug('发生错误没录入的记录 = {}'.format(self.errors))
        log.debug('退税率为零的记录 = {}'.format(self.rate_0))
        with pd.ExcelWriter(self.file_path) as writer:
            if self.errors:
                pd.DataFrame(self.errors).to_excel(writer, sheet_name='错误记录', index=None)  # 写入报错的记录
            if self.rate_0:
                pd.DataFrame(self.rate_0).to_excel(writer, sheet_name='退税率为0的记录', index=None)  # 写入退税率为0的记录

    def click_sys_ask(self, value):
        """系统询问点确认"""
        try:
            self.app.window(title_re='系统询问', class_name=self.class_name)[value].click()  # 确认
        except:
            pass

    def click_sys_prompt(self, value):
        """系统提示点确认"""
        try:
            self.app.window(title_re='系统提示', class_name=self.class_name)[value].click()  # 确认
        except:
            pass

    def click_button(self, button_name):
        if self.dlg[button_name].is_enabled():
            self.dlg[button_name].click()
            self.dlg[button_name].wait('visible')

    def is_float_or_number(self, v):
        regex = re.compile(r'(^-?\d+?.\d+?$)|(^-?\d+$)')
        if re.match(regex, v):
            return True
        return False

    def retry_errors(self, retry=0):
        if self.errors:
            retry_errors = self.errors[:]
            self.errors.clear()
            self.auto_operate(retry_errors)
        else: return
        if retry > 0:
            self.retry_errors(retry-1)

    def run(self):
        for data in self.data:
            if not isinstance(data, list): data = [data]
            self.auto_operate(data)
        self.retry_errors(RETRY_ERROR_COUNT)
        self.write_error_and_rate0_2_file()
        print('  谢谢您的使用,祝您生活愉快,再见')
        print('  程序执行完毕')
        time.sleep(1)
コード例 #11
0
ファイル: generate_excel.py プロジェクト: Zzaniu/nothing
class GenerateExcel(object):
    SPIDER_MAP = {
        "机保": NemsSpider,
        "前海": SpecialNemsSpider,
    }
    FIELDS_MAP = {
        'bondInvtNo': '清单编号',
        'listStatName': '数据状态',
        'vrfdedMarkcdName': '核扣标志',
        'inputTime': '录入时间',
        'invtIochkptStucdName': '清单进出卡口状态',
        'rltEntryBizopEtpsNm': '关联报关单境内收发货人名称',
        'gdsSeqno': '备案序号/自动备案序号',
        'gdsMtno': '商品料号',
        'gdecd': '商品编码',
        'gdsNm': '商品名称',
        'dclCurrcdName': '币制',
        'dclUnitcdName': '申报计量单位',
        'lawfUnitcdName': '法定计量单位',
        'dclQty': '申报数量',
        'lawfQty': '法定数量',
        'dclTotalAmt': '企业申报总价',
        'putrecNo': '手账册编号',
    }
    GENERATE_FILE_DIR = r"d:\周晓琳"

    def __init__(self, file_path, name):
        self.spider = self.SPIDER_MAP.get(name)()
        self.sql = Sql(settings.DATABASES_GOLD_8_3)
        self.timer = TimeHelper()
        self.file_path = file_path
        os.makedirs(self.GENERATE_FILE_DIR, exist_ok=True)
        self.generate_file_path = os.path.join(self.GENERATE_FILE_DIR, name+self.timer.get_str_now_time) + '.xlsx'
        self.error = []

    def get_eid_ieflag(self, bondInvtNo):
        return bondInvtNo, bondInvtNo[8:9]

    def get_bondInvtNos(self):
        data = pd.read_excel(self.file_path, skiprows=0, sheet_name='核注清单', converters={'海关编号': str})
        data = data.drop_duplicates()  # 列去重,只有一列,所以不需要制定那一列了
        print('共有数据{}条'.format(data.ix[:, 0].size))
        setattr(self, 'datas', data.loc[:, ['海关编号']])
        # print('self.datas = ', self.datas, ', type = ', type(self.datas))
        # for index, _data in enumerate(data.ix[:, 0]):
        #     yield _data
        #     print('已处理{0}条数据, sqeno = {1}'.format(index+1, _data))

    # def generate_excel(self, datas):
    #     f = pd.concat(datas, ignore_index=True)  # 将DataFrame纵向连接起来
    #     f.to_excel('D://单一窗口核注清单.xlsx', index_label='序号')

    def get_data_2_sql(self):
        """从单一窗口拿数据"""
        self.get_bondInvtNos()
        for index, bondInvtNo in enumerate(self.datas.ix[:, 0]):
            try:
                with aniu_atomic(self.sql):
                    if len(str(bondInvtNo)) < 18 or self.sql.select(
                            'nems_info_20190313', 'id', where={'bondInvtNo': bondInvtNo}, first=True):
                        print('bondInvtNo = ', bondInvtNo, len(str(bondInvtNo)), ', 数据库可能已存在此数据')
                        continue
                    ret = self.spider._get_search_post_info('', *self.get_eid_ieflag(bondInvtNo))
                    seqno = ret.get('seqNo')
                    listStatName = ret.get('listStatName')
                    regex = re.compile(r'(删除)|(退单)|(暂存)')
                    if re.search(regex, listStatName): raise Exception('状态为{}的单不处理,直接跳过')
                    vrfdedMarkcdName = ret.get('vrfdedMarkcdName')
                    head, lists = self.spider.get_head_and_list_detail(seqno)
                    inputTime = head.get('inputTime')
                    invtIochkptStucdName = head.get('invtIochkptStucdName')
                    rltEntryBizopEtpsNm = head.get('rltEntryBizopEtpsNm')
                    putrecNo = head.get('putrecNo')
                    for list_info in lists:
                        gdsSeqno = list_info.get('putrecSeqno') or list_info.get('param3') or ''
                        gdsMtno = list_info.get('gdsMtno')
                        gdecd = list_info.get('gdecd')
                        gdsNm = list_info.get('gdsNm')
                        dclCurrcdName = list_info.get('dclCurrcdName')
                        dclUnitcdName = list_info.get('dclUnitcdName')
                        lawfUnitcdName = list_info.get('lawfUnitcdName')
                        dclQty = list_info.get('dclQty')
                        lawfQty = list_info.get('lawfQty')
                        dclTotalAmt = list_info.get('dclTotalAmt')

                        _dict = {
                            'bondInvtNo': bondInvtNo,
                            'listStatName': listStatName,
                            'vrfdedMarkcdName': vrfdedMarkcdName,
                            'inputTime': inputTime,
                            'invtIochkptStucdName': invtIochkptStucdName,
                            'rltEntryBizopEtpsNm': rltEntryBizopEtpsNm,
                            'gdsSeqno': gdsSeqno,
                            'gdsMtno': gdsMtno,
                            'gdecd': gdecd,
                            'gdsNm': gdsNm,
                            'dclCurrcdName': dclCurrcdName,
                            'dclUnitcdName': dclUnitcdName,
                            'lawfUnitcdName': lawfUnitcdName,
                            'dclQty': dclQty,
                            'lawfQty': lawfQty,
                            'dclTotalAmt': dclTotalAmt,
                            'putrecNo': putrecNo,
                        }
                        self.sql.insert('nems_info_20190313', **_dict)
                    print('第{0}条数据{1}插入成功'.format(index+1, bondInvtNo))
            except Exception as e:
                if str(e).find('直接跳过') > -1:
                    print(bondInvtNo, str(e))
                    continue
                print(traceback.print_exc())
                self.error.append(bondInvtNo)
        if self.error: print('错误记录: {}'.format(','.join(self.error)))
        print('数据已全部插入')

    def get_data(self):
        """帮晓林哥统计核注清单清单编号对应的备案序号/自动备案序号对应申报数量"""
        for bondInvtNo in self.get_bondInvtNos():

                _dict = {'清单编号': [], '备案序号/自动备案序号': [], '申报数量': [], '数据状态': [], '核扣标志': [], '录入时间': [],
                         '清单进出卡口状态': [], '关联报关单境内收发货人名称': [], '商品料号': [], '商品编码': [], '商品名称': [],
                         '币制': [], '申报计量单位': [], '法定计量单位': [], '法定数量': [], '企业申报总价': [],}
                ret = self.spider._get_search_post_info('', *self.get_eid_ieflag(bondInvtNo))
                seqno = ret.get('seqNo')
                listStatName = ret.get('listStatName')
                vrfdedMarkcdName = ret.get('vrfdedMarkcdName')
                head, lists = self.spider.get_head_and_list_detail(seqno)
                inputTime = head.get('inputTime')
                invtIochkptStucdName = head.get('invtIochkptStucdName')
                rltEntryBizopEtpsNm = head.get('rltEntryBizopEtpsNm')
                for list_info in lists:
                    gdsSeqno = list_info.get('putrecSeqno') or list_info.get('param3') or ''
                    gdsMtno = list_info.get('gdsMtno')
                    gdecd = list_info.get('gdecd')
                    gdsNm = list_info.get('gdsNm')
                    dclCurrcdName = list_info.get('dclCurrcdName')
                    dclUnitcdName = list_info.get('dclUnitcdName')
                    lawfUnitcdName = list_info.get('lawfUnitcdName')
                    dclQty = list_info.get('dclQty')
                    lawfQty = list_info.get('lawfQty')
                    dclTotalAmt = list_info.get('dclTotalAmt')
                    _dict['清单编号'].append(bondInvtNo)
                    _dict['数据状态'].append(listStatName)
                    _dict['核扣标志'].append(vrfdedMarkcdName)
                    _dict['录入时间'].append(inputTime)
                    _dict['清单进出卡口状态'].append(invtIochkptStucdName)
                    _dict['关联报关单境内收发货人名称'].append(rltEntryBizopEtpsNm)
                    _dict['备案序号/自动备案序号'].append(gdsSeqno)
                    _dict['商品料号'].append(gdsMtno)
                    _dict['商品编码'].append(gdecd)
                    _dict['商品名称'].append(gdsNm)
                    _dict['币制'].append(dclCurrcdName)
                    _dict['申报计量单位'].append(dclUnitcdName)
                    _dict['法定计量单位'].append(lawfUnitcdName)
                    _dict['申报数量'].append(dclQty)
                    _dict['法定数量'].append(lawfQty)
                    _dict['企业申报总价'].append(dclTotalAmt)
                f = pd.DataFrame(_dict)
                yield f

    def generate_excel(self):
        conn = pymysql.connect(**settings.DATABASES_GOLD_8_3)
        sql = 'select * from nems_info_20190313'
        data = pd.read_sql(sql, con=conn)
        data.drop('id', axis=1, inplace=True)
        data.columns = list(map(lambda x: self.FIELDS_MAP.get(x), data.columns))
        print('self.generate_file_path = ', self.generate_file_path)
        with pd.ExcelWriter(self.generate_file_path) as writer:
            if hasattr(self, 'datas') and not self.datas.empty:
                self.datas.to_excel(writer, sheet_name='原始数据', index=None)
            if not data.empty:
                data.to_excel(writer, sheet_name='数据统计', index=None)
            print('self.error = ', self.error)
            if self.error:
                pd.DataFrame({'海关编号': self.error}).to_excel(writer, sheet_name='错误记录', index=None)  # 写入报错的记录

    @used_time
    def run(self):
        self.sql.delete('nems_info_20190313', where={'id__gt': 0})
        self.get_data_2_sql()
        self.generate_excel()