def database_input(self):
    """Store a 'form' summary document describing the processed sheet.

    The company name falls back to the file title when it was not found
    earlier, and is then translated to a collection id through
    ``data.name_mapping`` ('temp' when no name is known at all, the raw
    name when it has no mapping entry).

    Side effects: sets ``self.name_mapping``, possibly ``self.self_name``,
    and ``self.transaction_num``; calls ``mongo.delete_datas`` and
    ``mongo.insert_data`` (presumably replacing the earlier document for
    the same output path -- confirm against the mongo wrapper).
    """
    self.name_mapping = data.name_mapping
    # Extract the company name from the title. The loop does not stop at
    # the first hit, so the last matching name in the mapping wins.
    if not self.self_name:
        for name in self.name_mapping:
            if name in self.title:
                self.self_name = name

    if self.self_name in self.name_mapping:
        comp_id = self.name_mapping[self.self_name]
    elif not self.self_name:
        comp_id = 'temp'
    else:
        comp_id = self.self_name

    mongo.delete_datas({'path': self.output_path}, comp_id, 'mapping')

    # DataFrame.shape is (rows, columns): each transaction is a row, so the
    # count is shape[0]. The previous shape[1] stored the column count.
    self.transaction_num = self.target_df.shape[0]
    info = {
        'type': 'form',
        'path': self.output_path,
        'company_name': self.self_name,
        'dates': [self.start_date, self.end_date],
        'account': self.self_account,
        'currency': self.currency,
        'gen_date': self.gen_date,
        'transactions_num': self.transaction_num,
        # NOTE(review): misspelled duplicate of 'transactions_num', kept
        # only because existing readers may rely on it -- confirm and drop.
        'transctions_num': self.transaction_num,
    }

    mongo.insert_data(info, comp_id, 'mapping')
Beispiel #2
0
    def update_rule(self, query):
        """Apply user-selected ``{target: option}`` pairs and persist them.

        ``query`` should be in the form ``{'target': 'option'}``. Pairs are
        handled one at a time; each accepted pair is written to
        ``self.user_rules`` and ``self.reversed_mapping`` and the user's
        rule document is rewritten through ``mongo`` straight away.

        Returns:
            bool: False as soon as an option missing from
            ``self.option_list`` is met (pairs handled before it stay
            applied), True once every pair has been processed.
        """
        for target, choice in query.items():
            # 1. Reject options that the current sheet does not offer.
            if choice not in self.option_list:
                print('错误!不存在此选项')
                return False

            # 2. The target is matched now: take it off the pending list (if
            #    it is still there) and remember the user's choice.
            pending = self.target_unmatched
            if target in pending:
                pending.remove(target)
            self.user_rules[target] = choice

            # Rewrite this user's stored rules: delete by name, then insert.
            owner_filter = {'name': self.user_name}
            mongo.delete_datas(owner_filter, 'user_rule', 'mapping')
            mongo.insert_data(self.user_rules, 'user_rule', 'mapping')

            # 3. Keep reversed_mapping in step; it is used later when the
            #    Excel output is generated. 'none' stays selectable.
            self.reversed_mapping[target] = choice
        return True
def add_rules(request, company, rule_name):
    """Merge ``request`` into the rule document of a company / rule name.

    The existing document (first result of ``mongo.show_datas`` for the
    company and rule name) is fetched and deleted; when that fails, a new
    document holding just the two identifying fields is started. The
    merged document is then inserted and printed.

    Args:
        request: mapping of rule entries to add or overwrite.
        company: value stored under 'company'.
        rule_name: value stored under 'rule_name'.

    Returns:
        str: 'success update ' followed by ``str(request)``.
    """
    query = {'company': company, 'rule_name': rule_name}
    try:
        user_rules = mongo.show_datas('user_rule', query, 'Mapping')[0]
        mongo.delete_datas(query, 'user_rule', 'Mapping')
    except Exception:
        # Best-effort lookup: any ordinary failure (typically no document
        # yet) starts a fresh record. A bare ``except:`` would also swallow
        # KeyboardInterrupt / SystemExit, so only Exception is caught.
        user_rules = dict(query)
    user_rules.update(request)
    mongo.insert_data(user_rules, 'user_rule', 'Mapping')
    print(user_rules)
    return 'success update ' + str(request)
def main_mg(company, batch_id):
    """Re-process every cached 'mapped_df' document of one company batch.

    Each cached document's JSON payload is loaded into a DataFrame, passed
    through ``process_file`` with the rule maps from ``get_rules(rulePath)``
    and written back (delete, then insert) to the 'mapped_df' collection of
    the 'Cache' database.

    Args:
        company: company value used to select cached documents.
        batch_id: batch value used to select cached documents.
    """
    in_map, out_map = get_rules(rulePath)
    # Drop placeholder keys. Every removal gets its own try: with a single
    # shared try block, a missing first key skipped the remaining deletions.
    for rule_map, junk_key in ((in_map, 'nan'), (out_map, ''), (out_map, 'nan')):
        try:
            del rule_map[junk_key]
        except Exception as e:
            print(e)
    datas = mg.show_datas('mapped_df', query={'company': company, 'batch_id': batch_id}, db='Cache')
    for record in datas:
        cur_df = pd.read_json(record['data'])
        labeled_df = process_file(cur_df, '', in_map, out_map, show_plot=False, write_excel=False)
        record['data'] = labeled_df.to_json(orient='columns', force_ascii=False)
        # Replace the cached document for this file/table within the batch.
        mg.delete_datas({'batch_id': batch_id, 'file': record['file'], 'table': record['table']}, 'mapped_df', 'Cache')
        mg.insert_data(record, 'mapped_df', 'Cache')
def add_rules(query, user):
    """Merge ``query`` into the stored rule document of ``user``.

    The user's document is looked up with ``type == 'user_rule'``; when the
    lookup fails a new document is started. The merged document replaces
    whatever was stored under the user's name (delete, then insert).

    Args:
        query: mapping of rule entries to add or overwrite.
        user: value stored under 'name'.

    Returns:
        str: always 'success'.
    """
    try:
        user_rules = mongo.show_datas('user_rule', {
            'type': 'user_rule',
            'name': user
        }, 'mapping')[0]
    except Exception:
        # The new document must carry the same type tag the lookup filters
        # on. It used to be created as "user_rules", so it was never found
        # again and earlier rules were dropped on the next call.
        user_rules = {'type': 'user_rule', 'name': user}
        print('no user rules yet.')
    user_rules.update(query)
    # Remove the user's previous document(s) before inserting the new one.
    mongo.delete_datas({'name': user}, 'user_rule', 'mapping')
    mongo.insert_data(user_rules, 'user_rule', 'mapping')
    return 'success'
def add_stats(query, path):
    """Merge ``query`` into the stored 'necessary' record of ``path``.

    The existing record (first result for ``type == 'necessary'`` and the
    given path) is used as the base when the lookup succeeds; otherwise a
    new record with just those two fields is started. The merged record
    then replaces the stored one (delete, then insert).

    Args:
        query: mapping of fields to add or overwrite.
        path: value stored under 'path'.

    Returns:
        str: always 'success'.
    """
    record_filter = {'type': 'necessary', 'path': path}
    try:
        necc_info = mongo.show_datas('necessary', dict(record_filter),
                                     'mapping')[0]
    except Exception:
        # Best-effort lookup; only ordinary errors are treated as "no
        # record yet" (a bare except would also trap KeyboardInterrupt).
        necc_info = dict(record_filter)
    necc_info.update(query)
    # Remove the previous record before inserting the merged one.
    mongo.delete_datas(dict(record_filter), 'necessary', 'mapping')
    mongo.insert_data(necc_info, 'necessary', 'mapping')
    return 'success'
def add_stats(request, company, file, table, batch_id):
    """Merge ``request`` into the 'sheet_info' document of one sheet.

    The sheet is identified by company, file, table and batch id. The
    existing document is fetched and deleted; when that fails, a new
    document holding just the identifying fields is started. The merged
    document is then inserted and printed.

    Args:
        request: mapping of fields to add or overwrite.
        company: value stored under 'company'.
        file: value stored under 'file'.
        table: value stored under 'table'.
        batch_id: value stored under 'batch_id'.

    Returns:
        str: 'success update ' followed by ``str(request)``.
    """
    query = {
        'company': company,
        'file': file,
        'table': table,
        'batch_id': batch_id
    }
    try:
        necc_info = mongo.show_datas('sheet_info', query, 'Info')[0]
        mongo.delete_datas(query, 'sheet_info', 'Info')
    except Exception:
        # Best-effort: ordinary failures (typically no document yet) start a
        # fresh record. Exception rather than a bare except, so that
        # KeyboardInterrupt / SystemExit still propagate.
        necc_info = dict(query)
    necc_info.update(request)
    mongo.insert_data(necc_info, 'sheet_info', 'Info')
    print(necc_info)
    return 'success update ' + str(request)
Beispiel #8
0
def add_stats(query, path):
    """Merge ``query`` into the stored 'necessary' record of ``path``.

    Uses the first stored record matching ``type == 'necessary'`` and the
    given path as the base, or a new record with just those two fields when
    the lookup fails, and then replaces the stored record with the merged
    one (delete, then insert).

    Args:
        query: mapping of fields to add or overwrite.
        path: value stored under 'path'.

    Returns:
        str: always 'success'.
    """
    try:
        necc_info = mongo.show_datas('necessary', {
            'type': 'necessary',
            'path': path
        }, 'mapping')[0]
    except Exception:
        # Only ordinary errors mean "nothing stored yet"; a bare except
        # would also swallow KeyboardInterrupt / SystemExit.
        necc_info = {'type': 'necessary', 'path': path}
    necc_info.update(query)
    # Remove the previous record before inserting the merged one.
    mongo.delete_datas({
        'type': 'necessary',
        'path': path
    }, 'necessary', 'mapping')
    mongo.insert_data(necc_info, 'necessary', 'mapping')
    return 'success'
Beispiel #9
0
    def database_input(self):
        """Store a 'form' summary document describing the processed sheet.

        The company name falls back to the file title when it was not found
        earlier, and is then translated to a collection id through the
        hard-coded name mapping ('temp' when no name is known at all, the
        raw name when it has no mapping entry).

        Side effects: sets ``self.name_mapping`` and possibly
        ``self.self_name``; prints debugging output; calls
        ``mongo.delete_datas`` and ``mongo.insert_data`` (presumably
        replacing the earlier document for the same output path -- confirm
        against the mongo wrapper).
        """
        # Full and short company names -> collection name. The collection
        # name could later be derived from the pinyin of the first four
        # characters instead.
        self.name_mapping = {
            '上海爱钛技术咨询有限公司': 'aitai',
            '宜昌华昊新材料科技有限公司': 'huahao',
            '浙江亿控自动化设备有限公司': 'yikong',
            '爱钛': 'aitai',
            '华昊': 'huahao',
            '亿控': 'yikong',
            '同普': 'tongpu',
        }
        # Extract the company name from the title. The loop does not stop
        # at the first hit, so the last matching name in the mapping wins.
        if not self.self_name:
            for name in self.name_mapping:
                if name in self.title:
                    self.self_name = name

        if self.self_name in self.name_mapping:
            comp_id = self.name_mapping[self.self_name]
        elif not self.self_name:
            comp_id = 'temp'
        else:
            comp_id = self.self_name
        print(comp_id)
        mongo.delete_datas({'path': self.output_path}, comp_id, 'mapping')
        print(self.raw_df)
        print(self.target_df)
        info = {
            'type': 'form',
            'path': self.output_path,
            'company_name': self.self_name,
            'dates': [self.start_date, self.end_date],
            'account': self.self_account,
            'currency': self.currency,
            'gen_date': self.gen_date,
            # DataFrame.shape is (rows, columns): each transaction is a
            # row, so the count is shape[0]; shape[1] counted columns.
            'transactions_num': self.target_df.shape[0]
        }

        mongo.insert_data(info, comp_id, 'mapping')
 def save_df(self):
     """Write ``self.generated_df`` as JSON to the 'mapped_df' cache.

     The document is identified by company, file title, table and batch
     id. When ``mongo.show_datas`` reports an existing entry for that
     identity it is deleted before the insert; otherwise the batch id is
     printed after the insert.
     """
     payload = self.generated_df.to_json(orient='columns',
                                         force_ascii=False)
     # Fields identifying this sheet within a batch.
     query = {
         'company': self.company,
         'file': self.title,
         'table': self.table,
         'batch_id': self.batch_id,
     }
     # Stored document: the identifying fields followed by the JSON data.
     df_data = dict(query, data=payload)

     already_cached = bool(mongo.show_datas('mapped_df', query, 'Cache'))
     if already_cached:
         mongo.delete_datas(query, 'mapped_df', 'Cache')
     mongo.insert_data(df_data, 'mapped_df', 'Cache')
     if not already_cached:
         print('batch_id is ', self.batch_id)
Beispiel #11
0
 def clear_user_rule(self):
     """Ask the mongo wrapper to delete the rules stored under this user's name."""
     owner_filter = {'name': self.user_name}
     mongo.delete_datas(owner_filter, 'user_rule', 'mapping')
Beispiel #12
0
    def mapping(self):
        """Match sheet headers to target headers and reconcile required fields.

        Loads the base rules, the rule summary, this user's rules and the
        stored 'necessary' record through ``mongo``; pre-fills
        ``self.reversed_mapping`` from the base rules and the user rules;
        copies stored 'necessary' values onto the instance and instance
        values back into the record; and rewrites the 'necessary' record.

        Returns:
            list: ``[target_unmatched, option_list, necessary_unmatched]``.

        Raises:
            Whatever the first two ``mongo.show_datas(...)[0]`` lookups
            raise when the base rule documents are missing (not caught).
        """
        # Get the base rules and the rule summary from MongoDB.
        self.base_rules_summary = mongo.show_datas('base_rule',
                                                   {'type': 'rule_summary'},
                                                   'mapping')[0]
        self.base_rules = mongo.show_datas('base_rule', {'type': 'base_rule'},
                                           'mapping')[0]
        try:
            self.user_rules = mongo.show_datas('user_rule', {
                'type': 'user_rule',
                'name': self.user_name
            }, 'mapping')[0]
        except Exception:
            # No stored rules for this user: tag the dict the instance
            # already holds (it must have been initialised elsewhere).
            self.user_rules["type"] = "user_rule"
            self.user_rules['name'] = self.user_name
        try:
            self.necessary_info = mongo.show_datas('necessary', {
                'type': 'necessary',
                'path': self.output_path
            }, 'mapping')[0]
        except Exception:
            self.necessary_info = {
                'type': 'necessary',
                'path': self.output_path,
            }
        # Work on copies so the shared header / item lists are not modified.
        self.target_unmatched = self.base_rules_summary['target_headers'].copy()
        self.necessary_unmatched = self.necessary_items.copy()
        self.option_list.append('none')  # 'none' serves as the empty option
        for key in self.option_list:
            if key in self.base_rules:  # a base rule already matches it
                val = self.base_rules[key]
                self.reversed_mapping[val] = key
                # Guarded: two options may map to the same target, and an
                # unguarded remove() would raise ValueError the second time.
                if val in self.target_unmatched:
                    self.target_unmatched.remove(val)
        # Targets already covered by information read from the sheet itself.
        if self.self_name and '本方名称' in self.target_unmatched:
            self.target_unmatched.remove('本方名称')
        if self.self_account and '本方账号' in self.target_unmatched:
            self.target_unmatched.remove('本方账号')

        # Three steps: stored values update the instance, instance values
        # update the stored record, then the still-empty items are found.
        # Step 1: stored values overwrite the instance attributes. setattr
        # replaces the former exec() of a formatted string, which executed
        # database content as code and broke on quotes; values are still
        # stored as text, as before.
        for key, val in self.necessary_info.items():
            if key not in ['type', 'path', '_id'] and val:
                setattr(self, key, str(val))
                if key in self.necessary_unmatched:
                    self.necessary_unmatched.remove(key)

        # Step 2: copy the instance value of every still-unmatched item into
        # the record.
        for item in self.necessary_unmatched:
            self.necessary_info[item] = getattr(self, item)

        # Step 3: items that now carry a value are no longer unmatched.
        for item, val in self.necessary_info.items():
            if item in self.necessary_unmatched and val:
                self.necessary_unmatched.remove(item)
        print(self.necessary_info, self.necessary_unmatched)
        mongo.delete_datas({
            'type': 'necessary',
            'path': self.output_path
        }, 'necessary', 'mapping')
        mongo.insert_data(self.necessary_info, 'necessary', 'mapping')

        # Merge the user rules into the reverse mapping, then drop targets
        # the merge has covered (target_unmatched was not updated by it).
        self.reversed_mapping.update(self.user_rules)
        self.target_unmatched = [
            target for target in self.target_unmatched
            if target not in self.reversed_mapping
        ]
        return [
            self.target_unmatched, self.option_list, self.necessary_unmatched
        ]
    def info_extractor(self):
        """Find the header row, rebuild the table and collect sheet metadata.

        Steps, in order: locate the header via ``data.keywords_dict``
        (collecting keyword/value pairs met before it), re-read the sheet
        with that header row, drop 'Unnamed' columns, derive company name,
        bank, account number and date range from title / table name / first
        header cell, and store the table as JSON in the 'unmapped_df' cache.

        Returns:
            bool: False when no header row is found (nothing is stored),
            True otherwise.
        """
        keywords_dict = data.keywords_dict
        header_keys = keywords_dict['header_key']
        row_num = 0
        # The header may already be the column index of raw_df (row 0 of
        # the sheet). This does not depend on the row, so check it once.
        row_num_found = any(col in header_keys
                            for col in self.raw_df.columns.tolist())

        if not row_num_found:
            for index in self.raw_df.index:  # scan row by row for keywords
                row_values = self.raw_df.loc[index].values
                for i in range(self.raw_df.shape[1]):
                    cell = row_values[i]
                    if cell in header_keys:  # header row located
                        row_num = index + 1
                        row_num_found = True
                        break

                    # Summary information placed above the header: a keyword
                    # cell is followed by its value in the cell to the right.
                    for key in keywords_dict:
                        if cell in keywords_dict[key]:
                            print(cell)
                            # setattr replaces exec() on a formatted string;
                            # the bounds check covers a keyword sitting in
                            # the last column (formerly an IndexError).
                            if i + 1 < len(row_values):
                                setattr(self, key, row_values[i + 1])
                            break
                if row_num_found:
                    break

        if not row_num_found:
            print('titles not found!')
            return False

        # Rebuild the DataFrame with the detected header row (same sheet).
        self.target_df = pd.read_excel(
            self.file_path, sheet_name=self.table, header=row_num)
        # Drop columns without a header. Non-string labels are skipped;
        # re.search would raise TypeError on them.
        unnamed = [
            col for col in self.target_df.columns.tolist()
            if isinstance(col, str) and re.search(r'Unnamed.*', col)
        ]
        self.target_df = self.target_df.drop(columns=unnamed)
        self.option_list = self.target_df.columns.tolist()  # header list
        self.transaction_num = self.target_df.shape[0]
        print(self.target_df)

        # Company name from the title (the last matching name wins).
        self.name_mapping = data.name_mapping
        if not self.self_name:
            for name in self.name_mapping:
                if name in self.title:
                    self.self_name = name

        # Bank from the table (sheet) name.
        if '银行' in self.table:
            self.self_bank = self.table

        # Account number from the first header cell; str() keeps numeric or
        # empty labels from raising inside re.findall.
        cell = self.raw_df.columns[0]
        match = re.findall(r'(\d{16,19})', str(cell))
        if match:
            print('Found self account number: ', match[0])
            self.self_account = str(match[0])

        # Date range from the title.
        # NOTE(review): month ends are hard-coded to day 30 whatever the
        # month, and the digit groups are not length-checked -- confirm that
        # downstream code tolerates values such as '...0230'.
        if not self.start_date or not self.end_date:
            res = re.findall(r'(20[12]\d)(\d*)-?(\d*)', self.title)
            if res:
                res = res[0]
                if not res[1] and not res[2]:  # only a year was matched
                    self.start_date = res[0] + '0101'
                    self.end_date = res[0] + '1231'
                elif not res[2]:
                    self.start_date = res[0] + res[1] + '01'
                    self.end_date = res[0] + res[1] + '30'
                elif len(res[1]) == 2:
                    if len(res[2]) == 6:
                        self.start_date = res[0] + res[1] + '01'
                        self.end_date = res[2] + '30'
                    if len(res[2]) == 2:
                        self.start_date = res[0] + res[1] + '01'
                        self.end_date = res[0] + res[2] + '30'
                    if len(res[2]) == 1:
                        self.start_date = res[0] + res[1] + '01'
                        self.end_date = res[0] + '0' + res[2] + '30'

        # Store the table as JSON. The lookup key and the stored document
        # share the same 'batch_id' field: the former query used 'batchid',
        # a field never written, so old entries were never replaced.
        df_json = self.target_df.to_json(orient='columns', force_ascii=False)
        query = {
            'company': self.company,
            'file': self.title,
            'table': self.table,
            'batch_id': self.batch_id
        }
        df_data = dict(query, data=df_json)
        if mongo.show_datas('unmapped_df', query, 'Cache'):
            mongo.delete_datas(query, 'unmapped_df', 'Cache')
        mongo.insert_data(df_data, 'unmapped_df', 'Cache')
        return True
    def mapping(self):
        """Match sheet headers to target headers and reconcile required fields.

        Loads the base rules, the rule summary, this user's rules and the
        stored 'necessary' record through ``mongo``; pre-fills
        ``self.reversed_mapping`` from the base rules and the user rules;
        copies stored 'necessary' values onto the instance and instance
        values back into the record; and rewrites the 'necessary' record.

        Returns:
            list: ``[target_unmatched, option_list, necessary_unmatched,
            necessary_info]``.

        Raises:
            Whatever the first two ``mongo.show_datas(...)[0]`` lookups
            raise when the base rule documents are missing (not caught).
        """
        # Get the base rules and the rule summary from MongoDB.
        self.base_rules_summary = mongo.show_datas('base_rule',
                                                   {'type': 'rule_summary'},
                                                   'mapping')[0]
        self.base_rules = mongo.show_datas('base_rule', {'type': 'base_rule'},
                                           'mapping')[0]
        try:
            self.user_rules = mongo.show_datas('user_rule', {
                'type': 'user_rule',
                'name': self.user_name
            }, 'mapping')[0]
        except Exception:
            # No stored rules for this user: tag the dict the instance
            # already holds (it must have been initialised elsewhere).
            self.user_rules["type"] = "user_rule"
            self.user_rules['name'] = self.user_name
        try:
            self.necessary_info = mongo.show_datas('necessary', {
                'type': 'necessary',
                'path': self.output_path
            }, 'mapping')[0]
        except Exception:
            self.necessary_info = {
                'type': 'necessary',
                'path': self.output_path,
            }
        # Work on copies so the shared header / item lists are not modified.
        self.target_unmatched = self.base_rules_summary['target_headers'].copy()
        self.necessary_unmatched = self.necessary_items.copy()
        self.option_list.append('none')  # 'none' serves as the empty option
        for key in self.option_list:
            if key in self.base_rules:  # a base rule already matches it
                val = self.base_rules[key]
                self.reversed_mapping[val] = key
                # Guarded: two options may map to the same target, and an
                # unguarded remove() would raise ValueError the second time.
                if val in self.target_unmatched:
                    self.target_unmatched.remove(val)
        # Targets already covered by information read from the sheet itself.
        if self.self_name and '本方名称' in self.target_unmatched:
            self.target_unmatched.remove('本方名称')
        if self.self_account and '本方账号' in self.target_unmatched:
            self.target_unmatched.remove('本方账号')

        # Three steps: stored values update the instance, instance values
        # update the stored record, then the still-empty items are found.
        # Step 1: stored values overwrite the instance attributes. setattr
        # replaces the former exec() of a formatted string, which executed
        # database content as code and broke on quotes; values are still
        # stored as text, as before.
        for key, val in self.necessary_info.items():
            if key not in ['type', 'path', '_id'] and val:
                setattr(self, key, str(val))
                if key in self.necessary_unmatched:
                    self.necessary_unmatched.remove(key)

        # Step 2: copy the instance value of every still-unmatched item into
        # the record.
        for item in self.necessary_unmatched:
            self.necessary_info[item] = getattr(self, item)

        # Step 3: items that now carry a value are no longer unmatched.
        for item, val in self.necessary_info.items():
            if item in self.necessary_unmatched and val:
                self.necessary_unmatched.remove(item)
        mongo.delete_datas({
            'type': 'necessary',
            'path': self.output_path
        }, 'necessary', 'mapping')
        mongo.insert_data(self.necessary_info, 'necessary', 'mapping')

        # Merge the user rules into the reverse mapping, then drop targets
        # the merge has covered (target_unmatched was not updated by it).
        self.reversed_mapping.update(self.user_rules)
        self.target_unmatched = [
            target for target in self.target_unmatched
            if target not in self.reversed_mapping
        ]
        return [
            self.target_unmatched, self.option_list, self.necessary_unmatched,
            self.necessary_info
        ]