Example #1
0
class AriesParser():
    def __init__(self, text):
        self.text = text
        self.head_splitted_account = None
        self.account_utils = AccountUtils()
        self.string_utils = StringUtils()

    def parse(self):
        text = self.__preprocess_text(self.text)
        lines = self.__scan_lines(text)
        return self.__parse_lines(lines)

    def __preprocess_text(self, text):
        text = self.account_utils.concat_account(text)
        text = self.account_utils.remove_eten_separation(text)
        return text

    def __scan_lines(self, text):
        scanner = Scanner(Source(text))
        scanner.scan()
        tokens = scanner.get_tokens()

        lines = []
        tokens_in_line = []
        for token in tokens:
            tokens_in_line.append(token)
            if token.get_token_type() == 'TK_EOL':
                lines.append(tokens_in_line)
                tokens_in_line = []
        return lines

    def __parse_lines(self, lines):
        column_name_list = None
        visited_column_name_list = False
        row_list = []
        for line in lines:
            type_list = [token.get_token_type() for token in line]
            #print type_list
            # pass useless line
            if type_list == ['TK_EOL']:
                continue
            elif type_list == ['TK_SEPERATION', 'TK_EOL']:
                continue
            
            # try to parse column name list
            if not visited_column_name_list:
                if type_list == ['TK_ACCOUNT', 'TK_ACCOUNT', 'TK_EOL']:
                    try:
                        column_name_list = self.__parse_column_name_list(line[:2])
                        visited_column_name_list = True
                    except Exception:
                        pass
                elif type_list == ['TK_ACCOUNT', 'TK_ACCOUNT', 'TK_ACCOUNT', 'TK_EOL']:
                    try:
                        column_name_list = self.__parse_column_name_list(line[1:3])
                        visited_column_name_list = True
                    except Exception:
                        pass
            # try to parse rest row list
            else:
                row = None
                if type_list == ['TK_ACCOUNT', 'TK_NUMBER', 'TK_NUMBER', 'TK_EOL']:
                    row = self.__parse_account_number_number_line(line)
                elif type_list == ['TK_ACCOUNT', 'TK_LEFT_PAREN', 'TK_NUMBER', 'TK_RIGHT_PAREN', 'TK_NUMBER', 'TK_EOL']:
                    row = self.__parse_account_paren_number_number_line(line)
                elif type_list == ['TK_ACCOUNT', 'TK_NUMBER', 'TK_LEFT_PAREN', 'TK_NUMBER', 'TK_RIGHT_PAREN', 'TK_EOL']:
                    row = self.__parse_account_number_paren_number_line(line)
                elif type_list == ['TK_ACCOUNT', 'TK_LEFT_PAREN', 'TK_NUMBER', 'TK_RIGHT_PAREN', 'TK_LEFT_PAREN', 'TK_NUMBER', 'TK_RIGHT_PAREN', 'TK_EOL']:
                    row = self.__parse_account_paren_number_paren_number_line(line)
                elif type_list == ['TK_ACCOUNT', 'TK_EOL']:
                    row = self.__parse_account_line(line)
                elif type_list == ['TK_ACCOUNT', 'TK_ACCOUNT', 'TK_ACCOUNT', 'TK_EOL']:
                    continue
                else:
                    raise ValueError
                row_list.append(row) if row else None

        assert visited_column_name_list, 'We should parse column name list'
        assert len(row_list) > 0, 'We should parse some rows'
        return column_name_list, row_list   

    def __parse_column_name_list(self, stmt_date_list):
        column_name_list = [u'會計科目']
        assert len(stmt_date_list) == 2, 'There shouble be 2 statement dates' 
        for i in [0, 1]:
            date_period = self.string_utils.from_local_string_to_date_period(stmt_date_list[i].get_value())
            stmt_date = date_period[1]
            column_name_list.append(stmt_date)  
        return column_name_list        

    # ['TK_ACCOUNT', 'TK_EOL']
    def __parse_account_line(self, line):
        return [line[0].get_value()]

    # ['TK_ACCOUNT', 'TK_NUMBER', 'TK_NUMBER', 'TK_EOL']
    def __parse_account_number_number_line(self, line):
        return [
            line[0].get_value(),
            self.string_utils.normalize_number(line[1].get_value()), 
            self.string_utils.normalize_number(line[2].get_value())
        ]

    # ['TK_ACCOUNT', 'TK_LEFT_PAREN', 'TK_NUMBER', 'TK_RIGHT_PAREN', 'TK_NUMBER', 'TK_EOL']
    def __parse_account_paren_number_number_line(self, line):
        return [
            line[0].get_value(),
            -self.string_utils.normalize_number(line[2].get_value()),
            self.string_utils.normalize_number(line[4].get_value())
        ]
        
    # ['TK_ACCOUNT', 'TK_NUMBER', 'TK_LEFT_PAREN', 'TK_NUMBER', 'TK_RIGHT_PAREN', 'TK_EOL']
    def __parse_account_number_paren_number_line(self, line):
        return [
            line[0].get_value(),
            self.string_utils.normalize_number(line[1].get_value()),
            -self.string_utils.normalize_number(line[3].get_value())
        ]

    # ['TK_ACCOUNT', 'TK_LEFT_PAREN', 'TK_NUMBER', 'TK_RIGHT_PAREN', 'TK_LEFT_PAREN', 'TK_NUMBER', 'TK_RIGHT_PAREN', 'TK_EOL']
    def __parse_account_paren_number_paren_number_line(self, line):
        return [
            line[0].get_value(),
            -self.string_utils.normalize_number(line[2].get_value()),
            -self.string_utils.normalize_number(line[5].get_value())
        ]
Example #2
0
 def __init__(self, text):
     self.text = text
     self.head_splitted_account = None
     self.account_utils = AccountUtils()
     self.string_utils = StringUtils()