class IfrsOperatingRevenueAssembler():
    """Assemble an ``OperatingRevenueDao`` from an IFRS operating revenue page.

    The page body is expected to contain a ``hasBorder`` table holding the
    data (first row: two header cells, every following row: one item and one
    number) and at least three ``noBorder`` tables, the third of which
    carries the snapshot date in its second cell.  Any deviation from that
    layout is reported with an ``AssertionError``.
    """

    def __init__(self):
        # XPath of the element every other lookup in this class is relative to.
        self.base_xpath = '//html/body'
        self.content_screener = ContentScreener()
        self.string_utils = StringUtils()

    def assemble(self, param):
        """Build the DAO for one page.

        Args:
            param: mapping with the keys ``'content'`` (page markup),
                ``'stock_symbol'`` and ``'date'``.

        Returns:
            ``OperatingRevenueDao(column_name_list, row_list, stock_symbol,
            date)``.

        Raises:
            KeyError: if one of the three keys is missing from ``param``.
            AssertionError: if the page does not have the expected layout.
        """
        content, stock_symbol, date = param['content'], param['stock_symbol'], param['date']
        self.content_screener.screen(param)
        content = self.string_utils.normalize_string(content)
        html_object = lxml.html.fromstring(content)
        relative_html_object = self.__traverse_to_relative_html_object(html_object)
        column_name_list = self.__assemble_column_name_list(relative_html_object)
        row_list = self.__assemble_row_list(relative_html_object)
        return OperatingRevenueDao(column_name_list, row_list, stock_symbol, date)

    def __traverse_to_relative_html_object(self, html_object):
        """Return the first element matching ``self.base_xpath``."""
        relative_html_object_list = html_object.xpath(self.base_xpath)
        assert len(relative_html_object_list) > 0, 'invalid base_xpath'
        return relative_html_object_list[0]

    def __assemble_column_name_list(self, relative_html_object):
        """Return ``[account, snapdate]``.

        ``account`` is the text of the first header cell of the
        ``hasBorder`` table; ``snapdate`` is the second cell of the third
        ``noBorder`` table converted by
        ``string_utils.from_local_string_to_date``.
        """
        # traverse and sanity check
        tr_tags = relative_html_object.xpath('./table[@class="hasBorder"]/tr')
        assert len(tr_tags) > 0, 'invalid tr_tags'

        # traverse and sanity check
        th_texts = tr_tags[0].xpath('./th/text()')
        assert len(th_texts) == 2, 'invalid th_texts size, should be 2'
        # should be account
        account = th_texts[0]

        # traverse and sanity check
        table_tags = relative_html_object.xpath('./table[@class="noBorder"]')
        # The third table is indexed below, so at least three must be present.
        assert len(table_tags) > 2, 'invalid table_tags'
        td_tags = table_tags[2].xpath('./td')
        # The second cell is indexed below, so at least two must be present.
        assert len(td_tags) > 1, 'invalid td_tags'
        # should be snapdate
        snapdate = self.string_utils.from_local_string_to_date(td_tags[1].text)

        return [account, snapdate]

    def __assemble_row_list(self, relative_html_object):
        """Return one ``[item, number]`` row per data row of the table."""
        # skip one row of column name list
        tr_tags = relative_html_object.xpath('./table[@class="hasBorder"]/tr')[1:]
        return [self.__assemble_row(tr_tag) for tr_tag in tr_tags]

    def __assemble_row(self, relative_html_object):
        """Return ``[item, number]`` for a single ``<tr>`` element.

        Raises:
            AssertionError: if the row does not hold exactly one ``<th>``
                text and exactly one ``<td>`` text.
        """
        # should be item
        th_texts = relative_html_object.xpath('./th/text()')
        assert len(th_texts) == 1, 'invalid th_texts size, should be 1'
        item = th_texts[0]

        # should be number (operating revenue)
        td_texts = relative_html_object.xpath('./td/text()')
        # Check the <td> texts themselves; re-checking th_texts here would let
        # an empty cell slip through to the index access below.
        assert len(td_texts) == 1, 'invalid td_texts size, should be 1'
        number = self.string_utils.normalize_number(td_texts[0])

        return [item, number]
class OperatingRevenueSummaryAssembler:
    """Assemble an ``OperatingRevenueSummaryDao`` from a summary page.

    Parsing is delegated to a fixed sequence of parser classes; the first
    one that does not raise ``AssertionError`` provides the result.
    """

    def __init__(self):
        self.content_screener = ContentScreener()
        self.date_utils = DateUtils()
        self.string_utils = StringUtils()

    def assemble(self, param):
        """Build the DAO from ``param['content']`` and ``param['date']``.

        The statement date is the last day of the month of ``param['date']``
        as computed by ``date_utils.get_last_date_of_month``.
        """
        content = param["content"]
        date = param["date"]
        self.content_screener.screen(param)
        stmt_date = self.date_utils.get_last_date_of_month(date)
        summary = self.__assemble_summary(content)
        column_name_list, row_list, release_date = summary
        return OperatingRevenueSummaryDao(column_name_list, row_list, stmt_date, release_date)

    def __assemble_summary(self, html_object):
        """Return the result of the first parser that accepts the input.

        Parsers are tried in the order Aries, Taurus, Gemini, Cancer, Leo.
        An ``AssertionError`` from any parser but the last moves on to the
        next one; an error from the last parser propagates to the caller.
        """
        try:
            return AriesParser().parse(html_object)
        except AssertionError:
            pass

        try:
            return TaurusParser().parse(html_object)
        except AssertionError:
            pass

        try:
            return GeminiParser().parse(html_object)
        except AssertionError:
            pass

        try:
            return CancerParser().parse(html_object)
        except AssertionError:
            pass

        # Last resort: nothing is caught here.
        return LeoParser().parse(html_object)
 def __init__(self):
     """Store the base XPath and create the screener and string helpers.

     NOTE(review): the class this constructor belongs to is not visible
     around this fragment -- confirm where it is meant to live.
     """
     # XPath string kept on the instance; presumably the anchor for later
     # lookups -- verify against the enclosing class.
     self.base_xpath = '//html/body'
     self.content_screener = ContentScreener()
     self.string_utils = StringUtils()
 def __init__(self):
     """Store the base XPath and create the screener and string helpers.

     NOTE(review): the class this constructor belongs to is not visible
     around this fragment -- confirm where it is meant to live.
     """
     # XPath string selecting the "result_table hasBorder" table under the
     # centered content of <body id="content_d">.
     self.base_xpath = '//html/body[@id="content_d"]/center/table[@class="result_table hasBorder"]'
     self.content_screener = ContentScreener()
     self.string_utils = StringUtils()
class XbrlBalanceSheetAssembler():
    """Assemble a ``BalanceSheetDao`` from an XBRL balance sheet page.

    The page must contain exactly one ``result_table hasBorder`` table.  Its
    two ``tblHead`` rows hold the column headers and the statement title;
    every row after the first two table rows is a data row.  Layout
    violations are reported with an ``AssertionError``.
    """

    def __init__(self):
        # XPath of the single result table all other lookups are relative to.
        self.base_xpath = '//html/body[@id="content_d"]/center/table[@class="result_table hasBorder"]'
        self.content_screener = ContentScreener()
        self.string_utils = StringUtils()

    def assemble(self, param):
        """Build the DAO for one page.

        Args:
            param: mapping with the keys ``'content'`` (page markup),
                ``'stock_symbol'`` and ``'date'``.

        Returns:
            ``BalanceSheetDao(column_name_list, row_list, stock_symbol,
            date)``.

        Raises:
            KeyError: if one of the three keys is missing from ``param``.
            AssertionError: if the page does not have the expected layout.
        """
        content, stock_symbol, date = param['content'], param['stock_symbol'], param['date']
        self.content_screener.screen(param)
        content = self.string_utils.normalize_string(content)
        html_object = lxml.html.fromstring(content)
        relative_html_object = self.__traverse_to_relative_html_object(html_object)
        column_name_list = self.__assemble_column_name_list(relative_html_object)
        row_list = self.__assemble_row_list(relative_html_object)
        return BalanceSheetDao(column_name_list, row_list, stock_symbol, date)

    def __traverse_to_relative_html_object(self, html_object):
        """Return the one element matching ``self.base_xpath``."""
        relative_html_object_list = html_object.xpath(self.base_xpath)
        assert len(relative_html_object_list) == 1, 'invalid base_xpath'
        return relative_html_object_list[0]

    def __assemble_column_name_list(self, relative_html_object):
        """Return ``[account_type, snapshot_date, ...]``.

        The first ``tblHead`` row supplies the column headers: the first
        text is kept as is, the remaining ones are converted by
        ``string_utils.from_local_string_to_date``.  The second ``tblHead``
        row must contain the single title ``資產負債表`` ("balance sheet").
        """
        # traverse and sanity check
        tr_tags = relative_html_object.xpath('./tr[@class="tblHead"]')
        assert len(tr_tags) == 2, 'invalid tr_tags'

        # traverse and sanity check
        statement_th_texts = tr_tags[1].xpath('./th/text()')
        assert len(statement_th_texts) == 1, 'invalid statement_th_texts'
        # Compare against the unicode literal directly: the ``unicode``
        # builtin does not exist on Python 3, and the comparison gives the
        # same answer on Python 2 without the explicit conversion.
        assert statement_th_texts[0] == u'資產負債表', 'invalid statement_th_texts[0]'

        column_name_list = []

        # should be account type
        column_th_texts = tr_tags[0].xpath('./th/text()')
        account_type = column_th_texts[0]
        column_name_list.append(account_type)

        # should be snapshot dates
        for local_string in column_th_texts[1:]:
            snapshot_date = self.string_utils.from_local_string_to_date(local_string)
            column_name_list.append(snapshot_date)

        return column_name_list

    def __assemble_row_list(self, relative_html_object):
        """Return one row per ``<tr>`` after the first two table rows."""
        # skip one row of statement name and one row of column name list
        tr_tags = relative_html_object.xpath('./tr')[2:]
        return [self.__assemble_row(tr_tag) for tr_tag in tr_tags]

    def __assemble_row(self, relative_html_object):
        """Return ``[account_type, number, ...]`` for a single ``<tr>``.

        The first ``<td>`` text is stripped of surrounding whitespace; each
        remaining one goes through ``string_utils.normalize_number``.
        """
        row = []

        td_texts = relative_html_object.xpath('./td/text()')

        # should be account type
        account_type = td_texts[0].strip()
        row.append(account_type)

        # should be number
        for number_string in td_texts[1:]:
            number = self.string_utils.normalize_number(number_string)
            row.append(number)

        return row
 def __init__(self):
     """Store the base XPath and create the screener and string helpers.

     NOTE(review): the class this constructor belongs to is not visible
     around this fragment -- confirm where it is meant to live.
     """
     # XPath string selecting a table nested two table levels deep inside
     # <div id="SysJustIFRAMEDIV">.
     self.base_xpath = '//html/body/div[@id="SysJustIFRAMEDIV"]/table/tr/td/table/tr/td/table'
     self.content_screener = ContentScreener()
     self.string_utils = StringUtils()
class DividendPolicyAssembler():
    """Assemble a ``DividendPolicyDao`` from a dividend policy page.

    The data lives in the first table matched by ``base_xpath``.  Its third
    row carries seven column names and every row after it carries seven
    cells: a statement date, five numbers and a trailing percentage.
    """

    def __init__(self):
        self.base_xpath = '//html/body/div[@id="SysJustIFRAMEDIV"]/table/tr/td/table/tr/td/table'
        self.content_screener = ContentScreener()
        self.string_utils = StringUtils()

    def assemble(self, param):
        """Build the DAO from ``param['content']`` and ``param['stock_symbol']``."""
        content = param['content']
        stock_symbol = param['stock_symbol']
        self.content_screener.screen(param)
        document = self.__get_html_object(content)
        table = self.__traverse_to_relative_html_object(document)
        column_name_list = self.__assemble_column_name_list(table)
        row_list = self.__assemble_row_list(table)
        return DividendPolicyDao(column_name_list, row_list, stock_symbol)

    def __get_html_object(self, content):
        """Normalize the markup, drop every ``<br>`` and parse it with lxml."""
        normalized = self.string_utils.normalize_string(content)
        without_breaks = normalized.replace(u'<br>', u'')
        return lxml.html.fromstring(without_breaks)

    def __traverse_to_relative_html_object(self, html_object):
        """Return the first element matching ``self.base_xpath``."""
        matches = html_object.xpath(self.base_xpath)
        assert len(matches) > 0, 'invalid base_xpath (table_tags)'
        return matches[0]

    def __assemble_column_name_list(self, relative_html_object):
        """Return the seven stripped cell texts of the table's third row."""
        rows = relative_html_object.xpath('./tr')
        assert len(rows) > 2, 'invalid tr_tags'

        # The third row holds the column names.
        header_texts = rows[2].xpath('./td/text()')
        assert len(header_texts) == 7, 'invalid td_texts size, should be 7'

        column_names = []
        for header_text in header_texts:
            column_names.append(header_text.strip())
        return column_names

    def __assemble_row_list(self, relative_html_object):
        """Return one assembled row per ``<tr>`` after the first three."""
        rows = relative_html_object.xpath('./tr')
        assert len(rows) > 2, 'invalid tr_tags'

        # Rows 0-2 are the header, an empty line and the column names.
        assembled = []
        for data_row in rows[3:]:
            assembled.append(self.__assemble_row(data_row))
        return assembled

    def __assemble_row(self, relative_html_object):
        """Return ``[stmt_date, number * 5, ratio]`` for a single ``<tr>``.

        The last cell is a percentage and is therefore scaled by ``0.01``.
        """
        cells = relative_html_object.xpath('./td/text()')
        assert len(cells) == 7, 'invalid td_texts size, should be 7'

        utils = self.string_utils

        # First cell: statement date.
        row = [utils.from_local_string_to_date(cells[0])]
        # Middle cells: plain numbers.
        row.extend(utils.normalize_number(cell) for cell in cells[1:-1])
        # Last cell: number in percentage.
        row.append(utils.normalize_number(cells[-1]) * 0.01)
        return row
 def __init__(self):
     """Create the screener, date and string helper objects.

     NOTE(review): the class this constructor belongs to is not visible
     around this fragment -- confirm where it is meant to live.
     """
     self.content_screener = ContentScreener()
     self.date_utils = DateUtils()
     self.string_utils = StringUtils()