Example #1
0
    def extract_related_reports(self):
        """Extract the list of reports related to this report.

        The related reports are read from the ``<option>`` entries of the
        ``<select id="family">`` element of the report page. The page is
        fetched first (via ``self._get_report()``) when ``self.html`` is
        still ``None``.

        The option that refers to this report itself (same ``rcp_no``) is not
        returned; it is only used to fill in ``self.info['rpt_nm']`` when that
        entry is missing.

        Returns
        -------
        list of RelatedReport
            Related reports sorted by ``rcp_no`` in descending order. The list
            is empty when the page has no ``family`` selector. The same list
            is stored on ``self._related_reports``.

        """
        if self.html is None:
            self._get_report()

        results = []
        family = self.html.find('select', id='family')
        # find() returns None when the element is absent; treat that as
        # "no related reports" instead of failing on None.find_all().
        options = family.find_all('option') if family is not None else []
        for option in options:
            value = option.attrs.get('value')
            # Skip placeholder entries: no value attribute or the literal 'null'.
            if value is None or compare_str(value, 'null'):
                continue
            # The receipt number is the second '='-separated field of the value.
            parts = value.split('=')
            if len(parts) < 2:
                # Not a "<key>=<rcp_no>" value, so there is nothing to extract.
                continue
            rcp_no = parts[1]
            rpt_nm = re.sub(r'\s+', ' ', option.text).strip()
            if compare_str(self.rcp_no, rcp_no):
                # Entry for this very report: only back-fill a missing name.
                if self.info.get('rpt_nm') is None:
                    self.info['rpt_nm'] = rpt_nm
                continue
            results.append(RelatedReport(rcp_no=rcp_no, rpt_nm=rpt_nm, parent=self))

        self._related_reports = sorted(results,
                                       key=lambda x: x.rcp_no,
                                       reverse=True)
        return self._related_reports
Example #2
0
    def extract_attached_reports(self):
        """Extract and return the list of reports attached to this report.

        The attachments are read from the ``<option>`` entries inside the
        ``<p class="f_none">`` element of the report page. The page is fetched
        first (via ``self._get_report()``) when ``self.html`` is still
        ``None``. Each option value is parsed as a query string from which
        ``rcpNo`` and ``dcmNo`` are taken.

        Returns
        -------
        list of AttachedReport
            Attached reports sorted by ``rcp_no`` in descending order. The
            list is empty when the page has no attachment selector. The same
            list is stored on ``self._attached_reports``.

        """
        if self.html is None:
            self._get_report()

        attached_reports = []
        attached = self.html.find('p', class_='f_none')
        # find() returns None when the element is absent; treat that as
        # "no attachments" instead of failing on None.find_all().
        options = attached.find_all('option') if attached is not None else []
        for option in options:
            docs_url = option.attrs.get('value')
            # Skip placeholder entries: no value attribute or the literal 'null'.
            if docs_url is None or compare_str(docs_url, 'null'):
                continue
            parsed = parse_qs(docs_url)
            rcp_no = parsed.get('rcpNo')
            dcm_no = parsed.get('dcmNo')
            if not rcp_no or not dcm_no:
                # Both identifiers are required to build an AttachedReport.
                continue
            rpt_nm = re.sub(r'\s+', ' ', option.text).strip()
            attached_reports.append(AttachedReport(rcp_no=rcp_no[0],
                                                   dcm_no=dcm_no[0],
                                                   rpt_nm=rpt_nm,
                                                   parent=self))

        self._attached_reports = sorted(attached_reports,
                                        key=lambda x: x.rcp_no,
                                        reverse=True)
        return self._attached_reports
Example #3
0
    def run_test(self):
        """Compare each expected value in ``self.test_set`` with the fetched statements.

        The financial statement is requested once from ``self.crp``. For every
        test case the statement table ``fs[case['fs_tp']]`` is looked up, the
        date column and the label column are resolved with
        ``find_all_columns`` (first hit), and the value on the last row whose
        label (spaces removed) matches ``case['item']`` is compared with
        ``case['expected']``. A mismatch fails the test through
        ``pytest.fail`` with a message describing the case.
        """
        fs = self.crp.get_financial_statement(start_dt=self.start_dt,
                                              separate=self.separate,
                                              report_tp=self.report_tp)

        # Adjacent literals form one template; the rendered text is unchanged.
        failure_template = ("Test failed: crp_cd='{}', fs_tp='{}', "
                            "start_dt='{}', report_tp='{}', "
                            "date='{}', column='{}',"
                            "item='{}', actual='{}', expected='{}'")

        for case in self.test_set:
            tp = case['fs_tp']
            date = case['date']
            column = case['column']
            item = case['item']
            expected = case['expected']

            df = fs[tp]
            date_column = find_all_columns(df=df, query=date)[0]
            label_column = find_all_columns(df=df, query=column)[0]

            # Positions of all rows whose label matches; the last one wins.
            matching_rows = [
                row for row in range(len(df))
                if compare_str(df[label_column].iloc[row].replace(' ', ''), item)
            ]
            if matching_rows:
                actual = df[date_column].iloc[matching_rows[-1]]
            else:
                actual = None

            if actual != expected:
                pytest.fail(failure_template.format(
                    self.crp.crp_cd, tp,
                    self.start_dt, fs.info['report_tp'],
                    date, column,
                    item, actual, expected))
Example #4
0
def get_value_from_dataset(classification, dataset, concept_id):
    """Extract the value of one concept from a dataset, per classification.

    Parameters
    ----------
    classification: dict or list of dict
        One classification or a list of them. Each must provide a ``'cls_id'``
        key that indexes ``dataset``; a single dict is treated as a
        one-element list.
    dataset: mapping
        Maps a ``cls_id`` to an iterable of data items exposing
        ``concept.id`` and ``value``.
    concept_id: str
        Concept id to look for (compared with ``compare_str``).

    Returns
    -------
    list
        One entry per distinct English title (``get_title(cls, 'en')``), in
        order of first appearance. An entry is the first matching value
        converted to ``float`` when possible, the raw value when it cannot be
        converted, or NaN when no item matched. When several classifications
        share a title, a later non-missing value replaces the earlier one.
    """
    def str_to_float(val):
        # float(None) raises TypeError, not ValueError; fall back in both cases.
        try:
            return float(val)
        except (TypeError, ValueError):
            return val

    def is_missing(val):
        # math.isnan() only accepts real numbers, so guard the type first:
        # a non-numeric string kept by str_to_float must not reach isnan().
        return val is None or (isinstance(val, float) and math.isnan(val))

    if isinstance(classification, dict):
        classification = [classification]

    results = []
    added_title = []
    for cls in classification:
        value = float('nan')
        for data in dataset[cls['cls_id']]:
            if compare_str(data.concept.id, concept_id):
                value = str_to_float(data.value)
                break
        title = get_title(cls, 'en')
        if title in added_title:
            # Same title seen before: only overwrite with an actual value.
            if not is_missing(value):
                results[added_title.index(title)] = value
        else:
            results.append(value)
            added_title.append(title)
    return results
Example #5
0
    def get_table_by_code(self, code: str) -> Union[Table, None]:
        """Return the first table whose code matches ``code``.

        Parameters
        ----------
        code: str
            Table code to look for.

        Returns
        -------
        Table or None
            The first entry of ``self.tables`` for which
            ``compare_str(table.code, code)`` is true, or None when no table
            matches.
        """
        candidates = (entry for entry in self.tables
                      if compare_str(entry.code, code))
        # The generator is lazy, so scanning stops at the first match.
        return next(candidates, None)
Example #6
0
def search_financial_statement(crp_cd: str, start_dt: str, end_dt: str = None,
                               fs_tp: Tuple[str] = ('fs', 'is', 'ci', 'cf'), separate: bool = False,
                               report_tp: str = 'annual', lang: str = 'ko',
                               separator: bool = True) -> FinancialStatement:
    """
    Search financial statements.

    Parameters
    ----------
    crp_cd: str
        Stock code.
    start_dt: str
        Search start date (YYYYMMDD).
    end_dt: str, optional
        Search end date (YYYYMMDD).
    fs_tp: tuple of str, optional
        'fs' statement of financial position, 'is' income statement,
        'ci' statement of comprehensive income, 'cf' statement of cash flows.
    separate: bool, optional
        Whether to use separate (non-consolidated) financial statements.
    report_tp: str, optional
        'annual' yearly, 'half' semiannual, 'quarter' quarterly.
    lang: str, optional
        'ko' Korean, 'en' English.
    separator: bool, optional
        Whether to show the thousands separator.

    Returns
    -------
    FinancialStatement
        Financial statement search result.

    Raises
    ------
    RuntimeError
        When no annual report is found.
    NotFoundConsolidated
        When consolidated statements are requested but cannot be found.

    """
    if is_notebook():
        from tqdm import tqdm_notebook as progress
    else:
        from tqdm import tqdm as progress

    # Keyword arguments shared by every merge_fs call below.
    merge_options = {'fs_tp': fs_tp, 'separate': separate, 'lang': lang}

    # Annual reports (final versions only).
    reports = search_report(crp_cd=crp_cd, start_dt=start_dt, end_dt=end_dt,
                            bsn_tp='A001', page_count=100, fin_rpt=True)

    if len(reports) == 0:
        # todo: search financial statements using the audit report
        raise RuntimeError('Could not find an annual report')

    statements = None
    next_index = None

    for idx, _ in enumerate(reports):
        base_report = reports[idx]
        base_xbrl = base_report.xbrl

        if base_xbrl:
            # An XBRL file exists: initialise the statements from it and stop.
            if separate is False and not base_xbrl.exist_consolidated():
                raise NotFoundConsolidated('Could not find consolidated financial statements')

            analyzed_results = analyze_xbrl(base_report, fs_tp=fs_tp, separate=separate, lang=lang,
                                            show_abstract=False, show_class=True,
                                            show_depth=10, show_concept=True, separator=separator)
            statements = copy.deepcopy(analyzed_results)
            break

        # No XBRL file: fall back to the HTML of the report.
        statements = analyze_html(base_report, fs_tp=fs_tp, separate=separate, lang=lang)

        # A report that only records amendments yields nothing; try the next one.
        if statements is not None:
            next_index = idx + 1
            break

    if separate is False and all(statements[tp] is None for tp in statements):
        raise NotFoundConsolidated('Could not find consolidated financial statements')

    label_df = None
    for report in progress(reports[next_index:], desc='Annual reports', unit='report'):
        statements, label_df = merge_fs(statements, label_df, report, **merge_options)

    # Extra report kinds to merge, in order: semiannual first, then quarterly.
    include_quarter = compare_str(report_tp, 'quarter')
    include_half = include_quarter or compare_str(report_tp, 'half')

    extra_stages = []
    if include_half:
        extra_stages.append((['A002'], 'Semiannual reports'))
    if include_quarter:
        extra_stages.append((['A003'], 'Quarterly report'))

    for bsn_tp, description in extra_stages:
        found = search_report(crp_cd=crp_cd, start_dt=start_dt, end_dt=end_dt,
                              bsn_tp=bsn_tp, page_count=100, fin_rpt=True)
        for report in progress(found, desc=description, unit='report'):
            statements, label_df = merge_fs(statements, label_df, report, **merge_options)

    statements = sorting_columns(drop_empty_columns(statements))
    label_df = sorting_columns(drop_empty_columns(label_df))

    info = dict(crp_cd=crp_cd,
                start_dt=start_dt,
                end_dt=end_dt,
                separate=separate,
                report_tp=report_tp,
                lang=lang,
                separator=separator)
    return FinancialStatement(statements, label_df, info)
Example #7
0
def convert_thead_into_columns(fs_tp: str, fs_table: dict, separate: bool = False,
                               lang: str = 'ko'):
    """Build DataFrame column labels from the header of a statement table.

    Parameters
    ----------
    fs_tp: str
        Statement type; used as a key into the title mapping below
        ('fs', 'is', 'ci' or 'cf').
    fs_table: dict
        Must provide ``'table'`` (an object exposing ``thead`` / ``tbody``)
        and ``'header'`` (passed to ``extract_date_from_header`` and
        ``extract_unit_from_header``).
    separate: bool, optional
        Selects the 'separate' or 'consolidated' label for value columns.
    lang: str, optional
        'ko' or 'en'; selects the language of that label.

    Returns
    -------
    list of list
        One two-element list per table column: the first-row text (statement
        title or date string), followed by either ``'label_ko'`` /
        ``'comment'`` or a tuple of the remaining header items.
    """
    def column_ko_to_en(ko):
        # Map the two known Korean header names to internal column names;
        # any other text is returned unchanged.
        ko_to_en = {
            '과목': 'label_ko',
            '주석': 'comment'
        }
        en = ko_to_en.get(ko)
        return en if en else ko

    thead = fs_table['table'].thead

    if thead is None:
        # No <thead>: take the first body row out of <tbody> (extract() removes
        # it from the table) and promote it to a header row with <th> cells.
        tt = fs_table['table'].tbody.tr.extract()
        thead = BeautifulSoup('<thead></thead>', 'html.parser')
        thead.thead.append(tt)
        for td in thead.tr.find_all('td'):
            td.name = 'th'
    # Column spans of the cells in the first header row.
    th_colspan_list = [int(th.attrs.get('colspan', 1)) for th in thead.tr.find_all('th')]
    # Consumed below with pop(0): one entry (an iterable of dates) per date column.
    date_info = extract_date_from_header(fs_table['header'])
    # Regular Expression for title
    regex = str_to_regex('과목 OR 주석')

    fs_string = {
        'fs': 'Statement of financial position',
        'is': 'Income statement',
        'ci': 'Statement of comprehensive income',
        'cf': 'Statement of cash flows'
    }

    # When the header unit text contains '원' or 'USD', append the unit
    # (KRW or USD) to every statement title.
    str_unit = extract_unit_from_header(fs_table['header'])
    str_unit = str_to_regex('원 OR USD').search(str_unit)
    if str_unit:
        str_unit = str_unit.group(0)
        str_unit = 'KRW' if compare_str('원', str_unit) else 'USD'
        for key in fs_string:
            fs_string[key] = fs_string[key] + '(Unit: {})'.format(str_unit)

    # Label added to value columns, keyed by language and then by `separate`.
    label = {
        'ko': {
            True: '별도재무제표',
            False: '연결재무제표'
        },
        'en': {
            True: 'Separate',
            False: 'Consolidated'
        }
    }

    # Maximum number of columns
    col_length = sum(th_colspan_list)
    # Maximum number of rows (a single header row is padded to two rows)
    row_length = len(thead.find_all('tr'))
    row_length = row_length + 1 if row_length == 1 else row_length
    # Matrix used to resolve row-span and col-span
    columns_matrix = [[None for y in range(col_length)] for x in range(row_length)]
    for idx, tr in enumerate(thead.find_all('tr')):
        # Start at the first cell of this row not already filled by a
        # row-span coming from a previous row.
        start_idx = 0
        for ele_idx, element in enumerate(columns_matrix[idx]):
            if element is None:
                start_idx = ele_idx
                break

        for jdx, th in enumerate(tr.find_all('th')):
            row_span = int(th.attrs.get('rowspan', 1))
            col_span = int(th.attrs.get('colspan', 1))
            text = re.sub(r'\s+', '', th.text)
            # Placeholder used when the header provides no date for this column.
            date_list = [datetime(1900, 1, 1)]
            if idx == 0:
                if jdx == 0:
                    # The first cell of the first row is always the label column.
                    text = '과목'
                elif regex.search(text) is None:
                    # Any other non-label cell is a date column: replace its
                    # text with the next date entry from the header.
                    if len(date_info) > 0:
                        date_list = date_info.pop(0)
                    else:
                        # NOTE(review): the message says "last data", but
                        # date_list is reset to the 1900-01-01 placeholder for
                        # every cell, so the placeholder is what gets used.
                        import warnings
                        date = '-'.join([date.strftime('%Y%m%d') for date in date_list])
                        warnings_text = "Date data length does not match table header."\
                                + "So last date was set using last data({}). ".format(date)
                        warnings.warn(warnings_text, RuntimeWarning)
                    text = '-'.join([date.strftime('%Y%m%d') for date in date_list])

            # Label/note cells always occupy two rows.
            if regex.search(text):
                row_span = 2

            for mdx in range(row_span):
                for ndx in range(col_span):
                    new_text = text
                    # The top row of a label/note cell holds the statement title.
                    if mdx == 0 and regex.search(text):
                        new_text = fs_string[fs_tp]
                    columns_matrix[idx + mdx][start_idx + ndx] = new_text
            # ndx is the last value of the inner loop (col_span - 1), so this
            # advances by col_span.
            # NOTE(review): ndx would be unbound (or stale) if a span were 0.
            start_idx = start_idx + ndx + 1

    regex_3month = re.compile(r'3개월')
    regex_total = str_to_regex(r'누적 OR 금액')

    columns = []

    # Read the matrix column by column to build each column label.
    for jdx in range(len(columns_matrix[0])):
        column = []
        sec_item = []
        for idx in range(len(columns_matrix)):
            item = columns_matrix[idx][jdx]
            if idx == 0:
                # First row: statement title or date string.
                column.append(item)
                continue
            elif idx == 1 and (item is None or regex.search(item) is None):
                    # Value column: start with the separate/consolidated label.
                    sec_item.append(label[lang][separate])
            else:
                pass

            if item is None:
                pass
            elif compare_str(column[0], item):
                # Same text as the first row (a cell repeated by a row-span).
                continue
            elif regex_3month.search(item):
                # extract date info
                # (date_info is rebound here; the header date list is no longer needed)
                date_info = [datetime.strptime(date_str, '%Y%m%d') for date_str in column[0].split('-')]

                # calculating start_dt
                # Three months before the end date, moved to the first day of that month.
                # NOTE(review): date_info[1] assumes column[0] holds a 'start-end' pair.
                delta = relativedelta(months=3)
                start_dt = date_info[1] - delta
                start_dt = start_dt.replace(day=1)

                end_dt = date_info[1]
                column[0] = '-'.join([date.strftime('%Y%m%d') for date in [start_dt, end_dt]])
            elif regex_total.search(item):
                # Cumulative / amount sub-headers add nothing to the label.
                pass
            else:
                sec_item.append(column_ko_to_en(item))
        # Label and note columns use a plain string; value columns use a tuple.
        if sec_item[0] in ['label_ko', 'comment']:
            column.append(sec_item[0])
        else:
            column.append(tuple(sec_item))
        columns.append(column)
    return columns