예제 #1
0
    def _extract_dataset(self, reports: List[Report]):
        """
        Extract concept_id and label values from the XBRL data of the reports.

        Every report is passed to ``analyze_xbrl``. For each non-``None`` table
        in the result, the first column matching ``'concept_id'`` and the first
        column matching ``'label_ko'`` are read row by row. Rows where both
        values are truthy are stored as ``(concept_id, label)`` tuples, where
        ``label`` is the result of ``self.extract_nouns(label_ko)``. The
        collected list is stored in ``self._dataset``; nothing is returned.

        Parameters
        ----------
        reports: list of Report
            Reports to extract from.
        """
        if is_notebook():
            from tqdm import tqdm_notebook as tqdm
        else:
            from tqdm import tqdm

        dataset = []
        for report in tqdm(reports,
                           desc='Extracting concept_id and label_ko',
                           unit='report'):
            df_fs = analyze_xbrl(report)
            if df_fs is None:
                continue
            for tp in df_fs:
                df = df_fs[tp]
                if df is None:
                    continue
                concept_column = find_all_columns(df, 'concept_id')[0]
                label_ko_column = find_all_columns(df, 'label_ko')[0]
                # Walk both columns in lockstep instead of indexing row by row.
                for concept_id, label_ko in zip(df[concept_column],
                                                df[label_ko_column]):
                    if not (concept_id and label_ko):
                        continue
                    try:
                        label = self.extract_nouns(label_ko)
                    except Exception:
                        # Best effort: skip labels that cannot be processed.
                        # Only ``Exception`` is caught so KeyboardInterrupt and
                        # SystemExit still stop a long-running extraction.
                        continue
                    dataset.append((concept_id, label))

        self._dataset = dataset
예제 #2
0
def extract(corp_code: str,
            bgn_de: str,
            end_de: str = None,
            fs_tp: Tuple[str] = ('bs', 'is', 'cis', 'cf'),
            separate: bool = False,
            report_tp: str = 'annual',
            lang: str = 'ko',
            separator: bool = True) -> FinancialStatement:
    """
    Search and collect financial statements.

    Annual reports are always processed. When ``report_tp`` is ``'half'`` or
    ``'quarter'`` the semiannual filings are merged in as well, and when it is
    ``'quarter'`` the quarterly filings are merged in too.

    Parameters
    ----------
    corp_code: str
        Unique code of the disclosing company (8 digits)
    bgn_de: str
        Search start date (YYYYMMDD)
    end_de: str, optional
        Search end date (YYYYMMDD)
    fs_tp: tuple of str, optional
        'bs' statement of financial position, 'is' income statement,
        'cis' statement of comprehensive income, 'cf' cash flow statement
    separate: bool, optional
        Whether to use separate (non-consolidated) financial statements
    report_tp: str, optional
        'annual' yearly, 'half' semiannual, 'quarter' quarterly
    lang: str, optional
        'ko' Korean, 'en' English
    separator: bool, optional
        Whether to display thousands separators

    Returns
    -------
    FinancialStatement
        Financial statement search result

    Raises
    ------
    NotFoundConsolidated
        If ``separate`` is False and the first analyzed report contains no
        statement for any of the analyzed types.
    Exception
        Any other error is re-raised. When it occurred after at least one
        report had been fetched, a message describing that report is appended
        to the exception's ``args``.
    """
    if is_notebook():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    import dart_fss as dart

    def analyze(target):
        # Single place for the analysis options shared by every report type.
        return analyze_report(report=target,
                              fs_tp=fs_tp,
                              separate=separate,
                              lang=lang,
                              separator=separator)

    # Spinner disable; the ``finally`` below re-enables it on every exit path,
    # including a failing report search.
    dart.utils.spinner.spinner_enable = False

    # Last report being processed; stays None until one has been fetched so
    # the error handler never touches an unbound name.
    report = None
    try:
        reports = search_annual_report(corp_code=corp_code, bgn_de=bgn_de, end_de=end_de, separate=separate)
        statements = None
        label_df = None

        for _ in tqdm(range(len(reports)), desc='Annual reports', unit='report'):
            # Reports are consumed from the front of the search result.
            report = reports.pop(0)
            if statements is None:
                statements = analyze(report)
                if separate is False and all(statements[tp] is None for tp in statements):
                    raise NotFoundConsolidated('Could not find consolidated financial statements')
                # initialize label dictionary
                label_df = init_label(statements, fs_tp=fs_tp)
            else:
                statements, label_df = merge_fs(statements, analyze(report), fs_tp=fs_tp, label_df=label_df)

        # (filing detail type, progress-bar title, report_tp values that request it)
        interim_filings = (
            ('A002', 'Semiannual reports', ('half', 'quarter')),
            ('A003', 'Quarterly report', ('quarter',)),
        )
        for detail_ty, desc, requested_by in interim_filings:
            if not any(str_compare(report_tp, name) for name in requested_by):
                continue
            filings = search_filings(corp_code=corp_code, bgn_de=bgn_de, end_de=end_de,
                                     pblntf_detail_ty=detail_ty, page_count=100, last_reprt_at='Y')
            for _ in tqdm(range(len(filings)), desc=desc, unit='report'):
                report = filings.pop(0)
                statements, label_df = merge_fs(statements, analyze(report), fs_tp=fs_tp, label_df=label_df)

        statements = drop_empty_columns(statements)
        label_df = drop_empty_columns(label_df)

        statements = sorting_columns(statements)
        label_df = sorting_columns(label_df)

        info = {
            'corp_code': corp_code,
            'bgn_de': bgn_de,
            'end_de': end_de,
            'separate': separate,
            'report_tp': report_tp,
            'lang': lang,
            'separator': separator
        }
        return FinancialStatement(statements, label_df, info)
    except Exception as e:
        # Attach the offending report only when there is one; otherwise let
        # the original error through untouched.
        if report is not None:
            msg = 'An error occurred while fetching or analyzing {}.'.format(report.to_dict())
            e.args = (*e.args, msg, )
        raise
    finally:
        # Spinner enable
        dart.utils.spinner.spinner_enable = True
예제 #3
0
def extract(corp_code: str,
            bgn_de: str,
            end_de: str = None,
            fs_tp: Tuple[str] = ('bs', 'is', 'cis', 'cf'),
            separate: bool = False,
            report_tp: Union[str, List[str]] = 'annual',
            lang: str = 'ko',
            separator: bool = True,
            dataset: str = 'xbrl') -> FinancialStatement:
    """
    Search and collect financial statements.

    Parameters
    ----------
    corp_code: str
        Unique code of the disclosing company (8 digits)
    bgn_de: str
        Search start date (YYYYMMDD)
    end_de: str, optional
        Search end date (YYYYMMDD)
    fs_tp: tuple of str, optional
        'bs' statement of financial position, 'is' income statement,
        'cis' statement of comprehensive income, 'cf' cash flow statement
    separate: bool, optional
        Whether to use separate (non-consolidated) financial statements
    report_tp: str or list, optional
        str: 'annual' annual, 'half' annual + semiannual,
        'quarter' annual + semiannual + quarterly.
        list (a tuple is accepted as well): exactly the listed report types,
        e.g. ['annual'], ['half'], ['quarter'], ['annual', 'half'],
        ['annual', 'quarter'], ['half', 'quarter'],
        ['annual', 'half', 'quarter'].
    lang: str, optional
        'ko' Korean, 'en' English
    separator: bool, optional
        Whether to display thousands separators
    dataset: str, optional
        'xbrl': prefer extracting data from the XBRL file,
        'web': prefer extracting data from the web page (default: 'xbrl')

    Returns
    -------
    FinancialStatement
        Financial statement search result

    Raises
    ------
    ValueError
        If ``dataset`` is neither 'xbrl' nor 'web'.
    NotFoundConsolidated
        If ``separate`` is False and no consolidated statement was found.
    Exception
        Any other error is re-raised with an extra message appended to its
        ``args`` (the report being processed, or 'Unexpected Error').
    """
    if is_notebook():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    if dataset not in ['xbrl', 'web']:
        raise ValueError('invalid dataset type: only xbrl or web are allowed')

    # Parallel tuples: report type, progress-bar name, filing detail type.
    all_report_tp = ('annual', 'half', 'quarter')
    all_report_name = ('Annual', 'Semiannual', 'Quarterly')
    all_pblntf_detail_ty = ('A001', 'A002', 'A003')

    def check_report_tp(req_tp, tp):
        # A string selects that type and every type listed before it; a
        # list/tuple selects exactly its members.
        if isinstance(req_tp, str):
            index = all_report_tp.index(req_tp) + 1
            return tp in all_report_tp[:index]
        return isinstance(req_tp, (list, tuple)) and tp in req_tp

    # Spinner disable; re-enabled in the ``finally`` clause below.
    import dart_fss as dart
    dart.utils.spinner.spinner_enable = False
    statements = None
    label_df = None
    report = None
    try:
        for idx, tp in enumerate(all_report_tp):
            if not check_report_tp(report_tp, tp):
                continue
            if tp == 'annual':
                reports = search_annual_report(corp_code=corp_code, bgn_de=bgn_de, end_de=end_de, separate=separate)
            else:
                reports = search_filings(corp_code=corp_code, bgn_de=bgn_de, end_de=end_de,
                                         pblntf_detail_ty=all_pblntf_detail_ty[idx], page_count=100, last_reprt_at='Y')
            # Nothing to process for this report type.
            if not reports:
                continue
            for _ in tqdm(range(len(reports)), desc='{} reports'.format(all_report_name[idx]), unit='report'):
                report = reports.pop(0)
                # ``dataset`` is forwarded for every report, including the
                # first one that seeds ``statements``.
                analyzed = analyze_report(report=report,
                                          fs_tp=fs_tp,
                                          separate=separate,
                                          lang=lang,
                                          separator=separator,
                                          dataset=dataset)
                if analyzed is None:
                    warnings_text = 'Unable to extract financial statements: {}.'.format(report.to_dict())
                    warnings.warn(warnings_text, RuntimeWarning)
                    continue

                if statements is None:
                    # First usable report seeds the result.
                    statements = analyzed
                    if separate is False and all(statements[key] is None for key in statements):
                        raise NotFoundConsolidated('Could not find consolidated financial statements')
                    # initialize label dictionary
                    label_df = init_label(statements, fs_tp=fs_tp)
                else:
                    statements, label_df = merge_fs(statements, analyzed, fs_tp=fs_tp, label_df=label_df)

        if separate is False and (statements is None or all(statements[key] is None for key in statements)):
            raise NotFoundConsolidated('Could not find consolidated financial statements')

        statements = drop_empty_columns(statements)
        label_df = drop_empty_columns(label_df)

        statements = sorting_columns(statements)
        label_df = sorting_columns(label_df)

        info = {
            'corp_code': corp_code,
            'bgn_de': bgn_de,
            'end_de': end_de,
            'separate': separate,
            'report_tp': report_tp,
            'lang': lang,
            'separator': separator
        }
        return FinancialStatement(statements, label_df, info)
    except Exception as e:
        if report is not None:
            msg = 'An error occurred while fetching or analyzing {}.'.format(report.to_dict())
        else:
            msg = 'Unexpected Error'
        e.args = (*e.args, msg, )
        raise
    finally:
        # Spinner enable
        dart.utils.spinner.spinner_enable = True
예제 #4
0
def extract(corp_code: str,
            bgn_de: str,
            end_de: str = None,
            fs_tp: Tuple[str] = ('bs', 'is', 'cis', 'cf'),
            separate: bool = False,
            report_tp: str = 'annual',
            lang: str = 'ko',
            separator: bool = True) -> FinancialStatement:
    """
    Search and collect financial statements.

    Annual reports are always processed. When ``report_tp`` is ``'half'`` or
    ``'quarter'`` the semiannual filings are merged in as well, and when it is
    ``'quarter'`` the quarterly filings are merged in too.

    Parameters
    ----------
    corp_code: str
        Unique code of the disclosing company (8 digits)
    bgn_de: str
        Search start date (YYYYMMDD)
    end_de: str, optional
        Search end date (YYYYMMDD)
    fs_tp: tuple of str, optional
        'bs' statement of financial position, 'is' income statement,
        'cis' statement of comprehensive income, 'cf' cash flow statement
    separate: bool, optional
        Whether to use separate (non-consolidated) financial statements
    report_tp: str, optional
        'annual' yearly, 'half' semiannual, 'quarter' quarterly
    lang: str, optional
        'ko' Korean, 'en' English
    separator: bool, optional
        Whether to display thousands separators

    Returns
    -------
    FinancialStatement
        Financial statement search result

    Raises
    ------
    RuntimeError
        If neither the annual-report search nor the audit-report fallback
        yields any report.
    NotFoundConsolidated
        If ``separate`` is False and no consolidated statement was found.
    """
    if is_notebook():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    try:
        # Search business (annual) reports, final versions only
        reports = search_filings(corp_code=corp_code,
                                 bgn_de=bgn_de,
                                 end_de=end_de,
                                 pblntf_detail_ty='A001',
                                 page_count=100,
                                 last_reprt_at='Y')
    except NoDataReceived:
        # Fall back to audit reports
        pblntf_detail_ty = 'F001' if separate else 'F002'
        try:
            reports = search_filings(corp_code=corp_code,
                                     bgn_de=bgn_de,
                                     end_de=end_de,
                                     pblntf_detail_ty=pblntf_detail_ty,
                                     page_count=100,
                                     last_reprt_at='Y')
        except NoDataReceived:
            # No data from either search: report it uniformly below.
            reports = []

    # The rest of the function deliberately runs outside any ``finally``
    # clause so that unexpected search errors propagate unchanged instead of
    # being replaced by the "no annual report" error.
    if len(reports) == 0:
        raise RuntimeError('Could not find an annual report')

    # Financial statement search result
    statements = None
    next_index = 0
    for idx, latest_report in enumerate(reports):
        # For the most recent report, use the XBRL file when it exists
        latest_xbrl = latest_report.xbrl
        if latest_xbrl is not None:
            if separate is False and not latest_xbrl.exist_consolidated():
                raise NotFoundConsolidated(
                    'Could not find consolidated financial statements')

            # Initialize the financial statements from the XBRL information
            analyzed_results = analyze_xbrl(latest_report,
                                            fs_tp=fs_tp,
                                            separate=separate,
                                            lang=lang,
                                            show_abstract=False,
                                            show_class=True,
                                            show_depth=10,
                                            show_concept=True,
                                            separator=separator)
            statements = copy.deepcopy(analyzed_results)
        else:
            statements = analyze_html(latest_report,
                                      fs_tp=fs_tp,
                                      separate=separate,
                                      lang=lang)
        # A report may record only amendments without financial statements;
        # in that case move on to the next report.
        if statements is not None:
            next_index = idx + 1
            break

    # ``statements`` is still None when no report yielded any statements;
    # treat that the same as "no consolidated statements" rather than
    # failing while iterating over None.
    if separate is False and (
            statements is None
            or all(statements[tp] is None for tp in statements)):
        raise NotFoundConsolidated(
            'Could not find consolidated financial statements')

    label_df = None
    for report in tqdm(reports[next_index:],
                       desc='Annual reports',
                       unit='report'):
        statements, label_df = merge_fs(statements,
                                        label_df,
                                        report,
                                        fs_tp=fs_tp,
                                        separate=separate,
                                        lang=lang)

    # (filing detail type, progress-bar title, report_tp values that request it)
    interim_filings = (
        ('A002', 'Semiannual reports', ('half', 'quarter')),
        ('A003', 'Quarterly report', ('quarter',)),
    )
    for detail_ty, desc, requested_by in interim_filings:
        if not any(str_compare(report_tp, name) for name in requested_by):
            continue
        filings = search_filings(corp_code=corp_code,
                                 bgn_de=bgn_de,
                                 end_de=end_de,
                                 pblntf_detail_ty=detail_ty,
                                 page_count=100,
                                 last_reprt_at='Y')
        for report in tqdm(filings, desc=desc, unit='report'):
            statements, label_df = merge_fs(statements,
                                            label_df,
                                            report,
                                            fs_tp=fs_tp,
                                            separate=separate,
                                            lang=lang)

    statements = drop_empty_columns(statements)
    label_df = drop_empty_columns(label_df)

    statements = sorting_columns(statements)
    label_df = sorting_columns(label_df)

    info = {
        'corp_code': corp_code,
        'bgn_de': bgn_de,
        'end_de': end_de,
        'separate': separate,
        'report_tp': report_tp,
        'lang': lang,
        'separator': separator
    }
    return FinancialStatement(statements, label_df, info)