def _extract_dataset(self, reports: List[Report]):
    """
    Extract (concept_id, label) pairs from the XBRL data of the reports.

    Every report is passed to ``analyze_xbrl``. For each table in the
    result, the first column matching ``concept_id`` and the first column
    matching ``label_ko`` are read row by row; rows where both values are
    truthy are run through ``self.extract_nouns`` and collected. The
    collected list replaces ``self._dataset``.

    Parameters
    ----------
    reports: list of Report
        Reports to extract from
    """
    if is_notebook():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    dataset = []
    for report in tqdm(reports, desc='Extracting concept_id and label_ko', unit='report'):
        df_fs = analyze_xbrl(report)
        if df_fs is None:
            continue
        for tp in df_fs:
            df = df_fs[tp]
            if df is None:
                continue

            concept_columns = find_all_columns(df, 'concept_id')
            label_ko_columns = find_all_columns(df, 'label_ko')
            # A table without either column has nothing to contribute;
            # skip it rather than failing on the [0] lookup.
            if len(concept_columns) == 0 or len(label_ko_columns) == 0:
                continue

            # Select both columns once instead of once per row.
            concept_ids = df[concept_columns[0]]
            labels_ko = df[label_ko_columns[0]]
            for concept_id, label_ko in zip(concept_ids, labels_ko):
                if not (concept_id and label_ko):
                    continue
                try:
                    label = self.extract_nouns(label_ko)
                except Exception:
                    # Best effort: a label that cannot be tokenized is
                    # skipped. KeyboardInterrupt / SystemExit are not
                    # Exception subclasses and therefore still propagate.
                    continue
                dataset.append((concept_id, label))
    self._dataset = dataset
def extract(corp_code: str,
            bgn_de: str,
            end_de: str = None,
            fs_tp: Tuple[str] = ('bs', 'is', 'cis', 'cf'),
            separate: bool = False,
            report_tp: str = 'annual',
            lang: str = 'ko',
            separator: bool = True) -> FinancialStatement:
    """
    Search filings and extract financial statements.

    Annual reports are always processed. Semiannual reports are added when
    ``report_tp`` is 'half' or 'quarter', and quarterly reports are added
    when ``report_tp`` is 'quarter'.

    Parameters
    ----------
    corp_code: str
        Unique code of the disclosing company (8 digits)
    bgn_de: str
        Search start date (YYYYMMDD)
    end_de: str, optional
        Search end date (YYYYMMDD)
    fs_tp: tuple of str, optional
        'bs' statement of financial position, 'is' income statement,
        'cis' statement of comprehensive income, 'cf' cash flow statement
    separate: bool, optional
        Whether to use separate financial statements
    report_tp: str, optional
        'annual', 'half' or 'quarter'
    lang: str, optional
        'ko' Korean, 'en' English
    separator: bool, optional
        Whether to display the thousands separator

    Returns
    -------
    FinancialStatement
        Result of the financial statement search

    Raises
    ------
    NotFoundConsolidated
        If consolidated statements are requested (``separate`` is False)
        and the first analyzed report contains none.
    """
    if is_notebook():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    import dart_fss as dart
    # Spinner disable
    dart.utils.spinner.spinner_enable = False

    # Last report handed to the analyzer; stays None until a report has
    # actually been popped so the error handler can tell the two cases apart.
    report = None
    try:
        # The search runs inside the try block so that the spinner is
        # restored by ``finally`` even when the search itself fails.
        reports = search_annual_report(corp_code=corp_code, bgn_de=bgn_de,
                                       end_de=end_de, separate=separate)
        length = len(reports)
        statements = None
        label_df = None
        for _ in tqdm(range(length), desc='Annual reports', unit='report'):
            report = reports.pop(0)
            if statements is None:
                statements = analyze_report(report=report,
                                            fs_tp=fs_tp,
                                            separate=separate,
                                            lang=lang,
                                            separator=separator)
                if separate is False and all(statements[tp] is None for tp in statements):
                    raise NotFoundConsolidated('Could not find consolidated financial statements')
                # initialize label dictionary
                label_df = init_label(statements, fs_tp=fs_tp)
            else:
                nstatements = analyze_report(report=report,
                                             fs_tp=fs_tp,
                                             separate=separate,
                                             lang=lang,
                                             separator=separator)
                statements, label_df = merge_fs(statements, nstatements, fs_tp=fs_tp, label_df=label_df)

        if str_compare(report_tp, 'half') or str_compare(report_tp, 'quarter'):
            half = search_filings(corp_code=corp_code,
                                  bgn_de=bgn_de,
                                  end_de=end_de,
                                  pblntf_detail_ty='A002',
                                  page_count=100,
                                  last_reprt_at='Y')
            length = len(half)
            for _ in tqdm(range(length), desc='Semiannual reports', unit='report'):
                report = half.pop(0)
                nstatements = analyze_report(report=report,
                                             fs_tp=fs_tp,
                                             separate=separate,
                                             lang=lang,
                                             separator=separator)
                statements, label_df = merge_fs(statements, nstatements, fs_tp=fs_tp, label_df=label_df)

        if str_compare(report_tp, 'quarter'):
            quarter = search_filings(corp_code=corp_code,
                                     bgn_de=bgn_de,
                                     end_de=end_de,
                                     pblntf_detail_ty='A003',
                                     page_count=100,
                                     last_reprt_at='Y')
            length = len(quarter)
            for _ in tqdm(range(length), desc='Quarterly report', unit='report'):
                report = quarter.pop(0)
                nstatements = analyze_report(report=report,
                                             fs_tp=fs_tp,
                                             separate=separate,
                                             lang=lang,
                                             separator=separator)
                statements, label_df = merge_fs(statements, nstatements, fs_tp=fs_tp, label_df=label_df)

        statements = drop_empty_columns(statements)
        label_df = drop_empty_columns(label_df)

        statements = sorting_columns(statements)
        label_df = sorting_columns(label_df)

        info = {
            'corp_code': corp_code,
            'bgn_de': bgn_de,
            'end_de': end_de,
            'separate': separate,
            'report_tp': report_tp,
            'lang': lang,
            'separator': separator
        }
        # Spinner enable
        dart.utils.spinner.spinner_enable = True
        return FinancialStatement(statements, label_df, info)
    except Exception as e:
        # Attach the offending report only when one was being processed.
        # Referencing ``report`` unconditionally used to raise
        # UnboundLocalError here and hide the real error.
        if report is not None:
            msg = 'An error occurred while fetching or analyzing {}.'.format(report.to_dict())
            e.args = (*e.args, msg, )
        raise
    finally:
        dart.utils.spinner.spinner_enable = True
def extract(corp_code: str,
            bgn_de: str,
            end_de: str = None,
            fs_tp: Tuple[str] = ('bs', 'is', 'cis', 'cf'),
            separate: bool = False,
            report_tp: Union[str, List[str]] = 'annual',
            lang: str = 'ko',
            separator: bool = True,
            dataset: str = 'xbrl') -> FinancialStatement:
    """
    Search filings and extract financial statements.

    Parameters
    ----------
    corp_code: str
        Unique code of the disclosing company (8 digits)
    bgn_de: str
        Search start date (YYYYMMDD)
    end_de: str, optional
        Search end date (YYYYMMDD)
    fs_tp: tuple of str, optional
        'bs' statement of financial position, 'is' income statement,
        'cis' statement of comprehensive income, 'cf' cash flow statement
    separate: bool, optional
        Whether to use separate financial statements
    report_tp: str or list, optional
        str: 'annual' annual, 'half' annual + semiannual,
        'quarter' annual + semiannual + quarterly
        list (or tuple): exactly the listed report types, e.g.
        ['annual'], ['half'], ['quarter'], ['annual', 'half'],
        ['annual', 'quarter'], ['half', 'quarter'],
        ['annual', 'half', 'quarter']
    lang: str, optional
        'ko' Korean, 'en' English
    separator: bool, optional
        Whether to display the thousands separator
    dataset: str, optional
        'xbrl': prefer data from the XBRL file, 'web': prefer data from
        the web page (default: 'xbrl')

    Returns
    -------
    FinancialStatement
        Result of the financial statement search

    Raises
    ------
    ValueError
        If ``dataset`` is neither 'xbrl' nor 'web', or ``report_tp`` is a
        string other than 'annual', 'half' or 'quarter'.
    NotFoundConsolidated
        If consolidated statements are requested (``separate`` is False)
        and none could be found.
    """
    if is_notebook():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    if dataset not in ['xbrl', 'web']:
        raise ValueError('invalid dataset type: only xbrl or web are allowed')

    all_report_tp = ('annual', 'half', 'quarter')
    all_report_name = ('Annual', 'Semiannual', 'Quarterly')
    all_pblntf_detail_ty = ('A001', 'A002', 'A003')

    def check_report_tp(req_tp, tp):
        """Return True when report type ``tp`` is requested by ``req_tp``."""
        if isinstance(req_tp, str):
            # A string is cumulative: it selects itself and every report
            # type listed before it in all_report_tp.
            index = all_report_tp.index(req_tp) + 1
            return tp in all_report_tp[:index]
        # A list/tuple selects exactly the listed report types.
        return isinstance(req_tp, (list, tuple)) and tp in req_tp

    # Spinner disable
    import dart_fss as dart
    dart.utils.spinner.spinner_enable = False
    statements = None
    label_df = None
    report = None

    try:
        for idx, tp in enumerate(all_report_tp):
            if not check_report_tp(report_tp, tp):
                continue

            if tp == 'annual':
                reports = search_annual_report(corp_code=corp_code, bgn_de=bgn_de,
                                               end_de=end_de, separate=separate)
            else:
                reports = search_filings(corp_code=corp_code,
                                         bgn_de=bgn_de,
                                         end_de=end_de,
                                         pblntf_detail_ty=all_pblntf_detail_ty[idx],
                                         page_count=100,
                                         last_reprt_at='Y')
            length = len(reports)
            # Compare the length, not the result object itself, with zero.
            if length == 0:
                continue

            for _ in tqdm(range(length), desc='{} reports'.format(all_report_name[idx]), unit='report'):
                report = reports.pop(0)
                # ``dataset`` is forwarded for every report, including the
                # first one that seeds ``statements``.
                nstatements = analyze_report(report=report,
                                             fs_tp=fs_tp,
                                             separate=separate,
                                             lang=lang,
                                             separator=separator,
                                             dataset=dataset)
                if nstatements is None:
                    warnings_text = 'Unable to extract financial statements: {}.'.format(report.to_dict())
                    warnings.warn(warnings_text, RuntimeWarning)
                    continue

                if statements is None:
                    # First usable report: it becomes the base statements.
                    statements = nstatements
                    if separate is False and all(statements[key] is None for key in statements):
                        raise NotFoundConsolidated('Could not find consolidated financial statements')
                    # initialize label dictionary
                    label_df = init_label(statements, fs_tp=fs_tp)
                else:
                    statements, label_df = merge_fs(statements, nstatements, fs_tp=fs_tp, label_df=label_df)

        # Spinner enable
        dart.utils.spinner.spinner_enable = True

        if separate is False and (statements is None or all(statements[key] is None for key in statements)):
            raise NotFoundConsolidated('Could not find consolidated financial statements')

        statements = drop_empty_columns(statements)
        label_df = drop_empty_columns(label_df)

        statements = sorting_columns(statements)
        label_df = sorting_columns(label_df)

        info = {
            'corp_code': corp_code,
            'bgn_de': bgn_de,
            'end_de': end_de,
            'separate': separate,
            'report_tp': report_tp,
            'lang': lang,
            'separator': separator
        }
        return FinancialStatement(statements, label_df, info)
    except Exception as e:
        if report is not None:
            msg = 'An error occurred while fetching or analyzing {}.'.format(report.to_dict())
        else:
            msg = 'Unexpected Error'
        e.args = (*e.args, msg, )
        raise
    finally:
        dart.utils.spinner.spinner_enable = True
def extract(corp_code: str,
            bgn_de: str,
            end_de: str = None,
            fs_tp: Tuple[str] = ('bs', 'is', 'cis', 'cf'),
            separate: bool = False,
            report_tp: str = 'annual',
            lang: str = 'ko',
            separator: bool = True) -> FinancialStatement:
    """
    Search filings and extract financial statements.

    The first annual filing that yields statements seeds the result; the
    remaining annual filings are merged into it. Semiannual filings are
    merged when ``report_tp`` matches 'half' or 'quarter', quarterly
    filings when it matches 'quarter'.

    Parameters
    ----------
    corp_code: str
        Unique code of the disclosing company (8 digits)
    bgn_de: str
        Search start date (YYYYMMDD)
    end_de: str, optional
        Search end date (YYYYMMDD)
    fs_tp: tuple of str, optional
        'bs' statement of financial position, 'is' income statement,
        'cis' statement of comprehensive income, 'cf' cash flow statement
    separate: bool, optional
        Whether to use separate financial statements
    report_tp: str, optional
        'annual', 'half' or 'quarter'
    lang: str, optional
        'ko' Korean, 'en' English
    separator: bool, optional
        Whether to display the thousands separator

    Returns
    -------
    FinancialStatement
        Result of the financial statement search

    Raises
    ------
    RuntimeError
        If no annual filing is available once the search step finishes.
    NotFoundConsolidated
        If consolidated statements are requested (``separate`` is False)
        but not available.
    """
    if is_notebook():
        from tqdm import tqdm_notebook as tqdm
    else:
        from tqdm import tqdm

    # Keyword arguments shared by every filing search / statement merge.
    search_kwargs = dict(corp_code=corp_code, bgn_de=bgn_de, end_de=end_de,
                         page_count=100, last_reprt_at='Y')
    merge_kwargs = dict(fs_tp=fs_tp, separate=separate, lang=lang)

    # Financial statement search result
    statements = None

    reports = []
    try:
        # Search business reports (final versions only)
        reports = search_filings(pblntf_detail_ty='A001', **search_kwargs)
    except NoDataReceived:
        # Fall back to audit reports
        audit_detail_ty = 'F001' if separate else 'F002'
        reports = search_filings(pblntf_detail_ty=audit_detail_ty, **search_kwargs)
    finally:
        # NOTE(review): raising inside ``finally`` also replaces any
        # exception still propagating from the searches above.
        if len(reports) == 0:
            raise RuntimeError('Could not find an annual report')

    next_index = 0
    for idx, _ in enumerate(reports):
        # Starting from the first report, look for one that yields
        # statements; XBRL is used when the report provides it.
        candidate = reports[idx]
        xbrl = candidate.xbrl
        if xbrl is None:
            statements = analyze_html(candidate, **merge_kwargs)
        else:
            # An XBRL file exists
            if separate is False and not xbrl.exist_consolidated():
                raise NotFoundConsolidated(
                    'Could not find consolidated financial statements')
            # Initialize the statements from the XBRL information
            statements = copy.deepcopy(
                analyze_xbrl(candidate,
                             fs_tp=fs_tp,
                             separate=separate,
                             lang=lang,
                             show_abstract=False,
                             show_class=True,
                             show_depth=10,
                             show_concept=True,
                             separator=separator))
        # A report that only records amendments carries no statements;
        # in that case move on to the next report.
        if statements is not None:
            next_index = idx + 1
            break

    # NOTE(review): ``statements`` is still None here when no report
    # produced statements - confirm whether that can happen upstream.
    if separate is False and not any(
            statements[tp] is not None for tp in statements):
        raise NotFoundConsolidated(
            'Could not find consolidated financial statements')

    label_df = None
    for report in tqdm(reports[next_index:], desc='Annual reports', unit='report'):
        statements, label_df = merge_fs(statements, label_df, report, **merge_kwargs)

    # (filing detail type, progress-bar label, report_tp values that enable it)
    periodic_filings = (
        ('A002', 'Semiannual reports', ('half', 'quarter')),
        ('A003', 'Quarterly report', ('quarter',)),
    )
    for detail_ty, progress_label, triggers in periodic_filings:
        if not any(str_compare(report_tp, trigger) for trigger in triggers):
            continue
        filings = search_filings(pblntf_detail_ty=detail_ty, **search_kwargs)
        for report in tqdm(filings, desc=progress_label, unit='report'):
            statements, label_df = merge_fs(statements, label_df, report, **merge_kwargs)

    statements = sorting_columns(drop_empty_columns(statements))
    label_df = sorting_columns(drop_empty_columns(label_df))

    info = {
        'corp_code': corp_code,
        'bgn_de': bgn_de,
        'end_de': end_de,
        'separate': separate,
        'report_tp': report_tp,
        'lang': lang,
        'separator': separator
    }
    return FinancialStatement(statements, label_df, info)