Exemple #1
0
    def run_test(self):
        """
        Check every expectation in ``self.test_set`` against extracted statements.

        The statements are extracted once through ``self.corp.extract_fs``.
        For each test case, the table selected by ``fs_tp`` is scanned row by
        row: a row matches when its label (with spaces removed) compares equal
        to ``item`` according to ``str_compare``. The value in the column found
        for ``date`` is then compared with ``expected``. When several rows
        match, the last one wins; when none matches, the actual value is None.
        A mismatch is reported through ``pytest.fail``.
        """
        statements = self.corp.extract_fs(bgn_de=self.bgn_de,
                                          separate=self.separate,
                                          report_tp=self.report_tp)
        for case in self.test_set:
            fs_tp = case['fs_tp']
            date = case['date']
            column = case['column']
            item = case['item']
            expected = case['expected']

            table = statements[fs_tp]
            value_column = find_all_columns(df=table, query=date)[0]
            name_column = find_all_columns(df=table, query=column)[0]

            # No early exit: a later matching row overrides an earlier one.
            actual = None
            for row in range(len(table)):
                cell = table[name_column].iloc[row].replace(' ', '')
                if str_compare(cell, item):
                    actual = table[value_column].iloc[row]

            if actual != expected:
                template = ("Test failed: corp_code='{}', "
                            "corp_name='{}', fs_tp='{}', "
                            "start_dt='{}', report_tp='{}', "
                            "date='{}', column='{}',"
                            "item='{}', actual='{}', expected='{}'")
                pytest.fail(template.format(
                    self.corp.corp_code,
                    self.corp.corp_name, fs_tp,
                    self.bgn_de, statements.info['report_tp'],
                    date, column,
                    item, actual, expected))
    def _extract_dataset(self, reports: List[Report]):
        """
        Extract concept_id / label pairs from the XBRL files of the reports.

        Every report is analysed with ``analyze_xbrl``. For each non-empty
        statement table returned, the 'concept_id' and 'label_ko' columns are
        located and, for every row where both values are truthy, the label is
        passed through ``self.extract_nouns``. The resulting
        ``(concept_id, label)`` tuples are stored in ``self._dataset``.

        Parameters
        ----------
        reports: list of Report
            Reports to extract from
        """
        # Pick the progress bar flavour that matches the running environment.
        if is_notebook():
            from tqdm import tqdm_notebook as tqdm
        else:
            from tqdm import tqdm

        dataset = []
        for report in tqdm(reports,
                           desc='Extracting concept_id and label_ko',
                           unit='report'):
            df_fs = analyze_xbrl(report)
            if df_fs is None:
                continue
            for tp in df_fs:
                df = df_fs[tp]
                if df is None:
                    continue
                concept_column = find_all_columns(df, 'concept_id')[0]
                label_ko_column = find_all_columns(df, 'label_ko')[0]
                for idx in range(len(df)):
                    concept_id = df[concept_column].iloc[idx]
                    label_ko = df[label_ko_column].iloc[idx]
                    if not (concept_id and label_ko):
                        continue
                    try:
                        label = self.extract_nouns(label_ko)
                    except Exception:
                        # Best effort: skip rows whose label cannot be
                        # processed. Only ``Exception`` is caught so that
                        # KeyboardInterrupt / SystemExit still stop the run.
                        continue
                    dataset.append((concept_id, label))

        self._dataset = dataset
def compare_df_and_ndf_cnn(
        column: Tuple[Union[str, Tuple[str]]], df: DataFrame, ndf: DataFrame,
        ldf: DataFrame, ndata: List[Union[float, str, None]],
        nlabels: List[str]) -> Tuple[List[Union[float, str]], List[str]]:
    """
    Fill the still-empty entries of a new column by matching concept ids.

    Entries of ``ndata`` that are a string, None or NaN are treated as not
    yet filled. For each of them a concept id is taken from the 'concept_id'
    column of ``df`` when that column exists, otherwise it is guessed from the
    cleaned 'label_ko' text with ``guess_concept_id``. Each row of ``ndf`` is
    then mapped to a guessed concept id; when it matches an unfilled entry
    that has not been claimed yet and the value in ``column`` is not a
    string, that value and label are written into ``ndata`` / ``nlabels``.
    (The original documentation describes this as a search based on a
    convolutional neural network.)

    Parameters
    ----------
    column: tuple
        Name of the column to add
    df: DataFrame
        DataFrame receiving the data; accumulates the extracted results
    ndf: DataFrame
        DataFrame to search; newly extracted from a Report
    ldf: DataFrame
        Not used by this function
    ndata: list of float
        Data list of the column to add (modified in place)
    nlabels: list of str
        Label list of the column to add (modified in place)

    Returns
    -------
    tuple of list
        Data list of the column to add, label list of the column to add
    """
    df_label_column = find_all_columns(df, 'label_ko')[0]

    concept_columns = find_all_columns(df, 'concept_id')
    has_concept_column = len(concept_columns) != 0
    if has_concept_column:
        df_concept_column = concept_columns[0]

    ndf_label_column = find_all_columns(ndf, 'label_ko')[0]

    # concept id -> position in ndata that still lacks a usable value
    pending = {}
    for row, current in enumerate(ndata):
        unfilled = (isinstance(current, str)
                    or current is None
                    or math.isnan(current))
        if not unfilled:
            continue

        title = df[df_label_column].iloc[row]
        title = extract_account_title(re.sub(r'\s+', '', title))

        if has_concept_column:
            key = df[df_concept_column].iloc[row]
        else:
            key = guess_concept_id(title)

        if key is not None:
            pending[key] = row

    # Positions of ndata that have already received a value in this pass.
    filled = set()
    for pos in range(len(ndf)):
        title = extract_account_title(ndf[ndf_label_column].iloc[pos])
        target = pending.get(guess_concept_id(title))
        if target is None or target in filled:
            continue
        candidate = ndf[column].iloc[pos]
        if isinstance(candidate, str):
            continue
        filled.add(target)
        ndata[target] = candidate
        nlabels[target] = title

    return ndata, nlabels
Exemple #4
0
def test_fs_show_depth(fs_report):
    """Showing 'bs' with show_depth=1 must leave exactly two 'class' columns."""
    shown = fs_report.show('bs', show_depth=1)
    class_columns = find_all_columns(shown, 'class')
    assert len(class_columns) == 2
Exemple #5
0
def test_fs_concept_false(fs_report):
    """Showing 'bs' with show_concept=False must leave no 'concept' column."""
    shown = fs_report.show('bs', show_concept=False)
    concept_columns = find_all_columns(shown, 'concept')
    assert len(concept_columns) == 0
Exemple #6
0
    def show(self,
             tp,
             show_class: bool = True,
             show_depth: int = 10,
             show_concept: bool = True) -> DataFrame:
        """
        Return a view of one stored financial statement.

        Parameters
        ----------
        tp: str
            Key of the statement to show, looked up in ``self._statements``.
            The original documentation lists 'fs' (statement of financial
            position), 'is' (income statement), 'ci' (comprehensive income)
            and 'cf' (cash flow).
            NOTE(review): callers seen alongside this method pass 'bs';
            confirm the valid keys.
        show_class: bool
            Whether to show the class columns
        show_depth: int
            Deepest class level to show (0-based index into the class
            columns); rows that have a non-None value in a deeper class
            column are removed
        show_concept: bool
            Whether to show the concept_id column

        Returns
        -------
        DataFrame
            The financial statement, or None when no statement is stored
            for ``tp``
        """
        from dart_fss.fs.extract import find_all_columns

        df = self._statements[tp]
        if df is None:
            return df
        class_columns = find_all_columns(df, 'class')

        if show_class is False:
            ncolumns = [column for column in df.columns.tolist()
                        if column not in class_columns]
            df = df[ncolumns]
        else:
            hidden_columns = class_columns[show_depth + 1:]
            cdf = df[class_columns]
            # Rows are selected by position (.iloc) rather than dropped by
            # label, so the result is correct even when the index is not the
            # default 0..n-1 range.
            keep_rows = [
                idx for idx in range(len(cdf))
                if not any(class_idx > show_depth and item is not None
                           for class_idx, item in enumerate(cdf.iloc[idx]))
            ]
            ncolumns = [column for column in df.columns.tolist()
                        if column not in hidden_columns]
            df = df[ncolumns].iloc[keep_rows]

        if show_concept is False:
            concept_columns = find_all_columns(df, 'concept_id')
            # NOTE(review): the column is only hidden when exactly one
            # concept_id column is found; kept as in the original.
            if len(concept_columns) == 1:
                ncolumns = [column for column in df.columns.tolist()
                            if column not in concept_columns]
                df = df[ncolumns]
        return df
def test_xbrl_get_author_information(samsung_xbrl):
    """The '공시담당자' column of the author information holds the expected value at label 3."""
    info = samsung_xbrl.get_author_information()
    officer_column = find_all_columns(info, '공시담당자')[0]
    assert info[officer_column][3] == '031-277-7227'