Example #1
0
def get_xbrl_from_file(file_path: str) -> DartXbrl:
    """Load an XBRL file and wrap the result in a DartXbrl object.

    Parameters
    ----------
    file_path: str
        Path of the XBRL file to load.

    Returns
    -------
    DartXbrl
        DartXbrl instance built from the file's base name and the model
        returned by Arelle's model manager.
    """
    # Workaround for an error raised on Linux when Arelle is installed from
    # PyPI. The GitHub version already contains the fix, so this is slated
    # for removal.
    if sys.platform not in ('win32', 'darwin'):
        arelle_app_dir = os.path.join(os.path.expanduser("~/.config"),
                                      "arelle")
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(arelle_app_dir, exist_ok=True)
    model_xbrl = Cntlr.Cntlr().modelManager.load(file_path)
    # Splitting only on '\\' left the whole path in place for '/'-separated
    # paths. Normalising to '/' first yields the base name for both styles.
    filename = os.path.basename(file_path.replace('\\', '/'))
    return DartXbrl(filename, model_xbrl)
Example #2
0
def main():
    """Collect facts from every matching XBRL file and write them to a CSV.

    Unless ``IS_EXTRACTED`` is truthy, the XBRL/XSD/XML members under
    ``XBRL/PublicDoc/`` are first extracted from the archives in
    ``EDINET_ROOT_DIR/zip``. Each file matched by ``EDINET_XBRL_REGREX`` is
    then passed to ``get_facts``; the collected rows are merged with the
    EDINET code list and saved as ``OUTPUT_FILE_NAME`` (cp932 encoded).
    """
    if not IS_EXTRACTED:
        edinet_zip_dir = os.path.join(EDINET_ROOT_DIR, "zip")
        # Raw f-string: "\." in a normal string literal is an invalid escape
        # sequence. The resulting pattern text is unchanged.
        member_pattern = "|".join([
            rf"XBRL/PublicDoc/.*\.{extension}"
            for extension in ["xbrl", "xsd", "xml"]
        ])
        extract_files_from_zip(edinet_zip_dir,
                               dest_dir_root=EDINET_ROOT_DIR,
                               unzip_members_regrep=member_pattern)

    # Gather facts from the XBRL files.
    xbrl_file_regrex = os.path.join(EDINET_ROOT_DIR, EDINET_XBRL_REGREX)
    xbrl_files = glob.glob(xbrl_file_regrex)
    list_dict_facts = []
    ctrl = Cntlr.Cntlr()
    model_manager = ModelManager.initialize(ctrl)
    for index, xbrl_file in enumerate(xbrl_files):
        print(xbrl_file, ":", index + 1, "/", len(xbrl_files))
        list_dict_facts_per_file = get_facts(model_manager, xbrl_file)
        if list_dict_facts_per_file is not None:
            # extend() appends in place instead of copying the accumulated
            # list on every iteration.
            list_dict_facts.extend(list_dict_facts_per_file)

    if not list_dict_facts:
        print("処理対象のデータはありませんでした。")
        return

    df_yuho = pd.DataFrame(list_dict_facts)
    # Merge in the information from the EDINET code list.
    df_edinetcd_info = get_edinetcd_info(EDINETCDDLINFO_COLS)
    df_yuho = df_yuho.merge(df_edinetcd_info, on=EDINETCD_COL, how="left")
    df_yuho.to_csv(os.path.join(EDINET_ROOT_DIR, OUTPUT_FILE_NAME),
                   index=False,
                   encoding="cp932")
    print(f"{'-'*10} 情報抽出 完了 {'-'*10}")
Example #3
0
def get_current_df(xbrl_path, cache_dir='/mnt/disks/disk-1/arelle/cache/'):
    """Load an XBRL document with Arelle and return its numeric facts.

    Parameters
    ----------
    xbrl_path
        Location of the XBRL document, passed unchanged to
        ``modelManager.load``.
    cache_dir
        Directory assigned to the controller's ``webCache.cacheDir``. The
        default keeps the previously hard-coded location.

    Returns
    -------
    pandas.DataFrame
        One row per fact whose ``isNumeric`` is true and ``isNil`` is false.
        Rows keep their original positional index.
    """
    arelle = Cntlr.Cntlr()
    arelle.webCache.cacheDir = cache_dir
    arelle_xbrl = arelle.modelManager.load(xbrl_path)

    rows = [(fact, fact.value, fact.concept.qname, fact.context.hasSegment,
             fact.context.period.viewText(), fact.contextID,
             fact.context.isStartEndPeriod, fact.context.isInstantPeriod,
             fact.context.isForeverPeriod, fact.context.startDatetime,
             fact.context.endDatetime) for fact in arelle_xbrl.facts]
    arelle_df = pd.DataFrame(
        data=rows,
        columns=(
            'Fact',  # the fact object itself, kept in case more is needed from it
            'Value',  # fact.value
            'Account',  # qname of the fact's concept
            'Category',  # context.hasSegment
            'Period',
            'ContextID',
            'isStartEndPeriod',
            'isInstantPeriod',
            'isForeverPeriod',
            'startDateTime',
            'endDateTime'))

    # Keep only facts that are numeric and not nil.
    numeric_mask = arelle_df.Fact.map(lambda f: f.isNumeric and not f.isNil)
    return arelle_df[numeric_mask]
Example #4
0
def main():
    """Load './nke-20190531.xml' with Arelle and print its undefined facts.

    Every entry of the loaded model's ``undefinedFacts`` is printed on its
    own line.
    """
    instance_path = './nke-20190531.xml'
    controller = Cntlr.Cntlr()
    model_xbrl = controller.modelManager.load(instance_path)
    for undefined_fact in model_xbrl.undefinedFacts:
        print(undefined_fact)
Example #5
0
def main():
    """Export the facts of every matching EDINET XBRL file.

    Unless ``IS_EXTRACTED`` is truthy, the XBRL/XSD/XML members under
    ``XBRL/PublicDoc/`` are first extracted from the archives in
    ``EDINET_ROOT_DIR/zip``. Each file matched by ``EDINET_XBRL_REGREX`` is
    then handed to ``export_facts`` together with a shared model manager.
    """
    if not IS_EXTRACTED:
        edinet_zip_dir = os.path.join(EDINET_ROOT_DIR, "zip")
        # Raw f-string: "\." in a normal string literal is an invalid escape
        # sequence. The resulting pattern text is unchanged.
        member_pattern = "|".join([
            rf"XBRL/PublicDoc/.*\.{extension}"
            for extension in ["xbrl", "xsd", "xml"]
        ])
        extract_files_from_zip(edinet_zip_dir,
                               dest_dir_root=EDINET_ROOT_DIR,
                               unzip_members_regrep=member_pattern)

    # Process the XBRL files.
    xbrl_file_regrex = os.path.join(EDINET_ROOT_DIR, EDINET_XBRL_REGREX)
    xbrl_files = glob.glob(xbrl_file_regrex)
    ctrl = Cntlr.Cntlr()
    model_manager = ModelManager.initialize(ctrl)
    total = len(xbrl_files)
    for index, xbrl_file in enumerate(xbrl_files):
        print(xbrl_file, ":", index + 1, "/", total)
        export_facts(model_manager, xbrl_file)

    print(f"{'-'*10} XBRL出力 完了 {'-'*10}")
Example #6
0
def get_xbrl_from_file(file_path: str) -> DartXbrl:
    """Load an XBRL file and wrap the result in a DartXbrl object.

    A console spinner runs while the file is being loaded and is always
    stopped, even when loading raises.

    Parameters
    ----------
    file_path: str
        Path of the XBRL file to load.

    Returns
    -------
    DartXbrl
        DartXbrl instance built from the file's base name and the model
        returned by Arelle's model manager.
    """
    from .spinner import Spinner
    spinner = Spinner('XBRL Loading')
    spinner.start()
    try:
        # Workaround for an error raised when Arelle is installed from PyPI:
        # make sure the per-user Arelle directory exists before loading.
        if sys.platform == 'darwin':
            arelle_app_dir = os.path.join(
                os.path.expanduser('~/Library/Application Support'), 'Arelle')
            os.makedirs(arelle_app_dir, exist_ok=True)
        elif sys.platform != 'win32':
            arelle_app_dir = os.path.join(os.path.expanduser("~/.config"),
                                          "arelle")
            os.makedirs(arelle_app_dir, exist_ok=True)

        model_xbrl = Cntlr.Cntlr().modelManager.load(file_path)
        # Splitting only on '\\' left the whole path in place for
        # '/'-separated paths. Normalising to '/' first yields the base name
        # for both styles.
        filename = os.path.basename(file_path.replace('\\', '/'))
        return DartXbrl(filename, model_xbrl)
    finally:
        # Without this the spinner kept running after a failed load.
        spinner.stop()
Example #7
0
def load_edinet():
    """Build one row of selected values per XBRL file found on disk.

    Every file matching ``const.FOLDER_XBRL + '*.xbrl'`` is loaded with
    Arelle and its facts are put into a DataFrame. For each entry of the
    module-level ``dict_cols`` (iterated in order) one value is picked from
    those facts and appended to the row. Immediately after the
    'EDINETコード' entry two extra values are appended: the filer's
    industry looked up in the EDINET code list, and '連結' or '単独'.

    Returns
    -------
    list
        One list of values per XBRL file. When the code list marks the filer
        as consolidated ('連結の有無' == '有') the consolidated variant of
        the row is returned for that file, otherwise the plain one.
    """

    edinet_list = []

    # Collect the XBRL files.
    xbrl_files = glob.glob(const.FOLDER_XBRL + '*.xbrl')

    # Read EdinetcodeDlInfo.csv (first line skipped, cp932 encoded).
    edinetcodedata = pd.read_csv(const.FILE_EDINETCODE,
                                 skiprows=1,
                                 encoding='cp932')

    for index, xbrl_file in enumerate(xbrl_files):
        # A fresh controller and model manager are created for every file.
        ctrl = Cntlr.Cntlr()
        model_manager = ModelManager.initialize(ctrl)
        model_xbrl = model_manager.load(xbrl_file)

        print(xbrl_file, ":", index + 1, "/", len(xbrl_files))

        # Put the XBRL facts into a DataFrame.
        factData = pd.DataFrame(data=[
            (fact.concept.qname.localName, fact.value, fact.isNumeric,
             fact.contextID, fact.decimals) for fact in model_xbrl.facts
        ],
                                columns=[
                                    'element_id', 'value', 'isNumeric',
                                    'contextID', 'decimals'
                                ])

        # Values to extract. datalist is the plain row; datalist_ren is the
        # consolidated variant, only filled while flg_ren is True.
        datalist = []
        datalist_ren = []
        flg_ren = False
        for key, value in dict_cols.items():
            contextRef = value['contextRef']
            element_ids = value['element_id']
            # Entries without any element id are skipped: nothing is appended.
            if len(element_ids) > 0:
                # Try the candidate element ids in order and stop at the first
                # one that yields a usable match. data1/data2/data3 are read
                # after the loop, so they hold the last candidate's results.
                for element_id in element_ids:
                    # data1 = factData[factData['element_id'].str.contains(element_id)]
                    data1 = []
                    data2 = []
                    data3 = []
                    data1 = factData[factData['element_id'] == element_id]
                    # Exactly one fact with this element id: use it as is
                    # (data2/data3 stay empty lists in that case).
                    if len(data1) == 1: break

                    # Otherwise narrow by context: the
                    # '<contextRef>_NonConsolidatedMember' context (data2) and
                    # the bare contextRef context (data3).
                    data2 = data1[data1['contextID'] == (
                        contextRef + '_NonConsolidatedMember')]
                    data3 = data1[data1['contextID'] == contextRef]
                    if len(data2) >= 1 or len(data3) >= 1: break

                # NOTE(review): unitRef is assigned but never used below.
                data, data_ren, unitRef = '', '', ''
                # The checks below are independent `if`s, not an elif chain,
                # so a later one can overwrite what an earlier one set.
                if len(data1) == 1:
                    data = data1['value'].values[0]
                    data_ren = data
                    print(key + ' : ' + data)
                # Only the non-consolidated context matched: same value for
                # both rows.
                if len(data2) >= 1 and len(data3) == 0:
                    data = data2['value'].values[0]
                    data_ren = data
                    print(key + ' : ' + data)
                # The bare contextRef matched: the value goes to the
                # consolidated row only and the plain row gets ''.
                if len(data3) >= 1:
                    data = ''
                    data_ren = data3['value'].values[0]
                    print(key + '(連結) : ' + data_ren)
                if len(data1) == 0:
                    print(key + ' : ' + 'NoData ')
                    data = ''

                datalist.append(data)
                if flg_ren:
                    datalist_ren.append(data_ren)

                if key == 'EDINETコード':
                    # Look the filer up in the EDINET code list by the code
                    # that was just extracted.
                    company = edinetcodedata[edinetcodedata['EDINETコード'] ==
                                             data]
                    if len(company) == 1:
                        print('提出者業種 : ' + company['提出者業種'].values[0])
                        data = company['提出者業種'].values[0]

                        # flg_ren becomes True only when the code list says
                        # the filer is consolidated.
                        flg_ren = False
                        if company['連結の有無'].values[0] == '有':
                            print('連結の有無 : ' + company['連結の有無'].values[0])
                            flg_ren = True
                    else:
                        # No unique match: flg_ren keeps its previous value.
                        print('提出者業種 : ' + 'NoData ')
                        data = ''
                    datalist.append(data)
                    datalist.append('連結' if flg_ren else '単独')

                    if flg_ren:
                        # Copy everything collected so far into the
                        # consolidated row.
                        datalist_ren = datalist[:]

        if flg_ren:
            edinet_list.append(datalist_ren)
        else:
            edinet_list.append(datalist)

    return edinet_list