def test_get_xbrl(self):
    """Fetch document S100FGR9 next to this test module and check the returned path exists."""
    here = os.path.dirname(__file__)
    # Open question kept from the original author: lang="en" does not exist?
    downloaded = DocumentClient().get_xbrl("S100FGR9", save_dir=here)
    self.assertTrue(os.path.exists(downloaded))
    # Remove the downloaded file so the test directory stays clean.
    os.remove(downloaded)
def setUpClass(cls):
    """Download document S100G2KL with ``expand_level="dir"`` and store a ``Reader`` on the class.

    The download is saved under ``../../data`` relative to this module, and the
    path returned by ``get_xbrl`` is handed to ``Reader``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "../../data")
    xbrl_path = DocumentClient().get_xbrl(
        "S100G2KL", save_dir=data_dir, expand_level="dir"
    )
    cls.reader = Reader(xbrl_path)
def test_get_xbrl_zip_dir(self):
    """Fetch document S100FGSC with ``expand_level=None`` and expect a ``.zip`` file.

    NOTE(review): despite the ``_zip_dir`` suffix in the name, this test
    exercises ``expand_level=None`` and asserts on a ``.zip`` path.
    """
    _dir = os.path.dirname(__file__)
    client = DocumentClient()
    # Open question kept from the original author: lang="en" does not exist?
    file_path = client.get_xbrl("S100FGSC", save_dir=_dir, expand_level=None)
    try:
        self.assertTrue(os.path.exists(file_path))
        self.assertTrue(str(file_path).endswith(".zip"))
    finally:
        # Clean up even when an assertion fails, so a failing run does not
        # leave the downloaded archive next to the test module.
        if os.path.isfile(file_path):
            os.remove(file_path)
def test_get_xbrl_zip(self):
    """Fetch document S100FGR9 with ``expand_level="dir"`` and expect a directory.

    NOTE(review): despite the ``_zip`` suffix in the name, this test asserts
    that the returned path is a directory.
    """
    _dir = os.path.dirname(__file__)
    client = DocumentClient()
    # Open question kept from the original author: lang="en" does not exist?
    file_path = client.get_xbrl("S100FGR9", save_dir=_dir, expand_level="dir")
    try:
        self.assertTrue(os.path.exists(file_path))
        self.assertTrue(os.path.isdir(file_path))
    finally:
        # Clean up even when an assertion fails; handle both shapes the
        # artifact may have so nothing is left next to the test module.
        if os.path.isdir(file_path):
            shutil.rmtree(file_path)
        elif os.path.exists(file_path):
            os.remove(file_path)
def check():
    """Download document S100G2KL, extract ``Finance`` data and dump ``bs()`` to ``bs.csv``.

    A scratch ``data`` directory beside this module is used for the download
    and removed afterwards. ``bs.csv`` is written with a relative path (so it
    lands in the current working directory) using ``shift_jis`` encoding.
    """
    _dir = os.path.join(os.path.dirname(__file__), "./data")

    # Always start from an empty scratch directory. The original only created
    # the directory when it was absent and merely deleted it when it was
    # present, leaving no directory at all in the latter case.
    if os.path.exists(_dir):
        shutil.rmtree(_dir)
    os.mkdir(_dir)

    try:
        client = DocumentClient()
        file_path = client.get_xbrl("S100G2KL", save_dir=_dir, expand_level="dir")
        reader = Reader(file_path)
        print("Start Calculation")
        bs = reader.extract(Finance).bs()
        bs.to_csv("bs.csv", index=False, encoding="shift_jis")
    finally:
        # Remove the scratch directory even if the download or parsing fails.
        shutil.rmtree(_dir)
def test_xbrl_dir(self):
    """Check that ``Directory`` exposes each linked document and that ``delete()`` removes it.

    Document S100FGR9 is downloaded with ``expand_level="dir"`` into
    ``../data``; every listed attribute must yield at least one element with
    the paired tag name, and the path must be gone after ``delete()``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "../data")
    file_path = DocumentClient().get_xbrl(
        "S100FGR9", save_dir=data_dir, expand_level="dir"
    )
    xbrl_dir = Directory(file_path)

    # (attribute on Directory, tag expected at least once), checked in order.
    expected_tags = (
        ("xsd", "element"),
        ("cal", "calculationLink"),
        ("def_", "definitionArc"),
        ("lab", "labelLink"),
        ("lab_en", "labelLink"),
        ("pre", "presentationLink"),
    )
    for attribute, tag in expected_tags:
        document = getattr(xbrl_dir, attribute)
        self.assertGreater(len(document.find_all(tag)), 0)

    self.assertTrue(xbrl_dir.man.find("manifest"))

    # delete() is part of what is being tested, so it runs before the final check.
    xbrl_dir.delete()
    self.assertFalse(os.path.exists(file_path))
def get_xbrl(self, save_dir: str = "", file_name: str = "", lang: str = "ja",):
    """Download this document through ``DocumentClient.get_xbrl``.

    ``self.document_id``, ``save_dir``, ``file_name`` and ``lang`` are passed
    on positionally, in that order, and the client's return value is returned
    unchanged.
    """
    # The client is imported at call time rather than at module import time.
    from xbrr.edinet.client.document_client import DocumentClient

    # NOTE(review): arguments are forwarded by position; confirm that the
    # fourth positional parameter of DocumentClient.get_xbrl is still `lang`.
    return DocumentClient().get_xbrl(self.document_id, save_dir, file_name, lang)
def collect(self, directory="", source_directory="", year="",
            edinet_code="", sec_code="", jcn="", file_type="xbrl"):
    """
    Collect the documents based on ledger file.

    The ledger (``self.data``) is narrowed by every non-empty filter, then each
    remaining row is either copied from a local ``<source>/<fiscal_year>/docs``
    folder or downloaded into the target folder.

    Args:
        directory: Base folder for collected files. Empty means
            ``self.storage._default_raw_data`` under the current working
            directory; relative paths are resolved against the current
            working directory. A sub-folder named after the applied filter
            values joined by ``_`` is created inside it.
        source_directory: Folder searched for ``<doc_id>.xbrl`` files that are
            already available locally; resolved like ``directory``.
        year: Matched against the ``fiscal_year`` column after ``str()``.
        edinet_code: Matched against the ``edinet_code`` column as given.
        sec_code: Matched against the ``sec_code`` column after ``str()``.
        jcn: Matched against the ``jcn`` column after ``str()``.
        file_type: One of ``"pdf"``, ``"xbrl"``, ``"zip"`` or ``"csr"``; only
            consulted for rows that have no local ``.xbrl`` copy.

    Returns:
        The filtered ledger rows.

    Raises:
        Exception: If no filter is given, or if ``file_type`` is not one of
            the supported values when a download is needed.
    """

    def resolve(location):
        # Shared rule for both folders: default location, absolute, or cwd-relative.
        if not location:
            return Path.cwd().joinpath(self.storage._default_raw_data)
        candidate = Path(location)
        if candidate.is_absolute():
            return candidate
        return Path.cwd().joinpath(location)

    source_root = resolve(source_directory)

    selected = self.data
    criteria = {
        "fiscal_year": str(year),
        "edinet_code": edinet_code,
        "sec_code": str(sec_code),
        "jcn": str(jcn),
    }
    applied = []
    for column, wanted in criteria.items():
        if wanted:
            selected = selected[selected[column] == wanted]
            applied.append(wanted)

    if not applied:
        raise Exception("You have to specify at least one condition.")

    target_root = resolve(directory).joinpath("_".join(applied))
    if not target_root.exists():
        target_root.mkdir(parents=True, exist_ok=True)

    for _, row in tqdm(selected.iterrows(), total=selected.shape[0]):
        fiscal_year = row["fiscal_year"]
        doc_id = row["doc_id"]
        local_copy = source_root.joinpath(fiscal_year).joinpath("docs").joinpath(f"{doc_id}.xbrl")

        if local_copy.exists():
            # Already available locally: copy instead of hitting the API.
            shutil.copy(str(local_copy), str(target_root.joinpath(f"{doc_id}.xbrl")))
            continue

        client = DocumentClient()
        file_type_matched = True
        try:
            if file_type == "pdf":
                client.get_pdf(doc_id, save_dir=target_root)
            elif file_type == "xbrl":
                client.get_xbrl(doc_id, save_dir=target_root, expand_level="file")
            elif file_type == "zip":
                client.get_xbrl(doc_id, save_dir=target_root, expand_level="dir")
            elif file_type == "csr":
                csr_path = row['csr_path']
                # Rows without a usable csr_path are skipped silently.
                if isinstance(csr_path, str) and csr_path:
                    print(csr_path)
                    file_name = os.path.basename(csr_path)
                    url = f"https://s3-ap-northeast-1.amazonaws.com/chakki.esg.csr.jp/{csr_path}"
                    self.storage._download(url, target_root.joinpath(file_name))
            else:
                file_type_matched = False
        except Exception:
            # Best effort: a failed download is reported and the loop goes on.
            print("Can not download \n{}.".format(doc_id))

        if not file_type_matched:
            raise Exception(f"File type {file_type} is not supported")
        time.sleep(0.1)  # to save api host

    return selected