Python Storage.parse Examples

Programming Language: Python

Namespace/Package Name: coarij.storage

Class/Type: Storage

Method/Function: parse

Examples at hotexamples.com: 2

Python Storage.parse - 2 examples found. These are the top rated real world Python examples of coarij.storage.Storage.parse extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Storage(7)

download(4)

download_ledger(2)

extract(2)

parse(2)

tokenize(2)

get_ledger(1)

Example #1

Show file

    def test_parse(self):
        storage = Storage(self.ROOT)
        path = self._download(kind="F")
        path = storage.parse("company.history")
        self.assertTrue(path.exists())
        self.assertTrue(path.joinpath("2018").exists())
        self.assertTrue(path.joinpath("2018/documents.csv").exists())
        self.assertGreater(
            len(list(path.joinpath("2018/docs").glob("*company_history.txt"))),
            0)

        path = storage.parse("business.risks", sec_code="1376")
        with path.joinpath("2018/documents.csv").open(encoding="utf-8") as f:
            self.assertEquals(len(f.readlines()), 2)
        self.assertEquals(
            len(list(path.joinpath("2018/docs").glob("*business_risks.txt"))),
            1)

Example #2

Show file

class CoARiJ(object):
    """
    Data management tool for CoARiJ
     dataset.
    """
    def __init__(self):
        self._storage = Storage()

    def download(self, directory="", kind="F", year="", force=False):
        """Download the {kind} {year} dataset to {directory}.

        Args:
            directory (str): Downloaded dataset to specified directory.
            kind (str): 'F': raw file datadata, 'E': text extracted data.
            year (str): Financial year of dataset.
            force (bool): When True, overwrite data if exist.

        Returns:
            str: Path to downloaded directory

        """
        return self._storage.download(directory, kind, year, force)

    def parse(self,
              aspect_element,
              source_directory="",
              target_directory="",
              year="",
              edinet_code="",
              sec_code="",
              jcn="",
              normalized=True):
        """
        Parse {aspect_to_element} from files in {source_directory}{year} and
        save it in {target_directory}{year} as txt/html file.

        Args:
            aspect_element (str): Target aspect.element (ex: company.history).
            source_directory (str): Source directory includes XBRL files.
            target_directory (str): Target directory that txt/htmls are saved.
            year (str): Target financial year.
            edinet_code (str): EDINET code to specify compan.
            sec_code (str): SEC code to specify compan.
            jcn (str): Target JCN code to specify compan.
            normalized: (bool): True: extract text, False: save raw xml(html).

        Returns:
            str: Path to parsed files directory

        """
        return self._storage.parse(aspect_element, source_directory,
                                   target_directory, year, edinet_code,
                                   sec_code, jcn, normalized)

    def tokenize(self,
                 tokenizer="janome",
                 source_directory="",
                 target_directory="",
                 year="",
                 edinet_code="",
                 sec_code="",
                 jcn="",
                 aspect_element="",
                 mode="",
                 dictionary="",
                 dictionary_type=""):
        """
        Tokenize by {tokenizer} from files in {source_directory}{year} and
        save it in {target_directory}{year} as txt/html file.

        Args:
            tokenizer (str): Japanese tokenizer ('janome' or 'sudachi').
            source_directory (str): Source directory includes XBRL files.
            target_directory (str): Target directory that txt/htmls are saved.
            year (str): Target financial year.
            edinet_code (str): EDINET code to specify compan.
            sec_code (str): SEC code to specify compan.
            jcn (str): Target JCN code to specify compan.
            aspect_element (str): Target aspect.element (ex: company.history).
            mode: (str): Sudachi tokenizer mode.
            dictionary: (str): Dictionary path for Janome.
            dictionary_type: (str): Dictionary type for Janome.

        Returns:
            str: Path to parsed files directory

        """
        return self._storage.tokenize(tokenizer, source_directory,
                                      target_directory, year, edinet_code,
                                      sec_code, jcn, aspect_element, mode,
                                      dictionary, dictionary_type)