Ejemplo n.º 1
0
    def test_cache_file(self):
        """
        Unit test for HttpCache.cache_file.
        The first request has to download the file, the second one must be served from the cache.
        Afterwards the downloaded file is removed again with HttpCache.purge_file.
        :return:
        """
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        # the test compares delay / 1000 with time.time() differences, so it treats the delay as milliseconds
        delay_ms: int = 5000
        delay_s: float = delay_ms / 1000
        cache_root: str = os.path.abspath('./../cache/') + '/'
        http_cache: HttpCache = HttpCache(cache_root, delay_ms)

        url: str = "https://www.w3schools.com/xml/note.xml"
        local_path: str = cache_root + "www.w3schools.com/xml/note.xml"

        # start from a clean state: remove a left-over file of a previous run
        if os.path.isfile(local_path):
            os.remove(local_path)

        # first call: the file is not cached yet, so the call is expected to take at least the delay
        started: float = time.time()
        first_result = http_cache.cache_file(url)
        self.assertEqual(first_result, local_path)
        self.assertGreaterEqual(time.time() - started, delay_s)

        # second call: only the path of the already cached file is returned, so it must be faster than the delay
        started = time.time()
        second_result = http_cache.cache_file(url)
        self.assertEqual(second_result, local_path)
        self.assertLess(time.time() - started, delay_s)

        # the file has to exist on disk now
        self.assertTrue(os.path.isfile(local_path))
        # purging has to report success ...
        self.assertTrue(http_cache.purge_file(url))
        # ... and the file has to be gone afterwards
        self.assertFalse(os.path.isfile(local_path))
Ejemplo n.º 2
0
    def test_parse_taxonomy(self):
        """
        Integration test for parse_taxonomy().
        Parses the extending taxonomy of Apple Inc. and checks if all us-gaap concepts were parsed.
        """
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        cache_dir: str = os.path.abspath('./../cache/') + '/'
        http_cache: HttpCache = HttpCache(cache_dir)
        print(f"Saving to {cache_dir}")

        # Disabled check: parse the ESEF taxonomy and test if the lei taxonomy was also imported
        #
        # # entry point for ESEF core schema
        # entry_point_url: str = 'https://www.esma.europa.eu/taxonomy/2019-03-27/esef_cor.xsd'
        #
        # tax: TaxonomySchema = parse_taxonomy(cache, entry_point_url)
        # # test if the lei taxonomy was also parsed (the lei taxonomy is imported by ESEF)
        # lei_tax: TaxonomySchema = tax.get_taxonomy('http://www.xbrl.org/taxonomy/int/lei/2018-11-01')
        # self.assertTrue(lei_tax)

        # extension schema of the Apple Inc. filing
        extension_schema_url: str = 'https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.xsd'

        extension_tax: TaxonomySchema = parse_taxonomy(http_cache,
                                                       extension_schema_url)
        print(extension_tax)

        # the us-gaap taxonomy has to be reachable through the extension taxonomy
        us_gaap_ns: str = 'http://fasb.org/us-gaap/2020-01-31'
        us_gaap_tax: TaxonomySchema = extension_tax.get_taxonomy(us_gaap_ns)
        self.assertTrue(us_gaap_tax)
        # us-gaap 2020 has 17281 concepts
        concept_count: int = len(us_gaap_tax.concepts)
        self.assertEqual(concept_count, 17281)
Ejemplo n.º 3
0
 def test_parse_ixbrl_document(self):
     """ Integration test: parses the aapl-20200926.htm iXBRL instance with parse_ixbrl_url() and counts the facts """
     logging.basicConfig(stream=sys.stdout, level=logging.INFO)
     cache_root: str = os.path.abspath('./../cache/') + '/'
     http_cache: HttpCache = HttpCache(cache_root)

     # inline XBRL document that is fetched through the cache and parsed
     document_url: str = 'https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm'
     instance: XbrlInstance = parse_ixbrl_url(document_url, http_cache)
     print(instance)

     # the parsed instance is expected to contain 1334 facts
     fact_count: int = len(instance.facts)
     self.assertEqual(fact_count, 1334)
Ejemplo n.º 4
0
def parse_ixbrl_url(instance_url: str, cache: HttpCache) -> XbrlInstance:
    """
    Parses an inline XBRL (iXBRL) instance file that is identified by its url.
    The url is first resolved to a local file path with cache.cache_file(); the local file is then
    handed to parse_ixbrl() together with the cache and the original url.
    Note from the original author for EDGAR submissions: extract the enclosure and copy the files to the
    cache before calling this method, i.e. use CacheHelper.extract_edgar_enclosure()
    :param instance_url: url to the instance file (on the internet)
    :param cache: HttpCache instance
    :return: the result of parse_ixbrl() for the cached file
    """
    return parse_ixbrl(cache.cache_file(instance_url), cache, instance_url)
Ejemplo n.º 5
0
    def test_parse_linkbase(self):
        """
        Unit test for linkbase.parse_linkbase()
        Parses the german ESEF label linkbase and checks the locator count and one sample label.
        """
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        cache_root: str = os.path.abspath('./../cache/') + '/'
        http_cache: HttpCache = HttpCache(cache_root)
        label_linkbase_url: str = 'https://www.esma.europa.eu/taxonomy/2019-03-27/esef_cor-lab-de.xml'

        parsed: Linkbase = parse_linkbase(http_cache, label_linkbase_url,
                                          LinkbaseType.LABEL)
        print(parsed)

        # This linkbase has 5028 locators
        self.assertEqual(len(parsed.extended_links[0].root_locators), 5028)

        # Todo: Function for getting all labels for a given concept id would be nice..
        # check the labels for one sample concept
        for root_locator in parsed.extended_links[0].root_locators:
            if root_locator.concept_id == 'ifrs-full_Assets':
                first_label_text: str = root_locator.children[0].labels[0].text
                self.assertEqual(first_label_text, 'Vermögenswerte')
Ejemplo n.º 6
0
def _parse_xsd_boolean(value: str) -> bool:
    """
    Converts the lexical value of a xsd:boolean attribute (i.e. nillable="false") into a bool.
    A plain bool(value) can not be used here, because every non-empty string (also "false") is truthy.
    :param value: raw attribute value taken from the schema
    :return: True for 'true' and '1' (case and surrounding whitespace are ignored), False for everything else
    """
    return value.strip().lower() in ('true', '1')


def _find_extended_link(linkbases, elr_xml_id: str):
    """
    Searches linkbases of one type for the extended link that belongs to an extended link role (ELR).
    :param linkbases: list of linkbases (all definition, all presentation or all calculation linkbases)
    :param elr_xml_id: xml id of the ELR
    :return: the matching extended link or None if no linkbase contains one. Within a linkbase the first
        match is taken; if several linkbases contain a match, the match of the last linkbase is returned
    """
    match = None
    for linkbase in linkbases:
        for extended_link in linkbase.extended_links:
            # the part behind the '#' of the elr_id is compared with the id of the role
            if extended_link.elr_id.split('#')[1] == elr_xml_id:
                match = extended_link
                break
    return match


def parse_taxonomy(cache: HttpCache, schema_url: str) -> TaxonomySchema:
    """
    Parses a taxonomy schema file, all schemas it imports (recursively) and all linkbases it references.
    :param cache: HttpCache instance
    :param schema_url: url to the schema (on the internet)
    :return: TaxonomySchema containing the imported taxonomies, the extended link roles, the concepts
        and the linkbases of the schema
    :raises TaxonomyNotFound: if cache.cache_file() raises a FileNotFoundError for the schema url
    """
    # Get the local absolute path to the schema file (and download it if it is not yet cached)
    try:
        schema_path: str = cache.cache_file(schema_url)
    except FileNotFoundError as err:
        # keep the original error as cause, so the traceback shows which file was missing
        raise TaxonomyNotFound(
            f"Could not find schema document from {schema_url}") from err

    root: ET.Element = ET.parse(schema_path).getroot()
    # get the target namespace of the taxonomy
    target_ns = root.attrib['targetNamespace']
    taxonomy: TaxonomySchema = TaxonomySchema(schema_url, target_ns)

    for import_element in root.findall('xsd:import', NAME_SPACES):
        # schemaLocation is an optional attribute of xsd:import. Without it there is nothing to download.
        import_url = import_element.attrib.get('schemaLocation')
        if import_url is None:
            continue
        # sometimes the import schema location is relative. i.e schemaLocation="xbrl-linkbase-2003-12-31.xsd"!!
        if not import_url.startswith('http'):
            import_url = resolve_uri(schema_url, import_url)
        taxonomy.imports.append(parse_taxonomy(cache, import_url))

    # parse ELR's
    for elr in root.findall('xsd:annotation/xsd:appinfo/link:roleType',
                            NAME_SPACES):
        elr_definition = elr.find(LINK_NS + 'definition')
        # role types without a (non-empty) definition are ignored
        if elr_definition is None or elr_definition.text is None:
            continue
        taxonomy.link_roles.append(
            ExtendedLinkRole(elr.attrib['id'], elr.attrib['roleURI'],
                             elr_definition.text.strip()))

    period_type_attr = XBRLI_NS + 'periodType'
    balance_attr = XBRLI_NS + 'balance'
    # find all elements that are defined in the schema
    for element in root.findall(XDS_NS + 'element'):
        attributes = element.attrib
        # if a concept has no id, it can not be referenced by a linkbase, so just ignore it
        if 'id' not in attributes or 'name' not in attributes:
            continue

        concept = Concept(attributes['id'], schema_url, attributes['name'])
        # NOTE: False (not None) is kept as marker for "no type" to stay backward compatible
        concept.type = attributes.get('type', False)
        # nillable and abstract are xsd:boolean attributes, so the text has to be interpreted
        concept.nillable = _parse_xsd_boolean(
            attributes['nillable']) if 'nillable' in attributes else False
        concept.abstract = _parse_xsd_boolean(
            attributes['abstract']) if 'abstract' in attributes else False
        concept.period_type = attributes.get(period_type_attr)
        concept.balance = attributes.get(balance_attr)
        # remove the prefix from the substitutionGroup (i.e xbrli:item -> item)
        concept.substitution_group = attributes['substitutionGroup'].split(
            ':')[-1] if 'substitutionGroup' in attributes else None

        taxonomy.concepts[concept.xml_id] = concept
        taxonomy.name_id_map[concept.name] = concept.xml_id

    for linkbase_ref in root.findall(
            'xsd:annotation/xsd:appinfo/link:linkbaseRef', NAME_SPACES):
        linkbase_url = linkbase_ref.attrib[XLINK_NS + 'href']
        role = linkbase_ref.attrib.get(XLINK_NS + 'role')
        # without a role attribute the type is guessed from the (not yet resolved) linkbase url
        if role is not None:
            linkbase_type = LinkbaseType.get_type_from_role(role)
        else:
            linkbase_type = LinkbaseType.guess_linkbase_role(linkbase_url)
        # check if the linkbase url is relative
        if not linkbase_url.startswith('http'):
            linkbase_url = resolve_uri(schema_url, linkbase_url)

        linkbase: Linkbase = parse_linkbase(cache, linkbase_url, linkbase_type)
        # add the linkbase to the taxonomy
        if linkbase_type == LinkbaseType.DEFINITION:
            taxonomy.def_linkbases.append(linkbase)
        elif linkbase_type == LinkbaseType.CALCULATION:
            taxonomy.cal_linkbases.append(linkbase)
        elif linkbase_type == LinkbaseType.PRESENTATION:
            taxonomy.pre_linkbases.append(linkbase)
        elif linkbase_type == LinkbaseType.LABEL:
            taxonomy.lab_linkbases.append(linkbase)

    # loop over the ELR's of the schema and assign the extended links from the linkbases.
    # An attribute is only assigned if a matching extended link was found.
    for elr in taxonomy.link_roles:
        definition_link = _find_extended_link(taxonomy.def_linkbases,
                                              elr.xml_id)
        if definition_link is not None:
            elr.definition_link = definition_link

        presentation_link = _find_extended_link(taxonomy.pre_linkbases,
                                                elr.xml_id)
        if presentation_link is not None:
            elr.presentation_link = presentation_link

        calculation_link = _find_extended_link(taxonomy.cal_linkbases,
                                               elr.xml_id)
        if calculation_link is not None:
            elr.calculation_link = calculation_link

    return taxonomy
Ejemplo n.º 7
0
def parse_linkbase(cache: HttpCache, linkbase_url: str,
                   linkbase_type: LinkbaseType) -> Linkbase:
    """
    Parses a linkbase and returns a Linkbase object containing all
    locators, arcs and links of the linkbase in a hierarchical order (a Tree)
    :param cache: HttpCache instance
    :param linkbase_url: url to the linkbase
    :param linkbase_type: Type of the linkbase. Every type other than DEFINITION, CALCULATION and
        PRESENTATION is treated as a label linkbase
    :return: Linkbase object holding the extended links of the file and the given linkbase type
    """
    linkbase_path: str = cache.cache_file(linkbase_url)
    root: ET.Element = ET.parse(linkbase_path).getroot()

    # store the role refs in a dictionary, with the role uri as key.
    # Role Refs are xlink's that connect the extended Links to the ELR defined in the schema
    role_refs: dict = {
        role_ref.attrib['roleURI']: role_ref.attrib[XLINK_NS + 'href']
        for role_ref in root.findall(LINK_NS + 'roleRef')
    }

    # figure out if we want to search for definitionLink, calculationLink, presentationLink or labelLink
    # figure out for what type of arcs we are searching; definitionArc, calculationArc, presentationArc or labelArc
    extended_link_tag: str
    arc_type: str
    if linkbase_type == LinkbaseType.DEFINITION:
        extended_link_tag = "definitionLink"
        arc_type = "definitionArc"
    elif linkbase_type == LinkbaseType.CALCULATION:
        extended_link_tag = "calculationLink"
        arc_type = "calculationArc"
    elif linkbase_type == LinkbaseType.PRESENTATION:
        extended_link_tag = "presentationLink"
        arc_type = "presentationArc"
    else:
        extended_link_tag = "labelLink"
        arc_type = "labelArc"

    # attribute names that are looked up for every arc
    closed_attr = XBRLDT_NS + "closed"
    context_element_attr = XBRLDT_NS + "contextElement"

    # Loop over all definition/calculation/presentation/label links.
    # Each extended link contains the locators and the arc's
    extended_links = []

    # loop over all extended links. Extended links can be: link:definitionLink, link:calculationLink e.t.c
    # Note that label linkbases only have one extended link
    for extended_link in root.findall(LINK_NS + extended_link_tag):
        extended_link_role: str = extended_link.attrib[XLINK_NS + 'role']

        # store the locators (link:loc) in a dictionary. The label attribute is the key.
        # This way we can access them in O(1)
        locator_map = {}
        for loc in extended_link.findall(LINK_NS + 'loc'):
            loc_label: str = loc.attrib[XLINK_NS + 'label']
            locator_href = loc.attrib[XLINK_NS + 'href']
            # check if the locator href is absolute, otherwise resolve it against the linkbase url
            if not locator_href.startswith('http'):
                locator_href = resolve_uri(linkbase_url, locator_href)
            locator_map[loc_label] = Locator(locator_href, loc_label)

        # Performance: extract the labels in advance. The label name (xlink:label) is the key and the value is
        # an array of all labels that have this name. This can be multiple labels (label, terseLabel, documentation...)
        label_map = {}
        if linkbase_type == LinkbaseType.LABEL:
            for label_element in extended_link.findall(LINK_NS + 'label'):
                label_name: str = label_element.attrib[XLINK_NS + 'label']
                label_role: str = label_element.attrib[XLINK_NS + 'role']
                label_lang: str = label_element.attrib[XML_NS + 'lang']
                # labels without text are stored as well (their text is None)
                label_obj = Label(label_name, label_role, label_lang,
                                  label_element.text)
                label_map.setdefault(label_name, []).append(label_obj)

        # arcs can be i.e link:definitionArc or link:calculationArc
        for arc_element in extended_link.findall(LINK_NS + arc_type):
            arc_attrs = arc_element.attrib
            # if the use of the element referenced by the arc is prohibited, just ignore it
            if arc_attrs.get('use') == 'prohibited':
                continue
            # extract the attributes of the arc. The arc always connects two locators through the from and to
            # attributes. Additionally it defines the relationship between these two locators (arcrole)
            arc_from: str = arc_attrs[XLINK_NS + 'from']
            arc_to: str = arc_attrs[XLINK_NS + 'to']
            arc_role: str = arc_attrs[XLINK_NS + 'arcrole']
            # NOTE: the order is passed on as the raw attribute text (a string), it is not converted to a number
            arc_order = arc_attrs.get('order')

            # the following attributes are linkbase specific, so we have to check if they exist!
            # Needed for (sometimes) definitionArc.
            # xbrldt:closed is a boolean attribute, so its text has to be interpreted; bool("false") would be True
            arc_closed = arc_attrs[closed_attr].strip().lower() in ('true', '1') \
                if closed_attr in arc_attrs else None
            arc_context_element = arc_attrs.get(context_element_attr)
            # Needed for calculationArc
            arc_weight = float(
                arc_attrs["weight"]) if "weight" in arc_attrs else None
            # Needed for presentationArc
            arc_priority = int(
                arc_attrs["priority"]) if "priority" in arc_attrs else None
            arc_preferred_label = arc_attrs.get("preferredLabel")

            # Create the arc object based on the current linkbase type
            arc_object: AbstractArcElement
            if linkbase_type == LinkbaseType.DEFINITION:
                arc_object = DefinitionArc(locator_map[arc_from],
                                           locator_map[arc_to], arc_role,
                                           arc_order, arc_closed,
                                           arc_context_element)
            elif linkbase_type == LinkbaseType.CALCULATION:
                arc_object = CalculationArc(locator_map[arc_from],
                                            locator_map[arc_to], arc_order,
                                            arc_weight)
            elif linkbase_type == LinkbaseType.PRESENTATION:
                arc_object = PresentationArc(locator_map[arc_from],
                                             locator_map[arc_to], arc_order,
                                             arc_priority, arc_preferred_label)
            else:
                # find all labels that are referenced by this arc.
                # These were preprocessed previously, so we can just take them
                arc_object = LabelArc(locator_map[arc_from], arc_order,
                                      label_map[arc_to])

            # Build the hierarchy for the Locators.
            if linkbase_type != LinkbaseType.LABEL:
                # This does not work for label linkbase, since link:labelArcs only link to link:labels
                # and not to other locators!!
                locator_map[arc_to].parents.append(locator_map[arc_from])
            locator_map[arc_from].children.append(arc_object)

        # find the top elements of the tree (all elements that have no parents)
        root_locators = [
            locator for locator in locator_map.values()
            if len(locator.parents) == 0
        ]

        # only add the extended link to the linkbase if the link references a role
        # (some filers have empty links in which we are not interested:
        # <definitionLink xlink:type="extended" xlink:role="http://www.xbrl.org/2003/role/link"/>)
        if extended_link_role in role_refs:
            extended_links.append(
                ExtendedLink(extended_link_role, role_refs[extended_link_role],
                             root_locators))
        elif linkbase_type == LinkbaseType.LABEL:
            # label links are always added, even if no roleRef exists for their role
            extended_links.append(
                ExtendedLink(extended_link_role, None, root_locators))
    return Linkbase(extended_links, linkbase_type)