# Example 1
    def test_cache_file(self):
        """
        Unit test for HttpCache.cache_file

        The same url is requested twice. The first request has to take at least ``delay`` milliseconds
        (the file is downloaded), the second request has to be faster than that (only the path of the
        already cached file is returned).
        """
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        cache_dir: str = os.path.abspath('./../cache/') + '/'
        # delay in milliseconds (it is compared against seconds as delay / 1000 below)
        delay: int = 5000
        cache: HttpCache = HttpCache(cache_dir, delay)

        test_url: str = "https://www.w3schools.com/xml/note.xml"
        expected_path: str = cache_dir + "www.w3schools.com/xml/note.xml"

        # if the testing file already exists, delete it first
        if os.path.isfile(expected_path):
            os.remove(expected_path)

        # on the first execution the file will be downloaded from the internet
        time_stamp: float = time.time()
        self.assertEqual(cache.cache_file(test_url), expected_path)
        self.assertGreaterEqual(time.time() - time_stamp, delay / 1000)

        # on the second execution the file path will be returned
        time_stamp = time.time()
        self.assertEqual(cache.cache_file(test_url), expected_path)
        self.assertLess(time.time() - time_stamp, delay / 1000)

        # test if the file was downloaded
        self.assertTrue(os.path.isfile(expected_path))

        # delete the file
        # NOTE(review): the original statement for this step was lost; if HttpCache offers its own
        # removal method, that method should be exercised here instead of os.remove - confirm.
        os.remove(expected_path)

        # test if the file was deleted
        self.assertFalse(os.path.isfile(expected_path))
# Example 2
    def test_parse_taxonomy(self):
        """
        Integration test for parse_taxonomy()

        Parses the ESEF core schema and an extension schema of Apple Inc. and checks that
        taxonomies imported by these schemas can be looked up afterwards.
        """
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        cache_dir: str = os.path.abspath('./../cache/') + '/'
        cache: HttpCache = HttpCache(cache_dir)
        print(f"Saving to {cache_dir}")

        # --- Parse ESEF taxonomy and check if lei was also imported ---
        # entry point for ESEF core schema
        entry_point_url: str = 'https://www.esma.europa.eu/taxonomy/2019-03-27/esef_cor.xsd'

        tax: TaxonomySchema = parse_taxonomy(cache, entry_point_url)
        # test if the lei taxonomy was also parsed (the lei taxonomy is imported by ESEF)
        lei_tax: TaxonomySchema = tax.get_taxonomy('http://www.xbrl.org/taxonomy/int/lei/2018-11-01')
        # NOTE(review): the original assertion was lost; this assumes get_taxonomy() returns None
        # when the namespace is unknown - confirm.
        self.assertIsNotNone(lei_tax)

        # --- Parse extending taxonomy of Apple Inc. and check if all us-gaap concepts were parsed ---
        extension_schema_url: str = 'https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.xsd'

        tax = parse_taxonomy(cache, extension_schema_url)
        # NOTE(review): the namespace argument was lost; the us-gaap 2020 namespace is assumed
        # because the assertion below refers to "us-gaap 2020" - confirm.
        us_gaap_tax: TaxonomySchema = tax.get_taxonomy('http://fasb.org/us-gaap/2020-01-31')
        # us-gaap 2020 has 17281 concepts
        self.assertEqual(len(us_gaap_tax.concepts), 17281)
# Example 3
 def test_parse_ixbrl_document(self):
     """
     Integration test for parse_ixbrl_url()

     Parses the iXBRL document aapl-20200926.htm through the cache and checks the number of parsed facts.
     """
     logging.basicConfig(stream=sys.stdout, level=logging.INFO)
     cache_dir: str = os.path.abspath('./../cache/') + '/'
     cache: HttpCache = HttpCache(cache_dir)

     instance_doc_url: str = 'https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm'
     inst: XbrlInstance = parse_ixbrl_url(instance_doc_url, cache)
     self.assertEqual(len(inst.facts), 1334)
# Example 4
def parse_ixbrl_url(instance_url: str, cache: HttpCache) -> XbrlInstance:
    """
    Parses an inline XBRL (iXBRL) instance file that is identified by its url.

    The url is resolved to a local file path with cache.cache_file(), so an instance file that is
    already in the cache is loaded from there. The local file is then parsed with parse_ixbrl().

    For EDGAR submissions: Before calling this method, extract the enclosure and copy the files to the cache,
    i.e. use CacheHelper.extract_edgar_enclosure()

    :param instance_url: url to the instance file (on the internet)
    :param cache: HttpCache instance
    :return: the XbrlInstance created by parse_ixbrl()
    """
    instance_path: str = cache.cache_file(instance_url)
    return parse_ixbrl(instance_path, cache, instance_url)
# Example 5
    def test_parse_linkbase(self):
        """
        Unit test for linkbase.parse_linkbase()

        Parses the german label linkbase of the ESEF core taxonomy and checks the number of
        root locators as well as the label of one sample concept.
        """
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        linkbase_url: str = 'https://www.esma.europa.eu/taxonomy/2019-03-27/esef_cor-lab-de.xml'
        cache_dir: str = os.path.abspath('./../cache/') + '/'
        cache: HttpCache = HttpCache(cache_dir)

        # the file is a label linkbase ("-lab-" in its name, labels are asserted below)
        linkbase: Linkbase = parse_linkbase(cache, linkbase_url, LinkbaseType.LABEL)
        # This linkbase has 5028 locators
        self.assertEqual(len(linkbase.extended_links[0].root_locators), 5028)
        # Todo: Function for getting all labels for a given concept id would be nice..
        # check the labels for one sample concept
        for locator in linkbase.extended_links[0].root_locators:
            if locator.concept_id != 'ifrs-full_Assets':
                continue
            label: str = locator.children[0].labels[0].text
            self.assertEqual(label, 'Vermögenswerte')
# Example 6
def parse_taxonomy(cache: HttpCache, schema_url: str) -> TaxonomySchema:
    """
    Parses a taxonomy schema file.

    The schema is loaded through the cache. All schemas imported via xsd:import are parsed
    recursively, then the extended link roles (ELR's), the concepts and the linkbases referenced
    by the schema are parsed. Finally the extended links of the definition, presentation and
    calculation linkbases are assigned to the ELR's of the schema.

    :param cache: HttpCache instance
    :param schema_url: url to the schema (on the internet)
    :return: the parsed TaxonomySchema
    :raises TaxonomyNotFound: if cache.cache_file() raises FileNotFoundError for the schema url
    """
    # Get the local absolute path to the schema file (and download it if it is not yet cached)
    try:
        schema_path: str = cache.cache_file(schema_url)
    except FileNotFoundError as err:
        raise TaxonomyNotFound(
            f"Could not find schema document from {schema_url}") from err

    root: ET.Element = ET.parse(schema_path).getroot()
    # get the target namespace of the taxonomy
    target_ns = root.attrib['targetNamespace']
    taxonomy: TaxonomySchema = TaxonomySchema(schema_url, target_ns)

    for import_element in root.findall('xsd:import', NAME_SPACES):
        import_url = import_element.attrib['schemaLocation']
        # sometimes the import schema location is relative. i.e schemaLocation="xbrl-linkbase-2003-12-31.xsd"!!
        if not import_url.startswith('http'):
            import_url = resolve_uri(schema_url, import_url)
        taxonomy.imports.append(parse_taxonomy(cache, import_url))

    # parse ELR's
    for elr in root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES):
        elr_definition = elr.find(LINK_NS + 'definition')
        # role types without a (non-empty) definition are ignored
        if elr_definition is None or elr_definition.text is None:
            continue
        # NOTE(review): the wrapping append and the third constructor argument were lost; the stripped
        # definition text is assumed here - confirm against ExtendedLinkRole.
        taxonomy.link_roles.append(
            ExtendedLinkRole(elr.attrib['id'], elr.attrib['roleURI'],
                             elr_definition.text.strip()))

    # lexical values that represent "true" for an xsd:boolean attribute
    xsd_true = ('true', '1')

    # find all elements that are defined in the schema
    for element in root.findall(XDS_NS + 'element'):
        attributes = element.attrib
        # if a concept has no id, it can not be referenced by a linkbase, so just ignore it
        if 'id' not in attributes or 'name' not in attributes:
            continue
        el_id: str = attributes['id']
        el_name: str = attributes['name']

        concept = Concept(el_id, schema_url, el_name)
        # a missing type keeps the historic default value False
        concept.type = attributes.get('type', False)
        # bool('false') would be True, so the attribute text is compared explicitly
        concept.nillable = attributes.get('nillable', 'false').strip().lower() in xsd_true
        concept.abstract = attributes.get('abstract', 'false').strip().lower() in xsd_true
        concept.period_type = attributes.get(XBRLI_NS + 'periodType')
        concept.balance = attributes.get(XBRLI_NS + 'balance')
        # remove the prefix from the substitutionGroup (i.e xbrli:item -> item)
        concept.substitution_group = attributes['substitutionGroup'].split(
            ':')[-1] if 'substitutionGroup' in attributes else None

        taxonomy.concepts[concept.xml_id] = concept
        taxonomy.name_id_map[concept.name] = concept.xml_id

    for linkbase_ref in root.findall('xsd:annotation/xsd:appinfo/link:linkbaseRef', NAME_SPACES):
        linkbase_url = linkbase_ref.attrib[XLINK_NS + 'href']
        role = linkbase_ref.attrib.get(XLINK_NS + 'role')
        # NOTE(review): the argument of guess_linkbase_role() was lost; the linkbase url is assumed.
        if role is not None:
            linkbase_type = LinkbaseType.get_type_from_role(role)
        else:
            linkbase_type = LinkbaseType.guess_linkbase_role(linkbase_url)

        # check if the linkbase url is relative
        if not linkbase_url.startswith('http'):
            linkbase_url = resolve_uri(schema_url, linkbase_url)

        linkbase: Linkbase = parse_linkbase(cache, linkbase_url, linkbase_type)
        # add the linkbase to the taxonomy
        if linkbase_type == LinkbaseType.DEFINITION:
            taxonomy.def_linkbases.append(linkbase)
        elif linkbase_type == LinkbaseType.CALCULATION:
            taxonomy.cal_linkbases.append(linkbase)
        elif linkbase_type == LinkbaseType.PRESENTATION:
            taxonomy.pre_linkbases.append(linkbase)
        elif linkbase_type == LinkbaseType.LABEL:
            # NOTE(review): attribute name assumed by analogy to def_/cal_/pre_linkbases - confirm.
            taxonomy.lab_linkbases.append(linkbase)

    # loop over the ELR's of the schema and assign the extended links from the linkbases
    # NOTE(review): no early exit is visible in the source, so the last matching link wins.
    for elr in taxonomy.link_roles:
        for def_linkbase in taxonomy.def_linkbases:
            for extended_def_link in def_linkbase.extended_links:
                if extended_def_link.elr_id.split('#')[1] == elr.xml_id:
                    elr.definition_link = extended_def_link
        for pre_linkbase in taxonomy.pre_linkbases:
            for extended_pre_link in pre_linkbase.extended_links:
                if extended_pre_link.elr_id.split('#')[1] == elr.xml_id:
                    elr.presentation_link = extended_pre_link
        for cal_linkbase in taxonomy.cal_linkbases:
            for extended_cal_link in cal_linkbase.extended_links:
                if extended_cal_link.elr_id.split('#')[1] == elr.xml_id:
                    elr.calculation_link = extended_cal_link

    return taxonomy
# Example 7
def parse_linkbase(cache: HttpCache, linkbase_url: str,
                   linkbase_type: LinkbaseType) -> Linkbase:
    """
    Parses a linkbase and returns a Linkbase object containing all
    locators, arcs and links of the linkbase in a hierarchical order (a Tree)

    :param cache: HttpCache instance
    :param linkbase_url: url to the linkbase
    :param linkbase_type: Type of the linkbase
    :return: Linkbase object holding the parsed extended links
    """
    linkbase_path: str = cache.cache_file(linkbase_url)
    root: ET.Element = ET.parse(linkbase_path).getroot()

    # store the role refs in a dictionary, with the role uri as key.
    # Role Refs are xlink's that connect the extended Links to the ELR defined in the schema
    # NOTE(review): the attribute name of the value was lost; xlink:href is assumed - confirm.
    role_refs: dict = {}
    for role_ref in root.findall(LINK_NS + 'roleRef'):
        role_refs[role_ref.attrib['roleURI']] = role_ref.attrib[XLINK_NS + 'href']

    # Loop over all definition/calculation/presentation/label links.
    # Each extended link contains the locators and the definition arc's
    extended_links = []

    # figure out if we want to search for definitionLink, calculationLink, presentationLink or labelLink
    # figure out for what type of arcs we are searching; definitionArc, calculationArc, presentationArc or labelArc
    extended_link_tag: str
    arc_type: str
    if linkbase_type == LinkbaseType.DEFINITION:
        extended_link_tag = "definitionLink"
        arc_type = "definitionArc"
    elif linkbase_type == LinkbaseType.CALCULATION:
        extended_link_tag = "calculationLink"
        arc_type = "calculationArc"
    elif linkbase_type == LinkbaseType.PRESENTATION:
        extended_link_tag = "presentationLink"
        arc_type = "presentationArc"
    else:
        extended_link_tag = "labelLink"
        arc_type = "labelArc"

    # lexical values that represent "true" for an xsd:boolean attribute
    xsd_true = ('true', '1')

    # loop over all extended links. Extended links can be: link:definitionLink, link:calculationLink e.t.c
    # Note that label linkbases only have one extended link
    for extended_link in root.findall(LINK_NS + extended_link_tag):
        extended_link_role: str = extended_link.attrib[XLINK_NS + 'role']
        # find all locators (link:loc) and arcs (i.e link:definitionArc or link:calculationArc)
        locators = extended_link.findall(LINK_NS + 'loc')
        arc_elements = extended_link.findall(LINK_NS + arc_type)

        # store the locators in a dictionary. The label attribute is the key. This way we can access them in O(1)
        locator_map = {}
        for loc in locators:
            loc_label: str = loc.attrib[XLINK_NS + 'label']
            # check if the locator href is absolute
            locator_href = loc.attrib[XLINK_NS + 'href']
            if not locator_href.startswith('http'):
                # resolve the path
                locator_href = resolve_uri(linkbase_url, locator_href)
            locator_map[loc_label] = Locator(locator_href, loc_label)

        # Performance: extract the labels in advance. The label name (xlink:label) is the key and the value is
        # an array of all labels that have this name. This can be multiple labels (label, terseLabel, documentation...)
        label_map = {}
        if linkbase_type == LinkbaseType.LABEL:
            for label_element in extended_link.findall(LINK_NS + 'label'):
                # if the label is empty, just ignore it
                # NOTE(review): the skip statement itself was lost; "empty" is read as "no text" - confirm.
                if label_element.text is None:
                    continue
                label_name: str = label_element.attrib[XLINK_NS + 'label']
                label_role: str = label_element.attrib[XLINK_NS + 'role']
                label_lang: str = label_element.attrib[XML_NS + 'lang']
                # NOTE(review): the last constructor argument was lost; the label text is assumed.
                label_obj = Label(label_name, label_role, label_lang,
                                  label_element.text)
                label_map.setdefault(label_name, []).append(label_obj)

        for arc_element in arc_elements:
            arc_attributes = arc_element.attrib
            # if the use of the element referenced by the arc is prohibited, just ignore it
            if arc_attributes.get('use') == 'prohibited':
                continue
            # extract the attributes of the arc. The arc always connects two locators through the from and to attributes
            # additionally it defines the relationship between these two locators (arcrole)
            arc_from: str = arc_attributes[XLINK_NS + 'from']
            arc_to: str = arc_attributes[XLINK_NS + 'to']
            arc_role: str = arc_attributes[XLINK_NS + 'arcrole']
            # the order is passed on as the raw attribute text (or None), it is not converted to a number
            arc_order = arc_attributes.get('order')

            # the following attributes are linkbase specific, so we have to check if they exist!
            # Needed for (sometimes) definitionArc
            # (the presence check uses the "closed" attribute itself, and the text is compared
            # explicitly because bool('false') would be True)
            closed_text = arc_attributes.get(XBRLDT_NS + "closed")
            arc_closed = None if closed_text is None else closed_text.strip().lower() in xsd_true
            arc_context_element = arc_attributes.get(XBRLDT_NS + "contextElement")
            # Needed for calculationArc
            arc_weight = float(
                arc_attributes["weight"]) if "weight" in arc_attributes else None
            # Needed for presentationArc
            arc_priority = int(
                arc_attributes["priority"]) if "priority" in arc_attributes else None
            arc_preferred_label = arc_attributes.get("preferredLabel")

            # Create the arc object based on the current linkbase type
            # NOTE(review): the trailing arguments of DefinitionArc, CalculationArc and LabelArc were lost
            # and are filled with the matching values extracted above - confirm against the constructors.
            arc_object: AbstractArcElement
            if linkbase_type == LinkbaseType.DEFINITION:
                arc_object = DefinitionArc(locator_map[arc_from],
                                           locator_map[arc_to], arc_role,
                                           arc_order, arc_closed,
                                           arc_context_element)
            elif linkbase_type == LinkbaseType.CALCULATION:
                arc_object = CalculationArc(locator_map[arc_from],
                                            locator_map[arc_to], arc_order,
                                            arc_weight)
            elif linkbase_type == LinkbaseType.PRESENTATION:
                arc_object = PresentationArc(locator_map[arc_from],
                                             locator_map[arc_to], arc_order,
                                             arc_priority, arc_preferred_label)
            else:
                # find all labels that are referenced by this arc.
                # These were preprocessed previously, so we can just take them
                # (an arc that only points to ignored empty labels gets an empty list)
                arc_object = LabelArc(locator_map[arc_from], arc_order,
                                      label_map.get(arc_to, []))

            # Build the hierarchy for the Locators.
            # NOTE(review): both statements below were lost and are reconstructed from how
            # locator.parents and locator.children are used elsewhere - confirm.
            if linkbase_type != LinkbaseType.LABEL:
                # This does not work for label linkbase, since link:labelArcs only link to link:labels
                # and not to other locators!!
                locator_map[arc_to].parents.append(locator_map[arc_from])
            locator_map[arc_from].children.append(arc_object)

        # find the top elements of the tree (all elements that have no parents)
        root_locators = [
            locator for locator in locator_map.values()
            if len(locator.parents) == 0
        ]

        # only add the extended link to the linkbase if the link references a role
        # (some filers have empty links in which we are not interested:
        # <definitionLink xlink:type="extended" xlink:role="http://www.xbrl.org/2003/role/link"/>)
        if extended_link_role in role_refs:
            extended_links.append(
                ExtendedLink(extended_link_role, role_refs[extended_link_role],
                             root_locators))
        elif linkbase_type == LinkbaseType.LABEL:
            extended_links.append(
                ExtendedLink(extended_link_role, None, root_locators))
    return Linkbase(extended_links, linkbase_type)