Example #1
0
 def __init__(self):
     self.traverser = XEBRConceptTraverser()
Example #2
0
class RDFConverter:
    def __init__(self):
        self.traverser = XEBRConceptTraverser()

    def _abbreviate(self, s):
        return re.sub('[^A-Z]', r'', s).lower()

    def _write_metrics(self, g, rep, identifier, filing):
        metrics = Metrics(filing['items'])

        for concept_name, function in metrics.get_metrics().items():
            concept = NS['xebr'][concept_name]
            datatype = xebr.graph.value(subject=concept, predicate=NS['rdfs']['range'])
            assert datatype is not None
            value = function()

            if value is not None:
                metric_id = NS['xebr']["%s_%s" % (concept_name, identifier)]
                g.add((metric_id, NS['xebr'][concept_name], Literal(value, datatype=datatype)))
                g.add((rep, NS['xebr']['hasMetric'], metric_id))

    def convert(self, xbrl_instance):
        g = Graph()
        for n in NS:
            g.bind(n, NS[n])

        for filing in xbrl_instance.get_filings_list():
            identifier = "%s_%s" % ( filing['metadata']['id'], filing['metadata']['end'] )

            # Add report instance to graph, with start/end metadata
            rep = NS['xebr']['rep_'+identifier]
            metadata = filing['metadata']
            g.add((rep, NS['rdf']['type'], NS['xebr']['Report']))
            g.add((rep, NS['xebr']['sourceTaxonomy'], URIRef(metadata['taxonomy'])))
            g.add((rep, NS['xebr']['start'], Literal(metadata['start'],datatype=XSD.date)))
            g.add((rep, NS['xebr']['end'], Literal(metadata['end'],datatype=XSD.date)))
            if 'source' in metadata:
                datatype = metadata['source'][1]
                g.add((rep, NS['dc']['source'], Literal(metadata['source'][0],datatype=datatype)))

            # Metrics
            self._write_metrics(g, rep, identifier, filing)

            # Traverse concepts and add to graph
            for report_type in self.traverser.report_type_iterator():
                for report in self.traverser.report_iterator(report_type):
                    node_belongsTo = NS['xebr']["%s_%s" % (self._abbreviate(report), identifier)]
                    empty_report=True
                    for abstract_concept, concepts, depth in self.traverser.concept_iterator(report):
                        concepts_in_filing = set(filing['items']) & set(concepts.keys())

                        # Add concepts to graph
                        if len(concepts_in_filing) > 0:
                            empty_report=False
                            ab = NS['xebr']["%s_%s" % (self._abbreviate(abstract_concept), identifier)]

                            # Add abstract_concept and report relationships
                            g.add((ab, NS['rdf']['type'], NS['xebr'][abstract_concept]))
                            g.add((ab, NS['xebr']['belongsTo'], node_belongsTo))

                            # Add all has* properties and values
                            for concept in concepts_in_filing:
                                value = filing['items'][concept]
                                if value is not None:
                                    if isinstance(value, URIRef):
                                        g.add( (ab, NS['xebr'][concept], value) )
                                    else:
                                        # Treat strings as plain literals
                                        if str(concepts[concept]) == "http://www.w3.org/2001/XMLSchema#string":
                                            datatype=None
                                        else:
                                            datatype=concepts[concept]
                                        g.add( (ab, NS['xebr'][concept], Literal(value, datatype=datatype)) )

                    # Only add metadata for the report if it has values for associated
                    # concepts below it in the tree
                    if not empty_report:
                        g.add((node_belongsTo, NS['rdf']['type'], NS['xebr'][report]))
                        g.add((node_belongsTo, NS['xebr']['belongsTo'], node_belongsTo))
                        g.add((rep, NS['xebr']["has"+report_type], node_belongsTo))

        return g