Exemple #1
0
def merge(files):
    """Merge a set of Felis FILES.

    Each felis file is expanded (so that values can be overridden
    via @id) and the expanded nodes are merged per-@id, then
    normalized into a single output.
    """
    graph = []
    for file_obj in files:
        schema = yaml.load(file_obj, Loader=yaml.SafeLoader)
        if "@graph" not in schema:
            schema["@type"] = "felis:Schema"
        schema["@context"] = DEFAULT_CONTEXT
        graph += jsonld.flatten(schema)

    updated_map = {}
    for node in graph:
        node_id = node["@id"]
        # Later occurrences of the same @id overwrite earlier values.
        target = updated_map.get(node_id, node)
        if target and target != node:
            logger.debug(f"Overwriting {node_id}")
        target.update(node)
        updated_map[node_id] = target

    merged = {
        "@context": DEFAULT_CONTEXT,
        "@graph": list(updated_map.values()),
    }
    _dump(_normalize(merged))
Exemple #2
0
def do_flatten(doc):
    """Flatten ``doc`` and print the result as indented JSON.

    All deep-level trees are hoisted to the top level; see
    https://json-ld.org/spec/latest/json-ld/#flattened-document-form
    """
    flattened = jsonld.flatten(doc)
    print("FLATTENED")
    print(json.dumps(flattened, indent=2))
Exemple #3
0
async def retrieve(request):
    """Retrieve data specific to an acronym, URL should be persistent."""
    db_pool = request.app['pool']
    term = request.match_info['acronymid']
    term_type = request.match_info['acronymtype']
    LOG.info('retrieve')
    data = await fetch_acronym(db_pool, term, term_type)
    # Hoist the first record: the JSON-LD document is built from it alone.
    record = data[0]
    doc = {
        "@id": f'acr:{record["index"]}',
        "dc:title": record["title"],
        "skos:label": record["title"],
        "dc:type": {
            "@id": f'acr:{record["acronymtype"]}',
            "@type": 'skos:Concept',
            "skos:label": record["acronymtype"],
        },
        "dc:language": record["language"],
        "dc:description": record["description"],
    }
    # Content-negotiate the response representation.
    if request.content_type == 'application/ld+json':
        return web.json_response(jsonld.flatten(doc, context),
                                 content_type='application/ld+json',
                                 dumps=json.dumps)
    if request.content_type == 'application/n-quads':
        nquads = jsonld.normalize(doc, {
            'algorithm': 'URDNA2015',
            'format': 'application/n-quads'
        })
        return web.Response(text=nquads, content_type='application/n-quads')
    return web.json_response({"data": data},
                             content_type='application/json',
                             dumps=json.dumps)
Exemple #4
0
    def __init__(self,
                 name: str,
                 document: Any,
                 url: Optional[str] = None,
                 soup: Optional[BeautifulSoup] = None):
        """Wrap a JSON-LD ``document``, defaulting its context to schema.org.

        ``url`` and ``soup`` are only attached as attributes when supplied.
        """
        if '@context' not in document or 'schema.org' in document['@context']:
            # 'http://schema.org' is commonly used as the '@context' value,
            # but the site no longer serves a valid JSON-LD context document
            # (the old content-negotiation behaviour stopped working; see
            # https://webmasters.stackexchange.com/questions/123409), so we
            # substitute a bundled default context instead.
            document['@context'] = JSONLD.DEFAULT_CONTEXT
        else:
            # TODO: Support other schemas
            logging.warning(
                f"You're using a schema which is not schema.org ({document['@context']}). Support is limited at the moment (particularly for introspection)."
            )

        super().__init__(name)
        ctx = document['@context']
        self.context = ctx
        self.graph = jsonld.flatten(document, ctx)['@graph']
        self._introspect()

        # Optional extras: attribute is left unset when the value is falsy,
        # matching callers that probe with hasattr().
        if url:
            self.url = url
        if soup:
            self.soup = soup
Exemple #5
0
    def _introspect(self):
        """Populate ``introspection_graph`` when a known schema is available.

        Only the schema.org default context is currently supported; any
        other context leaves the introspection graph untouched.
        """
        # TODO: Support other schemas
        schema = (get_schemaorg_schema()
                  if self.context == JSONLD.DEFAULT_CONTEXT
                  else None)
        if schema:
            self.introspection_graph = jsonld.flatten(
                schema, self.context)['@graph']
Exemple #6
0
 def process_jsonld(self, data, original, **kwargs):
     """Post-process ``data`` as JSON-LD according to the ?process= query arg.

     Lists/tuples are returned unchanged; otherwise the document is
     expanded, flattened or (by default) compacted against the nimads vocab.
     """
     if isinstance(original, (list, tuple)):
         return data
     method = request.args.get("process", "compact")
     context = {"@context": {"@vocab": "http://neurostore.org/nimads/"}}
     if method == "expand":
         return jsonld.expand(data)
     if method == "flatten":
         return jsonld.flatten(data, context)
     return jsonld.compact(data, context)
Exemple #7
0
    def _serialize(self,
                   obj: typing.Union[_T, typing.Iterable[_T]],
                   *,
                   many: bool = False):
        """Serialize ``obj`` to jsonld.

        :param obj: a single object or, when ``many`` is true, an iterable
            of objects to serialize.
        :param many: serialize each element of ``obj`` individually and
            return a list of the results.
        :raises ValueError: if the schema options declare no ``rdf_type``.
        """
        if many and obj is not None:
            return [
                self._serialize(d, many=False)
                for d in typing.cast(typing.Iterable[_T], obj)
            ]

        if isinstance(obj, Proxy):
            proxy_schema = obj.__proxy_schema__
            if (not obj.__proxy_initialized__
                    and isinstance(proxy_schema, type(self))
                    and proxy_schema.flattened == self.flattened):
                # if proxy was not accessed and we use the same schema, return original data
                return obj.__proxy_original_data__

            # resolve Proxy object
            obj = obj.__wrapped__

        ret = self.dict_class()
        for attr_name, field_obj in self.dump_fields.items():
            value = field_obj.serialize(attr_name,
                                        obj,
                                        accessor=self.get_attribute)
            if value is missing:
                continue
            # honour an explicit data_key, falling back to the attribute name
            key = field_obj.data_key if field_obj.data_key is not None else attr_name
            reverse = getattr(field_obj, "reverse", False)
            if reverse:
                # reverse properties are grouped under the JSON-LD "@reverse" map
                if "@reverse" not in ret:
                    ret["@reverse"] = self.dict_class()
                ret["@reverse"][key] = value
            else:
                ret[key] = value

        if "@id" not in ret or not ret["@id"]:
            # generate an @id when the serialized fields did not supply one
            ret["@id"] = self.opts.id_generation_strategy(ret, obj)

        # add type
        rdf_type = self.opts.rdf_type

        if not rdf_type:
            raise ValueError("No class type specified for schema")

        ret["@type"] = normalize_type(rdf_type)

        # only flatten at the top level so nested schemas stay embedded
        if self.flattened and self._top_level:
            ret = jsonld.flatten(ret)

        return ret
Exemple #8
0
def flatten_metadata_graph(obj):
    """Flatten ``obj`` against the schema.org context.

    Simplifies the graph into a sequence of one dict per known dataset,
    even when multiple metadata sets from different sources exist for the
    same dataset.
    """
    from pyld import jsonld

    # Cache schema documents; this also avoids network access for schemas
    # that were previously "visited".
    jsonld.set_document_loader(_cached_load_document)
    # TODO: cache entire graphs to prevent repeated term resolution on
    # subsequent calls.
    schema_ctx = {"@context": "http://schema.org/"}
    return jsonld.flatten(obj, ctx=schema_ctx)
Exemple #9
0
def flatten_metadata_graph(obj):
    """Return ``obj`` flattened into per-dataset dicts.

    Multiple metadata sets from different sources for the same dataset are
    collapsed into a simple sequence keyed against the schema.org context.
    """
    from pyld import jsonld

    # Install the caching loader so previously "visited" schemas are
    # served without network access.
    jsonld.set_document_loader(_cached_load_document)
    # TODO: cache whole graphs to avoid repeated term resolution across
    # calls.
    return jsonld.flatten(obj, ctx={"@context": "http://schema.org/"})
Exemple #10
0
    def as_quad_stream(
        self,
        raw_data: TextIO,
        iri: Optional[URIRef],
        context: LDContext,
        root_loader: Loader,
    ) -> Iterable[Quad]:
        """Read JSON-LD data into a quad stream."""
        iri_string = str(iri)
        document = self.as_jsonld_document(raw_data)

        # Default the document's @id and subjectOf to the source IRI.
        document = assign_key_if_not_present(
            document=document,
            key='@id',
            default_value=iri_string,
        )
        document = assign_key_if_not_present(
            document=document,
            key=str(OCTA.subjectOf),
            default_value={'@id': iri_string},
        )

        expand_options = {
            'expandContext': context,
            'documentLoader': root_loader,

            # Explanation:
            #   https://github.com/digitalbazaar/pyld/issues/143
            'base': str(LOCAL),
        }
        try:
            document = jsonld.expand(document, options=expand_options)
        except JsonLdError as err:
            raise ExpandError(
                message=str(err),
                document=document,
                context=context,
                iri=iri,
            ) from err

        flattened = jsonld.flatten(document)

        return list(parse_quads(
            quads_document=jsonld.to_rdf(flattened),
            graph=iri,
            blank_node_prefix=iri_string,
        ))
def test_import_jsonld_into_named_graph(flatten_before_import: bool):
    """Test named graphs we use."""
    graph = ConjunctiveGraph()

    jsonld_document = JSONLD_DOCUMENT
    if flatten_before_import:
        jsonld_document = jsonld.flatten(jsonld_document)

    graph.parse(
        data=json.dumps(jsonld_document, indent=4),
        format='json-ld',
        publicID=PUBLIC_ID,
    )

    print(graph.serialize(format='n3').decode('utf-8'))

    # Make sure only one NAMED GRAPH is created on import.
    graph_bindings = graph.query(
        'SELECT DISTINCT ?g WHERE { GRAPH ?g { ?s ?p ?o . } } ORDER BY ?g',
    ).bindings
    named_graphs = list(map(operator.itemgetter(Variable('g')), graph_bindings))
    assert named_graphs == [URIRef('https://myblog.net/rdf/')]

    # The information in @included section was properly parsed.
    included_bindings = graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                ex:Rover rdfs:subClassOf ex:Robot .
            }
        }
    ''', initNs=NAMESPACES).bindings
    assert included_bindings == [{Variable('g'): PUBLIC_ID}]

    # The information in the root was properly parsed.
    root_bindings = graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                blog:JSONLD-and-named-graphs a schema:blogPost .
            }
        }
    ''', initNs=NAMESPACES).bindings
    assert root_bindings == [{Variable('g'): PUBLIC_ID}]
Exemple #12
0
    def parse(self):
        """Flatten the feed's JSON-LD and index its categories, items and
        video resources into the feed's lookup structures."""
        try:
            flat = jsonld.flatten(
                self.feed, Feed.jsonld_ctx, {"base": self.base}
            )
        except Exception as e:
            raise Exception(
                "Could not flatten JSON-LD for feed: '{0}' [{1}]".format(
                self.base,
                str(e)
                )
            )

        for node in flat["@graph"]:
            if self.hasType(node, "simplevod:SimpleVoD"):
                # Top-level VoD container: remember the title and the
                # ordered list of category groups.
                self.title = node["title"]
                if ("hasMemberGroup" in node
                        and "@list" in node["hasMemberGroup"]):
                    for group in node["hasMemberGroup"]["@list"]:
                        self.categories.append(self.get(group["@id"]))
            elif self.hasType(node, "simplevod:SimpleVoDCategory"):
                category = self.get(node["@id"])
                category.update({
                    "isCategory": True,
                    "title": node["title"],
                    "items": [],
                })
                if "hasMember" in node and "@list" in node["hasMember"]:
                    for member in node["hasMember"]["@list"]:
                        category["items"].append(self.get(member["@id"]))
            elif self.hasType(node, "TVProgramme"):
                item = self.get(node["@id"])
                item.update({
                    "isItem": True,
                    "title": node["title"],
                    "video": self.get(node["isInstantiatedBy"]["@id"]),
                    "productcode": node["videodb:productcode"],
                })
            elif self.hasType(node, "MediaResource"):
                video = self.get(node["@id"])
                video.update({"videoref": node["videodb:videoref"]})
Exemple #13
0
    def get_jsonld(self, context, new_context=None, format="full"):
        """Return the JSON-LD serialization.

        :param: context the context to use for raw publishing; each SmartJsonLD
            instance is expected to have a default context associated.
        :param: new_context the context to use for formatted publishing,
            usually supplied by the client; used by the 'compacted', 'framed',
            and 'normalized' formats. Defaults to an empty context.
        :param: format the publishing format; can be 'full', 'inline',
            'compacted', 'expanded', 'flattened', 'framed' or 'normalized'.
            Note that 'full' and 'inline' are synonyms, referring to the
            document form which includes the context; for more information see:
            [http://www.w3.org/TR/json-ld/]
        """
        from pyld import jsonld

        # FIX: avoid the shared mutable default argument ({}) by using a
        # None sentinel; behavior for callers is unchanged.
        if new_context is None:
            new_context = {}

        if isinstance(context, six.string_types):
            ctx = self.get_context(context)
        elif isinstance(context, dict):
            ctx = context
        else:
            raise TypeError('JSON-LD context must be a string or dictionary')

        try:
            doc = self.translate(context, ctx)
        except NotImplementedError:
            # model does not require translation
            doc = self.dumps(clean=True)

        doc["@context"] = ctx

        if format in ["full", "inline"]:
            return doc
        elif format == "compacted":
            return jsonld.compact(doc, new_context)
        elif format == "expanded":
            return jsonld.expand(doc)
        elif format == "flattened":
            return jsonld.flatten(doc)
        elif format == "framed":
            return jsonld.frame(doc, new_context)
        elif format == "normalized":
            return jsonld.normalize(doc, new_context)
        raise ValueError('Invalid JSON-LD serialization format')
Exemple #14
0
    def get_jsonld(self, context, new_context=None, format="full"):
        """Return the JSON-LD serialization.

        :param: context the context to use for raw publishing; each SmartJsonLD
            instance is expected to have a default context associated.
        :param: new_context the context to use for formatted publishing,
            usually supplied by the client; used by the 'compacted', 'framed',
            and 'normalized' formats. Defaults to an empty context.
        :param: format the publishing format; can be 'full', 'inline',
            'compacted', 'expanded', 'flattened', 'framed' or 'normalized'.
            Note that 'full' and 'inline' are synonyms, referring to the
            document form which includes the context; for more information see:
            [http://www.w3.org/TR/json-ld/]
        """
        from pyld import jsonld

        # FIX: replace the mutable default argument ({}) with a None
        # sentinel; the effective default is still an empty context.
        if new_context is None:
            new_context = {}

        if isinstance(context, six.string_types):
            ctx = self.get_context(context)
        elif isinstance(context, dict):
            ctx = context
        else:
            raise TypeError('JSON-LD context must be a string or dictionary')

        try:
            doc = self.translate(context, ctx)
        except NotImplementedError:
            # model does not require translation
            doc = self.dumps(clean=True)

        doc["@context"] = ctx

        if format in ["full", "inline"]:
            return doc
        elif format == "compacted":
            return jsonld.compact(doc, new_context)
        elif format == "expanded":
            return jsonld.expand(doc)
        elif format == "flattened":
            return jsonld.flatten(doc)
        elif format == "framed":
            return jsonld.frame(doc, new_context)
        elif format == "normalized":
            return jsonld.normalize(doc, new_context)
        raise ValueError('Invalid JSON-LD serialization format')
Exemple #15
0
def flat_map(response):
    """
    Transform a response using JSON-LD's "flatten" operation, and return a
    dictionary mapping resources (as fully-qualified URLs) to their values
    (also containing fully-qualified URLs).
    """
    # The URL in '@context' may not be available yet (we probably haven't
    # deployed), so substitute the local copy of that context file.
    response['@context'] = CONTEXT['@context']

    # jsonld.flatten returns a list of objects whose @id values are unique
    # after flattening, so they can key a dictionary. Objects without an
    # @id (awkward "blank nodes", like definitions of features) are dropped.
    return {
        obj['@id']: obj
        for obj in jsonld.flatten(response)
        if '@id' in obj
    }
Exemple #16
0
def modify_tap(start_schema_at, files):
    """Modify TAP information in Felis schema FILES.

    This command has some utilities to aid in rewriting felis FILES
    in specific ways. It will write out a merged version of these files.

    ``start_schema_at`` is the first ``tap:schema_index`` to assign when a
    file is missing an index or its index lies beyond the starting point.
    """
    count = 0
    graph = []
    for file in files:
        schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
        if "@graph" not in schema_obj:
            schema_obj["@type"] = "felis:Schema"
        schema_obj["@context"] = DEFAULT_CONTEXT
        schema_index = schema_obj.get("tap:schema_index")
        # FIX: the original `(schema_index and schema_index > start_schema_at)`
        # clause was redundant — it only runs when schema_index is truthy —
        # so the condition simplifies without changing behavior.
        if not schema_index or schema_index > start_schema_at:
            schema_index = start_schema_at + count
            count += 1
        schema_obj["tap:schema_index"] = schema_index
        graph.extend(jsonld.flatten(schema_obj))
    merged = {"@context": DEFAULT_CONTEXT, "@graph": graph}
    normalized = _normalize(merged)
    _dump(normalized)
Exemple #17
0
    def _serialize(self,
                   obj: typing.Union[_T, typing.Iterable[_T]],
                   *,
                   many: bool = False):
        """Serialize ``obj`` to jsonld.

        When ``many`` is true, each element of the iterable is serialized
        individually and a list is returned.
        """
        if many and obj is not None:
            return [
                self._serialize(item, many=False)
                for item in typing.cast(typing.Iterable[_T], obj)
            ]

        result = self.dict_class()
        for attr_name, field_obj in self.dump_fields.items():
            value = field_obj.serialize(attr_name,
                                        obj,
                                        accessor=self.get_attribute)
            if value is missing:
                continue
            # honour an explicit data_key, falling back to the attribute name
            key = attr_name if field_obj.data_key is None else field_obj.data_key
            if getattr(field_obj, "reverse", False):
                # reverse properties are grouped under the "@reverse" map
                result.setdefault("@reverse", self.dict_class())[key] = value
            else:
                result[key] = value

        # add type
        rdf_type = self.opts.rdf_type
        if not rdf_type:
            raise ValueError("No class type specified for schema")
        result["@type"] = normalize_type(rdf_type)

        if self.flattened:
            result = jsonld.flatten(result)

        return result
Exemple #18
0
def main():
    """Load a JSON-LD item from the file named in ``sys.argv[1]`` and print
    its expanded, compacted, flattened and normalized forms, then report any
    property names lost during expansion/compaction."""
    itemFile = sys.argv[1]
    with open(itemFile, "r") as f:
        item = json.load(f)
    context = {}
    ctxtResolver(item["@context"], context)
    # FIX: the original filter used `k not in "@context"`, which is a
    # SUBSTRING test — it silently dropped any key that happens to be a
    # substring of "@context" (e.g. "text", "@c"). Use equality instead.
    doc = {k: v for k, v in item.items() if k != "@context"}
    # Rebuild the item with the resolved context first.
    item = {"@context": context}
    item.update(doc)

    expanded = jsonld.expand(item)
    print("Expanded")
    print(json.dumps(expanded, indent=2))
    print("\n\n")
    compacted = jsonld.compact(expanded, context)
    print("Compacted")
    print(json.dumps(compacted, indent=2))
    print("\n\n")
    flattened = jsonld.flatten(compacted, context)
    print("Flattened")
    print(json.dumps(flattened, indent=2))
    print("\n\n")
    normalized = jsonld.normalize(item, {
        'algorithm': 'URDNA2015',
        'format': 'application/n-quads'
    })
    print("Normalized")
    print(json.dumps(normalized, indent=2))
    print("\n\n")

    # Compare bare property names across the three representations.
    inpPropNames = set(item.keys()) - set(["@context", "latestResourceData"])
    expPropNames = set(k.split("/")[-1] for k in expanded[0].keys())
    compPropNames = set(k.split(":")[-1] for k in compacted.keys())

    print("Missing from expanded" + str(inpPropNames - expPropNames))
    print("Missing from compacted" + str(inpPropNames - compPropNames))
Exemple #19
0
    def main(self):
        """Run the PyLD unit-test suite.

        Parses -f/-d/-e/-v command-line options, collects JSON-LD manifest
        files (a single file, or a directory walk for ``manifest*.jsonld``
        files), runs every supported test type, prints PASS/FAIL per test,
        records an EARL assertion for each, optionally writes the EARL
        report, and prints a pass/fail summary.
        """
        print('PyLD Unit Tests')
        print('Use -h or --help to view options.')

        # add program options
        self.parser.add_option('-f',
                               '--file',
                               dest='file',
                               help='The single test file to run',
                               metavar='FILE')
        self.parser.add_option('-d',
                               '--directory',
                               dest='directory',
                               help='The directory full of test files',
                               metavar='DIR')
        self.parser.add_option('-e',
                               '--earl',
                               dest='earl',
                               help='The filename to write the EARL report to',
                               metavar='EARL')
        self.parser.add_option('-v',
                               '--verbose',
                               dest='verbose',
                               action='store_true',
                               default=False,
                               help='Prints verbose test data')

        # parse options
        (self.options, args) = self.parser.parse_args()

        # check if file or directory were specified
        if self.options.file == None and self.options.directory == None:
            raise Exception('No test file or directory specified.')

        # check if file was specified, exists, and is file
        if self.options.file is not None:
            if (os.path.exists(self.options.file)
                    and os.path.isfile(self.options.file)):
                # add manifest file to the file list
                self.manifest_files.append(os.path.abspath(self.options.file))
            else:
                raise Exception('Invalid test file: "%s"' % self.options.file)

        # check if directory was specified, exists and is dir
        if self.options.directory is not None:
            if (os.path.exists(self.options.directory)
                    and os.path.isdir(self.options.directory)):
                # load manifest files from test directory
                for test_dir, dirs, files in os.walk(self.options.directory):
                    for manifest in files:
                        # add all .jsonld manifest files to the file list
                        if (manifest.find('manifest') != -1
                                and manifest.endswith('.jsonld')):
                            self.manifest_files.append(join(
                                test_dir, manifest))
            else:
                raise Exception('Invalid test directory: "%s"' %
                                self.options.directory)

        # see if any manifests have been specified
        if len(self.manifest_files) == 0:
            raise Exception('No manifest files found.')

        passed = 0
        failed = 0
        total = 0

        # run the tests from each manifest file
        for manifest_file in self.manifest_files:
            test_dir = os.path.dirname(manifest_file)
            manifest = json.load(open(manifest_file, 'r'))
            count = 1

            for test in manifest['sequence']:
                # skip unsupported types; a SKIP_TEST_TYPES match wins over
                # any TEST_TYPES match
                skip = True
                test_type = test['@type']
                for tt in test_type:
                    if tt in SKIP_TEST_TYPES:
                        skip = True
                        break
                    if tt in TEST_TYPES:
                        skip = False
                if skip:
                    # print 'Skipping test: "%s" ...' % test['name']
                    continue

                print('JSON-LD/%s %04d/%s...' %
                      (manifest['name'], count, test['name']),
                      end=' ')

                total += 1
                count += 1

                # read input file
                with open(join(test_dir, test['input'])) as f:
                    if test['input'].endswith('.jsonld'):
                        input = json.load(f)
                    else:
                        # NOTE(review): .decode() on a text-mode read only
                        # exists on Python 2 str — confirm intended runtime.
                        input = f.read().decode('utf8')
                # read expect file
                with open(join(test_dir, test['expect'])) as f:
                    if test['expect'].endswith('.jsonld'):
                        expect = json.load(f)
                    else:
                        expect = f.read().decode('utf8')
                result = None

                # JSON-LD options
                options = {
                    'base':
                    'http://json-ld.org/test-suite/tests/' + test['input'],
                    'useNativeTypes': True
                }

                success = False
                try:
                    # dispatch on the declared test type
                    if 'jld:ExpandTest' in test_type:
                        result = jsonld.expand(input, options)
                    elif 'jld:CompactTest' in test_type:
                        ctx = json.load(open(join(test_dir, test['context'])))
                        result = jsonld.compact(input, ctx, options)
                    elif 'jld:FlattenTest' in test_type:
                        result = jsonld.flatten(input, None, options)
                    elif 'jld:FrameTest' in test_type:
                        frame = json.load(open(join(test_dir, test['frame'])))
                        result = jsonld.frame(input, frame, options)
                    elif 'jld:FromRDFTest' in test_type:
                        result = jsonld.from_rdf(input, options)
                    elif 'jld:ToRDFTest' in test_type:
                        options['format'] = 'application/nquads'
                        result = jsonld.to_rdf(input, options)
                    elif 'jld:NormalizeTest' in test_type:
                        options['format'] = 'application/nquads'
                        result = jsonld.normalize(input, options)

                    # check the expected value against the test result
                    success = deep_compare(expect, result)

                    if success:
                        passed += 1
                        print('PASS')
                    else:
                        failed += 1
                        print('FAIL')

                    if not success or self.options.verbose:
                        print('Expect:', json.dumps(expect, indent=2))
                        print('Result:', json.dumps(result, indent=2))
                except jsonld.JsonLdError as e:
                    print('\nError: ', e)
                    failed += 1
                    print('FAIL')

                # add EARL report assertion
                EARL['subjectOf'].append({
                    '@type':
                    'earl:Assertion',
                    'earl:assertedBy':
                    EARL['doap:developer']['@id'],
                    'earl:mode':
                    'earl:automatic',
                    'earl:test':
                    ('http://json-ld.org/test-suite/tests/' +
                     os.path.basename(manifest_file) + test.get('@id', '')),
                    'earl:result': {
                        '@type':
                        'earl:TestResult',
                        'dc:date':
                        datetime.datetime.utcnow().isoformat(),
                        # NOTE(review): due to operator precedence this
                        # evaluates as ('earl:' + 'passed') if success else
                        # 'failed' — the failure outcome lacks the 'earl:'
                        # prefix; likely intended
                        # 'earl:' + ('passed' if success else 'failed').
                        'earl:outcome':
                        ('earl:' + 'passed' if success else 'failed')
                    }
                })

        if self.options.earl:
            f = open(self.options.earl, 'w')
            f.write(json.dumps(EARL, indent=2))
            f.close()

        print('Done. Total:%d Passed:%d Failed:%d' % (total, passed, failed))
Exemple #20
0
# Demo of pyld's core operations on a document; `compacted`, `doc` and
# `frame` are defined earlier in the script.
# see: http://json-ld.org/spec/latest/json-ld/#expanded-document-form
expanded = jsonld.expand(compacted)

print(json.dumps(expanded, indent=2))
# Output:
# [{
#   "http://schema.org/image": [{"@id": "http://manu.sporny.org/images/manu.png"}],
#   "http://schema.org/name": [{"@value": "Manu Sporny"}],
#   "http://schema.org/url": [{"@id": "http://manu.sporny.org/"}]
# }]

# expand using URLs
jsonld.expand('http://example.org/doc')

# flatten a document
# see: http://json-ld.org/spec/latest/json-ld/#flattened-document-form
flattened = jsonld.flatten(doc)
# all deep-level trees flattened to the top-level

# frame a document
# see: http://json-ld.org/spec/latest/json-ld-framing/#introduction
framed = jsonld.frame(doc, frame)
# document transformed into a particular tree structure per the given frame

# normalize a document using the RDF Dataset Normalization Algorithm
# (URDNA2015), see: http://json-ld.github.io/normalization/spec/
normalized = jsonld.normalize(
    doc, {'algorithm': 'URDNA2015', 'format': 'application/n-quads'})
# normalized is a string that is a canonical representation of the document
# that can be used for hashing, comparison, etc.
from w3lib.html import get_base_url
import json

from pyld_document_loader import my_requests_document_loader


def get_schema_context():
    """Fetch and return the schema.org JSON-LD context document."""
    response = requests.get('https://schema.org/docs/jsonldcontext.json')
    return response.json()


def get_jsons(url):
    """Return the JSON-LD blocks extracted from the page at ``url``."""
    response = requests.get(url)
    extracted = extruct.extract(
        response.text,
        base_url=get_base_url(response.text, response.url),
    )
    return extracted['json-ld']


if __name__ == '__main__':

    # Use the custom requests-backed loader for remote context resolution.
    jsonld.set_document_loader(my_requests_document_loader())

    schema_ctx = get_schema_context()

    movie_url = 'https://www.imdb.com/title/tt7126948/'
    documents = get_jsons(movie_url)
    # Flatten against the schema.org context (compacting is an alternative).
    flat = jsonld.flatten(documents, ctx=schema_ctx)
    print(json.dumps(flat, indent=2))
    with open('imdb_pyld.json', 'w') as f:
        f.write(json.dumps(flat))
Exemple #22
0
def test_page(ark: str):
    """Render a Cytoscape graph page for the evidence graph of *ark*.

    Fetches the JSON-LD evidence graph for the given ARK identifier from the
    backend, flattens it with pyld, and converts it into Cytoscape-style
    node/edge elements stored in the module-level ``elements`` list, which
    the rendered template reads.

    NOTE(review): this code was recovered from a scrape; one line below was
    redacted ('*****') and is syntactically broken as-is -- it must be
    restored before this function can run.
    """
    global elements
    elements = []

    # Prefer the auth cookie; fall back to the Authorization header.
    if request.cookies.get("fairscapeAuth") is None:
        token = request.headers.get("Authorization")
    else:
        token = request.cookies.get("fairscapeAuth")

    # Fetch the evidence-graph JSON-LD for this ARK from the backend service.
    data_jsonld = requests.get(EG_URL + ark, headers={
        "Authorization": token
    }).json()

    if 'error' in data_jsonld.keys():
        return data_jsonld['error']
    # try:
    try:
        data_jsonld_flat = jsonld.flatten(data_jsonld)  #
    except Exception as cause:
        raise JsonLdError('Error flattening JSON-LD content ', cause)

    # print("\nflattened JSON-LD content\n", json.dumps(data_jsonld_flat, indent=2))

    elements = []  # contains nodes and edges
    nodes = []  # vertices
    edges = []  # links between vertices
    id_position = {}  # mapping of each @id to a number
    counter = 0

    # TODO: to check if http://schema.org/name missing
    # Build one Cytoscape node per flattened JSON-LD resource; abort on
    # blank nodes or missing types since the graph needs stable ids.
    for level in data_jsonld_flat:
        if level.get('@id') is None or '_:b' in level[
                '@id']:  # flattening generates a blank node _:b when @id is missing
            print('Error: found blank node for missing @id at: ', level)
            sys.exit()
        # NOTE(review): ['@type'][0] is the literal string '@type', so this
        # is just level.get('@type') written confusingly.
        if level.get(['@type'][0]) is None:
            print('Error: missing @type at: ', level)
            sys.exit()
        nodes_data = {}
        nodes_element = {}
        nodes_element['id'] = counter
        nodes_element['@id'] = level['@id']
        if os.environ.get("LOCAL", False):
            # NOTE(review): redacted/broken line from the scrape; by analogy
            # with the else-branch it presumably built
            # '<local base URL>' + level['@id'] -- restore before running.
            nodes_element['href'] = 'http://*****:*****@id']  # href in cytoscape to open as a URI
        else:
            nodes_element['href'] = MDS_URL + level[
                '@id']  # href in cytoscape to open as a URI
        nodes_element['@type'] = level['@type'][0]
        nodes_element['type'] = level['@type'][
            0]  # @type cannot be retrieved as node(@type)
        nodes_element['name'] = level['https://schema.org/name'][0]['@value']
        nodes_element['info'] = 'Name: ' + level['https://schema.org/name'][0]['@value'] + '\nType: ' + level['@type'][0] \
                                + '\nPID: ' + level['@id']  # all attributes together
        nodes_data['data'] = nodes_element
        nodes.append(nodes_data)
        id_position[level['@id']] = counter
        counter += 1

    # print('\nNodes\n', json.dumps(nodes, indent=2))

    # populate edges: every {'@id': ...} reference inside a property list
    # becomes a directed edge from the enclosing resource.
    for item in data_jsonld_flat:
        source_id = item[
            '@id']  # chooses @id as source at each level for an edge
        for key, value in item.items(
        ):  # iterates through each flattened level
            if isinstance(value, list):
                for i in value:
                    if isinstance(i, dict):
                        if '@id' in i.keys():
                            edges_data = {}
                            edges_element = {}
                            edges_element['source'] = id_position[source_id]
                            edges_element['target'] = id_position[i['@id']]
                            edges_element['label'] = key
                            edges_data['data'] = edges_element
                            edges.append(edges_data)

    # print('\nEdges\n', json.dumps(edges, indent=2))

    # copies all nodes and edges inside elements
    elements = nodes.copy()
    for element in edges:
        elements.append(element)

    # Convert multiple edges such that e1(v1, v2), e2(v1, v2), e3(v1, v2) =>
    # Multiples edges between v1, v2 such as http://schema.org/founder, http://schema.org/member become [founder, member]
    # Flatten the edge dicts into parallel columns for pandas dedup below.
    source = []
    target = []
    label = []
    for edge_data in edges:
        for edge in edge_data.values():
            for key, value in edge.items():
                if key == 'source':
                    source.append(value)
                if key == 'target':
                    target.append(value)
                if key == 'label':
                    label.append(value)

    d = {'source': source, 'target': target, 'label': label}
    df = pd.DataFrame(data=d)

    # print('\nAll Edges\n', df)

    # Rows sharing the same (source, target) pair are parallel edges.
    df_edge_has_common_nodes = df[df.duplicated(subset=['source', 'target'],
                                                keep=False)]
    # print('\nEdges with common nodes\n', df_edge_has_common_nodes)

    df_unique = df.drop_duplicates(subset=['source', 'target'], keep=False)
    # print('\nUnique Edges\n', df_unique)

    # Collapse parallel edges into one row with a comma-joined label list.
    df_merged_edge_has_common_nodes = df_edge_has_common_nodes.groupby(['source', 'target'], as_index=False) \
        .agg({'label': ','.join})

    # print('\nMerged unique & non-unique edges\n', df_merged_edge_has_common_nodes)

    uri_prefix_suffix_dict_list = [
    ]  # Maps uri prefix to its suffix e.g. {http://schema.org/ : member"

    # populate common edge labels within [...] e.g. [founder, member, ...]
    def get_property_labels(labels):
        # Strip each property URI down to its last path segment; multiple
        # labels are rendered as a bracketed, comma-separated list.
        property_list = str(labels).split(',')
        if len(property_list) == 1:
            uri_prefix_suffix_dict = {}
            suffix = property_list[0].split('/')[-1]
            prefix = property_list[0].replace(suffix, '')
            uri_prefix_suffix_dict[prefix] = suffix
            uri_prefix_suffix_dict_list.append(uri_prefix_suffix_dict)
            return suffix
        elif len(property_list) > 1:
            property_list_size = len(property_list)
            # prop_list = []  # sending as a list does not add [] around the labels
            props_list = '['  # this string adds the anticipated []
            for prop in property_list:
                uri_prefix_suffix_dict = {}
                suffix = prop.split('/')[-1]
                prefix = prop.replace(suffix, '')
                uri_prefix_suffix_dict[prefix] = suffix
                uri_prefix_suffix_dict_list.append(uri_prefix_suffix_dict)
                # prop_list.append(suffix)
                property_list_size -= 1
                if property_list_size > 0:
                    props_list += suffix + ', '
                else:
                    props_list += suffix
            # return prop_list
            return props_list + ']'

    elements = []  # reinitialize empty nodes and edges
    # Populate only unique edges which are not shared between two nodes
    for index, row in df_unique.iterrows():
        edge_data = {}
        edges_element = {}
        edges_element['source'] = row['source']
        edges_element['target'] = row['target']
        property_label = get_property_labels(row['label'])
        if property_label is None:
            print('ERROR: Could not find property label!')
            sys.exit()
        edges_element['label'] = property_label
        edge_data['data'] = edges_element
        elements.append(edge_data)

    # Populate only those edges which are shared between two vertices
    for index, row in df_merged_edge_has_common_nodes.iterrows():
        edge_data = {}
        edges_element = {}
        edges_element['source'] = row['source']
        edges_element['target'] = row['target']
        property_labels = get_property_labels(row['label'])
        if property_labels is None:
            print('ERROR: Could not find property labels!')
            sys.exit()
        edges_element['label'] = property_labels
        edge_data['data'] = edges_element
        elements.append(edge_data)

    # print('\nRefined Edges\n', elements)

    # Adding the nodes: only keep nodes that participate in at least one edge.
    def is_node_in_edges(node, edges):
        edge_nodes = set()
        for edge_data_value in edges:
            edge_nodes.add(edge_data_value['data']['source'])
            edge_nodes.add(edge_data_value['data']['target'])
        if node['data']['id'] in edge_nodes:
            return True
        else:
            return False

    for node in nodes:
        if is_node_in_edges(node, edges):
            elements.append(node)
    # except:
    #     return "Visual Failed. Probably missing type, name, or ID"
    print('Made it to render.')
    if os.environ.get("LOCAL", False):
        return render_template('local_index.html')
    return render_template('index.html')
from pyld import jsonld
import json
from os.path import join

# Load a JSON-LD instance and its context, then round-trip it through
# pyld's compact -> expand -> flatten operations, printing each stage.

# FIX: use `with` so the file handles are closed (the original passed bare
# open(...) into json.load and leaked them).
with open(join("../json-instances/", "PDB-5AEM.jsonld")) as f:
    doc = json.load(f)

print("loaded jsonld", doc)

with open(join("../json-schemas/contexts/", "dataset_sdo_context.jsonld")) as f:
    context = json.load(f)

print("loaded context")

compacted = jsonld.compact(doc, context)

print("-------------COMPACTED")

print(json.dumps(compacted, indent=2))

expanded = jsonld.expand(compacted)

print("-------------EXPANDED")

print(json.dumps(expanded, indent=2))

flattened = jsonld.flatten(compacted)
Exemple #24
0
def flatten(document):
    """Return the flattened form of an Open Annotation JSON-LD document.

    Algorithm: http://www.w3.org/TR/json-ld-api/#flattening
    """
    flattened = jsonld.flatten(document)
    return flattened
Exemple #25
0
    uri_generator=UriPatternGenerator('http://id.trees.org/types/%s'),
    concept_scheme=ConceptScheme('http://id.trees.org'))

def _banner(title):
    """Print *title* followed by an '=' underline of matching width."""
    print(title)
    print('=' * len(title))


# Dump the concept scheme provider to a JSON-LD document
doc = jsonld_dumper(provider, CONTEXT)
_banner('Conceptscheme')
print(json.dumps(doc, indent=2))

# Expanded form
# NOTE(review): pyld's expand() takes (input, options); passing CONTEXT as
# the second argument looks suspicious -- confirm against the pyld API.
expanded = jsonld.expand(doc, CONTEXT)
_banner('Conceptscheme expanded')
print(json.dumps(expanded, indent=2))

# Compact the expanded document back down again
compacted = jsonld.compact(expanded, CONTEXT)
_banner('Conceptscheme compacted again')
print(json.dumps(compacted, indent=2))

# Flattened form
flattened = jsonld.flatten(compacted, CONTEXT)
_banner('Conceptscheme flattened')
print(json.dumps(flattened, indent=2))
Exemple #26
0
 def the_function(self, source, context, expand_context):
     """Flatten *source* against *context*, expanding with *expand_context*."""
     options = {'expandContext': expand_context}
     return jsonld.flatten(source, context, options)
Exemple #27
0
 def _norm(self, ld):
     """Parse the JSON-LD string *ld*, flatten it, and re-serialize it."""
     parsed = json.loads(ld)
     return json.dumps(jsonld.flatten(parsed))
Exemple #28
0
# Demo of pyld's expand / flatten / frame / normalize operations.
# NOTE(review): `compacted`, `doc` and `frame` must be defined earlier in
# the original script -- this snippet does not run standalone.

# expand a document, removing its context
# see: http://json-ld.org/spec/latest/json-ld/#expanded-document-form
expanded = jsonld.expand(compacted)

print(json.dumps(expanded, indent=2))
# Output:
# {
#   "http://schema.org/image": [{"@id": "http://manu.sporny.org/images/manu.png"}],
#   "http://schema.org/name": [{"@value": "Manu Sporny"}],
#   "http://schema.org/url": [{"@id": "http://manu.sporny.org/"}]
# }

# expand using URLs (result is discarded here -- demonstration only)
jsonld.expand('http://example.org/doc')

# flatten a document
# see: http://json-ld.org/spec/latest/json-ld/#flattened-document-form
flattened = jsonld.flatten(doc)
# all deep-level trees flattened to the top-level

# frame a document
# see: http://json-ld.org/spec/latest/json-ld-framing/#introduction
framed = jsonld.frame(doc, frame)
# document transformed into a particular tree structure per the given frame

# normalize a document
# ('application/nquads' is the older spelling of the media type;
#  newer pyld releases also accept 'application/n-quads')
normalized = jsonld.normalize(doc, {'format': 'application/nquads'})
# normalized is a string that is a canonical representation of the document
# that can be used for hashing
Exemple #29
0
def test_import_jsonld_into_named_graph():
    """Check which named graphs our imported JSON-LD data ends up in."""
    graph = ConjunctiveGraph()

    # Flatten, expand, and serialize the document before parsing it.
    flattened = jsonld.flatten(JSONLD_DOCUMENT)
    serialized_document = json.dumps(jsonld.expand(flattened), indent=4)

    graph.parse(
        data=serialized_document,
        format='json-ld',
        # All the semantic data about my blog is stored in a particular
        # named graph.
        publicID=PUBLIC_ID,
    )

    # Every named graph in the store should be the one we imported into.
    graph_names = [
        row[Variable('g')]
        for row in graph.query(
            'SELECT DISTINCT ?g WHERE { GRAPH ?g { ?s ?p ?o . } } ORDER BY ?g',
        ).bindings
    ]
    assert graph_names == [PUBLIC_ID]

    # The subclass triple is visible when no graph is specified.
    subclass_rows = graph.query('''
        SELECT * WHERE {
            ?subclass_of_robot rdfs:subClassOf ex:Robot .
        }
    ''', initNs=NAMESPACES).bindings
    assert subclass_rows == [{
        Variable('subclass_of_robot'): URIRef('https://example.org/Rover'),
    }]

    # THIS FAILS!
    # The information about ex:Rover cannot be found if I specify the GRAPH.
    # Meaning, this information is not in one of the named graphs.
    rover_rows = graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                ex:Rover rdfs:subClassOf ex:Robot .
            }
        }
    ''', initNs=NAMESPACES).bindings
    assert rover_rows == [{Variable('g'): PUBLIC_ID}]

    # `publicID` was used for the part of data which was on the top level
    # of the document.
    blog_rows = graph.query('''
        SELECT * WHERE {
            GRAPH ?g {
                blog:JSONLD-and-named-graphs a schema:blogPost .
            }
        }
    ''', initNs=NAMESPACES).bindings
    assert blog_rows == [{Variable('g'): PUBLIC_ID}]
Exemple #30
0
from pyld import jsonld
import json
import os


# Load a JSON-LD instance and its context, then run them through pyld's
# compact -> expand -> flatten operations, printing each stage.

# BUG FIX: the original called os.join(), which does not exist and raises
# AttributeError; the correct function is os.path.join().  `with` blocks
# also close the previously leaked file handles.
with open(os.path.join("..", "json-instances", "PDB-5AEM.jsonld")) as f:
    doc = json.load(f)

print("loaded jsonld", doc)

with open(os.path.join("..", "json-schemas", "contexts",
                       "dataset_sdo_context.jsonld")) as f:
    context = json.load(f)

print("loaded context")

compacted = jsonld.compact(doc, context)

print("-------------COMPACTED")

print(json.dumps(compacted, indent=2))

expanded = jsonld.expand(compacted)

print("-------------EXPANDED")

print(json.dumps(expanded, indent=2))

flattened = jsonld.flatten(compacted)
Exemple #31
0
def flatten(document):
    """Flatten an Open Annotation JSON-LD document.

    Returns the flattened form of *document* per the JSON-LD API.
    """

    # See http://www.w3.org/TR/json-ld-api/#flattening

    return jsonld.flatten(document)
Exemple #32
0
    def main(self):
        """Run the JSON-LD test suite.

        Parses command-line options, collects manifest files from the given
        file or directory, executes every supported test, prints PASS/FAIL
        per test, and optionally writes an EARL report.

        Raises:
            Exception: if no test file/directory is given or it is invalid,
                or if no manifest files are found.
        """
        print('PyLD Unit Tests')
        print('Use -h or --help to view options.')

        # add program options
        self.parser.add_option('-f', '--file', dest='file',
            help='The single test file to run', metavar='FILE')
        self.parser.add_option('-d', '--directory', dest='directory',
            help='The directory full of test files', metavar='DIR')
        self.parser.add_option('-e', '--earl', dest='earl',
            help='The filename to write the EARL report to', metavar='EARL')
        self.parser.add_option('-v', '--verbose', dest='verbose',
            action='store_true', default=False,
            help='Prints verbose test data')

        # parse options
        (self.options, args) = self.parser.parse_args()

        # check if file or directory were specified
        # (use identity comparison with None, not ==)
        if self.options.file is None and self.options.directory is None:
            raise Exception('No test file or directory specified.')

        # check if file was specified, exists, and is file
        if self.options.file is not None:
            if (os.path.exists(self.options.file) and
                os.path.isfile(self.options.file)):
                # add manifest file to the file list
                self.manifest_files.append(os.path.abspath(self.options.file))
            else:
                raise Exception('Invalid test file: "%s"' % self.options.file)

        # check if directory was specified, exists and is dir
        if self.options.directory is not None:
            if (os.path.exists(self.options.directory) and
                os.path.isdir(self.options.directory)):
                # load manifest files from test directory
                for test_dir, dirs, files in os.walk(self.options.directory):
                    for manifest in files:
                        # add all .jsonld manifest files to the file list
                        if (manifest.find('manifest') != -1 and
                            manifest.endswith('.jsonld')):
                            self.manifest_files.append(
                                join(test_dir, manifest))
            else:
                raise Exception('Invalid test directory: "%s"' %
                    self.options.directory)

        # see if any manifests have been specified
        if len(self.manifest_files) == 0:
            raise Exception('No manifest files found.')

        passed = 0
        failed = 0
        total = 0

        # run the tests from each manifest file
        for manifest_file in self.manifest_files:
            test_dir = os.path.dirname(manifest_file)
            with open(manifest_file, 'r') as mf:
                manifest = json.load(mf)
            count = 1

            for test in manifest['sequence']:
                # skip unsupported types
                skip = True
                test_type = test['@type']
                for tt in test_type:
                    if tt in SKIP_TEST_TYPES:
                        skip = True
                        break
                    if tt in TEST_TYPES:
                        skip = False
                if skip:
                    # print 'Skipping test: "%s" ...' % test['name']
                    continue

                print('JSON-LD/%s %04d/%s...' % (
                    manifest['name'], count, test['name']), end=' ')

                total += 1
                count += 1

                # read input file
                # BUG FIX: the files are opened in text mode, so read()
                # already returns str; the old f.read().decode('utf8')
                # raised AttributeError on Python 3.
                # ('test_input' also avoids shadowing the builtin 'input'.)
                with open(join(test_dir, test['input'])) as f:
                    if test['input'].endswith('.jsonld'):
                        test_input = json.load(f)
                    else:
                        test_input = f.read()
                # read expect file
                with open(join(test_dir, test['expect'])) as f:
                    if test['expect'].endswith('.jsonld'):
                        expect = json.load(f)
                    else:
                        expect = f.read()
                result = None

                # JSON-LD options
                options = {
                    'base': 'http://json-ld.org/test-suite/tests/' +
                        test['input'],
                    'useNativeTypes': True
                }

                success = False
                try:
                    if 'jld:ExpandTest' in test_type:
                        result = jsonld.expand(test_input, options)
                    elif 'jld:CompactTest' in test_type:
                        with open(join(test_dir, test['context'])) as cf:
                            ctx = json.load(cf)
                        result = jsonld.compact(test_input, ctx, options)
                    elif 'jld:FlattenTest' in test_type:
                        result = jsonld.flatten(test_input, None, options)
                    elif 'jld:FrameTest' in test_type:
                        with open(join(test_dir, test['frame'])) as ff:
                            frame = json.load(ff)
                        result = jsonld.frame(test_input, frame, options)
                    elif 'jld:FromRDFTest' in test_type:
                        result = jsonld.from_rdf(test_input, options)
                    elif 'jld:ToRDFTest' in test_type:
                        options['format'] = 'application/nquads'
                        result = jsonld.to_rdf(test_input, options)
                    elif 'jld:NormalizeTest' in test_type:
                        options['format'] = 'application/nquads'
                        result = jsonld.normalize(test_input, options)

                    # check the expected value against the test result
                    success = deep_compare(expect, result)

                    if success:
                        passed += 1
                        print('PASS')
                    else:
                        failed += 1
                        print('FAIL')

                    if not success or self.options.verbose:
                        print('Expect:', json.dumps(expect, indent=2))
                        print('Result:', json.dumps(result, indent=2))
                except jsonld.JsonLdError as e:
                    print('\nError: ', e)
                    failed += 1
                    print('FAIL')

                # add EARL report assertion
                # BUG FIX: the old ('earl:' + 'passed' if success else
                # 'failed') bound the '+' tighter than the conditional, so
                # failures were recorded as 'failed' without the 'earl:'
                # prefix.
                EARL['subjectOf'].append({
                    '@type': 'earl:Assertion',
                    'earl:assertedBy': EARL['doap:developer']['@id'],
                    'earl:mode': 'earl:automatic',
                    'earl:test': ('http://json-ld.org/test-suite/tests/' +
                        os.path.basename(manifest_file) + test.get('@id', '')),
                    'earl:result': {
                        '@type': 'earl:TestResult',
                        'dc:date': datetime.datetime.utcnow().isoformat(),
                        'earl:outcome':
                            'earl:' + ('passed' if success else 'failed')
                    }
                })

        if self.options.earl:
            # context manager ensures the report file is closed on error
            with open(self.options.earl, 'w') as f:
                f.write(json.dumps(EARL, indent=2))

        print('Done. Total:%d Passed:%d Failed:%d' % (total, passed, failed))
Exemple #33
0
import os
import sys
import json
from pyld import jsonld
from pyld_document_loader import my_requests_document_loader

if __name__ == '__main__':
    # Usage: python <script>.py <path/to/document.jsonld>
    fjson = sys.argv[1]
    with open(fjson, 'r') as f:
        doc = json.load(f)  # json.load reads the file object directly

    # Route remote context fetches through the custom requests-based loader.
    jsonld.set_document_loader(my_requests_document_loader())
    data = jsonld.flatten(doc)

    jpath, jfile = os.path.split(fjson)

    # BUG FIX: the old f'{jpath}{os.path.sep}flatten_{jfile}' produced an
    # absolute path like '/flatten_x.json' when the input was a bare
    # filename (empty jpath); os.path.join handles the empty case.
    flatten_file = os.path.join(jpath, f'flatten_{jfile}')
    with open(flatten_file, 'w') as f:
        f.write(json.dumps(data))