Example #1
0
    def test_delete_from_ctx(self, store):
        '''
        Delete triples from named graphs and from the default graph.

        Nothing is added here: the expected counts presuppose data that is
        already present in the store when this test starts.
        '''
        graph_a = URIRef('urn:bogus:graph#a')
        graph_b = URIRef('urn:bogus:graph#b')
        any_trp = (None, None, None)

        def count(*ctx):
            # Number of distinct results for the wildcard pattern, optionally
            # restricted to the given context.
            return len(set(store.triples(any_trp, *ctx)))

        # Empty one named graph; the other named graph keeps its triples.
        with TxnManager(store, True):
            store.remove(any_trp, graph_b)
            assert count(graph_b) == 0
            assert count(graph_a) == 3

        # Remove by subject without specifying a context.
        with TxnManager(store, True):
            store.remove((URIRef('urn:s:1'), None, None))
            assert count(graph_a) == 2
            assert count() == 3

        # Remove by subject, explicitly addressing the default graph.
        with TxnManager(store, True):
            store.remove(
                (URIRef('urn:s:4'), None, None), RDFLIB_DEFAULT_GRAPH_URI)
            assert count() == 2

        # Remove everything that is left.
        with TxnManager(store, True):
            store.remove(any_trp)
            assert count() == 0
            assert count(graph_a) == 0
            assert len(store) == 0
Example #2
0
    def test_ctx_mgr(self, store):
        '''
        Test enclosing a transaction in a context.

        The transaction must be closed once the ``with`` block is left, both
        on a normal exit and when an exception is raised inside the block.
        '''
        with TxnManager(store):
            pass
        assert not store.is_txn_open

        try:
            with TxnManager(store):
                raise RuntimeError
        except RuntimeError:
            pass
        # Checked outside the handler so that the assertion always runs; when
        # it lived inside the ``except`` clause it was silently skipped
        # whenever the exception did not propagate out of the ``with`` block.
        assert not store.is_txn_open
Example #3
0
    def test_rollback(self, store):
        '''
        Test that a failed write transaction leaves no triples behind.
        '''
        trp = (
            URIRef('urn:nogo:s'), URIRef('urn:nogo:p'), URIRef('urn:nogo:o'))

        try:
            with TxnManager(store, True):
                store.add(trp)
                # Leaving the block through an exception should roll back
                # the transaction.
                raise RuntimeError
        except RuntimeError:
            pass

        with TxnManager(store):
            leftovers = set(store.triples((None, None, None)))
        assert len(leftovers) == 0
Example #4
0
    def test_add_trp_to_ctx(self, store):
        '''
        Test adding triples to named graphs and to the default graph.
        '''
        graph_a = URIRef('urn:bogus:graph#a') # From previous test
        graph_b = URIRef('urn:bogus:graph#b') # Never created before
        trp1 = (URIRef('urn:s:1'), URIRef('urn:p:1'), URIRef('urn:o:1'))
        trp2 = (URIRef('urn:s:2'), URIRef('urn:p:2'), URIRef('urn:o:2'))
        trp3 = (URIRef('urn:s:3'), URIRef('urn:p:3'), URIRef('urn:o:3'))
        trp4 = (URIRef('urn:s:4'), URIRef('urn:p:4'), URIRef('urn:o:4'))
        any_trp = (None, None, None)

        def found(*ctx):
            # Wildcard lookup, optionally restricted to the given context.
            return store.triples(any_trp, *ctx)

        with TxnManager(store, True):
            for trp, ctx in (
                    (trp1, graph_a),
                    (trp2, graph_a),
                    (trp2, graph_a),  # Duplicate; dropped.
                    (trp2, None),  # Goes to the default graph.
                    (trp3, graph_b),
                    (trp3, graph_a)):
                store.add(trp, ctx)
            store.add(trp4)  # No context given: goes to the default graph.

            # Distinct results overall and per graph.
            assert len(set(found())) == 4
            assert len(set(found(RDFLIB_DEFAULT_GRAPH_URI))) == 2
            assert len(set(found(graph_a))) == 3
            assert len(set(found(graph_b))) == 1

            # The second graph now shows up among the store contexts.
            assert graph_b in {gr.identifier for gr in store.contexts()}

            # Membership of individual triples in the various graphs.
            assert trp1 in _clean(found())
            assert trp1 not in _clean(found(RDFLIB_DEFAULT_GRAPH_URI))
            assert trp2 in _clean(found(graph_a))
            assert trp2 in _clean(found())
            assert trp3 in _clean(found(graph_b))
            assert trp3 not in _clean(found(RDFLIB_DEFAULT_GRAPH_URI))
Example #5
0
 def test_pfx2ns(self, store, bindings):
     '''
     Test looking up the prefix bound to each namespace.
     '''
     with TxnManager(store, True):
         for pfx, ns in bindings:
             assert store.prefix(ns) == pfx
Example #6
0
 def test_ns2pfx(self, store, bindings):
     '''
     Test looking up the namespace bound to each prefix.
     '''
     with TxnManager(store, True):
         for pfx, ns in bindings:
             assert store.namespace(pfx) == ns
Example #7
0
    def test_add_graph(self, store):
        '''
        Test that a newly added graph is listed among the store contexts.
        '''
        graph_uri = URIRef('urn:bogus:graph#a')

        with TxnManager(store, True):
            store.add_graph(graph_uri)
            ctx_ids = {gr.identifier for gr in store.contexts()}
            assert graph_uri in ctx_ids
Example #8
0
    def count_rsrc(self):
        """
        Count first-class resources, split into "live" resources and historic
        snapshots.

        :rtype: dict
        :return: Number of distinct ``foaf:primaryTopic`` matches in the
            metadata graph (key ``main``) and in the historic graph (key
            ``hist``).
        """
        counts = {}
        with TxnManager(self.ds.store):
            for label, gr_uri in (('main', META_GR_URI), ('hist', HIST_GR_URI)):
                matches = self.ds.graph(gr_uri)[:nsc['foaf'].primaryTopic:]
                counts[label] = len(set(matches))

        return counts
Example #9
0
def stats():
    '''
    Get repository statistics.

    @return dict Resource statistics under the `rsrc_stats` key and store
    statistics under the `store_stats` key.
    '''
    # Resource counts are gathered first, before the store transaction below
    # is opened.
    rsrc_stats = env.app_globals.rdfly.count_rsrc()
    with TxnManager(env.app_globals.rdf_store):
        store_stats = env.app_globals.rdf_store.stats()

    return {'rsrc_stats': rsrc_stats, 'store_stats': store_stats}
Example #10
0
    def test_empty_context(self, store):
        '''
        Test creating and deleting empty contexts.
        '''
        graph_uri = URIRef('urn:bogus:empty#a')

        def ctx_ids():
            # Identifiers of all contexts currently reported by the store.
            return {gr.identifier for gr in store.contexts()}

        with TxnManager(store, True):
            store.add_graph(graph_uri)
            assert graph_uri in ctx_ids()
            store.remove_graph(graph_uri)
            assert graph_uri not in ctx_ids()
Example #11
0
def stats():
    """
    Get repository statistics.

    :rtype: dict
    :return: Resource statistics (key ``rsrc_stats``) and store statistics
        (key ``store_stats``).
    """
    # Imported for its side effects only; presumably this is what populates
    # ``env`` -- confirm against the module.
    import lakesuperior.env_setup

    rsrc_stats = env.app_globals.rdfly.count_rsrc()
    with TxnManager(env.app_globals.rdf_store):
        store_stats = env.app_globals.rdf_store.stats()

    return {'rsrc_stats': rsrc_stats, 'store_stats': store_stats}
Example #12
0
 def _wrapper(*args, **kwargs):
     """
     Run the wrapped callable inside a store transaction.

     The ``timestamp`` and ``timestamp_term`` attributes are set on ``env``
     for the duration of the call and removed afterwards, also when the
     wrapped callable or the transaction raises.

     :return: Whatever the wrapped callable returns.
     """
     # Mark transaction begin timestamp. This is used for create and
     # update timestamps on resources.
     env.timestamp = arrow.utcnow()
     env.timestamp_term = Literal(env.timestamp, datatype=XSD.dateTime)
     try:
         with TxnManager(env.app_globals.rdf_store, write=write):
             ret = fn(*args, **kwargs)
         # The changelog is handed to a background thread only after the
         # transaction block has been left without an error.
         if len(env.app_globals.changelog):
             job = Thread(target=_process_queue)
             job.start()
     finally:
         # Always clear the per-call timestamps so that a failed call does
         # not leave stale values behind on ``env``.
         delattr(env, 'timestamp')
         delattr(env, 'timestamp_term')
     return ret
Example #13
0
 def test_triple_match_1bound(self, store):
     '''
     Test triple patterns matching one bound term.
     '''
     subj = URIRef('urn:test:s')
     pred = URIRef('urn:test:p')
     obj = URIRef('urn:test:o')

     with TxnManager(store):
         by_subj = set(store.triples((subj, None, None)))
         by_pred = set(store.triples((None, pred, None)))
         by_obj = set(store.triples((None, None, obj)))
         # Each single-term pattern must yield exactly the same one triple.
         assert _clean(by_subj) == {(subj, pred, obj)}
         assert _clean(by_pred) == _clean(by_subj)
         assert _clean(by_obj) == _clean(by_pred)
Example #14
0
    def test_ns(self, store, bindings):
        '''
        Test namespace bindings.

        Every binding is registered with the store; the namespaces listed by
        the store afterwards must match the bindings in number and order.
        '''
        with TxnManager(store, True):
            for binding in bindings:
                store.bind(*binding)

            bound = list(store.namespaces())
            assert len(bound) == len(bindings)

            for actual, expected in zip(bound, bindings):
                assert actual == expected
Example #15
0
def integrity_check(config_dir=None):
    """
    Check integrity of the data set.

    At the moment this is limited to referential integrity. Other checks can
    be added and triggered by different argument flags.

    :param config_dir: If given, the configuration is parsed from this
        location and installed on ``env`` together with fresh application
        globals. Otherwise ``lakesuperior.env_setup`` is imported instead.

    :rtype: set
    :return: The items yielded by ``rdfly.find_refint_violations()``.
    """
    if not config_dir:
        import lakesuperior.env_setup
    else:
        env.config = parse_config(config_dir)[0]
        env.app_globals = AppGlobals(env.config)

    with TxnManager(env.app_globals.rdfly.store):
        return set(env.app_globals.rdfly.find_refint_violations())
Example #16
0
    def test_remove(self, store):
        '''
        Test removing one or more triples.

        Nothing is added here: the expected counts presuppose triples that
        are already present in the store when this test starts.
        '''
        any_trp = (None, None, None)

        with TxnManager(store, True):
            # Fully bound pattern.
            store.remove((
                URIRef('urn:test:s3'),
                URIRef('urn:test:p3'), URIRef('urn:test:o3')))
            remaining = set(store.triples(any_trp))
            assert len(remaining) == 2

            # Pattern bound on the subject only.
            store.remove((URIRef('urn:test:s'), None, None))
            remaining = set(store.triples(any_trp))
            assert len(remaining) == 0
Example #17
0
    def bootstrap(self):
        '''
        Delete all graphs and insert the basic triples.

        The store is destroyed and reopened, then the SPARQL update found in
        `data/bootstrap/rsrc_centric_layout.sparql` is run against the data
        set inside a transaction.
        '''
        logger.info('Deleting all data from the graph store.')
        store = self.ds.store
        # A transaction reported as still open is rolled back before the
        # store is destroyed.
        if getattr(store, 'is_txn_open', False):
            store.rollback()
        store.destroy(store.path)

        logger.info('Initializing the graph store with system data.')
        store.open()
        # NOTE(review): this path is relative to the current working
        # directory.
        bootstrap_fname = 'data/bootstrap/rsrc_centric_layout.sparql'
        with TxnManager(store, True):
            with open(bootstrap_fname, 'r') as fh:
                bootstrap_qry = fh.read()
                self.ds.update(bootstrap_qry)
Example #18
0
    def test_create_triple(self, store):
        '''
        Test creation of a single triple.
        '''
        trp = (
            URIRef('urn:test:s'), URIRef('urn:test:p'), URIRef('urn:test:o'))

        with TxnManager(store, True):
            store.add(trp)

            # Look the triple up both with a wildcard and with the fully
            # bound pattern.
            all_trp = set(store.triples((None, None, None)))
            exact_match = set(store.triples(trp))
            assert len(all_trp) == 1
            assert len(exact_match) == 1
            assert trp in _clean(all_trp) & _clean(exact_match)
Example #19
0
def sparql_query(qry_str, fmt):
    '''
    Send a SPARQL query to the triplestore.

    @param qry_str (str) SPARQL query string. SPARQL 1.1 Query Language
    (https://www.w3.org/TR/sparql11-query/) is supported.
    @param fmt (str) Serialization format. This varies depending on the
    query type (SELECT, ASK, CONSTRUCT, etc.). [@TODO Add reference to RDFLib
    serialization formats]

    @return BytesIO Stream holding the serialized query results.
    '''
    # The query is run and its results are serialized while the transaction
    # is still open.
    with TxnManager(rdf_store):
        serialized = rdfly.raw_query(qry_str).serialize(format=fmt)
        return BytesIO(serialized)
Example #20
0
def sparql_query(qry_str, fmt):
    """
    Run a SPARQL query against the triplestore and serialize its results.

    :param str qry_str: SPARQL query string. SPARQL 1.1 Query Language
        (https://www.w3.org/TR/sparql11-query/) is supported.
    :param str fmt: Serialization format. This varies depending on the
        query type (SELECT, ASK, CONSTRUCT, etc.). [TODO Add reference to
        RDFLib serialization formats]

    :rtype: BytesIO
    :return: Serialized SPARQL results.
    """
    with TxnManager(rdf_store):
        # Serialization happens inside the transaction, before the store is
        # released.
        results = rdfly.raw_query(qry_str)
        payload = results.serialize(format=fmt)
        stream = BytesIO(payload)

    return stream
Example #21
0
    def test_triple_no_match(self, store):
        '''
        Test various mismatches.

        Two triples are added; the expected total of three presupposes one
        triple already present in the store when this test starts.
        '''
        with TxnManager(store, True):
            for trp in (
                    (URIRef('urn:test:s'),
                     URIRef('urn:test:p2'), URIRef('urn:test:o2')),
                    (URIRef('urn:test:s3'),
                     URIRef('urn:test:p3'), URIRef('urn:test:o3'))):
                store.add(trp)
            everything = set(store.triples((None, None, None)))
            assert len(everything) == 3

            # Each pattern combines terms that never occur together in one
            # of the triples added above.
            mismatches = (
                (URIRef('urn:test:s2'), URIRef('urn:test:p'), None),
                (URIRef('urn:test:s3'), None, URIRef('urn:test:o')),
                (None, URIRef('urn:test:p3'), URIRef('urn:test:o2')),
            )
            counts = [len(set(store.triples(ptn))) for ptn in mismatches]

            assert counts == [0, 0, 0]
Example #22
0
def bootstrap():
    """
    Bootstrap binary and graph stores.

    This script will parse configuration files and initialize a filesystem and
    triplestore with an empty FCREPO repository.
    It is used in test suites and on a first run.

    Additional scaffolding files may be parsed to create initial contents.

    The user is asked for confirmation on standard input first; any answer
    other than ``yes`` (case-insensitive) exits the process with status 1.
    """
    import lakesuperior.env_setup

    rdfly = env.app_globals.rdfly
    nonrdfly = env.app_globals.nonrdfly

    warning = click.style(
        'WARNING: This operation will WIPE ALL YOUR DATA.\n',
        bold=True, fg='red')
    click.echo(
        warning + 'Are you sure? (Please type `yes` to continue) > ',
        nl=False)
    if input().lower() != 'yes':
        click.echo('Aborting.')
        sys.exit(1)

    click.echo('Initializing graph store at {}'.format(rdfly.store.path))
    with TxnManager(env.app_globals.rdf_store, write=True):
        rdfly.bootstrap()
        rdfly.store.close()
    click.echo('Graph store initialized.')

    click.echo('Initializing binary store at {}'.format(nonrdfly.root))
    nonrdfly.bootstrap()
    click.echo('Binary store initialized.')
    click.echo('Repository successfully set up. Go to town.')
Example #23
0
    def _crawl(self, uid):
        """
        Get the contents of a resource and its relationships recursively.

        This method recurses into itself each time a reference to a resource
        managed by the repository is encountered.

        For each resource, its headers are requested to tell RDF sources from
        binaries; its RDF document (for a binary, the ``fcr:metadata``
        document) is downloaded, rewritten to use the internal base URI and
        stored; for binaries the content is then written to a local file.
        When ``self.skip_errors`` is set, HTTP error statuses are printed
        instead of raised.

        :param str uid: The path relative to the source server webroot
            pointing to the resource to crawl, effectively the resource UID.
        """
        ibase = str(nsc['fcres'])
        # Public URI of source repo.
        uri = self.src + uid
        # Internal URI of destination.
        iuri = ibase + uid

        # Only the headers are needed at this point.
        rsp = requests.head(uri)
        if not self.skip_errors:
            rsp.raise_for_status()
        elif rsp.status_code > 399:
            print('Error retrieving resource {} headers: {} {}'.format(
                uri, rsp.status_code, rsp.text))

        # Determine LDP type. A resource counts as a binary (LDP-NR) unless
        # a ``rel="type"`` link marks it as an RDF source or a container.
        ldp_type = 'ldp_nr'
        try:
            for link in requests.utils.parse_header_links(
                    rsp.headers.get('link')):
                if (link.get('rel') == 'type'
                        and (link.get('url') == str(nsc['ldp'].RDFSource)
                             or link.get('url') == str(nsc['ldp'].Container))):
                    # Resource is an LDP-RS.
                    ldp_type = 'ldp_rs'
                    break
        except TypeError:
            # NOTE(review): presumably raised when the response carries no
            # Link header (``headers.get`` then returns None). The resource
            # is treated as an LDP-RS instead of being rejected.
            ldp_type = 'ldp_rs'
            #raise ValueError('URI {} is not an LDP resource.'.format(uri))

        # Get the whole RDF document now because we have to know all outbound
        # links. For a binary, the RDF lives under ``fcr:metadata``.
        get_uri = (uri
                   if ldp_type == 'ldp_rs' else '{}/fcr:metadata'.format(uri))
        get_rsp = requests.get(get_uri)
        if not self.skip_errors:
            get_rsp.raise_for_status()
        elif get_rsp.status_code > 399:
            print('Error retrieving resource {} body: {} {}'.format(
                uri, get_rsp.status_code, get_rsp.text))

        # Replace every occurrence of the source webroot in the raw response
        # with the internal base URI before parsing.
        data = get_rsp.content.replace(self.src.encode('utf-8'),
                                       ibase.encode('utf-8'))
        gr = Graph(identifier=iuri).parse(data=data, format='turtle')

        # Store raw graph data. No checks.
        with TxnManager(self.rdfly.store, True):
            self.rdfly.modify_rsrc(uid, add_trp=set(gr))

        # Grab binary and set new resource parameters.
        if ldp_type == 'ldp_nr':
            # NOTE(review): ``provided_imr`` is not used further in this
            # method.
            provided_imr = gr.resource(URIRef(iuri))
            if self.zero_binaries:
                # Write an empty file instead of downloading the content.
                data = b''
            else:
                bin_rsp = requests.get(uri)
                if not self.skip_errors:
                    bin_rsp.raise_for_status()
                elif bin_rsp.status_code > 399:
                    print('Error retrieving resource {} body: {} {}'.format(
                        uri, bin_rsp.status_code, bin_rsp.text))
                data = bin_rsp.content
            # The local file is located from the last colon-separated segment
            # of the resource's premis:hasMessageDigest value.
            # NOTE(review): if that property is absent, ``gr.value``
            # presumably returns None and the segment becomes the string
            # 'None' -- confirm.
            uuid = str(gr.value(URIRef(iuri),
                                nsc['premis'].hasMessageDigest)).split(':')[-1]
            fpath = self.nonrdfly.local_path(self.nonrdfly.config['path'],
                                             uuid)
            makedirs(path.dirname(fpath), exist_ok=True)
            with open(fpath, 'wb') as fh:
                fh.write(data)

        # Progress report every 10 resources.
        self._ct += 1
        if self._ct % 10 == 0:
            print('{} resources processed so far.'.format(self._ct))

        # Now, crawl through outbound links.
        # LDP-NR fcr:metadata must be checked too.
        for pred, obj in gr.predicate_objects():
            # UID of the referenced object: its URI stripped of the internal
            # base.
            obj_uid = obj.replace(ibase, '')
            # The existence check below queries the store, hence the
            # transaction around the whole test.
            with TxnManager(self.rdfly.store, True):
                conditions = bool(
                    isinstance(obj, URIRef) and obj.startswith(iuri)
                    # Avoid ∞ loop with fragment URIs.
                    and str(urldefrag(obj).url) != str(iuri)
                    # Avoid ∞ loop with circular references.
                    and not self.rdfly.ask_rsrc_exists(obj_uid)
                    and pred not in self.ignored_preds)
            # Recurse only after the transaction above has been closed.
            if conditions:
                print('Object {} will be crawled.'.format(obj_uid))
                self._crawl(urldefrag(obj_uid).url)
Example #24
0
    def __init__(self,
                 src,
                 dest,
                 zero_binaries=False,
                 compact_uris=False,
                 skip_errors=False):
        """
        Set up base paths and clean up existing directories.

        The destination is wiped, the default configuration is copied into
        it and pointed at the destination data folders, and both the graph
        store and the binary store are bootstrapped.

        :param rdflib.URIRef src: Webroot of source repository. This must
            correspond to the LDP root node (for Fedora it can be e.g.
            ``http://localhost:8080/fcrepo/rest/``) and is used to determine
            if URIs retrieved are managed by this repository. Trailing
            slashes are stripped.
        :param str dest: Destination repository path. If the location exists
            it must be a writable directory. It will be deleted and recreated.
            If it does not exist, it will be created along with its parents if
            missing.
        :param bool zero_binaries: Stored on the instance as
            ``zero_binaries``; not otherwise used by this method.
        :param bool compact_uris: NOT IMPLEMENTED. Whether the process should
            attempt to compact URIs generated with broken up path segments. If
            the UID matches a pattern such as ``/12/34/56/123456...`` it is
            converted to ``/123456...``. This would remove a lot of cruft
            caused by the pairtree segments. Note that this will change the
            publicly exposed URIs. If durability is a concern, a rewrite
            directive can be added to the HTTP server that proxies the WSGI
            endpoint.
        :param bool skip_errors: Stored on the instance as ``skip_errors``;
            not otherwise used by this method.
        """
        # Set up repo folder structure and copy default configuration to
        # destination file.
        base_dir = path.dirname(path.dirname(path.abspath(__file__)))
        self.dbpath = '{}/data/ldprs_store'.format(dest)
        self.fpath = '{}/data/ldpnr_store'.format(dest)
        self.config_dir = '{}/etc'.format(dest)

        # Errors while removing the old destination are ignored (e.g. when
        # it does not exist yet).
        shutil.rmtree(dest, ignore_errors=True)
        shutil.copytree('{}/etc.defaults'.format(base_dir), self.config_dir)

        # Modify and overwrite destination configuration.
        config, _ = parse_config(self.config_dir)
        store_config = config['application']['store']
        store_config['ldp_rs']['location'] = self.dbpath
        store_config['ldp_nr']['path'] = self.fpath

        app_config_path = '{}/application.yml'.format(self.config_dir)
        with open(app_config_path, 'w') as config_file:
            config_file.write(yaml.dump(config['application']))

        # Parse the configuration again, now from the rewritten files, and
        # build the application globals from it.
        env.config = parse_config(self.config_dir)[0]
        env.app_globals = AppGlobals(env.config)

        self.rdfly = env.app_globals.rdfly
        self.nonrdfly = env.app_globals.nonrdfly

        with TxnManager(env.app_globals.rdf_store, write=True):
            self.rdfly.bootstrap()
            self.rdfly.store.close()
        env.app_globals.nonrdfly.bootstrap()

        self.src = src.rstrip('/')
        self.zero_binaries = zero_binaries
        self.skip_errors = skip_errors