Exemple #1
0
    def initgraphconfig(self, rev):
        """Initialize graph settings for a revision.

        Public method to initialize graph settings. The configuration graph
        and its namespace manager are created only if ``self.graphconf`` is
        still ``None``; the blobs of ``rev`` are inspected on every call.

        Args
        ----
            rev: Revision handed to ``get_blobs_from_repository``.

        Raises
        ------
            InvalidConfigurationError: If graph files and a configuration file
                are both present, or more than one configuration file exists.

        """
        if self.graphconf is None:
            self.graphconf = Graph()
            self.nsMngrGraphconf = NamespaceManager(self.graphconf)
            self.nsMngrGraphconf.bind('', self.quit, override=False)

        blobs = self.get_blobs_from_repository(rev)
        graph_files, config_files, rdf_files = blobs

        has_graph_files = len(graph_files) > 0
        config_count = len(config_files)

        # Graph files and a configuration file are mutually exclusive.
        if has_graph_files and config_count > 0:
            raise InvalidConfigurationError(
                "Conflict. Found graphfiles and QuitStore configuration file.")

        if has_graph_files:
            self.mode = 'graphfiles'
            self.__init_graph_conf_with_blobs(graph_files, rev)
            return

        # From here on there are no graph files at all.
        if config_count == 0:
            self.mode = 'graphfiles'
            return

        if config_count == 1:
            self.mode = 'configuration'
            self.__init_graph_conf_from_configuration(config_files[0],
                                                      rdf_files)
            return

        raise InvalidConfigurationError(
            "Conflict. Found more than one QuitStore configuration file.")
Exemple #2
0
    def __setgraphsfromconf(self):
        """Set all URIs and file paths of graphs that are configured in config.ttl.

        Queries ``self.graphconf`` for every resource typed as a Quit
        ``Graph`` that has both a ``graphUri`` and a ``graphFile``. For each
        result whose file format is guessed as ``nt`` or ``nquads``, the graph
        file is created inside the repository path if it does not exist yet,
        and the mapping is recorded in ``self.graphs`` (graph URI -> file name)
        and ``self.files`` (file name -> serialization and list of graph URIs).

        Entries with any other format are skipped; the remaining entries are
        still processed.

        Raises
        ------
            InvalidConfigurationError: If a missing graph file cannot be
                created because of missing permissions or a missing directory.
            UnknownConfigurationError: If creating the file fails for any
                other reason.

        """
        nsQuit = 'http://quit.aksw.org/vocab/'
        query = 'SELECT DISTINCT ?graphuri ?filename WHERE { '
        query += '  ?graph a <' + nsQuit + 'Graph> . '
        query += '  ?graph <' + nsQuit + 'graphUri> ?graphuri . '
        query += '  ?graph <' + nsQuit + 'graphFile> ?filename . '
        query += '}'
        result = self.graphconf.query(query)

        repopath = self.getRepoPath()

        for row in result:
            filename = str(row['filename'])
            serialization = guess_format(filename)
            if serialization not in ('nt', 'nquads'):
                # Skip only this entry. Query results are unordered, so
                # aborting the loop here would drop an arbitrary set of
                # otherwise valid graphs.
                continue

            graphFile = join(repopath, filename)

            if not isfile(graphFile):
                # Create an empty graph file so later writes have a target.
                try:
                    with open(graphFile, 'a+'):
                        pass
                except PermissionError as e:
                    raise InvalidConfigurationError(
                        "Permission denied. Can't create file {} in repo {}".
                        format(graphFile, repopath)) from e
                except FileNotFoundError as e:
                    raise InvalidConfigurationError(
                        "File not found. Can't create file {} in repo {}".
                        format(graphFile, repopath)) from e
                except Exception as e:
                    raise UnknownConfigurationError(
                        "Can't create file {} in repo {}. Error: {}".format(
                            graphFile, repopath, e)) from e

            graphuri = URIRef(str(row['graphuri']))

            # we store which named graph is serialized in which file
            self.graphs[graphuri] = filename
            # and furthermore we assume that one file can contain data of more
            # than one named graph and so we store for each file a set of graphs
            if filename in self.files:
                self.files[filename]['graphs'].append(graphuri)
            else:
                self.files[filename] = {
                    'serialization': serialization,
                    'graphs': [graphuri]
                }
Exemple #3
0
    def __get_uri_from_graphfile_blob(self, oid):
        """Read a graph URI from the blob of a graph file.

        The stripped, decoded blob content is expected to be the graph URI.

        Args
        ----
           oid: String oid of a graph file

        Returns
        -------
            graphuri: String with the graph URI, or None when looking up the
                blob raised a ValueError.

        Raises
        ------
            InvalidConfigurationError: If the blob content is empty or is not
                accepted by ``_is_valid_uri``.

        """
        try:
            graphfile_blob = self.repository.get(oid)
        except ValueError:
            message = "Object with OID {} not found in repository.".format(oid)
            logger.debug(message)
            return None

        raw_bytes = graphfile_blob.read_raw()
        candidate = raw_bytes.decode().strip()

        if not candidate or not _is_valid_uri(candidate):
            raise InvalidConfigurationError(
                "No graph URI found in blob with OID {}.".format(oid))
        return candidate
Exemple #4
0
    def __init_graph_conf_from_configuration(self, configfileId, known_blobs):
        """Init graphs with setting from config.ttl.

        Loads the configuration blob from the repository, parses it as Turtle
        into ``self.graphconf`` and registers every configured graph whose
        serialization is ``nt`` and whose file is among ``known_blobs``.
        Entries that do not meet both conditions are skipped; the remaining
        entries are still processed.

        Args
        ----
            configfileId: Id used to fetch the configuration blob via
                ``self.repository.get``.
            known_blobs: Mapping of file name -> oid of the blobs found in the
                repository.

        Raises
        ------
            InvalidConfigurationError: If the blob cannot be fetched or its
                content cannot be parsed.

        """
        try:
            configfile = self.repository.get(configfileId)
        except Exception as e:
            raise InvalidConfigurationError(
                "Blob for configfile with id {} not found in repository {}".
                format(configfileId, e)) from e

        content = configfile.read_raw()

        try:
            self.graphconf.parse(data=content, format='turtle')
        except Exception as e:
            raise InvalidConfigurationError(
                "Configfile could not be parsed {} {}".format(
                    configfileId, e)) from e

        nsQuit = 'http://quit.aksw.org/vocab/'
        query = 'SELECT DISTINCT ?graphuri ?filename ?format WHERE { '
        query += '  ?graph a <' + nsQuit + 'Graph> . '
        query += '  ?graph <' + nsQuit + 'graphUri> ?graphuri . '
        query += '  ?graph <' + nsQuit + 'graphFile> ?filename . '
        query += '  OPTIONAL { ?graph <' + nsQuit + 'hasFormat> ?format .} '
        query += '}'
        result = self.graphconf.query(query)

        for row in result:
            filename = str(row['filename'])
            # An explicitly configured format wins over guessing from the name.
            if row['format'] is None:
                serialization = guess_format(filename)
            else:
                serialization = str(row['format'])

            # Skip only the offending entry. Query results are unordered, so
            # aborting the loop would drop an arbitrary set of valid graphs.
            if serialization != 'nt':
                continue
            if filename not in known_blobs:
                continue

            graphuri = URIRef(str(row['graphuri']))

            # we store which named graph is serialized in which file
            self.graphs[graphuri] = filename
            self.files[filename] = {
                'serialization': serialization,
                'graph': graphuri,
                'oid': known_blobs[filename]
            }
Exemple #5
0
    def __initstoreconfig(self, namespace, repository, targetdir, configfile,
                          configmode):
        """Initialize store settings.

        Validates the base namespace, parses an existing configuration file
        into ``self.sysconf`` and applies the optional config mode, repository
        path and git origin.

        Raises
        ------
            InvalidConfigurationError: If the namespace is not absolute, the
                configuration file has bad syntax or is not readable, or
                neither a usable configuration file nor a target directory is
                given.
            UnknownConfigurationError: If parsing fails for any other reason.

        """
        if not isAbsoluteUri(namespace):
            raise InvalidConfigurationError(
                "Quit expects an absolute http(s) base namespace, {} is not absolute."
                .format(namespace))
        self.namespace = namespace

        if configfile and isfile(configfile):
            try:
                self.sysconf.parse(configfile, format='turtle')
            except notation3.BadSyntax:
                raise InvalidConfigurationError(
                    "Bad syntax. Configuration file could not be parsed. {}".
                    format(configfile))
            except PermissionError:
                raise InvalidConfigurationError(
                    "Configuration file could not be parsed. Permission denied. {}"
                    .format(configfile))
            except Exception as error:
                raise UnknownConfigurationError(
                    "UnknownConfigurationError: {}".format(error))

            self.configfile = configfile
        elif not targetdir:
            # Without a configuration file the target directory is mandatory.
            raise InvalidConfigurationError(
                'No target directory for git repo given')

        # Apply each optional setting only when a value was supplied.
        if configmode:
            self.setConfigMode(configmode)
        if targetdir:
            self.setRepoPath(targetdir)
        if repository:
            self.setGitOrigin(repository)
Exemple #6
0
    def __initgraphsfromconf(self, configfile):
        """Parse a Turtle configuration file and register its graphs.

        Args
        ----
            configfile: Path of the configuration file to parse into
                ``self.graphconf``.

        Raises
        ------
            MissingConfigurationError: If ``configfile`` is not an existing
                file.
            InvalidConfigurationError: If parsing the file fails.

        """
        config_present = isfile(configfile)
        if not config_present:
            raise MissingConfigurationError(
                "Configfile is missing {}".format(configfile))

        try:
            self.graphconf.parse(configfile, format='turtle')
        except Exception as parse_error:
            message = "Configfile could not be parsed {} {}".format(
                configfile, parse_error)
            raise InvalidConfigurationError(message)

        # With the configuration loaded, collect the configured graphs.
        self.__setgraphsfromconf()
Exemple #7
0
    def __initstoreconfig(self,
                          repository=None,
                          targetdir=None,
                          configfile=None,
                          configmode=None):
        """Initialize store settings.

        Parses an existing configuration file into ``self.sysconf`` and
        applies the optional config mode, repository path and git origin.

        Args
        ----
            repository: Git origin passed to ``setGitOrigin`` when given.
            targetdir: Repository path passed to ``setRepoPath`` when given.
                Required when no usable configuration file is supplied.
            configfile: Path of a Turtle configuration file, or None.
            configmode: Mode passed to ``setConfigMode`` when given.

        Raises
        ------
            InvalidConfigurationError: If the configuration file has bad
                syntax or is not readable, or neither a usable configuration
                file nor a target directory is given.
            UnknownConfigurationError: If parsing fails for any other reason.

        """
        # configfile defaults to None and isfile(None) raises TypeError, so
        # check for a value before touching the file system.
        if configfile and isfile(configfile):
            try:
                self.sysconf.parse(configfile, format='turtle')
            except notation3.BadSyntax as e:
                raise InvalidConfigurationError(
                    "Bad syntax. Configuration file could not be parsed. {}".
                    format(configfile)) from e
            except PermissionError as e:
                raise InvalidConfigurationError(
                    "Configuration file could not be parsed. Permission denied. {}"
                    .format(configfile)) from e
            except Exception as e:
                raise UnknownConfigurationError(
                    "UnknownConfigurationError: {}".format(e)) from e

            self.configfile = configfile
        elif not targetdir:
            # Without a configuration file the target directory is mandatory.
            raise InvalidConfigurationError(
                'No target directory for git repo given')

        if configmode:
            self.setConfigMode(configmode)

        if targetdir:
            self.setRepoPath(targetdir)

        if repository:
            self.setGitOrigin(repository)
Exemple #8
0
    def __initgraphconfig(self, repository=None, targetdir=None):
        """Initialize graph settings.

        Creates a fresh ``self.graphconf`` graph and fills it according to the
        mode returned by ``getConfigMode``:

        * ``localconfig``: graphs are read from ``self.configfile``.
        * ``repoconfig``: graphs are read from ``config.ttl`` inside the
          repository path.
        * ``graphfiles``: graphs are discovered from the repository directory.

        Args
        ----
            repository: Not used by this method.
            targetdir: Not used by this method.

        Raises
        ------
            InvalidConfigurationError: If the config mode is none of the
                supported values.

        """
        self.graphconf = Graph()
        configmode = self.getConfigMode()

        if configmode == 'localconfig':
            self.__initgraphsfromconf(self.configfile)
        elif configmode == 'repoconfig':
            remConfigFile = join(self.getRepoPath(), 'config.ttl')
            self.__initgraphsfromconf(remConfigFile)
        elif configmode == 'graphfiles':
            self.__initgraphsfromdir(self.getRepoPath())
        else:
            # Report the value that was actually checked. Reading a separate
            # instance attribute here could fail and hide the real error.
            raise InvalidConfigurationError('This mode is not supported.',
                                            configmode)
Exemple #9
0
def initialize(args):
    """Build all needed objects.

    Configures logging from the command line arguments, creates the
    QuitConfiguration and the GitRepo, optionally configures git garbage
    collection, loads the graph configuration into a memory store, builds one
    FileReference per configured file and finally passes store, config, git
    repo and file references to ``updateConfig``.

    Args:
        args: Parsed arguments. The attributes read are ``verbose``,
            ``verboseverbose``, ``logfile``, ``disableversioning``,
            ``configfile``, ``targetdir``, ``repourl``, ``configmode`` and
            ``garbagecollection``.

    Returns:
        NOTE(review): earlier documentation promised a dictionary containing
        the store object and git repo object, but no return statement is
        visible in this section - confirm.

    Raises:
        InvalidConfigurationError: If the GitRepo cannot be created.
        SystemExit: If the logfile cannot be opened or the configuration is
            invalid.

    """
    if args.verbose:
        ch.setLevel(logging.INFO)
        logger.addHandler(ch)
        logger.debug('Loglevel: INFO')

    if args.verboseverbose:
        ch.setLevel(logging.DEBUG)
        logger.addHandler(ch)
        logger.debug('Loglevel: DEBUG')

    # add the handlers to the logger

    if args.logfile:
        try:
            fh = logging.FileHandler(args.logfile)
            fh.setLevel(logging.DEBUG)
            fh.setFormatter(formatter)
            logger.addHandler(fh)
            logger.debug("Logfile: {}".format(args.logfile))
        except FileNotFoundError:
            logger.error("Logfile not found: {}".format(args.logfile))
            sys.exit('Exiting quit')
        except PermissionError:
            logger.error("Can not create logfile: {}".format(args.logfile))
            sys.exit('Exiting quit')

    if args.disableversioning:
        logger.info('Versioning: disabled')
        v = False
    else:
        logger.info('Versioning: enabled')
        v = True

    try:
        config = QuitConfiguration(
            versioning=v,
            configfile=args.configfile,
            targetdir=args.targetdir,
            repository=args.repourl,
            configmode=args.configmode,
        )
    except InvalidConfigurationError as e:
        logger.error(e)
        sys.exit('Exiting quit')

    # Create the repository exactly once, and only inside the guarded block,
    # so that a failure is reported as a configuration error.
    try:
        gitrepo = GitRepo(
            path=config.getRepoPath(),
            origin=config.getOrigin()
        )
    except Exception as e:
        raise InvalidConfigurationError(e) from e

    if args.garbagecollection:
        try:
            with subprocess.Popen(
                ["git", "config", "gc.auto"],
                stdout=subprocess.PIPE,
                cwd=config.getRepoPath()
            ) as gcAutoThresholdProcess:
                stdout, _ = gcAutoThresholdProcess.communicate()
                gcAutoThreshold = stdout.decode("UTF-8").strip()

            if not gcAutoThreshold:
                gcAutoThreshold = 256
                # Wait for git to finish so that no child process is left
                # behind unreaped.
                subprocess.call(
                    ["git", "config", "gc.auto", str(gcAutoThreshold)],
                    cwd=config.getRepoPath()
                )
                logger.info("Set default gc.auto threshold {}".format(gcAutoThreshold))

            gitrepo.gc = True
            logger.info(
                "Garbage Collection is enabled with gc.auto threshold {}".format(
                    gcAutoThreshold
                )
            )
        except Exception as e:
            # Disable garbage collection for the rest of the run because it
            # is likely that git is not available
            logger.info('Git garbage collection could not be configured and was disabled')
            logger.debug(e)

    # since repo is handled, we can add graphs to config
    config.initgraphconfig()

    store = initializeMemoryStore(config)

    # Save file objects per file
    filereferences = {}

    for file in config.getfiles():
        graphs = config.getgraphuriforfile(file)
        content = []
        for graph in graphs:
            content += store.getgraphcontent(graph)
        fileobject = FileReference(join(config.getRepoPath(), file))
        # TODO: Quick Fix, add sorting to FileReference
        fileobject.setcontent(sorted(content))
        filereferences[file] = fileobject

    logger.info('QuitStore successfully running.')
    logger.info('Known graphs: ' + str(config.getgraphs()))
    logger.info('Known files: ' + str(config.getfiles()))
    logger.debug('Path of Gitrepo: ' + config.getRepoPath())
    logger.debug('Config mode: ' + str(config.getConfigMode()))
    logger.debug('All RDF files found in Gitepo:' + str(config.getgraphsfromdir()))

    updateConfig(store, config, gitrepo, filereferences)