def initgraphconfig(self, rev):
    """Initialize the graph settings for a revision.

    Public method. The configuration graph and its namespace manager are
    only created while ``self.graphconf`` is still ``None``; afterwards the
    blobs reported by ``get_blobs_from_repository`` decide which mode is
    selected.

    Args:
        rev: Revision passed on to ``get_blobs_from_repository`` and to the
            blob based initialisation.

    Raises:
        InvalidConfigurationError: If graph files and a configuration file
            are both present, or if more than one configuration file is
            found.
    """
    # NOTE(review): the guard is assumed to cover only the three set-up
    # statements below -- confirm against the original layout.
    if self.graphconf is None:
        self.graphconf = Graph()
        self.nsMngrGraphconf = NamespaceManager(self.graphconf)
        self.nsMngrGraphconf.bind('', self.quit, override=False)

    graph_files, config_files, rdf_files = self.get_blobs_from_repository(
        rev)
    graph_count = len(graph_files)
    config_count = len(config_files)

    if graph_count > 0 and config_count > 0:
        raise InvalidConfigurationError(
            "Conflict. Found graphfiles and QuitStore configuration file.")

    if graph_count > 0:
        self.mode = 'graphfiles'
        self.__init_graph_conf_with_blobs(graph_files, rev)
        return

    if config_count == 0:
        # Neither graph files nor a configuration file were found.
        self.mode = 'graphfiles'
        return

    if config_count == 1:
        self.mode = 'configuration'
        self.__init_graph_conf_from_configuration(config_files[0], rdf_files)
        return

    raise InvalidConfigurationError(
        "Conflict. Found more than one QuitStore configuration file.")
def __setgraphsfromconf(self):
    """Set all URIs and file paths of graphs declared in the config graph.

    Queries ``self.graphconf`` for every ``quit:Graph`` with a graph URI and
    a graph file. For each entry whose guessed serialization is ``nt`` or
    ``nquads`` the file is created inside the repository path if it does
    not exist yet, and the mapping is recorded in ``self.graphs``
    (graph URI -> file name) and ``self.files`` (file name -> serialization
    plus list of graph URIs).

    Raises:
        InvalidConfigurationError: If a missing graph file cannot be created
            because of missing permissions or a missing directory.
        UnknownConfigurationError: If creating the file fails for any other
            reason.
    """
    nsQuit = 'http://quit.aksw.org/vocab/'
    query = 'SELECT DISTINCT ?graphuri ?filename WHERE { '
    query += ' ?graph a <' + nsQuit + 'Graph> . '
    query += ' ?graph <' + nsQuit + 'graphUri> ?graphuri . '
    query += ' ?graph <' + nsQuit + 'graphFile> ?filename . '
    query += '}'
    result = self.graphconf.query(query)

    repopath = self.getRepoPath()

    for row in result:
        filename = str(row['filename'])
        serialization = guess_format(filename)
        if serialization not in ('nt', 'nquads'):
            # Skip only this entry. The query has no ORDER BY, so stopping
            # the whole loop here would drop an arbitrary set of valid
            # graphs depending on the row order.
            continue

        graphFile = join(repopath, filename)

        if not isfile(graphFile):
            try:
                # Create an empty file; the context manager closes the
                # handle even if something goes wrong.
                with open(graphFile, 'a+'):
                    pass
            except PermissionError as e:
                raise InvalidConfigurationError(
                    "Permission denied. Can't create file {} in repo {}".
                    format(graphFile, repopath)) from e
            except FileNotFoundError as e:
                raise InvalidConfigurationError(
                    "File not found. Can't create file {} in repo {}".
                    format(graphFile, repopath)) from e
            except Exception as e:
                raise UnknownConfigurationError(
                    "Can't create file {} in repo {}. Error: {}".format(
                        graphFile, repopath, e)) from e

        graphuri = URIRef(str(row['graphuri']))

        # we store which named graph is serialized in which file
        self.graphs[graphuri] = filename
        # and furthermore we assume that one file can contain data of more
        # than one named graph and so we store for each file a set of graphs
        if filename in self.files:
            self.files[filename]['graphs'].append(graphuri)
        else:
            self.files[filename] = {
                'serialization': serialization,
                'graphs': [graphuri]
            }

    return
def __get_uri_from_graphfile_blob(self, oid):
    """Read a graph URI from a graph file blob.

    Args
    ----
    oid: String oid of a graph file

    Returns
    -------
    graphuri: String with the graph URI, i.e. the stripped blob content.
        ``None`` is returned when looking up the oid raises ``ValueError``.

    Raises
    ------
    InvalidConfigurationError: If the blob content is empty or is rejected
        by ``_is_valid_uri``.
    """
    try:
        graphfile_blob = self.repository.get(oid)
    except ValueError:
        logger.debug(
            "Object with OID {} not found in repository.".format(oid))
        return None

    candidate = graphfile_blob.read_raw().decode().strip()

    # Empty content is rejected before the URI check is consulted.
    if not candidate or not _is_valid_uri(candidate):
        raise InvalidConfigurationError(
            "No graph URI found in blob with OID {}.".format(oid))

    return candidate
def __init_graph_conf_from_configuration(self, configfileId, known_blobs):
    """Init graphs with the settings from a configuration file blob.

    The blob is loaded from ``self.repository``, parsed as Turtle into
    ``self.graphconf`` and queried for graph definitions. Every definition
    whose serialization is ``nt`` and whose file name is a key of
    ``known_blobs`` is recorded in ``self.graphs`` and ``self.files``.

    Args:
        configfileId: Id used to fetch the configuration blob from the
            repository.
        known_blobs: Mapping from file name to oid; only files listed here
            are registered.

    Raises:
        InvalidConfigurationError: If the blob cannot be fetched or its
            content cannot be parsed.
    """
    try:
        configfile = self.repository.get(configfileId)
    except Exception as e:
        raise InvalidConfigurationError(
            "Blob for configfile with id {} not found in repository {}".
            format(configfileId, e)) from e

    content = configfile.read_raw()

    try:
        self.graphconf.parse(data=content, format='turtle')
    except Exception as e:
        raise InvalidConfigurationError(
            "Configfile could not be parsed {} {}".format(
                configfileId, e)) from e

    nsQuit = 'http://quit.aksw.org/vocab/'
    query = 'SELECT DISTINCT ?graphuri ?filename ?format WHERE { '
    query += ' ?graph a <' + nsQuit + 'Graph> . '
    query += ' ?graph <' + nsQuit + 'graphUri> ?graphuri . '
    query += ' ?graph <' + nsQuit + 'graphFile> ?filename . '
    query += ' OPTIONAL { ?graph <' + nsQuit + 'hasFormat> ?format .} '
    query += '}'
    result = self.graphconf.query(query)

    for row in result:
        filename = str(row['filename'])
        # An explicitly configured format wins over guessing from the name.
        if row['format'] is None:
            serialization = guess_format(filename)
        else:
            serialization = str(row['format'])

        # Skip only the offending entry. The query has no ORDER BY, so
        # aborting the loop would discard an arbitrary set of valid graphs.
        if serialization != 'nt' or filename not in known_blobs:
            continue

        graphuri = URIRef(str(row['graphuri']))

        # we store which named graph is serialized in which file
        self.graphs[graphuri] = filename
        self.files[filename] = {
            'serialization': serialization,
            'graph': graphuri,
            'oid': known_blobs[filename]
        }
def __initstoreconfig(self, namespace, repository, targetdir, configfile,
                      configmode):
    """Initialize store settings.

    Validates the base namespace, optionally parses a Turtle configuration
    file into ``self.sysconf`` and forwards the remaining non-empty
    settings to the corresponding setters.

    Raises:
        InvalidConfigurationError: If the namespace is not absolute, the
            configuration file has bad syntax or is not readable, or
            neither a usable configuration file nor a target directory is
            given.
        UnknownConfigurationError: If parsing fails for any other reason.
    """
    if not isAbsoluteUri(namespace):
        raise InvalidConfigurationError(
            "Quit expects an absolute http(s) base namespace, {} is not absolute."
            .format(namespace))
    self.namespace = namespace

    if configfile and isfile(configfile):
        try:
            self.sysconf.parse(configfile, format='turtle')
        except notation3.BadSyntax:
            raise InvalidConfigurationError(
                "Bad syntax. Configuration file could not be parsed. {}".
                format(configfile))
        except PermissionError:
            raise InvalidConfigurationError(
                "Configuration file could not be parsed. Permission denied. {}"
                .format(configfile))
        except Exception as err:
            raise UnknownConfigurationError(
                "UnknownConfigurationError: {}".format(err))

        self.configfile = configfile
    elif not targetdir:
        # Without a configuration file the repository location must be
        # supplied directly.
        raise InvalidConfigurationError(
            'No target directory for git repo given')

    # Only settings that were actually supplied are applied.
    if configmode:
        self.setConfigMode(configmode)
    if targetdir:
        self.setRepoPath(targetdir)
    if repository:
        self.setGitOrigin(repository)

    return
def __initgraphsfromconf(self, configfile):
    """Load graph settings from a configuration file.

    Parses ``configfile`` as Turtle into ``self.graphconf`` and then
    registers the configured graphs.

    Raises:
        MissingConfigurationError: If ``configfile`` is not an existing file.
        InvalidConfigurationError: If the file cannot be parsed.
    """
    config_present = isfile(configfile)
    if not config_present:
        raise MissingConfigurationError(
            "Configfile is missing {}".format(configfile))

    try:
        self.graphconf.parse(configfile, format='turtle')
    except Exception as err:
        raise InvalidConfigurationError(
            "Configfile could not be parsed {} {}".format(configfile, err))
    else:
        # Parsing succeeded, so the graphs can be read from the config.
        self.__setgraphsfromconf()
def __initstoreconfig(self, repository=None, targetdir=None,
                      configfile=None, configmode=None):
    """Initialize store settings.

    Optionally parses a Turtle configuration file into ``self.sysconf`` and
    forwards every non-empty setting to the corresponding setter.

    Args:
        repository: Git origin; applied with ``setGitOrigin`` when given.
        targetdir: Repository path; applied with ``setRepoPath`` when given.
            Required when no usable configuration file is supplied.
        configfile: Path of a Turtle configuration file, or ``None``.
        configmode: Configuration mode; applied with ``setConfigMode`` when
            given.

    Raises:
        InvalidConfigurationError: If the configuration file has bad syntax
            or is not readable, or if neither a usable configuration file
            nor a target directory is given.
        UnknownConfigurationError: If parsing fails for any other reason.
    """
    # ``isfile(None)`` raises TypeError, so the default ``configfile=None``
    # must be filtered out before the file system is consulted.
    if configfile and isfile(configfile):
        try:
            self.sysconf.parse(configfile, format='turtle')
        except notation3.BadSyntax as e:
            raise InvalidConfigurationError(
                "Bad syntax. Configuration file could not be parsed. {}".
                format(configfile)) from e
        except PermissionError as e:
            raise InvalidConfigurationError(
                "Configuration file could not be parsed. Permission denied. {}"
                .format(configfile)) from e
        except Exception as e:
            raise UnknownConfigurationError(
                "UnknownConfigurationError: {}".format(e)) from e

        self.configfile = configfile
    elif not targetdir:
        raise InvalidConfigurationError(
            'No target directory for git repo given')

    if configmode:
        self.setConfigMode(configmode)

    if targetdir:
        self.setRepoPath(targetdir)

    if repository:
        self.setGitOrigin(repository)

    return
def __initgraphconfig(self, repository=None, targetdir=None):
    """Initialize graph settings according to the configuration mode.

    A fresh ``Graph`` is stored in ``self.graphconf``; the value returned by
    ``getConfigMode`` then selects where the graph definitions come from.
    The ``repository`` and ``targetdir`` arguments are accepted but not
    used here.

    Raises:
        InvalidConfigurationError: If the configuration mode is none of
            ``localconfig``, ``repoconfig`` or ``graphfiles``.
    """
    self.graphconf = Graph()
    mode = self.getConfigMode()

    if mode == 'localconfig':
        # Configuration file supplied locally.
        self.__initgraphsfromconf(self.configfile)
        return

    if mode == 'repoconfig':
        # Configuration file stored inside the repository.
        self.__initgraphsfromconf(join(self.getRepoPath(), 'config.ttl'))
        return

    if mode == 'graphfiles':
        self.__initgraphsfromdir(self.getRepoPath())
        return

    raise InvalidConfigurationError('This mode is not supported.',
                                    self.configmode)
def initialize(args):
    """Build all needed objects.

    Configures logging from the command line arguments, creates the
    configuration and the git repository object, optionally configures git
    garbage collection, loads the graphs into the memory store, builds one
    ``FileReference`` per configured file and finally hands everything to
    ``updateConfig``.

    Args:
        args: Parsed command line arguments. The attributes read are
            ``verbose``, ``verboseverbose``, ``logfile``,
            ``disableversioning``, ``configfile``, ``targetdir``,
            ``repourl``, ``configmode`` and ``garbagecollection``.

    Returns:
        None. NOTE(review): earlier documentation promised a dictionary
        containing the store object and git repo object, but no return
        statement is visible here -- confirm against the callers.

    Raises:
        InvalidConfigurationError: If the git repository object cannot be
            created.
        SystemExit: If the log file cannot be opened or the configuration
            is invalid.
    """
    if args.verbose:
        ch.setLevel(logging.INFO)
        logger.addHandler(ch)
        logger.debug('Loglevel: INFO')

    if args.verboseverbose:
        ch.setLevel(logging.DEBUG)
        logger.addHandler(ch)
        logger.debug('Loglevel: DEBUG')

    # add the handlers to the logger
    if args.logfile:
        try:
            fh = logging.FileHandler(args.logfile)
            fh.setLevel(logging.DEBUG)
            fh.setFormatter(formatter)
            logger.addHandler(fh)
            logger.debug("Logfile: {}".format(args.logfile))
        except FileNotFoundError:
            logger.error("Logfile not found: {}".format(args.logfile))
            sys.exit('Exiting quit')
        except PermissionError:
            logger.error("Can not create logfile: {}".format(args.logfile))
            sys.exit('Exiting quit')

    if args.disableversioning:
        logger.info('Versioning: disabled')
        v = False
    else:
        logger.info('Versioning: enabled')
        v = True

    try:
        config = QuitConfiguration(
            versioning=v,
            configfile=args.configfile,
            targetdir=args.targetdir,
            repository=args.repourl,
            configmode=args.configmode,
        )
    except InvalidConfigurationError as e:
        logger.error(e)
        sys.exit('Exiting quit')

    # The repository object is created exactly once, inside the guard, so
    # that every failure is reported as a configuration error.
    try:
        gitrepo = GitRepo(
            path=config.getRepoPath(),
            origin=config.getOrigin()
        )
    except Exception as e:
        raise InvalidConfigurationError(e) from e

    if args.garbagecollection:
        try:
            with subprocess.Popen(
                ["git", "config", "gc.auto"],
                stdout=subprocess.PIPE,
                cwd=config.getRepoPath()
            ) as gcAutoThresholdProcess:
                stdout, _ = gcAutoThresholdProcess.communicate()
                gcAutoThreshold = stdout.decode("UTF-8").strip()

            if not gcAutoThreshold:
                gcAutoThreshold = 256
                # ``call`` waits for git to finish, so no child process is
                # left behind unreaped.
                subprocess.call(
                    ["git", "config", "gc.auto", str(gcAutoThreshold)],
                    cwd=config.getRepoPath()
                )
                logger.info(
                    "Set default gc.auto threshold {}".format(gcAutoThreshold))

            gitrepo.gc = True
            logger.info(
                "Garbage Collection is enabled with gc.auto threshold {}".format(
                    gcAutoThreshold
                )
            )
        except Exception as e:
            # Garbage collection stays disabled for the rest of the run
            # because it is likely that git is not available
            logger.info('Git garbage collection could not be configured and was disabled')
            logger.debug(e)

    # since repo is handled, we can add graphs to config
    config.initgraphconfig()

    store = initializeMemoryStore(config)

    # Save file objects per file
    filereferences = {}

    for file in config.getfiles():
        graphs = config.getgraphuriforfile(file)
        content = []
        for graph in graphs:
            content += store.getgraphcontent(graph)
        fileobject = FileReference(join(config.getRepoPath(), file))
        # TODO: Quick Fix, add sorting to FileReference
        fileobject.setcontent(sorted(content))
        filereferences[file] = fileobject

    logger.info('QuitStore successfully running.')
    logger.info('Known graphs: ' + str(config.getgraphs()))
    logger.info('Known files: ' + str(config.getfiles()))
    logger.debug('Path of Gitrepo: ' + config.getRepoPath())
    logger.debug('Config mode: ' + str(config.getConfigMode()))
    logger.debug('All RDF files found in Gitepo:' + str(config.getgraphsfromdir()))

    updateConfig(store, config, gitrepo, filereferences)