def get_events(self):
    """
    Collect pending inotify events as a list of `(url, mask)` pairs.

    For every watched path, the inotify descriptor is read without
    blocking and each event is reported with the URL of the affected
    entry (or this poller's own URL when the event carries no name).

    When the poller recurses and a new directory shows up, a watch is
    added for each of its subdirectories and a synthetic
    `IN_CLOSE_WRITE | IN_CREATE` event is emitted for every file
    already present in it.
    """
    new_events = []
    # Iterate over a snapshot of the watched paths: `self.watch()` is
    # called in the loop body and presumably registers new entries in
    # `self._watched` (TODO confirm), which must not happen while
    # iterating over the live container.
    for path in list(self._watched):
        ievents = inotifyx.get_events(self._ifd, 0)
        for event in ievents:
            # if `name` is empty, it's the same directory
            abspath = os.path.join(path, event.name)
            url = Url(abspath) if event.name else self.url
            new_events.append((url, event.mask))

            if not self._recurse:
                continue
            if not (event.mask & inotifyx.IN_ISDIR
                    and event.mask & inotifyx.IN_CREATE):
                continue

            # A new directory has been created.  Add a watch for all
            # its subdirectories; also, we need to trigger new events
            # for any file created in it.
            fake_mask = events['IN_CLOSE_WRITE'] | events['IN_CREATE']
            for (rootdir, dirnames, filenames) in os.walk(abspath):
                for dirname in dirnames:
                    self.watch(os.path.join(rootdir, dirname))
                for filename in filenames:
                    # Trigger a fake event
                    new_events.append(
                        (Url(os.path.join(rootdir, filename)), fake_mask))
    return new_events
def _get_events_file(self, path):
    """
    Report what happened to watched file `path` since the last poll.

    Return a list holding at most one `(Url, event)` pair: event is
    `'deleted'` if the file can no longer be stat'ed (the path is then
    dropped from `self._watched`), `'modified'` if its modification
    time is newer than the recorded one.  The recorded modification
    time is refreshed on every call.
    """
    # A single `os.stat` call replaces the former exists-then-stat
    # sequence, which raised `OSError` if the file vanished in between.
    try:
        mtime = os.stat(path).st_mtime
    except (OSError, ValueError):
        self._watched.pop(path)
        return [(Url(path), 'deleted')]

    modified = mtime > self._watched[path]
    self._watched[path] = mtime
    if modified:
        return [(Url(path), 'modified')]
    return []
def make_store(uri, *args, **extra_args):
    """
    Factory producing concrete `Store`:class: instances.

    Given a URL and (optionally) initialization arguments, return a
    fully-constructed `Store`:class: instance.

    The only required argument is `uri`; if any other arguments are
    present in the function invocation, they are passed verbatim to
    the constructor associated with the scheme of the given `uri`.

    Example::

      >>> fs1 = make_store('file:///tmp')
      >>> fs1.__class__.__name__
      'FilesystemStore'

    Argument `uri` can also consist of a path name, in which case a
    URL scheme 'file:///' is assumed::

      >>> fs2 = make_store('/tmp')
      >>> fs2.__class__.__name__
      'FilesystemStore'

    Any exception raised while building the store is logged and then
    re-raised; an unregistered scheme results in a `KeyError`.
    """
    if not isinstance(uri, Url):
        uri = Url(uri)

    # SQLAlchemy allows URIs of the form `dialect+driver://...`, so
    # only the part before the first `+` identifies the DB back-end
    dialect = uri.scheme.split('+')[0]

    # NOTE: the local imports below use the `from ... import` form on
    # purpose.  A plain `import gc3libs.persistence.xxx` here would
    # make `gc3libs` a function-local name, and the logging calls in
    # the error paths would then fail with `UnboundLocalError`.
    try:
        # hard-code schemes that are supported by GC3Pie itself
        if uri.scheme == 'file':
            from gc3libs.persistence.filesystem import make_filesystemstore
            return make_filesystemstore(uri, *args, **extra_args)
        elif dialect in (
                # XXX: list all supported SQLAlchemy back-ends
                'firebird',
                'mssql',
                'mysql',
                'oracle',
                'postgres',
                'postgresql',
                'sqlite',
        ):
            from gc3libs.persistence.sql import make_sqlstore
            return make_sqlstore(uri, *args, **extra_args)
        else:
            # look the constructor up separately from calling it, so a
            # `KeyError` raised *inside* the constructor is not
            # misreported as an unknown scheme
            try:
                ctor = _registered_store_ctors[uri.scheme]
            except KeyError:
                gc3libs.log.error(
                    "Unknown URL scheme '%s' in"
                    " `gc3libs.persistence.make_store`:"
                    " has never been registered.", uri.scheme)
                raise
            return ctor(uri, *args, **extra_args)
    except Exception as err:
        gc3libs.log.error("Error constructing store for URL '%s': %s: %s",
                          uri, err.__class__.__name__, err)
        raise
def make_poller(url, **extra):
    """
    Factory method that returns the registered poller for the specified
    :py:mod:`gc3libs.url.Url`.

    The classes registered for the URL scheme are tried in order; the
    first one that can be instantiated with `url` and the `extra`
    keyword arguments wins.  `ValueError` is raised if no class is
    registered for the scheme, or if none could be instantiated.
    """
    url = Url(url)

    try:
        candidates = _AVAILABLE_POLLERS[url.scheme]
    except KeyError:
        message = "No poller associated with scheme `{0}`".format(url.scheme)
        raise ValueError(message)

    for poller_cls in candidates:
        try:
            instance = poller_cls(url, **extra)
            gc3libs.log.debug(
                "Using class %s to poll URL %s", poller_cls, url)
            return instance
        except Exception as err:
            # not fatal: another registered class might still work
            gc3libs.log.debug(
                "Could not create poller for scheme `%s` with class %s: %s",
                url.scheme, poller_cls, err)

    message = (
        "No registered class could be instanciated to poll URL {0}"
        .format(url))
    raise ValueError(message)
def _get_events_file(self, path):
    """
    Report what happened to watched file `path` since the last poll.

    Return a list holding at most one `(Url, mask)` pair.  The mask is
    `IN_DELETE` if the file can no longer be stat'ed (the path is then
    dropped from `self._watched`); otherwise it combines `IN_MODIFY`
    (newer `st_mtime`) and `IN_ATTRIB` (newer `st_ctime`).  The stored
    stat record is refreshed on every call.
    """
    # A single `os.stat` call replaces the former exists-then-stat
    # sequence, which raised `OSError` if the file vanished in between.
    try:
        info = os.stat(path)
    except (OSError, ValueError):
        self._watched.pop(path)
        return [(Url(path), events['IN_DELETE'])]

    previous = self._watched[path]
    self._watched[path] = info

    event = 0
    if info.st_mtime > previous.st_mtime:
        event |= events['IN_MODIFY']
    if info.st_ctime > previous.st_ctime:
        event |= events['IN_ATTRIB']

    if event:
        return [(Url(path), event)]
    return []
def setUp(self):
    """
    Generator fixture: set up an SQLite-backed store on a temporary file.

    Sets `self.tmpfile`, `self.db_url`, `self.store` and `self.conn`,
    then yields to the test; the connection is closed afterwards and
    the temporary file is disposed of when the `with` block exits.
    """
    with NamedTemporaryFile(prefix='gc3libs.', suffix='.tmp') as scratch:
        db_path = scratch.name
        self.tmpfile = db_path
        self.db_url = Url('sqlite://%s' % db_path)
        self.store = self._make_store()

        # create a connection to the database
        engine = self.store._engine
        self.conn = engine.connect()

        yield

        self.conn.close()
def __init__(self, url, mask, **kw):
    """
    Connect to the SWIFT storage described by `url` and record the
    objects currently present in the container.

    The user name in the URL must have the form `user+project`; the
    URL query string names the container; schemes `swifts` and `swts`
    select HTTPS for the authentication endpoint, anything else HTTP.

    Raises `gc3libs.exceptions.InvalidValue` if the project name or
    the container name is missing from the URL.
    """
    super(SwiftPoller, self).__init__(url, mask, **kw)

    try:
        self.username, self.project_name = self.url.username.split('+')
    except (AttributeError, ValueError):
        # `AttributeError`: no user name at all in the URL;
        # `ValueError`: user name is not of the form `user+project`
        raise gc3libs.exceptions.InvalidValue(
            "Missing project/tenant name in SWIFT URL '{0}'".format(
                self.url))
    self.password = self.url.password
    self.container = self.url.query
    if not self.container:
        raise gc3libs.exceptions.InvalidValue(
            "Missing bucket name in SWIFT URL '{0}'".format(self.url))
    # also check for hostname and password?

    # Use the parsed `self.url` here, not the raw `url` argument, which
    # may be a plain string without a `.scheme` attribute.
    if self.url.scheme in ['swifts', 'swts']:
        auth_url = 'https://%s' % self.url.hostname
    else:
        auth_url = 'http://%s' % self.url.hostname
    if self.url.port:
        auth_url += ":%d" % self.url.port
    if self.url.path:
        auth_url += self.url.path
    self.auth_url = auth_url

    self.conn = swiftclient.Connection(
        authurl=self.auth_url,
        user=self.username,
        key=self.password,
        os_options={
            "auth_url": self.auth_url,
            "project_name": self.project_name,
            "username": self.username,
            "password": self.password,
        },
        auth_version='2')

    # List containers
    accstat, containers = self.conn.get_account()
    gc3libs.log.debug(
        "Successfully connected to SWIFT storage '%s':"
        " %d containers found", self.conn.url, len(containers))
    if self.container not in [a.get('name') for a in containers]:
        gc3libs.log.warning("Container %s not found at SWIFT URL '%s'",
                            self.container, self.url)

    # Remember what is already in the container, keyed by object URL
    constat, objects = self.conn.get_container(self.container)
    self._known_objects = {}
    for obj in objects:
        obj_url = Url(str(self.url) + '&name=%s' % obj['name'])
        self._known_objects[obj_url] = obj
def setUp(self):
    """
    Generator fixture: set up an SQLite-backed store on a temporary file.

    Sets `self.tmpfile`, `self.db_url`, `self.store` and `self.conn`,
    then yields to the test; afterwards the connection is closed and
    the temporary file is removed.
    """
    fd, self.tmpfile = tempfile.mkstemp()
    # `mkstemp` hands back an open OS-level file descriptor; only the
    # file name is needed here, so close it right away instead of
    # leaking one descriptor per test.
    os.close(fd)
    self.db_url = Url('sqlite://%s' % self.tmpfile)
    self.store = self._make_store()

    # create a connection to the database
    self.conn = self.store._engine.connect()

    yield

    self.conn.close()
    os.remove(self.tmpfile)
def make_poller(url, mask=events['IN_ALL_EVENTS'], **kw):
    """
    Factory method that returns the registered poller for the specified
    :py:mod:`gc3libs.url.Url`.

    The poller class is chosen by URL scheme and instantiated with the
    parsed URL, `mask` and any extra keyword arguments.  `ValueError`
    is raised when no class is registered for the scheme.
    """
    url = Url(url)
    scheme = url.scheme
    try:
        poller_cls = _available_pollers[scheme]
    except KeyError:
        message = "No poller associated with scheme `{0}`".format(scheme)
        raise ValueError(message)
    return poller_cls(url, mask, **kw)
def __init__(self, directory=gc3libs.Default.JOBS_DIR,
             idfactory=IdFactory(), protocol=DEFAULT_PROTOCOL,
             **extra_args):
    """
    Initialize a store rooted at `directory`.

    `directory` may be a `Url` (passed to the base class as is, with
    its `.path` kept as the storage directory) or a path name (the
    base class then gets a `file` URL built from its absolute form,
    while the path is kept exactly as given).
    """
    given_as_url = isinstance(directory, Url)

    if given_as_url:
        store_url = directory
    else:
        store_url = Url(scheme='file', path=os.path.abspath(directory))
    super(FilesystemStore, self).__init__(store_url)

    self._directory = directory.path if given_as_url else directory
    self.idfactory = idfactory
    self._protocol = protocol
def create_gc3pie_sql_store():
    '''Creates a `Store` instance for job persistence in the PostgreSQL
    table :class:`Tasks <tmlib.models.submission.Tasks>`.

    Each extra column of the table is filled by a function that
    extracts the corresponding value from the task being saved.

    Returns
    -------
    gc3libs.persistence.sql.SqlStore
        `GC3Pie` store

    Warning
    -------
    The "tasks" table must already exist.
    '''
    logger.debug('create GC3Pie store using "tasks" table')
    store_url = Url(cfg.db_master_uri)
    columns = tm.Task.__table__.columns
    # taken once, when the store is created
    now = datetime.now()

    # column -> function computing the column value from a task
    extractors = {}
    extractors[columns['name']] = lambda task: task.jobname
    extractors[columns['exitcode']] = lambda task: task.execution.exitcode
    extractors[columns['time']] = (
        lambda task: _get_task_time(task, 'duration'))
    extractors[columns['memory']] = (
        lambda task: _get_task_memory(task, 'max_used_memory'))
    extractors[columns['cpu_time']] = (
        lambda task: _get_task_time(task, 'used_cpu_time'))
    extractors[columns['submission_id']] = lambda task: task.submission_id
    extractors[columns['parent_id']] = lambda task: task.parent_id
    extractors[columns['is_collection']] = (
        lambda task: hasattr(task, 'tasks'))
    extractors[columns['type']] = lambda task: type(task).__name__
    extractors[columns['created_at']] = lambda task: now
    extractors[columns['updated_at']] = lambda task: datetime.now()

    return make_sqlstore(
        url=store_url,
        table_name='tasks',
        extra_fields=extractors,
    )
def _get_events_dir(self, dirpath):
    """
    Report entries of `dirpath` that are not being watched yet.

    Each such entry yields one `(Url, mask)` pair whose mask is
    `IN_CLOSE_WRITE | IN_CREATE` (plus `IN_ISDIR` for directories),
    and is then added to the watch list.
    """
    # check for new files
    entries = {
        os.path.join(dirpath, name)
        for name in os.listdir(dirpath)
        if name not in ('.', '..')
    }

    discovered = []
    for path in entries:
        if path in self._watched:
            continue
        # we cannot really know when a user is done writing
        # so `IN_CLOSE_WRITE` is technically incorrect here
        mask = events['IN_CLOSE_WRITE'] | events['IN_CREATE']
        if os.path.isdir(path):
            mask |= events['IN_ISDIR']
        discovered.append((Url(path), mask))
        self.watch(path)
    return discovered
def setUp(self):
    """
    Generator fixture: set up a MySQL-backed store on a scratch table.

    The table name is derived from a unique temporary file name.  If
    the database cannot be reached, the test is skipped.  After the
    test, the scratch table is dropped and the connection is closed.
    """
    # only the unique *name* is of interest: release the descriptor
    # and delete the file immediately
    fd, tmpfile = tempfile.mkstemp()
    os.close(fd)
    os.remove(tmpfile)
    self.table_name = os.path.basename(tmpfile)

    try:
        self.db_url = Url('mysql://*****:*****@localhost/gc3')
        self.store = make_store(self.db_url, table_name=self.table_name)
    except sqlalchemy.exc.OperationalError:
        # `pytest.mark.skip(...)` merely builds a marker object and
        # skips nothing; `pytest.skip()` actually aborts the test
        pytest.skip("Cannot connect to MySQL database.")

    # create a connection to the database
    self.conn = self.store._engine.connect()

    yield

    self.conn.execute('drop table `%s`' % self.table_name)
    self.conn.close()
def _get_events_dir(self, dirpath):
    """
    Report entries of `dirpath` that are not being watched yet.

    Each such entry yields one `(Url, 'created')` pair.  New files are
    always added to the watch list; new directories only when the
    poller was created with `recurse=True`.
    """
    # check for new files
    entries = {
        os.path.join(dirpath, name)
        for name in os.listdir(dirpath)
        if name not in ('.', '..')
    }

    created = []
    for path in entries:
        if path in self._watched:
            continue
        created.append((Url(path), 'created'))
        # if path is a directory, add it to watch list only if
        # poller was created with `recurse=True`
        if self._recurse or not os.path.isdir(path):
            self.watch(path)
    return created
def parse_args(self):
    """
    Post-process and validate command-line parameters.

    - split the comma-separated `valid_extensions` option into the
      list `self.valid_extensions`;
    - default the inbox to `<working_dir>/inbox` when none was given;
    - default the output directory to `<working_dir>/output` when the
      option was left at its default value;
    - raise `gc3libs.exceptions.InvalidUsage` unless `threshold2` is
      strictly greater than `threshold1`.
    """
    self.valid_extensions = [
        ext.strip() for ext in self.params.valid_extensions.split(',')
    ]

    if not self.params.inbox:
        # Add a default inbox if not passed from command line
        self.params.inbox = [
            Url(os.path.join(self.params.working_dir, 'inbox'))
        ]

    if self.params.output == self.actions['output'].default:
        # Use directory 'output' as output directory by default
        self.params.output = os.path.join(self.params.working_dir,
                                          'output')

    if int(self.params.threshold2) <= int(self.params.threshold1):
        # the exception used to be built but never raised, so invalid
        # thresholds were silently accepted
        raise gc3libs.exceptions.InvalidUsage(
            "Value of `--threshold2` should be greater than `--threshold1`"
        )
def get_new_events(self):
    """
    Read pending inotify events and translate them into a list of
    `(url, event)` pairs, where event is `'created'`, `'modified'` or
    `'deleted'`.

    Event masks are accumulated per path within one call, so a single
    `'created'` or `'modified'` event is dispatched once the file is
    closed after writing.  When recursing, a newly-created directory
    gets watches on all its subdirectories, and every file already in
    it is reported as `'created'`.
    """
    flags = inotify.flags
    collected = []
    raw_events = self._ifd.read(0)
    masks_by_path = defaultdict(int)

    for raw in raw_events:
        try:
            # if `name` is empty, it's the same directory
            path = os.path.join(self._wd[raw.wd], raw.name)
        except KeyError:
            raise AssertionError(
                "Received event {0} for unknown watch descriptor {1}"
                .format(raw, raw.wd))

        masks_by_path[path] |= raw.mask
        history = masks_by_path[path]

        # we want to dispatch a single `created` or `modified`
        # event, so check once the file is closed what past events
        # have been recorded
        if raw.mask & flags.CLOSE_WRITE:
            if history & flags.CREATE:
                collected.append(self.__make_event(path, 'created'))
            elif history & flags.MODIFY:
                collected.append(self.__make_event(path, 'modified'))

        if raw.mask & flags.DELETE:
            collected.append(self.__make_event(path, 'deleted'))

        if (self._recurse
                and raw.mask & flags.ISDIR
                and raw.mask & flags.CREATE):
            # A new directory has been created.  Add a watch for all
            # its subdirectories.  Also, we need to trigger new events
            # for any file created in it.
            for (rootdir, dirnames, filenames) in os.walk(path):
                for dirname in dirnames:
                    self.watch(os.path.join(rootdir, dirname))
                # report creation events
                collected.extend(
                    (Url(os.path.join(rootdir, filename)), 'created')
                    for filename in filenames)

    return collected
def get_new_events(self):
    """
    Compare the container listing with the objects seen so far.

    Return a list of `(url, event)` pairs: `'created'` for each object
    not seen before, `'deleted'` for each known object that is no
    longer listed.  `self._known_objects` is updated accordingly.
    """
    # List objects in container
    _, objects = self.conn.get_container(self.container)

    newevents = []
    # a set makes the "is it still there?" test below O(1) per known
    # object, instead of scanning a list every time
    current_urls = set()
    for obj in objects:
        url = Url('{baseurl}&name={objname}'
                  .format(baseurl=self.url, objname=obj['name']))
        current_urls.add(url)
        if url not in self._known_objects:
            self._known_objects[url] = obj
            newevents.append((url, 'created'))

    # iterate over a copy, since entries are removed along the way
    for url in list(self._known_objects):
        if url not in current_urls:
            newevents.append((url, 'deleted'))
            self._known_objects.pop(url)
    return newevents
def setUp(self):
    """
    Generator fixture: set up a MySQL-backed store on a scratch table.

    The table gets a random name (`test_` plus ten ASCII letters).  If
    the database cannot be reached, the test is skipped.  After the
    test, the scratch table is dropped and the connection is closed.
    """
    # generate random table name
    from string import ascii_letters as letters
    import random
    self.table_name = 'test_' + (''.join(
        random.choice(letters) for _ in range(10)))

    try:
        self.db_url = Url('mysql://*****:*****@localhost/gc3')
        self.store = make_store(self.db_url, table_name=self.table_name)
    except sqlalchemy.exc.OperationalError:
        # `pytest.mark.skip(...)` merely builds a marker object and
        # skips nothing; `pytest.skip()` actually aborts the test
        pytest.skip("Cannot connect to MySQL database.")

    # create a connection to the database
    self.conn = self.store._engine.connect()

    yield

    self.conn.execute('drop table `%s`' % self.table_name)
    self.conn.close()
def create_gc3pie_sql_store():
    '''Creates a `Store` instance for job persistence in the PostgreSQL
    table :class:`Tasks <tmlib.models.submission.Tasks>`.

    The database location is read from `cfg.db_master_uri`; the extra
    table columns come from `get_gc3pie_store_extra_fields()`.

    Returns
    -------
    gc3libs.persistence.sql.SqlStore
        `GC3Pie` store

    Warning
    -------
    The "tasks" table must already exist.
    '''
    logger.debug('create GC3Pie store using "tasks" table')
    store_options = dict(
        url=Url(cfg.db_master_uri),
        table_name='tasks',
        extra_fields=get_gc3pie_store_extra_fields(),
    )
    return make_sqlstore(**store_options)
def get_events(self):
    """
    Compare the container listing with the objects seen so far.

    Return a list of `(url, mask)` pairs: `IN_CLOSE_WRITE` for each
    object not seen before, `IN_DELETE` for each known object that is
    no longer listed.  `self._known_objects` is updated accordingly.
    """
    # List objects in container
    constat, objects = self.conn.get_container(self.container)

    newevents = []
    # a set makes the "is it still there?" test below O(1) per known
    # object, instead of scanning a list every time
    current_urls = set()
    for obj in objects:
        url = Url(str(self.url) + '&name=' + obj['name'])
        current_urls.add(url)
        if url not in self._known_objects:
            self._known_objects[url] = obj
            # Here it's correct not to put IN_CREATE because on
            # swift you will see an object only when it has been
            # completely uploaded.
            newevents.append((url, events['IN_CLOSE_WRITE']))

    # iterate over a copy, since entries are removed along the way
    for url in list(self._known_objects):
        if url not in current_urls:
            newevents.append((url, events['IN_DELETE']))
            self._known_objects.pop(url)
    return newevents
def new_tasks(self, extra, epath=None, emask=0):
    """
    Return the list of new jobs to add to the session.

    Without `epath` (startup), every inbox directory is walked and a
    `BemoviWorkflow` is created for each file with a valid extension
    that is not yet associated with a task in the session.

    With `epath` and an `emask` that includes `IN_CLOSE_WRITE`, the
    file at `epath.path` is examined: a file with an invalid extension
    is ignored; a file that already has a job makes that job be redone
    (if `should_resubmit()` says so) and no new job is returned;
    otherwise one new `BemoviWorkflow` is returned.

    As a side effect, `extra['rparams']` is (re)set from the current
    command-line parameters.
    """
    extra['rparams'] = {
        'memory': str(self.params.memory_per_core.amount(
            unit=gc3libs.quantity.MB)),
        'fps': self.params.fps,
        'pixel_to_scale': self.params.pixel_to_scale,
        'difference_lag': self.params.difference_lag,
        'threshold1': self.params.threshold1,
        'threshold2': self.params.threshold2,
    }

    if not epath:
        # At startup, scan all the input directories and check if
        # there is a file which is not processed yet.

        # First, check which files we already did
        known_videos = {
            i.get('videofile', None): i for i in self.session
        }

        new_jobs = []
        for inboxurl in self.params.inbox:
            # WARNING: we assume this is a filesystem directory
            try:
                inbox = inboxurl.path
            except AttributeError:
                # not a `Url` instance yet (e.g., a plain string)
                inboxurl = Url(inboxurl)
                inbox = inboxurl.path
            for dirpath, dirnames, fnames in os.walk(inbox):
                for fname in fnames:
                    filename = os.path.join(dirpath, fname)
                    if (filename.rsplit('.', 1)[-1]
                            not in self.valid_extensions):
                        continue
                    # test the bare file name: `filename` carries the
                    # directory prefix, so it never starts with '._'
                    if fname.startswith('._'):
                        self.log.warning(
                            "Ignoring file %s as it starts with '._'",
                            filename)
                        continue
                    if filename not in known_videos:
                        app = BemoviWorkflow(filename,
                                             self.params.email_from,
                                             self.params.smtp_server,
                                             **extra)
                        new_jobs.append(app)
                        known_videos[filename] = app
                    else:
                        # In case it exists, but the application
                        # terminated with an exit code, then we
                        # want to resubmit the job anyway
                        job = known_videos[filename]
                        if job.should_resubmit():
                            # NOTE: only reported here; no actual
                            # resubmission is performed at startup
                            self.log.info(
                                "File %s might have been overwritten. "
                                "Resubmitting job %s",
                                filename, job.persistent_id)
        return new_jobs

    fpath = epath.path
    if emask & plr.events['IN_CLOSE_WRITE']:
        if fpath.rsplit('.', 1)[-1] not in self.valid_extensions:
            self.log.info(
                "Ignoring file %s as it does not end with a valid"
                " extension (%s)",
                fpath, str.join(',', self.valid_extensions))
            return []

        # Only resubmit the job if it failed
        for job in self.session.tasks.values():
            try:
                job.videofile
            except AttributeError:
                # Not a GBemoviWorkflow application
                continue
            if job.videofile == fpath:
                if job.should_resubmit():
                    self.log.info(
                        "Re-submitting job %s as file %s has been"
                        " overwritten",
                        job.persistent_id, fpath)
                    job.update_configuration()
                    self._controller.kill(job)
                    self._controller.progress()
                    self._controller.redo(job, from_stage=0)
                else:
                    self.log.info(
                        "Ignoring already successfully processed file %s",
                        fpath)
                # In both case, do not return any new job
                return []

        return [
            BemoviWorkflow(fpath,
                           self.params.email_from,
                           self.params.smtp_server,
                           **extra)
        ]

    return []
def make_store(uri, *args, **extra_args):
    """
    Factory producing concrete `Store`:class: instances.

    Given a URL and (optionally) initialization arguments, return a
    fully-constructed `Store`:class: instance.

    The only required argument is `uri`; if any other arguments are
    present in the function invocation, they are passed verbatim to
    the constructor associated with the scheme of the given `uri`.

    Example::

      >>> fs1 = make_store('file:///tmp')
      >>> fs1.__class__.__name__
      'FilesystemStore'

    Argument `uri` can also consist of a path name, in which case a
    URL scheme 'file:///' is assumed::

      >>> fs2 = make_store('/tmp')
      >>> fs2.__class__.__name__
      'FilesystemStore'

    Any exception raised while building the store is logged and then
    re-raised; an unregistered scheme results in a `KeyError`.
    """
    if not isinstance(uri, Url):
        uri = Url(uri)

    # since SQLAlchemy allows URIs of the form `db+driver://...`
    # (e.g., `postgresql+psycopg://...`) we need to examine the URI
    # scheme only up to the first `+`
    scheme = uri.scheme.split('+')[0]

    # NOTE: the local imports below use the `from ... import` form on
    # purpose.  A plain `import gc3libs.persistence.xxx` here would
    # make `gc3libs` a function-local name, and the logging calls in
    # the error paths would then fail with `UnboundLocalError`.
    try:
        # hard-code schemes that are supported by GC3Pie itself
        if uri.scheme == 'file':
            from gc3libs.persistence.filesystem import make_filesystemstore
            return make_filesystemstore(uri, *args, **extra_args)
        elif scheme in (
                # DBs supported in SQLAlchemy core as of version 1.1,
                # see: http://docs.sqlalchemy.org/en/latest/dialects/index.html
                'firebird',
                'mssql',
                'mysql',
                'oracle',
                'postgresql',
                'sqlite',
                'sybase',
        ):
            from gc3libs.persistence.sql import make_sqlstore
            return make_sqlstore(uri, *args, **extra_args)
        else:
            # look the constructor up separately from calling it, so a
            # `KeyError` raised *inside* the constructor is not
            # misreported as an unknown scheme
            try:
                ctor = _registered_store_ctors[uri.scheme]
            except KeyError:
                gc3libs.log.error(
                    "Unknown URL scheme '%s' in"
                    " `gc3libs.persistence.make_store`:"
                    " has never been registered.", uri.scheme)
                raise
            return ctor(uri, *args, **extra_args)
    except Exception as err:
        gc3libs.log.error("Error constructing store for URL '%s': %s: %s",
                          uri, err.__class__.__name__, err)
        raise
def __init__(self, url=None):
    """
    Record the URL this object is associated with.

    A true-valued `url` that is not already a `Url` instance is
    converted to one; `None` (or any other false value) and `Url`
    instances are stored unchanged.
    """
    needs_conversion = bool(url) and not isinstance(url, Url)
    self.url = Url(url) if needs_conversion else url
def __init__(self, url, **kw):
    """
    Store `url`, converted to a `Url` instance, as `self.url`.

    Extra keyword arguments are accepted and ignored.
    """
    parsed = Url(url)
    self.url = parsed
def __make_event(self, relpath, event):
    """
    Build the `(url, event)` pair for an event on `relpath`.

    A non-empty `relpath` is joined to the path of this poller's URL;
    an empty one designates the polled URL itself.
    """
    if not relpath:
        return (self.url, event)
    target = os.path.join(self.url.path, relpath)
    return (Url(target), event)
def __init__(self, url, mask, **kw):
    """
    Store `url` (converted to a `Url` instance) and the event `mask`.

    Extra keyword arguments are accepted and ignored.
    """
    parsed = Url(url)
    self.url = parsed
    self.mask = mask