Ejemplo n.º 1
0
 def get_events(self):
     """
     Collect pending inotify events and return them as a list of
     ``(url, mask)`` pairs.

     For every watched path, events are read (without blocking) from
     the inotify descriptor `self._ifd`.  When recursion is enabled
     and an event reports the creation of a directory, the tree under
     that directory is walked: each sub-directory is passed to
     `self.watch` and each file found is reported with a synthetic
     ``IN_CLOSE_WRITE | IN_CREATE`` event.
     """
     new_events = []
     # Iterate over a snapshot of the watched paths: `self.watch()` is
     # called further down while this loop is still running, and if it
     # registers new entries in `self._watched`, iterating the live
     # container would fail ("changed size during iteration").
     for path in list(self._watched):
         # NOTE(review): events are joined with the `path` of the
         # current iteration; confirm that `inotifyx.get_events` only
         # returns events belonging to that path.
         ievents = inotifyx.get_events(self._ifd, 0)
         for event in ievents:
             # if `name` is empty, the event is about the directory itself
             abspath = os.path.join(path, event.name)
             url = Url(abspath) if event.name else self.url
             new_events.append((url, event.mask))
             created_dir = (self._recurse
                            and event.mask & inotifyx.IN_ISDIR
                            and event.mask & inotifyx.IN_CREATE)
             if not created_dir:
                 continue
             # A new directory has been created. Add a watch for all
             # its subdirectories and trigger new events for any file
             # already present in it.
             # NOTE(review): `abspath` itself is not passed to
             # `self.watch` here -- verify it is watched elsewhere.
             for (rootdir, dirnames, filenames) in os.walk(abspath):
                 for dirname in dirnames:
                     self.watch(os.path.join(rootdir, dirname))
                 for filename in filenames:
                     # Trigger a fake event
                     new_events.append(
                         (Url(os.path.join(rootdir, filename)),
                          events['IN_CLOSE_WRITE']
                          | events['IN_CREATE']))
     return new_events
Ejemplo n.º 2
0
    def _get_events_file(self, path):
        """
        Report what happened to the watched file `path` since the last check.

        Returns a one-element list ``[(Url(path), 'deleted')]`` if the file
        no longer exists (the path is also dropped from `self._watched`),
        ``[(Url(path), 'modified')]`` if its modification time is newer than
        the recorded one, and an empty list otherwise.  The recorded
        modification time is refreshed on every call for an existing file.
        """
        if not os.path.exists(path):
            self._watched.pop(path)
            return [(Url(path), 'deleted')]

        current_mtime = os.stat(path).st_mtime
        was_modified = current_mtime > self._watched[path]
        # remember the latest timestamp, whether or not it changed
        self._watched[path] = current_mtime

        return [(Url(path), 'modified')] if was_modified else []
Ejemplo n.º 3
0
def make_store(uri, *args, **extra_args):
    """
    Factory producing concrete `Store`:class: instances.

    Given a URL and (optionally) initialization arguments, return a
    fully-constructed `Store`:class: instance.

    The only required argument is `uri`; if any other arguments are
    present in the function invocation, they are passed verbatim to
    the constructor associated with the scheme of the given `uri`.

    Example::

      >>> fs1 = make_store('file:///tmp')
      >>> fs1.__class__.__name__
      'FilesystemStore'

    Argument `uri` can also consist of a path name, in which case a
    URL scheme 'file:///' is assumed::

      >>> fs2 = make_store('/tmp')
      >>> fs2.__class__.__name__
      'FilesystemStore'

    Schemes of the form ``dialect+driver`` are dispatched to the SQL
    store according to the ``dialect`` part only.  Any other scheme is
    looked up in the registry of store constructors; an unknown scheme
    raises `KeyError`.  Every error is logged and then re-raised.
    """
    if not isinstance(uri, Url):
        uri = Url(uri)

    # SQLAlchemy allows URIs of the form `dialect+driver://...`, so only
    # the part of the scheme before the first `+` selects the back-end
    dialect = uri.scheme.split('+')[0]

    # create and return store
    try:
        # hard-code schemes that are supported by GC3Pie itself
        if uri.scheme == 'file':
            import gc3libs.persistence.filesystem
            return gc3libs.persistence.filesystem.make_filesystemstore(
                uri, *args, **extra_args)
        elif dialect in (
                'firebird',
                'mssql',
                'mysql',
                'oracle',
                # `postgres` is kept for backward compatibility
                'postgres',
                'postgresql',
                'sqlite',
                'sybase',
        ):
            import gc3libs.persistence.sql
            return gc3libs.persistence.sql.make_sqlstore(
                uri, *args, **extra_args)
        else:
            # Look the constructor up separately from calling it, so a
            # `KeyError` raised *inside* the constructor is not
            # misreported as an unknown URL scheme.
            try:
                ctor = _registered_store_ctors[uri.scheme]
            except KeyError:
                gc3libs.log.error(
                    "Unknown URL scheme '%s' in"
                    " `gc3libs.persistence.make_store`:"
                    " has never been registered.", uri.scheme)
                raise
            return ctor(uri, *args, **extra_args)
    except Exception as err:
        gc3libs.log.error("Error constructing store for URL '%s': %s: %s", uri,
                          err.__class__.__name__, err)
        raise
Ejemplo n.º 4
0
def make_poller(url, **extra):
    """
    Build a poller for `url` using the classes registered for its scheme.

    The registered classes are tried in order; the first one whose
    constructor succeeds (called as ``cls(url, **extra)``) is returned.
    Failures of individual classes are logged at debug level.

    :raise ValueError: if no class is registered for the URL scheme,
      or if none of the registered classes could be instantiated.
    """
    url = Url(url)
    scheme = url.scheme

    try:
        candidates = _AVAILABLE_POLLERS[scheme]
    except KeyError:
        message = "No poller associated with scheme `{0}`".format(scheme)
        raise ValueError(message)

    for candidate in candidates:
        try:
            instance = candidate(url, **extra)
            gc3libs.log.debug("Using class %s to poll URL %s", candidate, url)
            return instance
        except Exception as err:
            # this class cannot handle the URL; try the next one
            gc3libs.log.debug(
                "Could not create poller for scheme `%s` with class %s: %s",
                scheme, candidate, err)

    failure = "No registered class could be instanciated to poll URL {0}"
    raise ValueError(failure.format(url))
Ejemplo n.º 5
0
    def _get_events_file(self, path):
        """
        Compare the current `os.stat` data of `path` with the recorded one.

        Returns ``[(Url(path), mask)]`` where `mask` is ``IN_DELETE`` if the
        file is gone (it is then removed from `self._watched`), or the
        bitwise OR of ``IN_MODIFY`` (newer ``st_mtime``) and ``IN_ATTRIB``
        (newer ``st_ctime``).  Returns an empty list when nothing changed.
        The recorded stat data is refreshed on every call for an existing
        file.
        """
        if not os.path.exists(path):
            self._watched.pop(path)
            return [(Url(path), events['IN_DELETE'])]

        current = os.stat(path)
        previous = self._watched[path]

        mask = 0
        if current.st_mtime > previous.st_mtime:
            mask |= events['IN_MODIFY']
        if current.st_ctime > previous.st_ctime:
            mask |= events['IN_ATTRIB']
        self._watched[path] = current

        return [(Url(path), mask)] if mask else []
Ejemplo n.º 6
0
    def setUp(self):
        """
        Generator-style fixture: create a store on a temporary SQLite
        file, open a connection to it, hand control to the test at the
        ``yield``, then close the connection.

        The temporary file exists only while the ``with`` block is active.
        """
        with NamedTemporaryFile(prefix='gc3libs.', suffix='.tmp') as scratch:
            self.tmpfile = scratch.name
            self.db_url = Url('sqlite://%s' % self.tmpfile)
            self.store = self._make_store()

            # open a connection to the database backing the store
            engine = self.store._engine
            self.conn = engine.connect()

            yield

            self.conn.close()
Ejemplo n.º 7
0
    def __init__(self, url, mask, **kw):
        """
        Connect to a SWIFT storage and record the objects currently
        present in the container to be polled.

        The URL is decoded as follows:

        * user name: ``USER+PROJECT`` (user and project/tenant name);
        * password: the authentication key;
        * query string: name of the container;
        * host name, port and path: the authentication URL, using
          ``https`` for schemes ``swifts``/``swts`` and ``http`` otherwise.

        :raise gc3libs.exceptions.InvalidValue: if the user name is
          missing or not of the form ``USER+PROJECT``, or if no
          container name is given.
        """
        super(SwiftPoller, self).__init__(url, mask, **kw)

        try:
            self.username, self.project_name = self.url.username.split('+')
        except (AttributeError, ValueError):
            # `AttributeError`: no user name at all in the URL;
            # `ValueError`: user name without exactly one `+` separator
            raise gc3libs.exceptions.InvalidValue(
                "Missing project/tenant name in SWIFT URL '{0}'".format(
                    self.url))
        self.password = self.url.password
        self.container = self.url.query

        if not self.container:
            raise gc3libs.exceptions.InvalidValue(
                "Missing bucket name in SWIFT URL '{0}'".format(self.url))
        # also check for hostname and password?

        # Use `self.url` rather than the raw `url` argument here: the
        # latter is not guaranteed to have a `.scheme` attribute.
        if self.url.scheme in ['swifts', 'swts']:
            auth_url = 'https://%s' % self.url.hostname
        else:
            auth_url = 'http://%s' % self.url.hostname
        if self.url.port:
            auth_url += ":%d" % self.url.port
        if self.url.path:
            auth_url += self.url.path
        self.auth_url = auth_url

        self.conn = swiftclient.Connection(authurl=self.auth_url,
                                           user=self.username,
                                           key=self.password,
                                           os_options={
                                               "auth_url": self.auth_url,
                                               "project_name":
                                               self.project_name,
                                               "username": self.username,
                                               "password": self.password
                                           },
                                           auth_version='2')

        # List containers
        _, containers = self.conn.get_account()
        gc3libs.log.debug(
            "Successfully connected to SWIFT storage '%s':"
            " %d containers found", self.conn.url, len(containers))
        container_names = [entry.get('name') for entry in containers]
        if self.container not in container_names:
            gc3libs.log.warning("Container %s not found at SWIFT URL '%s'",
                                self.container, self.url)

        _, objects = self.conn.get_container(self.container)

        # remember what is already there, so that only later changes
        # are reported as events
        self._known_objects = {}
        for obj in objects:
            obj_url = Url(str(self.url) + '&name=%s' % obj['name'])
            self._known_objects[obj_url] = obj
Ejemplo n.º 8
0
    def setUp(self):
        """
        Generator-style fixture: create a store on a temporary SQLite
        file and open a connection to it; after the test has run (code
        following the ``yield``), close the connection and delete the
        file.
        """
        fd, self.tmpfile = tempfile.mkstemp()
        # `mkstemp` returns an already-open OS-level file descriptor;
        # only the path is needed here, so close it right away instead
        # of leaking one descriptor per test.
        os.close(fd)
        self.db_url = Url('sqlite://%s' % self.tmpfile)
        self.store = self._make_store()

        # create a connection to the database
        self.conn = self.store._engine.connect()

        yield
        self.conn.close()
        os.remove(self.tmpfile)
Ejemplo n.º 9
0
def make_poller(url, mask=events['IN_ALL_EVENTS'], **kw):
    """
    Instantiate the poller class registered for the scheme of `url`.

    The class is called as ``cls(url, mask, **kw)`` with `url`
    converted to a `Url` object.

    :raise ValueError: if no poller class is registered for the scheme.
    """
    url = Url(url)
    scheme = url.scheme
    try:
        poller_class = _available_pollers[scheme]
    except KeyError:
        message = "No poller associated with scheme `{0}`".format(scheme)
        raise ValueError(message)
    return poller_class(url, mask, **kw)
Ejemplo n.º 10
0
    def __init__(self,
                 directory=gc3libs.Default.JOBS_DIR,
                 idfactory=IdFactory(),
                 protocol=DEFAULT_PROTOCOL,
                 **extra_args):
        """
        Initialize a store rooted at `directory`.

        `directory` may be a `Url` (then its `.path` is used as the
        storage directory) or a path name (then the base class is given
        a ``file`` URL built from its absolute path).  `idfactory` and
        `protocol` are stored on the instance; `extra_args` are accepted
        but not used here.
        """
        # NOTE(review): the default `idfactory` instance is created once,
        # at function definition time, and shared by every store that
        # does not pass its own -- confirm this is intended.
        given_as_url = isinstance(directory, Url)
        if given_as_url:
            store_url = directory
        else:
            store_url = Url(scheme='file', path=os.path.abspath(directory))
        super(FilesystemStore, self).__init__(store_url)

        self._directory = directory.path if given_as_url else directory
        self.idfactory = idfactory
        self._protocol = protocol
Ejemplo n.º 11
0
def create_gc3pie_sql_store():
    '''Build the `GC3Pie` store that persists jobs in the "tasks" table.

    Besides the default columns, the store fills the extra columns
    listed below from attributes of the task being saved.

    Returns
    -------
    gc3libs.persistence.sql.SqlStore
        `GC3Pie` store

    Warning
    -------
    The "tasks" table must already exist.
    '''
    logger.debug('create GC3Pie store using "tasks" table')
    store_url = Url(cfg.db_master_uri)
    columns = tm.Task.__table__.columns
    # NOTE(review): captured once, when the store is created, so every
    # task saved through this store gets the same `created_at` value.
    now = datetime.now()

    # maps each extra column to a function computing its value from a task
    extra_fields = {
        columns['name']: lambda task: task.jobname,
        columns['exitcode']: lambda task: task.execution.exitcode,
        columns['time']: lambda task: _get_task_time(task, 'duration'),
        columns['memory']:
            lambda task: _get_task_memory(task, 'max_used_memory'),
        columns['cpu_time']:
            lambda task: _get_task_time(task, 'used_cpu_time'),
        columns['submission_id']: lambda task: task.submission_id,
        columns['parent_id']: lambda task: task.parent_id,
        columns['is_collection']: lambda task: hasattr(task, 'tasks'),
        columns['type']: lambda task: type(task).__name__,
        columns['created_at']: lambda task: now,
        columns['updated_at']: lambda task: datetime.now(),
    }
    return make_sqlstore(
        url=store_url,
        table_name='tasks',
        extra_fields=extra_fields,
    )
Ejemplo n.º 12
0
    def _get_events_dir(self, dirpath):
        """
        Report entries of directory `dirpath` that are not watched yet.

        Each new entry yields a ``(Url(path), mask)`` pair whose mask is
        ``IN_CLOSE_WRITE | IN_CREATE`` (plus ``IN_ISDIR`` for
        directories); the entry is then passed to `self.watch`.
        """
        found = []

        # full paths of everything currently in the directory
        entries = {
            os.path.join(dirpath, name)
            for name in os.listdir(dirpath)
            if name not in ['.', '..']
        }
        for path in entries:
            if path in self._watched:
                continue
            # we cannot really know when a user is done writing
            # so `IN_CLOSE_WRITE` is technically incorrect here
            mask = events['IN_CLOSE_WRITE'] | events['IN_CREATE']
            if os.path.isdir(path):
                mask |= events['IN_ISDIR']
            found.append((Url(path), mask))
            self.watch(path)

        return found
Ejemplo n.º 13
0
    def setUp(self):
        """
        Generator-style fixture: create a store on a uniquely-named
        table of the local MySQL test database and open a connection to
        it; after the test has run (code following the ``yield``), drop
        the table and close the connection.

        The test is skipped if the database cannot be reached.
        """
        # `mkstemp` is only used to obtain a unique name: close the
        # descriptor it opened (otherwise it leaks) and delete the file
        fd, tmpfile = tempfile.mkstemp()
        os.close(fd)
        os.remove(tmpfile)
        self.table_name = os.path.basename(tmpfile)

        try:
            self.db_url = Url('mysql://*****:*****@localhost/gc3')
            self.store = make_store(self.db_url, table_name=self.table_name)
        except sqlalchemy.exc.OperationalError:
            # `pytest.mark.skip(...)` only builds a marker object and
            # does not skip anything; `pytest.skip()` actually aborts
            # the set-up and marks the test as skipped.
            pytest.skip("Cannot connect to MySQL database.")

        # create a connection to the database
        self.conn = self.store._engine.connect()

        yield

        self.conn.execute('drop table `%s`' % self.table_name)
        self.conn.close()
Ejemplo n.º 14
0
    def _get_events_dir(self, dirpath):
        """
        Report entries of directory `dirpath` that are not watched yet.

        Each new entry yields a ``(Url(path), 'created')`` pair.  Files
        are always added to the watch list; directories only when the
        poller has `self._recurse` set.
        """
        found = []

        # full paths of everything currently in the directory
        entries = {
            os.path.join(dirpath, name)
            for name in os.listdir(dirpath)
            if name not in ['.', '..']
        }
        for path in entries:
            if path in self._watched:
                continue
            found.append((Url(path), 'created'))
            # a directory is only followed when recursion is enabled
            skipped_subdir = os.path.isdir(path) and not self._recurse
            if not skipped_subdir:
                self.watch(path)

        return found
Ejemplo n.º 15
0
    def parse_args(self):
        """
        Post-process command-line parameters.

        * split the comma-separated `valid_extensions` option into the
          list `self.valid_extensions`;
        * default the inbox to ``<working_dir>/inbox`` when none is given;
        * default the output directory to ``<working_dir>/output`` when
          the option still has its default value;
        * check that `threshold2` is strictly greater than `threshold1`.

        :raise gc3libs.exceptions.InvalidUsage: if `threshold2` is not
          greater than `threshold1`.
        """
        self.valid_extensions = [
            i.strip() for i in self.params.valid_extensions.split(',')
        ]

        if not self.params.inbox:
            # Add a default inbox if not passed from command line
            self.params.inbox = [
                Url(os.path.join(self.params.working_dir, 'inbox'))
            ]
        if self.params.output == self.actions['output'].default:
            # Use directory 'output' as output directory by default
            self.params.output = os.path.join(self.params.working_dir,
                                              'output')

        if int(self.params.threshold2) <= int(self.params.threshold1):
            # the exception must be raised: merely constructing it (as
            # the code used to do) silently accepts invalid thresholds
            raise gc3libs.exceptions.InvalidUsage(
                "Value of `--threshold2` should be greater than `--threshold1`"
            )
Ejemplo n.º 16
0
 def get_new_events(self):
     """
     Read pending inotify events (without blocking) and translate them
     into a list of ``(url, kind)`` pairs, where `kind` is one of
     ``'created'``, ``'modified'`` or ``'deleted'``.

     A ``created``/``modified`` notification is only emitted when a
     file is closed after writing, based on the masks accumulated for
     that path during this call.  With recursion enabled, a newly
     created directory is walked: sub-directories are passed to
     `self.watch` and contained files are reported as ``created``.

     :raise AssertionError: if an event refers to a watch descriptor
       not present in `self._wd`.
     """
     collected = []
     # bitwise OR of all event masks seen so far in this call, per path
     seen_masks = defaultdict(int)
     for ievent in self._ifd.read(0):
         try:
             # if `name` is empty, the event is about the watched
             # directory itself
             path = os.path.join(self._wd[ievent.wd], ievent.name)
         except KeyError:
             raise AssertionError(
                 "Received event {0} for unknown watch descriptor {1}"
                 .format(ievent, ievent.wd))

         seen_masks[path] |= ievent.mask
         history = seen_masks[path]

         # we want to dispatch a single `created` or `modified` event,
         # so wait until the file is closed and then look at what has
         # been recorded for it
         if ievent.mask & inotify.flags.CLOSE_WRITE:
             if history & inotify.flags.CREATE:
                 kind = 'created'
             elif history & inotify.flags.MODIFY:
                 kind = 'modified'
             else:
                 kind = None
             if kind is not None:
                 collected.append(self.__make_event(path, kind))

         if ievent.mask & inotify.flags.DELETE:
             collected.append(self.__make_event(path, 'deleted'))

         new_directory = (self._recurse
                          and ievent.mask & inotify.flags.ISDIR
                          and ievent.mask & inotify.flags.CREATE)
         if not new_directory:
             continue
         # A new directory has been created: watch all of its
         # subdirectories and report any file already present in it.
         for (rootdir, dirnames, filenames) in os.walk(path):
             for dirname in dirnames:
                 self.watch(os.path.join(rootdir, dirname))
             for filename in filenames:
                 # report creation event
                 file_url = Url(os.path.join(rootdir, filename))
                 collected.append((file_url, 'created'))
     return collected
Ejemplo n.º 17
0
    def get_new_events(self):
        """
        List the container and report what changed since the last call.

        Returns a list of ``(url, kind)`` pairs: ``'created'`` for
        objects not seen before (they are added to
        `self._known_objects`) and ``'deleted'`` for known objects no
        longer listed (they are removed from it).
        """
        # List objects in container
        _, objects = self.conn.get_container(self.container)
        newevents = []

        # URLs are already used as dictionary keys in
        # `self._known_objects`, so they can be collected in a set:
        # this keeps the "deleted" check below from scanning a list
        # once per known object.
        current_urls = set()
        for obj in objects:
            url = Url('{baseurl}&name={objname}'
                      .format(baseurl=self.url,
                              objname=obj['name']))
            current_urls.add(url)
            if url not in self._known_objects:
                self._known_objects[url] = obj
                newevents.append((url, 'created'))
        # iterate over a copy, since entries are removed in the loop
        for url in list(self._known_objects):
            if url not in current_urls:
                newevents.append((url, 'deleted'))
                self._known_objects.pop(url)
        return newevents
Ejemplo n.º 18
0
    def setUp(self):
        """
        Generator-style fixture: create a store on a randomly-named
        table of the local MySQL test database and open a connection to
        it; after the test has run (code following the ``yield``), drop
        the table and close the connection.

        The test is skipped if the database cannot be reached.
        """
        # generate random table name
        from string import ascii_letters as letters
        import random
        self.table_name = 'test_' + (''.join(
            random.choice(letters) for _ in range(10)))

        try:
            self.db_url = Url('mysql://*****:*****@localhost/gc3')
            self.store = make_store(self.db_url, table_name=self.table_name)
        except sqlalchemy.exc.OperationalError:
            # `pytest.mark.skip(...)` only builds a marker object and
            # does not skip anything; `pytest.skip()` actually aborts
            # the set-up and marks the test as skipped.
            pytest.skip("Cannot connect to MySQL database.")

        # create a connection to the database
        self.conn = self.store._engine.connect()

        yield

        self.conn.execute('drop table `%s`' % self.table_name)
        self.conn.close()
Ejemplo n.º 19
0
def create_gc3pie_sql_store():
    '''Build the `GC3Pie` store that persists jobs in the "tasks" table.

    The database location is taken from ``cfg.db_master_uri`` and the
    extra columns from `get_gc3pie_store_extra_fields`.

    Returns
    -------
    gc3libs.persistence.sql.SqlStore
        `GC3Pie` store

    Warning
    -------
    The "tasks" table must already exist.
    '''
    logger.debug('create GC3Pie store using "tasks" table')
    return make_sqlstore(
        url=Url(cfg.db_master_uri),
        table_name='tasks',
        extra_fields=get_gc3pie_store_extra_fields(),
    )
Ejemplo n.º 20
0
    def get_events(self):
        """
        List the container and report what changed since the last call.

        Returns a list of ``(url, mask)`` pairs: ``IN_CLOSE_WRITE`` for
        objects not seen before (they are added to
        `self._known_objects`) and ``IN_DELETE`` for known objects no
        longer listed (they are removed from it).
        """
        # List objects in container
        _, objects = self.conn.get_container(self.container)
        newevents = []

        # URLs are already used as dictionary keys in
        # `self._known_objects`, so they can be collected in a set:
        # this keeps the "deleted" check below from scanning a list
        # once per known object.
        current_urls = set()
        for obj in objects:
            url = Url(str(self.url) + '&name=' + obj['name'])
            current_urls.add(url)
            if url not in self._known_objects:
                self._known_objects[url] = obj
                # Here it's correct not to put IN_CREATE because on
                # swift you will see an object only when it has been
                # completely uploaded.
                newevents.append((url, events['IN_CLOSE_WRITE']))
        # iterate over a copy, since entries are removed in the loop
        for url in list(self._known_objects):
            if url not in current_urls:
                newevents.append((url, events['IN_DELETE']))
                self._known_objects.pop(url)
        return newevents
Ejemplo n.º 21
0
    def new_tasks(self, extra, epath=None, emask=0):
        """
        Return the list of new tasks to be added to the session.

        Without `epath` (start-up), walk every inbox directory listed
        in `self.params.inbox` and return one `BemoviWorkflow` for each
        file with a valid extension that has no job in the session yet.

        With `epath`, react to a single event: if the `IN_CLOSE_WRITE`
        bit is set in `emask` and the file has a valid extension,
        either redo the existing job for that file (when
        `job.should_resubmit()` is true) or return a new
        `BemoviWorkflow` for it.

        :param dict extra: keyword arguments forwarded to
          `BemoviWorkflow`; its ``'rparams'`` entry is (re)set here from
          the command-line parameters.
        :param epath: URL of the file the event is about (its `.path`
          attribute is used), or a false value for the start-up scan.
        :param int emask: bit mask of the event.
        :return: list of new `BemoviWorkflow` instances (possibly empty).
        """
        extra['rparams'] = {
            'memory':
            str(self.params.memory_per_core.amount(unit=gc3libs.quantity.MB)),
            'fps':
            self.params.fps,
            'pixel_to_scale':
            self.params.pixel_to_scale,
            'difference_lag':
            self.params.difference_lag,
            'threshold1':
            self.params.threshold1,
            'threshold2':
            self.params.threshold2,
        }

        if not epath:
            # At startup, scan all the input directories and check if
            # there is a file which is not processed yet.

            # First, check which files we already did
            known_videos = {i.get('videofile', None): i for i in self.session}
            new_jobs = []
            for inboxurl in self.params.inbox:
                # WARNING: we assume this is a filesystem directory
                try:
                    inbox = inboxurl.path
                except AttributeError:
                    # not a `Url` object (e.g., a plain string)
                    inboxurl = Url(inboxurl)
                    inbox = inboxurl.path
                for dirpath, dirnames, fnames in os.walk(inbox):
                    for fname in fnames:
                        filename = os.path.join(dirpath, fname)
                        if filename.rsplit('.',
                                           1)[-1] not in self.valid_extensions:
                            continue
                        # Test the bare file name: `filename` includes
                        # the directory part, so checking its prefix
                        # would never match.
                        if fname.startswith('._'):
                            self.log.warning(
                                "Ignoring file %s as it starts with '._'",
                                filename)
                            continue
                        if filename not in known_videos:
                            app = BemoviWorkflow(filename,
                                                 self.params.email_from,
                                                 self.params.smtp_server,
                                                 **extra)
                            new_jobs.append(app)
                            known_videos[filename] = app
                        else:
                            # In case it exists, but the application
                            # terminated with an exit code, then we
                            # want to resubmit the job anyway.
                            # NOTE(review): the resubmission calls
                            # below are commented out, so at present
                            # this branch only logs a message.
                            job = known_videos[filename]
                            if job.should_resubmit():
                                self.log.info(
                                    "File %s might have been overwritten. Resubmitting job %s",
                                    filename, job.persistent_id)
                                # self._controller.kill(job)
                                # self._controller.progress()
                                # self._controller.redo(job, from_stage=0)
            return new_jobs

        fpath = epath.path
        if emask & plr.events['IN_CLOSE_WRITE']:
            if fpath.rsplit('.', 1)[-1] not in self.valid_extensions:
                self.log.info(
                    "Ignoring file %s as it does not end with a valid extension (%s)",
                    fpath, str.join(',', self.valid_extensions))
                return []

            # Only resubmit the job if it failed
            for job in self.session.tasks.values():
                try:
                    job.videofile
                except AttributeError:
                    # Not a GBemoviWorkflow application
                    continue
                if job.videofile == fpath:
                    if job.should_resubmit():
                        self.log.info(
                            "Re-submitting job %s as file %s has been overwritten",
                            job.persistent_id, fpath)
                        job.update_configuration()
                        self._controller.kill(job)
                        self._controller.progress()
                        self._controller.redo(job, from_stage=0)

                    else:
                        self.log.info(
                            "Ignoring already successfully processed file %s",
                            fpath)
                    # In both cases, do not return any new job
                    return []
            return [
                BemoviWorkflow(fpath, self.params.email_from,
                               self.params.smtp_server, **extra)
            ]
        return []
Ejemplo n.º 22
0
Archivo: store.py Proyecto: imcf/gc3pie
def make_store(uri, *args, **extra_args):
    """
    Factory producing concrete `Store`:class: instances.

    Given a URL and (optionally) initialization arguments, return a
    fully-constructed `Store`:class: instance.

    The only required argument is `uri`; if any other arguments are
    present in the function invocation, they are passed verbatim to
    the constructor associated with the scheme of the given `uri`.

    Example::

      >>> fs1 = make_store('file:///tmp')
      >>> fs1.__class__.__name__
      'FilesystemStore'

    Argument `uri` can also consist of a path name, in which case a
    URL scheme 'file:///' is assumed::

      >>> fs2 = make_store('/tmp')
      >>> fs2.__class__.__name__
      'FilesystemStore'

    Schemes of the form ``dialect+driver`` are dispatched to the SQL
    store according to the ``dialect`` part only.  Any other scheme is
    looked up in the registry of store constructors; an unknown scheme
    raises `KeyError`.  Every error is logged and then re-raised.
    """
    if not isinstance(uri, Url):
        uri = Url(uri)

    # since SQLAlchemy allows URIs of the form `db+driver://...`
    # (e.g., `postgresql+psycopg://...`) we need to examine the URI
    # scheme only up to the first `+`
    scheme = uri.scheme.split('+')[0]

    try:
        # hard-code schemes that are supported by GC3Pie itself
        if uri.scheme == 'file':
            import gc3libs.persistence.filesystem
            return gc3libs.persistence.filesystem.make_filesystemstore(
                uri, *args, **extra_args)
        elif scheme in [
                # DBs supported in SQLAlchemy core as of version 1.1,
                # see: http://docs.sqlalchemy.org/en/latest/dialects/index.html
                'firebird',
                'mssql',
                'mysql',
                'oracle',
                'postgresql',
                'sqlite',
                'sybase',
        ]:
            import gc3libs.persistence.sql
            return gc3libs.persistence.sql.make_sqlstore(
                uri, *args, **extra_args)
        else:
            # Look the constructor up separately from calling it, so a
            # `KeyError` raised *inside* the constructor is not
            # misreported as an unknown URL scheme.
            try:
                ctor = _registered_store_ctors[uri.scheme]
            except KeyError:
                gc3libs.log.error(
                    "Unknown URL scheme '%s' in"
                    " `gc3libs.persistence.make_store`:"
                    " has never been registered.", uri.scheme)
                raise
            return ctor(uri, *args, **extra_args)
    except Exception as err:
        gc3libs.log.error("Error constructing store for URL '%s': %s: %s", uri,
                          err.__class__.__name__, err)
        raise
Ejemplo n.º 23
0
 def __init__(self, url=None):
     """
     Store `url` on the instance, converting it to a `Url` object
     when it is a true value of another type.  False values
     (e.g. ``None``) are stored unchanged.
     """
     needs_conversion = url and not isinstance(url, Url)
     self.url = Url(url) if needs_conversion else url
Ejemplo n.º 24
0
 def __init__(self, url, **kw):
     """
     Store `url`, converted to a `Url` object, on the instance.
     Extra keyword arguments are accepted but not used here.
     """
     normalized = Url(url)
     self.url = normalized
Ejemplo n.º 25
0
 def __make_event(self, relpath, event):
     """
     Return the pair ``(url, event)`` for an event on `relpath`.

     A false `relpath` stands for the polled URL itself; otherwise the
     URL is built by joining `relpath` to the path of the polled URL.
     """
     if not relpath:
         return (self.url, event)
     target = os.path.join(self.url.path, relpath)
     return (Url(target), event)
Ejemplo n.º 26
0
 def __init__(self, url, mask, **kw):
     """
     Store `url` (converted to a `Url` object) and the event `mask`
     on the instance.  Extra keyword arguments are accepted but not
     used here.
     """
     self.url, self.mask = Url(url), mask