Example 1
    def _start(self, src, parameters):
        """
        Client specific start implementation.

        Args:
            src (str): Input data.
            parameters (dict): Parameters dict.

        Returns:
            dict: response.
        """
        # Get environment and remove it from parameters
        parameters = parameters.copy()
        env = parameters.pop('env', dict())

        # Checks Apyfal version
        self._checks_apyfal_version(env)

        # Initialize metering
        with self._accelerator_lock:
            self._init_metering(env,
                                reload=parameters['app'].pop('reload', False))

        # Run and return response
        return self._run_executable(mode='0',
                                    input_file=src,
                                    input_json=str(_uuid()),
                                    output_json=str(_uuid()),
                                    parameters=parameters)
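Note: none of the snippets below show their imports, but _uuid is presumably uuid.uuid4 bound to a private alias, a common convention for keeping a module's public namespace clean. A minimal sketch of that assumed import:

    # Assumed import convention (not shown in the snippets):
    from uuid import uuid4 as _uuid

    print(str(_uuid()))  # e.g. '1f0e7ac8-5c94-4d7a-9c3b-2f1d7c0a9e42'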
Example 2
    def as_tmp_file(self, url, mode):
        """
        Return temporary representation of a file.

        Args:
            url (str): apyfal.storage URL of the file.
            mode (str): Access mode. 'r' or 'w'.

        Returns:
            str or file-like object: temporary object.
        """
        # Generates randomized temporary filename
        local_path = _os_path.join(self._tmp_dir, str(_uuid()))

        # Gets input file
        if 'r' in mode:
            _srg.copy(url, local_path)

        # Yields local temporary path
        yield local_path

        # Sends output file
        if 'w' in mode:
            _srg.copy(local_path, url)

        # Clears temporary file
        _remove(local_path)
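as_tmp_file is a generator (it yields the local path), so in the full source it is presumably decorated with contextlib.contextmanager; otherwise the copy-back and cleanup after the yield would never run. A usage sketch under that assumption, with a hypothetical storage instance and URL:

    # Sketch only: assumes @contextmanager wraps as_tmp_file in the source.
    with storage.as_tmp_file('host://results/output.bin', mode='w') as path:
        with open(path, 'wb') as tmp_file:
            tmp_file.write(b'data')  # written to the local temporary file
    # on exit: the file is copied to the URL, then the temp file is removed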
Example 3
    def _get_id():
        """
        Return a unique ID.

        Returns:
            str: id
        """
        return 'pycosio%s' % (str(_uuid()).replace('-', ''))
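Stripping the dashes from str(uuid4()) yields exactly what the uuid4().hex attribute returns, so an equivalent spelling of this helper would be:

    from uuid import uuid4 as _uuid

    def _get_id():
        """Return a unique ID."""
        return 'pycosio%s' % _uuid().hex  # same value, no str/replace round-trip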
Example 4
 def state(self, id=None):
     if not id:
         id = '.{0}'.format(_uuid())
         # adds a leading dot to make use of stateconf's namespace feature.
     try:
         return self.get_all_decls()[id]
     except KeyError:
         self.get_all_decls()[id] = s = StateDeclaration(id)
         self.decls.append(s)
         return s
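The try/except KeyError above is a get-or-create idiom. dict.setdefault cannot replace it here: the default would be constructed eagerly on every call, and the miss branch also appends to self.decls. A minimal sketch of the same shape with a factory callable, so the value is only built on a cache miss:

    cache = {}

    def get_or_create(key, make):
        # Same get-or-create pattern as state() above, outside the class.
        try:
            return cache[key]
        except KeyError:
            cache[key] = value = make()  # built only when missing
            return value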
Example 5
    def run(self, inputs):
        wf = self.__model
        tk_id = str(_uuid())
        token = models.WorkflowToken(id=tk_id,
                                     wf=wf)

        wf = getWorkflowImplementation(wf.wf_implem)
        token.put()
        wf.initTokens(token, inputs)
        return WorkflowToken(token)
Example 7
 def state(self, id=None):
     if not id:
         id = '.' + str(_uuid())
         # adds a leading dot to make use of stateconf's namespace feature.
     try:
         return self.all_decls[id]
     except KeyError:
         self.all_decls[id] = s = StateDeclaration(id, self)
         self.decls.append(s)
         return s
Example 8
    def _stop(self):
        """
        Client specific stop implementation.

        Returns:
            dict: response.
        """
        try:
            return self._run_executable(mode='2', output_json=str(_uuid()))
        finally:
            _systemctl('stop', 'meteringsession', 'meteringclient')
Example 9
    def _process(self, src, dst, parameters):
        """
        Client specific process implementation.

        Args:
            src (str): Input data.
            dst (str): Output data.
            parameters (dict): Parameters dict.

        Returns:
            dict: response dict.
        """
        return self._run_executable(
            mode='1',
            input_file=src,
            output_file=dst,
            input_json=str(_uuid()),
            output_json=str(_uuid()),
            parameters=parameters,

            # Reduces verbosity to minimum by default
            extra_args=['-v4'])
Example 10
    def __init__(self, role=None, policy=None, acs_client_kwargs=None,
                 acs_create_instance_kwargs=None, **kwargs):
        _CSPHost.__init__(self, **kwargs)

        # Initializes attributes
        self._security_group_id = None
        self._role, self._policy = self._get_role_and_policy(role, policy)

        section = self._config[self._config_section]
        self._acs_client_kwargs = (
            acs_client_kwargs or
            section.get_literal('acs_client_kwargs') or dict())
        self._acs_create_instance_kwargs = (
            acs_create_instance_kwargs or
            section.get_literal('acs_create_instance_kwargs') or dict())

        # ClientToken guarantees the idempotence of requests
        self._client_token = str(_uuid())
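A client token generated once per host object makes instance-creation requests idempotent: if a request times out and is retried with the same token, the provider returns the already-created resource instead of creating a duplicate. A generic sketch of the pattern (create_instance is a hypothetical API here, not the real acs client):

    from uuid import uuid4 as _uuid

    class HostStub:
        def __init__(self):
            # Generated once, reused for every retry of the same request.
            self._client_token = str(_uuid())

        def start(self, api):
            for _ in range(3):
                try:
                    # Same token on each attempt => at most one instance.
                    return api.create_instance(token=self._client_token)
                except TimeoutError:
                    continue  # safe to retry: the token deduplicates
            raise RuntimeError('create_instance kept timing out')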
Example 11
    def __init__(self,
                 accelerator=None,
                 client_type=None,
                 accelize_client_id=None,
                 accelize_secret_id=None,
                 config=None,
                 **_):
        self._name = accelerator
        self._client_type = client_type
        self._url = None
        self._stopped = False

        # Define a session UUID
        self._session_uuid = str(_uuid())

        # Dict to cache values
        self._cache = {}

        # Read configuration
        self._config = config = _cfg.create_configuration(config)

        # Get Start parameters
        self._configuration_parameters = self._load_configuration(
            self.DEFAULT_CONFIGURATION_PARAMETERS, 'configuration')

        # Add credential information if available
        client_id = config['accelize'].set('client_id', accelize_client_id)
        if client_id:
            self._configuration_parameters['env']['client_id'] = client_id
        secret_id = config['accelize'].set('secret_id', accelize_secret_id)
        if secret_id:
            self._configuration_parameters['env']['client_secret'] = secret_id

        #: Directories that can be processed remotely on host
        self._authorized_host_dirs = [
            '%s/' % _os_path.abspath(_os_path.expanduser(path))
            for path in (config['security'].get_list('authorized_host_dirs')
                         or self.DEFAULT_AUTHORIZED_HOST_DIRS)
        ]

        # Get process parameters
        self._process_parameters = self._load_configuration(
            self.DEFAULT_PROCESS_PARAMETERS, 'process')
Example 12
def layout2yaml(layout, site, name, filename):
    """Write layout for a given siye to a YAML file

    layout
        layout from a wflopg.Owflop object
    site : str
        site name
    name : str
        layout name
    filename : str
        file to write to

    """
    output = {}
    output['name'] = name
    output['uuid'] = str(_uuid())
    output['site'] = site
    output['layout'] = layout.values.tolist()
    with open(filename, 'w') as f:
        _yaml(typ='safe').dump(output, f)
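_yaml here behaves like ruamel.yaml's YAML class (constructed with typ='safe' and exposing .dump(data, stream)), and layout.values.tolist() suggests a pandas- or xarray-like object. A usage sketch under those assumptions, with hypothetical inputs:

    # Assumes: from ruamel.yaml import YAML as _yaml
    import numpy as np

    class FakeLayout:
        # Stand-in for the layout attribute of a wflopg.Owflop object.
        values = np.array([[0.0, 0.0], [500.0, 250.0]])

    layout2yaml(FakeLayout(), site='example-site', name='baseline',
                filename='baseline_layout.yaml')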
Example 13
    def start(self):
        if self.is_running():
            raise RuntimeError('DML Execution Engine already started, use stop() to stop it.')

        environment = self.environment
        cluster_name = 'cluster_%s' % str(_uuid())
        self.cluster_working_dir = environment._create_job_home_dir(cluster_name)

        # put initial status file and metadata files
        self._prepare_cluster_init_files()

        # Wait for the application to start and then retrieve commander port
        try:
            _log.info('Step 1/4: submitting the engine application...')

            # submit actual application
            self.app_id = environment._submit_job(
                job_working_dir = self.cluster_working_dir,
                num_workers = environment.get_num_workers(),
                silent = True)

            _log.info('Step 2/4: waiting for the engine(%s) to run ...' % self.app_id)
            self._wait_for_application_start()

            # Read commander init file to get commander listening URI
            _log.info('Step 3/4: waiting for commander to be ready...')
            commander_url = self._wait_for_commander_ready()
            self.cluster_controller = commander_url

            # Wait for all workers to be ready
            _log.info('Step 4/4: waiting for all %s workers to be ready...' % environment.get_num_workers())
            self._wait_for_all_workers_ready()

            _log.info('Cluster is listening at: %s' % commander_url)
        except Exception as e:
            _log.error('Error encountered when waiting for DML Execution Engine to start: %s' % e)

            if self.app_id:
                environment._cancel_job(self.app_id, silent = True)
            raise
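Each _wait_for_* step presumably polls shared cluster state until a condition holds or a deadline passes; a generic sketch of such a helper (hypothetical names, not the actual implementation):

    import time

    def wait_for(condition, timeout=300.0, interval=2.0):
        # Poll-until-ready loop, a plausible shape for steps 2-4 above.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if condition():
                return
            time.sleep(interval)
        raise TimeoutError('condition not met within %s s' % timeout)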
Example 14
    def _session(self):
        """
        Requests session

        Returns:
            requests.sessions.Session: Session
        """
        session_kwargs = dict(max_retries=self._REQUEST_RETRIES)

        # Gets SSL certificate
        if self._ssl_cert_crt is None and _os_path.exists(
                _cfg.APYFAL_CERT_CRT):
            # Uses the default certificate if none is provided (False disables it)
            self._ssl_cert_crt = _cfg.APYFAL_CERT_CRT

        elif (self._ssl_cert_crt
              and (hasattr(self._ssl_cert_crt, 'read')
                   or not _os_path.exists(self._ssl_cert_crt))):
            # Copies certificate locally if not reachable by local path
            ssl_cert_crt = _os_path.join(self._tmp_dir, str(_uuid()))
            _srg_copy(self._ssl_cert_crt, ssl_cert_crt)
            self._ssl_cert_crt = ssl_cert_crt

        # Enables certificates verification
        if self._ssl_cert_crt:
            session_kwargs['verify'] = self._ssl_cert_crt

            # Disables hostname verification if wildcard certificate
            from apyfal._certificates import \
                get_host_names_from_certificate
            with open(self._ssl_cert_crt, 'rb') as crt_file:
                if get_host_names_from_certificate(crt_file.read()) == ['*']:
                    session_kwargs['assert_hostname'] = False

            # Forces the URL to use HTTPS
            self._url = _utl.format_url(self._url,
                                        force_secure=bool(self._ssl_cert_crt))

        # Initializes session
        return _utl.http_session(**session_kwargs)
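_utl.http_session is internal to apyfal, but a session with automatic retries and certificate verification can be assembled from the public requests/urllib3 API; a plausible minimal equivalent (the real helper may differ, e.g. in how assert_hostname is honored):

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    def http_session(max_retries=3, verify=True):
        # Mounts a retrying adapter on both schemes and applies the
        # certificate setting ('verify' is True, False, or a cert path).
        session = requests.Session()
        adapter = HTTPAdapter(max_retries=Retry(
            total=max_retries, backoff_factor=0.5))
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        session.verify = verify
        return session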
Example 15
    def put_object(self,
                   locator,
                   path,
                   content=None,
                   headers=None,
                   data_range=None,
                   new_file=False):
        """
        Put object.

        Args:
            locator (str): locator name
            path (str): Object path.
            content (bytes-like object): File content.
            headers (dict): Header to put with the file.
            data_range (tuple of int): Range of position of content.
            new_file (bool): If True, force new file creation.

        Returns:
            dict: File header.
        """
        with self._put_lock:
            if new_file:
                self.delete_object(locator, path, not_exists_ok=True)
            try:
                # Existing file
                file = self._get_locator_content(locator)[path]
            except KeyError:
                # New file
                self._get_locator_content(locator)[path] = file = {
                    'Accept-Ranges': 'bytes',
                    'ETag': str(_uuid()),
                    '_content': bytearray(),
                    '_lock': _Lock()
                }

                if self._header_size:
                    file[self._header_size] = 0

                if self._header_ctime:
                    file[self._header_ctime] = self._format_date(_time())

        # Update file
        with file['_lock']:
            if content:
                file_content = file['_content']

                # Write full content
                if not data_range or (data_range[0] is None
                                      and data_range[1] is None):
                    file_content[:] = content

                # Write content range
                else:
                    # Define range
                    start, end = data_range
                    if start is None:
                        start = 0
                    if end is None:
                        end = start + len(content)

                    # Add padding if missing data
                    if start > len(file_content):
                        file_content[len(file_content):start] = (
                            start - len(file_content)) * b'\0'

                    # Flush new content
                    file_content[start:end] = content

            if headers:
                file.update(headers)

            if self._header_size:
                file[self._header_size] = len(file['_content'])

            if self._header_mtime:
                file[self._header_mtime] = self._format_date(_time())

            # Return Header
            header = file.copy()
        del header['_content']
        return header
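The range-write branch pads with NUL bytes whenever the write starts past the current end of the content, mirroring sparse-file semantics; the bytearray arithmetic in isolation:

    content = bytearray(b'ab')  # existing content, length 2
    start, end = 5, 7           # write range beyond the current end

    if start > len(content):
        # Pad the gap (offsets 2..4) with three NUL bytes.
        content[len(content):start] = (start - len(content)) * b'\0'
    content[start:end] = b'XY'

    assert bytes(content) == b'ab\x00\x00\x00XY'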
Example 16
 def create_job_home_dir(environment, job_name):
     '''
     Given a job name, create a home directory for the job in EC2 cluster
     '''
     return environment.s3_state_path + '/' + job_name + '-' + str(_uuid())
Example 17
    def __init__(self, name, stages=[[]], final_stage=None, environment=None,
                 _exec_dir=None, _task_output_paths=None, _job_type = 'PIPELINE'):
        """
        Construct a job.

        Parameters
        ----------
        name : str
            Name of this Job, must be unique.

        stages: list[list[Task]]
            Collection of task(s) to be executed.

        final_stage : list[task] | task
            Collection of task(s) whose outputs are to be returned.

        environment : Environment, optional
            Environment used for this execution. See
            :py:class:`~graphlab.deploy.environment.LocalAsync` for an example
            environment.

        """
        _raise_error_if_not_of_type(name, [str], 'name')
        _raise_error_if_not_of_type(stages, [list], 'stages')
        _raise_error_if_not_of_type(final_stage,
                                [list, _Task, type(None)], 'final_stage')

        self.name = name
        self.environment = environment

        self._stages = stages
        self._num_tasks = 0
        self._status = 'Pending'
        self._start_time = None
        self._end_time = None
        self._error = None

        self._job_type = _job_type

        # Set the packages
        self._packages = set()
        for task in self._stages:
            for t in task:
                self._num_tasks += 1
                self._packages.update(t.get_required_packages())


        self._final_stage = final_stage
        self._task_status = {}

        self._session = _gl.deploy._default_session
        if not _exec_dir:
            relative_path = "job-results-%s" % str(_uuid())
            self._exec_dir = self.get_path_join_method()(self._session.results_dir, relative_path)
        else:
            self._exec_dir = _exec_dir

        # Location where all the outputs for the tasks are saved.
        if not _task_output_paths:
            Job._update_exec_dir(self, self._exec_dir)
        else:
            self._task_output_paths = _task_output_paths
Example 18
    def __init__(self, command, args=None, kwargs=None, name=None, qtype=None,
                 profile=None, **kwds):
        """Initialization function arguments.

        Args:
            command (function/str): The command or function to execute.
            args (tuple/dict):      Optional arguments to add to command,
                                    particularly useful for functions.
            kwargs (dict):          Optional keyword arguments to pass to the
                                    command, only used for functions.
            name (str):             Optional name of the job. If not defined,
                                    guessed. If a job of the same name is
                                    already queued, an integer job number (not
                                    the queue number) will be added, i.e.
                                    <name>.1
            qtype (str):            Override the default queue type
            profile (str):          The name of a profile saved in the
                                    conf

            *All other keywords are parsed into cluster keywords by the
            options system. For available keywords see `fyrd.option_help()`*
        """

        ########################
        #  Sanitize arguments  #
        ########################
        _logme.log('Args pre-check: {}'.format(kwds), 'debug')
        kwds = _options.check_arguments(kwds)
        _logme.log('Args post-check: {}'.format(kwds), 'debug')

        # Override autoclean state (set in config file)
        if 'clean_files' in kwds:
            self.clean_files = kwds.pop('clean_files')
        if 'clean_outputs' in kwds:
            self.clean_outputs = kwds.pop('clean_outputs')

        # Path handling
        [kwds, self.runpath,
         self.outpath, self.scriptpath] = _conf.get_job_paths(kwds)

        # Save command
        self.command = command
        self.args    = args

        # Merge in the profile; this includes all args from the DEFAULT
        # profile as well, ensuring that those are always set at a minimum.
        profile = profile if profile else 'DEFAULT'
        prof = _conf.get_profile(profile)
        if not prof:
            raise _ClusterError('No profile found for {}'.format(profile))
        for k,v in prof.args.items():
            if k not in kwds:
                kwds[k] = v

        # Use the default profile as a backup if any arguments missing
        default_args = _conf.DEFAULT_PROFILES['DEFAULT']
        default_args.update(_conf.get_profile('DEFAULT').args)
        for opt, arg in default_args.items():
            if opt not in kwds:
                _logme.log('{} not in kwds, adding from default: {}:{}'
                           .format(opt, opt, arg), 'debug')
                kwds[opt] = arg

        # Get environment
        if not _queue.MODE:
            _queue.MODE = _queue.get_cluster_environment()
        self.qtype = qtype if qtype else _queue.MODE
        self.queue = _queue.Queue(user='******', qtype=self.qtype)
        self.state = 'Not_Submitted'

        # Set name
        if not name:
            if callable(command):
                strcmd = str(command).strip('<>')
                parts = strcmd.split(' ')
                if parts[0] == 'bound':
                    name = '_'.join(parts[2:3])
                else:
                    parts.remove('function')
                    try:
                        parts.remove('built-in')
                    except ValueError:
                        pass
                    name = parts[0]
            else:
                name = command.split(' ')[0].split('/')[-1]

        # Make sure name not in queue
        self.uuid = str(_uuid()).split('-')[0]
        names     = [i.name.split('.')[0] for i in self.queue]
        namecnt   = len([i for i in names if i == name])
        name      = '{}.{}.{}'.format(name, namecnt, self.uuid)
        self.name = name

        # Set modules
        self.modules = kwds.pop('modules') if 'modules' in kwds else None
        if self.modules:
            self.modules = _run.opt_split(self.modules, (',', ';'))

        # Make sure args are a tuple or dictionary
        if args:
            if isinstance(args, str):
                args = tuple(args)
            if not isinstance(args, (tuple, dict)):
                try:
                    args = tuple(args)
                except TypeError:
                    args = (args,)

        # In case cores are passed as None
        if 'nodes' not in kwds:
            kwds['nodes'] = default_args['nodes']
        if 'cores' not in kwds:
            kwds['cores'] = default_args['cores']
        self.nodes = kwds['nodes']
        self.cores = kwds['cores']

        # Set output files
        suffix = kwds.pop('suffix') if 'suffix' in kwds \
                 else _conf.get_option('jobs', 'suffix')
        if 'outfile' in kwds:
            pth, fle = _os.path.split(kwds['outfile'])
            if not pth:
                pth = self.outpath
            kwds['outfile'] = _os.path.join(pth, fle)
        else:
            kwds['outfile'] = _os.path.join(
                self.outpath, '.'.join([name, suffix, 'out']))
        if 'errfile' in kwds:
            pth, fle = _os.path.split(kwds['errfile'])
            if not pth:
                pth = self.outpath
            kwds['errfile'] = _os.path.join(pth, fle)
        else:
            kwds['errfile'] = _os.path.join(
                self.outpath, '.'.join([name, suffix, 'err']))
        self.outfile = kwds['outfile']
        self.errfile = kwds['errfile']

        # Check and set dependencies
        if 'depends' in kwds:
            dependencies = _run.listify(kwds.pop('depends'))
            self.dependencies = []
            errmsg = 'Dependencies must be number or list'
            for dependency in dependencies:
                if isinstance(dependency, str):
                    if not dependency.isdigit():
                        raise _ClusterError(errmsg)
                    dependency  = int(dependency)
                if not isinstance(dependency, (int, Job)):
                    raise _ClusterError(errmsg)
                self.dependencies.append(dependency)

        ######################################
        #  Command and Function Preparation  #
        ######################################

        # Get imports
        imports = kwds.pop('imports') if 'imports' in kwds else None

        # Get syspaths
        syspaths = kwds.pop('syspaths') if 'syspaths' in kwds else None

        # Split out sys.paths from imports and set imports in self
        if imports:
            self.imports = []
            syspaths = syspaths if syspaths else []
            for i in imports:
                if i.startswith('sys.path.append')\
                        or i.startswith('sys.path.insert'):
                    syspaths.append(i)
                else:
                    self.imports.append(i)

        # Function specific initialization
        if callable(command):
            self.kind = 'function'
            script_file = _os.path.join(
                self.scriptpath, '{}_func.{}.py'.format(name, suffix)
                )
            self.poutfile = self.outfile + '.func.pickle'
            self.function = _Function(
                file_name=script_file, function=command, args=args,
                kwargs=kwargs, imports=self.imports, syspaths=syspaths,
                outfile=self.poutfile
            )
            # Collapse the _command into a python call to the function script
            executable = '#!/usr/bin/env python{}'.format(
                _sys.version_info.major) if _conf.get_option(
                    'jobs', 'generic_python') else _sys.executable

            command = '{} {}'.format(executable, self.function.file_name)
            args = None
        else:
            self.kind = 'script'
            self.poutfile = None

        # Collapse args into command
        command = command + ' '.join(args) if args else command

        #####################
        #  Script Creation  #
        #####################

        # Build execution wrapper with modules
        precmd  = ''
        if self.modules:
            for module in self.modules:
                precmd += 'module load {}\n'.format(module)

        # Create queue-dependent scripts
        sub_script = ''
        if self.qtype == 'slurm':
            scrpt = _os.path.join(
                self.scriptpath, '{}.{}.sbatch'.format(name, suffix)
            )

            # We use a separate script and a single srun command to avoid
            # issues with multiple threads running at once
            exec_script  = _os.path.join(self.scriptpath,
                                         '{}.{}.script'.format(name, suffix))
            exe_script   = _scrpts.CMND_RUNNER_TRACK.format(
                precmd=precmd, usedir=self.runpath, name=name, command=command)
            # Create the exec_script Script object
            self.exec_script = _Script(script=exe_script,
                                       file_name=exec_script)

            # Add all of the keyword arguments at once
            precmd = _options.options_to_string(kwds, self.qtype) + precmd

            ecmnd = 'srun bash {}'.format(exec_script)
            sub_script = _scrpts.SCRP_RUNNER.format(precmd=precmd,
                                                    script=exec_script,
                                                    command=ecmnd)

        elif self.qtype == 'torque':
            scrpt = _os.path.join(self.scriptpath,
                                  '{}.cluster.qsub'.format(name))

            # Add all of the keyword arguments at once
            precmd = _options.options_to_string(kwds, self.qtype) + precmd

            sub_script = _scrpts.CMND_RUNNER_TRACK.format(
                precmd=precmd, usedir=self.runpath, name=name, command=command)

        elif self.qtype == 'local':
            # Create the pool
            if not _local.JQUEUE or not _local.JQUEUE.runner.is_alive():
                threads = kwds['threads'] if 'threads' in kwds \
                        else _local.THREADS
                _local.JQUEUE = _local.JobQueue(cores=threads)

            scrpt = _os.path.join(self.scriptpath, '{}.cluster'.format(name))
            sub_script = _scrpts.CMND_RUNNER_TRACK.format(
                precmd=precmd, usedir=self.runpath, name=name, command=command)

        else:
            raise _ClusterError('Invalid queue type')

        # Create the submission Script object
        self.submission = _Script(script=sub_script,
                                  file_name=scrpt)

        # Save the keyword arguments for posterity
        self.kwargs = kwds
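The uniquified name combines the base name, the count of same-named jobs already queued, and the first segment of a uuid4; the scheme in isolation, with a hypothetical queue listing:

    from uuid import uuid4 as _uuid

    name = 'align_reads'
    queued = ['align_reads.0.1a2b3c4d', 'sort_bam.0.9f8e7d6c']  # hypothetical

    uuid_part = str(_uuid()).split('-')[0]  # first 8 hex characters
    namecnt = len([q for q in queued if q.split('.')[0] == name])
    print('{}.{}.{}'.format(name, namecnt, uuid_part))
    # e.g. 'align_reads.1.0a1b2c3d'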
Example 21
    def put_object(self,
                   locator,
                   path,
                   content=None,
                   headers=None,
                   data_range=None,
                   new_file=False):
        """
        Put object.

        Args:
            locator (str): locator name
            path (str): Object path.
            content (bytes-like object): File content.
            headers (dict): Header to put with the file.
            data_range (tuple of int): Range of position of content.
            new_file (bool): If True, force new file creation.

        Returns:
            dict: File header.
        """
        with self._put_lock:
            if new_file:
                self.delete_object(locator, path, not_exists_ok=True)
            try:
                file = self._get_locator_content(locator)[path]
            except KeyError:
                self._get_locator_content(locator)[path] = file = {
                    "Accept-Ranges": "bytes",
                    "ETag": str(_uuid()),
                    "_content": bytearray(),
                    "_lock": _Lock(),
                }

                if self._header_size:
                    file[self._header_size] = 0

                if self._header_ctime:
                    file[self._header_ctime] = self._format_date(_time())

        # Update file
        with file["_lock"]:
            if content:
                file_content = file["_content"]

                if not data_range or (data_range[0] is None
                                      and data_range[1] is None):
                    file_content[:] = content

                else:
                    start, end = data_range
                    if start is None:
                        start = 0
                    if end is None:
                        end = start + len(content)

                    if start > len(file_content):
                        file_content[len(file_content):start] = (
                            start - len(file_content)) * b"\0"

                    file_content[start:end] = content

            if headers:
                file.update(headers)

            if self._header_size:
                file[self._header_size] = len(file["_content"])

            if self._header_mtime:
                file[self._header_mtime] = self._format_date(_time())

            header = file.copy()
        del header["_content"]
        return header
Example 22
    def __init__(self, path=None, sensor='SeaWiFS', resolution='9km', 
        mask_file=None, xlim=None, ylim=None):
        # Initializes the variables to default values. The indices 'n', 'k',
        # 'j' and 'i' refer to the temporal, height, meridional and zonal
        # coordinates respectively. If one of these indexes is set to 'None',
        # then it is assumed infinite size, which is relevant for the 'time'
        # coordinate.
        self.attributes = dict()
        self.dimensions = dict(n=0, k=0, j=0, i=0)
        self.coordinates = dict(n=None, k=None, j=None, i=None)
        self.variables = dict()
        self.params = dict()
        self.data = dict()
        self.stencil_coeffs = dict()
        self.stencil_params = dict()

        # Sets global parameters for grid.
        if path is None:
            path = '/academia/data/raw/oceancolor'
        self.params['path'] = '%s/%s' % (path, sensor)
        self.params['mask_file'] = mask_file
        self.params['uuid'] = str(_uuid())
        self.params['var_list'] = ['chla']
        
        # Generates list of files, tries to match them to the pattern and to 
        # extract the time. To help understand the naming convention and
        # pattern, see the following example:
        #   A20131612013168.L3m_8D_CHL_chlor_a_9km.bz2
        # resolution = '[0-9]+km'
        if sensor == 'SeaWiFS':
            sensor_prefix = 'S'
        elif sensor == 'MODISA':
            sensor_prefix = 'A'
        else:
            sensor = '.*'
        file_pattern = ('(%s)([0-9]{4})([0-9]{3})([0-9]{4})([0-9]{3}).(L3m)_'
            '(8D)_(CHL)_(chlor_a)_(%s).bz2') % (sensor_prefix, resolution)
        flist = listdir(self.params['path'])
        flist, match = _reglist(flist, file_pattern)
        self.params['file_list'] = flist

        # Reads first file in dataset to determine array geometry and 
        # dimensions (lon, lat)
        HDF = self._open_HDF('%s/%s' % (self.params['path'], 
            self.params['file_list'][0]))
        HDF_att = HDF.attributes()
        lon = arange(HDF_att['Westernmost Longitude'], 
            HDF_att['Easternmost Longitude'], HDF_att['Longitude Step'])
        lat = arange(HDF_att['Northernmost Latitude'], 
            HDF_att['Southernmost Latitude'], -HDF_att['Latitude Step'])
        
        # If lon_0 is set, calculate how many indices have to be moved in 
        # order for latitude array to start at lon_0.
        lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(lon,
            lat, xlim, ylim)
        self.params['xlim'], self.params['ylim'] = xlim, ylim
        self.params['lon_i'], self.params['lat_j'] = ii, jj
        
        # Creates a structured array for start year, start day, end year and 
        # end day. Afterwards, the dates are converted from julian day to
        # matplotlib format, i.e. days since 0001-01-01 UTC.
        time_list = array([('%s-01-01' % (item[1]), atof(item[2]), 
            '%s-01-01' % (item[3]), atof(item[4])) for item in match], 
            dtype=[('start_year', 'a10'), ('start_day', 'f2'), 
            ('end_year', 'a10'), ('end_day', 'f2')])
        time_start = (dates.datestr2num(time_list['start_year']) + 
            time_list['start_day'] - 1)
        time_end = (dates.datestr2num(time_list['end_year']) + 
            time_list['end_day'] - 1)
        time_middle = 0.5 * (time_start + time_end)
        
        # Initializes the grid attributes, dimensions, coordinates and
        # variables.
        self.name = 'mass_concentration_of_chlorophyll_a_in_sea_water'
        self.description = ('Chlorophyll-a pigment concentration '
            'inferred from satellite visible light radiance measurements.')
        self.attributes['institution'] = HDF_att['Data Center']
        self.attributes['sensor name'] = HDF_att['Sensor Name']
        self.dimensions = dict(n=time_middle.size, k=0, j=lat.size, i=lon.size)
        self.coordinates = dict(n='time', k='height', j='latitude',
            i='longitude')
        self.variables = dict(
            time = atlantis.data.Variable(),
            height = atlantis.data.get_standard_variable('height'),
            latitude = atlantis.data.get_standard_variable('latitude'),
            longitude = atlantis.data.get_standard_variable('longitude'),
            chla = atlantis.data.get_standard_variable(
                'mass_concentration_of_chlorophyll_a_in_sea_water'
            ),
            xm = atlantis.data.Variable(),
            ym = atlantis.data.Variable(),
        )
        self.variables['time'].data = time_middle
        self.variables['time'].canonical_units = 'days since 0001-01-01 UTC' 
        #
        self.variables['height'].data = 0.
        self.variables['latitude'].data = lat
        self.variables['longitude'].data = lon
        self.variables['chla'].canonical_units = 'mg m-3'
        #
        self.variables['xm'].canonical_units = 'km'
        self.variables['xm'].description = 'Zonal distance.'
        self.variables['ym'].canonical_units = 'km'
        self.variables['ym'].description = 'Meridional distance.'
        self.variables['xm'].data, self.variables['ym'].data = (
            metergrid(self.variables['longitude'].data, 
            self.variables['latitude'].data, units='km')
        )
        return
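The date handling anchors each (year, day-of-year) pair at January 1 and adds day - 1 to get a matplotlib datenum; a quick check of that step with hypothetical values:

    from matplotlib import dates

    # Day 161 of 2013 is 2013-06-10 (January 1 plus 160 days).
    start = dates.datestr2num('2013-01-01') + 161 - 1
    assert dates.num2date(start).strftime('%Y-%m-%d') == '2013-06-10'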
Example 23
    def __init__(self,
                 path=None,
                 sensor='SeaWiFS',
                 resolution='9km',
                 mask_file=None,
                 xlim=None,
                 ylim=None):
        # Initializes the variables to default values. The indices 'n', 'k',
        # 'j' and 'i' refer to the temporal, height, meridional and zonal
        # coordinates respectively. If one of these indexes is set to 'None',
        # then it is assumed infinite size, which is relevant for the 'time'
        # coordinate.
        self.attributes = dict()
        self.dimensions = dict(n=0, k=0, j=0, i=0)
        self.coordinates = dict(n=None, k=None, j=None, i=None)
        self.variables = dict()
        self.params = dict()
        self.data = dict()
        self.stencil_coeffs = dict()
        self.stencil_params = dict()

        # Sets global parameters for grid.
        if path is None:
            path = '/academia/data/raw/oceancolor'
        self.params['path'] = '%s/%s' % (path, sensor)
        self.params['mask_file'] = mask_file
        self.params['uuid'] = str(_uuid())
        self.params['var_list'] = ['chla']

        # Generates list of files, tries to match them to the pattern and to
        # extract the time. To help understand the naming convention and
        # pattern, see the following example:
        #   A20131612013168.L3m_8D_CHL_chlor_a_9km.bz2
        # resolution = '[0-9]+km'
        if sensor == 'SeaWiFS':
            sensor_prefix = 'S'
        elif sensor == 'MODISA':
            sensor_prefix = 'A'
        else:
            sensor = '.*'
        file_pattern = ('(%s)([0-9]{4})([0-9]{3})([0-9]{4})([0-9]{3}).(L3m)_'
                        '(8D)_(CHL)_(chlor_a)_(%s).bz2') % (sensor_prefix,
                                                            resolution)
        flist = listdir(self.params['path'])
        flist, match = _reglist(flist, file_pattern)
        self.params['file_list'] = flist

        # Reads first file in dataset to determine array geometry and
        # dimensions (lon, lat)
        HDF = self._open_HDF(
            '%s/%s' % (self.params['path'], self.params['file_list'][0]))
        HDF_att = HDF.attributes()
        lon = arange(HDF_att['Westernmost Longitude'],
                     HDF_att['Easternmost Longitude'],
                     HDF_att['Longitude Step'])
        lat = arange(HDF_att['Northernmost Latitude'],
                     HDF_att['Southernmost Latitude'],
                     -HDF_att['Latitude Step'])

        # If lon_0 is set, calculate how many indices have to be moved in
        # order for latitude array to start at lon_0.
        lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(
            lon, lat, xlim, ylim)
        self.params['xlim'], self.params['ylim'] = xlim, ylim
        self.params['lon_i'], self.params['lat_j'] = ii, jj

        # Creates a structured array for start year, start day, end year and
        # end day. Afterwards, the dates are converted from julian day to
        # matplotlib format, i.e. days since 0001-01-01 UTC.
        time_list = array([('%s-01-01' % (item[1]), atof(item[2]), '%s-01-01' %
                            (item[3]), atof(item[4])) for item in match],
                          dtype=[('start_year', 'a10'), ('start_day', 'f2'),
                                 ('end_year', 'a10'), ('end_day', 'f2')])
        time_start = (dates.datestr2num(time_list['start_year']) +
                      time_list['start_day'] - 1)
        time_end = (dates.datestr2num(time_list['end_year']) +
                    time_list['end_day'] - 1)
        time_middle = 0.5 * (time_start + time_end)

        # Initializes the grid attributes, dimensions, coordinates and
        # variables.
        self.name = 'mass_concentration_of_chlorophyll_a_in_sea_water'
        self.description = (
            'Chlorophyll-a pigment concentration '
            'inferred from satellite visible light radiance measurements.')
        self.attributes['institution'] = HDF_att['Data Center']
        self.attributes['sensor name'] = HDF_att['Sensor Name']
        self.dimensions = dict(n=time_middle.size, k=0, j=lat.size, i=lon.size)
        self.coordinates = dict(n='time',
                                k='height',
                                j='latitude',
                                i='longitude')
        self.variables = dict(
            time=atlantis.data.Variable(),
            height=atlantis.data.get_standard_variable('height'),
            latitude=atlantis.data.get_standard_variable('latitude'),
            longitude=atlantis.data.get_standard_variable('longitude'),
            chla=atlantis.data.get_standard_variable(
                'mass_concentration_of_chlorophyll_a_in_sea_water'),
            xm=atlantis.data.Variable(),
            ym=atlantis.data.Variable(),
        )
        self.variables['time'].data = time_middle
        self.variables['time'].canonical_units = 'days since 0001-01-01 UTC'
        #
        self.variables['height'].data = 0.
        self.variables['latitude'].data = lat
        self.variables['longitude'].data = lon
        self.variables['chla'].canonical_units = 'mg m-3'
        #
        self.variables['xm'].canonical_units = 'km'
        self.variables['xm'].description = 'Zonal distance.'
        self.variables['ym'].canonical_units = 'km'
        self.variables['ym'].description = 'Meridional distance.'
        self.variables['xm'].data, self.variables['ym'].data = (metergrid(
            self.variables['longitude'].data,
            self.variables['latitude'].data,
            units='km'))
        return