Example #1
    def save_parse_results_v6(self, id_to_metadata):
        """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
        transaction.

        :param id_to_metadata: Mapping of source file IDs to metadata objects
        :type id_to_metadata: { int: :class:`job.seed.metadata.SeedMetadata` }
        """

        ids = id_to_metadata.keys()
        logger.debug('List of IDs to update: {}'.format(ids))
        source_file_ids = ScaleFile.objects.filter(id__in=ids, file_type='SOURCE').values_list('id', flat=True)
        ignored_ids = list(set(ids) - set(source_file_ids))
        if len(ignored_ids):
            logger.warning('Ignored all parse results for file IDs not of SOURCE file_type: {}'
                           .format(','.join(map(str, ignored_ids))))

        for file_id in source_file_ids:
            metadata = id_to_metadata[int(file_id)]

            geo_json = metadata.data
            data_started = metadata.get_property('dataStarted')
            data_ended = metadata.get_property('dataEnded')
            data_types = metadata.get_property('dataTypes', [])
            new_workspace_path = metadata.get_property('newWorkspacePath')

            if data_started:
                data_started = parse_datetime(data_started)
            if data_ended:
                data_ended = parse_datetime(data_ended)

            logger.debug('Captured input for file ID {}:\n{}\n{}\n{}\n{}'.format(file_id, geo_json,
                                                                                 data_started, data_ended, data_types))
            SourceFile.objects.save_parse_results(file_id, geo_json, data_started, data_ended, data_types,
                                                  new_workspace_path)
Example #2
    def save_parse_results(self, parse_results, input_file_ids):
        """See :meth:`job.configuration.data.data_file.AbstractDataFileParseSaver.save_parse_results`
        """

        file_name_to_id = {}
        source_files = ScaleFile.objects.filter(id__in=input_file_ids,
                                                file_type='SOURCE')
        for source_file in source_files:
            file_name_to_id[source_file.file_name] = source_file.id

        for file_name in parse_results:
            if file_name not in file_name_to_id:
                continue
            src_file_id = file_name_to_id[file_name]

            parse_result = parse_results[file_name]
            geo_json = parse_result[0]
            data_started = parse_result[1]
            data_ended = parse_result[2]
            data_types = parse_result[3]
            new_workspace_path = parse_result[4]
            if data_started:
                data_started = parse_datetime(data_started)
            if data_ended:
                data_ended = parse_datetime(data_ended)

            SourceFile.objects.save_parse_results(src_file_id, geo_json,
                                                  data_started, data_ended,
                                                  data_types,
                                                  new_workspace_path)
Example #3
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        message = CreateJobExecutionEnd()

        for job_exe_end_dict in json_dict['job_exe_end_models']:
            job_exe_end = JobExecutionEnd()
            task_results = TaskResults(job_exe_end_dict['task_results'],
                                       do_validate=False)

            job_exe_end.job_exe_id = job_exe_end_dict['id']
            job_exe_end.job_id = job_exe_end_dict['job_id']
            job_exe_end.job_type_id = job_exe_end_dict['job_type_id']
            job_exe_end.exe_num = job_exe_end_dict['exe_num']
            job_exe_end.task_results = job_exe_end_dict['task_results']
            job_exe_end.status = job_exe_end_dict['status']
            job_exe_end.queued = parse_datetime(job_exe_end_dict['queued'])
            job_exe_end.seed_started = task_results.get_task_started('main')
            job_exe_end.seed_ended = task_results.get_task_ended('main')
            job_exe_end.ended = parse_datetime(job_exe_end_dict['ended'])
            if 'error_id' in job_exe_end_dict:
                job_exe_end.error_id = job_exe_end_dict['error_id']
            if 'node_id' in job_exe_end_dict:
                job_exe_end.node_id = job_exe_end_dict['node_id']
            if 'started' in job_exe_end_dict:
                job_exe_end.started = parse_datetime(job_exe_end_dict['started'])
            message.add_job_exe_end(job_exe_end)

        return message
Example #4
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        message = RequeueJobsBulk()
        if 'current_job_id' in json_dict:
            message.current_job_id = json_dict['current_job_id']
        if 'started' in json_dict:
            message.started = parse_datetime(json_dict['started'])
        if 'ended' in json_dict:
            message.ended = parse_datetime(json_dict['ended'])
        if 'error_categories' in json_dict:
            message.error_categories = json_dict['error_categories']
        if 'error_ids' in json_dict:
            message.error_ids = json_dict['error_ids']
        if 'job_ids' in json_dict:
            message.job_ids = json_dict['job_ids']
        if 'job_type_ids' in json_dict:
            message.job_type_ids = json_dict['job_type_ids']
        if 'priority' in json_dict:
            message.priority = json_dict['priority']
        if 'status' in json_dict:
            message.status = json_dict['status']

        return message
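For orientation, here is a hypothetical json_dict that this method would accept. It contains only the keys the code above reads (all optional); the specific IDs, category, and status values are illustrative rather than taken from Scale, and from_json is assumed to be a staticmethod since it takes no self argument.

json_dict = {
    'current_job_id': 123,              # resume point for a partially processed message
    'started': '2018-06-01T00:00:00Z',  # parsed with parse_datetime
    'ended': '2018-06-02T00:00:00Z',
    'error_categories': ['SYSTEM'],     # illustrative category value
    'error_ids': [1, 2],
    'job_ids': [4, 5, 6],
    'job_type_ids': [7],
    'priority': 100,
    'status': 'FAILED',                 # illustrative status value
}
message = RequeueJobsBulk.from_json(json_dict)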
Example #5
    def save_parse_results(self, parse_results, input_file_ids):
        """See :meth:`job.configuration.data.data_file.AbstractDataFileParseSaver.save_parse_results`
        """

        file_name_to_id = {}
        source_files = SourceFile.objects.filter(id__in=input_file_ids)
        for source_file in source_files:
            file_name_to_id[source_file.file_name] = source_file.id

        for file_name in parse_results:
            if file_name not in file_name_to_id:
                continue
            src_file_id = file_name_to_id[file_name]

            parse_result = parse_results[file_name]
            geo_json = parse_result[0]
            data_started = parse_result[1]
            data_ended = parse_result[2]
            data_types = parse_result[3]
            new_workspace_path = parse_result[4]
            if data_started:
                data_started = parse_datetime(data_started)
            if data_ended:
                data_ended = parse_datetime(data_ended)

            SourceFile.objects.save_parse_results(src_file_id, geo_json, data_started, data_ended, data_types,
                                                  new_workspace_path)
Example #6
    def test_successful(self, mock_save):
        """Tests calling SourceDataFileParseSaver.save_parse_results() successfully"""

        geo_json = {'type': 'Feature'}
        started = now()
        ended = started + datetime.timedelta(days=1)
        # quick hack to give these a valid timezone. Easier than creating a TZ object since we don't really care about the time for this test.
        started = parse_datetime(started.isoformat() + "Z")
        ended = parse_datetime(ended.isoformat() + "Z")

        file_ids = [
            self.source_file_1.id, self.source_file_2.id,
            self.extra_source_file_id
        ]
        parse_results = {
            self.file_name_1: (geo_json, started, None, [], None),
            self.file_name_2: (None, None, ended, [], None),
            'FILE_WITH_NO_SOURCE_FILE_MODEL': (None, None, None, None, None)
        }

        SourceDataFileParseSaver().save_parse_results(parse_results, file_ids)

        calls = [
            call(self.source_file_1.id, geo_json, started, None, [], None),
            call(self.source_file_2.id, None, None, ended, [], None)
        ]

        self.assertEqual(mock_save.call_count, 2)
        mock_save.assert_has_calls(calls, any_order=True)
Example #7
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        message = CancelJobsBulk()
        if 'current_job_id' in json_dict:
            message.current_job_id = json_dict['current_job_id']
        if 'started' in json_dict:
            message.started = parse_datetime(json_dict['started'])
        if 'ended' in json_dict:
            message.ended = parse_datetime(json_dict['ended'])
        if 'error_categories' in json_dict:
            message.error_categories = json_dict['error_categories']
        if 'error_ids' in json_dict:
            message.error_ids = json_dict['error_ids']
        if 'job_ids' in json_dict:
            message.job_ids = json_dict['job_ids']
        if 'job_type_ids' in json_dict:
            message.job_type_ids = json_dict['job_type_ids']
        if 'status' in json_dict:
            message.status = json_dict['status']
        if 'job_type_names' in json_dict:
            message.job_type_names = json_dict['job_type_names']
        if 'batch_ids' in json_dict:
            message.batch_ids = json_dict['batch_ids']
        if 'recipe_ids' in json_dict:
            message.recipe_ids = json_dict['recipe_ids']
        if 'is_superseded' in json_dict:
            message.is_superseded = json_dict['is_superseded']

        return message
Example #8
    def test_successful_v6(self, mock_save):
        """Tests calling SourceDataFileParseSaver.save_parse_results_v6() successfully"""

        started = '2018-06-01T00:00:00Z'
        ended = '2018-06-01T01:00:00Z'
        types = ['one', 'two', 'three']
        new_workspace_path = 'awful/path'
        data = {
            'type': 'Feature',
            'geometry': {
                'type': 'Point',
                'coordinates': [0, 1]
            },
            'properties': {
                'dataStarted': started,
                'dataEnded': ended,
                'dataTypes': types,
                'newWorkspacePath': new_workspace_path
            }
        }

        metadata = {
            self.source_file_1.id:
            SeedMetadata.metadata_from_json(data, do_validate=False)
        }

        calls = [
            call(self.source_file_1.id, data, parse_datetime(started),
                 parse_datetime(ended), types, new_workspace_path)
        ]

        SourceDataFileParseSaver().save_parse_results_v6(metadata)

        self.assertEqual(mock_save.call_count, 1)
        mock_save.assert_has_calls(calls, any_order=True)
Example #9
    def __init__(self, definition):
        """Creates a batch definition object from the given dictionary. The general format is checked for correctness.

        :param definition: The batch definition
        :type definition: dict

        :raises :class:`batch.configuration.definition.exceptions.InvalidDefinition`:
            If the given definition is invalid
        """

        self._definition = definition

        try:
            validate(definition, BATCH_DEFINITION_SCHEMA)
        except ValidationError as ex:
            raise InvalidDefinition('', 'Invalid batch definition: %s' % unicode(ex))

        self._populate_default_values()
        if self._definition['version'] != '1.0':
            raise InvalidDefinition('', '%s is an unsupported version number' % self._definition['version'])

        date_range = self._definition['date_range'] if 'date_range' in self._definition else None
        self.date_range_type = None
        if date_range and 'type' in date_range:
            self.date_range_type = date_range['type']

        self.started = None
        if date_range and 'started' in date_range:
            try:
                self.started = parse.parse_datetime(date_range['started'])
            except ValueError:
                raise InvalidDefinition('', 'Invalid start date format: %s' % date_range['started'])
        self.ended = None
        if date_range and 'ended' in date_range:
            try:
                self.ended = parse.parse_datetime(date_range['ended'])
            except ValueError:
                raise InvalidDefinition('', 'Invalid end date format: %s' % date_range['ended'])

        self.job_names = self._definition['job_names']
        self.all_jobs = self._definition['all_jobs']

        self.priority = None
        if 'priority' in self._definition:
            try:
                self.priority = self._definition['priority']
            except ValueError:
                raise InvalidDefinition('', 'Invalid priority: %s' % self._definition['priority'])

        self.trigger_rule = False
        self.trigger_config = None
        if 'trigger_rule' in self._definition:
            if isinstance(self._definition['trigger_rule'], bool):
                self.trigger_rule = self._definition['trigger_rule']
            else:
                self.trigger_config = BatchTriggerConfiguration('BATCH', self._definition['trigger_rule'])
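As a rough sketch, a definition dictionary consistent with the fields this constructor reads might look like the following. The real BATCH_DEFINITION_SCHEMA may require or default additional keys, and the class name BatchDefinition is assumed here rather than shown in the snippet.

definition = {
    'version': '1.0',
    'date_range': {
        'type': 'data',                     # only the presence of 'type' is read above; the value is illustrative
        'started': '2015-01-01T00:00:00Z',  # parsed with parse.parse_datetime
        'ended': '2015-02-01T00:00:00Z',
    },
    'job_names': ['my-job'],                # hypothetical job name
    'all_jobs': False,
    'priority': 100,                        # optional
    'trigger_rule': True,                   # optional; a dict here would be wrapped in BatchTriggerConfiguration
}
batch_definition = BatchDefinition(definition)  # assumed class name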
Example #10
    def __init__(self, definition):
        """Creates a batch definition object from the given dictionary. The general format is checked for correctness.

        :param definition: The batch definition
        :type definition: dict

        :raises :class:`batch.configuration.definition.exceptions.InvalidDefinition`:
            If the given definition is invalid
        """

        self._definition = definition

        try:
            validate(definition, BATCH_DEFINITION_SCHEMA)
        except ValidationError as ex:
            raise InvalidDefinition("Invalid batch definition: %s" % unicode(ex))

        self._populate_default_values()
        if self._definition["version"] != "1.0":
            raise InvalidDefinition("%s is an unsupported version number" % self._definition["version"])

        date_range = self._definition["date_range"] if "date_range" in self._definition else None
        self.date_range_type = None
        if date_range and "type" in date_range:
            self.date_range_type = date_range["type"]

        self.started = None
        if date_range and "started" in date_range:
            try:
                self.started = parse.parse_datetime(date_range["started"])
            except ValueError:
                raise InvalidDefinition("Invalid start date format: %s" % date_range["started"])
        self.ended = None
        if date_range and "ended" in date_range:
            try:
                self.ended = parse.parse_datetime(date_range["ended"])
            except ValueError:
                raise InvalidDefinition("Invalid end date format: %s" % date_range["ended"])

        self.job_names = self._definition["job_names"]
        self.all_jobs = self._definition["all_jobs"]

        self.priority = None
        if "priority" in self._definition:
            try:
                self.priority = self._definition["priority"]
            except ValueError:
                raise InvalidDefinition("Invalid priority: %s" % self._definition["priority"])
Example #11
def parse_datetime(request, name, default_value=None, required=True):
    """Parses a datetime parameter from the given request.

    :param request: The context of an active HTTP request.
    :type request: :class:`rest_framework.request.Request`
    :param name: The name of the parameter to parse.
    :type name: string
    :param default_value: The value to return if the parameter is not provided.
    :type default_value: datetime.datetime
    :param required: Indicates whether or not the parameter is required. An exception will be raised if the parameter
        does not exist, there is no default value, and required is True.
    :type required: bool
    :returns: The value of the named parameter or the default value if provided.
    :rtype: datetime.datetime

    :raises :class:`util.rest.BadParameter`: If the value cannot be parsed.
    """
    value = _get_param(request, name, default_value, required)
    if not isinstance(value, basestring):
        return value

    try:
        result = parse_util.parse_datetime(value)
        if result:
            return result
        # parse_util.parse_datetime() returned None: treat as an invalid value
        raise
    except:
        raise BadParameter(
            'Datetime values must follow ISO-8601 and include a timezone: %s' %
            name)
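A hedged usage sketch for this helper: the view and query parameters below are hypothetical, the helper is assumed to be importable from util.rest (the module referenced by the BadParameter docstring), and only the signature shown above is relied on.

# Hypothetical DRF view built around the helper above.
from rest_framework.response import Response
from rest_framework.views import APIView

import util.rest as rest_util


class TimeRangeView(APIView):
    """Echoes back the validated time range from the query string."""

    def get(self, request):
        # Both parameters are optional; a missing value falls back to the default (None).
        started = rest_util.parse_datetime(request, 'started', required=False)
        ended = rest_util.parse_datetime(request, 'ended', required=False)
        return Response({'started': started, 'ended': ended})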
Example #12
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        message = RestartScheduler()
        message.when = parse_datetime(json_dict['when'])
        return message
Example #13
def parse_datetime(request, name, default_value=None, required=True):
    '''Parses a datetime parameter from the given request.

    :param request: The context of an active HTTP request.
    :type request: :class:`rest_framework.request.Request`
    :param name: The name of the parameter to parse.
    :type name: str
    :param default_value: The value to return if the parameter is not provided.
    :type default_value: datetime.datetime
    :param required: Indicates whether or not the parameter is required. An exception will be raised if the parameter
        does not exist, there is no default value, and required is True.
    :type required: bool
    :returns: The value of the named parameter or the default value if provided.
    :rtype: datetime.datetime

    :raises :class:`util.rest.BadParameter`: If the value cannot be parsed.
    '''
    value = _get_param(request, name, default_value, required)
    if not isinstance(value, basestring):
        return value

    try:
        result = parse_util.parse_datetime(value)
        if result:
            return result
        # parse_util.parse_datetime() returned None: treat as an invalid format
        raise
    except ParseError:
        raise BadParameter('Datetime value must include a timezone: %s' % name)
    except:
        raise BadParameter('Invalid datetime format for parameter: %s' % name)
Example #14
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        message = CancelJobs()
        message.when = parse_datetime(json_dict['when'])
        for job_id in json_dict['job_ids']:
            message.add_job(job_id)
        return message
Example #15
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        status_change = parse_datetime(json_dict['status_change'])

        message = BlockedJobs()
        message.status_change = status_change
        for job_id in json_dict['job_ids']:
            message.add_job(job_id)

        return message
Example #16
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        message = CompletedJobs()
        message.ended = parse_datetime(json_dict['ended'])

        for job_dict in json_dict['jobs']:
            job_id = job_dict['id']
            exe_num = job_dict['exe_num']
            message.add_completed_job(CompletedJob(job_id, exe_num))

        return message
Example #17
    def test_successful(self, mock_save):
        '''Tests calling SourceDataFileParseSaver.save_parse_results() successfully'''

        geo_json = {u'type': u'Feature'}
        started = now()
        ended = started + datetime.timedelta(days=1)
        # quick hack to give these a valid timezone. Easier than creating a TZ object since we don't really care about the time for this test.
        started = parse_datetime(started.isoformat() + "Z")
        ended = parse_datetime(ended.isoformat() + "Z")

        file_ids = [self.source_file_1.id, self.source_file_2.id, self.extra_source_file_id]
        parse_results = {self.file_name_1: (geo_json, started, None, [], None, None),
                         self.file_name_2: (None, None, ended, [], None, None),
                         u'FILE_WITH_NO_SOURCE_FILE_MODEL': (None, None, None, None, None, None)}
        
        SourceDataFileParseSaver().save_parse_results(parse_results, file_ids)
        
        calls = [call(self.source_file_1.id, geo_json, started, None, [], None, None),
                 call(self.source_file_2.id, None, None, ended, [], None, None)]
        
        self.assertEqual(mock_save.call_count, 2)
        mock_save.assert_has_calls(calls, any_order=True)
Example #18
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        started = parse_datetime(json_dict['started'])
        message = RunningJobs(started)

        for node_dict in json_dict['nodes']:
            node_id = node_dict['id']
            for job_dict in node_dict['jobs']:
                job_id = job_dict['id']
                exe_num = job_dict['exe_num']
                message.add_running_job(job_id, exe_num, node_id)

        return message
Example #19
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        message = FailedJobs()
        message.ended = parse_datetime(json_dict['ended'])

        for error_dict in json_dict['errors']:
            error_id = error_dict['id']
            for job_dict in error_dict['jobs']:
                job_id = job_dict['id']
                exe_num = job_dict['exe_num']
                message.add_failed_job(FailedJob(job_id, exe_num, error_id))

        return message
Example #20
    def from_json(json_dict):
        """See :meth:`messaging.messages.message.CommandMessage.from_json`
        """

        message = SupersedeRecipeNodes()

        for recipe_id in json_dict['recipe_ids']:
            message.add_recipe(recipe_id)
        message.when = parse_datetime(json_dict['when'])
        message.supersede_all = json_dict['supersede_all']
        message.supersede_jobs = set(json_dict['supersede_jobs'])
        message.supersede_subrecipes = set(json_dict['supersede_subrecipes'])
        message.unpublish_all = json_dict['unpublish_all']
        message.unpublish_jobs = set(json_dict['unpublish_jobs'])
        message.supersede_recursive_all = json_dict['supersede_recursive_all']
        message.supersede_recursive = set(json_dict['supersede_recursive'])
        message.unpublish_recursive_all = json_dict['unpublish_recursive_all']
        message.unpublish_recursive = set(json_dict['unpublish_recursive'])

        return message
Example #21
            ],
            'level': 'DEBUG',
        },
    },
}
LOG_CONSOLE_FILE_INFO = {
    'version': 1,
    'formatters': LOG_FORMATTERS,
    'filters': LOG_FILTERS,
    'handlers': LOG_HANDLERS,
    'loggers': {
        '': {
            'handlers': ['console', 'console-err', 'file-info', 'file-error'],
            'level': 'INFO',
        },
    },
}
LOGGING = LOG_CONSOLE_INFO

# Hack to fix ISO8601 for datetime filters.
# This should be taken care of by a future django fix.  And might even be handled
# by a newer version of django-rest-framework.  Unfortunately, both of these solutions
# will accept datetimes without timezone information which we do not want to allow
# see https://code.djangoproject.com/tickets/23448
# Solution modified from http://akinfold.blogspot.com/2012/12/datetimefield-doesnt-accept-iso-8601.html
from django.forms import fields
from util.parse import parse_datetime
fields.DateTimeField.strptime = lambda _self, datetime_string, _format: parse_datetime(
    datetime_string)
Example #22
    def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
        """Uploads the given local product files into the workspace.

        :param file_entries: List of files to upload
        :type file_entries: list[:class:`product.types.ProductFileMetadata`]
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`storage.models.ScaleFile`
        """

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values(
            'uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Get property names and values as strings
        properties = job_exe.job.get_job_data().get_all_properties()

        # Product UUID will be based in part on input data (UUIDs of input files and name/value pairs of input
        # properties)
        input_strings = input_file_uuids
        input_strings.extend(properties)

        # Determine if any input files are non-operational products
        input_products = ScaleFile.objects.filter(
            id__in=[f['id'] for f in input_files], file_type='PRODUCT')
        input_products_operational = all(
            [f.is_operational for f in input_products])

        source_started = job_exe.job.source_started
        source_ended = job_exe.job.source_ended
        source_sensor_class = job_exe.job.source_sensor_class
        source_sensor = job_exe.job.source_sensor
        source_collection = job_exe.job.source_collection
        source_task = job_exe.job.source_task
        if not source_started:
            # Compute the overall start and stop times for all file_entries
            source_files = FileAncestryLink.objects.get_source_ancestors(
                [f['id'] for f in input_files])
            start_times = [f.data_started for f in source_files]
            end_times = [f.data_ended for f in source_files]
            start_times.sort()
            end_times.sort(reverse=True)
            if start_times:
                source_started = start_times[0]
            if end_times:
                source_ended = end_times[0]

        products_to_save = []
        for entry in file_entries:
            product = ProductFile.create()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            file_name = os.path.basename(entry.local_path)
            file_size = os.path.getsize(entry.local_path)
            product.set_basic_fields(file_name, file_size, entry.media_type)
            product.file_path = entry.remote_path
            product.job_output = entry.output_name

            # Add a stable identifier based on the job type, input files, input properties, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            product.update_uuid(job_exe.job.job_type.id, file_name,
                                *input_strings)

            # Add temporal info to product if available
            if entry.data_start:
                product.data_started = parse_datetime(entry.data_start)
            if entry.data_end:
                product.data_ended = parse_datetime(entry.data_end)

            if entry.geojson:
                geom, props = geo_utils.parse_geo_json(entry.geojson)
                product.geometry = geom
                if props:
                    product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

            # Add recipe info to product if available.
            job_recipe = Recipe.objects.get_recipe_for_job(job_exe.job_id)
            if job_recipe:
                product.recipe_id = job_recipe.recipe.id
                product.recipe_type = job_recipe.recipe.recipe_type
                product.recipe_node = job_recipe.node_name

                # Add batch info to product if available.
                try:
                    from batch.models import BatchJob
                    product.batch_id = BatchJob.objects.get(
                        job_id=job_exe.job_id).batch_id
                except BatchJob.DoesNotExist:
                    product.batch_id = None

            # Allow override, if set via side-car metadata, otherwise take derived values from above
            product.source_started = entry.source_started if entry.source_started else source_started
            product.source_ended = entry.source_ended if entry.source_ended else source_ended

            # Supplemental source metadata
            product.source_sensor_class = entry.source_sensor_class if entry.source_sensor_class else source_sensor_class
            product.source_sensor = entry.source_sensor if entry.source_sensor else source_sensor
            product.source_collection = entry.source_collection if entry.source_collection else source_collection
            product.source_task = entry.source_task if entry.source_task else source_task

            # Update product model with details derived from the job_type
            product.meta_data['url'] = product.url
            product.meta_data['job_name'] = job_exe.job_type.name
            product.meta_data[
                'job_version'] = job_exe.job_type.get_job_version()
            product.meta_data[
                'package_version'] = job_exe.job_type.get_package_version()

            products_to_save.append(FileUpload(product, entry.local_path))

        return ScaleFile.objects.upload_files(workspace, products_to_save)
Example #23
    def test_parse_datetime(self):
        '''Tests parsing a valid ISO datetime.'''
        self.assertEqual(parse_util.parse_datetime('2015-01-01T00:00:00Z'),
                         datetime.datetime(2015, 1, 1, tzinfo=timezone.utc))
Example #24
    def upload_files(self, upload_dir, work_dir, file_entries, input_file_ids,
                     job_exe, workspace):
        '''Uploads the given local product files into the workspace. All database changes will be made in an atomic
        transaction. This method assumes that ScaleFileManager.setup_upload_dir() has already been called with the same
        upload and work directories.

        :param upload_dir: Absolute path to the local directory of the files to upload
        :type upload_dir: str
        :param work_dir: Absolute path to a local work directory available to assist in uploading
        :type work_dir: str
        :param file_entries: List of files where each file is a tuple of (source path relative to upload directory,
            workspace path for storing the file, media_type)
        :type file_entries: list of tuple(str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`product.models.ProductFile`
        '''

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values(
            'uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Determine if any input files are non-operational products
        input_products = ProductFile.objects.filter(
            file__in=[f['id'] for f in input_files])
        input_products_operational = all(
            [f.is_operational for f in input_products])

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]

            product = ProductFile()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            product.media_type = media_type

            # Add a stable identifier based on the job type, input files, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            file_name = os.path.basename(local_path)
            product.update_uuid(job_exe.job.job_type.id, file_name,
                                *input_file_uuids)

            # Add geospatial info to product if available
            if len(entry) > 3:
                geo_metadata = entry[3]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(
                        geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(
                        geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(
                        geo_metadata['geo_json'])
                    product.geometry = geom
                    product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            products_to_save.append((product, local_path, remote_path))

        return ScaleFile.objects.upload_files(upload_dir, work_dir, workspace,
                                              products_to_save)
Example #25
    def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
        """Uploads the given local product files into the workspace.

        :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
            storing the file, media_type, output_name)
        :type file_entries: list of tuple(str, str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`storage.models.ScaleFile`
        """

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Get property names and values as strings
        properties = job_exe.job.get_job_data().get_all_properties()

        # Product UUID will be based in part on input data (UUIDs of input files and name/value pairs of input
        # properties)
        input_strings = input_file_uuids
        input_strings.extend(properties)

        # Determine if any input files are non-operational products
        input_products = ScaleFile.objects.filter(id__in=[f['id'] for f in input_files], file_type='PRODUCT')
        input_products_operational = all([f.is_operational for f in input_products])

        # Compute the overall start and stop times for all file_entries
        source_files = FileAncestryLink.objects.get_source_ancestors([f['id'] for f in input_files])
        start_times = [f.data_started for f in source_files]
        end_times = [f.data_ended for f in source_files]
        start_times.sort()
        end_times.sort(reverse=True)

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]
            output_name = entry[3]

            product = ProductFile.create()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            file_name = os.path.basename(local_path)
            file_size = os.path.getsize(local_path)
            product.set_basic_fields(file_name, file_size, media_type)
            product.file_path = remote_path
            product.job_output = output_name

            # Add a stable identifier based on the job type, input files, input properties, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            product.update_uuid(job_exe.job.job_type.id, file_name, *input_strings)

            # Add geospatial info to product if available
            if len(entry) > 4:
                geo_metadata = entry[4]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                    product.geometry = geom
                    if props:
                        product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            # Add recipe info to product if available.
            job_recipe = Recipe.objects.get_recipe_for_job(job_exe.job_id)
            if job_recipe:
                product.recipe_id = job_recipe.recipe.id
                product.recipe_type = job_recipe.recipe.recipe_type
                product.recipe_job = job_recipe.job_name

                # Add batch info to product if available.
                try:
                    from batch.models import BatchJob
                    product.batch_id = BatchJob.objects.get(job_id=job_exe.job_id).batch_id
                except BatchJob.DoesNotExist:
                    product.batch_id = None

            # Add start and stop times if available
            if start_times:
                product.source_started = start_times[0]

            if end_times:
                product.source_ended = end_times[0]

            products_to_save.append(FileUpload(product, local_path))

        return ScaleFile.objects.upload_files(workspace, products_to_save)
Example #26
    def test_parse_datetime(self):
        """Tests parsing a valid ISO datetime."""
        self.assertEqual(parse_util.parse_datetime('2015-01-01T00:00:00Z'),
                         datetime.datetime(2015, 1, 1, tzinfo=utc))
Example #27
    def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
        """Uploads the given local product files into the workspace.

        :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
            storing the file, media_type)
        :type file_entries: list of tuple(str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`product.models.ProductFile`
        """

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Determine if any input files are non-operational products
        input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
        input_products_operational = all([f.is_operational for f in input_products])

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]

            product = ProductFile()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            file_name = os.path.basename(local_path)
            file_size = os.path.getsize(local_path)
            product.set_basic_fields(file_name, file_size, media_type)
            product.file_path = remote_path

            # Add a stable identifier based on the job type, input files, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

            # Add geospatial info to product if available
            if len(entry) > 3:
                geo_metadata = entry[3]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                    product.geometry = geom
                    product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            products_to_save.append(FileUpload(product, local_path))

        return ScaleFile.objects.upload_files(workspace, products_to_save)
Example #28
    def test_parse_datetime(self):
        """Tests parsing a valid ISO datetime."""
        self.assertEqual(parse_util.parse_datetime('2015-01-01T00:00:00Z'),
                         datetime.datetime(2015, 1, 1, tzinfo=timezone.utc))
Example #29
    def test_parse_datetime_invalid(self):
        """Tests parsing an invalid ISO datetime."""
        self.assertIsNone(parse_util.parse_datetime('20150101T00:00:00Z'))
Example #30
        '': {
            'handlers': ['console', 'console-err', 'file-debug', 'file-info', 'file-error'],
            'level': 'DEBUG',
        },
    },
}
LOG_CONSOLE_FILE_INFO = {
    'version': 1,
    'formatters': LOG_FORMATTERS,
    'filters': LOG_FILTERS,
    'handlers': LOG_HANDLERS,
    'loggers': {
        '': {
            'handlers': ['console', 'console-err', 'file-info', 'file-error'],
            'level': 'INFO',
        },
    },
}
LOGGING = LOG_CONSOLE_INFO


# Hack to fix ISO8601 for datetime filters.
# This should be taken care of by a future django fix.  And might even be handled
# by a newer version of django-rest-framework.  Unfortunately, both of these solutions
# will accept datetimes without timezone information which we do not want to allow
# see https://code.djangoproject.com/tickets/23448
# Solution modified from http://akinfold.blogspot.com/2012/12/datetimefield-doesnt-accept-iso-8601.html
from django.forms import fields
from util.parse import parse_datetime
fields.DateTimeField.strptime = lambda _self, datetime_string, _format: parse_datetime(datetime_string)
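A minimal self-contained sketch of the same monkey-patch pattern, using only django.utils.dateparse. This stands in for Scale's util.parse.parse_datetime, which additionally rejects datetimes without timezone information (as the tests above exercise), so naive values are rejected explicitly here.

from django.forms import fields
from django.utils import dateparse


def _strict_parse_datetime(datetime_string):
    """Parses an ISO-8601 string and rejects values without a timezone."""
    parsed = dateparse.parse_datetime(datetime_string)
    if parsed is None or parsed.tzinfo is None:
        # Django's form field machinery treats ValueError from strptime() as an invalid value
        raise ValueError('Datetime values must follow ISO-8601 and include a timezone: %s' % datetime_string)
    return parsed


fields.DateTimeField.strptime = lambda _self, datetime_string, _format: _strict_parse_datetime(datetime_string)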
Example #31
    def test_parse_datetime_invalid(self):
        """Tests parsing an invalid ISO datetime."""
        self.assertIsNone(parse_util.parse_datetime('20150101T00:00:00Z'))
Example #32
    def upload_files(self, upload_dir, work_dir, file_entries, input_file_ids, job_exe, workspace):
        '''Uploads the given local product files into the workspace. All database changes will be made in an atomic
        transaction. This method assumes that ScaleFileManager.setup_upload_dir() has already been called with the same
        upload and work directories.

        :param upload_dir: Absolute path to the local directory of the files to upload
        :type upload_dir: str
        :param work_dir: Absolute path to a local work directory available to assist in uploading
        :type work_dir: str
        :param file_entries: List of files where each file is a tuple of (source path relative to upload directory,
            workspace path for storing the file, media_type)
        :type file_entries: list of tuple(str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`product.models.ProductFile`
        '''

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Determine if any input files are non-operational products
        input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
        input_products_operational = all([f.is_operational for f in input_products])

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]

            product = ProductFile()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            product.media_type = media_type

            # Add a stable identifier based on the job type, input files, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            file_name = os.path.basename(local_path)
            product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

            # Add geospatial info to product if available
            if len(entry) > 3:
                geo_metadata = entry[3]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                    product.geometry = geom
                    product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            products_to_save.append((product, local_path, remote_path))

        return ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, products_to_save)
Example #33
    def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
        """Uploads the given local product files into the workspace.

        :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
            storing the file, media_type)
        :type file_entries: list of tuple(str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`product.models.ProductFile`
        """

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values(
            'uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Determine if any input files are non-operational products
        input_products = ProductFile.objects.filter(
            file__in=[f['id'] for f in input_files])
        input_products_operational = all(
            [f.is_operational for f in input_products])

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]

            product = ProductFile()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            file_name = os.path.basename(local_path)
            file_size = os.path.getsize(local_path)
            product.set_basic_fields(file_name, file_size, media_type)
            product.file_path = remote_path

            # Add a stable identifier based on the job type, input files, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            product.update_uuid(job_exe.job.job_type.id, file_name,
                                *input_file_uuids)

            # Add geospatial info to product if available
            if len(entry) > 3:
                geo_metadata = entry[3]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(
                        geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(
                        geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(
                        geo_metadata['geo_json'])
                    product.geometry = geom
                    product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            products_to_save.append(FileUpload(product, local_path))

        return ScaleFile.objects.upload_files(workspace, products_to_save)