def save_parse_results_v6(self, id_to_metadata):
    """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
    transaction.

    :param id_to_metadata: Mapping of IDs to metadata objects.
    :type id_to_metadata: { int: :class:`job.seed.metadata.SeedMetadata` }
    """

    ids = id_to_metadata.keys()
    logger.debug('List of IDs to update: {}'.format(ids))

    source_file_ids = ScaleFile.objects.filter(id__in=ids, file_type='SOURCE').values_list('id', flat=True)

    ignored_ids = list(set(ids) - set(source_file_ids))
    if len(ignored_ids):
        logger.warning('Ignored all parse results for file IDs not of SOURCE file_type: {}'
                       .format(','.join(map(str, ignored_ids))))

    for file_id in source_file_ids:
        metadata = id_to_metadata[int(file_id)]

        geo_json = metadata.data
        data_started = metadata.get_property('dataStarted')
        data_ended = metadata.get_property('dataEnded')
        data_types = metadata.get_property('dataTypes', [])
        new_workspace_path = metadata.get_property('newWorkspacePath')

        if data_started:
            data_started = parse_datetime(data_started)
        if data_ended:
            data_ended = parse_datetime(data_ended)

        logger.debug('Captured input for file ID {}:\n{}\n{}\n{}\n{}'.format(file_id, geo_json, data_started,
                                                                             data_ended, data_types))

        SourceFile.objects.save_parse_results(file_id, geo_json, data_started, data_ended, data_types,
                                              new_workspace_path)
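# Usage sketch (not part of the module above): building the id_to_metadata mapping and calling
# save_parse_results_v6(), modeled on the test_successful_v6 test elsewhere in this section.
# The file ID, GeoJSON payload, and import path are illustrative assumptions.
from job.seed.metadata import SeedMetadata  # assumed import path, per the docstring above

data = {
    'type': 'Feature',
    'geometry': {'type': 'Point', 'coordinates': [0, 1]},
    'properties': {
        'dataStarted': '2018-06-01T00:00:00Z',
        'dataEnded': '2018-06-01T01:00:00Z',
        'dataTypes': ['one', 'two'],
        'newWorkspacePath': 'some/path',
    },
}

# Keys are ScaleFile IDs of SOURCE files; IDs of any other file_type are ignored with a warning.
id_to_metadata = {1234: SeedMetadata.metadata_from_json(data, do_validate=False)}
parse_saver.save_parse_results_v6(id_to_metadata)  # parse_saver: an instance of the class defining the method above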
def save_parse_results(self, parse_results, input_file_ids):
    """See :meth:`job.configuration.data.data_file.AbstractDataFileParseSaver.save_parse_results`
    """

    file_name_to_id = {}
    source_files = ScaleFile.objects.filter(id__in=input_file_ids, file_type='SOURCE')
    for source_file in source_files:
        file_name_to_id[source_file.file_name] = source_file.id

    for file_name in parse_results:
        if file_name not in file_name_to_id:
            continue
        src_file_id = file_name_to_id[file_name]

        parse_result = parse_results[file_name]
        geo_json = parse_result[0]
        data_started = parse_result[1]
        data_ended = parse_result[2]
        data_types = parse_result[3]
        new_workspace_path = parse_result[4]
        if data_started:
            data_started = parse_datetime(data_started)
        if data_ended:
            data_ended = parse_datetime(data_ended)

        SourceFile.objects.save_parse_results(src_file_id, geo_json, data_started, data_ended, data_types,
                                              new_workspace_path)
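# Usage sketch (illustrative, not part of the module above): the parse_results mapping consumed by
# save_parse_results(). Each value is a 5-tuple of
# (geo_json, data_started, data_ended, data_types, new_workspace_path); file names and IDs are made up.
parse_results = {
    'input_1.h5': ({'type': 'Feature'}, '2015-01-01T00:00:00Z', None, [], None),
    'input_2.h5': (None, None, '2015-01-02T00:00:00Z', ['radar'], None),
}
input_file_ids = [101, 102]

# File names without a matching SOURCE ScaleFile model are skipped silently.
parse_saver.save_parse_results(parse_results, input_file_ids)  # parse_saver: instance of the class above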
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    message = CreateJobExecutionEnd()

    for job_exe_end_dict in json_dict['job_exe_end_models']:
        job_exe_end = JobExecutionEnd()
        task_results = TaskResults(job_exe_end_dict['task_results'], do_validate=False)

        job_exe_end.job_exe_id = job_exe_end_dict['id']
        job_exe_end.job_id = job_exe_end_dict['job_id']
        job_exe_end.job_type_id = job_exe_end_dict['job_type_id']
        job_exe_end.exe_num = job_exe_end_dict['exe_num']
        job_exe_end.task_results = job_exe_end_dict['task_results']
        job_exe_end.status = job_exe_end_dict['status']
        job_exe_end.queued = parse_datetime(job_exe_end_dict['queued'])
        job_exe_end.seed_started = task_results.get_task_started('main')
        job_exe_end.seed_ended = task_results.get_task_ended('main')
        job_exe_end.ended = parse_datetime(job_exe_end_dict['ended'])

        if 'error_id' in job_exe_end_dict:
            job_exe_end.error_id = job_exe_end_dict['error_id']
        if 'node_id' in job_exe_end_dict:
            job_exe_end.node_id = job_exe_end_dict['node_id']
        if 'started' in job_exe_end_dict:
            job_exe_end.started = job_exe_end_dict['started']

        message.add_job_exe_end(job_exe_end)

    return message
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    message = RequeueJobsBulk()
    if 'current_job_id' in json_dict:
        message.current_job_id = json_dict['current_job_id']
    if 'started' in json_dict:
        message.started = parse_datetime(json_dict['started'])
    if 'ended' in json_dict:
        message.ended = parse_datetime(json_dict['ended'])
    if 'error_categories' in json_dict:
        message.error_categories = json_dict['error_categories']
    if 'error_ids' in json_dict:
        message.error_ids = json_dict['error_ids']
    if 'job_ids' in json_dict:
        message.job_ids = json_dict['job_ids']
    if 'job_type_ids' in json_dict:
        message.job_type_ids = json_dict['job_type_ids']
    if 'priority' in json_dict:
        message.priority = json_dict['priority']
    if 'status' in json_dict:
        message.status = json_dict['status']
    return message
def save_parse_results(self, parse_results, input_file_ids):
    """See :meth:`job.configuration.data.data_file.AbstractDataFileParseSaver.save_parse_results`
    """

    file_name_to_id = {}
    source_files = SourceFile.objects.filter(id__in=input_file_ids)
    for source_file in source_files:
        file_name_to_id[source_file.file_name] = source_file.id

    for file_name in parse_results:
        if file_name not in file_name_to_id:
            continue
        src_file_id = file_name_to_id[file_name]

        parse_result = parse_results[file_name]
        geo_json = parse_result[0]
        data_started = parse_result[1]
        data_ended = parse_result[2]
        data_types = parse_result[3]
        new_workspace_path = parse_result[4]
        if data_started:
            data_started = parse_datetime(data_started)
        if data_ended:
            data_ended = parse_datetime(data_ended)

        SourceFile.objects.save_parse_results(src_file_id, geo_json, data_started, data_ended, data_types,
                                              new_workspace_path)
def test_successful(self, mock_save):
    """Tests calling SourceDataFileParseSaver.save_parse_results() successfully"""

    geo_json = {'type': 'Feature'}
    started = now()
    ended = started + datetime.timedelta(days=1)

    # Quick hack to give these a valid timezone. Easier than creating a TZ object since we don't really care
    # about the time for this test.
    started = parse_datetime(started.isoformat() + "Z")
    ended = parse_datetime(ended.isoformat() + "Z")

    file_ids = [self.source_file_1.id, self.source_file_2.id, self.extra_source_file_id]

    parse_results = {
        self.file_name_1: (geo_json, started, None, [], None),
        self.file_name_2: (None, None, ended, [], None),
        'FILE_WITH_NO_SOURCE_FILE_MODEL': (None, None, None, None, None),
    }

    SourceDataFileParseSaver().save_parse_results(parse_results, file_ids)

    calls = [
        call(self.source_file_1.id, geo_json, started, None, [], None),
        call(self.source_file_2.id, None, None, ended, [], None),
    ]

    self.assertEqual(mock_save.call_count, 2)
    mock_save.assert_has_calls(calls, any_order=True)
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    message = CancelJobsBulk()
    if 'current_job_id' in json_dict:
        message.current_job_id = json_dict['current_job_id']
    if 'started' in json_dict:
        message.started = parse_datetime(json_dict['started'])
    if 'ended' in json_dict:
        message.ended = parse_datetime(json_dict['ended'])
    if 'error_categories' in json_dict:
        message.error_categories = json_dict['error_categories']
    if 'error_ids' in json_dict:
        message.error_ids = json_dict['error_ids']
    if 'job_ids' in json_dict:
        message.job_ids = json_dict['job_ids']
    if 'job_type_ids' in json_dict:
        message.job_type_ids = json_dict['job_type_ids']
    if 'status' in json_dict:
        message.status = json_dict['status']
    if 'job_type_names' in json_dict:
        message.job_type_names = json_dict['job_type_names']
    if 'batch_ids' in json_dict:
        message.batch_ids = json_dict['batch_ids']
    if 'recipe_ids' in json_dict:
        message.recipe_ids = json_dict['recipe_ids']
    if 'is_superseded' in json_dict:
        message.is_superseded = json_dict['is_superseded']
    return message
def test_successful_v6(self, mock_save):
    """Tests calling SourceDataFileParseSaver.save_parse_results_v6() successfully"""

    started = '2018-06-01T00:00:00Z'
    ended = '2018-06-01T01:00:00Z'
    types = ['one', 'two', 'three']
    new_workspace_path = 'awful/path'

    data = {
        'type': 'Feature',
        'geometry': {
            'type': 'Point',
            'coordinates': [0, 1]
        },
        'properties': {
            'dataStarted': started,
            'dataEnded': ended,
            'dataTypes': types,
            'newWorkspacePath': new_workspace_path
        }
    }

    metadata = {self.source_file_1.id: SeedMetadata.metadata_from_json(data, do_validate=False)}

    calls = [call(self.source_file_1.id, data, parse_datetime(started), parse_datetime(ended), types,
                  new_workspace_path)]

    SourceDataFileParseSaver().save_parse_results_v6(metadata)

    self.assertEqual(mock_save.call_count, 1)
    mock_save.assert_has_calls(calls, any_order=True)
def __init__(self, definition):
    """Creates a batch definition object from the given dictionary. The general format is checked for correctness.

    :param definition: The batch definition
    :type definition: dict

    :raises :class:`batch.configuration.definition.exceptions.InvalidDefinition`: If the given definition is invalid
    """

    self._definition = definition

    try:
        validate(definition, BATCH_DEFINITION_SCHEMA)
    except ValidationError as ex:
        raise InvalidDefinition('', 'Invalid batch definition: %s' % unicode(ex))

    self._populate_default_values()
    if not self._definition['version'] == '1.0':
        raise InvalidDefinition('', '%s is an unsupported version number' % self._definition['version'])

    date_range = self._definition['date_range'] if 'date_range' in self._definition else None

    self.date_range_type = None
    if date_range and 'type' in date_range:
        self.date_range_type = date_range['type']

    self.started = None
    if date_range and 'started' in date_range:
        try:
            self.started = parse.parse_datetime(date_range['started'])
        except ValueError:
            raise InvalidDefinition('', 'Invalid start date format: %s' % date_range['started'])
    self.ended = None
    if date_range and 'ended' in date_range:
        try:
            self.ended = parse.parse_datetime(date_range['ended'])
        except ValueError:
            raise InvalidDefinition('', 'Invalid end date format: %s' % date_range['ended'])

    self.job_names = self._definition['job_names']
    self.all_jobs = self._definition['all_jobs']

    self.priority = None
    if 'priority' in self._definition:
        try:
            self.priority = self._definition['priority']
        except ValueError:
            raise InvalidDefinition('', 'Invalid priority: %s' % self._definition['priority'])

    self.trigger_rule = False
    self.trigger_config = None
    if 'trigger_rule' in self._definition:
        if isinstance(self._definition['trigger_rule'], bool):
            self.trigger_rule = self._definition['trigger_rule']
        else:
            self.trigger_config = BatchTriggerConfiguration('BATCH', self._definition['trigger_rule'])
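# Illustrative batch definition accepted by the constructor above; the values are made up but follow the
# version 1.0 layout it validates, and the BatchDefinition class name is assumed from the module path.
definition = {
    'version': '1.0',
    'date_range': {
        'type': 'data',  # assumed date-range type value
        'started': '2015-01-01T00:00:00Z',
        'ended': '2015-02-01T00:00:00Z',
    },
    'job_names': ['job-a', 'job-b'],
    'all_jobs': False,
    'priority': 100,
}
batch_definition = BatchDefinition(definition)  # raises InvalidDefinition if the schema or dates are bad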
def __init__(self, definition):
    """Creates a batch definition object from the given dictionary. The general format is checked for correctness.

    :param definition: The batch definition
    :type definition: dict

    :raises :class:`batch.configuration.definition.exceptions.InvalidDefinition`: If the given definition is invalid
    """

    self._definition = definition

    try:
        validate(definition, BATCH_DEFINITION_SCHEMA)
    except ValidationError as ex:
        raise InvalidDefinition("Invalid batch definition: %s" % unicode(ex))

    self._populate_default_values()
    if not self._definition["version"] == "1.0":
        raise InvalidDefinition("%s is an unsupported version number" % self._definition["version"])

    date_range = self._definition["date_range"] if "date_range" in self._definition else None

    self.date_range_type = None
    if date_range and "type" in date_range:
        self.date_range_type = date_range["type"]

    self.started = None
    if date_range and "started" in date_range:
        try:
            self.started = parse.parse_datetime(date_range["started"])
        except ValueError:
            raise InvalidDefinition("Invalid start date format: %s" % date_range["started"])
    self.ended = None
    if date_range and "ended" in date_range:
        try:
            self.ended = parse.parse_datetime(date_range["ended"])
        except ValueError:
            raise InvalidDefinition("Invalid end date format: %s" % date_range["ended"])

    self.job_names = self._definition["job_names"]
    self.all_jobs = self._definition["all_jobs"]

    self.priority = None
    if "priority" in self._definition:
        try:
            self.priority = self._definition["priority"]
        except ValueError:
            raise InvalidDefinition("Invalid priority: %s" % self._definition["priority"])
def parse_datetime(request, name, default_value=None, required=True):
    """Parses a datetime parameter from the given request.

    :param request: The context of an active HTTP request.
    :type request: :class:`rest_framework.request.Request`
    :param name: The name of the parameter to parse.
    :type name: string
    :param default_value: The default value used if the parameter is not provided.
    :type default_value: datetime.datetime
    :param required: Indicates whether or not the parameter is required. An exception will be raised if the
        parameter does not exist, there is no default value, and required is True.
    :type required: bool
    :returns: The value of the named parameter or the default value if provided.
    :rtype: datetime.datetime

    :raises :class:`util.rest.BadParameter`: If the value cannot be parsed.
    """

    value = _get_param(request, name, default_value, required)
    if not isinstance(value, basestring):
        return value

    try:
        result = parse_util.parse_datetime(value)
        if result:
            return result
        raise
    except:
        raise BadParameter('Datetime values must follow ISO-8601 and include a timezone: %s' % name)
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    message = RestartScheduler()
    message.when = parse_datetime(json_dict['when'])
    return message
def parse_datetime(request, name, default_value=None, required=True):
    '''Parses a datetime parameter from the given request.

    :param request: The context of an active HTTP request.
    :type request: :class:`rest_framework.request.Request`
    :param name: The name of the parameter to parse.
    :type name: str
    :param default_value: The default value used if the parameter is not provided.
    :type default_value: datetime.datetime
    :param required: Indicates whether or not the parameter is required. An exception will be raised if the
        parameter does not exist, there is no default value, and required is True.
    :type required: bool
    :returns: The value of the named parameter or the default value if provided.
    :rtype: datetime.datetime

    :raises :class:`util.rest.BadParameter`: If the value cannot be parsed.
    '''
    value = _get_param(request, name, default_value, required)
    if not isinstance(value, basestring):
        return value

    try:
        result = parse_util.parse_datetime(value)
        if result:
            return result
        raise
    except ParseError:
        raise BadParameter('Datetime value must include a timezone: %s' % name)
    except:
        raise BadParameter('Invalid datetime format for parameter: %s' % name)
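# Usage sketch (hypothetical helper, not part of the module above): pulling an optional time window from
# request parameters with the REST parse_datetime() helper defined above. The 'started'/'ended' parameter
# names, the get_time_range name, and the rest_util alias are assumptions; values that are not
# timezone-aware ISO-8601 strings raise BadParameter.
import util.rest as rest_util

def get_time_range(request):
    # Both parameters are optional here, so missing values simply return None.
    started = rest_util.parse_datetime(request, 'started', required=False)
    ended = rest_util.parse_datetime(request, 'ended', required=False)
    return started, ended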
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    message = CancelJobs()
    message.when = parse_datetime(json_dict['when'])
    for job_id in json_dict['job_ids']:
        message.add_job(job_id)
    return message
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    status_change = parse_datetime(json_dict['status_change'])
    message = BlockedJobs()
    message.status_change = status_change
    for job_id in json_dict['job_ids']:
        message.add_job(job_id)
    return message
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    message = CompletedJobs()
    message.ended = parse_datetime(json_dict['ended'])
    for job_dict in json_dict['jobs']:
        job_id = job_dict['id']
        exe_num = job_dict['exe_num']
        message.add_completed_job(CompletedJob(job_id, exe_num))
    return message
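# Illustrative payload for CompletedJobs.from_json(); the job IDs, execution numbers, and timestamp are
# made up but follow the keys read above. The other message from_json() handlers in this section accept
# analogous dictionaries.
json_dict = {
    'ended': '2015-01-01T00:00:00Z',
    'jobs': [
        {'id': 12, 'exe_num': 1},
        {'id': 13, 'exe_num': 3},
    ],
}
message = CompletedJobs.from_json(json_dict)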
def test_successful(self, mock_save):
    '''Tests calling SourceDataFileParseSaver.save_parse_results() successfully'''

    geo_json = {u'type': u'Feature'}
    started = now()
    ended = started + datetime.timedelta(days=1)

    # Quick hack to give these a valid timezone. Easier than creating a TZ object since we don't really care
    # about the time for this test.
    started = parse_datetime(started.isoformat() + "Z")
    ended = parse_datetime(ended.isoformat() + "Z")

    file_ids = [self.source_file_1.id, self.source_file_2.id, self.extra_source_file_id]
    parse_results = {self.file_name_1: (geo_json, started, None, [], None, None),
                     self.file_name_2: (None, None, ended, [], None, None),
                     u'FILE_WITH_NO_SOURCE_FILE_MODEL': (None, None, None, None, None)}

    SourceDataFileParseSaver().save_parse_results(parse_results, file_ids)

    calls = [call(self.source_file_1.id, geo_json, started, None, [], None, None),
             call(self.source_file_2.id, None, None, ended, [], None, None)]

    self.assertEqual(mock_save.call_count, 2)
    mock_save.assert_has_calls(calls, any_order=True)
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    started = parse_datetime(json_dict['started'])
    message = RunningJobs(started)
    for node_dict in json_dict['nodes']:
        node_id = node_dict['id']
        for job_dict in node_dict['jobs']:
            job_id = job_dict['id']
            exe_num = job_dict['exe_num']
            message.add_running_job(job_id, exe_num, node_id)
    return message
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    message = FailedJobs()
    message.ended = parse_datetime(json_dict['ended'])
    for error_dict in json_dict['errors']:
        error_id = error_dict['id']
        for job_dict in error_dict['jobs']:
            job_id = job_dict['id']
            exe_num = job_dict['exe_num']
            message.add_failed_job(FailedJob(job_id, exe_num, error_id))
    return message
def from_json(json_dict):
    """See :meth:`messaging.messages.message.CommandMessage.from_json`
    """

    message = SupersedeRecipeNodes()
    for recipe_id in json_dict['recipe_ids']:
        message.add_recipe(recipe_id)
    message.when = parse_datetime(json_dict['when'])
    message.supersede_all = json_dict['supersede_all']
    message.supersede_jobs = set(json_dict['supersede_jobs'])
    message.supersede_subrecipes = set(json_dict['supersede_subrecipes'])
    message.unpublish_all = json_dict['unpublish_all']
    message.unpublish_jobs = set(json_dict['unpublish_jobs'])
    message.supersede_recursive_all = json_dict['supersede_recursive_all']
    message.supersede_recursive = set(json_dict['supersede_recursive'])
    message.unpublish_recursive_all = json_dict['unpublish_recursive_all']
    message.unpublish_recursive = set(json_dict['unpublish_recursive'])
    return message
            ],
            'level': 'DEBUG',
        },
    },
}

LOG_CONSOLE_FILE_INFO = {
    'version': 1,
    'formatters': LOG_FORMATTERS,
    'filters': LOG_FILTERS,
    'handlers': LOG_HANDLERS,
    'loggers': {
        '': {
            'handlers': ['console', 'console-err', 'file-info', 'file-error'],
            'level': 'INFO',
        },
    },
}

LOGGING = LOG_CONSOLE_INFO


# Hack to fix ISO8601 for datetime filters.
# This should be taken care of by a future django fix. It might even be handled
# by a newer version of django-rest-framework. Unfortunately, both of these solutions
# will accept datetimes without timezone information, which we do not want to allow.
# See https://code.djangoproject.com/tickets/23448
# Solution modified from http://akinfold.blogspot.com/2012/12/datetimefield-doesnt-accept-iso-8601.html
from django.forms import fields
from util.parse import parse_datetime
fields.DateTimeField.strptime = lambda _self, datetime_string, _format: parse_datetime(datetime_string)
def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
    """Uploads the given local product files into the workspace.

    :param file_entries: List of files to upload
    :type file_entries: list[:class:`product.types.ProductFileMetadata`]
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`storage.models.ScaleFile`
    """

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Get property names and values as strings
    properties = job_exe.job.get_job_data().get_all_properties()

    # Product UUID will be based in part on input data (UUIDs of input files and name/value pairs of input
    # properties)
    input_strings = input_file_uuids
    input_strings.extend(properties)

    # Determine if any input files are non-operational products
    input_products = ScaleFile.objects.filter(id__in=[f['id'] for f in input_files], file_type='PRODUCT')
    input_products_operational = all([f.is_operational for f in input_products])

    source_started = job_exe.job.source_started
    source_ended = job_exe.job.source_ended
    source_sensor_class = job_exe.job.source_sensor_class
    source_sensor = job_exe.job.source_sensor
    source_collection = job_exe.job.source_collection
    source_task = job_exe.job.source_task
    if not source_started:
        # Compute the overall start and stop times for all file_entries
        source_files = FileAncestryLink.objects.get_source_ancestors([f['id'] for f in input_files])
        start_times = [f.data_started for f in source_files]
        end_times = [f.data_ended for f in source_files]
        start_times.sort()
        end_times.sort(reverse=True)
        if start_times:
            source_started = start_times[0]
        if end_times:
            source_ended = end_times[0]

    products_to_save = []
    for entry in file_entries:
        product = ProductFile.create()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        file_name = os.path.basename(entry.local_path)
        file_size = os.path.getsize(entry.local_path)
        product.set_basic_fields(file_name, file_size, entry.media_type)
        product.file_path = entry.remote_path
        product.job_output = entry.output_name

        # Add a stable identifier based on the job type, input files, input properties, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_strings)

        # Add temporal info to product if available
        if entry.data_start:
            product.data_started = parse_datetime(entry.data_start)
        if entry.data_end:
            product.data_ended = parse_datetime(entry.data_end)

        if entry.geojson:
            geom, props = geo_utils.parse_geo_json(entry.geojson)
            product.geometry = geom
            if props:
                product.meta_data = props
            product.center_point = geo_utils.get_center_point(geom)

        # Add recipe info to product if available.
        job_recipe = Recipe.objects.get_recipe_for_job(job_exe.job_id)
        if job_recipe:
            product.recipe_id = job_recipe.recipe.id
            product.recipe_type = job_recipe.recipe.recipe_type
            product.recipe_node = job_recipe.node_name

        # Add batch info to product if available.
        try:
            from batch.models import BatchJob
            product.batch_id = BatchJob.objects.get(job_id=job_exe.job_id).batch_id
        except BatchJob.DoesNotExist:
            product.batch_id = None

        # Allow override, if set via side-car metadata, otherwise take derived values from above
        product.source_started = entry.source_started if entry.source_started else source_started
        product.source_ended = entry.source_ended if entry.source_ended else source_ended

        # Supplemental source metadata
        product.source_sensor_class = entry.source_sensor_class if entry.source_sensor_class else source_sensor_class
        product.source_sensor = entry.source_sensor if entry.source_sensor else source_sensor
        product.source_collection = entry.source_collection if entry.source_collection else source_collection
        product.source_task = entry.source_task if entry.source_task else source_task

        # Update product model with details derived from the job_type
        product.meta_data['url'] = product.url
        product.meta_data['job_name'] = job_exe.job_type.name
        product.meta_data['job_version'] = job_exe.job_type.get_job_version()
        product.meta_data['package_version'] = job_exe.job_type.get_package_version()

        products_to_save.append(FileUpload(product, entry.local_path))

    return ScaleFile.objects.upload_files(workspace, products_to_save)
def test_parse_datetime(self):
    '''Tests parsing a valid ISO datetime.'''
    self.assertEqual(parse_util.parse_datetime('2015-01-01T00:00:00Z'),
                     datetime.datetime(2015, 1, 1, tzinfo=timezone.utc))
def upload_files(self, upload_dir, work_dir, file_entries, input_file_ids, job_exe, workspace):
    '''Uploads the given local product files into the workspace. All database changes will be made in an atomic
    transaction. This method assumes that ScaleFileManager.setup_upload_dir() has already been called with the
    same upload and work directories.

    :param upload_dir: Absolute path to the local directory of the files to upload
    :type upload_dir: str
    :param work_dir: Absolute path to a local work directory available to assist in uploading
    :type work_dir: str
    :param file_entries: List of files where each file is a tuple of (source path relative to upload directory,
        workspace path for storing the file, media_type)
    :type file_entries: list of tuple(str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`product.models.ProductFile`
    '''

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Determine if any input files are non-operational products
    input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
    input_products_operational = all([f.is_operational for f in input_products])

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]

        product = ProductFile()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        product.media_type = media_type

        # Add a stable identifier based on the job type, input files, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        file_name = os.path.basename(local_path)
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

        # Add geospatial info to product if available
        if len(entry) > 3:
            geo_metadata = entry[3]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        products_to_save.append((product, local_path, remote_path))

    return ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, products_to_save)
def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
    """Uploads the given local product files into the workspace.

    :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
        storing the file, media_type, output_name)
    :type file_entries: list of tuple(str, str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`storage.models.ScaleFile`
    """

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Get property names and values as strings
    properties = job_exe.job.get_job_data().get_all_properties()

    # Product UUID will be based in part on input data (UUIDs of input files and name/value pairs of input
    # properties)
    input_strings = input_file_uuids
    input_strings.extend(properties)

    # Determine if any input files are non-operational products
    input_products = ScaleFile.objects.filter(id__in=[f['id'] for f in input_files], file_type='PRODUCT')
    input_products_operational = all([f.is_operational for f in input_products])

    # Compute the overall start and stop times for all file_entries
    source_files = FileAncestryLink.objects.get_source_ancestors([f['id'] for f in input_files])
    start_times = [f.data_started for f in source_files]
    end_times = [f.data_ended for f in source_files]
    start_times.sort()
    end_times.sort(reverse=True)

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]
        output_name = entry[3]

        product = ProductFile.create()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        file_name = os.path.basename(local_path)
        file_size = os.path.getsize(local_path)
        product.set_basic_fields(file_name, file_size, media_type)
        product.file_path = remote_path
        product.job_output = output_name

        # Add a stable identifier based on the job type, input files, input properties, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_strings)

        # Add geospatial info to product if available
        if len(entry) > 4:
            geo_metadata = entry[4]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                if props:
                    product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        # Add recipe info to product if available.
        job_recipe = Recipe.objects.get_recipe_for_job(job_exe.job_id)
        if job_recipe:
            product.recipe_id = job_recipe.recipe.id
            product.recipe_type = job_recipe.recipe.recipe_type
            product.recipe_job = job_recipe.job_name

        # Add batch info to product if available.
        try:
            from batch.models import BatchJob
            product.batch_id = BatchJob.objects.get(job_id=job_exe.job_id).batch_id
        except BatchJob.DoesNotExist:
            product.batch_id = None

        # Add start and stop times if available
        if start_times:
            product.source_started = start_times[0]
        if end_times:
            product.source_ended = end_times[0]

        products_to_save.append(FileUpload(product, local_path))

    return ScaleFile.objects.upload_files(workspace, products_to_save)
def test_parse_datetime(self):
    """Tests parsing a valid ISO datetime."""
    self.assertEqual(parse_util.parse_datetime('2015-01-01T00:00:00Z'),
                     datetime.datetime(2015, 1, 1, tzinfo=utc))
def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
    """Uploads the given local product files into the workspace.

    :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
        storing the file, media_type)
    :type file_entries: list of tuple(str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`product.models.ProductFile`
    """

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Determine if any input files are non-operational products
    input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
    input_products_operational = all([f.is_operational for f in input_products])

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]

        product = ProductFile()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        file_name = os.path.basename(local_path)
        file_size = os.path.getsize(local_path)
        product.set_basic_fields(file_name, file_size, media_type)
        product.file_path = remote_path

        # Add a stable identifier based on the job type, input files, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

        # Add geospatial info to product if available
        if len(entry) > 3:
            geo_metadata = entry[3]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        products_to_save.append(FileUpload(product, local_path))

    return ScaleFile.objects.upload_files(workspace, products_to_save)
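# Illustrative file_entries list for the upload_files() variant above; paths, media types, and metadata
# values are made up. The optional fourth element is the geo_metadata dict read in the loop above.
file_entries = [
    ('/tmp/outputs/image.tif', 'products/image.tif', 'image/tiff'),
    ('/tmp/outputs/footprint.json', 'products/footprint.json', 'application/vnd.geo+json',
     {'data_started': '2015-01-01T00:00:00Z',
      'data_ended': '2015-01-01T01:00:00Z',
      'geo_json': {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [0.0, 0.0]}}}),
]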
def test_parse_datetime(self):
    """Tests parsing a valid ISO datetime."""
    self.assertEqual(parse_util.parse_datetime('2015-01-01T00:00:00Z'),
                     datetime.datetime(2015, 1, 1, tzinfo=timezone.utc))
def test_parse_datetime_invalid(self):
    """Tests parsing an invalid ISO datetime."""
    self.assertIsNone(parse_util.parse_datetime('20150101T00:00:00Z'))
        '': {
            'handlers': ['console', 'console-err', 'file-debug', 'file-info', 'file-error'],
            'level': 'DEBUG',
        },
    },
}

LOG_CONSOLE_FILE_INFO = {
    'version': 1,
    'formatters': LOG_FORMATTERS,
    'filters': LOG_FILTERS,
    'handlers': LOG_HANDLERS,
    'loggers': {
        '': {
            'handlers': ['console', 'console-err', 'file-info', 'file-error'],
            'level': 'INFO',
        },
    },
}

LOGGING = LOG_CONSOLE_INFO


# Hack to fix ISO8601 for datetime filters.
# This should be taken care of by a future django fix. It might even be handled
# by a newer version of django-rest-framework. Unfortunately, both of these solutions
# will accept datetimes without timezone information, which we do not want to allow.
# See https://code.djangoproject.com/tickets/23448
# Solution modified from http://akinfold.blogspot.com/2012/12/datetimefield-doesnt-accept-iso-8601.html
from django.forms import fields
from util.parse import parse_datetime
fields.DateTimeField.strptime = lambda _self, datetime_string, _format: parse_datetime(datetime_string)
def upload_files(self, upload_dir, work_dir, file_entries, input_file_ids, job_exe, workspace):
    '''Uploads the given local product files into the workspace. All database changes will be made in an atomic
    transaction. This method assumes that ScaleFileManager.setup_upload_dir() has already been called with the
    same upload and work directories.

    :param upload_dir: Absolute path to the local directory of the files to upload
    :type upload_dir: str
    :param work_dir: Absolute path to a local work directory available to assist in uploading
    :type work_dir: str
    :param file_entries: List of files where each file is a tuple of (source path relative to upload directory,
        workspace path for storing the file, media_type)
    :type file_entries: list of tuple(str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`product.models.ProductFile`
    '''

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Determine if any input files are non-operational products
    input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
    input_products_operational = all([f.is_operational for f in input_products])

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]

        product = ProductFile()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        product.media_type = media_type

        # Add a stable identifier based on the job type, input files, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        file_name = os.path.basename(local_path)
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

        # Add geospatial info to product if available
        if len(entry) > 3:
            geo_metadata = entry[3]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        products_to_save.append((product, local_path, remote_path))

    return ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, products_to_save)
def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
    """Uploads the given local product files into the workspace.

    :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
        storing the file, media_type)
    :type file_entries: list of tuple(str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`product.models.ProductFile`
    """

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Determine if any input files are non-operational products
    input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
    input_products_operational = all([f.is_operational for f in input_products])

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]

        product = ProductFile()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        file_name = os.path.basename(local_path)
        file_size = os.path.getsize(local_path)
        product.set_basic_fields(file_name, file_size, media_type)
        product.file_path = remote_path

        # Add a stable identifier based on the job type, input files, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

        # Add geospatial info to product if available
        if len(entry) > 3:
            geo_metadata = entry[3]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        products_to_save.append(FileUpload(product, local_path))

    return ScaleFile.objects.upload_files(workspace, products_to_save)