def update_border(self, name, border, effective=None):
    """Updates the country border geometry for an existing country, adding a new entry for the new effective date.

    :param name: The name of an existing country
    :type name: str
    :param border: The new border geometry. Either GEOSGeometry or geojson (which will be converted to GEOSGeometry)
    :type border: GEOSGeometry or str
    :param effective: The effective date for the new border. If None, now() will be used
    :type effective: :class:`datetime.datetime`
    """

    if not isinstance(border, geos.geometry.GEOSGeometry):
        border, _ = geospatial_utils.parse_geo_json(border)

    # Acquire model lock
    cur = self.get(name=name)
    if cur:
        new_item = CountryData(name=cur.name, fips=cur.fips, gmi=cur.gmi, iso2=cur.iso2, iso3=cur.iso3,
                               iso_num=cur.iso_num, border=border, effective=effective)
        new_item.save()
def save_parse_results(self, src_file_id, geo_json, data_started, data_ended, data_types, new_workspace_path):
    """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
    transaction.

    :param src_file_id: The ID of the source file
    :type src_file_id: int
    :param geo_json: The associated geojson data, possibly None
    :type geo_json: dict
    :param data_started: The start time of the data contained in the source file, possibly None
    :type data_started: :class:`datetime.datetime` or None
    :param data_ended: The end time of the data contained in the source file, possibly None
    :type data_ended: :class:`datetime.datetime` or None
    :param data_types: List of strings containing the data types tags for this source file.
    :type data_types: [string]
    :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
        None, the source file should not be moved.
    :type new_workspace_path: str
    """

    geom = None
    props = None
    if geo_json:
        geom, props = geo_utils.parse_geo_json(geo_json)

    # Acquire model lock
    src_file = SourceFile.objects.select_for_update().get(pk=src_file_id)
    src_file.is_parsed = True
    src_file.parsed = now()
    src_file.data_started = data_started
    src_file.data_ended = data_ended
    target_date = src_file.data_started
    if target_date is None:
        target_date = src_file.data_ended
    if target_date is None:
        target_date = src_file.created
    for tag in data_types:
        src_file.add_data_type_tag(tag)
    if geom:
        src_file.geometry = geom
        src_file.center_point = geo_utils.get_center_point(geom)
        src_file.meta_data = props

    # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
    src_file.set_countries()
    src_file.save()

    # Move the source file if a new workspace path is provided and the workspace allows it
    old_workspace_path = src_file.file_path
    if new_workspace_path and src_file.workspace.is_move_enabled:
        ScaleFile.objects.move_files([FileMove(src_file, new_workspace_path)])

    try:
        # Check trigger rules for parsed source files
        ParseTriggerHandler().process_parsed_source_file(src_file)
    except Exception:
        # Move file back if there was an error
        if new_workspace_path and src_file.workspace.is_move_enabled:
            ScaleFile.objects.move_files([FileMove(src_file, old_workspace_path)])
        raise
def save_parse_results(self, src_file_id, geo_json, data_started, data_ended, data_types, new_workspace_path,
                       work_dir):
    '''Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
    transaction.

    :param src_file_id: The ID of the source file
    :type src_file_id: int
    :param geo_json: The associated geojson data, possibly None
    :type geo_json: dict
    :param data_started: The start time of the data contained in the source file, possibly None
    :type data_started: :class:`datetime.datetime` or None
    :param data_ended: The end time of the data contained in the source file, possibly None
    :type data_ended: :class:`datetime.datetime` or None
    :param data_types: List of strings containing the data types tags for this source file.
    :type data_types: list
    :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
        None, the source file should not be moved.
    :type new_workspace_path: str
    :param work_dir: Absolute path to a local work directory available to assist in moving the source file. Only
        needed if new_workspace_path is not None.
    :type work_dir: str
    '''

    geom = None
    props = None
    if geo_json:
        geom, props = geo_utils.parse_geo_json(geo_json)

    # Acquire model lock
    src_file = SourceFile.objects.select_for_update().get(pk=src_file_id)
    src_file.is_parsed = True
    src_file.parsed = now()
    src_file.data_started = data_started
    src_file.data_ended = data_ended
    target_date = src_file.data_started
    if target_date is None:
        target_date = src_file.data_ended
    if target_date is None:
        target_date = src_file.created
    for tag in data_types:
        src_file.add_data_type_tag(tag)
    if geom:
        src_file.geometry = geom
        src_file.center_point = geo_utils.get_center_point(geom)
        src_file.meta_data = props

    # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
    src_file.set_countries()
    src_file.save()

    # Move the source file if a new workspace path is provided
    if new_workspace_path:
        ScaleFile.objects.move_files(work_dir, [(src_file, new_workspace_path)])

    for parse_rule in get_parse_rules():
        parse_rule.process_parse(src_file)
def test_parse_geo_json(self):
    '''Tests parsing geojson'''

    geo_json = {u'geometry': {u'type': u'POLYGON',
                              u'coordinates': [[[40, 26], [50, 27], [60, 26], [50, 25], [40, 26]]]},
                u'type': u'Feature'}

    # Call method to test
    geom, props = geo_utils.parse_geo_json(geo_json)

    # Check results
    self.assertEqual(geom.geom_type, u'Polygon')
    self.assertIsNone(props)
def test_parse_geo_json(self):
    """Tests parsing geojson"""

    geo_json = {'geometry': {'type': 'POLYGON',
                             'coordinates': [[[40, 26], [50, 27], [60, 26], [50, 25], [40, 26]]]},
                'type': 'Feature'}

    # Call method to test
    geom, props = geo_utils.parse_geo_json(geo_json)

    # Check results
    self.assertEqual(geom.geom_type, 'Polygon')
    self.assertIsNone(props)
def test_get_center_point(self):
    '''Tests calculating center point'''

    geo_json = {
        "type": "Polygon",
        "coordinates": [[[1.0, 10.0], [2.0, 10.0], [2.0, 20.0], [1.0, 20.0], [1.0, 10.0]]]
    }

    # Call method to test
    geom, props = geo_utils.parse_geo_json(geo_json)
    center = geo_utils.get_center_point(geom)

    # Check results
    self.assertEqual(center.geom_type, u'Point')
    self.assertEqual(center.coords, (1.5, 15.0))
def test_get_center_point(self):
    """Tests calculating center point"""

    geo_json = {
        "type": "Polygon",
        "coordinates": [[[1.0, 10.0], [2.0, 10.0], [2.0, 20.0], [1.0, 20.0], [1.0, 10.0]]]
    }

    # Call method to test
    geom, props = geo_utils.parse_geo_json(geo_json)
    center = geo_utils.get_center_point(geom)

    # Check results
    self.assertEqual(center.geom_type, 'Point')
    self.assertEqual(center.coords, (1.5, 15.0))
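# The two tests above exercise geo_utils.parse_geo_json() and geo_utils.get_center_point(). Below is a minimal
# sketch of utilities with the behavior those tests expect; it assumes Django's GEOS/GDAL bindings are installed
# and is an illustration of the expected contract, not the project's actual implementation.
import json

from django.contrib.gis.geos import GEOSGeometry

# Map case-insensitive GeoJSON type names (e.g. 'POLYGON') to the spellings GEOS reports (e.g. 'Polygon')
_GEOJSON_TYPES = {'point': 'Point', 'multipoint': 'MultiPoint', 'linestring': 'LineString',
                  'multilinestring': 'MultiLineString', 'polygon': 'Polygon', 'multipolygon': 'MultiPolygon',
                  'geometrycollection': 'GeometryCollection'}


def parse_geo_json(geo_json):
    """Returns a (GEOSGeometry, properties) tuple for a GeoJSON dict; properties is None when absent."""

    if geo_json.get('type') == 'Feature':
        geometry = dict(geo_json['geometry'])
        props = geo_json.get('properties')
    else:
        geometry = dict(geo_json)
        props = None
    geometry['type'] = _GEOJSON_TYPES.get(geometry['type'].lower(), geometry['type'])
    # GEOSGeometry accepts a GeoJSON string (parsed through GDAL)
    return GEOSGeometry(json.dumps(geometry)), props


def get_center_point(geometry):
    """Returns the center of the geometry's bounding box as a Point."""

    return geometry.envelope.centroid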
def update_border(self, name, border, effective=None):
    """Updates the country border geometry for an existing country, adding a new entry for the new effective date.

    :param name: The name of an existing country
    :type name: string
    :param border: The new border geometry. Either GEOSGeometry or geojson (which will be converted to GEOSGeometry)
    :type border: GEOSGeometry or dict
    :param effective: The effective date for the new border. If None, now() will be used
    :type effective: :class:`datetime.datetime`
    """

    if not isinstance(border, geos.geometry.GEOSGeometry):
        border, _ = geospatial_utils.parse_geo_json(border)

    # Acquire model lock
    cur = self.get(name=name)
    if cur:
        new_item = CountryData(name=cur.name, fips=cur.fips, gmi=cur.gmi, iso2=cur.iso2, iso3=cur.iso3,
                               iso_num=cur.iso_num, border=border, effective=effective)
        new_item.save()
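# A hedged usage sketch for update_border() above. It assumes the method lives on the CountryData model manager
# (CountryData.objects) and that a country named 'Denmark' already exists; both names are illustrative.
from django.contrib.gis.geos import Polygon
from django.utils.timezone import now

new_border = Polygon(((8.0, 54.5), (13.0, 54.5), (13.0, 58.0), (8.0, 58.0), (8.0, 54.5)))
# Adds a new CountryData row carrying the revised border; passing effective=None would default to now()
CountryData.objects.update_border('Denmark', new_border, effective=now())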
def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
    """Uploads the given local product files into the workspace.

    :param file_entries: List of files to upload
    :type file_entries: list[:class:`product.types.ProductFileMetadata`]
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`storage.models.ScaleFile`
    """

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Get property names and values as strings
    properties = job_exe.job.get_job_data().get_all_properties()

    # Product UUID will be based in part on input data (UUIDs of input files and name/value pairs of input
    # properties)
    input_strings = input_file_uuids
    input_strings.extend(properties)

    # Determine if any input files are non-operational products
    input_products = ScaleFile.objects.filter(id__in=[f['id'] for f in input_files], file_type='PRODUCT')
    input_products_operational = all([f.is_operational for f in input_products])

    source_started = job_exe.job.source_started
    source_ended = job_exe.job.source_ended
    source_sensor_class = job_exe.job.source_sensor_class
    source_sensor = job_exe.job.source_sensor
    source_collection = job_exe.job.source_collection
    source_task = job_exe.job.source_task
    if not source_started:
        # Compute the overall start and stop times for all file_entries
        source_files = FileAncestryLink.objects.get_source_ancestors([f['id'] for f in input_files])
        start_times = [f.data_started for f in source_files]
        end_times = [f.data_ended for f in source_files]
        start_times.sort()
        end_times.sort(reverse=True)
        if start_times:
            source_started = start_times[0]
        if end_times:
            source_ended = end_times[0]

    products_to_save = []
    for entry in file_entries:
        product = ProductFile.create()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        file_name = os.path.basename(entry.local_path)
        file_size = os.path.getsize(entry.local_path)
        product.set_basic_fields(file_name, file_size, entry.media_type)
        product.file_path = entry.remote_path
        product.job_output = entry.output_name

        # Add a stable identifier based on the job type, input files, input properties, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_strings)

        # Add temporal info to product if available
        if entry.data_start:
            product.data_started = parse_datetime(entry.data_start)
        if entry.data_end:
            product.data_ended = parse_datetime(entry.data_end)

        if entry.geojson:
            geom, props = geo_utils.parse_geo_json(entry.geojson)
            product.geometry = geom
            if props:
                product.meta_data = props
            product.center_point = geo_utils.get_center_point(geom)

        # Add recipe info to product if available.
        job_recipe = Recipe.objects.get_recipe_for_job(job_exe.job_id)
        if job_recipe:
            product.recipe_id = job_recipe.recipe.id
            product.recipe_type = job_recipe.recipe.recipe_type
            product.recipe_node = job_recipe.node_name

        # Add batch info to product if available.
        try:
            from batch.models import BatchJob
            product.batch_id = BatchJob.objects.get(job_id=job_exe.job_id).batch_id
        except BatchJob.DoesNotExist:
            product.batch_id = None

        # Allow override, if set via side-car metadata, otherwise take derived values from above
        product.source_started = entry.source_started if entry.source_started else source_started
        product.source_ended = entry.source_ended if entry.source_ended else source_ended

        # Supplemental source metadata
        product.source_sensor_class = entry.source_sensor_class if entry.source_sensor_class else source_sensor_class
        product.source_sensor = entry.source_sensor if entry.source_sensor else source_sensor
        product.source_collection = entry.source_collection if entry.source_collection else source_collection
        product.source_task = entry.source_task if entry.source_task else source_task

        # Update product model with details derived from the job_type
        product.meta_data['url'] = product.url
        product.meta_data['job_name'] = job_exe.job_type.name
        product.meta_data['job_version'] = job_exe.job_type.get_job_version()
        product.meta_data['package_version'] = job_exe.job_type.get_package_version()

        products_to_save.append(FileUpload(product, entry.local_path))

    return ScaleFile.objects.upload_files(workspace, products_to_save)
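# A hedged example of calling the ProductFileMetadata-based upload_files() above. The keyword arguments shown for
# ProductFileMetadata (output_name, local_path, media_type, remote_path, data_start, data_end) mirror the
# attributes the method reads but are assumptions about the constructor, as are the manager attachment
# (ProductFile.objects) and the job_exe/workspace variables, which stand in for previously loaded models.
entries = [ProductFileMetadata(output_name='output_a',
                               local_path='/tmp/outputs/result.tif',
                               media_type='image/tiff',
                               remote_path='products/result.tif',
                               data_start='2022-01-01T00:00:00Z',
                               data_end='2022-01-01T01:00:00Z')]
products = ProductFile.objects.upload_files(entries, [1234], job_exe, workspace)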
def upload_files(self, upload_dir, work_dir, file_entries, input_file_ids, job_exe, workspace):
    '''Uploads the given local product files into the workspace. All database changes will be made in an atomic
    transaction. This method assumes that ScaleFileManager.setup_upload_dir() has already been called with the same
    upload and work directories.

    :param upload_dir: Absolute path to the local directory of the files to upload
    :type upload_dir: str
    :param work_dir: Absolute path to a local work directory available to assist in uploading
    :type work_dir: str
    :param file_entries: List of files where each file is a tuple of (source path relative to upload directory,
        workspace path for storing the file, media_type)
    :type file_entries: list of tuple(str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`product.models.ProductFile`
    '''

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Determine if any input files are non-operational products
    input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
    input_products_operational = all([f.is_operational for f in input_products])

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]

        product = ProductFile()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        product.media_type = media_type

        # Add a stable identifier based on the job type, input files, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        file_name = os.path.basename(local_path)
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

        # Add geospatial info to product if available
        if len(entry) > 3:
            geo_metadata = entry[3]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        products_to_save.append((product, local_path, remote_path))

    return ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, products_to_save)
def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
    """Uploads the given local product files into the workspace.

    :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
        storing the file, media_type)
    :type file_entries: list of tuple(str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`product.models.ProductFile`
    """

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Determine if any input files are non-operational products
    input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
    input_products_operational = all([f.is_operational for f in input_products])

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]

        product = ProductFile()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        file_name = os.path.basename(local_path)
        file_size = os.path.getsize(local_path)
        product.set_basic_fields(file_name, file_size, media_type)
        product.file_path = remote_path

        # Add a stable identifier based on the job type, input files, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

        # Add geospatial info to product if available
        if len(entry) > 3:
            geo_metadata = entry[3]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        products_to_save.append(FileUpload(product, local_path))

    return ScaleFile.objects.upload_files(workspace, products_to_save)
def save_parse_results(self, src_file_id, geo_json, data_started, data_ended, data_types, new_workspace_path,
                       work_dir):
    '''Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
    transaction.

    :param src_file_id: The ID of the source file
    :type src_file_id: int
    :param geo_json: The associated geojson data, possibly None
    :type geo_json: dict
    :param data_started: The start time of the data contained in the source file, possibly None
    :type data_started: :class:`datetime.datetime` or None
    :param data_ended: The end time of the data contained in the source file, possibly None
    :type data_ended: :class:`datetime.datetime` or None
    :param data_types: List of strings containing the data types tags for this source file.
    :type data_types: list
    :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
        None, the source file should not be moved.
    :type new_workspace_path: str
    :param work_dir: Absolute path to a local work directory available to assist in moving the source file. Only
        needed if new_workspace_path is not None.
    :type work_dir: str
    '''

    geom = None
    props = None
    if geo_json:
        geom, props = geo_utils.parse_geo_json(geo_json)

    # Acquire model lock
    src_file = SourceFile.objects.select_for_update().get(pk=src_file_id)
    src_file.is_parsed = True
    src_file.parsed = now()
    src_file.data_started = data_started
    src_file.data_ended = data_ended
    target_date = src_file.data_started
    if target_date is None:
        target_date = src_file.data_ended
    if target_date is None:
        target_date = src_file.created
    for tag in data_types:
        src_file.add_data_type_tag(tag)
    if geom:
        src_file.geometry = geom
        src_file.center_point = geo_utils.get_center_point(geom)
        src_file.meta_data = props

    # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
    src_file.set_countries()
    src_file.save()

    # Move the source file if a new workspace path is provided and the workspace allows it
    if new_workspace_path and src_file.workspace.is_move_enabled:
        old_workspace_path = src_file.file_path
        ScaleFile.objects.move_files(work_dir, [(src_file, new_workspace_path)])

    try:
        # Check trigger rules for parsed source files
        ParseTriggerHandler().process_parsed_source_file(src_file)
    except Exception:
        # Move file back if there was an error
        if new_workspace_path and src_file.workspace.is_move_enabled:
            ScaleFile.objects.move_files(work_dir, [(src_file, old_workspace_path)])
        raise
def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
    """Uploads the given local product files into the workspace.

    :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
        storing the file, media_type, output_name)
    :type file_entries: list of tuple(str, str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`storage.models.ScaleFile`
    """

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Get property names and values as strings
    properties = job_exe.job.get_job_data().get_all_properties()

    # Product UUID will be based in part on input data (UUIDs of input files and name/value pairs of input
    # properties)
    input_strings = input_file_uuids
    input_strings.extend(properties)

    # Determine if any input files are non-operational products
    input_products = ScaleFile.objects.filter(id__in=[f['id'] for f in input_files], file_type='PRODUCT')
    input_products_operational = all([f.is_operational for f in input_products])

    # Compute the overall start and stop times for all file_entries
    source_files = FileAncestryLink.objects.get_source_ancestors([f['id'] for f in input_files])
    start_times = [f.data_started for f in source_files]
    end_times = [f.data_ended for f in source_files]
    start_times.sort()
    end_times.sort(reverse=True)

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]
        output_name = entry[3]

        product = ProductFile.create()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        file_name = os.path.basename(local_path)
        file_size = os.path.getsize(local_path)
        product.set_basic_fields(file_name, file_size, media_type)
        product.file_path = remote_path
        product.job_output = output_name

        # Add a stable identifier based on the job type, input files, input properties, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_strings)

        # Add geospatial info to product if available
        if len(entry) > 4:
            geo_metadata = entry[4]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                if props:
                    product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        # Add recipe info to product if available.
        job_recipe = Recipe.objects.get_recipe_for_job(job_exe.job_id)
        if job_recipe:
            product.recipe_id = job_recipe.recipe.id
            product.recipe_type = job_recipe.recipe.recipe_type
            product.recipe_job = job_recipe.job_name

        # Add batch info to product if available.
        try:
            from batch.models import BatchJob
            product.batch_id = BatchJob.objects.get(job_id=job_exe.job_id).batch_id
        except BatchJob.DoesNotExist:
            product.batch_id = None

        # Add start and stop times if available
        if start_times:
            product.source_started = start_times[0]
        if end_times:
            product.source_ended = end_times[0]

        products_to_save.append(FileUpload(product, local_path))

    return ScaleFile.objects.upload_files(workspace, products_to_save)
def upload_files(self, upload_dir, work_dir, file_entries, input_file_ids, job_exe, workspace):
    '''Uploads the given local product files into the workspace. All database changes will be made in an atomic
    transaction. This method assumes that ScaleFileManager.setup_upload_dir() has already been called with the same
    upload and work directories.

    :param upload_dir: Absolute path to the local directory of the files to upload
    :type upload_dir: str
    :param work_dir: Absolute path to a local work directory available to assist in uploading
    :type work_dir: str
    :param file_entries: List of files where each file is a tuple of (source path relative to upload directory,
        workspace path for storing the file, media_type)
    :type file_entries: list of tuple(str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`product.models.ProductFile`
    '''

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Determine if any input files are non-operational products
    input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
    input_products_operational = all([f.is_operational for f in input_products])

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]

        product = ProductFile()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        product.media_type = media_type

        # Add a stable identifier based on the job type, input files, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        file_name = os.path.basename(local_path)
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

        # Add geospatial info to product if available
        if len(entry) > 3:
            geo_metadata = entry[3]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        products_to_save.append((product, local_path, remote_path))

    return ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, products_to_save)
def save_parse_results(self, src_file_id, geo_json, data_started, data_ended, data_types, new_workspace_path):
    """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
    transaction.

    :param src_file_id: The ID of the source file
    :type src_file_id: int
    :param geo_json: The associated geojson data, possibly None
    :type geo_json: dict
    :param data_started: The start time of the data contained in the source file, possibly None
    :type data_started: :class:`datetime.datetime` or None
    :param data_ended: The end time of the data contained in the source file, possibly None
    :type data_ended: :class:`datetime.datetime` or None
    :param data_types: List of strings containing the data types tags for this source file.
    :type data_types: [string]
    :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
        None, the source file should not be moved.
    :type new_workspace_path: str
    """

    geom = None
    props = None
    if geo_json:
        geom, props = geo_utils.parse_geo_json(geo_json)

    # Acquire model lock
    src_file = ScaleFile.objects.select_for_update().get(pk=src_file_id, file_type='SOURCE')
    src_file.is_parsed = True
    src_file.parsed = now()
    if data_started and not data_ended:
        src_file.data_started = data_started
        src_file.data_ended = data_started
    elif not data_started and data_ended:
        src_file.data_started = data_ended
        src_file.data_ended = data_ended
    elif not data_ended and not data_started:
        src_file.data_started = None
        src_file.data_ended = None
    else:
        src_file.data_started = data_started
        src_file.data_ended = data_ended
    src_file.source_started = src_file.data_started
    src_file.source_ended = src_file.data_ended
    for tag in data_types:
        src_file.add_data_type_tag(tag)
    if geom:
        src_file.geometry = geom
        src_file.center_point = geo_utils.get_center_point(geom)
    if props:
        src_file.meta_data = props

    # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
    src_file.set_countries()
    src_file.save()

    try:
        # Try to update corresponding ingest models with this file's data time
        from ingest.models import Ingest
        Ingest.objects.filter(source_file_id=src_file_id).update(data_started=data_started, data_ended=data_ended)
    except ImportError:
        pass

    # Move the source file if a new workspace path is provided and the workspace allows it
    old_workspace_path = src_file.file_path
    if new_workspace_path and src_file.workspace.is_move_enabled:
        ScaleFile.objects.move_files([FileMove(src_file, new_workspace_path)])
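# A hedged usage sketch for the save_parse_results() variant directly above. It assumes the method is exposed on
# the source file manager (shown here as SourceFile.objects) and that the caller provides the atomic transaction
# mentioned in the docstring; the file ID, GeoJSON feature, data type tag, and workspace path are illustrative.
from django.db import transaction
from django.utils.timezone import now

feature = {'type': 'Feature',
           'geometry': {'type': 'Polygon',
                        'coordinates': [[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.0, 0.0]]]}}
with transaction.atomic():
    SourceFile.objects.save_parse_results(src_file_id=42, geo_json=feature, data_started=now(), data_ended=now(),
                                          data_types=['radar'], new_workspace_path='parsed/example.h5')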
def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
    """Uploads the given local product files into the workspace.

    :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
        storing the file, media_type)
    :type file_entries: list of tuple(str, str, str)
    :param input_file_ids: List of identifiers for files used to produce the given file entries
    :type input_file_ids: list of int
    :param job_exe: The job_exe model with the related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param workspace: The workspace to use for storing the product files
    :type workspace: :class:`storage.models.Workspace`
    :returns: The list of the saved product models
    :rtype: list of :class:`product.models.ProductFile`
    """

    # Build a list of UUIDs for the input files
    input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
    input_file_uuids = [f['uuid'] for f in input_files]

    # Determine if any input files are non-operational products
    input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
    input_products_operational = all([f.is_operational for f in input_products])

    products_to_save = []
    for entry in file_entries:
        local_path = entry[0]
        remote_path = entry[1]
        media_type = entry[2]

        product = ProductFile()
        product.job_exe = job_exe
        product.job = job_exe.job
        product.job_type = job_exe.job.job_type
        product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
        file_name = os.path.basename(local_path)
        file_size = os.path.getsize(local_path)
        product.set_basic_fields(file_name, file_size, media_type)
        product.file_path = remote_path

        # Add a stable identifier based on the job type, input files, and file name
        # This is designed to remain stable across re-processing the same type of job on the same inputs
        product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

        # Add geospatial info to product if available
        if len(entry) > 3:
            geo_metadata = entry[3]
            target_date = None
            if 'data_started' in geo_metadata:
                product.data_started = parse_datetime(geo_metadata['data_started'])
                target_date = product.data_started
            if 'data_ended' in geo_metadata:
                product.data_ended = parse_datetime(geo_metadata['data_ended'])
                if target_date is None:
                    target_date = product.data_ended
            if target_date is None:
                target_date = product.created
            if 'geo_json' in geo_metadata:
                geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                product.geometry = geom
                product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

        products_to_save.append(FileUpload(product, local_path))

    return ScaleFile.objects.upload_files(workspace, products_to_save)
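# A hedged usage sketch for the tuple-based upload_files() variant directly above. The job_exe and workspace
# variables stand in for previously loaded JobExecution and Workspace models, the manager attachment
# (ProductFile.objects) is an assumption, and the optional fourth tuple element is the geo metadata dict the
# method checks for; paths and IDs are illustrative.
geo_metadata = {'data_started': '2022-01-01T00:00:00Z',
                'geo_json': {'type': 'Polygon',
                             'coordinates': [[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.0, 0.0]]]}}
file_entries = [('/tmp/outputs/result.tif', 'products/result.tif', 'image/tiff', geo_metadata)]
products = ProductFile.objects.upload_files(file_entries, [1234], job_exe, workspace)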