Example #1
    def update_border(self, name, border, effective=None):
        """Updates the country border geometry for an existing country, adding a new entry for the new effective date.

        :param name: The name of an existing country
        :type name: str
        :param border: The new border geometry. Either GEOSGeometry or geojson (which will be converted to GEOSGeometry)
        :type border: GEOSGeometry or dict
        :param effective: The effective date for the new border. If None, now() will be used
        :type effective: :class:`datetime.datetime`
        """

        if not isinstance(border, geos.geometry.GEOSGeometry):
            border, _ = geospatial_utils.parse_geo_json(border)

        # Acquire model lock
        cur = self.get(name=name)
        if cur:
            new_item = CountryData(name=cur.name,
                                   fips=cur.fips,
                                   gmi=cur.gmi,
                                   iso2=cur.iso2,
                                   iso3=cur.iso3,
                                   iso_num=cur.iso_num,
                                   border=border,
                                   effective=effective)
            new_item.save()
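A minimal usage sketch, assuming CountryData.objects is the manager exposing update_border; the country name, geometry, and date below are placeholders:

from datetime import datetime, timezone

border = {'type': 'Polygon',
          'coordinates': [[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.0, 0.0]]]}
# The geojson dict is converted to GEOSGeometry by parse_geo_json() inside the method
CountryData.objects.update_border('Atlantis', border,
                                  effective=datetime(2015, 1, 1, tzinfo=timezone.utc))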
Example #2
    def save_parse_results(self, src_file_id, geo_json, data_started, data_ended, data_types, new_workspace_path):
        """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
        transaction.

        :param src_file_id: The ID of the source file
        :type src_file_id: int
        :param geo_json: The associated geojson data, possibly None
        :type geo_json: dict
        :param data_started: The start time of the data contained in the source file, possibly None
        :type data_started: :class:`datetime.datetime` or None
        :param data_ended: The end time of the data contained in the source file, possibly None
        :type data_ended: :class:`datetime.datetime` or None
        :param data_types: List of strings containing the data type tags for this source file.
        :type data_types: list of str
        :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
            None, the source file should not be moved.
        :type new_workspace_path: str
        """

        geom = None
        props = None
        if geo_json:
            geom, props = geo_utils.parse_geo_json(geo_json)

        # Acquire model lock
        src_file = SourceFile.objects.select_for_update().get(pk=src_file_id)
        src_file.is_parsed = True
        src_file.parsed = now()
        src_file.data_started = data_started
        src_file.data_ended = data_ended
        target_date = src_file.data_started
        if target_date is None:
            target_date = src_file.data_ended
        if target_date is None:
            target_date = src_file.created
        for tag in data_types:
            src_file.add_data_type_tag(tag)
        if geom:
            src_file.geometry = geom
            src_file.center_point = geo_utils.get_center_point(geom)
        src_file.meta_data = props
        # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
        src_file.set_countries()
        src_file.save()

        # Move the source file if a new workspace path is provided and the workspace allows it
        old_workspace_path = src_file.file_path
        if new_workspace_path and src_file.workspace.is_move_enabled:
            ScaleFile.objects.move_files([FileMove(src_file, new_workspace_path)])

        try:
            # Check trigger rules for parsed source files
            ParseTriggerHandler().process_parsed_source_file(src_file)
        except Exception:
            # Move file back if there was an error
            if new_workspace_path and src_file.workspace.is_move_enabled:
                ScaleFile.objects.move_files([FileMove(src_file, old_workspace_path)])
            raise
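A hedged call sketch for this variant; the ID and tag are placeholders, and the call is wrapped in a transaction because select_for_update() requires one:

from django.db import transaction
from django.utils.timezone import now

with transaction.atomic():
    SourceFile.objects.save_parse_results(
        src_file_id=42,            # placeholder ID of an existing source file
        geo_json=None,             # no geometry available for this file
        data_started=now(),
        data_ended=None,
        data_types=['eo'],         # hypothetical data type tag
        new_workspace_path=None)   # None: leave the file where it is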
Example #3
    def save_parse_results(self, src_file_id, geo_json, data_started, data_ended, data_types, new_workspace_path,
                           work_dir):
        '''Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
        transaction.

        :param src_file_id: The ID of the source file
        :type src_file_id: int
        :param geo_json: The associated geojson data, possibly None
        :type geo_json: dict
        :param data_started: The start time of the data contained in the source file, possibly None
        :type data_started: :class:`datetime.datetime` or None
        :param data_ended: The end time of the data contained in the source file, possibly None
        :type data_ended: :class:`datetime.datetime` or None
        :param data_types: List of strings containing the data type tags for this source file.
        :type data_types: list of str
        :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
            None, the source file should not be moved.
        :type new_workspace_path: str
        :param work_dir: Absolute path to a local work directory available to assist in moving the source file. Only
            needed if new_workspace_path is not None.
        :type work_dir: str
        '''

        geom = None
        props = None
        if geo_json:
            geom, props = geo_utils.parse_geo_json(geo_json)

        # Acquire model lock
        src_file = SourceFile.objects.select_for_update().get(pk=src_file_id)
        src_file.is_parsed = True
        src_file.parsed = now()
        src_file.data_started = data_started
        src_file.data_ended = data_ended
        target_date = src_file.data_started
        if target_date is None:
            target_date = src_file.data_ended
        if target_date is None:
            target_date = src_file.created
        for tag in data_types:
            src_file.add_data_type_tag(tag)
        if geom:
            src_file.geometry = geom
            src_file.center_point = geo_utils.get_center_point(geom)
        src_file.meta_data = props
        # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
        src_file.set_countries()
        src_file.save()

        # Move the source file if a new workspace path is provided
        if new_workspace_path:
            ScaleFile.objects.move_files(work_dir, [(src_file, new_workspace_path)])

        for parse_rule in get_parse_rules():
            parse_rule.process_parse(src_file)
Example #4
    def test_parse_geo_json(self):
        '''Tests parsing geojson'''

        geo_json = {u'geometry': {u'type': u'POLYGON', u'coordinates': [[[40, 26], [50, 27], [60, 26], [50, 25], [40, 26]]]}, u'type': u'Feature'}

        # Call method to test
        geom, props = geo_utils.parse_geo_json(geo_json)

        # Check results
        self.assertEqual(geom.geom_type, u'Polygon')
        self.assertIsNone(props)
Example #5
    def test_parse_geo_json(self):
        """Tests parsing geojson"""

        geo_json = {'geometry': {'type': 'POLYGON', 'coordinates': [[[40, 26], [50, 27], [60, 26], [50, 25], [40, 26]]]}, 'type': 'Feature'}

        # Call method to test
        geom, props = geo_utils.parse_geo_json(geo_json)

        # Check results
        self.assertEqual(geom.geom_type, 'Polygon')
        self.assertIsNone(props)
Example #6
    def test_get_center_point(self):
        '''Tests calculating center point'''
        geo_json = {
            "type": "Polygon",
            "coordinates": [[[ 1.0, 10.0 ], [ 2.0, 10.0 ], [ 2.0, 20.0 ],[ 1.0, 20.0 ], [ 1.0, 10.0 ]]]
        }

        # Call method to test
        geom, props = geo_utils.parse_geo_json(geo_json)
        center = geo_utils.get_center_point(geom)

        # Check results
        self.assertEqual(center.geom_type, u'Point')
        self.assertEqual(center.coords, (1.5, 15.0))
Example #7
    def test_get_center_point(self):
        """Tests calculating center point"""
        geo_json = {
            "type": "Polygon",
            "coordinates": [[[ 1.0, 10.0 ], [ 2.0, 10.0 ], [ 2.0, 20.0 ],[ 1.0, 20.0 ], [ 1.0, 10.0 ]]]
        }

        # Call method to test
        geom, props = geo_utils.parse_geo_json(geo_json)
        center = geo_utils.get_center_point(geom)

        # Check results
        self.assertEqual(center.geom_type, 'Point')
        self.assertEqual(center.coords, (1.5, 15.0))
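For the rectangle above, the expected center point can be cross-checked with GeoDjango alone, assuming geo_utils.get_center_point behaves like the GEOS centroid for this shape:

import json
from django.contrib.gis.geos import GEOSGeometry

rect = {'type': 'Polygon',
        'coordinates': [[[1.0, 10.0], [2.0, 10.0], [2.0, 20.0], [1.0, 20.0], [1.0, 10.0]]]}
geom = GEOSGeometry(json.dumps(rect))  # GEOSGeometry accepts GeoJSON strings
assert geom.centroid.coords == (1.5, 15.0)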
Example #8
    def update_border(self, name, border, effective=None):
        """Updates the country border geometry for an existing country, adding a new entry for the new effective date.

        :param name: The name of an existing country
        :type name: str
        :param border: The new border geometry. Either GEOSGeometry or geojson (which will be converted to GEOSGeometry)
        :type border: GEOSGeometry or dict
        :param effective: The effective date for the new border. If None, now() will be used
        :type effective: :class:`datetime.datetime`
        """

        if not isinstance(border, geos.geometry.GEOSGeometry):
            border, _ = geospatial_utils.parse_geo_json(border)

        # Acquire model lock
        cur = self.get(name=name)
        if cur:
            new_item = CountryData(name=cur.name, fips=cur.fips, gmi=cur.gmi,
                                   iso2=cur.iso2, iso3=cur.iso3,
                                   iso_num=cur.iso_num, border=border, effective=effective)
            new_item.save()
Example #9
    def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
        """Uploads the given local product files into the workspace.

        :param file_entries: List of files to upload
        :type file_entries: list[:class:`product.types.ProductFileMetadata`]
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`storage.models.ScaleFile`
        """

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values(
            'uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Get property names and values as strings
        properties = job_exe.job.get_job_data().get_all_properties()

        # Product UUID will be based in part on input data (UUIDs of input files and name/value pairs of input
        # properties)
        input_strings = input_file_uuids
        input_strings.extend(properties)

        # Determine if any input files are non-operational products
        input_products = ScaleFile.objects.filter(
            id__in=[f['id'] for f in input_files], file_type='PRODUCT')
        input_products_operational = all(
            [f.is_operational for f in input_products])

        source_started = job_exe.job.source_started
        source_ended = job_exe.job.source_ended
        source_sensor_class = job_exe.job.source_sensor_class
        source_sensor = job_exe.job.source_sensor
        source_collection = job_exe.job.source_collection
        source_task = job_exe.job.source_task
        if not source_started:
            # Compute the overall start and stop times for all file_entries
            source_files = FileAncestryLink.objects.get_source_ancestors(
                [f['id'] for f in input_files])
            start_times = [f.data_started for f in source_files]
            end_times = [f.data_ended for f in source_files]
            start_times.sort()
            end_times.sort(reverse=True)
            if start_times:
                source_started = start_times[0]
            if end_times:
                source_ended = end_times[0]

        products_to_save = []
        for entry in file_entries:
            product = ProductFile.create()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            file_name = os.path.basename(entry.local_path)
            file_size = os.path.getsize(entry.local_path)
            product.set_basic_fields(file_name, file_size, entry.media_type)
            product.file_path = entry.remote_path
            product.job_output = entry.output_name

            # Add a stable identifier based on the job type, input files, input properties, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            product.update_uuid(job_exe.job.job_type.id, file_name,
                                *input_strings)

            # Add temporal info to product if available
            if entry.data_start:
                product.data_started = parse_datetime(entry.data_start)
            if entry.data_end:
                product.data_ended = parse_datetime(entry.data_end)

            if entry.geojson:
                geom, props = geo_utils.parse_geo_json(entry.geojson)
                product.geometry = geom
                if props:
                    product.meta_data = props
                product.center_point = geo_utils.get_center_point(geom)

            # Add recipe info to product if available.
            job_recipe = Recipe.objects.get_recipe_for_job(job_exe.job_id)
            if job_recipe:
                product.recipe_id = job_recipe.recipe.id
                product.recipe_type = job_recipe.recipe.recipe_type
                product.recipe_node = job_recipe.node_name

                # Add batch info to product if available.
                try:
                    from batch.models import BatchJob
                    product.batch_id = BatchJob.objects.get(
                        job_id=job_exe.job_id).batch_id
                except BatchJob.DoesNotExist:
                    product.batch_id = None

            # Allow override, if set via side-car metadata, otherwise take derived values from above
            product.source_started = entry.source_started if entry.source_started else source_started
            product.source_ended = entry.source_ended if entry.source_ended else source_ended

            # Supplemental source metadata
            product.source_sensor_class = entry.source_sensor_class if entry.source_sensor_class else source_sensor_class
            product.source_sensor = entry.source_sensor if entry.source_sensor else source_sensor
            product.source_collection = entry.source_collection if entry.source_collection else source_collection
            product.source_task = entry.source_task if entry.source_task else source_task

            # Update product model with details derived from the job_type
            product.meta_data['url'] = product.url
            product.meta_data['job_name'] = job_exe.job_type.name
            product.meta_data['job_version'] = job_exe.job_type.get_job_version()
            product.meta_data['package_version'] = job_exe.job_type.get_package_version()

            products_to_save.append(FileUpload(product, entry.local_path))

        return ScaleFile.objects.upload_files(workspace, products_to_save)
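A call sketch for this variant; the ProductFileMetadata keyword names are assumptions based on the attributes read above (local_path, remote_path, media_type, output_name), and job_exe and workspace stand for previously fetched models:

entry = ProductFileMetadata(output_name='out_image',          # assumed keyword names
                            local_path='/tmp/outputs/a.tif',  # placeholder paths
                            remote_path='products/a.tif',
                            media_type='image/tiff')
saved = ProductFile.objects.upload_files([entry], input_file_ids=[1, 2],
                                         job_exe=job_exe, workspace=workspace)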
Example #10
    def upload_files(self, upload_dir, work_dir, file_entries, input_file_ids,
                     job_exe, workspace):
        '''Uploads the given local product files into the workspace. All database changes will be made in an atomic
        transaction. This method assumes that ScaleFileManager.setup_upload_dir() has already been called with the same
        upload and work directories.

        :param upload_dir: Absolute path to the local directory of the files to upload
        :type upload_dir: str
        :param work_dir: Absolute path to a local work directory available to assist in uploading
        :type work_dir: str
        :param file_entries: List of files where each file is a tuple of (source path relative to upload directory,
            workspace path for storing the file, media_type)
        :type file_entries: list of tuple(str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`product.models.ProductFile`
        '''

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values(
            'uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Determine if any input files are non-operational products
        input_products = ProductFile.objects.filter(
            file__in=[f['id'] for f in input_files])
        input_products_operational = all(
            [f.is_operational for f in input_products])

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]

            product = ProductFile()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            product.media_type = media_type

            # Add a stable identifier based on the job type, input files, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            file_name = os.path.basename(local_path)
            product.update_uuid(job_exe.job.job_type.id, file_name,
                                *input_file_uuids)

            # Add geospatial info to product if available
            if len(entry) > 3:
                geo_metadata = entry[3]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(
                        geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(
                        geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(
                        geo_metadata['geo_json'])
                    product.geometry = geom
                    product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            products_to_save.append((product, local_path, remote_path))

        return ScaleFile.objects.upload_files(upload_dir, work_dir, workspace,
                                              products_to_save)
Example #11
    def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
        """Uploads the given local product files into the workspace.

        :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
            storing the file, media_type)
        :type file_entries: list of tuple(str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`product.models.ProductFile`
        """

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Determine if any input files are non-operational products
        input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
        input_products_operational = all([f.is_operational for f in input_products])

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]

            product = ProductFile()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            file_name = os.path.basename(local_path)
            file_size = os.path.getsize(local_path)
            product.set_basic_fields(file_name, file_size, media_type)
            product.file_path = remote_path

            # Add a stable identifier based on the job type, input files, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

            # Add geospatial info to product if available
            if len(entry) > 3:
                geo_metadata = entry[3]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                    product.geometry = geom
                    product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            products_to_save.append(FileUpload(product, local_path))

        return ScaleFile.objects.upload_files(workspace, products_to_save)
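In this variant each entry is a plain tuple with an optional fourth geo_metadata dict; based on the keys read above, a sketch of the input (paths and dates are placeholders):

geo_metadata = {
    'data_started': '2015-01-01T00:00:00Z',
    'data_ended': '2015-01-01T01:00:00Z',
    'geo_json': {'type': 'Polygon',
                 'coordinates': [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]},
}
file_entries = [
    ('/tmp/outputs/a.tif', 'products/a.tif', 'image/tiff'),                # no geo info
    ('/tmp/outputs/b.tif', 'products/b.tif', 'image/tiff', geo_metadata),  # with geo info
]
saved = ProductFile.objects.upload_files(file_entries, [1, 2], job_exe, workspace)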
Example #12
    def save_parse_results(self, src_file_id, geo_json, data_started,
                           data_ended, data_types, new_workspace_path,
                           work_dir):
        '''Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
        transaction.

        :param src_file_id: The ID of the source file
        :type src_file_id: int
        :param geo_json: The associated geojson data, possibly None
        :type geo_json: dict
        :param data_started: The start time of the data contained in the source file, possibly None
        :type data_started: :class:`datetime.datetime` or None
        :param data_ended: The end time of the data contained in the source file, possibly None
        :type data_ended: :class:`datetime.datetime` or None
        :param data_types: List of strings containing the data type tags for this source file.
        :type data_types: list of str
        :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
            None, the source file should not be moved.
        :type new_workspace_path: str
        :param work_dir: Absolute path to a local work directory available to assist in moving the source file. Only
            needed if new_workspace_path is not None.
        :type work_dir: str
        '''

        geom = None
        props = None
        if geo_json:
            geom, props = geo_utils.parse_geo_json(geo_json)

        # Acquire model lock
        src_file = SourceFile.objects.select_for_update().get(pk=src_file_id)
        src_file.is_parsed = True
        src_file.parsed = now()
        src_file.data_started = data_started
        src_file.data_ended = data_ended
        target_date = src_file.data_started
        if target_date is None:
            target_date = src_file.data_ended
        if target_date is None:
            target_date = src_file.created
        for tag in data_types:
            src_file.add_data_type_tag(tag)
        if geom:
            src_file.geometry = geom
            src_file.center_point = geo_utils.get_center_point(geom)
        src_file.meta_data = props
        # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
        src_file.set_countries()
        src_file.save()

        # Move the source file if a new workspace path is provided and the workspace allows it
        if new_workspace_path and src_file.workspace.is_move_enabled:
            old_workspace_path = src_file.file_path
            ScaleFile.objects.move_files(work_dir,
                                         [(src_file, new_workspace_path)])

        try:
            # Check trigger rules for parsed source files
            ParseTriggerHandler().process_parsed_source_file(src_file)
        except Exception:
            # Move file back if there was an error
            if new_workspace_path and src_file.workspace.is_move_enabled:
                ScaleFile.objects.move_files(work_dir,
                                             [(src_file, old_workspace_path)])

            raise
Example #13
    def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
        """Uploads the given local product files into the workspace.

        :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
            storing the file, media_type, output_name)
        :type file_entries: list of tuple(str, str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`storage.models.ScaleFile`
        """

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Get property names and values as strings
        properties = job_exe.job.get_job_data().get_all_properties()

        # Product UUID will be based in part on input data (UUIDs of input files and name/value pairs of input
        # properties)
        input_strings = input_file_uuids
        input_strings.extend(properties)

        # Determine if any input files are non-operational products
        input_products = ScaleFile.objects.filter(id__in=[f['id'] for f in input_files], file_type='PRODUCT')
        input_products_operational = all([f.is_operational for f in input_products])

        # Compute the overall start and stop times for all file_entries
        source_files = FileAncestryLink.objects.get_source_ancestors([f['id'] for f in input_files])
        start_times = [f.data_started for f in source_files]
        end_times = [f.data_ended for f in source_files]
        start_times.sort()
        end_times.sort(reverse=True)

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]
            output_name = entry[3]

            product = ProductFile.create()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            file_name = os.path.basename(local_path)
            file_size = os.path.getsize(local_path)
            product.set_basic_fields(file_name, file_size, media_type)
            product.file_path = remote_path
            product.job_output = output_name

            # Add a stable identifier based on the job type, input files, input properties, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            product.update_uuid(job_exe.job.job_type.id, file_name, *input_strings)

            # Add geospatial info to product if available
            if len(entry) > 4:
                geo_metadata = entry[4]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                    product.geometry = geom
                    if props:
                        product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            # Add recipe info to product if available.
            job_recipe = Recipe.objects.get_recipe_for_job(job_exe.job_id)
            if job_recipe:
                product.recipe_id = job_recipe.recipe.id
                product.recipe_type = job_recipe.recipe.recipe_type
                product.recipe_job = job_recipe.job_name

                # Add batch info to product if available.
                try:
                    from batch.models import BatchJob
                    product.batch_id = BatchJob.objects.get(job_id=job_exe.job_id).batch_id
                except BatchJob.DoesNotExist:
                    product.batch_id = None

            # Add start and stop times if available
            if start_times:
                product.source_started = start_times[0]

            if end_times:
                product.source_ended = end_times[0]

            products_to_save.append(FileUpload(product, local_path))

        return ScaleFile.objects.upload_files(workspace, products_to_save)
Example #14
    def upload_files(self, upload_dir, work_dir, file_entries, input_file_ids, job_exe, workspace):
        '''Uploads the given local product files into the workspace. All database changes will be made in an atomic
        transaction. This method assumes that ScaleFileManager.setup_upload_dir() has already been called with the same
        upload and work directories.

        :param upload_dir: Absolute path to the local directory of the files to upload
        :type upload_dir: str
        :param work_dir: Absolute path to a local work directory available to assist in uploading
        :type work_dir: str
        :param file_entries: List of files where each file is a tuple of (source path relative to upload directory,
            workspace path for storing the file, media_type)
        :type file_entries: list of tuple(str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`product.models.ProductFile`
        '''

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values('uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Determine if any input files are non-operational products
        input_products = ProductFile.objects.filter(file__in=[f['id'] for f in input_files])
        input_products_operational = all([f.is_operational for f in input_products])

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]

            product = ProductFile()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            product.media_type = media_type

            # Add a stable identifier based on the job type, input files, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            file_name = os.path.basename(local_path)
            product.update_uuid(job_exe.job.job_type.id, file_name, *input_file_uuids)

            # Add geospatial info to product if available
            if len(entry) > 3:
                geo_metadata = entry[3]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(geo_metadata['geo_json'])
                    product.geometry = geom
                    product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            products_to_save.append((product, local_path, remote_path))

        return ScaleFile.objects.upload_files(upload_dir, work_dir, workspace, products_to_save)
Example #15
    def save_parse_results(self, src_file_id, geo_json, data_started,
                           data_ended, data_types, new_workspace_path):
        """Saves the given parse results to the source file for the given ID. All database changes occur in an atomic
        transaction.

        :param src_file_id: The ID of the source file
        :type src_file_id: int
        :param geo_json: The associated geojson data, possibly None
        :type geo_json: dict
        :param data_started: The start time of the data contained in the source file, possibly None
        :type data_started: :class:`datetime.datetime` or None
        :param data_ended: The end time of the data contained in the source file, possibly None
        :type data_ended: :class:`datetime.datetime` or None
        :param data_types: List of strings containing the data type tags for this source file.
        :type data_types: list of str
        :param new_workspace_path: New workspace path to move the source file to now that parse data is available. If
            None, the source file should not be moved.
        :type new_workspace_path: str
        """

        geom = None
        props = None
        if geo_json:
            geom, props = geo_utils.parse_geo_json(geo_json)

        # Acquire model lock
        src_file = ScaleFile.objects.select_for_update().get(
            pk=src_file_id, file_type='SOURCE')
        src_file.is_parsed = True
        src_file.parsed = now()
        if data_started and not data_ended:
            src_file.data_started = data_started
            src_file.data_ended = data_started
        elif not data_started and data_ended:
            src_file.data_started = data_ended
            src_file.data_ended = data_ended
        elif not data_ended and not data_started:
            src_file.data_started = None
            src_file.data_ended = None
        else:
            src_file.data_started = data_started
            src_file.data_ended = data_ended
        src_file.source_started = src_file.data_started
        src_file.source_ended = src_file.data_ended
        for tag in data_types:
            src_file.add_data_type_tag(tag)
        if geom:
            src_file.geometry = geom
            src_file.center_point = geo_utils.get_center_point(geom)
        if props:
            src_file.meta_data = props
        # src_file already exists so we don't need to save/set_countries/save, just a single save is fine
        src_file.set_countries()
        src_file.save()

        try:
            # Try to update corresponding ingest models with this file's data time
            from ingest.models import Ingest
            Ingest.objects.filter(source_file_id=src_file_id).update(
                data_started=data_started, data_ended=data_ended)
        except ImportError:
            pass

        # Move the source file if a new workspace path is provided and the workspace allows it
        old_workspace_path = src_file.file_path
        if new_workspace_path and src_file.workspace.is_move_enabled:
            ScaleFile.objects.move_files(
                [FileMove(src_file, new_workspace_path)])
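The time-range branches above normalize one-sided inputs before mirroring them into source_started and source_ended; summarized:

# (data_started, data_ended) given  ->  stored as
# (t1,   None)                      ->  (t1,   t1)
# (None, t2)                        ->  (t2,   t2)
# (None, None)                      ->  (None, None)
# (t1,   t2)                        ->  (t1,   t2)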
Example #16
    def upload_files(self, file_entries, input_file_ids, job_exe, workspace):
        """Uploads the given local product files into the workspace.

        :param file_entries: List of files where each file is a tuple of (absolute local path, workspace path for
            storing the file, media_type)
        :type file_entries: list of tuple(str, str, str)
        :param input_file_ids: List of identifiers for files used to produce the given file entries
        :type input_file_ids: list of int
        :param job_exe: The job_exe model with the related job and job_type fields
        :type job_exe: :class:`job.models.JobExecution`
        :param workspace: The workspace to use for storing the product files
        :type workspace: :class:`storage.models.Workspace`
        :returns: The list of the saved product models
        :rtype: list of :class:`product.models.ProductFile`
        """

        # Build a list of UUIDs for the input files
        input_files = ScaleFile.objects.filter(pk__in=input_file_ids).values(
            'uuid', 'id').order_by('uuid')
        input_file_uuids = [f['uuid'] for f in input_files]

        # Determine if any input files are non-operational products
        input_products = ProductFile.objects.filter(
            file__in=[f['id'] for f in input_files])
        input_products_operational = all(
            [f.is_operational for f in input_products])

        products_to_save = []
        for entry in file_entries:
            local_path = entry[0]
            remote_path = entry[1]
            media_type = entry[2]

            product = ProductFile()
            product.job_exe = job_exe
            product.job = job_exe.job
            product.job_type = job_exe.job.job_type
            product.is_operational = input_products_operational and job_exe.job.job_type.is_operational
            file_name = os.path.basename(local_path)
            file_size = os.path.getsize(local_path)
            product.set_basic_fields(file_name, file_size, media_type)
            product.file_path = remote_path

            # Add a stable identifier based on the job type, input files, and file name
            # This is designed to remain stable across re-processing the same type of job on the same inputs
            product.update_uuid(job_exe.job.job_type.id, file_name,
                                *input_file_uuids)

            # Add geospatial info to product if available
            if len(entry) > 3:
                geo_metadata = entry[3]
                target_date = None
                if 'data_started' in geo_metadata:
                    product.data_started = parse_datetime(
                        geo_metadata['data_started'])
                    target_date = product.data_started
                if 'data_ended' in geo_metadata:
                    product.data_ended = parse_datetime(
                        geo_metadata['data_ended'])
                    if target_date is None:
                        target_date = product.data_ended
                if target_date is None:
                    target_date = product.created
                if 'geo_json' in geo_metadata:
                    geom, props = geo_utils.parse_geo_json(
                        geo_metadata['geo_json'])
                    product.geometry = geom
                    product.meta_data = props
                    product.center_point = geo_utils.get_center_point(geom)

            products_to_save.append(FileUpload(product, local_path))

        return ScaleFile.objects.upload_files(workspace, products_to_save)