Example 1
def get_query_cache_key(*args):
    from eventkit_cloud.tasks.helpers import normalize_name

    cleaned_args = []
    for arg in args:
        if hasattr(arg, "__name__"):
            cleaned_args += [normalize_name(str(arg.__name__))]
        else:
            cleaned_args += [normalize_name(str(arg))]
    return "-".join(cleaned_args)
Example 2
def generate_qgs_style(metadata):
    """
    Task to create a QGIS project file with styles for OSM.

    If a data_provider_task_record is provided, a style file will be generated only for it; otherwise all of the
    data providers in the run will be added to the style file.
    :param metadata: A dict of metadata provided by get_metadata.
    :return: The path to the generated qgs file.
    """

    from eventkit_cloud.tasks.helpers import normalize_name
    stage_dir = os.path.join(settings.EXPORT_STAGING_ROOT,
                             str(metadata['run_uid']))

    job_name = normalize_name(metadata['name'].lower())

    provider_details = [
        provider_detail
        for provider_slug, provider_detail in metadata['data_sources'].items()
    ]

    if len(provider_details) == 1:
        style_file_name = '{0}-{1}-{2}.qgs'.format(
            job_name, normalize_name(provider_details[0]['slug']),
            default_format_time(timezone.now()))
    else:
        style_file_name = '{0}-{1}.qgs'.format(
            job_name, default_format_time(timezone.now()))

    style_file = os.path.join(stage_dir, style_file_name)

    with open(style_file, 'wb') as open_file:
        open_file.write(render_to_string(
            'styles/Style.qgs',
            context={
                'job_name': job_name,
                'job_date_time': '{0}'.format(timezone.now().strftime("%Y%m%d%H%M%S%f")[:-3]),
                'provider_details': provider_details,
                'bbox': metadata['bbox'],
                'has_raster': metadata['has_raster'],
                'has_elevation': metadata['has_elevation'],
            }).encode())
    return style_file
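
A minimal usage sketch (hypothetical; assumes the metadata dict comes from get_metadata, shown in a later example):

# Hypothetical usage: render the QGIS project file for a completed run.
metadata = get_metadata(data_provider_task_uid)  # uid of an existing DataProviderTaskRecord
style_file = generate_qgs_style(metadata)
# The file lands under EXPORT_STAGING_ROOT/<run_uid>/, e.g. ".../myjob-osm-20181101.qgs"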
Example 3
def generate_qgs_style(metadata):
    """
    Task to create a QGIS project file with styles for OSM.

    If a data_provider_task_record is provided, a style file will be generated only for it; otherwise all of the
    data providers in the run will be added to the style file.
    :param metadata: A dict of metadata provided by get_metadata.
    :return: The path to the generated qgs file.
    """

    from eventkit_cloud.tasks.helpers import normalize_name
    stage_dir = os.path.join(settings.EXPORT_STAGING_ROOT, str(metadata['run_uid']))

    job_name = normalize_name(metadata['name'].lower())

    provider_details = [provider_detail for provider_slug, provider_detail in metadata['data_sources'].items()]

    if len(provider_details) == 1:
        style_file_name = '{0}-{1}-{2}.qgs'.format(job_name,
                                                   normalize_name(provider_details[0]['slug']),
                                                   default_format_time(timezone.now()))
    else:
        style_file_name = '{0}-{1}.qgs'.format(job_name, default_format_time(timezone.now()))

    style_file = os.path.join(stage_dir, style_file_name)

    with open(style_file, 'wb') as open_file:
        open_file.write(render_to_string('styles/Style.qgs',
                                         context={'job_name': job_name,
                                                  'job_date_time': '{0}'.format(
                                                      timezone.now().strftime("%Y%m%d%H%M%S%f")[:-3]),
                                                  'provider_details': provider_details,
                                                  'bbox': metadata['bbox'],
                                                  'has_raster': metadata['has_raster'],
                                                  'has_elevation': metadata['has_elevation']}).encode())
    return style_file
Example 4
def get_human_readable_metadata_document(metadata):
    """

    :param metadata: A dictionary returned by get_metadata.
    :return: A filepath to a txt document.
    """
    from eventkit_cloud.tasks.helpers import normalize_name
    stage_dir = os.path.join(settings.EXPORT_STAGING_ROOT, str(metadata['run_uid']))

    metadata_file = os.path.join(stage_dir, '{0}_ReadMe.txt'.format(normalize_name(metadata['name'])))

    with open(metadata_file, 'wb') as open_file:
        # Normalize all line endings to CRLF for the text document.
        open_file.write(
            render_to_string('styles/metadata.txt', context={'metadata': metadata})
            .replace('\r\n', '\n').replace('\n', '\r\n').encode())
    return metadata_file
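
A minimal usage sketch (hypothetical; reuses a metadata dict from get_metadata):

# Hypothetical usage: write the human-readable ReadMe into the staging directory.
metadata = get_metadata(data_provider_task_uid)
readme_path = get_human_readable_metadata_document(metadata)
# e.g. EXPORT_STAGING_ROOT/<run_uid>/<normalized_name>_ReadMe.txt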
Example 5
def create_license_file(provider_task):
    # checks the DataProvider's license for a DataProviderTaskRecord and writes it to a text file if one exists
    from eventkit_cloud.jobs.models import DataProvider
    from eventkit_cloud.tasks.helpers import normalize_name
    data_provider_license = DataProvider.objects.get(slug=provider_task.slug).license

    # DataProviders are not required to have a license
    if data_provider_license is None:
        return

    license_file_path = os.path.join(get_provider_staging_dir(provider_task.run.uid, provider_task.slug),
                                     '{0}.txt'.format(normalize_name(data_provider_license.name)))

    with open(license_file_path, 'wb') as license_file:
        license_file.write(data_provider_license.text.encode())

    return license_file_path
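
A minimal usage sketch (hypothetical; provider_task is an existing DataProviderTaskRecord):

# Hypothetical usage: stage the provider's license text next to its data files.
license_path = create_license_file(provider_task)
if license_path:  # None when the provider has no license
    include_files.append(license_path)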
Example 6
def create_license_file(provider_task):
    # checks the DataProvider's license for a DataProviderTaskRecord and writes it to a text file if one exists
    from eventkit_cloud.jobs.models import DataProvider
    from eventkit_cloud.tasks.helpers import normalize_name
    data_provider_license = DataProvider.objects.get(
        slug=provider_task.slug).license

    # DataProviders are not required to have a license
    if data_provider_license is None:
        return

    license_file_path = os.path.join(
        get_provider_staging_dir(provider_task.run.uid, provider_task.slug),
        '{0}.txt'.format(normalize_name(data_provider_license.name)))

    with open(license_file_path, 'wb') as license_file:
        license_file.write(data_provider_license.text.encode())

    return license_file_path
Example 7
def get_human_readable_metadata_document(metadata):
    """

    :param metadata: A dictionary returned by get_metadata.
    :return: A filepath to a txt document.
    """
    from eventkit_cloud.tasks.helpers import normalize_name
    stage_dir = os.path.join(settings.EXPORT_STAGING_ROOT, str(metadata['run_uid']))

    metadata_file = os.path.join(stage_dir, '{0}_ReadMe.txt'.format(normalize_name(metadata['name'])))

    with open(metadata_file, 'wb') as open_file:
        # Normalize all line endings to CRLF for the text document.
        open_file.write(
            render_to_string('styles/metadata.txt', context={'metadata': metadata})
            .replace('\r\n', '\n').replace('\n', '\r\n').encode())
    return metadata_file
Example 8
def get_metadata(data_provider_task_uid):
    """
    An object to hold metadata about the run, for passing to various scripts that create
    style files or metadata documents within the datapack.

    This also creates a license file, which is considered part of the metadata, and adds it to "include_files".
    :param data_provider_task_uid: A provider task uid string, either for the run task (which will add all of the
    provider tasks for the run) or for a single data provider task.
    :return: A dict containing the run metadata.

    Example:
    {
    "aoi": "GEOJSON representing the selected AOI."
    "bbox": [
        w, s, e, n
    ],
    "data_sources": {
        "osm": {
            "copyright": None,
            "description": "OpenStreetMap vector data provided in a custom thematic schema. \r\n\t\r\n\tData is grouped into separate tables (e.g. water, roads...).",
            "file_path": "data/osm/test-osm-20181101.gpkg",
            "file_type": ".gpkg",
            "full_file_path": "/var/lib/eventkit/exports_stage/7fadf34e-58f9-4bb8-ab57-adc1015c4269/osm/test.gpkg",
            "last_update": "2018-10-29T04:35:02Z\n",
            "metadata": "https://overpass-server.com/overpass/interpreter",
            "name": "OpenStreetMap Data (Themes)",
            "slug": "osm",
            "type": "osm",
            "uid": "0d08ddf6-35c1-464f-b271-75f6911c3f78"
        }
    },
    "date": "20181101",
    "description": "Test",
    "has_elevation": False,
    "has_raster": True,
    "include_files": [
        "/var/lib/eventkit/exports_stage/7fadf34e-58f9-4bb8-ab57-adc1015c4269/osm/test.gpkg",
        "/var/lib/eventkit/exports_stage/7fadf34e-58f9-4bb8-ab57-adc1015c4269/osm/osm_selection.geojson"
    ],
    "name": "test",
    "project": "Test",
    "run_uid": "7fadf34e-58f9-4bb8-ab57-adc1015c4269",
    "url": "http://cloud.eventkit.test/status/2010025c-6d61-4a0b-8d5d-ff9c657259eb"
    }
    """

    from eventkit_cloud.jobs.models import DataProvider
    from eventkit_cloud.tasks.models import DataProviderTaskRecord
    from eventkit_cloud.tasks.export_tasks import TaskStates, create_zip_task

    data_provider_task = DataProviderTaskRecord.objects.get(uid=data_provider_task_uid)

    run = data_provider_task.run

    if data_provider_task.name == 'run':
        provider_tasks = run.provider_tasks.filter(~Q(name='run'))
        data_provider_task = None
    else:
        provider_tasks = [data_provider_task]

    # To prepare for the zipfile task, the files need to be checked to ensure they weren't
    # deleted during cancellation.
    include_files = []

    # A dict is used here to ensure that just one file per provider is added,
    # this should be updated when multiple formats are supported.
    metadata = {
        'name': normalize_name(run.job.name),
        'url': "{0}/status/{1}".format(getattr(settings, "SITE_URL"), str(run.job.uid)),
        'description': run.job.description,
        'project': run.job.event,
        'date': timezone.now().strftime("%Y%m%d"),
        'run_uid': str(run.uid),
        'data_sources': {},
        'bbox': run.job.extents,
        'aoi': run.job.bounds_geojson,
        'has_raster': False,
        'has_elevation': False,
    }

    for provider_task in provider_tasks:
        if TaskStates[provider_task.status] in TaskStates.get_incomplete_states():
            continue
        data_provider = DataProvider.objects.get(slug=provider_task.slug)
        provider_type = data_provider.export_provider_type.type_name
        metadata['data_sources'][provider_task.slug] = {
            "name": provider_task.name
        }
        if TaskStates[provider_task.status] not in TaskStates.get_incomplete_states():
            provider_staging_dir = get_provider_staging_dir(run.uid, provider_task.slug)
            for export_task in provider_task.tasks.all():
                try:
                    filename = export_task.result.filename
                except Exception:
                    continue
                full_file_path = os.path.join(provider_staging_dir, filename)
                file_ext = os.path.splitext(filename)[1]
                # Exclude zip files created by prepare_file_for_zip and the selection GeoJSON.
                # Within the QGIS style sheet it is currently assumed that GPKG files are imagery and
                # GeoTIFFs are elevation. This will need to be updated in the future.
                if file_ext in ['.gpkg', '.tif']:
                    download_filename = get_download_filename(
                        os.path.splitext(os.path.basename(filename))[0],
                        timezone.now(),
                        file_ext,
                        additional_descriptors=provider_task.slug)
                    filepath = get_archive_data_path(provider_task.slug,
                                                     download_filename)
                    metadata['data_sources'][provider_task.slug] = {
                        'uid': str(provider_task.uid),
                        'slug': provider_task.slug,
                        'name': provider_task.name,
                        'file_path': filepath,
                        'full_file_path': full_file_path,
                        'file_type': file_ext,
                        'type': get_data_type_from_provider(provider_task.slug),
                        'description': str(data_provider.service_description).replace('\r\n', '\n').replace('\n', '\r\n\t'),
                        'last_update': get_last_update(data_provider.url, provider_type, slug=data_provider.slug),
                        'metadata': get_metadata_url(data_provider.url, provider_type),
                        'copyright': data_provider.service_copyright,
                    }
                    if provider_task.slug not in ['osm', 'nome']:
                        if file_ext == '.gpkg':
                            metadata['has_raster'] = True
                        if file_ext == '.tif':
                            metadata['has_elevation'] = True
                    if os.path.splitext(full_file_path)[1] == '.tif':
                        # Get statistics to update ranges in template.
                        band_stats = get_band_statistics(full_file_path)
                        logger.info("Band Stats {0}: {1}".format(full_file_path, band_stats))
                        metadata['data_sources'][provider_task.slug]["band_stats"] = band_stats
                        # Calculate the value for each elevation step (of 16)
                        try:
                            steps = linspace(band_stats[0], band_stats[1], num=16)
                            metadata['data_sources'][provider_task.slug]["ramp_shader_steps"] = list(map(int, steps))
                        except TypeError:
                            metadata['data_sources'][provider_task.slug]["ramp_shader_steps"] = None

                if not os.path.isfile(full_file_path):
                    logger.error("Could not find file {0} for export {1}.".format(full_file_path, export_task.name))
                    continue
                # Exclude zip files created by zip_export_provider
                if not (full_file_path.endswith(".zip") and export_task.name == create_zip_task.name):
                    include_files += [full_file_path]

        # add the license for this provider if it exists
        license_file = create_license_file(provider_task)
        if license_file:
            include_files += [license_file]
        metadata['include_files'] = include_files

    return metadata
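
A minimal usage sketch (hypothetical; ties the helpers in the earlier examples together):

# Hypothetical usage: collect run metadata, then generate the datapack documents.
metadata = get_metadata(str(data_provider_task_record.uid))
style_file = generate_qgs_style(metadata)
readme = get_human_readable_metadata_document(metadata)
files_to_zip = metadata['include_files'] + [style_file, readme]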
Example 9
    def build_tasks(
        self,
        primary_export_task,
        provider_task_uid=None,
        user=None,
        run=None,
        stage_dir=None,
        worker=None,
        service_type=None,
        session_token=None,
        *args,
        **kwargs,
    ):
        """
        Run OSM export tasks. Specifically create a task chain to be picked up by a celery worker later.

        :param primary_export_task: The task which converts the source data to the interchange format (i.e. OSM-> gpkg)
        :param provider_task_uid: A reference uid for the DataProviderTask model.
        :param user: The user executing the task.
        :param run: The ExportRun which this task will belong to.
        :param stage_dir: The directory where to store the files while they are being created.
        :param worker: The celery worker assigned this task.
        :param osm_gpkg: A OSM geopackage with the planet osm schema.
        :return: An DataProviderTaskRecord uid and the Celery Task Chain or None, False.
        """

        logger.debug("Running Job with id: {0}".format(provider_task_uid))
        # pull the provider_task from the database
        data_provider_task = (
            DataProviderTask.objects.select_related("provider")
            .prefetch_related("formats__supported_projections")
            .get(uid=provider_task_uid)
        )
        data_provider: DataProvider = data_provider_task.provider
        job = run.job

        # This is just to make it easier to trace when user_details haven't been sent
        user_details = kwargs.get("user_details")
        if user_details is None:
            from audit_logging.utils import get_user_details

            user_details = get_user_details(user)

        job_name = normalize_name(job.name)
        # get the formats to export
        formats: List[ExportFormat] = list(data_provider_task.formats.all())

        data_provider_task_record: DataProviderTaskRecord
        created: bool
        data_provider_task_record, created = DataProviderTaskRecord.objects.get_or_create(
            run=run,
            name=data_provider.name,
            provider=data_provider,
            status=TaskState.PENDING.value,
            display=True,
        )

        projections = [projection.srid for projection in run.job.projections.all()]

        """
        Create a celery chain which gets the data & runs export formats
        """

        queue_group = get_celery_queue_group(run_uid=run.uid, worker=worker)

        # Record estimates for size and time
        get_estimates_task.apply_async(
            queue=queue_group,
            routing_key=queue_group,
            kwargs={
                "run_uid": run.uid,
                "data_provider_task_uid": data_provider_task.uid,
                "data_provider_task_record_uid": data_provider_task_record.uid,
            },
        )

        skip_primary_export_task = False
        if set(item.name.lower() for item in formats).issubset(
            set(item.name.lower() for item in get_proxy_formats(data_provider))
        ):
            skip_primary_export_task = True

        export_tasks = {}  # {export_format : (etr_uid, export_task)}
        for export_format in formats:
            logger.info(f"Setting up task for format: {export_format.name} with {export_format.options}")
            if is_supported_proxy_format(export_format, data_provider):
                export_task = create_format_task("ogcapi-process")
            else:
                export_task = create_format_task(export_format.slug)

            default_projection = get_default_projection(export_format.get_supported_projection_list(), projections)
            task_name = export_format.name
            if default_projection:
                task_name = f"{task_name} - EPSG:{default_projection}"
            export_task_record = create_export_task_record(
                task_name=task_name,
                export_provider_task=data_provider_task_record,
                worker=worker,
                display=getattr(export_task, "display", False),
            )
            export_tasks[export_format] = (export_task_record, export_task)

        bbox = run.job.extents

        """
        Create a celery chain which gets the data & runs export formats
        """
        if export_tasks:
            subtasks = list()
            if data_provider.preview_url:
                subtasks.append(
                    create_datapack_preview.s(
                        run_uid=run.uid,
                        stage_dir=stage_dir,
                        task_uid=data_provider_task_record.uid,
                        user_details=user_details,
                    ).set(queue=queue_group, routing_key=queue_group)
                )

            for current_format, (export_task_record, export_task) in export_tasks.items():
                supported_projections = current_format.get_supported_projection_list()
                default_projection = get_default_projection(supported_projections, selected_projections=projections)

                subtasks.append(
                    export_task.s(
                        run_uid=run.uid,
                        stage_dir=stage_dir,
                        job_name=job_name,
                        task_uid=export_task_record.uid,
                        user_details=user_details,
                        locking_task_key=export_task_record.uid,
                        config=data_provider.config,
                        service_url=data_provider.url,
                        export_format_slug=current_format.slug,
                        bbox=bbox,
                        session_token=session_token,
                        provider_slug=data_provider.slug,
                        export_provider_task_record_uid=data_provider_task_record.uid,
                        worker=worker,
                        selection=job.the_geom.geojson,
                        layer=data_provider.layer,
                        service_type=service_type,
                    ).set(queue=queue_group, routing_key=queue_group)
                )

                for projection in list(set(supported_projections) & set(projections)):

                    # This task was already added as the initial format conversion.
                    if projection == default_projection:
                        continue

                    task_name = f"{export_task.name} - EPSG:{projection}"
                    projection_task = create_export_task_record(
                        task_name=task_name,
                        export_provider_task=data_provider_task_record,
                        worker=worker,
                        display=getattr(export_task, "display", True),
                    )
                    subtasks.append(
                        reprojection_task.s(
                            run_uid=run.uid,
                            stage_dir=stage_dir,
                            job_name=job_name,
                            task_uid=projection_task.uid,
                            user_details=user_details,
                            locking_task_key=data_provider_task_record.uid,
                            projection=projection,
                            config=data_provider.config,
                        ).set(queue=queue_group, routing_key=queue_group)
                    )

            format_tasks = chain(subtasks)
        else:
            format_tasks = None

        primary_export_task_record = create_export_task_record(
            task_name=primary_export_task.name,
            export_provider_task=data_provider_task_record,
            worker=worker,
            display=getattr(primary_export_task, "display", False),
        )

        if "osm" in primary_export_task.name.lower():
            queue_routing_key_name = "{}.large".format(queue_group)
        else:
            queue_routing_key_name = queue_group

        # Set custom zoom levels if available, otherwise use the provider defaults.

        min_zoom = data_provider_task.min_zoom if data_provider_task.min_zoom else data_provider.level_from
        max_zoom = data_provider_task.max_zoom if data_provider_task.max_zoom else data_provider.level_to

        primary_export_task_signature = primary_export_task.s(
            name=data_provider.slug,
            run_uid=run.uid,
            provider_slug=data_provider.slug,
            overpass_url=data_provider.url,
            stage_dir=stage_dir,
            export_provider_task_record_uid=data_provider_task_record.uid,
            worker=worker,
            job_name=job_name,
            bbox=bbox,
            selection=job.the_geom.geojson,
            user_details=user_details,
            task_uid=primary_export_task_record.uid,
            layer=data_provider.layer,
            level_from=min_zoom,
            level_to=max_zoom,
            service_type=service_type,
            service_url=data_provider.url,
            config=data_provider.config,
            session_token=session_token,
        )
        primary_export_task_signature = primary_export_task_signature.set(
            queue=queue_routing_key_name, routing_key=queue_routing_key_name
        )
        if skip_primary_export_task:
            tasks = chain(format_tasks)
            primary_export_task_record.delete()
        else:
            tasks = chain(primary_export_task_signature, format_tasks)

        tasks = chain(tasks)

        return data_provider_task_record.uid, tasks
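
A minimal usage sketch (hypothetical; `task_factory` stands in for whatever object defines build_tasks, and the keyword values are assumed to exist in the calling scope):

# Hypothetical usage: build the chain for one data provider and hand it to celery.
record_uid, task_chain = task_factory.build_tasks(
    primary_export_task,  # e.g. the task converting the source data to gpkg
    provider_task_uid=provider_task_uid,
    user=user,
    run=run,
    stage_dir=stage_dir,
    worker=worker,
    service_type=service_type,
    session_token=session_token,
)
if task_chain:
    task_chain.apply_async()  # the subtasks carry their own queue/routing_key via .set()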
Example 10
    def build_tasks(self, primary_export_task, provider_task_uid=None, user=None, run=None, stage_dir=None, worker=None,
                    service_type=None, *args, **kwargs):
        """
        Run OSM export tasks. Specifically create a task chain to be picked up by a celery worker later.

        :param primary_export_task: The task which converts the source data to the interchange format (i.e. OSM-> gpkg)
        :param provider_task_uid: A reference uid for the DataProviderTask model.
        :param user: The user executing the task.
        :param run: The ExportRun which this task will belong to.
        :param stage_dir: The directory where to store the files while they are being created.
        :param worker: The celery worker assigned this task.
        :param osm_gpkg: A OSM geopackage with the planet osm schema.
        :return: An DataProviderTaskRecord uid and the Celery Task Chain or None, False.
        """
        # This is just to make it easier to trace when user_details haven't been sent
        user_details = kwargs.get('user_details')
        if user_details is None:
            user_details = {'username': '******'.format(primary_export_task.name)}

        logger.debug('Running Job with id: {0}'.format(provider_task_uid))
        # pull the provider_task from the database
        provider_task = DataProviderTask.objects.get(uid=provider_task_uid)
        job = run.job

        job_name = normalize_name(job.name)
        # get the formats to export
        formats = [provider_task_format.slug for provider_task_format in provider_task.formats.all()]
        export_tasks = {}

        # If WCS we will want geotiff...
        if 'wcs' in primary_export_task.name.lower():
            formats += ['geotiff']

        # build a list of celery tasks based on the export formats...
        for format in formats:
            try:
                export_tasks[format] = {'obj': create_format_task(format), 'task_uid': None}
            except KeyError as e:
                logger.debug('KeyError: export_tasks[{}] - {}'.format(format, e))
            except ImportError as e:
                msg = 'Error importing export task: {0}'.format(e)
                logger.debug(msg)

        # Everything is already in gpkg format by default, except WCS, and we don't want gpkg for that,
        # so remove gpkg so that it doesn't show twice in the UI (i.e. raster export (.gpkg) and geopackage)...
        if len(export_tasks) > 0:
            if export_tasks.get('gpkg'):
                export_tasks.pop('gpkg')

        est_sz, meta = get_size_estimate(provider_task.provider, run.job.extents)

        # run the tasks
        data_provider_task_record = DataProviderTaskRecord.objects.create(run=run,
                                                                          name=provider_task.provider.name,
                                                                          slug=provider_task.provider.slug,
                                                                          status=TaskStates.PENDING.value,
                                                                          display=True,
                                                                          estimated_size=est_sz)

        for format, task in export_tasks.items():
            export_task = create_export_task_record(
                task_name=task.get('obj').name,
                export_provider_task=data_provider_task_record, worker=worker,
                display=getattr(task.get('obj'), "display", False)
            )
            export_tasks[format]['task_uid'] = export_task.uid

        """
        Create a celery chain which gets the data & runs export formats
        """
        if export_tasks:
            format_tasks = chain(
                task.get('obj').s(
                    run_uid=run.uid, stage_dir=stage_dir, job_name=job_name, task_uid=task.get('task_uid'),
                    user_details=user_details, locking_task_key=data_provider_task_record.uid
                ).set(queue=worker, routing_key=worker)
                for format_ignored, task in export_tasks.items()
            )
        else:
            format_tasks = None

        bbox = run.job.extents

        primary_export_task_record = create_export_task_record(
            task_name=primary_export_task.name,
            export_provider_task=data_provider_task_record, worker=worker,
            display=getattr(primary_export_task, "display", False)
        )

        if "osm" in primary_export_task.name.lower():
            queue_routing_key_name = "{}.osm".format(worker)
        else:
            queue_routing_key_name = worker

        primary_export_task_signature = primary_export_task.s(name=provider_task.provider.slug,
                                                              run_uid=run.uid,
                                                              provider_slug=provider_task.provider.slug,
                                                              overpass_url=provider_task.provider.url,
                                                              stage_dir=stage_dir,
                                                              export_provider_task_record_uid=data_provider_task_record.uid,
                                                              worker=worker,
                                                              job_name=job_name,
                                                              bbox=bbox,
                                                              selection=job.the_geom.geojson,
                                                              user_details=user_details,
                                                              task_uid=primary_export_task_record.uid,
                                                              layer=provider_task.provider.layer,
                                                              level_from=provider_task.provider.level_from,
                                                              level_to=provider_task.provider.level_to,
                                                              service_type=service_type,
                                                              service_url=provider_task.provider.url,
                                                              config=provider_task.provider.config).set(queue=queue_routing_key_name,
                                                                                                        routing_key=queue_routing_key_name)
        if format_tasks:
            tasks = chain(primary_export_task_signature, format_tasks)
        else:
            tasks = primary_export_task_signature

        return data_provider_task_record.uid, tasks
Example 11
def get_metadata(data_provider_task_uid):
    """
    An object to hold metadata about the run, for passing to various scripts that create
    style files or metadata documents within the datapack.

    This also creates a license file, which is considered part of the metadata, and adds it to "include_files".
    :param data_provider_task_uid: A provider task uid string, either for the run task (which will add all of the
    provider tasks for the run) or for a single data provider task.
    :return: A dict containing the run metadata.

    Example:
    {
    "aoi": "GEOJSON representing the selected AOI."
    "bbox": [
        w, s, e, n
    ],
    "data_sources": {
        "osm": {
            "copyright": None,
            "description": "OpenStreetMap vector data provided in a custom thematic schema. \r\n\t\r\n\tData is grouped into separate tables (e.g. water, roads...).",
            "file_path": "data/osm/test-osm-20181101.gpkg",
            "file_type": ".gpkg",
            "full_file_path": "/var/lib/eventkit/exports_stage/7fadf34e-58f9-4bb8-ab57-adc1015c4269/osm/test.gpkg",
            "last_update": "2018-10-29T04:35:02Z\n",
            "metadata": "https://overpass-server.com/overpass/interpreter",
            "name": "OpenStreetMap Data (Themes)",
            "slug": "osm",
            "type": "osm",
            "uid": "0d08ddf6-35c1-464f-b271-75f6911c3f78"
        }
    },
    "date": "20181101",
    "description": "Test",
    "has_elevation": False,
    "has_raster": True,
    "include_files": [
        "/var/lib/eventkit/exports_stage/7fadf34e-58f9-4bb8-ab57-adc1015c4269/osm/test.gpkg",
        "/var/lib/eventkit/exports_stage/7fadf34e-58f9-4bb8-ab57-adc1015c4269/osm/osm_selection.geojson"
    ],
    "name": "test",
    "project": "Test",
    "run_uid": "7fadf34e-58f9-4bb8-ab57-adc1015c4269",
    "url": "http://cloud.eventkit.test/status/2010025c-6d61-4a0b-8d5d-ff9c657259eb"
    }
    """

    from eventkit_cloud.jobs.models import DataProvider
    from eventkit_cloud.tasks.models import DataProviderTaskRecord
    from eventkit_cloud.tasks.export_tasks import TaskStates, create_zip_task

    data_provider_task = DataProviderTaskRecord.objects.get(uid=data_provider_task_uid)

    run = data_provider_task.run

    if data_provider_task.name == 'run':
        provider_tasks = run.provider_tasks.filter(~Q(name='run'))
        data_provider_task = None
    else:
        provider_tasks = [data_provider_task]

    # To prepare for the zipfile task, the files need to be checked to ensure they weren't
    # deleted during cancellation.
    include_files = []

    # A dict is used here to ensure that just one file per provider is added,
    # this should be updated when multiple formats are supported.
    metadata = {'name': normalize_name(run.job.name),
                'url': "{0}/status/{1}".format(getattr(settings, "SITE_URL"), str(run.job.uid)),
                'description': run.job.description,
                'project': run.job.event,
                'date': timezone.now().strftime("%Y%m%d"),
                'run_uid': str(run.uid),
                'data_sources': {},
                "bbox": run.job.extents,
                'aoi': run.job.bounds_geojson,
                'has_raster': False,
                'has_elevation': False}

    for provider_task in provider_tasks:
        if TaskStates[provider_task.status] in TaskStates.get_incomplete_states():
            continue
        data_provider = DataProvider.objects.get(slug=provider_task.slug)
        provider_type = data_provider.export_provider_type.type_name
        metadata['data_sources'][provider_task.slug] = {"name": provider_task.name}
        if TaskStates[provider_task.status] not in TaskStates.get_incomplete_states():
            provider_staging_dir = get_provider_staging_dir(run.uid, provider_task.slug)
            for export_task in provider_task.tasks.all():
                try:
                    filename = export_task.result.filename
                except Exception:
                    continue
                full_file_path = os.path.join(provider_staging_dir, filename)
                file_ext = os.path.splitext(filename)[1]
                # Exclude zip files created by prepare_file_for_zip and the selection GeoJSON.
                # Within the QGIS style sheet it is currently assumed that GPKG files are imagery and
                # GeoTIFFs are elevation. This will need to be updated in the future.
                if file_ext in ['.gpkg', '.tif']:
                    download_filename = get_download_filename(os.path.splitext(os.path.basename(filename))[0],
                                                              timezone.now(),
                                                              file_ext,
                                                              additional_descriptors=provider_task.slug)
                    filepath = get_archive_data_path(
                        provider_task.slug,
                        download_filename
                    )
                    metadata['data_sources'][provider_task.slug] = {
                        'uid': str(provider_task.uid),
                        'slug': provider_task.slug,
                        'name': provider_task.name,
                        'file_path': filepath,
                        'full_file_path': full_file_path,
                        'file_type': file_ext,
                        'type': get_data_type_from_provider(provider_task.slug),
                        'description': str(data_provider.service_description).replace('\r\n', '\n').replace('\n', '\r\n\t'),
                        'last_update': get_last_update(data_provider.url, provider_type, slug=data_provider.slug),
                        'metadata': get_metadata_url(data_provider.url, provider_type),
                        'copyright': data_provider.service_copyright,
                    }
                    if provider_task.slug not in ['osm', 'nome']:
                        if file_ext == '.gpkg':
                            metadata['has_raster'] = True
                        if file_ext == '.tif':
                            metadata['has_elevation'] = True
                    if os.path.splitext(full_file_path)[1] == '.tif':
                        # Get statistics to update ranges in template.
                        band_stats = get_band_statistics(full_file_path)
                        logger.info("Band Stats {0}: {1}".format(full_file_path, band_stats))
                        metadata['data_sources'][provider_task.slug]["band_stats"] = band_stats
                        # Calculate the value for each elevation step (of 16)
                        try:
                            steps = linspace(band_stats[0], band_stats[1], num=16)
                            metadata['data_sources'][provider_task.slug]["ramp_shader_steps"] = list(map(int, steps))
                        except TypeError:
                            metadata['data_sources'][provider_task.slug]["ramp_shader_steps"] = None

                if not os.path.isfile(full_file_path):
                    logger.error("Could not find file {0} for export {1}.".format(full_file_path, export_task.name))
                    continue
                # Exclude zip files created by zip_export_provider
                if not (full_file_path.endswith(".zip") and export_task.name == create_zip_task.name):
                    include_files += [full_file_path]

        # add the license for this provider if it exists
        license_file = create_license_file(provider_task)
        if license_file:
            include_files += [license_file]
        metadata['include_files'] = include_files

    return metadata