Example #1
    def execute(self, context):
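        # Inputs arrive via XCom: either from the task named in get_inputs_from
        # or from the default dhus_download_task.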
        if self.get_inputs_from is not None:
            log.info("Getting inputs from: {}".format(self.get_inputs_from))
            self.downloaded_products, self.archived_products = context[
                'task_instance'].xcom_pull(task_ids=self.get_inputs_from,
                                           key=XCOM_RETURN_KEY)
        else:
            log.info("Getting inputs from: dhus_download_task")
            self.downloaded_products = context['task_instance'].xcom_pull(
                'dhus_download_task', key='downloaded_products')
        if self.downloaded_products is None:
            log.info("Nothing to process.")
            return

        for product in self.downloaded_products.keys():
            log.info("Processing: {}".format(product))
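            # Open the SAFE package with s2reader to read product- and granule-level metadata.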
            with s2reader.open(product) as s2_product:
                coords = []
                links = []
                metadata = s2_product._product_metadata
                granule = s2_product.granules[0]
                granule_metadata = granule._metadata
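                # The footprint is a WKT polygon string; extract the "lon lat" pairs
                # between the parentheses and convert them to float coordinate pairs.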
                product_footprint = [
                    [[m.replace(" ", ",")]
                     for m in str(s2_product.footprint).replace(", ", ",").
                     partition('((')[-1].rpartition('))')[0].split(",")]
                ]
                for item in product_footprint[0]:
                    [x_coordinate, y_coordinate] = item[0].split(",")
                    coords.append([float(x_coordinate), float(y_coordinate)])
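                # Assemble a GeoJSON Feature with the EOP/OPT search properties;
                # fields not derivable from the package metadata are left as None.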
                final_metadata_dict = {
                    "type": "Feature",
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [coords]
                    },
                    "properties": {
                        "eop:identifier": s2_product.manifest_safe_path.rsplit('.SAFE', 1)[0],
                        "timeStart": s2_product.product_start_time,
                        "timeEnd": s2_product.product_stop_time,
                        "originalPackageLocation": os.path.join(
                            self.original_package_download_base_url,
                            os.path.basename(self.archived_products.pop(0))),
                        "thumbnailURL": None,
                        "quicklookURL": None,
                        "eop:parentIdentifier": "SENTINEL2",
                        "eop:productionStatus": None,
                        "eop:acquisitionType": None,
                        "eop:orbitNumber": s2_product.sensing_orbit_number,
                        "eop:orbitDirection": s2_product.sensing_orbit_direction,
                        "eop:track": None,
                        "eop:frame": None,
                        "eop:swathIdentifier": metadata.find(
                            './/Product_Info/Datatake').attrib['datatakeIdentifier'],
                        "opt:cloudCover": int(float(
                            metadata.findtext(".//Cloud_Coverage_Assessment"))),
                        "opt:snowCover": None,
                        "eop:productQualityStatus": None,
                        "eop:productQualityDegradationStatus": None,
                        "eop:processorName": None,
                        "eop:processingCenter": None,
                        "eop:creationDate": None,
                        "eop:modificationDate": None,
                        "eop:processingDate": None,
                        "eop:sensorMode": None,
                        "eop:archivingCenter": granule_metadata.findtext('.//ARCHIVING_CENTRE'),
                        "eop:processingMode": None,
                        "eop:availabilityTime": s2_product.generation_time,
                        "eop:acquisitionStation": None,
                        "eop:acquisitionSubtype": None,
                        "eop:startTimeFromAscendingNode": None,
                        "eop:completionTimeFromAscendingNode": None,
                        "eop:illuminationAzimuthAngle": metadata.findtext(
                            './/Mean_Sun_Angle/AZIMUTH_ANGLE'),
                        "eop:illuminationZenithAngle": metadata.findtext(
                            './/Mean_Sun_Angle/ZENITH_ANGLE'),
                        "eop:illuminationElevationAngle": None,
                        "eop:resolution": None
                    }
                }
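                # Build one GeoJSON Feature per band JP2 in each granule,
                # recording its remote location and band name.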
                features_list = []
                granule_counter = 1
                for granule in s2_product.granules:
                    # Parse the granule footprint WKT into float coordinate pairs.
                    granule_coords = []
                    granule_coordinates = [[
                        [m.replace(" ", ",")]
                        for m in str(granule.footprint).replace(", ", ",").
                        partition('((')[-1].rpartition('))')[0].split(",")
                    ]]
                    for item in granule_coordinates[0]:
                        granule_x_coordinate, granule_y_coordinate = \
                            item[0].split(",")
                        granule_coords.append([
                            float(granule_x_coordinate),
                            float(granule_y_coordinate)
                        ])
                    # List the band JP2s in the package, skipping the PVI preview.
                    with zipfile.ZipFile(product) as zipped_product:
                        for file_name in zipped_product.namelist():
                            if file_name.endswith('.jp2') \
                                    and not file_name.endswith('PVI.jp2'):
                                features_list.append({
                                    "type": "Feature",
                                    "geometry": {
                                        "type": "Polygon",
                                        "coordinates": [granule_coords]
                                    },
                                    "properties": {
                                        "location": os.path.join(
                                            self.remote_dir,
                                            granule.granule_path.rsplit("/")[-1],
                                            "IMG_DATA",
                                            file_name.rsplit("/")[-1]),
                                        "band": self.bands_dict[
                                            file_name.rsplit("/")[-1]
                                            .rsplit(".")[0][-3:]]
                                    },
                                    "id": "GRANULE.{}".format(granule_counter)
                                })
                                granule_counter += 1
            final_granules_dict = {
                "type": "FeatureCollection",
                "features": features_list
            }

            timeStart = final_metadata_dict["properties"]["timeStart"]
            timeEnd = final_metadata_dict["properties"]["timeEnd"]
            # create description.html and dump it to file
            log.info("Creating description.html")
            tr = TemplatesResolver()
            htmlAbstract = tr.generate_product_abstract({
                "timeStart": timeStart,
                "timeEnd": timeEnd,
                "originalPackageLocation":
                    final_metadata_dict["properties"]["originalPackageLocation"]
            })
            log.debug(pprint.pformat(htmlAbstract))
            final_metadata_dict['htmlDescription'] = htmlAbstract

            # Output directory: the package path without its .zip extension.
            product_dir = os.path.splitext(product)[0]
            with open(os.path.join(product_dir, 'description.html'),
                      'w') as product_outfile:
                product_outfile.write(htmlAbstract)
            # Note here that the SRID is a property of the granule not the product
            final_metadata_dict["properties"]["crs"] = granule.srid
            with open(os.path.join(product_dir, 'product.json'),
                      'w') as product_outfile:
                json.dump(final_metadata_dict, product_outfile, indent=4)
            with open(os.path.join(product_dir, 'granules.json'),
                      'w') as granules_outfile:
                json.dump(final_granules_dict, granules_outfile, indent=4)

            product_identifier = s2_product.manifest_safe_path.rsplit(
                '.SAFE', 1)[0]
            bbox = get_bbox_from_granules_coordinates(granule_coordinates)

            ows_links_dict = create_owslinks_dict(
                product_identifier=product_identifier,
                timestart=timeStart,
                timeend=timeEnd,
                granule_bbox=bbox,
                gs_workspace=self.gs_workspace,
                gs_wms_layer=self.gs_wms_layer,
                gs_wms_width=self.gs_wms_width,
                gs_wms_height=self.gs_wms_height,
                gs_wms_format=self.gs_wms_format,
                gs_wms_version=self.gs_wms_version,
                gs_wfs_featuretype=self.gs_wfs_featuretype,
                gs_wfs_format=self.gs_wfs_format,
                gs_wfs_version=self.gs_wfs_version,
                gs_wcs_coverage_id=self.gs_wcs_coverage_id,
                gs_wcs_scale_i=self.gs_wcs_scale_i,
                gs_wcs_scale_j=self.gs_wcs_scale_j,
                gs_wcs_format=self.gs_wcs_format,
                gs_wcs_version=self.gs_wcs_version,
            )

            log.info("ows links: {}".format(pprint.pformat(ows_links_dict)))

            with open(os.path.join(product_dir, 'owsLinks.json'),
                      'w') as owslinks_outfile:
                json.dump(ows_links_dict, owslinks_outfile, indent=4)

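        # Second pass: extract the JP2s and tile metadata (MTD_TL.xml) from each
        # archive and write .wld/.prj sidecar files for them.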
        self.custom_archived = []
        for archive_line in self.downloaded_products.keys():
            jp2_files_paths = []
            archive_dir = os.path.splitext(archive_line)[0]
            with zipfile.ZipFile(archive_line, 'r') as archived_product:
                for file_name in archived_product.namelist():
                    if file_name.endswith(
                            '.jp2') and not file_name.endswith('PVI.jp2'):
                        archived_product.extract(file_name, archive_dir)
                        jp2_files_paths.append(
                            os.path.join(archive_dir, file_name))
                    if file_name.endswith('MTD_TL.xml'):
                        archived_product.extract(file_name, archive_dir)
                        mtd_tl_xml = os.path.join(archive_dir, file_name)
            tree = ET.parse(mtd_tl_xml)
            root = tree.getroot()
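            # Top-level elements carry the root's namespace; reuse it to locate Geometric_Info.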
            geometric_info = root.find(
                root.tag.split('}', 1)[0] + "}Geometric_Info")
            tile_geocoding = geometric_info.find("Tile_Geocoding")
            wld_files = []
            prj_files = []
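            # For each JP2: capture the projection WKT from gdalinfo output into a
            # .prj file, then write a .wld world file from the tile geocoding.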
            for jp2_file in jp2_files_paths:
                wld_name = os.path.splitext(jp2_file)[0]
                gdalinfo_cmd = "gdalinfo {} > {}".format(
                    jp2_file, wld_name + ".prj")
                gdalinfo_BO = BashOperator(
                    task_id="bash_operator_gdalinfo_{}".format(wld_name[-3:]),
                    bash_command=gdalinfo_cmd)
                gdalinfo_BO.execute(context)
                sed_cmd = "sed -i -e '1,4d;29,$d' {}".format(wld_name + ".prj")
                sed_BO = BashOperator(
                    task_id="bash_operator_sed_{}".format(wld_name[-3:]),
                    bash_command=sed_cmd)
                sed_BO.execute(context)
                prj_files.append(wld_name + ".prj")
                wld_files.append(wld_name + ".wld")
                # Map the band suffix (e.g. B02) to its resolution key in bands_res.
                for key, value in self.bands_res.items():
                    if wld_name[-3:] in value:
                        element = key
                geo_position = tile_geocoding.find(
                    './/Geoposition[@resolution="{}"]'.format(element))
                # World file layout: x pixel size, two rotation terms,
                # y pixel size, then the upper-left X and Y coordinates.
                with open(wld_name + ".wld", "w") as wld_file:
                    wld_file.write(geo_position.find("XDIM").text + "\n")
                    wld_file.write("0" + "\n" + "0" + "\n")
                    wld_file.write(geo_position.find("YDIM").text + "\n")
                    wld_file.write(geo_position.find("ULX").text + "\n")
                    wld_file.write(geo_position.find("ULY").text + "\n")
            parent_dir = os.path.dirname(jp2_files_paths[0])
            self.custom_archived.append(os.path.dirname(parent_dir))
            log.info(os.path.dirname(parent_dir))
        log.info(self.custom_archived)
        context['task_instance'].xcom_push(key='downloaded_products',
                                           value=self.downloaded_products)
        context['task_instance'].xcom_push(
            key='downloaded_products_with_wldprj',
            value=' '.join(self.custom_archived))
        return self.custom_archived
Example #2
    def execute(self, context):
        # fetch MTL file path from XCom
        mtl_path = context["task_instance"].xcom_pull(self.get_inputs_from["metadata_task_id"])
        if mtl_path is None:
            log.info("Nothing to process.")
            return
        # Fetch the uploaded granule paths from XCom
        upload_granules_task_ids = self.get_inputs_from["upload_task_ids"]
        granule_paths = []
        for tid in upload_granules_task_ids:
            granule_paths += context["task_instance"].xcom_pull(tid)
        original_package_paths = context["task_instance"].xcom_pull(
            self.get_inputs_from["upload_original_package_task_id"])
        original_package_path = original_package_paths[0]
        original_package_filename = os.path.basename(original_package_path)
        original_package_location = self.original_package_download_base_url + original_package_filename
        product_id = os.path.splitext(original_package_filename)[0]
        # Get GDALInfo output from XCom
        gdalinfo_task_id = self.get_inputs_from["gdalinfo_task_id"]
        gdalinfo_dict = context["task_instance"].xcom_pull(gdalinfo_task_id)
        # Get the GDALInfo output of one granule; the CRS is the same for all granules
        k = next(iter(gdalinfo_dict))
        gdalinfo_out = gdalinfo_dict[k]
        # Extract projection WKT and get EPSG code
        match = re.findall(r'^(PROJCS.*]])', gdalinfo_out, re.MULTILINE | re.DOTALL)
        # findall returns a list, so check that a match exists before indexing
        assert match, "No PROJCS definition found in gdalinfo output"
        wkt_def = match[0]
        assert isinstance(wkt_def, basestring)
        sref = osr.SpatialReference()
        sref.ImportFromWkt(wkt_def)
        # The AUTHORITY node of the projected CS carries the EPSG code
        crs = sref.GetAttrValue("AUTHORITY", 1)

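        # Parse the MTL metadata file and derive the product bounding box
        # from its corner coordinates.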
        with open(mtl_path) as mtl_fh:
            parsed_metadata = parse_mtl_data(mtl_fh)
        bounding_box = get_bounding_box(parsed_metadata["PRODUCT_METADATA"])
        # Bounding box from the scene corner coordinates (ll/lr/ul/ur)
        bbox_dict = {
            "long_min": min(bounding_box.lllon, bounding_box.ullon),
            "lat_min": min(bounding_box.lllat, bounding_box.lrlat),
            "long_max": max(bounding_box.lrlon, bounding_box.urlon),
            "lat_max": max(bounding_box.ullat, bounding_box.urlat),
        }
        log.debug("BoundingBox: {}".format(pprint.pformat(bounding_box)))

        prepared_metadata = prepare_metadata(parsed_metadata, bounding_box, crs, original_package_location)
        timeStart, timeEnd = prepared_metadata['properties']['timeStart'], prepared_metadata['properties']['timeEnd']
        # create description.html and dump it to file
        log.info("Creating description.html")
        tr = TemplatesResolver()
        htmlAbstract = tr.generate_product_abstract({
            "timeStart": timeStart,
            "timeEnd": timeEnd,
            "originalPackageLocation": original_package_location
        })
        log.debug(pprint.pformat(htmlAbstract))
        prepared_metadata["htmlDescription"] = htmlAbstract

        product_directory, mtl_name = os.path.split(mtl_path)
        granules_dict = prepare_granules(bounding_box, granule_paths)
        log.debug("Granules Dict: {}".format(pprint.pformat(granules_dict)))

        ows_links_dict = create_owslinks_dict(
            product_identifier=product_id,
            timestart=timeStart,
            timeend=timeEnd,
            granule_bbox=bbox_dict,
            gs_workspace=self.gs_workspace,
            gs_wms_layer=self.gs_wms_layer,
            gs_wms_width=self.gs_wms_width,
            gs_wms_height=self.gs_wms_height,
            gs_wms_format=self.gs_wms_format,
            gs_wms_version=self.gs_wms_version,
            gs_wfs_featuretype=self.gs_wfs_featuretype,
            gs_wfs_format=self.gs_wfs_format,
            gs_wfs_version=self.gs_wfs_version,
            gs_wcs_coverage_id=self.gs_wcs_coverage_id,
            gs_wcs_scale_i=self.gs_wcs_scale_i,
            gs_wcs_scale_j=self.gs_wcs_scale_j,
            gs_wcs_format=self.gs_wcs_format,
            gs_wcs_version=self.gs_wcs_version,
        )
        log.info("ows links: {}".format(pprint.pformat(ows_links_dict)))

        product_json_path = os.path.join(product_directory, "product.json")
        ows_links_path = os.path.join(product_directory, "owsLinks.json")
        granules_path = os.path.join(product_directory, "granules.json")
        xml_template_path = os.path.join(product_directory, "metadata.xml")
        description_html_path = os.path.join(product_directory, "description.html")

        # Create product.json
        with open(product_json_path, 'w') as out_json_fh:
            json.dump(prepared_metadata, out_json_fh, indent=4)
        # Create granules.json
        with open(granules_path, 'w') as out_granules_fh:
            json.dump(granules_dict, out_granules_fh, indent=4)
        # Create owsLinks.json
        with open(ows_links_path, 'w') as out_ows_links_fh:
            json.dump(ows_links_dict, out_ows_links_fh, indent=4)
        # Create metadata.xml
        shutil.copyfile(self.metadata_xml_path, xml_template_path)
        # Create description.html
        with open(description_html_path, "w") as out_description:
            out_description.write(htmlAbstract)

        return product_json_path, granules_path, ows_links_path, xml_template_path, description_html_path
Example #3
    def execute(self, context):
        log.info('--------------------S1Metadata_PLUGIN running------------')
        task_instance = context['task_instance']

        log.info("Receiving from 'get_inputs_from':\n{}".format(
            self.get_inputs_from))

        download_task_id = self.get_inputs_from['download_task_id']
        addo_task_ids = self.get_inputs_from['addo_task_ids']
        upload_task_ids = self.get_inputs_from['upload_task_ids']
        archive_product_task_id = self.get_inputs_from[
            'archive_product_task_id']

        downloaded = context['task_instance'].xcom_pull(
            task_ids=download_task_id, key=XCOM_RETURN_KEY)

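        # Gather the granule paths produced by the addo tasks.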
        local_granules_paths = []
        for tid in addo_task_ids:
            local_granules_path = context['task_instance'].xcom_pull(
                task_ids=tid, key=XCOM_RETURN_KEY)
            if local_granules_path:
                local_granules_paths += local_granules_path
        uploaded_granules_paths = context['task_instance'].xcom_pull(
            task_ids=upload_task_ids, key=XCOM_RETURN_KEY)
        original_package_path = context['task_instance'].xcom_pull(
            task_ids=archive_product_task_id, key=XCOM_RETURN_KEY)
        if not downloaded:
            log.info("No products from Download task, nothing to do.")
            return list()
        if not local_granules_paths:
            log.info("No local granules from processing, nothing to do.")
            return list()
        if not uploaded_granules_paths:
            log.info("No uploaded granules from upload task, nothing to do.")
            return list()
        if not original_package_path:
            log.info("No original package path from original package upload "
                     "task, nothing to do.")
            return list()

        # All required inputs are present; collect the granules metadata.
        granules_dict, bbox = collect_granules_metadata(
            local_granules_paths, self.granules_upload_dir, self.bands_dict)

        safe_package_path = next(iter(downloaded))
        safe_package_filename = os.path.basename(safe_package_path)
        product_id = downloaded[safe_package_path].get('title')
        originalPackageLocation = self.original_package_download_base_url + safe_package_filename
        processing_dir = os.path.join(self.processing_dir, product_id)
        if not os.path.exists(processing_dir):
            os.makedirs(processing_dir)

        log.info('safe_package_path: {}'.format(safe_package_path))
        log.info('local_granules_paths: {}'.format(local_granules_paths))

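        # Read product metadata and the footprint from the SAFE package via GDAL.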
        s1reader = S1GDALReader(safe_package_path)
        product_metadata = s1reader.get_metadata()
        product_metadata['footprint'] = s1reader.get_footprint()
        log.info(pprint.pformat(product_metadata, indent=4))

        timeStart = product_metadata['ACQUISITION_START_TIME']
        timeEnd = product_metadata['ACQUISITION_STOP_TIME']

        owslinks_dict = create_owslinks_dict(
            product_identifier=product_id,
            timestart=timeStart,
            timeend=timeEnd,
            granule_bbox=bbox,
            gs_workspace=self.gs_workspace,
            gs_wms_layer=self.gs_wms_layer,
            gs_wms_width=self.gs_wms_width,
            gs_wms_height=self.gs_wms_height,
            gs_wms_format=self.gs_wms_format,
            gs_wms_version=self.gs_wms_version,
            gs_wfs_featuretype=self.gs_wfs_featuretype,
            gs_wfs_format=self.gs_wfs_format,
            gs_wfs_version=self.gs_wfs_version,
            gs_wcs_coverage_id=self.gs_wcs_coverage_id,
            gs_wcs_scale_i=self.gs_wcs_scale_i,
            gs_wcs_scale_j=self.gs_wcs_scale_j,
            gs_wcs_format=self.gs_wcs_format,
            gs_wcs_version=self.gs_wcs_version)

        # create thumbnail
        # TODO: create proper thumbnail from quicklook. Also remove temp file
        log.info("Creating thumbnail")
        thumbnail_path = os.path.join(processing_dir, "thumbnail.png")
        quicklook_path = s1reader.get_quicklook()
        log.info(pprint.pformat(quicklook_path))
        copyfile(quicklook_path, thumbnail_path)

        search_params_dict = create_search_dict(product_metadata,
                                                originalPackageLocation)
        log.info(pprint.pformat(search_params_dict))

        metadata_dict = create_metadata_dict(product_metadata)
        log.info(pprint.pformat(metadata_dict))

        description_dict = create_description_dict(product_metadata,
                                                   originalPackageLocation)
        log.info(pprint.pformat(description_dict))

        # create description.html and dump it to file
        log.info("Creating description.html")
        html_description = create_product_description(description_dict)
        search_params_dict['htmlDescription'] = html_description

        # create metadata XML
        log.info("Creating metadata.xml")
        metadata_xml = create_product_metadata(metadata_dict)

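        # Package everything into the product zip; the PythonOperator is
        # executed inline with the current task context.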
        po = PythonOperator(task_id="s1_metadata_dictionary_creation",
                            python_callable=create_procuct_zip,
                            op_kwargs={
                                'processing_dir': processing_dir,
                                'search_params_dict': search_params_dict,
                                'description_html': html_description,
                                'metadata_xml': metadata_xml,
                                'granules_dict': granules_dict,
                                'owslinks_dict': owslinks_dict,
                                'thumbnail_path': thumbnail_path
                            })

        out = po.execute(context)
        zip_paths = list()
        if out:
            zip_paths.append(out)
        return zip_paths