def execute(self, context):
    """Publish Landsat-8 product metadata for ingestion.

    Pulls the MTL file path, uploaded granule paths, original package path
    and gdalinfo output from XCom, derives the CRS and bounding box, renders
    the HTML abstract, and writes product.json, granules.json, owsLinks.json,
    metadata.xml and description.html next to the MTL file.

    :param context: Airflow task context (XCom access via ``task_instance``).
    :return: tuple of the five written file paths, or ``None`` when there is
        no MTL path to process.
    """
    # fetch MTL file path from XCom
    mtl_path = context["task_instance"].xcom_pull(self.get_inputs_from["metadata_task_id"])
    if mtl_path is None:
        log.info("Nothing to process.")
        return

    # Uploaded granules paths from XCom (one XCom list per upload task)
    upload_granules_task_ids = self.get_inputs_from["upload_task_ids"]
    granule_paths = []
    for tid in upload_granules_task_ids:
        granule_paths += context["task_instance"].xcom_pull(tid)

    original_package_paths = context["task_instance"].xcom_pull(
        self.get_inputs_from["upload_original_package_task_id"])
    original_package_path = original_package_paths[0]
    original_package_filename = os.path.basename(original_package_path)
    original_package_location = self.original_package_download_base_url + original_package_filename
    product_id = os.path.splitext(original_package_filename)[0]

    # Get GDALInfo output from XCom
    gdalinfo_task_id = self.get_inputs_from["gdalinfo_task_id"]
    gdalinfo_dict = context["task_instance"].xcom_pull(gdalinfo_task_id)

    # Get GDALInfo output of one of the granules, CRS will be the same for
    # all granules.  next(iter(...)) instead of .keys()[0]: equivalent on
    # Python 2 and also works on Python 3, where dict.keys() is a view that
    # does not support indexing.
    k = next(iter(gdalinfo_dict))
    gdalinfo_out = gdalinfo_dict[k]

    # Extract projection WKT and get EPSG code
    match = re.findall(r'^(PROJCS.*]])', gdalinfo_out, re.MULTILINE | re.DOTALL)
    wkt_def = match[0]
    assert wkt_def is not None
    # basestring covers str/unicode on Python 2
    assert isinstance(wkt_def, basestring) or isinstance(wkt_def, str)
    sref = osr.SpatialReference()
    sref.ImportFromWkt(wkt_def)
    crs = sref.GetAttrValue("AUTHORITY", 1)

    with open(mtl_path) as mtl_fh:
        parsed_metadata = parse_mtl_data(mtl_fh)
    bounding_box = get_bounding_box(parsed_metadata["PRODUCT_METADATA"])
    # BUG FIX: the *_max entries previously used min(), which collapsed the
    # bounding box towards its minimum corner; the maxima must use max().
    bbox_dict = {
        "long_min": min(bounding_box.lllon, bounding_box.ullon),
        "lat_min": min(bounding_box.lllat, bounding_box.lrlat),
        "long_max": max(bounding_box.lrlon, bounding_box.urlon),
        "lat_max": max(bounding_box.ullat, bounding_box.urlat),
    }
    log.debug("BoundingBox: {}".format(pprint.pformat(bounding_box)))

    prepared_metadata = prepare_metadata(parsed_metadata, bounding_box, crs,
                                         original_package_location)
    timeStart = prepared_metadata['properties']['timeStart']
    timeEnd = prepared_metadata['properties']['timeEnd']

    # create description.html and dump it to file
    log.info("Creating description.html")
    tr = TemplatesResolver()
    htmlAbstract = tr.generate_product_abstract({
        "timeStart": timeStart,
        "timeEnd": timeEnd,
        "originalPackageLocation": original_package_location
    })
    log.debug(pprint.pformat(htmlAbstract))
    prepared_metadata["htmlDescription"] = htmlAbstract

    product_directory, mtl_name = os.path.split(mtl_path)

    granules_dict = prepare_granules(bounding_box, granule_paths)
    log.debug("Granules Dict: {}".format(pprint.pformat(granules_dict)))

    ows_links_dict = create_owslinks_dict(
        product_identifier=product_id,
        timestart=timeStart,
        timeend=timeEnd,
        granule_bbox=bbox_dict,
        gs_workspace=self.gs_workspace,
        gs_wms_layer=self.gs_wms_layer,
        gs_wms_width=self.gs_wms_width,
        gs_wms_height=self.gs_wms_height,
        gs_wms_format=self.gs_wms_format,
        gs_wms_version=self.gs_wms_version,
        gs_wfs_featuretype=self.gs_wfs_featuretype,
        gs_wfs_format=self.gs_wfs_format,
        gs_wfs_version=self.gs_wfs_version,
        gs_wcs_coverage_id=self.gs_wcs_coverage_id,
        gs_wcs_scale_i=self.gs_wcs_scale_i,
        gs_wcs_scale_j=self.gs_wcs_scale_j,
        gs_wcs_format=self.gs_wcs_format,
        gs_wcs_version=self.gs_wcs_version,
    )
    log.info("ows links: {}".format(pprint.pformat(ows_links_dict)))

    product_json_path = os.path.join(product_directory, "product.json")
    ows_links_path = os.path.join(product_directory, "owsLinks.json")
    granules_path = os.path.join(product_directory, "granules.json")
    xml_template_path = os.path.join(product_directory, "metadata.xml")
    description_html_path = os.path.join(product_directory, "description.html")

    # Create product.json
    with open(product_json_path, 'w') as out_json_fh:
        json.dump(prepared_metadata, out_json_fh, indent=4)

    # Create granules.json
    with open(granules_path, 'w') as out_granules_fh:
        json.dump(granules_dict, out_granules_fh, indent=4)

    # Create owsLinks.json
    with open(ows_links_path, 'w') as out_ows_links_fh:
        json.dump(ows_links_dict, out_ows_links_fh, indent=4)

    # Create metadata.xml
    shutil.copyfile(self.metadata_xml_path, xml_template_path)

    # Create description.html
    with open(description_html_path, "w") as out_description:
        out_description.write(htmlAbstract)

    return product_json_path, granules_path, ows_links_path, xml_template_path, description_html_path
def create_product_metadata(metadata_dict):
    """Render the Sentinel-1 product metadata XML for *metadata_dict*."""
    resolver = TemplatesResolver()
    return resolver.generate_sentinel1_product_metadata(metadata_dict)
def execute(self, context):
    """Build publishing metadata for downloaded Sentinel-2 products.

    For each product zip in ``self.downloaded_products``:

    * reads footprints and metadata via ``s2reader`` and writes
      product.json, granules.json, description.html and owsLinks.json into
      the directory named after the zip (extension dropped);
    * extracts the JP2 band images plus ``MTD_TL.xml`` and generates a
      ``.prj`` (via gdalinfo + sed) and a ``.wld`` world file per band.

    Pushes ``downloaded_products`` and ``downloaded_products_with_wldprj``
    to XCom and returns the list of per-product extraction directories.

    :param context: Airflow task context (XCom access via ``task_instance``).
    """

    def _without_zip_ext(path):
        # BUG FIX: the original used path.strip(".zip"), which strips any of
        # the characters '.', 'z', 'i', 'p' from BOTH ends (over-stripping
        # names that legitimately end in those characters).  We only want to
        # drop a trailing ".zip" extension.
        return path[:-len(".zip")] if path.endswith(".zip") else path

    if self.get_inputs_from is not None:
        log.info("Getting inputs from: {}".format(self.get_inputs_from))
        self.downloaded_products, self.archived_products = context[
            'task_instance'].xcom_pull(task_ids=self.get_inputs_from,
                                       key=XCOM_RETURN_KEY)
    else:
        log.info("Getting inputs from: dhus_download_task")
        self.downloaded_products = context['task_instance'].xcom_pull(
            'dhus_download_task', key='downloaded_products')
    if self.downloaded_products is None:
        log.info("Nothing to process.")
        return

    for product in self.downloaded_products.keys():
        log.info("Processing: {}".format(product))
        with s2reader.open(product) as s2_product:
            coords = []
            metadata = s2_product._product_metadata
            granule = s2_product.granules[0]
            granule_metadata = granule._metadata
            # Parse the "POLYGON ((x y, x y, ...))" footprint string into
            # [[x, y], ...] coordinate pairs.
            product_footprint = [
                [[m.replace(" ", ",")]
                 for m in str(s2_product.footprint).replace(", ", ",").
                 partition('((')[-1].rpartition('))')[0].split(",")]
            ]
            for item in product_footprint[0]:
                [x_coordinate, y_coordinate] = item[0].split(",")
                coords.append([float(x_coordinate), float(y_coordinate)])

            final_metadata_dict = {
                "type": "Feature",
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [coords]
                },
                "properties": {
                    "eop:identifier":
                        s2_product.manifest_safe_path.rsplit('.SAFE', 1)[0],
                    "timeStart": s2_product.product_start_time,
                    "timeEnd": s2_product.product_stop_time,
                    # NOTE: pop(0) consumes archived_products as products
                    # are processed (assumes same ordering as the dict).
                    "originalPackageLocation": os.path.join(
                        self.original_package_download_base_url,
                        os.path.basename(self.archived_products.pop(0))),
                    "thumbnailURL": None,
                    "quicklookURL": None,
                    "eop:parentIdentifier": "SENTINEL2",
                    "eop:productionStatus": None,
                    "eop:acquisitionType": None,
                    "eop:orbitNumber": s2_product.sensing_orbit_number,
                    "eop:orbitDirection": s2_product.sensing_orbit_direction,
                    "eop:track": None,
                    "eop:frame": None,
                    "eop:swathIdentifier":
                        metadata.find('.//Product_Info/Datatake').
                        attrib['datatakeIdentifier'],
                    "opt:cloudCover":
                        int(float(metadata.findtext(
                            ".//Cloud_Coverage_Assessment"))),
                    "opt:snowCover": None,
                    "eop:productQualityStatus": None,
                    "eop:productQualityDegradationStatus": None,
                    "eop:processorName": None,
                    "eop:processingCenter": None,
                    "eop:creationDate": None,
                    "eop:modificationDate": None,
                    "eop:processingDate": None,
                    "eop:sensorMode": None,
                    "eop:archivingCenter":
                        granule_metadata.findtext('.//ARCHIVING_CENTRE'),
                    "eop:processingMode": None,
                    "eop:availabilityTime": s2_product.generation_time,
                    "eop:acquisitionStation": None,
                    "eop:acquisitionSubtype": None,
                    "eop:startTimeFromAscendingNode": None,
                    "eop:completionTimeFromAscendingNode": None,
                    "eop:illuminationAzimuthAngle":
                        metadata.findtext('.//Mean_Sun_Angle/AZIMUTH_ANGLE'),
                    "eop:illuminationZenithAngle":
                        metadata.findtext('.//Mean_Sun_Angle/ZENITH_ANGLE'),
                    "eop:illuminationElevationAngle": None,
                    "eop:resolution": None
                }
            }

            # NOTE: the original wrapped the granule pass in
            # "for i in self.bands_res.values():" with the loop variable
            # unused, resetting and rebuilding the identical features list
            # once per resolution; a single pass produces the same result.
            features_list = []
            granule_counter = 1
            for granule in s2_product.granules:
                granule_coords = []
                granule_coordinates = [[
                    [m.replace(" ", ",")]
                    for m in str(granule.footprint).replace(", ", ",").
                    partition('((')[-1].rpartition('))')[0].split(",")
                ]]
                for item in granule_coordinates[0]:
                    [granule_x_coordinate,
                     granule_y_coordinate] = item[0].split(",")
                    granule_coords.append([
                        float(granule_x_coordinate),
                        float(granule_y_coordinate)
                    ])
                # Close the archive when done (the original leaked the
                # ZipFile handle on every granule iteration).
                with zipfile.ZipFile(product) as zipped_product:
                    for file_name in zipped_product.namelist():
                        # band images only; skip the PVI preview
                        if file_name.endswith(
                                '.jp2'
                        ) and not file_name.endswith('PVI.jp2'):
                            features_list.append({
                                "type": "Feature",
                                "geometry": {
                                    "type": "Polygon",
                                    "coordinates": [granule_coords]
                                },
                                "properties": {
                                    "location": os.path.join(
                                        self.remote_dir,
                                        granule.granule_path.rsplit("/")[-1],
                                        "IMG_DATA",
                                        file_name.rsplit("/")[-1]),
                                    "band": self.bands_dict[
                                        file_name.rsplit("/")[-1].
                                        rsplit(".")[0][-3:]]
                                },
                                "id": "GRANULE.{}".format(granule_counter)
                            })
                granule_counter += 1

            final_granules_dict = {
                "type": "FeatureCollection",
                "features": features_list
            }

            timeStart = final_metadata_dict["properties"]["timeStart"]
            timeEnd = final_metadata_dict["properties"]["timeEnd"]

            # create description.html and dump it to file
            log.info("Creating description.html")
            tr = TemplatesResolver()
            htmlAbstract = tr.generate_product_abstract({
                "timeStart": timeStart,
                "timeEnd": timeEnd,
                "originalPackageLocation":
                    final_metadata_dict["properties"]["originalPackageLocation"]
            })
            log.debug(pprint.pformat(htmlAbstract))
            final_metadata_dict['htmlDescription'] = htmlAbstract

            product_dir = _without_zip_ext(product)
            with open(product_dir + '/description.html',
                      'w') as product_outfile:
                product_outfile.write(htmlAbstract)

            # Note here that the SRID is a property of the granule not the product
            final_metadata_dict["properties"]["crs"] = granule.srid
            with open(product_dir + '/product.json', 'w') as product_outfile:
                json.dump(final_metadata_dict, product_outfile, indent=4)

            with open(product_dir + '/granules.json', 'w') as granules_outfile:
                json.dump(final_granules_dict, granules_outfile, indent=4)

            product_identifier = s2_product.manifest_safe_path.rsplit(
                '.SAFE', 1)[0]
            # bbox from the last granule's coordinates (loop leftover,
            # as in the original)
            bbox = get_bbox_from_granules_coordinates(granule_coordinates)

            ows_links_dict = create_owslinks_dict(
                product_identifier=product_identifier,
                timestart=timeStart,
                timeend=timeEnd,
                granule_bbox=bbox,
                gs_workspace=self.gs_workspace,
                gs_wms_layer=self.gs_wms_layer,
                gs_wms_width=self.gs_wms_width,
                gs_wms_height=self.gs_wms_height,
                gs_wms_format=self.gs_wms_format,
                gs_wms_version=self.gs_wms_version,
                gs_wfs_featuretype=self.gs_wfs_featuretype,
                gs_wfs_format=self.gs_wfs_format,
                gs_wfs_version=self.gs_wfs_version,
                gs_wcs_coverage_id=self.gs_wcs_coverage_id,
                gs_wcs_scale_i=self.gs_wcs_scale_i,
                gs_wcs_scale_j=self.gs_wcs_scale_j,
                gs_wcs_format=self.gs_wcs_format,
                gs_wcs_version=self.gs_wcs_version,
            )
            log.info("ows links: {}".format(pprint.pformat(ows_links_dict)))

            with open(product_dir + '/owsLinks.json', 'w') as owslinks_outfile:
                json.dump(ows_links_dict, owslinks_outfile, indent=4)

    self.custom_archived = []
    for archive_line in self.downloaded_products.keys():
        jp2_files_paths = []
        archive_path = archive_line
        extract_dir = _without_zip_ext(archive_path)
        # Close the archive when done (the original never closed it).
        with zipfile.ZipFile(archive_line, 'r') as archived_product:
            for file_name in archived_product.namelist():
                if file_name.endswith(
                        '.jp2') and not file_name.endswith('PVI.jp2'):
                    archived_product.extract(file_name, extract_dir)
                    jp2_files_paths.append(
                        os.path.join(extract_dir, file_name))
                    parent_dir = os.path.dirname(jp2_files_paths[0])
                if file_name.endswith('MTD_TL.xml'):
                    archived_product.extract(file_name, extract_dir)
                    mtd_tl_xml = os.path.join(extract_dir, file_name)

        # assumes every archive contains an MTD_TL.xml (NameError otherwise,
        # as in the original)
        tree = ET.parse(mtd_tl_xml)
        root = tree.getroot()
        geometric_info = root.find(
            root.tag.split('}', 1)[0] + "}Geometric_Info")
        tile_geocoding = geometric_info.find("Tile_Geocoding")

        wld_files = []
        prj_files = []
        for jp2_file in jp2_files_paths:
            wld_name = os.path.splitext(jp2_file)[0]
            gdalinfo_cmd = "gdalinfo {} > {}".format(jp2_file,
                                                     wld_name + ".prj")
            gdalinfo_BO = BashOperator(
                task_id="bash_operator_gdalinfo_{}".format(wld_name[-3:]),
                bash_command=gdalinfo_cmd)
            gdalinfo_BO.execute(context)
            # keep only the projection-definition lines of the gdalinfo dump
            sed_cmd = "sed -i -e '1,4d;29,$d' {}".format(wld_name + ".prj")
            sed_BO = BashOperator(
                task_id="bash_operator_sed_{}".format(wld_name[-3:]),
                bash_command=sed_cmd)
            sed_BO.execute(context)
            prj_files.append(wld_name + ".prj")
            wld_files.append(wld_name + ".wld")

            # map the band suffix (e.g. B02) to its resolution key
            for key, value in self.bands_res.items():
                if wld_name[-3:] in value:
                    element = key
            geo_position = tile_geocoding.find(
                './/Geoposition[@resolution="{}"]'.format(element))
            # world file: x-scale, two rotation terms, y-scale, then the
            # upper-left pixel coordinates (the original leaked this handle)
            with open(wld_name + ".wld", "w") as wld_file:
                wld_file.write(geo_position.find("XDIM").text + "\n" + "0" +
                               "\n" + "0" + "\n")
                wld_file.write(geo_position.find("YDIM").text + "\n")
                wld_file.write(geo_position.find("ULX").text + "\n")
                wld_file.write(geo_position.find("ULY").text + "\n")

        parent_dir = os.path.dirname(jp2_files_paths[0])
        self.custom_archived.append(os.path.dirname(parent_dir))
        log.info(os.path.dirname(parent_dir))
        log.info(self.custom_archived)

    context['task_instance'].xcom_push(key='downloaded_products',
                                       value=self.downloaded_products)
    context['task_instance'].xcom_push(
        key='downloaded_products_with_wldprj',
        value=' '.join(self.custom_archived))
    return self.custom_archived
def create_product_description(description_dict):
    """Render the HTML product abstract for *description_dict*."""
    resolver = TemplatesResolver()
    return resolver.generate_product_abstract(description_dict)