def execute(self, context):
    # Pull the downloaded/archived product lists from XCom
    if self.get_inputs_from is not None:
        log.info("Getting inputs from: {}".format(self.get_inputs_from))
        self.downloaded_products, self.archived_products = context[
            'task_instance'].xcom_pull(task_ids=self.get_inputs_from,
                                       key=XCOM_RETURN_KEY)
    else:
        log.info("Getting inputs from: dhus_download_task")
        self.downloaded_products = context['task_instance'].xcom_pull(
            'dhus_download_task', key='downloaded_products')

    if self.downloaded_products is None:
        log.info("Nothing to process.")
        return

    for product in self.downloaded_products.keys():
        log.info("Processing: {}".format(product))
        with s2reader.open(product) as s2_product:
            coords = []
            links = []
            metadata = s2_product._product_metadata
            granule = s2_product.granules[0]
            granule_metadata = granule._metadata

            # Parse the product footprint WKT ("POLYGON ((lon lat, ...))")
            # into [lon, lat] float pairs
            product_footprint = [
                [[m.replace(" ", ",")]
                 for m in str(s2_product.footprint).replace(", ", ",").
                 partition('((')[-1].rpartition('))')[0].split(",")]
            ]
            for item in product_footprint[0]:
                [x_coordinate, y_coordinate] = item[0].split(",")
                coords.append([float(x_coordinate), float(y_coordinate)])

            # GeoJSON-like feature carrying the EO product metadata
            final_metadata_dict = {
                "type": "Feature",
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [coords]
                },
                "properties": {
                    "eop:identifier":
                        s2_product.manifest_safe_path.rsplit('.SAFE', 1)[0],
                    "timeStart": s2_product.product_start_time,
                    "timeEnd": s2_product.product_stop_time,
                    "originalPackageLocation": os.path.join(
                        self.original_package_download_base_url,
                        os.path.basename(self.archived_products.pop(0))),
                    "thumbnailURL": None,
                    "quicklookURL": None,
                    "eop:parentIdentifier": "SENTINEL2",
                    "eop:productionStatus": None,
                    "eop:acquisitionType": None,
                    "eop:orbitNumber": s2_product.sensing_orbit_number,
                    "eop:orbitDirection": s2_product.sensing_orbit_direction,
                    "eop:track": None,
                    "eop:frame": None,
                    "eop:swathIdentifier":
                        metadata.find('.//Product_Info/Datatake').
                        attrib['datatakeIdentifier'],
                    "opt:cloudCover": int(float(
                        metadata.findtext(".//Cloud_Coverage_Assessment"))),
                    "opt:snowCover": None,
                    "eop:productQualityStatus": None,
                    "eop:productQualityDegradationStatus": None,
                    "eop:processorName": None,
                    "eop:processingCenter": None,
                    "eop:creationDate": None,
                    "eop:modificationDate": None,
                    "eop:processingDate": None,
                    "eop:sensorMode": None,
                    "eop:archivingCenter":
                        granule_metadata.findtext('.//ARCHIVING_CENTRE'),
                    "eop:processingMode": None,
                    "eop:availabilityTime": s2_product.generation_time,
                    "eop:acquisitionStation": None,
                    "eop:acquisitionSubtype": None,
                    "eop:startTimeFromAscendingNode": None,
                    "eop:completionTimeFromAscendingNode": None,
                    "eop:illuminationAzimuthAngle":
                        metadata.findtext('.//Mean_Sun_Angle/AZIMUTH_ANGLE'),
                    "eop:illuminationZenithAngle":
                        metadata.findtext('.//Mean_Sun_Angle/ZENITH_ANGLE'),
                    "eop:illuminationElevationAngle": None,
                    "eop:resolution": None
                }
            }

            # Build one GeoJSON feature per granule band file (.jp2)
            for i in self.bands_res.values():
                features_list = []
                granule_counter = 1
                for granule in s2_product.granules:
                    granule_coords = []
                    granule_coordinates = [
                        [[m.replace(" ", ",")]
                         for m in str(granule.footprint).replace(", ", ",").
                         partition('((')[-1].rpartition('))')[0].split(",")]
                    ]
                    for item in granule_coordinates[0]:
                        [granule_x_coordinate,
                         granule_y_coordinate] = item[0].split(",")
                        granule_coords.append([
                            float(granule_x_coordinate),
                            float(granule_y_coordinate)
                        ])
                    zipped_product = zipfile.ZipFile(product)
                    for file_name in zipped_product.namelist():
                        if file_name.endswith('.jp2') \
                                and not file_name.endswith('PVI.jp2'):
                            features_list.append({
                                "type": "Feature",
                                "geometry": {
                                    "type": "Polygon",
                                    "coordinates": [granule_coords]
                                },
                                "properties": {
                                    "location": os.path.join(
                                        self.remote_dir,
                                        granule.granule_path.rsplit("/")[-1],
                                        "IMG_DATA",
                                        file_name.rsplit("/")[-1]),
                                    "band": self.bands_dict[
                                        file_name.rsplit("/")[-1].
                                        rsplit(".")[0][-3:]]
                                },
                                "id": "GRANULE.{}".format(granule_counter)
                            })
                    granule_counter += 1

            final_granules_dict = {
                "type": "FeatureCollection",
                "features": features_list
            }

            timeStart = final_metadata_dict["properties"]["timeStart"]
            timeEnd = final_metadata_dict["properties"]["timeEnd"]

            # create description.html and dump it to file
            log.info("Creating description.html")
            tr = TemplatesResolver()
            htmlAbstract = tr.generate_product_abstract({
                "timeStart": timeStart,
                "timeEnd": timeEnd,
                "originalPackageLocation":
                    final_metadata_dict["properties"]["originalPackageLocation"]
            })
            log.debug(pprint.pformat(htmlAbstract))
            final_metadata_dict['htmlDescription'] = htmlAbstract

            # splitext avoids the character-set pitfall of str.strip(".zip")
            product_dir = os.path.splitext(product)[0]
            with open(os.path.join(product_dir, 'description.html'),
                      'w') as product_outfile:
                product_outfile.write(htmlAbstract)

            # Note here that the SRID is a property of the granule, not the product
            final_metadata_dict["properties"]["crs"] = granule.srid
            with open(os.path.join(product_dir, 'product.json'),
                      'w') as product_outfile:
                json.dump(final_metadata_dict, product_outfile, indent=4)
            with open(os.path.join(product_dir, 'granules.json'),
                      'w') as granules_outfile:
                json.dump(final_granules_dict, granules_outfile, indent=4)

            product_identifier = s2_product.manifest_safe_path.rsplit(
                '.SAFE', 1)[0]
            bbox = get_bbox_from_granules_coordinates(granule_coordinates)
            ows_links_dict = create_owslinks_dict(
                product_identifier=product_identifier,
                timestart=timeStart,
                timeend=timeEnd,
                granule_bbox=bbox,
                gs_workspace=self.gs_workspace,
                gs_wms_layer=self.gs_wms_layer,
                gs_wms_width=self.gs_wms_width,
                gs_wms_height=self.gs_wms_height,
                gs_wms_format=self.gs_wms_format,
                gs_wms_version=self.gs_wms_version,
                gs_wfs_featuretype=self.gs_wfs_featuretype,
                gs_wfs_format=self.gs_wfs_format,
                gs_wfs_version=self.gs_wfs_version,
                gs_wcs_coverage_id=self.gs_wcs_coverage_id,
                gs_wcs_scale_i=self.gs_wcs_scale_i,
                gs_wcs_scale_j=self.gs_wcs_scale_j,
                gs_wcs_format=self.gs_wcs_format,
                gs_wcs_version=self.gs_wcs_version,
            )
            log.info("ows links: {}".format(pprint.pformat(ows_links_dict)))
            with open(os.path.join(product_dir, 'owsLinks.json'),
                      'w') as owslinks_outfile:
                json.dump(ows_links_dict, owslinks_outfile, indent=4)

    # Extract the .jp2 granules and write .wld/.prj sidecar files for each
    self.custom_archived = []
    for archive_line in self.downloaded_products.keys():
        jp2_files_paths = []
        archive_path = archive_line
        archive_dir = os.path.splitext(archive_path)[0]
        archived_product = zipfile.ZipFile(archive_line, 'r')
        for file_name in archived_product.namelist():
            if file_name.endswith('.jp2') \
                    and not file_name.endswith('PVI.jp2'):
                archived_product.extract(file_name, archive_dir)
                jp2_files_paths.append(os.path.join(archive_dir, file_name))
                parent_dir = os.path.dirname(jp2_files_paths[0])
            if file_name.endswith('MTD_TL.xml'):
                archived_product.extract(file_name, archive_dir)
                mtd_tl_xml = os.path.join(archive_dir, file_name)
                tree = ET.parse(mtd_tl_xml)
                root = tree.getroot()
                geometric_info = root.find(
                    root.tag.split('}', 1)[0] + "}Geometric_Info")
                tile_geocoding = geometric_info.find("Tile_Geocoding")

        wld_files = []
        prj_files = []
        for jp2_file in jp2_files_paths:
            wld_name = os.path.splitext(jp2_file)[0]
            # Dump gdalinfo output, then keep only the projection block as .prj
            gdalinfo_cmd = "gdalinfo {} > {}".format(jp2_file,
                                                     wld_name + ".prj")
            gdalinfo_BO = BashOperator(
                task_id="bash_operator_gdalinfo_{}".format(wld_name[-3:]),
                bash_command=gdalinfo_cmd)
            gdalinfo_BO.execute(context)
            sed_cmd = "sed -i -e '1,4d;29,$d' {}".format(wld_name + ".prj")
            sed_BO = BashOperator(
                task_id="bash_operator_sed_{}".format(wld_name[-3:]),
                bash_command=sed_cmd)
            sed_BO.execute(context)
            prj_files.append(wld_name + ".prj")

            # Write the world file (.wld) from the tile geocoding of this band's resolution
            wld_files.append(wld_name + ".wld")
            for key, value in self.bands_res.items():
                if wld_name[-3:] in value:
                    element = key
            geo_position = tile_geocoding.find(
                './/Geoposition[@resolution="{}"]'.format(element))
            with open(wld_name + ".wld", "w") as wld_file:
                wld_file.write(geo_position.find("XDIM").text + "\n" +
                               "0" + "\n" + "0" + "\n")
                wld_file.write(geo_position.find("YDIM").text + "\n")
                wld_file.write(geo_position.find("ULX").text + "\n")
                wld_file.write(geo_position.find("ULY").text + "\n")

        parent_dir = os.path.dirname(jp2_files_paths[0])
        self.custom_archived.append(os.path.dirname(parent_dir))
        log.info(os.path.dirname(parent_dir))

    log.info(self.custom_archived)
    context['task_instance'].xcom_push(key='downloaded_products',
                                       value=self.downloaded_products)
    context['task_instance'].xcom_push(
        key='downloaded_products_with_wldprj',
        value=' '.join(self.custom_archived))
    return self.custom_archived

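
# Illustrative sketch (not part of the plugin): the footprint handling in the
# execute() above unpacks a WKT polygon string such as
# "POLYGON ((11.96 42.23, 13.30 42.25, ..., 11.96 42.23))" into [lon, lat]
# pairs via replace/partition/split. A more direct equivalent, assuming the
# same single-ring WKT footprint produced by s2reader, could look like this
# (helper name is hypothetical):
def wkt_polygon_to_coords(wkt):
    """Parse 'POLYGON ((x y, x y, ...))' into [[x, y], ...] float pairs."""
    ring = wkt.partition('((')[-1].rpartition('))')[0]
    return [[float(x), float(y)]
            for x, y in (pair.split() for pair in ring.split(','))]
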
def execute(self, context):
    # fetch MTL file path from XCom
    mtl_path = context["task_instance"].xcom_pull(
        self.get_inputs_from["metadata_task_id"])
    if mtl_path is None:
        log.info("Nothing to process.")
        return

    # Uploaded granules paths from XCom
    upload_granules_task_ids = self.get_inputs_from["upload_task_ids"]
    granule_paths = []
    for tid in upload_granules_task_ids:
        granule_paths += context["task_instance"].xcom_pull(tid)

    original_package_paths = context["task_instance"].xcom_pull(
        self.get_inputs_from["upload_original_package_task_id"])
    original_package_path = original_package_paths[0]
    original_package_filename = os.path.basename(original_package_path)
    original_package_location = (self.original_package_download_base_url +
                                 original_package_filename)
    product_id = os.path.splitext(original_package_filename)[0]

    # Get GDALInfo output from XCom
    gdalinfo_task_id = self.get_inputs_from["gdalinfo_task_id"]
    gdalinfo_dict = context["task_instance"].xcom_pull(gdalinfo_task_id)

    # Get GDALInfo output of one of the granules; the CRS is the same for all granules
    k = next(iter(gdalinfo_dict))
    gdalinfo_out = gdalinfo_dict[k]

    # Extract projection WKT and get EPSG code
    match = re.findall(r'^(PROJCS.*]])', gdalinfo_out,
                       re.MULTILINE | re.DOTALL)
    assert match, "No PROJCS definition found in gdalinfo output"
    wkt_def = match[0]
    assert isinstance(wkt_def, str)
    sref = osr.SpatialReference()
    sref.ImportFromWkt(wkt_def)
    crs = sref.GetAttrValue("AUTHORITY", 1)

    with open(mtl_path) as mtl_fh:
        parsed_metadata = parse_mtl_data(mtl_fh)
    bounding_box = get_bounding_box(parsed_metadata["PRODUCT_METADATA"])
    # lon/lat bounding box from the four MTL corner points
    bbox_dict = {
        "long_min": min(bounding_box.lllon, bounding_box.ullon),
        "lat_min": min(bounding_box.lllat, bounding_box.lrlat),
        "long_max": max(bounding_box.lrlon, bounding_box.urlon),
        "lat_max": max(bounding_box.ullat, bounding_box.urlat),
    }
    log.debug("BoundingBox: {}".format(pprint.pformat(bounding_box)))

    prepared_metadata = prepare_metadata(parsed_metadata, bounding_box, crs,
                                         original_package_location)
    timeStart = prepared_metadata['properties']['timeStart']
    timeEnd = prepared_metadata['properties']['timeEnd']

    # create description.html and dump it to file
    log.info("Creating description.html")
    tr = TemplatesResolver()
    htmlAbstract = tr.generate_product_abstract({
        "timeStart": timeStart,
        "timeEnd": timeEnd,
        "originalPackageLocation": original_package_location
    })
    log.debug(pprint.pformat(htmlAbstract))
    prepared_metadata["htmlDescription"] = htmlAbstract

    product_directory, mtl_name = os.path.split(mtl_path)

    granules_dict = prepare_granules(bounding_box, granule_paths)
    log.debug("Granules Dict: {}".format(pprint.pformat(granules_dict)))

    ows_links_dict = create_owslinks_dict(
        product_identifier=product_id,
        timestart=timeStart,
        timeend=timeEnd,
        granule_bbox=bbox_dict,
        gs_workspace=self.gs_workspace,
        gs_wms_layer=self.gs_wms_layer,
        gs_wms_width=self.gs_wms_width,
        gs_wms_height=self.gs_wms_height,
        gs_wms_format=self.gs_wms_format,
        gs_wms_version=self.gs_wms_version,
        gs_wfs_featuretype=self.gs_wfs_featuretype,
        gs_wfs_format=self.gs_wfs_format,
        gs_wfs_version=self.gs_wfs_version,
        gs_wcs_coverage_id=self.gs_wcs_coverage_id,
        gs_wcs_scale_i=self.gs_wcs_scale_i,
        gs_wcs_scale_j=self.gs_wcs_scale_j,
        gs_wcs_format=self.gs_wcs_format,
        gs_wcs_version=self.gs_wcs_version,
    )
    log.info("ows links: {}".format(pprint.pformat(ows_links_dict)))

    product_json_path = os.path.join(product_directory, "product.json")
    ows_links_path = os.path.join(product_directory, "owsLinks.json")
    granules_path = os.path.join(product_directory, "granules.json")
    xml_template_path = os.path.join(product_directory, "metadata.xml")
    description_html_path = os.path.join(product_directory,
                                         "description.html")

    # Create product.json
    with open(product_json_path, 'w') as out_json_fh:
        json.dump(prepared_metadata, out_json_fh, indent=4)

    # Create granules.json
    with open(granules_path, 'w') as out_granules_fh:
        json.dump(granules_dict, out_granules_fh, indent=4)

    # Create owsLinks.json
    with open(ows_links_path, 'w') as out_ows_links_fh:
        json.dump(ows_links_dict, out_ows_links_fh, indent=4)

    # Create metadata.xml
    shutil.copyfile(self.metadata_xml_path, xml_template_path)

    # Create description.html
    with open(description_html_path, "w") as out_description:
        out_description.write(htmlAbstract)

    return (product_json_path, granules_path, ows_links_path,
            xml_template_path, description_html_path)

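
# Illustrative sketch (not part of the plugin): the bbox_dict built in the
# execute() above collapses the four MTL corner points into a lon/lat box.
# A small helper doing the same, assuming the corner attribute names used on
# the bounding_box object above (lllon, lrlon, ullon, urlon and the lat
# equivalents); the helper name is hypothetical:
def corners_to_bbox(bb):
    """Return {long_min, lat_min, long_max, lat_max} from the four corners."""
    lons = (bb.lllon, bb.lrlon, bb.ullon, bb.urlon)
    lats = (bb.lllat, bb.lrlat, bb.ullat, bb.urlat)
    return {
        "long_min": min(lons),
        "lat_min": min(lats),
        "long_max": max(lons),
        "lat_max": max(lats),
    }
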
def execute(self, context):
    log.info('--------------------S1Metadata_PLUGIN running------------')
    task_instance = context['task_instance']

    log.info("Receiving from 'get_inputs_from':\n{}".format(
        self.get_inputs_from))

    download_task_id = self.get_inputs_from['download_task_id']
    addo_task_ids = self.get_inputs_from['addo_task_ids']
    upload_task_ids = self.get_inputs_from['upload_task_ids']
    archive_product_task_id = self.get_inputs_from['archive_product_task_id']

    downloaded = context['task_instance'].xcom_pull(
        task_ids=download_task_id, key=XCOM_RETURN_KEY)

    local_granules_paths = []
    for tid in addo_task_ids:
        local_granules_path = context['task_instance'].xcom_pull(
            task_ids=tid, key=XCOM_RETURN_KEY)
        if local_granules_path:
            local_granules_paths += local_granules_path

    uploaded_granules_paths = context['task_instance'].xcom_pull(
        task_ids=upload_task_ids, key=XCOM_RETURN_KEY)
    original_package_path = context['task_instance'].xcom_pull(
        task_ids=archive_product_task_id, key=XCOM_RETURN_KEY)

    granules_dict, bbox = collect_granules_metadata(
        local_granules_paths, self.granules_upload_dir, self.bands_dict)

    if not downloaded:
        log.info("No products from Download task, nothing to do.")
        return list()
    if not local_granules_paths:
        log.info("No local granules from processing, nothing to do.")
        return list()
    if not uploaded_granules_paths:
        log.info("No uploaded granules from upload task, nothing to do.")
        return list()
    if not original_package_path:
        log.info("No original package path from original package upload task, "
                 "nothing to do.")
        return list()

    safe_package_path = next(iter(downloaded))
    safe_package_filename = os.path.basename(safe_package_path)
    product_id = downloaded[safe_package_path].get('title')
    originalPackageLocation = (self.original_package_download_base_url +
                               safe_package_filename)

    processing_dir = os.path.join(self.processing_dir, product_id)
    if not os.path.exists(processing_dir):
        os.makedirs(processing_dir)

    log.info('safe_package_path: {}'.format(safe_package_path))
    log.info('local_granules_paths: {}'.format(local_granules_paths))

    s1reader = S1GDALReader(safe_package_path)
    product_metadata = s1reader.get_metadata()
    product_metadata['footprint'] = s1reader.get_footprint()
    log.info(pprint.pformat(product_metadata, indent=4))

    timeStart = product_metadata['ACQUISITION_START_TIME']
    timeEnd = product_metadata['ACQUISITION_STOP_TIME']

    owslinks_dict = create_owslinks_dict(
        product_identifier=product_id,
        timestart=timeStart,
        timeend=timeEnd,
        granule_bbox=bbox,
        gs_workspace=self.gs_workspace,
        gs_wms_layer=self.gs_wms_layer,
        gs_wms_width=self.gs_wms_width,
        gs_wms_height=self.gs_wms_height,
        gs_wms_format=self.gs_wms_format,
        gs_wms_version=self.gs_wms_version,
        gs_wfs_featuretype=self.gs_wfs_featuretype,
        gs_wfs_format=self.gs_wfs_format,
        gs_wfs_version=self.gs_wfs_version,
        gs_wcs_coverage_id=self.gs_wcs_coverage_id,
        gs_wcs_scale_i=self.gs_wcs_scale_i,
        gs_wcs_scale_j=self.gs_wcs_scale_j,
        gs_wcs_format=self.gs_wcs_format,
        gs_wcs_version=self.gs_wcs_version)

    # create thumbnail
    # TODO: create proper thumbnail from quicklook. Also remove temp file
    log.info("Creating thumbnail")
    thumbnail_path = os.path.join(processing_dir, "thumbnail.png")
    quicklook_path = s1reader.get_quicklook()
    log.info(pprint.pformat(quicklook_path))
    copyfile(quicklook_path, thumbnail_path)

    search_params_dict = create_search_dict(product_metadata,
                                            originalPackageLocation)
    log.info(pprint.pformat(search_params_dict))

    metadata_dict = create_metadata_dict(product_metadata)
    log.info(pprint.pformat(metadata_dict))

    description_dict = create_description_dict(product_metadata,
                                               originalPackageLocation)
    log.info(pprint.pformat(description_dict))

    # create description.html and dump it to file
    log.info("Creating description.html")
    html_description = create_product_description(description_dict)
    search_params_dict['htmlDescription'] = html_description

    # create metadata XML
    log.info("Creating metadata.xml")
    metadata_xml = create_product_metadata(metadata_dict)

    # Package search params, description, metadata, granules, OWS links and
    # thumbnail into the product zip
    po = PythonOperator(
        task_id="s1_metadata_dictionary_creation",
        python_callable=create_procuct_zip,
        op_kwargs={
            'processing_dir': processing_dir,
            'search_params_dict': search_params_dict,
            'description_html': html_description,
            'metadata_xml': metadata_xml,
            'granules_dict': granules_dict,
            'owslinks_dict': owslinks_dict,
            'thumbnail_path': thumbnail_path
        })
    out = po.execute(context)

    zip_paths = list()
    if out:
        zip_paths.append(out)
    return zip_paths