def execute(self, context):
    input_paths = context["task_instance"].xcom_pull(
        self.get_inputs_from, key=XCOM_RETURN_KEY)
    if input_paths is None:
        log.info("Nothing to process")
        return None

    output_paths = []
    for input_path in input_paths:
        levels = get_overview_levels(self.max_overview_level)
        log.info("Generating overviews for {!r}...".format(input_path))
        command = get_gdaladdo_command(
            input_path,
            overview_levels=levels,
            resampling_method=self.resampling_method,
            compress_overview=self.compress_overview)
        # gdaladdo adds the overviews in place, so the output path is
        # the input path
        output_path = input_path
        output_paths.append(output_path)
        env = {'LD_LIBRARY_PATH': '/usr/local/lib:$LD_LIBRARY_PATH'}
        bo = BashOperator(
            task_id='bash_operator_addo_{}'.format(
                os.path.basename(input_path)),
            env=env,
            bash_command=command)
        bo.execute(context)
    log.info(output_paths)
    return output_paths
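# For reference, minimal sketches of the two helpers called above. The real
# get_overview_levels() and get_gdaladdo_command() live elsewhere in this
# plugin; the bodies below are assumptions about their behavior, not the
# actual implementations.
def get_overview_levels_sketch(max_level):
    """Powers of two up to max_level, e.g. 128 -> [2, 4, 8, 16, 32, 64, 128]."""
    levels = []
    current = 2
    while current <= max_level:
        levels.append(current)
        current *= 2
    return levels


def get_gdaladdo_command_sketch(source, overview_levels, resampling_method,
                                compress_overview=None):
    """Assemble a gdaladdo invocation; gdaladdo updates the file in place."""
    compress = ('--config COMPRESS_OVERVIEW {} '.format(compress_overview)
                if compress_overview else '')
    return 'gdaladdo -r {} {}{} {}'.format(
        resampling_method, compress, source,
        ' '.join(str(level) for level in overview_levels))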
def execute(self, context):
    log.info('--------------------GDAL_PLUGIN Warp running------------')
    task_instance = context['task_instance']
    log.info("""
        target_srs: {}
        tile_size: {}
        overwrite: {}
        dstdir: {}
        get_inputs_from: {}
        """.format(
        self.target_srs,
        self.tile_size,
        self.overwrite,
        self.dstdir,
        self.get_inputs_from,
    ))
    dstdir = self.dstdir
    input_paths = task_instance.xcom_pull(
        self.get_inputs_from, key=XCOM_RETURN_KEY)
    if input_paths is None:
        log.info('Nothing to process')
        return None

    output_paths = []
    # resolve the overwrite flag once, in a local variable, instead of
    # clobbering self.overwrite inside the loop
    overwrite_flag = '-overwrite' if self.overwrite else ''
    for srcfile in input_paths:
        log.info('srcfile: %s', srcfile)
        srcfilename = os.path.basename(srcfile)
        dstfile = os.path.join(dstdir, srcfilename)
        log.info('dstfile: %s', dstfile)

        # build the gdalwarp command
        gdalwarp_command = (
            'gdalwarp ' + overwrite_flag + ' -t_srs ' + self.target_srs +
            ' -co TILED=YES -co BLOCKXSIZE=' + self.tile_size +
            ' -co BLOCKYSIZE=' + self.tile_size + ' ' +
            srcfile + ' ' + dstfile
        )
        log.info('The complete GDAL warp command is: %s', gdalwarp_command)

        env = {'LD_LIBRARY_PATH': '/usr/local/lib:$LD_LIBRARY_PATH'}
        bo = BashOperator(
            task_id="bash_operator_warp",
            env=env,
            bash_command=gdalwarp_command
        )
        bo.execute(context)
        output_paths.append(dstfile)
    return output_paths
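# For illustration, with hypothetical values target_srs='EPSG:4326' and
# tile_size='512' (tile_size is concatenated into the command string, so it
# must be passed as a string, not an int), the operator above runs a command
# of the form:
#
#   gdalwarp -overwrite -t_srs EPSG:4326 -co TILED=YES -co BLOCKXSIZE=512 \
#       -co BLOCKYSIZE=512 /tmp/scene.tif <dstdir>/scene.tif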
def execute(self, context):
    input_paths = context["task_instance"].xcom_pull(
        self.get_inputs_from, key=XCOM_RETURN_KEY)
    if input_paths is None:
        log.info("Nothing to process")
        return None

    # If the message from XCom is a string holding a single file path,
    # turn it into a list
    if isinstance(input_paths, six.string_types):
        input_paths = [input_paths]
    if not len(input_paths):
        log.info("Nothing to process")
        return None

    log.info(input_paths[0])
    working_dir = os.path.dirname(input_paths[0])
    try:
        os.makedirs(working_dir)
    except OSError as exc:
        if exc.errno == 17:  # EEXIST: directory already exists
            pass
        else:
            raise

    output_paths = []
    for input_path in input_paths:
        output_img_filename = 'translated_{}'.format(
            os.path.basename(input_path))
        output_path = os.path.join(working_dir, output_img_filename)
        output_paths.append(output_path)
        command = get_gdal_translate_command(
            source=input_path,
            destination=output_path,
            output_type=self.output_type,
            creation_options=self.creation_options
        )
        log.info("The complete GDAL translate command is: {}".format(command))
        env = {'LD_LIBRARY_PATH': '/usr/local/lib:$LD_LIBRARY_PATH'}
        b_o = BashOperator(
            task_id="bash_operator_translate",
            env=env,
            bash_command=command
        )
        b_o.execute(context)
    log.info(output_paths)
    return output_paths
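# A minimal sketch of the helper used above, assuming it assembles a plain
# gdal_translate invocation; the real get_gdal_translate_command() is
# defined elsewhere in this package.
def get_gdal_translate_command_sketch(source, destination, output_type,
                                      creation_options):
    """gdal_translate -ot <type> [-co KEY=VALUE ...] <src> <dst>"""
    return 'gdal_translate -ot {} {} {} {}'.format(
        output_type,
        ' '.join('-co {}'.format(option) for option in creation_options),
        source,
        destination)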
def execute(self, context):
    log.info(context)
    log.info("###########")
    log.info("## RSYNC ##")
    log.info('Host: %s', self.host)
    log.info('User: %s', self.remote_usr)
    log.info('Remote dir: %s', self.remote_dir)
    log.info('SSH Key: %s', self.ssh_key_file)

    # check the default XCom key of task_id 'get_inputs_from'
    files_str = ""
    files = context['task_instance'].xcom_pull(
        task_ids=self.get_inputs_from, key=XCOM_RETURN_KEY)

    # stop processing if there are no products
    if files is None:
        log.info("Nothing to process.")
        self._do_skip_downstream_tasks(context)
        return

    if isinstance(files, six.string_types):
        files_str = files
    else:
        for f in files:
            files_str += " " + f

    log.info("Retrieving input from task_id '{}' and key '{}'".format(
        self.get_inputs_from, XCOM_RETURN_KEY))

    bash_command = ('rsync -avHPze "ssh -i ' + self.ssh_key_file +
                    ' -o StrictHostKeyChecking=no" ' + files_str + ' ' +
                    self.remote_usr + '@' + self.host + ':' +
                    self.remote_dir)
    bo = BashOperator(task_id='bash_operator_rsync_',
                      bash_command=bash_command)
    bo.execute(context)

    # construct the list of file paths uploaded to the remote host
    files_list = files_str.split()
    filenames_list = [
        os.path.join(self.remote_dir, os.path.basename(path))
        for path in files_list
    ]
    if filenames_list:
        log.info("Uploaded files: {}".format(pprint.pformat(files_list)))
        return filenames_list
    self._do_skip_downstream_tasks(context)
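# For illustration, with hypothetical values ssh_key_file='/keys/id_rsa',
# remote_usr='ubuntu', host='example.com' and remote_dir='/data', the
# operator above runs a command of the form:
#
#   rsync -avHPze "ssh -i /keys/id_rsa -o StrictHostKeyChecking=no" \
#       /tmp/a.tif /tmp/b.tif ubuntu@example.com:/data
#
# and returns ['/data/a.tif', '/data/b.tif'] to downstream tasks.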
def execute(self, context):
    products = list()
    ids = []
    if self.input_product is not None:
        log.info("Processing single product: " + self.input_product)
        products.append(self.input_product)
    elif self.get_inputs_from is not None:
        log.info("Getting inputs from: " + self.get_inputs_from)
        inputs = context['task_instance'].xcom_pull(
            task_ids=self.get_inputs_from, key=XCOM_RETURN_KEY)
        for input_product in inputs:
            products.append(input_product)
    else:
        self.downloaded_products = context['task_instance'].xcom_pull(
            'dhus_download_task', key='downloaded_products')
        if self.downloaded_products is not None and len(
                self.downloaded_products) != 0:
            products = list(self.downloaded_products.keys())
            log.info(self.downloaded_products)
            for p in self.downloaded_products:
                ids.append(self.downloaded_products[p]["id"])
            log.info("downloaded products keys: %s", products[0])

    if products is None or len(products) == 0:
        log.info("Nothing to process.")
        return

    thumbnail_paths = list()
    for product in products:
        log.info("Processing {}".format(product))
        with s2reader.open(product) as safe_product:
            for granule in safe_product.granules:
                try:
                    zipf = zipfile.ZipFile(product, 'r')
                    # ZipFile.read() takes an optional password as its
                    # second argument, not a mode
                    imgdata = zipf.read(granule.pvi_path)
                    img = Image(Blob(imgdata))
                    img.scale(self.thumb_size_x + 'x' + self.thumb_size_y)
                    img.quality(80)
                    # note: str.strip() strips a set of characters, not a
                    # suffix; it works here because Sentinel-2 product names
                    # end in digits before ".zip"
                    thumbnail_name = product.strip(".zip") + "/thumbnail.jpg"
                    # recreate the product directory from scratch
                    if os.path.isdir(product.strip(".zip")):
                        product_rmdir_cmd = "rm -r {} ".format(
                            product.strip(".zip"))
                        product_rmdir_BO = BashOperator(
                            task_id="product_rmdir_{}".format(
                                product.split("/")[-1].strip(".zip")),
                            bash_command=product_rmdir_cmd)
                        product_rmdir_BO.execute(context)
                    product_mkdir_cmd = "mkdir {} ".format(
                        product.strip(".zip"))
                    product_mkdir_BO = BashOperator(
                        task_id="product_mkdir_{}".format(
                            product.split("/")[-1].strip(".zip")),
                        bash_command=product_mkdir_cmd)
                    product_mkdir_BO.execute(context)
                    if self.output_dir is not None:
                        thumbnail_name = os.path.join(self.output_dir,
                                                      "thumbnail.jpeg")
                        log.info("Writing thumbnail to {}".format(
                            thumbnail_name))
                        img.write(thumbnail_name)
                    else:
                        img.write(str(thumbnail_name))
                    thumbnail_paths.append(thumbnail_name)
                    # XCom expects a single file, so we push it here:
                    context['task_instance'].xcom_push(
                        key='thumbnail_jpeg_abs_path',
                        value=str(thumbnail_name))
                    context['task_instance'].xcom_push(key='ids', value=ids)
                    break
                except BaseException as e:
                    log.error(
                        "Unable to extract thumbnail from {}: {}".format(
                            product, e))
    return thumbnail_paths
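# The thumbnail step above relies on pgmagick. A self-contained sketch of
# the same resize-and-encode pipeline, with hypothetical arguments (the
# operator reads the preview bytes out of the product zip instead of a file):
from pgmagick import Blob, Image

def write_thumbnail_sketch(preview_bytes, out_path, size='256x256'):
    img = Image(Blob(preview_bytes))  # decode the PVI preview image
    img.scale(size)                   # geometry string, e.g. '256x256'
    img.quality(80)                   # JPEG quality, as in the operator
    img.write(out_path)               # output format inferred from extension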
def execute(self, context):
    if self.get_inputs_from is not None:
        log.info("Getting inputs from: {}".format(self.get_inputs_from))
        self.downloaded_products, self.archived_products = context[
            'task_instance'].xcom_pull(task_ids=self.get_inputs_from,
                                       key=XCOM_RETURN_KEY)
    else:
        log.info("Getting inputs from: dhus_download_task")
        self.downloaded_products = context['task_instance'].xcom_pull(
            'dhus_download_task', key='downloaded_products')
    if self.downloaded_products is None:
        log.info("Nothing to process.")
        return

    for product in self.downloaded_products.keys():
        log.info("Processing: {}".format(product))
        with s2reader.open(product) as s2_product:
            coords = []
            links = []
            metadata = s2_product._product_metadata
            granule = s2_product.granules[0]
            granule_metadata = granule._metadata
            # parse the "POLYGON ((x1 y1, x2 y2, ...))" footprint string
            # into a list of "x,y" items
            product_footprint = [[
                [m.replace(" ", ",")]
                for m in str(s2_product.footprint).replace(", ", ",")
                .partition('((')[-1].rpartition('))')[0].split(",")
            ]]
            for item in product_footprint[0]:
                [x_coordinate, y_coordinate] = item[0].split(",")
                coords.append([float(x_coordinate), float(y_coordinate)])
            final_metadata_dict = {
                "type": "Feature",
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [coords]
                },
                "properties": {
                    "eop:identifier":
                        s2_product.manifest_safe_path.rsplit('.SAFE', 1)[0],
                    "timeStart": s2_product.product_start_time,
                    "timeEnd": s2_product.product_stop_time,
                    "originalPackageLocation": os.path.join(
                        self.original_package_download_base_url,
                        os.path.basename(self.archived_products.pop(0))),
                    "thumbnailURL": None,
                    "quicklookURL": None,
                    "eop:parentIdentifier": "SENTINEL2",
                    "eop:productionStatus": None,
                    "eop:acquisitionType": None,
                    "eop:orbitNumber": s2_product.sensing_orbit_number,
                    "eop:orbitDirection": s2_product.sensing_orbit_direction,
                    "eop:track": None,
                    "eop:frame": None,
                    "eop:swathIdentifier": metadata.find(
                        './/Product_Info/Datatake'
                    ).attrib['datatakeIdentifier'],
                    "opt:cloudCover": int(float(
                        metadata.findtext('.//Cloud_Coverage_Assessment'))),
                    "opt:snowCover": None,
                    "eop:productQualityStatus": None,
                    "eop:productQualityDegradationStatus": None,
                    "eop:processorName": None,
                    "eop:processingCenter": None,
                    "eop:creationDate": None,
                    "eop:modificationDate": None,
                    "eop:processingDate": None,
                    "eop:sensorMode": None,
                    "eop:archivingCenter":
                        granule_metadata.findtext('.//ARCHIVING_CENTRE'),
                    "eop:processingMode": None,
                    "eop:availabilityTime": s2_product.generation_time,
                    "eop:acquisitionStation": None,
                    "eop:acquisitionSubtype": None,
                    "eop:startTimeFromAscendingNode": None,
                    "eop:completionTimeFromAscendingNode": None,
                    "eop:illuminationAzimuthAngle":
                        metadata.findtext('.//Mean_Sun_Angle/AZIMUTH_ANGLE'),
                    "eop:illuminationZenithAngle":
                        metadata.findtext('.//Mean_Sun_Angle/ZENITH_ANGLE'),
                    "eop:illuminationElevationAngle": None,
                    "eop:resolution": None
                }
            }
            for i in self.bands_res.values():
                features_list = []
                granule_counter = 1
                for granule in s2_product.granules:
                    granule_coords = []
                    granule_coordinates = [[
                        [m.replace(" ", ",")]
                        for m in str(granule.footprint).replace(", ", ",")
                        .partition('((')[-1].rpartition('))')[0].split(",")
                    ]]
                    for item in granule_coordinates[0]:
                        [granule_x_coordinate,
                         granule_y_coordinate] = item[0].split(",")
                        granule_coords.append([
                            float(granule_x_coordinate),
                            float(granule_y_coordinate)
                        ])
                    zipped_product = zipfile.ZipFile(product)
                    for file_name in zipped_product.namelist():
                        if (file_name.endswith('.jp2')
                                and not file_name.endswith('PVI.jp2')):
                            features_list.append({
                                "type": "Feature",
                                "geometry": {
                                    "type": "Polygon",
                                    "coordinates": [granule_coords]
                                },
                                "properties": {
                                    "location": os.path.join(
                                        self.remote_dir,
                                        granule.granule_path.rsplit("/")[-1],
                                        "IMG_DATA",
                                        file_name.rsplit("/")[-1]),
                                    "band": self.bands_dict[
                                        file_name.rsplit("/")[-1]
                                        .rsplit(".")[0][-3:]]
                                },
                                "id": "GRANULE.{}".format(granule_counter)
                            })
                    granule_counter += 1
            final_granules_dict = {
                "type": "FeatureCollection",
                "features": features_list
            }

            timeStart = final_metadata_dict["properties"]["timeStart"]
            timeEnd = final_metadata_dict["properties"]["timeEnd"]

            # create description.html and dump it to file
            log.info("Creating description.html")
            tr = TemplatesResolver()
            htmlAbstract = tr.generate_product_abstract({
                "timeStart": timeStart,
                "timeEnd": timeEnd,
                "originalPackageLocation":
                    final_metadata_dict["properties"]
                    ["originalPackageLocation"]
            })
            log.debug(pprint.pformat(htmlAbstract))
            final_metadata_dict['htmlDescription'] = htmlAbstract
            with open(product.strip(".zip") + '/description.html',
                      'w') as product_outfile:
                product_outfile.write(htmlAbstract)

            # Note: the SRID is a property of the granule, not of the product
            final_metadata_dict["properties"]["crs"] = granule.srid
            with open(product.strip(".zip") + '/product.json',
                      'w') as product_outfile:
                json.dump(final_metadata_dict, product_outfile, indent=4)
            with open(product.strip(".zip") + '/granules.json',
                      'w') as granules_outfile:
                json.dump(final_granules_dict, granules_outfile, indent=4)

            product_identifier = s2_product.manifest_safe_path.rsplit(
                '.SAFE', 1)[0]
            bbox = get_bbox_from_granules_coordinates(granule_coordinates)
            ows_links_dict = create_owslinks_dict(
                product_identifier=product_identifier,
                timestart=timeStart,
                timeend=timeEnd,
                granule_bbox=bbox,
                gs_workspace=self.gs_workspace,
                gs_wms_layer=self.gs_wms_layer,
                gs_wms_width=self.gs_wms_width,
                gs_wms_height=self.gs_wms_height,
                gs_wms_format=self.gs_wms_format,
                gs_wms_version=self.gs_wms_version,
                gs_wfs_featuretype=self.gs_wfs_featuretype,
                gs_wfs_format=self.gs_wfs_format,
                gs_wfs_version=self.gs_wfs_version,
                gs_wcs_coverage_id=self.gs_wcs_coverage_id,
                gs_wcs_scale_i=self.gs_wcs_scale_i,
                gs_wcs_scale_j=self.gs_wcs_scale_j,
                gs_wcs_format=self.gs_wcs_format,
                gs_wcs_version=self.gs_wcs_version,
            )
            log.info("ows links: {}".format(pprint.pformat(ows_links_dict)))
            with open(product.strip(".zip") + '/owsLinks.json',
                      'w') as owslinks_outfile:
                json.dump(ows_links_dict, owslinks_outfile, indent=4)

    self.custom_archived = []
    for archive_line in self.downloaded_products.keys():
        jp2_files_paths = []
        archive_path = archive_line
        archived_product = zipfile.ZipFile(archive_line, 'r')
        for file_name in archived_product.namelist():
            if (file_name.endswith('.jp2')
                    and not file_name.endswith('PVI.jp2')):
                archived_product.extract(file_name,
                                         archive_path.strip(".zip"))
                jp2_files_paths.append(
                    os.path.join(archive_path.strip(".zip"), file_name))
                parent_dir = os.path.dirname(jp2_files_paths[0])
            if file_name.endswith('MTD_TL.xml'):
                archived_product.extract(file_name,
                                         archive_path.strip(".zip"))
                mtd_tl_xml = os.path.join(archive_path.strip(".zip"),
                                          file_name)
        tree = ET.parse(mtd_tl_xml)
        root = tree.getroot()
        geometric_info = root.find(
            root.tag.split('}', 1)[0] + "}Geometric_Info")
        tile_geocoding = geometric_info.find("Tile_Geocoding")
        wld_files = []
        prj_files = []
        for jp2_file in jp2_files_paths:
            wld_name = os.path.splitext(jp2_file)[0]
            # dump gdalinfo output, then trim it with sed, keeping the
            # lines that hold the coordinate-system block
            gdalinfo_cmd = "gdalinfo {} > {}".format(jp2_file,
                                                     wld_name + ".prj")
            gdalinfo_BO = BashOperator(
                task_id="bash_operator_gdalinfo_{}".format(wld_name[-3:]),
                bash_command=gdalinfo_cmd)
            gdalinfo_BO.execute(context)
            sed_cmd = "sed -i -e '1,4d;29,$d' {}".format(wld_name + ".prj")
            sed_BO = BashOperator(
                task_id="bash_operator_sed_{}".format(wld_name[-3:]),
                bash_command=sed_cmd)
            sed_BO.execute(context)
            prj_files.append(wld_name + ".prj")

            wld_files.append(wld_name + ".wld")
            for key, value in self.bands_res.items():
                if wld_name[-3:] in value:
                    element = key
            geo_position = tile_geocoding.find(
                './/Geoposition[@resolution="{}"]'.format(element))
            # a world file holds six lines: pixel X size, two rotation
            # terms, pixel Y size, and the upper-left X/Y coordinates
            with open(wld_name + ".wld", "w") as wld_file:
                wld_file.write(geo_position.find("XDIM").text + "\n" +
                               "0" + "\n" + "0" + "\n")
                wld_file.write(geo_position.find("YDIM").text + "\n")
                wld_file.write(geo_position.find("ULX").text + "\n")
                wld_file.write(geo_position.find("ULY").text + "\n")
        parent_dir = os.path.dirname(jp2_files_paths[0])
        self.custom_archived.append(os.path.dirname(parent_dir))
        log.info(os.path.dirname(parent_dir))

    log.info(self.custom_archived)
    context['task_instance'].xcom_push(key='downloaded_products',
                                       value=self.downloaded_products)
    context['task_instance'].xcom_push(
        key='downloaded_products_with_wldprj',
        value=' '.join(self.custom_archived))
    return self.custom_archived
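# A sketch of the bbox helper used for the OWS links above, assuming it
# reduces the parsed granule footprint to (minx, miny, maxx, maxy); the real
# get_bbox_from_granules_coordinates() is defined elsewhere in this package.
def get_bbox_from_granules_coordinates_sketch(granule_coordinates):
    points = [item[0].split(",") for item in granule_coordinates[0]]
    xs = [float(x) for x, _ in points]
    ys = [float(y) for _, y in points]
    return min(xs), min(ys), max(xs), max(ys)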