def convert_nodata_value(input_file, old_nodata, new_nodata):
    new_file_tmp = input_file + '.tmp'
    # Update the value
    raster_image_math.do_mask_image(input_file=input_file, mask_file=input_file, output_file=new_file_tmp,
                                    output_format=None, output_type=None, options='',
                                    mask_value=old_nodata, out_value=new_nodata)
    # Copy the metadata and update nodata field
    sds_meta = metadata.SdsMetadata()
    # Check if the input file is single, or a list
    sds_meta.read_from_file(input_file)
    sds_meta.assign_nodata(new_nodata)
    sds_meta.write_to_file(new_file_tmp)
    # Rename files
    shutil.move(input_file, input_file + '.old')
    shutil.move(new_file_tmp, input_file)
    return 0
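# A minimal usage sketch of convert_nodata_value (hypothetical path and nodata
# values, not from the source): rewrite the nodata marker of a product file in
# place, keeping the original next to it as '<file>.old'.
#
#   convert_nodata_value('/my/processing/dir/20200301_vgt-ndvi_ndv_SPOTV-Africa-1km_sv2-pv2.2.tif',
#                        old_nodata=-32768, new_nodata=-32767)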
def get_old_file_list(output_filename, my_logger):
    try:
        # Check if the output file already exists
        sds_meta_old = metadata.SdsMetadata()
        sds_meta_old.read_from_file(output_filename)
        old_file_list = sds_meta_old.get_item('eStation2_input_files')
        sds_meta_old = None
        return old_file_list
    except Exception:
        my_logger.error('Error in getting old file metadata')
        return None
def move_back_files(self, tmp_dir):
    # Create Metadata class to get target fullpath
    meta = metadata.SdsMetadata()
    # Get list of files in tmp dir
    files = glob.glob(os.path.join(tmp_dir, '*.tif'))
    for my_file in files:
        fullpath_dest = meta.get_target_filepath(my_file)
        try:
            os.rename(my_file, fullpath_dest)
        except OSError:
            logger.error('Error in moving file %s' % fullpath_dest)
def convert_scl_factor_value(input_file, old_scl_factor, new_scl_factor):
    new_file_tmp = input_file + '.tmp'
    # Copy the raster values unchanged (only the metadata is updated)
    shutil.copy(input_file, new_file_tmp)
    # Copy the metadata and update scl_factor field
    sds_meta = metadata.SdsMetadata()
    # Check if the input file is single, or a list
    sds_meta.read_from_file(input_file)
    sds_meta.assign_scl_factor(new_scl_factor)
    sds_meta.write_to_file(new_file_tmp)
    # Replace the original file
    shutil.move(new_file_tmp, input_file)
    return 0
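# A minimal usage sketch of convert_scl_factor_value (hypothetical path and
# factors): only the metadata scale factor is rewritten; the raster values
# are copied unchanged, so old_scl_factor is informational here.
#
#   convert_scl_factor_value('/my/processing/dir/20200301_product.tif',
#                            old_scl_factor=0.001, new_scl_factor=0.0001)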
def rotate_mpe(input_file, output_dir):
    basename = os.path.basename(input_file)
    output_file = output_dir + basename
    my_logger.info('Working on file {0}'.format(basename))
    out_data_type_gdal = gdal.GDT_Int16
    # Instance metadata object and read from source
    sds_meta = metadata.SdsMetadata()
    sds_meta.read_from_file(input_file)
    # Load data from source
    orig_ds = gdal.Open(input_file, gdal.GA_ReadOnly)
    orig_cs = osr.SpatialReference()
    orig_cs.ImportFromWkt(orig_ds.GetProjectionRef())
    orig_geo_transform = orig_ds.GetGeoTransform()
    orig_size_x = orig_ds.RasterXSize
    orig_size_y = orig_ds.RasterYSize
    band = orig_ds.GetRasterBand(1)
    orig_data = band.ReadAsArray(0, 0, orig_size_x, orig_size_y)
    # Prepare output driver
    out_driver = gdal.GetDriverByName(es_constants.ES2_OUTFILE_FORMAT)
    # Rotate the data by 180 degrees (flip up-down, then left-right)
    rev_data = N.flipud(orig_data)
    orig_data = N.fliplr(rev_data)
    # No reprojection, only format-conversion
    trg_ds = out_driver.Create(output_file, orig_size_x, orig_size_y, 1, out_data_type_gdal,
                               [es_constants.ES2_OUTFILE_OPTIONS])
    trg_ds.SetProjection(orig_ds.GetProjectionRef())
    trg_ds.SetGeoTransform(orig_geo_transform)
    trg_ds.GetRasterBand(1).WriteArray(orig_data)
    sds_meta.write_to_ds(trg_ds)
    orig_ds = None
    trg_ds = None
    # Rename the original file
    os.rename(input_file, input_file + '.wrong')
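# A minimal sketch of the flip logic used above (hypothetical 2x2 array):
# flipud followed by fliplr amounts to a 180-degree rotation of the raster grid.
#
#   >>> N.fliplr(N.flipud(N.array([[1, 2], [3, 4]])))
#   array([[4, 3],
#          [2, 1]])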
def assign_metadata_generic(product, subproduct, mapset_id, out_date_str_final, output_directory,
                            final_list_files, file_write_metadata, my_logger):
    try:
        sds_meta = metadata.SdsMetadata()
        sds_meta.assign_es2_version()
        sds_meta.assign_mapset(mapset_id)
        sds_meta.assign_from_product(product['productcode'], subproduct, product['version'])
        sds_meta.assign_date(out_date_str_final)
        sds_meta.assign_subdir_from_fullpath(output_directory)
        sds_meta.assign_comput_time_now()
        sds_meta.assign_input_files(final_list_files)
        sds_meta.write_to_file(file_write_metadata)
    except Exception:
        my_logger.warning('Error in assigning metadata .. Continue')
def import_tar(filetar, tgz=False):
    result = {
        'status': 0,  # 0 -> ok, 1 -> no tmpdir created / no input file
        'n_file_copied': 0,
        'n_file_error': 0,
    }
    # Create tmp dir
    try:
        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='_' + os.path.basename(filetar),
                                  dir=es_constants.base_tmp_dir)
    except IOError:
        logger.error('Cannot create temporary dir ' + es_constants.base_tmp_dir + '. Exit')
        result['status'] = 1
        return result
    # Extract from tar
    if os.path.isfile(filetar):
        # Untar the file to a temp dir. Use random-access mode ("r:"), not
        # stream mode ("r|"): getnames() followed by extractall() needs to
        # re-read the archive.
        tar = tarfile.open(filetar, "r:gz" if tgz else "r:")
        names = tar.getnames()
        # Extract with subdirs
        tar.extractall(path=tmpdir)
        tar.close()
        # Move files to basedir
        for name in names:
            os.rename(os.path.join(tmpdir, name), os.path.join(tmpdir, os.path.basename(name)))
    else:
        result['status'] = 1
        return result
    # Copy from tmpdir to target directory
    meta = metadata.SdsMetadata()
    # Get list of files in tmp dir
    extracted_files = glob.glob(os.path.join(tmpdir, '*.tif'))
    for my_file in extracted_files:
        fullpath_dest = meta.get_target_filepath(my_file)
        try:
            shutil.copyfile(my_file, fullpath_dest)
            result['n_file_copied'] += 1
        except IOError:
            result['n_file_error'] += 1
            logger.error('Error in copying file %s' % fullpath_dest)
    # Clean and exit
    shutil.rmtree(tmpdir)
    return result
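# A minimal usage sketch of import_tar (hypothetical archive name): extract
# all GeoTIFFs from a gzipped tar, copy them to their eStation2 target
# locations, then inspect the result counters.
#
#   result = import_tar('/data/ingest/mesa_archive_20200301.tgz', tgz=True)
#   if result['status'] != 0 or result['n_file_error'] > 0:
#       logger.warning('Import incomplete: %s' % result)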
def test_do_mask_image(self):
    # linearx2diff-linearx2
    output_filename = 'vgt-ndvi/linearx2diff-linearx2/20200301_vgt-ndvi_linearx2diff-linearx2_SPOTV-Africa-1km_sv2-pv2.2.tif'
    output_file = os.path.join(self.root_out_dir, output_filename)
    ref_file = os.path.join(self.ref_dir, output_filename)
    functions.check_output_dir(os.path.dirname(output_file))
    output_file = functions.list_to_element(output_file)
    tmpdir = tempfile.mkdtemp(prefix=__name__,
                              suffix='_' + os.path.basename(output_file),
                              dir=es_constants.base_tmp_dir)
    # Temporary (not masked) file
    output_file_temp = tmpdir + os.path.sep + os.path.basename(output_file)
    current_file = self.ref_dir + 'vgt-ndvi/ndvi-linearx2/20200301_vgt-ndvi_ndvi-linearx2_SPOTV-Africa-1km_sv2-pv2.2.tif'
    average_file = self.ref_dir + 'vgt-ndvi/10davg-linearx2/0301_vgt-ndvi_10davg-linearx2_SPOTV-Africa-1km_sv2-pv2.2.tif'
    baresoil_file = self.ref_dir + 'vgt-ndvi/baresoil-linearx2/20200301_vgt-ndvi_baresoil-linearx2_SPOTV-Africa-1km_sv2-pv2.2.tif'
    # Compute temporary (difference) file
    args = {"input_file": [current_file, average_file],
            "output_file": output_file_temp,
            "output_format": 'GTIFF',
            "options": "compress=lzw"}
    raster_image_math.do_oper_subtraction(**args)
    sds_meta = md.SdsMetadata()
    # Mask with baresoil file
    no_data = int(sds_meta.get_nodata_value(current_file))
    args = {"input_file": output_file_temp,
            "mask_file": baresoil_file,
            "output_file": output_file,
            "options": "compress=lzw",
            "mask_value": no_data,
            "out_value": no_data}
    raster_image_math.do_mask_image(**args)
    equal = self.checkFile(ref_file, output_file)
    self.assertEqual(equal, 1)
def rewrite_metadata(input_file, productcode, subproductcode, version, mapsetcode, output_directory):
    # Copy the metadata and update nodata field
    sds_meta = metadata.SdsMetadata()
    sds_meta.assign_es2_version()
    if mapsetcode is not None:
        sds_meta.assign_mapset(mapsetcode)
    sds_meta.assign_from_product(productcode, subproductcode, version)
    if output_directory is not None:
        sds_meta.assign_subdir_from_fullpath(output_directory)
    sds_meta.assign_comput_time_now()
    sds_meta.assign_input_files(input_file)
    # Derive the date from the stored input-files string
    # (assumes it starts with YYYYMMDD)
    sds_meta.assign_date(sds_meta.get_item('eStation2_input_files')[0:8])
    sds_meta.write_to_file(input_file)
    return 0
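# A minimal usage sketch of rewrite_metadata (hypothetical codes and paths):
# regenerate the full eStation2 metadata block of an existing file in place;
# the date is re-derived from the stored input-files entry.
#
#   rewrite_metadata('/my/processing/dir/20200301_product.tif',
#                    productcode='vgt-ndvi', subproductcode='ndv',
#                    version='sv2-pv2.2', mapsetcode='SPOTV-Africa-1km',
#                    output_directory='/my/processing/dir/')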
def rewrite_metadata_single_paramater(input_file, parameter_key, parameter_value):
    # Read the existing metadata, update the single field and rewrite in place
    sds_meta = metadata.SdsMetadata()
    # Check if the input file is single, or a list
    sds_meta.read_from_file(input_file)
    sds_meta.assign_single_paramater(parameter_key, parameter_value)
    sds_meta.write_to_file(input_file)
    return 0
def reproject_jrc_wbd(input_file):
    new_mapset = 'WD-GEE-ECOWAS-AVG'
    out_filepath = input_file.replace('WD-GEE-ECOWAS-1', 'WD-GEE-ECOWAS-AVG')
    # Check the file is not yet there
    if not os.path.isfile(out_filepath):
        # Reproject (crop to the ECOWAS window)
        command = 'gdal_translate -of GTIFF -co "compress=LZW" ' \
                  '-projwin -17.5290058 27.3132762 24.0006488 4.2682552 ' \
                  + input_file + ' ' + out_filepath
        os.system(command)
        # Update metadata (mapset)
        sds_meta = metadata.SdsMetadata()
        # Check if the input file is single, or a list
        sds_meta.read_from_file(out_filepath)
        sds_meta.assign_mapset(new_mapset)
        sds_meta.write_to_file(out_filepath)
    else:
        print('Output file already exists: %s' % os.path.basename(out_filepath))
def ingest_file_archive(input_file, target_mapsetid, echo_query=False, no_delete=False):
    # -------------------------------------------------------------------------
    # Ingest a file of type MESA_JRC_
    # Arguments:
    #   input_file: input file full name
    #   target_mapsetid: target mapset
    #   no_delete: do not delete input file (for external medium)
    #
    # Since 30/10/17: manages .zipped files, ending with extension .gz.tif (see ES2-96)
    #
    logger.info("Entering routine %s for file %s" % ('ingest_file_archive', input_file))
    extension = '.tif'
    # Test the file exists
    if not os.path.isfile(input_file):
        logger.error('Input file: %s does not exist' % input_file)
        return 1
    # Create temp output dir for unzipping (since release 2.1.1 - for wd-gee products)
    if re.match(es_constants.es2globals['prefix_eumetcast_files'] + '.*.gz.tif', os.path.basename(input_file)):
        try:
            tmpdir = tempfile.mkdtemp(prefix=__name__,
                                      suffix='_' + os.path.basename(input_file),
                                      dir=es_constants.base_tmp_dir)
        except Exception:
            logger.error('Cannot create temporary dir ' + es_constants.base_tmp_dir + '. Exit')
            raise NameError('Error in creating tmpdir')
        # Unzip the file
        output_filename = os.path.basename(input_file).replace('gz.tif', 'tif')
        command = 'gunzip -c ' + input_file + ' > ' + tmpdir + os.path.sep + output_filename
        try:
            os.system(command)
        except Exception:
            logger.error('Cannot gunzip file ' + os.path.basename(input_file) + '. Exit')
            raise NameError('Error in unzipping file')
        my_input_file = tmpdir + os.path.sep + output_filename
        b_unzipped = True
        extension = '.gz.tif'
    else:
        my_input_file = input_file
        b_unzipped = False
        extension = '.tif'
    # Instance metadata object (for output_file)
    sds_meta_out = metadata.SdsMetadata()
    # Read metadata from input_file
    sds_meta_in = metadata.SdsMetadata()
    sds_meta_in.read_from_file(my_input_file)
    # Extract info from input file
    if re.match(es_constants.es2globals['prefix_eumetcast_files'] + '.*.tif', os.path.basename(input_file)):
        [str_date, product_code, sub_product_code, mapsetid, version] = \
            functions.get_all_from_filename_eumetcast(my_input_file)
    else:
        [str_date, product_code, sub_product_code, mapsetid, version] = \
            functions.get_all_from_filename(my_input_file)
    # Define output filename
    sub_dir = sds_meta_in.get_item('eStation2_subdir')
    product_type = functions.get_product_type_from_subdir(sub_dir)
    if re.match(es_constants.es2globals['prefix_eumetcast_files'] + '.*.tif', os.path.basename(input_file)):
        output_file = es_constants.es2globals['processing_dir'] + \
            functions.convert_name_from_eumetcast(my_input_file, product_type, with_dir=True,
                                                  new_mapset=target_mapsetid)
    else:
        output_file = es_constants.es2globals['processing_dir'] + \
            functions.convert_name_from_archive(my_input_file, product_type, with_dir=True,
                                                new_mapset=target_mapsetid)
    # Make sure output dir exists
    output_dir = os.path.split(output_file)[0]
    functions.check_output_dir(output_dir)
    # Compare input-target mapset
    if target_mapsetid == mapsetid:
        # Check if the target file exists ... and delete it in case
        if os.path.isfile(output_file):
            os.remove(output_file)
        # Copy file to output
        shutil.copyfile(my_input_file, output_file)
        # Open output dataset for writing metadata
        # trg_ds = gdal.Open(output_file)
    else:
        # ---------------------------------------------------------------------
        # Manage the geo-referencing associated to input file
        # ---------------------------------------------------------------------
        orig_ds = gdal.Open(my_input_file, gdal.GA_ReadOnly)
        # Read the data type
        band = orig_ds.GetRasterBand(1)
        out_data_type_gdal = band.DataType
        try:
            # Read geo-reference from input file
            orig_cs = osr.SpatialReference()
            orig_cs.ImportFromWkt(orig_ds.GetProjectionRef())
            orig_geo_transform = orig_ds.GetGeoTransform()
            orig_size_x = orig_ds.RasterXSize
            orig_size_y = orig_ds.RasterYSize
        except Exception:
            logger.error('Cannot read geo-reference from file .. Continue')
        # TODO-M.C.: add a test on the mapset-id in DB table !
        trg_mapset = mapset.MapSet()
        trg_mapset.assigndb(target_mapsetid)
        logger.debug('Target Mapset is: %s' % target_mapsetid)
        # ---------------------------------------------------------------------
        # Generate the output file
        # ---------------------------------------------------------------------
        # Prepare output driver
        out_driver = gdal.GetDriverByName(es_constants.ES2_OUTFILE_FORMAT)
        logger.debug('Doing re-projection to target mapset: %s' % trg_mapset.short_name)
        # Get target SRS from mapset
        out_cs = trg_mapset.spatial_ref
        out_size_x = trg_mapset.size_x
        out_size_y = trg_mapset.size_y
        # Create target in memory
        mem_driver = gdal.GetDriverByName('MEM')
        # Assign mapset to dataset in memory
        mem_ds = mem_driver.Create('', out_size_x, out_size_y, 1, out_data_type_gdal)
        mem_ds.SetGeoTransform(trg_mapset.geo_transform)
        mem_ds.SetProjection(out_cs.ExportToWkt())
        # Apply Reproject-Image to the memory-driver
        orig_wkt = orig_cs.ExportToWkt()
        res = gdal.ReprojectImage(orig_ds, mem_ds, orig_wkt, out_cs.ExportToWkt(),
                                  es_constants.ES2_OUTFILE_INTERP_METHOD)
        logger.debug('Re-projection to target done.')
        # Read from the dataset in memory
        out_data = mem_ds.ReadAsArray()
        # Write to output_file
        trg_ds = out_driver.CreateCopy(output_file, mem_ds, 0, [es_constants.ES2_OUTFILE_OPTIONS])
        trg_ds.GetRasterBand(1).WriteArray(out_data)
        # Close dataset
        trg_ds = None
    # -------------------------------------------------------------------------
    # Assign Metadata to the ingested file
    # -------------------------------------------------------------------------
    sds_meta_out.assign_es2_version()
    sds_meta_out.assign_mapset(target_mapsetid)
    sds_meta_out.assign_from_product(product_code, sub_product_code, version)
    sds_meta_out.assign_date(str_date)
    sds_meta_out.assign_subdir_from_fullpath(output_dir)
    sds_meta_out.assign_comput_time_now()
    sds_meta_out.assign_input_files(my_input_file)
    # Write metadata to file
    sds_meta_out.write_to_file(output_file)
    # -------------------------------------------------------------------------
    # Create a file for deleting from ingest (at the end)
    # -------------------------------------------------------------------------
    working_dir = es_constants.es2globals['base_tmp_dir'] + os.path.sep + 'ingested_files'
    functions.check_output_dir(working_dir)
    if no_delete == False:
        trace_file = working_dir + os.path.sep + os.path.basename(input_file) + '.tbd'
        logger.debug('Trace for deleting ingested file %s' % trace_file)
        with open(trace_file, 'a'):
            os.utime(trace_file, None)
    else:
        logger.debug('Do not delete ingest file.')
    # Remove temp dir
    if b_unzipped:
        shutil.rmtree(tmpdir)
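# A minimal usage sketch of ingest_file_archive (hypothetical filename and
# mapset id): ingest a MESA_JRC_ file onto a target mapset; re-projection is
# triggered only when the file's native mapset differs from the target one.
#
#   status = ingest_file_archive('/data/ingest/MESA_JRC_vgt-ndvi_ndv_20200301_'
#                                'SPOTV-Africa-1km_sv2-pv2.2.tif',
#                                'SPOTV-Africa-1km', no_delete=True)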
def convert_driver(output_dir=None):
    # Definitions
    input_dir = es_constants.es2globals['processing_dir']
    # Instance metadata object
    sds_meta = metadata.SdsMetadata()
    # Check base output dir
    if output_dir is None:
        output_dir = es_constants.es2globals['spirits_output_dir']
    functions.check_output_dir(output_dir)
    # Read the spirits table and convert all existing files
    spirits_list = querydb.get_spirits()
    for entry in spirits_list:
        use_range = False
        product_code = entry['productcode']
        sub_product_code = entry['subproductcode']
        version = entry['version']
        out_data_type = entry['out_data_type']
        out_scale_factor = entry['out_scale_factor']
        out_offset = entry['out_offset']
        out_nodata = entry['data_ignore_value']
        # Prepare the naming dict
        naming_spirits = {'sensor_filename_prefix': entry['sensor_filename_prefix'],
                          'frequency_filename_prefix': entry['frequency_filename_prefix'],
                          'pa_filename_prefix': entry['product_anomaly_filename_prefix']}
        metadata_spirits = {'values': entry['prod_values'],
                            'flags': entry['flags'],
                            'data_ignore_value': entry['data_ignore_value'],
                            'days': entry['days'],
                            'sensor_type': entry['sensor_type'],
                            'comment': entry['comment'],
                            'date': ''}
        # Manage mapsets: if defined use it, else read the existing ones from filesystem
        my_mapsets = []
        if entry['mapsetcode']:
            my_mapsets.append(entry['mapsetcode'])
        else:
            prod = Product(product_code, version=version)
            for mp in prod.mapsets:
                my_mapsets.append(mp)
        # Manage dates
        if entry['start_date']:
            from_date = datetime.datetime.strptime(str(entry['start_date']), '%Y%m%d').date()
            use_range = True
        else:
            from_date = None
        if entry['end_date']:
            to_date = datetime.datetime.strptime(str(entry['end_date']), '%Y%m%d').date()
            use_range = True
        else:
            to_date = None
        for my_mapset in my_mapsets:
            # Manage output dirs
            out_sub_dir = my_mapset + os.path.sep + \
                product_code + os.path.sep + \
                entry['product_anomaly_filename_prefix'] + \
                entry['frequency_filename_prefix'] + \
                str(entry['days']) + os.path.sep
            logger.info('Working on [%s]/[%s]/[%s]/[%s]' % (product_code, version, my_mapset, sub_product_code))
            ds = Dataset(product_code, sub_product_code, my_mapset, version=version,
                         from_date=from_date, to_date=to_date)
            product_info = ds._db_product
            in_scale_factor = product_info.scale_factor
            in_offset = product_info.scale_offset
            in_nodata = product_info.nodata
            mask_min = product_info.mask_min
            mask_max = product_info.mask_max
            productcode = product_info.productcode
            subproductcode = product_info.subproductcode
            if productcode == 'vgt-ndvi' and subproductcode == 'ndv':
                mask_min = 0
            if use_range:
                available_files = ds.get_filenames_range()
            else:
                available_files = ds.get_filenames()
            # Convert input products
            if len(available_files) > 0:
                for input_file in available_files:
                    # Check it is a .tif file (not .missing)
                    path, ext = os.path.splitext(input_file)
                    if ext == '.tif':
                        functions.check_output_dir(output_dir + out_sub_dir)
                        str_date = functions.get_date_from_path_filename(os.path.basename(input_file))
                        # Check input file exists
                        if os.path.isfile(input_file):
                            if len(naming_spirits['frequency_filename_prefix']) > 1:
                                my_str_date = naming_spirits['frequency_filename_prefix'][1:5] + str_date
                                metadata_spirits['date'] = my_str_date
                            else:
                                metadata_spirits['date'] = str_date
                            # Read metadata from the file and differentiate chirps prelim and final data
                            sds_meta.read_from_file(input_file)
                            input_file_name = sds_meta.get_item('eStation2_input_files')
                            if productcode == 'chirps-dekad' and input_file_name.endswith(".tif;"):
                                metadata_spirits['comment'] = 'Prelim ' + entry['comment']
                            elif productcode == 'chirps-dekad' and input_file_name.endswith(".gz;"):
                                metadata_spirits['comment'] = 'Final ' + entry['comment']
                            # Check output file exists
                            # convert_geotiff_file(input_file, output_dir+out_sub_dir, str_date, naming_spirits, metadata_spirits)
                            convert_geotiff_datatype_rescaled(input_file, output_dir + out_sub_dir, str_date,
                                                              naming_spirits, metadata_spirits,
                                                              in_scale_factor, in_offset, in_nodata,
                                                              out_scale_factor, out_offset, out_nodata,
                                                              out_data_type, mask_min, mask_max)
                        else:
                            logger.debug('Input file does not exist: %s' % input_file)
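# A minimal usage sketch of convert_driver (hypothetical output directory):
# convert every product listed in the spirits DB table, writing to the
# default spirits_output_dir unless one is given.
#
#   convert_driver()                       # use es2globals['spirits_output_dir']
#   convert_driver('/data/spirits_out/')   # or force an explicit target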
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None,
                    nrt_products=True, logger=None):
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()
    # Set DEFAULTS: all ON
    activate_3davg_comput = 1
    activate_1monavg_comput = 1
    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep
    # ---------------------------------------------------------------------
    # Define input files (chl)
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)
    starting_files = input_dir + "*" + in_prod_ident
    # ---------------------------------------------------------------------
    # 1. 3davg
    # 3 Day average of the 1 day Chl, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("3dayavg", prod, final=False,
                                               descriptive_name='3day Avg',
                                               description='3 day Average',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    prod_ident_3davg = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    subdir_3davg = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    # Use a specific function, to skip the current day
    def generate_parameters_3davg():
        # Look for all input files in input_dir, and sort them
        if starting_dates is not None:
            input_files = []
            for my_date in starting_dates:
                input_files.append(input_dir + my_date + in_prod_ident)
        else:
            starting_files = input_dir + "*" + in_prod_ident
            input_files = glob.glob(starting_files)
        logger.debug("starting_files %s" % input_files)
        day_list = []
        # Create unique list of all days (as 'YYYYMMDD')
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            if mydate_yyyymmdd not in day_list:
                day_list.append(mydate_yyyymmdd)
        day_list = sorted(day_list)
        # Compute the 'julian' dekad for the current day
        today = datetime.today()
        yesterday = today - timedelta(1)
        today_str = today.strftime('%Y%m%d')
        yesterday_str = yesterday.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)
        for myday in day_list:
            input_file = [s for s in input_files if myday in s]
            file_list = []
            basename = os.path.basename(input_file[0])
            # Date is in format YYYYMMDD
            mydate_yyyymmdd = functions.get_date_from_path_filename(basename)
            yyyy = int(mydate_yyyymmdd[0:4])
            mm = int(mydate_yyyymmdd[4:6])
            dd = int(mydate_yyyymmdd[6:8])
            # The current day and the two following days must all exist;
            # this implicitly skips today and yesterday
            day2 = datetime(yyyy, mm, dd) + timedelta(1)
            day2_filepath = input_dir + day2.strftime('%Y%m%d') + in_prod_ident
            if not functions.is_file_exists_in_path(day2_filepath):
                continue
            day3 = datetime(yyyy, mm, dd) + timedelta(2)
            day3_filepath = input_dir + day3.strftime('%Y%m%d') + in_prod_ident
            if not functions.is_file_exists_in_path(day3_filepath):
                continue
            file_list.append(input_file[0])
            file_list.append(day2_filepath)
            file_list.append(day3_filepath)
            output_file = es_constants.processing_dir + subdir_3davg + os.path.sep + \
                mydate_yyyymmdd + prod_ident_3davg
            file_list = sorted(file_list)
            # Check here the number of missing files (for optimization)
            if len(file_list) == 3:
                yield (file_list, output_file)

    @active_if(activate_3davg_comput)
    @files(generate_parameters_3davg)
    def compute_3dayavg(input_file, output_file):
        no_data = int(sds_meta.get_nodata_value(input_file[0]))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "",
                "input_nodata": no_data,
                "output_nodata": no_data}
        raster_image_math.do_avg_image(**args)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, logger=None):
    my_date = None
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()
    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep
    # ---------------------------------------------------------------------
    # Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)
    if my_date is not None:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident
    # ---------------------------------------------------------------------
    # Monthly Average for a given month
    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod("monavg", "monstats", final=False,
                                               descriptive_name='Monthly average',
                                               description='Monthly average',
                                               frequency_id='',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)
    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

    @collate(starting_files, formatter(formatter_in), formatter_out)
    def compute_monavg(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        no_data = int(sds_meta.get_nodata_value(input_file[0]))
        str_date = out_filename[0:6]
        today = datetime.date.today()
        today_yyyymm = today.strftime('%Y%m')
        # expected_ndays = functions.get_number_days_month(str_date)
        # current_ndays = len(input_file)
        if str_date == today_yyyymm:
            logger.info('Do not perform computation for current month {0}. Skip'.format(str_date))
        else:
            args = {"input_file": input_file,
                    "output_file": output_file,
                    "output_format": 'GTIFF',
                    "input_nodata": no_data,
                    "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)

    return proc_lists
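# A minimal sketch (plain re, hypothetical filename) of the date group that
# formatter_in above captures; ruffus collates all daily files sharing the
# same YYYYMM into a single monthly-average job:
#
#   >>> import re
#   >>> re.match("(?P<YYYYMM>[0-9]{6})[0-9]{2}", "20200301_prod.tif").group('YYYYMM')
#   '202003'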
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    my_date = None
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()
    activate_opfish_computation = 1
    # activate_shapefile_conversion = 1
    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep
    # ---------------------------------------------------------------------
    # Define input files (chla)
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)
    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            # ES2-450: check file exists before appending
            if functions.is_file_exists_in_path(input_dir + my_date + in_prod_ident):
                starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident
    # ---------------------------------------------------------------------
    # 1. Define and customize parameters
    # ---------------------------------------------------------------------
    parameters = {'chl_grad_min': 0.00032131,  # smaller window detects more fronts
                  'chl_grad_int': 0.021107,
                  'chl_feed_min': 0.08,
                  'chl_feed_max': 11.0,  # Temperature: 0.45 deg (multiply by 100 !!)
                  'dc': 0.91}
    if prod == 'modis-chla':
        # Currently identical to the defaults above
        parameters = {'chl_grad_min': 0.00032131,  # smaller window detects more fronts
                      'chl_grad_int': 0.021107,
                      'chl_feed_min': 0.08,
                      'chl_feed_max': 11.0,  # Temperature: 0.45 deg (multiply by 100 !!)
                      'dc': 0.91}
    # ---------------------------------------------------------------------
    # Chla Gradient (raster)
    output_sprod_group = proc_lists.proc_add_subprod_group("gradient")
    output_sprod = proc_lists.proc_add_subprod("opfish", "gradient", final=False,
                                               descriptive_name='Ocean Productive index for Fish',
                                               description='Ocean Productive index for Fish',
                                               frequency_id='',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    prod_ident_gradient = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    subdir_gradient = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + subdir_gradient + "{YYYYMMDD[0]}" + prod_ident_gradient]

    @active_if(activate_opfish_computation)
    @transform(starting_files, formatter(formatter_in), formatter_out)
    def opfish_computation(input_file, output_file):
        no_data = int(sds_meta.get_nodata_value(input_file))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,
                "output_file": output_file,
                "nodata": no_data,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "parameters": parameters}
        raster_image_math.compute_opFish_indicator(**args)
        print('Done with raster')

    return proc_lists
def reproject_output(input_file, native_mapset_id, target_mapset_id, output_dir=None, version=None, logger=None):
    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)
    # Check output dir
    if output_dir is None:
        output_dir = es_constants.es2globals['processing_dir']
    # Get the existing dates for the dataset
    logger.debug("Entering routine %s for file %s" % ('reproject_output', input_file))
    ext = es_constants.ES2_OUTFILE_EXTENSION
    # Test the file exists
    if not os.path.isfile(input_file):
        logger.error('Input file: %s does not exist' % input_file)
        return 1
    # Instance metadata object (for output_file)
    sds_meta_out = metadata.SdsMetadata()
    # Read metadata from input_file
    sds_meta_in = metadata.SdsMetadata()
    sds_meta_in.read_from_file(input_file)
    # Extract info from input file
    str_date = sds_meta_in.get_item('eStation2_date')
    product_code = sds_meta_in.get_item('eStation2_product')
    sub_product_code = sds_meta_in.get_item('eStation2_subProduct')
    # 22.06.2017 Add the option to force the version
    if version is None:
        version = sds_meta_in.get_item('eStation2_product_version')
    # Define output filename
    sub_dir = sds_meta_in.get_item('eStation2_subdir')
    # Fix a bug for 10davg-linearx2 metadata - and make method more robust
    if re.search('.*derived.*', sub_dir):
        product_type = 'Derived'
    elif re.search('.*tif.*', sub_dir):
        product_type = 'Ingest'
    # product_type = functions.get_product_type_from_subdir(sub_dir)
    out_prod_ident = functions.set_path_filename_no_date(product_code, sub_product_code,
                                                         target_mapset_id, version, ext)
    output_subdir = functions.set_path_sub_directory(product_code, sub_product_code, product_type,
                                                     version, target_mapset_id)
    output_file = output_dir + output_subdir + str_date + out_prod_ident
    # Make sure output dir exists
    output_dir = os.path.split(output_file)[0]
    functions.check_output_dir(output_dir)
    # -------------------------------------------------------------------------
    # Manage the geo-referencing associated to input file
    # -------------------------------------------------------------------------
    orig_ds = gdal.Open(input_file, gdal.GA_Update)
    # Read the data type
    band = orig_ds.GetRasterBand(1)
    out_data_type_gdal = band.DataType
    if native_mapset_id != 'default':
        native_mapset = MapSet()
        native_mapset.assigndb(native_mapset_id)
        orig_cs = osr.SpatialReference(wkt=native_mapset.spatial_ref.ExportToWkt())
        # Complement orig_ds info (necessary to Re-project)
        try:
            # orig_ds.SetGeoTransform(native_mapset.geo_transform)
            orig_ds.SetProjection(orig_cs.ExportToWkt())
        except Exception:
            logger.debug('Cannot set the geo-projection .. Continue')
    else:
        try:
            # Read geo-reference from input file
            orig_cs = osr.SpatialReference()
            orig_cs.ImportFromWkt(orig_ds.GetProjectionRef())
        except Exception:
            logger.debug('Cannot read geo-reference from file .. Continue')
    # TODO-M.C.: add a test on the mapset-id in DB table !
    trg_mapset = MapSet()
    trg_mapset.assigndb(target_mapset_id)
    logger.debug('Target Mapset is: %s' % target_mapset_id)
    # -------------------------------------------------------------------------
    # Generate the output file
    # -------------------------------------------------------------------------
    # Prepare output driver
    out_driver = gdal.GetDriverByName(es_constants.ES2_OUTFILE_FORMAT)
    logger.debug('Doing re-projection to target mapset: %s' % trg_mapset.short_name)
    # Get target SRS from mapset
    out_cs = trg_mapset.spatial_ref
    out_size_x = trg_mapset.size_x
    out_size_y = trg_mapset.size_y
    # Create target in memory
    mem_driver = gdal.GetDriverByName('MEM')
    # Assign mapset to dataset in memory
    mem_ds = mem_driver.Create('', out_size_x, out_size_y, 1, out_data_type_gdal)
    mem_ds.SetGeoTransform(trg_mapset.geo_transform)
    mem_ds.SetProjection(out_cs.ExportToWkt())
    # Apply Reproject-Image to the memory-driver
    orig_wkt = orig_cs.ExportToWkt()
    res = gdal.ReprojectImage(orig_ds, mem_ds, orig_wkt, out_cs.ExportToWkt(),
                              es_constants.ES2_OUTFILE_INTERP_METHOD)
    logger.debug('Re-projection to target done.')
    # Read from the dataset in memory
    out_data = mem_ds.ReadAsArray()
    # Write to output_file
    trg_ds = out_driver.CreateCopy(output_file, mem_ds, 0, [es_constants.ES2_OUTFILE_OPTIONS])
    trg_ds.GetRasterBand(1).WriteArray(out_data)
    # -------------------------------------------------------------------------
    # Assign Metadata to the ingested file
    # -------------------------------------------------------------------------
    # Close dataset
    trg_ds = None
    sds_meta_out.assign_es2_version()
    sds_meta_out.assign_mapset(target_mapset_id)
    sds_meta_out.assign_from_product(product_code, sub_product_code, version)
    sds_meta_out.assign_date(str_date)
    sds_meta_out.assign_subdir_from_fullpath(output_dir)
    sds_meta_out.assign_comput_time_now()
    # Copy the same input files as in the non-reprojected input
    file_list = sds_meta_in.get_item('eStation2_input_files')
    sds_meta_out.assign_input_files(file_list)
    # Write metadata to file
    sds_meta_out.write_to_file(output_file)
    # Return the filename
    return output_file
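# A minimal usage sketch of reproject_output (hypothetical file and mapset
# ids): re-project a product onto another mapset, letting version default to
# the one stored in the file metadata.
#
#   out_file = reproject_output('/my/processing/dir/20200301_product.tif',
#                               native_mapset_id='default',
#                               target_mapset_id='SPOTV-Africa-1km')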
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):
    my_date = None
    # ---------------------------------------------------------------------
    # Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()
    activate_gradient_computation = 1
    # activate_shapefile_conversion = 1
    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep
    # ---------------------------------------------------------------------
    # Define input files (chla)
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)
    if my_date:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident
    # ---------------------------------------------------------------------
    # 1. Define and customize parameters
    # ---------------------------------------------------------------------
    # Default values from the routine are used if None is passed
    # parameters = {'histogramWindowStride': 16,
    #               'histogramWindowSize': 32,
    #               'minTheta': 0.76,
    #               'minPopProp': 0.25,
    #               'minPopMeanDifference': 20,  # Temperature: 0.45 deg (multiply by 100 !!)
    #               'minSinglePopCohesion': 0.60,
    #               'minImageValue': 1,
    #               'minThreshold': 1}
    #
    # if prod == 'modis-sst':
    #     parameters = {'histogramWindowStride': None,
    #                   'minTheta': None,
    #                   'minPopProp': None,
    #                   'minPopMeanDifference': None,
    #                   'minSinglePopCohesion': None,
    #                   'histogramWindowSize': None,
    #                   'minImageValue': None,
    #                   'minThreshold': None}
    #
    # if prod == 'pml-modis-sst':
    #     parameters = {'histogramWindowSize': 32,
    #                   'histogramWindowStride': 16,
    #                   'minTheta': 0.76,
    #                   'minPopProp': 0.25,
    #                   'minPopMeanDifference': 20,
    #                   'minSinglePopCohesion': 0.60,
    #                   'minImageValue': 1,
    #                   'minThreshold': 1}
    # ---------------------------------------------------------------------
    # Chla Gradient (raster)
    output_sprod_group = proc_lists.proc_add_subprod_group("gradient")
    output_sprod = proc_lists.proc_add_subprod("gradient", "gradient", final=False,
                                               descriptive_name='Gradient',
                                               description='Gradient',
                                               frequency_id='',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    prod_ident_gradient = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    subdir_gradient = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)
    formatter_in = "(?P<YYYYMMDD>[0-9]{8})" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + subdir_gradient + "{YYYYMMDD[0]}" + prod_ident_gradient]

    @active_if(activate_gradient_computation)
    @transform(starting_files, formatter(formatter_in), formatter_out)
    def gradient_computation(input_file, output_file):
        no_data = int(sds_meta.get_nodata_value(input_file))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,
                "output_file": output_file,
                "nodata": no_data,
                "output_format": 'GTIFF',
                "options": "compress=lzw"}
        raster_image_math.do_compute_chla_gradient(**args)
        print('Done with raster')

    return proc_lists
standard_library.install_aliases()
import glob
import os

from lib.python import metadata

input_dir = '/data/processing/wd-gee/1.0/WD-GEE-ECOWAS/tif/occurr/'
output_dir = '/data/processing/wd-gee/1.0/WD-GEE-ECOWAS-AVG/tif/occurr/'
new_mapset = 'WD-GEE-ECOWAS-AVG'

in_files = glob.glob(input_dir + '*.tif')
for file in in_files:
    filename = os.path.basename(file)
    out_filename = filename.replace('WD-GEE-ECOWAS', 'WD-GEE-ECOWAS-AVG')
    out_filepath = output_dir + out_filename
    # Check the file is not yet there
    if not os.path.isfile(out_filepath):
        # Reproject (crop to the ECOWAS window)
        command = 'gdal_translate -of GTIFF -co "compress=LZW" ' \
                  '-projwin -17.5290058 27.3132762 24.0006488 4.2682552 ' \
                  + file + ' ' + out_filepath
        os.system(command)
        # Update metadata (mapset)
        sds_meta = metadata.SdsMetadata()
        # Check if the input file is single, or a list
        sds_meta.read_from_file(out_filepath)
        sds_meta.assign_mapset(new_mapset)
        sds_meta.write_to_file(out_filepath)
    else:
        print('Output file already exists: %s' % out_filename)