def create_pipeline(starting_sprod):
    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es_constants.processing_dir + \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    starting_files = input_dir + "*" + in_prod_ident

    # Read input product nodata
    in_prod_info = querydb.get_product_out_info(productcode=prod,
                                                subproductcode=starting_sprod,
                                                version=version)
    product_info = functions.list_to_element(in_prod_info)
    in_nodata = product_info.nodata

    print(in_nodata)

    #   ---------------------------------------------------------------------
    #   Monthly Average for a given month
    output_sprod = "monavg"
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" +
        out_prod_ident
    ]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_par_monavg(input_file, output_file):

        # Average all daily files collated for one YYYYMM into a single image
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw",
                "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)
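
# Side note, not part of the pipeline above: a minimal, self-contained sketch of
# how the formatter_in regex keys daily files on their YYYYMM prefix, which is
# what lets @collate group one month of inputs into a single average. File names
# and the product identifier passed in are hypothetical.
def _group_by_month_sketch(filenames, prod_ident):
    import re
    from collections import defaultdict
    pattern = re.compile(r"(?P<YYYYMM>[0-9]{6})[0-9]{2}" + re.escape(prod_ident))
    groups = defaultdict(list)
    for name in filenames:
        match = pattern.search(name)
        if match:
            groups[match.group('YYYYMM')].append(name)
    return groups
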
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0,
                     input_products='', output_product='', mapset=''):


    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir+os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code,'Ingest', out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)

    # Loop over the input products:
    for input_product in input_products:

        # Extract info from the input product
        product_code = input_product.productcode
        sub_product_code = input_product.subproductcode
        version = input_product.version
        start_date = input_product.start_date
        end_date = input_product.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                            subproductcode=sub_product_code,
                                                            version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type, version, out_mapset)
        # Input files carry the input product identifiers (not the output ones)
        in_prod_ident = functions.set_path_filename_no_date(product_code, sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates is None, all existing files should be used (not implemented yet)
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir+my_date+out_prod_ident

                # Create the link
                functions.create_sym_link(in_file_path, out_file_path, force=False)

    return list_subprods, list_subprod_groups
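
# Hypothetical invocation sketch for processing_merge(); the lightweight record
# type and every product/mapset code below are illustrative assumptions, not
# fixtures from this repository.
def _merge_example():
    from collections import namedtuple
    ProductRef = namedtuple(
        'ProductRef',
        'productcode subproductcode version mapsetcode start_date end_date')
    out_prod = [ProductRef('vgt-ndvi', 'ndv', 'sv2-pv2.2',
                           'SPOTV-Africa-1km', None, None)]
    in_prods = [ProductRef('vgt-ndvi', 'ndv', 'sv2-pv2.1',
                           'SPOTV-Africa-1km', '20200101', '20201221')]
    return processing_merge(input_products=in_prods, output_product=out_prod)
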
Example #3
    def do_average(self):

        if self.frequency == '10d':
            #   ---------------------------------------------------------------------
            #   Average
            output_sprod_group = self.proc_lists.proc_add_subprod_group(
                "10dstats")
            output_sprod = self.proc_lists.proc_add_subprod(
                "10davg",
                "10dstats",
                final=False,
                descriptive_name='10d Average ' + self.subproduct_code,
                description='Long Term Average for ' + self.subproduct_code,
                frequency_id='e1dekad',
                date_format='MMDD',
                masked=False,
                timeseries_role=self.starting_sprod,  # '10d',
                # display_index=2,
                active_default=True)

            out_prod_ident = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            output_subdir = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            self.formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + self.in_prod_ident
            self.formatter_out = [
                "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
                out_prod_ident
            ]
        else:

            output_sprod_group = self.proc_lists.proc_add_subprod_group(
                "monthly_stats")
            output_sprod = self.proc_lists.proc_add_subprod(
                "1monavg",
                "monthly_stats",
                final=False,
                descriptive_name='Monthly Average ' + self.subproduct_code,
                description='Monthly Average ' + self.subproduct_code,
                frequency_id='e1month',
                date_format='MMDD',
                masked=False,
                timeseries_role=self.input_subprod_monthly,  # '10d',
                # display_index=112,
                active_default=True)

            prod_ident_1monavg = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            subdir_1monavg = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            self.formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + self.in_prod_ident_monthly
            self.formatter_out = "{subpath[0][5]}" + os.path.sep + subdir_1monavg + "{MMDD[0]}" + prod_ident_1monavg
    def do_monthly_prod(self):
        #   ---------------------------------------------------------------------
        #   3.a monthly product (avg)
        #   ---------------------------------------------------------------------
        output_sprod_group = self.proc_lists.proc_add_subprod_group(
            "monthly_prod")
        output_sprod = self.proc_lists.proc_add_subprod(
            "mon" + self.subproduct_code,
            "monthly_prod",
            final=False,
            descriptive_name='Monthly Product ' + self.subproduct_code,
            description='Monthly Product ' + self.subproduct_code,
            frequency_id='e1month',
            date_format='YYYYMMDD',
            masked=False,
            timeseries_role='',
            active_default=True)

        prod_ident_mon = functions.set_path_filename_no_date(
            self.prod, output_sprod, self.mapset, self.version, self.ext)
        subdir_mon = functions.set_path_sub_directory(self.prod, output_sprod,
                                                      'Derived', self.version,
                                                      self.mapset)

        self.formatter_in = "(?P<YYYYMM>[0-9]{6})(?P<DD>[0-9]{2})" + self.in_prod_ident
        self.formatter_out = "{subpath[0][5]}" + os.path.sep + subdir_mon + "{YYYYMM[0]}" + '01' + prod_ident_mon

        # Initialize the monthly parameters (check whether ruffus initiates this)
        self.initialize_month_parameters()
    def __init__(self,
                 prod,
                 starting_sprod,
                 mapset,
                 version,
                 starting_dates=None,
                 proc_lists=None,
                 frequency=None,
                 product_type='Ingest'):

        self.prod = prod
        self.starting_sprod = starting_sprod
        self.mapset = mapset
        self.version = version
        self.starting_dates = starting_dates
        self.proc_lists = proc_lists
        self.frequency = frequency
        self.product_type = product_type
        self.starting_files = []
        # ES2-410 This should be used for creating the derived subproduct code dynamically
        self.subproduct_code = starting_sprod
        #   General definitions for this processing chain
        self.ext = es_constants.ES2_OUTFILE_EXTENSION

        self.es2_data_dir = es_constants.es2globals[
            'processing_dir'] + os.path.sep

        #   ---------------------------------------------------------------------
        #   Define input files
        self.in_prod_ident = functions.set_path_filename_no_date(
            self.prod, self.starting_sprod, self.mapset, self.version,
            self.ext)

        #logger.debug('Base data directory is: %s' % es2_data_dir)
        input_dir = self.es2_data_dir+ \
                    functions.set_path_sub_directory(self.prod, self.starting_sprod, self.product_type, self.version, self.mapset)

        if self.starting_dates is not None:
            # starting_files = []
            for my_date in self.starting_dates:
                # ES2-450: check that the file exists before appending
                if functions.is_file_exists_in_path(input_dir + my_date +
                                                    self.in_prod_ident):
                    self.starting_files.append(input_dir + my_date +
                                               self.in_prod_ident)
        else:
            self.starting_files = input_dir + "*" + self.in_prod_ident

        #   Build the list of input files (glob the wildcard if no dates given)
        if self.starting_dates is not None:
            self.input_files = self.starting_files
        else:
            self.input_files = glob.glob(self.starting_files)


        #   ---------------------------------------------------------------------
        #   Initialize the monthly input prod
        #   ---------------------------------------------------------------------
        self.initialize_month_parameters()
    def change_subProds_params(self,
                               starting_sprod,
                               frequency=None,
                               product_type='Ingest'):
        self.starting_sprod = starting_sprod
        self.change_frequency_params(frequency)
        self.product_type = product_type
        self.starting_files = []

        #   ---------------------------------------------------------------------
        #   Define input files
        self.in_prod_ident = functions.set_path_filename_no_date(
            self.prod, self.starting_sprod, self.mapset, self.version,
            self.ext)

        # logger.debug('Base data directory is: %s' % es2_data_dir)
        input_dir = self.es2_data_dir + \
                    functions.set_path_sub_directory(self.prod, self.starting_sprod, self.product_type, self.version,
                                                     self.mapset)

        if self.starting_dates is not None:
            # starting_files = []
            for my_date in self.starting_dates:
                # ES2-450: check that the file exists before appending
                if functions.is_file_exists_in_path(input_dir + my_date +
                                                    self.in_prod_ident):
                    self.starting_files.append(input_dir + my_date +
                                               self.in_prod_ident)
        else:
            self.starting_files = input_dir + "*" + self.in_prod_ident
    def test_set_path_filename_no_date(self):
        params = {
            'productcode': 'vgt-ndvi',
            'subproductcode': 'ndvi-linearx2',
            'version': 'sv2-pv2.2',
            'mapsetcode': 'SPOTV-Africa-1km'
        }
        filename_nodate = functions.set_path_filename_no_date(
            params['productcode'], params['subproductcode'], params['version'],
            params['mapsetcode'], '.tif')
        self.assertEqual(
            filename_nodate,
            '_vgt-ndvi_ndvi-linearx2_sv2-pv2.2_SPOTV-Africa-1km.tif')
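
    def test_set_path_sub_directory(self):
        # Companion sketch to the test above (assumes os is imported). The only
        # assertion is that the derived sub-directory ends with a separator, as
        # implied by the concatenation pattern used throughout these examples;
        # the exact layout of the path is not asserted.
        subdir = functions.set_path_sub_directory('vgt-ndvi', 'ndvi-linearx2',
                                                  'Derived', 'sv2-pv2.2',
                                                  'SPOTV-Africa-1km')
        self.assertTrue(subdir.endswith(os.path.sep))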
    def initialize_month_parameters(self):
        #   ---------------------------------------------------------------------
        #   Initialize the monthly input prod
        #   ---------------------------------------------------------------------
        self.input_subprod_monthly = "mon" + self.subproduct_code

        self.in_prod_ident_monthly = functions.set_path_filename_no_date(
            self.prod, self.input_subprod_monthly, self.mapset, self.version,
            self.ext)

        input_dir_monthly = self.es2_data_dir + \
                            functions.set_path_sub_directory(self.prod, self.input_subprod_monthly, 'Derived', self.version, self.mapset)

        self.starting_files_mon_prod = input_dir_monthly + "*" + self.in_prod_ident_monthly
Example #11
    def hide_some_files(self, productcode, version, subproductcode, type,
                        mapset, dates):
        # Move some products to /tmp/eStation2/test/, to generate a product request

        source_dir = es_constants.es2globals['processing_dir'] + \
                     functions.set_path_sub_directory(productcode, subproductcode, type, version, mapset)
        target_dir = es_constants.es2globals['base_tmp_dir']

        for date in dates:
            filename = date + functions.set_path_filename_no_date(
                productcode, subproductcode, mapset, version, '.tif')

            # set_path_sub_directory() already ends with a separator
            fullpath = source_dir + filename
            fullpath_dest = target_dir + os.path.sep + filename

            try:
                os.rename(fullpath, fullpath_dest)
            except OSError:
                logger.error('Error in moving file %s' % fullpath)
    def do_10d_from_1d(self):
        #   ---------------------------------------------------------------------
        #   Derived product: 10dcumul
        #   ---------------------------------------------------------------------

        output_sprod_group = self.proc_lists.proc_add_subprod_group("10d_prod")
        output_sprod = self.proc_lists.proc_add_subprod(
            "10d",
            "10d_prod",
            final=False,
            descriptive_name='10d ' + self.subproduct_code,
            description=self.subproduct_code + ' for dekad',
            frequency_id='e1dekad',
            date_format='YYYYMMDD',
            masked=False,
            timeseries_role='Initial',
            active_default=True)

        self.out_prod_ident_10d = functions.set_path_filename_no_date(
            self.prod, output_sprod, self.mapset, self.version, self.ext)
        self.output_subdir_10d = functions.set_path_sub_directory(
            self.prod, output_sprod, 'Derived', self.version, self.mapset)
Example #13
def create_pipeline(prod,
                    starting_sprod,
                    native_mapset,
                    target_mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    day_time=None,
                    logger=None):

    # Test flag (to save non-projected cumulated products)
    test_mode = False

    # Create Logger
    # logger.fatal('Version 13.06.2017 !!!!!!!!!!!!!!!!!!!!!!!!!!')

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_1dcum_comput = 1
    activate_10dcum_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files ('mpe' subproduct)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    # ----------------------------------------------------------------------------------------------------------------
    # 1dcum
    # Daily cumulate of the 15 min MPE, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("1dmax",
                                               "msg-mpe",
                                               final=False,
                                               descriptive_name='1d Cumulate',
                                               description='Daily Cumulate',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod = '1dcum'
    out_prod_ident_1dcum = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1dcum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)

    # Use a specific function, to skip the current day
    def generate_parameters_1dcum():

        #   Look for all input files in input_dir, and sort them
        if starting_dates is not None:
            input_files = []
            for my_date in starting_dates:
                input_files.append(input_dir + my_date + in_prod_ident)
        else:
            starting_files = input_dir + "*" + in_prod_ident
            input_files = glob.glob(starting_files)

        logger.debug("starting_files %s" % input_files)

        day_list = []

        # Create the unique list of all days (as YYYYMMDD strings)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            if mydate_yyyymmdd not in day_list:
                day_list.append(mydate_yyyymmdd)

        day_list = sorted(day_list)

        # Today's date, used to exclude the (incomplete) current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')

        for myday in day_list:
            # Exclude the current day
            if myday != today_str:
                file_list = []
                for input_file in input_files:

                    basename = os.path.basename(input_file)
                    # Date is in format YYYYMMDDhhmm
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    if day_time is None:
                        # Append files for myday
                        if mydate_yyyymmdd[0:8] == myday:
                            file_list.append(input_file)
                    else:
                        # Append files in time range myday+hhmm |-| (myday+1)+ hhmm
                        if int(mydate_yyyymmdd) >= int(myday) * 10000 + int(
                                day_time) and int(mydate_yyyymmdd) < (
                                    int(myday) + 1) * 10000 + int(day_time):
                            file_list.append(input_file)

                # Build the output file name; a missing day_time defaults to midnight
                hhmm = '0000' if day_time is None else str(day_time)
                output_file = es_constants.processing_dir + output_subdir_1dcum + os.path.sep + \
                              str(int(myday) * 10000 + int(hhmm)) + out_prod_ident_1dcum
                file_list = sorted(file_list)
                # Require at least 87 of the 96 daily 15-min slots (at most 9 missing)
                if len(file_list) > 86:
                    yield (file_list, output_file)


    @active_if(activate_1dcum_comput)
    @files(generate_parameters_1dcum)
    def msg_mpe_1dcum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)

        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)
        # Divide by 10 to pass from 0.01 to 0.1 as scale factor for 1d cum
        factor = 0.1
        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "scale_factor": factor,
            "input_nodata": -32768
        }

        raster_image_math.do_cumulate(**args)

        reproject_output(tmp_output_file, native_mapset, target_mapset)

        # Copy the non-reprojected file for validation, only in test_mode
        if test_mode:
            msg_proj_dir = es_constants.processing_dir + functions.set_path_sub_directory(
                prod, '1dcum', 'Derived', version, native_mapset)
            functions.check_output_dir(msg_proj_dir)
            shutil.copy(tmp_output_file, msg_proj_dir + os.path.sep)

        # Clean up the temporary directory
        shutil.rmtree(tmpdir)

    # ----------------------------------------------------------------------------------------------------------------
    #   10 day Cumulate (mm)

    output_sprod = proc_lists.proc_add_subprod(
        "10dcum",
        "msg-mpe",
        final=False,
        descriptive_name='10day Cumulate',
        description='10day Cumulate in mm',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, "10dcum", target_mapset, version, ext)
    output_subdir_10dcum = functions.set_path_sub_directory(
        prod, "10dcum", 'Derived', version, target_mapset)

    in_prod_10dcum = '1dcum'
    in_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, in_prod_10dcum, target_mapset, version, ext)
    input_dir_10dcum = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, in_prod_10dcum, 'Derived', version, target_mapset)

    starting_files_10dcum = input_dir_10dcum + "*" + in_prod_ident_10dcum

    #   Define input files
    def generate_parameters_10dcum():

        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files_10dcum)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the (incomplete) current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:

                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                # Build the output name once per dekad (not once per input file)
                output_file = es_constants.processing_dir + output_subdir_10dcum + os.path.sep + my_dekad_str + out_prod_ident_10dcum

                yield (file_list, output_file)
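
    # For context: a plausible conv_date_2_dekad(), consistent with how dekad
    # numbers are compared above (three dekads per month, strictly increasing
    # over time). This is an assumption about the helper, not its actual source.
    def _conv_date_2_dekad_sketch(yyyymmdd):
        year, month, day = int(yyyymmdd[0:4]), int(yyyymmdd[4:6]), int(yyyymmdd[6:8])
        dekad_in_month = min((day - 1) // 10, 2)  # days 1-10 -> 0, 11-20 -> 1, 21-31 -> 2
        return year * 36 + (month - 1) * 3 + dekad_in_month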

    @follows(msg_mpe_1dcum)
    @active_if(activate_10dcum_comput)
    @files(generate_parameters_10dcum)
    def msg_mpe_10dcum(input_file, output_file):

        # A dekad counts 8 to 11 days: require at least 9 daily files
        if len(input_file) > 8:
            output_file = functions.list_to_element(output_file)
            # Get the number of days of that dekad
            basename = os.path.basename(output_file)
            mydate = functions.get_date_from_path_filename(basename)
            nbr_days_dekad = functions.day_per_dekad(mydate)
            factor = 1.0
            functions.check_output_dir(os.path.dirname(output_file))

            args = {
                "input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw",
                "scale_factor": factor,
                "input_nodata": -32768
            }

            raster_image_math.do_cumulate(**args)

        else:
            logger.warning(
                'Too many daily files missing for output {0}: skip'.format(
                    os.path.basename(output_file)))

    return proc_lists
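
# Hypothetical driver for the MPE pipeline defined above; the product, mapset
# and version codes are illustrative assumptions, not values from this module.
if __name__ == '__main__':
    import logging
    from ruffus import pipeline_run
    create_pipeline(prod='msg-mpe', starting_sprod='mpe',
                    native_mapset='MSG-satellite-3km',
                    target_mapset='SPOTV-Africa-1km', version='undefined',
                    logger=logging.getLogger('msg-mpe'))
    pipeline_run(verbose=1)
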
#   for Group 3.c  (monthly_stats)
activate_1monavg = 1
activate_1monmax = 1
activate_1monmin = 1
activate_1monstd = 1

#   for Group 3.d  (monthly_anomalies) -> To Be Done
activate_1monsndvi = 1
activate_1monandvi = 1
activate_1monvci = 1
activate_1monicn = 1

#   ---------------------------------------------------------------------
#   Define input files (NDV)
starting_sprod = 'ndv'
in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

logger.debug('Base data directory is: %s' % es_constants.processing_dir)
input_dir = es_constants.processing_dir+ \
            functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

logger.debug('Input data directory is: %s' % input_dir)
starting_files = input_dir+"*"+in_prod_ident
logger.debug('Starting files wild card is: %s' % starting_files)

#   ---------------------------------------------------------------------
#   1.a 10Day non-filtered Stats
#   ---------------------------------------------------------------------

#   ---------------------------------------------------------------------
#   NDV avg x dekad (i.e. avg_dekad)
Example #15
def create_pipeline(prod,
                    starting_sprod,
                    native_mapset,
                    target_mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):

    # Create Logger
    logger = log.my_logger('log.lst')
    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_1dmax_comput = 1
    activate_10dmax_comput = 1
    activate_10d15min_comput = 1
    activate_10dmin_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files ('lst' subproduct)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    logger.info("starting_files %s" % starting_files)

    # ----------------------------------------------------------------------------------------------------------------
    # 1dmax
    # Daily maximum from 15min lst, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("1dmax",
                                               "lsasaf-lst",
                                               final=False,
                                               descriptive_name='1d Maximum',
                                               description='Daily Maximum',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod = '1dmax'
    out_prod_ident_1dmax = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1dmax = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)

    formatter_in_1dmax = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident
    formatter_out_1dmax = "{subpath[0][5]}" + os.path.sep + output_subdir_1dmax + "{YYYYMMDD[0]}" + out_prod_ident_1dmax
    @active_if(activate_1dmax_comput)
    @collate(starting_files, formatter(formatter_in_1dmax),
             formatter_out_1dmax)
    def lsasaf_lst_1dmax(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)

        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)

        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }

        raster_image_math.do_max_image(**args)

        reproject_output(tmp_output_file, native_mapset, target_mapset)

        shutil.rmtree(tmpdir)

    # ----------------------------------------------------------------------------------------------------------------
    # 10dmax
    # 10 Day maximum from daily max, on target mapset
    output_sprod = proc_lists.proc_add_subprod("10dmax",
                                               "lsasaf-lst",
                                               final=False,
                                               descriptive_name='10d Maximum',
                                               description='10d Maximum',
                                               frequency_id='e1dekad',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    output_sprod_10dmax = '10dmax'
    out_prod_ident_10dmax = functions.set_path_filename_no_date(
        prod, output_sprod_10dmax, target_mapset, version, ext)
    output_subdir_10dmax = functions.set_path_sub_directory(
        prod, output_sprod_10dmax, 'Derived', version, target_mapset)

    #   Define input files
    in_prod_10dmax = '1dmax'
    in_prod_ident_10dmax = functions.set_path_filename_no_date(
        prod, in_prod_10dmax, target_mapset, version, ext)

    input_dir_10dmax = es_constants.processing_dir + \
                functions.set_path_sub_directory(prod, in_prod_10dmax, 'Derived', version, target_mapset)

    starting_files_10dmax = input_dir_10dmax + "*" + in_prod_ident_10dmax

    def generate_parameters_10dmax():

        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files_10dmax)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the (incomplete) current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:

                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                # Build the output name once per dekad (not once per input file)
                output_file = es_constants.processing_dir + output_subdir_10dmax + os.path.sep + my_dekad_str + out_prod_ident_10dmax

                yield (file_list, output_file)


    @active_if(activate_10dmax_comput)
    @files(generate_parameters_10dmax)
    def lsasaf_lst_10dmax(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }

        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   Dekad maximum for every 15min
    #   NOTE: this product is computed w/o re-projection, i.e. on the 'native' mapset

    output_sprod = proc_lists.proc_add_subprod(
        "10d15min",
        "lsasaf-lst",
        final=False,
        descriptive_name='10day Maximum over 15 min',
        description='10day Maximum computed for every 15 min',
        frequency_id='e15minute',  # TODO: confirm this frequency_id
        date_format='YYYYMMDDHHMM',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, native_mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     native_mapset)

    def generate_parameters_10d15min():

        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        # Generate the list of 15-min times in a day
        timelist = [
            datetime.time(h, m).strftime("%H%M")
            for h, m in itertools.product(xrange(0, 24), xrange(0, 60, 15))
        ]

        for time in timelist:
            files_for_time = glob.glob(input_dir + os.path.sep + '*' + time +
                                       in_prod_ident)
            for dekad in dekad_list:
                # Exclude the current dekad
                if dekad != dekad_now:
                    file_list = []
                    my_dekad_str = functions.conv_dekad_2_date(dekad)
                    output_file = es_constants.processing_dir + output_subdir + os.path.sep + my_dekad_str + time + out_prod_ident

                    for myfile in files_for_time:
                        basename = os.path.basename(myfile)
                        mydate_yyyymmdd = functions.get_date_from_path_filename(
                            basename)
                        mydekad_nbr = functions.conv_date_2_dekad(
                            mydate_yyyymmdd[0:8])
                        if mydekad_nbr == dekad:
                            file_list.append(myfile)
                    # A dekad counts 8 to 11 days: require at least 9 files for this slot
                    if len(file_list) > 8:
                        yield (file_list, output_file)

    @active_if(activate_10d15min_comput)
    @files(generate_parameters_10d15min)
    def lsasaf_lst_10d15min(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw", "input_nodata":-32768}

        raster_image_math.do_max_image(**args)

        # Also do the house-keeping: delete ingested files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, starting_sprod, version, native_mapset,
                         'Ingest', number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    #   10 day minimum
    #   NOTE: this product is computed with re-projection, i.e. on the 'target' mapset

    output_sprod = proc_lists.proc_add_subprod(
        "10dmin",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Minimum',
        description='10day minimum',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_10dmin = functions.set_path_filename_no_date(
        prod, "10dmin", target_mapset, version, ext)
    output_subdir_10dmin = functions.set_path_sub_directory(
        prod, "10dmin", 'Derived', version, target_mapset)

    #   Define input files
    in_prod_10dmin = '10d15min'
    in_prod_ident_10dmin = functions.set_path_filename_no_date(
        prod, in_prod_10dmin, native_mapset, version, ext)

    input_dir_10dmin = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, in_prod_10dmin, 'Derived', version, native_mapset)

    starting_files_10dmin = input_dir_10dmin + "*" + in_prod_ident_10dmin

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident_10dmin
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_10dmin +
        "{YYYYMMDD[0]}" + out_prod_ident_10dmin
    ]

    @follows(lsasaf_lst_10d15min)
    @active_if(activate_10dmin_comput)
    @collate(starting_files_10dmin, formatter(formatter_in), formatter_out)
    def lsasaf_lst_10dmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)

        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)

        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }

        raster_image_math.do_min_image(**args)

        reproject_output(tmp_output_file, native_mapset, target_mapset)

        shutil.rmtree(tmpdir)

        # Also do the house-keeping: delete the derived '10d15min' files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, '10d15min', version, native_mapset, 'Derived',
                         number_months_keep)

    return proc_lists
Example #16
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    logger=None):

    my_date = None

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # 8d average
    activate_8dayavg_comput = 1

    # monthly
    activate_monavg_comput = 1
    activate_monclim_comput = 0
    activate_monanom_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    if my_date is not None:
        starting_files = input_dir + my_date + "*" + in_prod_ident
    else:
        starting_files = input_dir + "*" + in_prod_ident

    #   ---------------------------------------------------------------------
    #   8-days Average

    output_sprod_group_8day = proc_lists.proc_add_subprod_group("8days")
    output_sprod_8day = proc_lists.proc_add_subprod(
        "8daysavg",
        "8days",
        final=False,
        descriptive_name='8Day average',
        description='8Day average',
        frequency_id='',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_8day = functions.set_path_filename_no_date(
        prod, output_sprod_8day, mapset, version, ext)
    output_subdir_8day = functions.set_path_sub_directory(
        prod, output_sprod_8day, 'Derived', version, mapset)

    def generate_parameters_8days():

        years_periods_list = []

        #   Look for all input files in input_dir
        input_files = glob.glob(starting_files)
        # Create the unique list of all (year, 8-day period) pairs
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydate_year = str(mydate)[0:4]

            period_nbr = functions.conv_date_2_8days(mydate_yyyymmdd)

            if (mydate_year, period_nbr) not in years_periods_list:
                years_periods_list.append((mydate_year, period_nbr))

        periods_sorted = sorted(years_periods_list)

        # Compute the 8-day period containing the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        year_now = today.strftime('%Y')
        period_now = functions.conv_date_2_8days(today_str)

        # Loop over the (year, period) pairs
        for year, period in periods_sorted:
            # Exclude the (incomplete) current period
            if period != period_now or year != year_now:
                file_list = []
                jdoy_period = "{0:03d}".format(1 + 8 * (int(period) - 1))
                mmdd_period = functions.conv_date_yyyydoy_2_yyyymmdd(
                    year + jdoy_period)
                output_file = es_constants.processing_dir + output_subdir_8day + os.path.sep + mmdd_period + out_prod_ident_8day

                for myfile in input_files:
                    basename = os.path.basename(myfile)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydate_year = mydate_yyyymmdd[0:4]

                    period_nbr = functions.conv_date_2_8days(
                        mydate_yyyymmdd[0:8])
                    if period_nbr == period and mydate_year == year:
                        file_list.append(myfile)

                # Special case of the last period of the year: add a few days of the next year
                if period == 46:
                    next_year = "{0:04d}".format(int(year) + 1)
                    if calendar.isleap(int(year)):
                        add_days = ('0101', '0102', '0103')
                    else:
                        add_days = ('0101', '0102', '0103', '0104')
                    for day in add_days:
                        date = next_year + day
                        matches = [
                            x for x in input_files
                            if fnmatch.fnmatch(x, '*{0}*'.format(date))
                        ]
                        # Fixes ES2-35 (see YouTrack)
                        if len(matches) > 0:
                            file_list.append(matches[0])

                yield (sorted(file_list), output_file)
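
    # For context: a plausible conv_date_2_8days(), matching the inverse mapping
    # used above (period p starts on day-of-year 1 + 8*(p-1), as in the MODIS
    # 8-day compositing convention). An assumption, not the helper's actual code.
    def _conv_date_2_8days_sketch(yyyymmdd):
        doy = datetime.datetime.strptime(yyyymmdd, '%Y%m%d').timetuple().tm_yday
        return (doy - 1) // 8 + 1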

    @active_if(activate_8dayavg_comput)
    @files(generate_parameters_8days)
    def modis_8dayavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Average for a given month

    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod(
        "monavg",
        "monstats",
        final=False,
        descriptive_name='Monthly average',
        description='Monthly average',
        frequency_id='',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_monavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        str_date = out_filename[0:6]
        today = datetime.date.today()
        today_yyyymm = today.strftime('%Y%m')

        #expected_ndays=functions.get_number_days_month(str_date)
        #current_ndays=len(input_file)
        if str_date == today_yyyymm:
            logger.info(
                'Do not perform computation for current month {0}. Skip'.
                format(str_date))
        else:
            args = {
                "input_file": input_file,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_avg_image(**args)
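
    # For context: the commented-out completeness check above presumably relies
    # on a helper like this; an assumption, not the real get_number_days_month().
    def _days_in_month_sketch(yyyymm):
        return calendar.monthrange(int(yyyymm[0:4]), int(yyyymm[4:6]))[1]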

    #   ---------------------------------------------------------------------
    #   Monthly Climatology for all years

    output_sprod = proc_lists.proc_add_subprod(
        "monclim",
        "monstats",
        final=False,
        descriptive_name='Monthly climatology',
        description='Monthly climatology',
        frequency_id='',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, new_input_subprod, 'Derived', version, mapset)

    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" + out_prod_ident

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monclim(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
     "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Anomaly for a given month

    output_sprod = proc_lists.proc_add_subprod(
        "monanom",
        "monstats",
        final=False,
        descriptive_name='Monthly anomaly',
        description='Monthly anomaly',
        frequency_id='',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    return proc_lists
def create_pipeline(starting_sprod):
    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    starting_files = input_dir + "*" + in_prod_ident
    # Read input product nodata
    in_prod_info = querydb.get_product_out_info(productcode=prod,
                                                subproductcode=starting_sprod,
                                                version=version)
    product_info = functions.list_to_element(in_prod_info)
    in_nodata = product_info.nodata

    print(in_nodata)

    #   ---------------------------------------------------------------------
    #   Monthly Average for a given month
    output_sprod = "monavg"
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" +
        out_prod_ident
    ]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_kd_monavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
        "options": "compress=lzw", "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Climatology for all years
    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset)

    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    output_sprod = "monclim"
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MM[0]}" +
        out_prod_ident
    ]

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_kd_monclim(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
     "options": "compress=lzw", "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)
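
    # Note (illustration only): here collate groups by the MM capture alone,
    # so the monthly averages of e.g. 200001, 200101 and 200201 (all
    # Januaries) are averaged into one climatology file dated '01'.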

    #   ---------------------------------------------------------------------
    #   Monthly Anomaly for a given month

    output_sprod = "monanom"
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MM[0]}" + out_prod_ident

    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MM[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def modis_kd_mondiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    return proc_lists

    def do_percent_difference(self):
        # Percent Difference
        if self.frequency == '10d':
            output_sprod = self.proc_lists.proc_add_subprod(
                "10dperc",
                "10anomalies",
                final=False,
                descriptive_name='10d Percent Difference ' +
                self.subproduct_code,
                description='10d Percent Difference vs. LTA',
                frequency_id='e1dekad',
                date_format='YYYYMMDD',
                masked=False,
                timeseries_role=self.starting_sprod,  # '10d',
                # display_index=7,
                active_default=True)
            out_prod_ident = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            output_subdir = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            #   Starting files + avg
            self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + self.in_prod_ident
            self.formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

            ancillary_sprod = "10davg"
            ancillary_sprod_ident = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod, self.mapset, self.version,
                self.ext)
            ancillary_subdir = functions.set_path_sub_directory(
                self.prod, ancillary_sprod, 'Derived', self.version,
                self.mapset)
            self.ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident
        else:

            output_sprod = self.proc_lists.proc_add_subprod(
                "1monperc",
                "filtered_anomalies",
                final=False,
                descriptive_name='Monthly Percent Difference',
                description='Monthly Percent Difference',
                frequency_id='e1month',
                date_format='YYYYMMDD',
                masked=False,
                timeseries_role=self.input_subprod_monthly,  # '10d',
                # display_index=117,
                active_default=True)
            prod_ident_1monperc = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            subdir_1monperc = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            # inputs
            #   Starting files + avg
            self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + self.in_prod_ident_monthly
            self.formatter_out = "{subpath[0][5]}" + os.path.sep + subdir_1monperc + "{YYYY[0]}{MMDD[0]}" + prod_ident_1monperc

            ancillary_sprod = "1monavg"
            ancillary_sprod_ident = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod, self.mapset, self.version,
                self.ext)
            ancillary_subdir = functions.set_path_sub_directory(
                self.prod, ancillary_sprod, 'Derived', self.version,
                self.mapset)
            self.ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident
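
        # Note (explanatory comment, not in the original source): this method
        # only prepares formatter_in/formatter_out and ancillary_input on
        # self; the ruffus task that consumes them is presumably wired up
        # elsewhere in the class.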
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None,
                    update_stats=False, nrt_products=True):

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all off
    activate_10danomalies_comput=0      # 10d anomalies
    activate_monthly_comput=0           # monthly cumulation
    activate_monanomalies_comput=0      # monthly anomalies
    activate_10dstats_comput=0          # 10d stats
    activate_monstats_comput=0          # 1mon stats

    #   switch wrt groups - according to options
    if nrt_products:
        activate_10danomalies_comput=1      # 10d anomalies
        activate_monthly_comput=1           # monthly cumulation
        activate_monanomalies_comput=1      # monthly anomalies

    if update_stats:
        activate_10dstats_comput= 1         # 10d stats
        activate_monstats_comput=1          # 1mon stats

    #   switch wrt single products: not to be changed !!
    activate_10davg_comput=1
    activate_10dmin_comput=1
    activate_10dmax_comput=1
    activate_10ddiff_comput=1
    activate_10dperc_comput=1
    activate_10dnp_comput=1

    activate_1moncum_comput=1
    activate_1monavg_comput=1
    activate_1monmin_comput=1
    activate_1monmax_comput=1
    activate_1mondiff_comput=1
    activate_1monperc_comput=1
    activate_1monnp_comput=1

    es2_data_dir = es_constants.es2globals['processing_dir']+os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

    logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    logger.debug('Input data directory is: %s' % input_dir)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir+my_date+in_prod_ident)
    else:
        starting_files=input_dir+"*"+in_prod_ident

    logger.debug('Starting files wild card is: %s' % starting_files)
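
    # Note on the two shapes of starting_files (explanatory comment, not in
    # the original source): with explicit starting_dates it is a list of full
    # paths, e.g. [input_dir + '20150101' + in_prod_ident, ...]; otherwise it
    # is a single wildcard string that ruffus expands as a glob when the
    # tasks below run.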

    #   ---------------------------------------------------------------------
    #   Average
    output_sprod_group=proc_lists.proc_add_subprod_group("10dstats")
    output_sprod=proc_lists.proc_add_subprod("10davg", "10dstats", final=False,
                                             descriptive_name='10d Average',
                                             description='Average rainfall for dekad',
                                             frequency_id='e1dekad',
                                             date_format='MMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_10dstats_comput, activate_10davg_comput)
    @collate(starting_files, formatter(formatter_in),formatter_out)
    def std_precip_10davg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Minimum
    output_sprod=proc_lists.proc_add_subprod("10dmin", "10dstats", final=False,
                                             descriptive_name='10d Minimum',
                                             description='Minimum rainfall for dekad',
                                             frequency_id='e1dekad',
                                             date_format='MMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_10dstats_comput, activate_10dmin_comput)
    @collate(starting_files, formatter(formatter_in),formatter_out)
    def std_precip_10dmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   Maximum
    output_sprod=proc_lists.proc_add_subprod("10dmax", "10dstats", final=False,
                                             descriptive_name='10d Maximum',
                                             description='Maximum rainfall for dekad',
                                             frequency_id='e1dekad',
                                             date_format='MMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_10dstats_comput, activate_10dmax_comput)
    @collate(starting_files, formatter(formatter_in),formatter_out)
    def std_precip_10dmax(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   10dDiff
    output_sprod_group=proc_lists.proc_add_subprod_group("10anomalies")
    output_sprod=proc_lists.proc_add_subprod("10ddiff", "10anomalies", final=False,
                                             descriptive_name='10d Absolute Difference',
                                             description='10d Absolute Difference vs. LTA',
                                             frequency_id='e1dekad',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset)
    ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(std_precip_10davg)
    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def std_precip_10ddiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)

    #   ---------------------------------------------------------------------
    #   10dperc
    output_sprod=proc_lists.proc_add_subprod("10dperc", "10anomalies",  final=False,
                                             descriptive_name='10d Percent Difference',
                                             description='10d Percent Difference vs. LTA',
                                             frequency_id='e1dekad',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(std_precip_10davg)
    @active_if(activate_10danomalies_comput, activate_10dperc_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def std_precip_10dperc(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_compute_perc_diff_vs_avg(**args)
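
    # Note (explanatory comment, not in the original source): add_inputs()
    # appends the 10davg ancillary to each job's input, so input_file arrives
    # as a pair; input_file[0] is the current dekad and input_file[1] the
    # matching long-term average, hence the unpacking in the args above.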

    #   ---------------------------------------------------------------------
    #   10dnp
    output_sprod=proc_lists.proc_add_subprod("10dnp", "10anomalies",  final=False,
                                             descriptive_name='10d Normalized Anomaly',
                                             description='10d Normalized Anomaly',
                                             frequency_id='e1dekad',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + min + max
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod_1 = "10dmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1      = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived',version, mapset)
    ancillary_input_1="{subpath[0][5]}"+os.path.sep+ancillary_subdir_1+"{MMDD[0]}"+ancillary_sprod_ident_1

    ancillary_sprod_2 = "10dmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2      = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived',version, mapset)
    ancillary_input_2="{subpath[0][5]}"+os.path.sep+ancillary_subdir_2+"{MMDD[0]}"+ancillary_sprod_ident_2

    @follows(std_precip_10dmin, std_precip_10dmax)
    @active_if(activate_10danomalies_comput, activate_10dnp_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
    def std_precip_10dnp(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file[0], "min_file": input_file[1],"max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_make_vci(**args)
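
    # Note (assumption, for illustration): judging by its name, do_make_vci
    # presumably applies a VCI-style normalization of the current value
    # between the historical extremes, roughly (x - min) / (max - min),
    # which is why 10dmin and 10dmax are wired in as ancillary inputs.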

    #   ---------------------------------------------------------------------
    #   1moncum
    output_sprod_group=proc_lists.proc_add_subprod_group("monthly")
    output_sprod=proc_lists.proc_add_subprod("1moncum", "monthly", final=False,
                                             descriptive_name='Monthly Cumulate',
                                             description='Monthly Cumulate Precipitation',
                                             frequency_id='e1month',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='1mon',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    # inputs: files from same months
    formatter_in="(?P<YYYYMM>[0-9]{6})(?P<DD>[0-9]{2})"+in_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYYMM[0]}"+'01'+out_prod_ident

    # @follows(std_precip_10davg)
    @active_if(activate_monthly_comput, activate_1moncum_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_precip_1moncum(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,"output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_cumulate(**args)
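
    # Note (illustration only): the YYYYMM capture groups all dekads of a
    # month and the output date is pinned to the first day, e.g. inputs
    # 20150101, 20150111 and 20150121 yield one cumulate dated 20150101.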

    #   ---------------------------------------------------------------------
    #   Monthly Average
    new_input_subprod='1moncum'
    in_prod_ident= functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext)
    output_sprod_group=proc_lists.proc_add_subprod_group("monstat")
    output_sprod=proc_lists.proc_add_subprod("1monavg", "monstat", final=False,
                                             descriptive_name='Monthly Average',
                                             description='Monthly Average Precipitation',
                                             frequency_id='e1month',
                                             date_format='MMDD',
                                             masked=False,
                                             timeseries_role='1mon',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @follows(std_precip_1moncum)
    @active_if(activate_monstats_comput, activate_1monavg_comput)
    @collate(std_precip_1moncum, formatter(formatter_in),formatter_out)
    def std_precip_1monavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Minimum
    output_sprod=proc_lists.proc_add_subprod("1monmin", "monstat",final=False,
                                             descriptive_name='Monthly Minimum',
                                             description='Monthly Minimum Precipitation',
                                             frequency_id='e1month',
                                             date_format='MMDD',
                                             masked=False,
                                             timeseries_role='1mon',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @follows(std_precip_1moncum)
    @active_if(activate_monstats_comput, activate_1monmin_comput)
    @collate(std_precip_1moncum, formatter(formatter_in),formatter_out)
    def std_precip_1monmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Maximum
    output_sprod=proc_lists.proc_add_subprod("1monmax", "monstat",final=False,
                                             descriptive_name='Monthly Maximum',
                                             description='Monthly Maximum Precipitation',
                                             frequency_id='e1month',
                                             date_format='MMDD',
                                             masked=False,
                                             timeseries_role='1mon',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    reg_ex_in="[0-9]{4}([0-9]{4})"+in_prod_ident

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][5]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @follows(std_precip_1moncum)
    @active_if(activate_monstats_comput, activate_1monmax_comput)
    @collate(std_precip_1moncum, formatter(formatter_in),formatter_out)
    def std_precip_1monmax(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   1monDiff
    output_sprod_group=proc_lists.proc_add_subprod_group("monanomalies")
    output_sprod=proc_lists.proc_add_subprod("1mondiff", "monanomalies", final=False,
                                             descriptive_name='Monthly Absolute Difference',
                                             description='Monthly Absolute Difference Precipitation',
                                             frequency_id='e1month',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='1mon',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    # inputs
    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "1monavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(std_precip_1monavg)
    @active_if(activate_monanomalies_comput, activate_1mondiff_comput)
    @transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def std_precip_1mondiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)

    #   ---------------------------------------------------------------------
    #   1monperc
    output_sprod=proc_lists.proc_add_subprod("1monperc", "monanomalies", final=False,
                                             descriptive_name='Monthly Percent Difference',
                                             description='Monthly Percent Difference Precipitation',
                                             frequency_id='e1month',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='1mon',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    # inputs
    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "1monavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset)
    ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(std_precip_1monavg)
    @active_if(activate_monanomalies_comput, activate_1monperc_comput)
    @transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def std_precip_1monperc(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    #   ---------------------------------------------------------------------
    #   1monnp
    output_sprod=proc_lists.proc_add_subprod("1monnp", "monanomalies", final=False,
                                             descriptive_name='Monthly Normalized Anomaly',
                                             description='Monthly Normalized Anomaly Precipitation',
                                             frequency_id='e1month',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='1mon',
                                             active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + min + max
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod_1 = "1monmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1      = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived',version, mapset)
    ancillary_input_1="{subpath[0][5]}"+os.path.sep+ancillary_subdir_1+"{MMDD[0]}"+ancillary_sprod_ident_1

    ancillary_sprod_2 = "1monmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2      = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived',version, mapset)
    ancillary_input_2="{subpath[0][5]}"+os.path.sep+ancillary_subdir_2+"{MMDD[0]}"+ancillary_sprod_ident_2

    @follows(std_precip_1monmin, std_precip_1monmax)
    @active_if(activate_monanomalies_comput, activate_1monnp_comput)
    @transform(std_precip_1moncum, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
    def std_precip_1monnp(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file[0], "min_file": input_file[1],"max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_make_vci(**args)

    return proc_lists
def create_pipeline(prod,
                    starting_sprod,
                    native_mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    logger=None,
                    mapset=None):

    # Definitions
    start_season = '0901'
    end_season = '0421'
    agriculture_mask = '/data/temp/AGRIC_MASK.tif'

    # Manage mapset
    if mapset is None:
        mapset = native_mapset

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Activation flags (both on by default)
    activate_seas_cum_comput = 1  # season cumulation (masked, end-of-season)
    activate_cum_comput = 1  # progressive cumulation (SOS to current dekad)

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = glob.glob(input_dir + "*" + in_prod_ident)

    #   ---------------------------------------------------------------------
    #   3.a NDVI linearx2 Season Cumulation masked using Crop Mask
    #   ---------------------------------------------------------------------
    # Define output subproduct
    out_sub_prod_name = 'seas-cum-of-' + starting_sprod
    output_sprod_group = proc_lists.proc_add_subprod_group("seas_cum_prods")
    output_sprod = proc_lists.proc_add_subprod(
        out_sub_prod_name,
        "seas_cum_prods",
        final=True,
        descriptive_name='Season Cumulation for ' + out_sub_prod_name,
        description='Season Cumulation for ' + out_sub_prod_name,
        frequency_id='e1year',
        date_format='YYYYMMDD',
        masked=True,
        timeseries_role='',
        active_default=True)

    # Generate prod_identifier (_fewsnet-rfe_seas-cum-of-10d_FEWSNET-Africa-8km_2.0.tif) and subdir
    prod_ident_seas_cum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_ident_seas_cum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_seas_cum():

        starting_files.sort()
        # Convert from string to int (for comparison)
        dekad_start = int(start_season)
        dekad_end = int(end_season)

        # Loop over all input files
        for file_t0 in starting_files:
            # Get current date (in format '19980901')
            date_t0 = functions.get_date_from_path_full(file_t0)

            # Extract from date-string the dekad/year as integer
            dekad_t0 = int(date_t0[4:])
            year2 = int(date_t0[0:4])

            # Check if season goes across two years -> define year1/2
            if dekad_start < dekad_end:
                if dekad_t0 >= dekad_start and dekad_t0 <= dekad_end:
                    year1 = year2
            else:
                if dekad_t0 > dekad_start or dekad_t0 <= dekad_end:
                    year1 = year2 - 1

            # Detect the end of the season and trigger processing
            if dekad_t0 == dekad_end:

                # Define output filename
                output_file = es2_data_dir + subdir_ident_seas_cum + str(
                    year2) + end_season + prod_ident_seas_cum

                # Get list of dates from start of season to end of season
                list_dates = proc_functions.get_list_dates_for_dataset(
                    prod,
                    starting_sprod,
                    version,
                    start_date=str(year1) + start_season,
                    end_date=str(year2) + end_season)
                input_files = []
                missing_file = False
                for ldate in list_dates:
                    # Append the file to list if it exists ...
                    if os.path.isfile(input_dir + ldate + in_prod_ident):
                        input_files.append(input_dir + ldate + in_prod_ident)
                    # ... otherwise raise a warning and break
                    else:
                        logger.warning(
                            'Missing file for date {0}. Season not computed.'.
                            format(ldate))
                        missing_file = True
                        break

                if not missing_file:
                    yield (input_files, output_file)
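
    # Note (explanatory comment, not in the original source): ruffus' @files
    # accepts a generator such as the one above; each yielded
    # (input_files, output_file) tuple becomes one job, so a season job is
    # only created when its closing dekad is reached and no input is missing.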

    # Function to do the actual computation from inputs to output
    @active_if(activate_seas_cum_comput)
    @files(generate_parameters_seas_cum)
    def seas_cum(input_files, output_file):
        # Ensure out subdirectory exists
        functions.check_output_dir(os.path.dirname(output_file))

        # If output_file is a list, force it to a string
        output_file = functions.list_to_element(output_file)

        # Prepare temporary working directory for intermediate results
        tmpdirpath = tempfile.mkdtemp()
        # Cumulated but not masked output
        tmp_output_file = tmpdirpath + os.path.sep + os.path.basename(
            output_file)
        # Temp mask in the final projection (mapset)
        tmp_reproj_file = tmpdirpath + os.path.sep + 'my_temp_reprojected_output.tif'

        # Call the function for cumulating
        args = {
            "input_file": input_files,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

        # Reproject the cumulated output from the native mapset to the target
        # mapset. An earlier variant (kept for reference) reprojected the
        # agriculture mask instead:
        # raster_image_math.do_reproject(agriculture_mask, tmp_reproj_file, 'SPOTV-SADC-1km', mapset)
        raster_image_math.do_reproject(tmp_output_file, tmp_reproj_file,
                                       native_mapset, mapset)

        # Call the function for masking
        args = {
            "input_file": tmp_reproj_file,
            "mask_file": agriculture_mask,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "mask_value": 0,
            "out_value": 0
        }
        raster_image_math.do_mask_image(**args)

        # Remove temp directory
        shutil.rmtree(tmpdirpath)

    #   ---------------------------------------------------------------------
    #   3.b Season Cumulation from start of season to the current dekad (repeated until end of season)
    #   ---------------------------------------------------------------------
    # Define output subproduct
    out_sub_prod_name = 'cum-of-' + starting_sprod
    output_sprod_group = proc_lists.proc_add_subprod_group("cum_prods")
    output_sprod = proc_lists.proc_add_subprod(
        out_sub_prod_name,
        "cum_prods",
        final=True,
        descriptive_name='Cumulation for ' + out_sub_prod_name,
        description='Cumulation for ' + out_sub_prod_name,
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    # Generate prod_identifier (_fewsnet-rfe_cum-of-10d_FEWSNET-Africa-8km_2.0.tif) and subdir
    prod_ident_cum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_ident_cum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_cum():

        starting_files.sort()
        # Convert from string to int (for comparison)
        dekad_start = int(start_season)
        dekad_end = int(end_season)

        # Loop over all input files
        for file_t0 in starting_files:
            # Get current date (in format '19980901')
            date_t0 = functions.get_date_from_path_full(file_t0)

            # Extract from date-string the dekad/year as integer
            dekad_t0 = int(date_t0[4:])
            year_t0 = int(date_t0[0:4])
            in_season = False

            # Check if season goes across two years -> define year1/2
            if dekad_start < dekad_end:
                if dekad_t0 >= dekad_start and dekad_t0 <= dekad_end:
                    year_sos = year_t0
                    in_season = True
            else:
                if dekad_t0 >= dekad_start:
                    year_sos = year_t0
                    in_season = True
                if dekad_t0 <= dekad_end:
                    year_sos = year_t0 - 1
                    in_season = True

            # If the dekad falls within the season, trigger processing
            if in_season:

                # Define output filename
                output_file = es2_data_dir + subdir_ident_cum + date_t0 + prod_ident_cum

                # Get list of dates from start of season to the current dekad
                list_dates = proc_functions.get_list_dates_for_dataset(
                    prod,
                    starting_sprod,
                    version,
                    start_date=str(year_sos) + start_season,
                    end_date=date_t0)
                input_files = []
                missing_file = False
                for ldate in list_dates:
                    # Append the file to list if it exists ...
                    if os.path.isfile(input_dir + ldate + in_prod_ident):
                        input_files.append(input_dir + ldate + in_prod_ident)
                    # ... otherwise raise a warning and break
                    else:
                        logger.warning(
                            'Missing file for date {0}. Season not computed.'.
                            format(ldate))
                        missing_file = True
                        break

                if not missing_file:
                    yield (input_files, output_file)

    @active_if(activate_cum_comput)
    @files(generate_parameters_cum)
    # Function to do actual computation from inputs to output
    def cum(input_files, output_file):
        # Ensure out subdirectory exists
        functions.check_output_dir(os.path.dirname(output_file))

        # If output_file is a list, force it to a string
        output_file = functions.list_to_element(output_file)

        # Prepare temporary working directory for intermediate results
        tmpdirpath = tempfile.mkdtemp()
        # Cumulated but not masked output
        tmp_output_file = tmpdirpath + os.path.sep + os.path.basename(
            output_file)

        # Call the function for cumulating
        args = {
            "input_file": input_files,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)

        # Reproject the cumulated output from the native mapset to the target
        # mapset, writing directly to the final output file (no mask applied
        # for this product). An earlier variant (kept for reference):
        # raster_image_math.do_reproject(agriculture_mask, tmp_reproj_file, 'SPOTV-SADC-1km', mapset)
        raster_image_math.do_reproject(tmp_output_file, output_file,
                                       native_mapset, mapset)

        # Remove temp directory
        shutil.rmtree(tmpdirpath)

    return proc_lists
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None):


    my_date=None
    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    activate_gradient_computation = 1
    #activate_shapefile_conversion = 1

    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir']+os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files (chla)
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir+ functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    if my_date:
        starting_files = input_dir+my_date+"*"+in_prod_ident
    else:
        starting_files = input_dir+"*"+in_prod_ident

    #   ---------------------------------------------------------------------
    #   1. Define and customize parameters
    #   ---------------------------------------------------------------------
    #
    # # Default values from the routine are used if None is passed
    # parameters = {'histogramWindowStride': 16,
    #               'histogramWindowSize': 32,
    #               'minTheta': 0.76,
    #               'minPopProp': 0.25,
    #               'minPopMeanDifference': 20,  # Temperature: 0.45 deg (multiply by 100 !!)
    #               'minSinglePopCohesion': 0.60,
    #               'minImageValue': 1,
    #               'minThreshold': 1}
    # if prod == 'modis-sst':
    #     parameters = {  'histogramWindowStride': None,
    #                     'minTheta' : None,
    #                     'minPopProp' : None,
    #                     'minPopMeanDifference' : None,
    #                     'minSinglePopCohesion' : None,
    #                     'histogramWindowSize' : None,
    #                     'minImageValue' : None,
    #                     'minThreshold' : None }
    #
    # if prod == 'pml-modis-sst':
    #     parameters = {  'histogramWindowSize' : 32,
    #                     'histogramWindowStride': 16,
    #                     'minTheta' : 0.76,
    #                     'minPopProp' : 0.25,
    #                     'minPopMeanDifference' : 20,
    #                     'minSinglePopCohesion' : 0.60,
    #                     'minImageValue' : 1,
    #                     'minThreshold' : 1 }

    #   ---------------------------------------------------------------------
    #   Chla Gradient (raster)
    output_sprod_group=proc_lists.proc_add_subprod_group("gradient")
    output_sprod=proc_lists.proc_add_subprod("gradient", "gradient", final=False,
                                             descriptive_name='Gradient',
                                             description='Gradient',
                                             frequency_id='',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='',
                                             active_default=True)

    prod_ident_gradient = functions.set_path_filename_no_date(prod, output_sprod,mapset, version, ext)
    subdir_gradient = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})"+in_prod_ident
    formatter_out = ["{subpath[0][5]}"+os.path.sep+subdir_gradient+"{YYYYMMDD[0]}"+prod_ident_gradient]

    @active_if(activate_gradient_computation)
    @transform(starting_files, formatter(formatter_in),formatter_out)
    def gradient_computation(input_file, output_file):

        no_data = int(sds_meta.get_nodata_value(input_file))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "nodata": no_data,  "output_format": 'GTIFF', "options": "compress = lzw"}

        raster_image_math.do_compute_chla_gradient(**args)
        print('Done with raster')

    return proc_lists
    def do_standardized_prod(self):
        #   ---------------------------------------------------------------------
        #   Standardized 10d product
        if self.frequency == '10d':

            output_sprod = self.proc_lists.proc_add_subprod(
                "10dzscore",
                "filtered_anomalies",
                final=False,
                descriptive_name='10d Standardized ' + self.subproduct_code,
                description='Z Score, Standardized ' + self.subproduct_code,
                frequency_id='e1dekad',
                date_format='YYYYMMDD',
                masked=False,
                timeseries_role=self.starting_sprod,  # '10d',
                # display_index=10,
                active_default=True)

            prod_ident_10dsndvi = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            subdir_10dsndvi = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            input_subprod_10diff = "10ddiff"
            in_prod_ident_10diff = functions.set_path_filename_no_date(
                self.prod, input_subprod_10diff, self.mapset, self.version,
                self.ext)
            input_dir_10diff = self.es2_data_dir + \
                               functions.set_path_sub_directory(self.prod, input_subprod_10diff, 'Derived', self.version, self.mapset)
            self.starting_files_10ddiff = input_dir_10diff + "*" + in_prod_ident_10diff

            self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident_10diff
            self.formatter_out = "{subpath[0][5]}" + os.path.sep + subdir_10dsndvi + "{YYYY[0]}{MMDD[0]}" + prod_ident_10dsndvi

            ancillary_sprod = "10dstd"
            ancillary_sprod_ident = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod, self.mapset, self.version,
                self.ext)
            ancillary_subdir = functions.set_path_sub_directory(
                self.prod, ancillary_sprod, 'Derived', self.version,
                self.mapset)
            self.ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident
        else:
            output_sprod = self.proc_lists.proc_add_subprod(
                "1monzscore",
                "filtered_anomalies",
                final=False,
                descriptive_name='Monthly Standardized ' +
                self.subproduct_code,
                description='Z Score, Monthly Standardized ' +
                self.subproduct_code,
                frequency_id='e1month',
                date_format='YYYYMMDD',
                masked=False,
                timeseries_role=self.input_subprod_monthly,  # '10d',
                # display_index=120,
                active_default=True)

            prod_ident_1monstdprod = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            subdir_1monstdprod = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            input_subprod_mondiff = "1mondiff"
            in_prod_ident_mondiff = functions.set_path_filename_no_date(
                self.prod, input_subprod_mondiff, self.mapset, self.version,
                self.ext)

            input_dir_mondiff = self.es2_data_dir + \
                                functions.set_path_sub_directory(self.prod, input_subprod_mondiff, 'Derived', self.version,
                                                                 self.mapset)

            self.starting_files_mondiff = input_dir_mondiff + "*" + in_prod_ident_mondiff

            self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident_mondiff
            self.formatter_out = "{subpath[0][5]}" + os.path.sep + subdir_1monstdprod + "{YYYY[0]}{MMDD[0]}" + prod_ident_1monstdprod

            ancillary_sprod = "1monstd"
            ancillary_sprod_ident = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod, self.mapset, self.version,
                self.ext)
            ancillary_subdir = functions.set_path_sub_directory(
                self.prod, ancillary_sprod, 'Derived', self.version,
                self.mapset)
            self.ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident
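
        # Note (assumption, for illustration): the z-score presumably divides
        # the absolute difference (10ddiff or 1mondiff) by the matching
        # standard deviation (10dstd or 1monstd), i.e. z = (x - avg) / std,
        # which is why those subproducts are set up as input and ancillary.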
    def do_ratio(self):
        # ---------------------------------------------------------------------
        #   10dratio
        if self.frequency == '10d':

            output_sprod = self.proc_lists.proc_add_subprod(
                "10dratio",
                "10anomalies",
                final=False,
                descriptive_name='10d Ratio ' + self.subproduct_code,
                description='10d Ratio (curr/avg) ' + self.subproduct_code,
                frequency_id='e1dekad',
                date_format='YYYYMMDD',
                masked=False,
                timeseries_role=self.starting_sprod,  #'10d',
                # display_index=8,
                active_default=True)
            out_prod_ident = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            output_subdir = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            #   Starting files + min + max
            self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + self.in_prod_ident
            self.formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

            ancillary_sprod_1 = "10davg"
            ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod_1, self.mapset, self.version,
                self.ext)
            ancillary_subdir_1 = functions.set_path_sub_directory(
                self.prod, ancillary_sprod_1, 'Derived', self.version,
                self.mapset)
            self.ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

        else:
            output_sprod = self.proc_lists.proc_add_subprod(
                "1monratio",
                "filtered_anomalies",
                final=False,
                descriptive_name='Monthly Ratio',
                description='Monthly Ratio (curr/avg)',
                frequency_id='e1month',
                date_format='YYYYMMDD',
                masked=False,
                timeseries_role=self.input_subprod_monthly,  # '10d',
                # display_index=118,
                active_default=True)

            prod_ident_ratio_linearx2 = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            subdir_ratio_linearx2 = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + self.in_prod_ident_monthly
            self.formatter_out = [
                "{subpath[0][5]}" + os.path.sep + subdir_ratio_linearx2 +
                "{YYYY[0]}{MMDD[0]}" + prod_ident_ratio_linearx2
            ]

            ancillary_sprod = "1monavg"
            ancillary_sprod_ident = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod, self.mapset, self.version,
                self.ext)
            ancillary_subdir = functions.set_path_sub_directory(
                self.prod, ancillary_sprod, 'Derived', self.version,
                self.mapset)
            self.ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident
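
# Illustration (not from the original source): the formatter_in regex above
# captures the date components of the input file name, while "{subpath[0][5]}"
# in the output template strips the last five directory levels
# (product/version/mapset/type/sub-product) from the input path, so the output
# lands under the matching 'Derived' sub-tree. The file name below is
# hypothetical; only the date prefix matters.
import re

example_name = "20200101_vgt-ndvi_ndv_SPOTV-Africa-1km_sv2-pv2.2.tif"
match = re.match(r"(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})", example_name)
print(match.group('YYYY'), match.group('MMDD'))  # -> 2020 0101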
Example #24
def reproject_output(input_file,
                     native_mapset_id,
                     target_mapset_id,
                     output_dir=None,
                     version=None,
                     logger=None):

    # Check logger
    if logger is None:
        logger = log.my_logger(__name__)

    # Check output dir
    if output_dir is None:
        output_dir = es_constants.es2globals['processing_dir']

    # Get the existing dates for the dataset
    logger.debug("Entering routine %s for file %s" %
                 ('reproject_output', input_file))
    ext = es_constants.ES2_OUTFILE_EXTENSION

    # Test the file/files exists
    if not os.path.isfile(input_file):
        logger.error('Input file: %s does not exist' % input_file)
        return 1

    # Instance metadata object (for output_file)
    sds_meta_out = metadata.SdsMetadata()

    # Read metadata from input_file
    sds_meta_in = metadata.SdsMetadata()
    sds_meta_in.read_from_file(input_file)

    # Extract info from input file
    str_date = sds_meta_in.get_item('eStation2_date')
    product_code = sds_meta_in.get_item('eStation2_product')
    sub_product_code = sds_meta_in.get_item('eStation2_subProduct')
    # 22.06.2017 Add the option to force the version
    if version is None:
        version = sds_meta_in.get_item('eStation2_product_version')

    # Define output filename
    sub_dir = sds_meta_in.get_item('eStation2_subdir')
    # Fix a bug for 10davg-linearx2 metadata - and make method more robust
    if re.search('.*derived.*', sub_dir):
        product_type = 'Derived'
    elif re.search('.*tif.*', sub_dir):
        product_type = 'Ingest'
    else:
        # Neither pattern matched: report and give up (product_type would be undefined)
        logger.error('Cannot derive the product type from subdir: %s' % sub_dir)
        return 1
    # product_type = functions.get_product_type_from_subdir(sub_dir)

    out_prod_ident = functions.set_path_filename_no_date(
        product_code, sub_product_code, target_mapset_id, version, ext)
    output_subdir = functions.set_path_sub_directory(product_code,
                                                     sub_product_code,
                                                     product_type, version,
                                                     target_mapset_id)

    output_file = output_dir + \
                  output_subdir + \
                  str_date + \
                  out_prod_ident

    # make sure output dir exists
    output_dir = os.path.split(output_file)[0]
    functions.check_output_dir(output_dir)

    # -------------------------------------------------------------------------
    # Manage the geo-referencing associated to input file
    # -------------------------------------------------------------------------
    orig_ds = gdal.Open(input_file, gdal.GA_Update)

    # Read the data type
    band = orig_ds.GetRasterBand(1)
    out_data_type_gdal = band.DataType

    if native_mapset_id != 'default':
        native_mapset = MapSet()
        native_mapset.assigndb(native_mapset_id)
        orig_cs = osr.SpatialReference(
            wkt=native_mapset.spatial_ref.ExportToWkt())

        # Complement orig_ds info (necessary to Re-project)
        try:
            #orig_ds.SetGeoTransform(native_mapset.geo_transform)
            orig_ds.SetProjection(orig_cs.ExportToWkt())
        except Exception:
            logger.debug('Cannot set the geo-projection .. Continue')
    else:
        try:
            # Read geo-reference from input file
            orig_cs = osr.SpatialReference()
            orig_cs.ImportFromWkt(orig_ds.GetProjectionRef())
        except Exception:
            logger.debug('Cannot read geo-reference from file .. Continue')

    # TODO-M.C.: add a test on the mapset-id in DB table !
    trg_mapset = MapSet()
    trg_mapset.assigndb(target_mapset_id)
    logger.debug('Target Mapset is: %s' % target_mapset_id)

    # -------------------------------------------------------------------------
    # Generate the output file
    # -------------------------------------------------------------------------
    # Prepare output driver
    out_driver = gdal.GetDriverByName(es_constants.ES2_OUTFILE_FORMAT)

    logger.debug('Doing re-projection to target mapset: %s' %
                 trg_mapset.short_name)
    # Get target SRS from mapset
    out_cs = trg_mapset.spatial_ref
    out_size_x = trg_mapset.size_x
    out_size_y = trg_mapset.size_y

    # Create target in memory
    mem_driver = gdal.GetDriverByName('MEM')

    # Assign mapset to dataset in memory
    mem_ds = mem_driver.Create('', out_size_x, out_size_y, 1,
                               out_data_type_gdal)

    mem_ds.SetGeoTransform(trg_mapset.geo_transform)
    mem_ds.SetProjection(out_cs.ExportToWkt())

    # Apply Reproject-Image to the memory-driver
    orig_wkt = orig_cs.ExportToWkt()
    res = gdal.ReprojectImage(orig_ds, mem_ds, orig_wkt, out_cs.ExportToWkt(),
                              es_constants.ES2_OUTFILE_INTERP_METHOD)

    logger.debug('Re-projection to target done.')

    # Read from the dataset in memory
    out_data = mem_ds.ReadAsArray()

    # Write to output_file
    trg_ds = out_driver.CreateCopy(output_file, mem_ds, 0,
                                   [es_constants.ES2_OUTFILE_OPTIONS])
    trg_ds.GetRasterBand(1).WriteArray(out_data)

    # -------------------------------------------------------------------------
    # Assign Metadata to the re-projected file
    # -------------------------------------------------------------------------
    # Close dataset
    trg_ds = None

    sds_meta_out.assign_es2_version()
    sds_meta_out.assign_mapset(target_mapset_id)
    sds_meta_out.assign_from_product(product_code, sub_product_code, version)
    sds_meta_out.assign_date(str_date)
    sds_meta_out.assign_subdir_from_fullpath(output_dir)
    sds_meta_out.assign_comput_time_now()
    # Copy the same input files as in the non-reprojected input
    file_list = sds_meta_in.get_item('eStation2_input_files')
    sds_meta_out.assign_input_files(file_list)

    # Write metadata to file
    sds_meta_out.write_to_file(output_file)

    # Return the filename
    return output_file
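
# Hypothetical invocation (not from the original source): the file path and
# mapset codes below are made up; real values come from the eStation2
# database and the metadata embedded in the input file.
result = reproject_output(
    '/data/processing/vgt-ndvi/sv2-pv2.2/VGT-Africa-1km/tif/ndv/20200101_vgt-ndvi_ndv_VGT-Africa-1km_sv2-pv2.2.tif',
    native_mapset_id='default',           # keep the geo-reference read from the file
    target_mapset_id='SPOTV-Africa-1km')
if result == 1:
    print('Re-projection failed')
else:
    print('Re-projected file written to %s' % result)
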
    def do_normalized_anomaly(self):
        #   ---------------------------------------------------------------------
        #   Normalized Anomaly
        if self.frequency == '10d':

            output_sprod = self.proc_lists.proc_add_subprod(
                "10dna",
                "10anomalies",
                final=False,
                descriptive_name='10d Normalized Anomaly ' +
                self.subproduct_code,
                description='10d Normalized Anomaly ' + self.subproduct_code,
                frequency_id='e1dekad',
                date_format='YYYYMMDD',
                masked=False,
                timeseries_role=self.starting_sprod,  #'10d',
                # display_index=9,
                active_default=True)
            out_prod_ident = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            output_subdir = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            #   Starting files + min + max
            self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + self.in_prod_ident
            self.formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

            ancillary_sprod_1 = "10dmin"
            ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod_1, self.mapset, self.version,
                self.ext)
            ancillary_subdir_1 = functions.set_path_sub_directory(
                self.prod, ancillary_sprod_1, 'Derived', self.version,
                self.mapset)
            self.ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

            ancillary_sprod_2 = "10dmax"
            ancillary_sprod_ident_2 = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod_2, self.mapset, self.version,
                self.ext)
            ancillary_subdir_2 = functions.set_path_sub_directory(
                self.prod, ancillary_sprod_2, 'Derived', self.version,
                self.mapset)
            self.ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2

        else:
            output_sprod = self.proc_lists.proc_add_subprod(
                "1monna",
                "monanomalies",
                final=False,
                descriptive_name='Monthly Normalized Anomaly ' +
                self.subproduct_code,
                description='Monthly Normalized Anomaly ' +
                self.subproduct_code,
                frequency_id='e1month',
                date_format='YYYYMMDD',
                masked=False,
                timeseries_role=self.input_subprod_monthly,  # '10d',
                # display_index=119,
                active_default=True)
            out_prod_ident = functions.set_path_filename_no_date(
                self.prod, output_sprod, self.mapset, self.version, self.ext)
            output_subdir = functions.set_path_sub_directory(
                self.prod, output_sprod, 'Derived', self.version, self.mapset)

            #   Starting files + min + max
            self.formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + self.in_prod_ident_monthly
            self.formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

            ancillary_sprod_1 = "1monmin"
            ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod_1, self.mapset, self.version,
                self.ext)
            ancillary_subdir_1 = functions.set_path_sub_directory(
                self.prod, ancillary_sprod_1, 'Derived', self.version,
                self.mapset)
            self.ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

            ancillary_sprod_2 = "1monmax"
            ancillary_sprod_ident_2 = functions.set_path_filename_no_date(
                self.prod, ancillary_sprod_2, self.mapset, self.version,
                self.ext)
            ancillary_subdir_2 = functions.set_path_sub_directory(
                self.prod, ancillary_sprod_2, 'Derived', self.version,
                self.mapset)
            self.ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2
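
# Sketch (not from the original source): the 10dmin/10dmax ancillaries above
# suggest the task rescales the current value against the historical range;
# the actual raster arithmetic lives in raster_image_math. Assumed formula:
def normalized_anomaly(current, hist_min, hist_max):
    # na = (current - min) / (max - min), guarded against a flat range
    if hist_max == hist_min:
        return 0.0
    return (current - hist_min) / float(hist_max - hist_min)
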
def create_pipeline(starting_sprod):
    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

    input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)
                
    starting_files = input_dir+"*"+in_prod_ident
    # Read input product nodata
    in_prod_info = querydb.get_product_out_info(productcode=prod, subproductcode=starting_sprod, version=version)  
    product_info = functions.list_to_element(in_prod_info)
    in_nodata = product_info.nodata
    
    print(in_nodata)

    #   ---------------------------------------------------------------------
    #   Monthly Average for a given month
    output_sprod = "monavg"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + out_prod_ident]
   
    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in),formatter_out)
    def modis_chla_monavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        out_filename=os.path.basename(output_file)
        str_date=out_filename[0:6]
        expected_ndays=functions.get_number_days_month(str_date)
        functions.check_output_dir(os.path.dirname(output_file))
        current_ndays=len(input_file)
        if expected_ndays != current_ndays:
            logger.info('Missing days for period: %s. Skip' % str_date)
        else:
            args = {"input_file": input_file, "output_file": output_file,
                    "output_format": 'GTIFF', "options": "compress=lzw",
                    "input_nodata": in_nodata}
            raster_image_math.do_avg_image(**args)
 
    #   ---------------------------------------------------------------------
    #   Monthly Climatology for all years
    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es_constants.processing_dir + \
                    functions.set_path_sub_directory(prod, new_input_subprod, 'Derived', version, mapset)

    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    output_sprod = "monclim"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = ["{subpath[0][5]}" + os.path.sep + output_subdir + "{MM[0]}" + out_prod_ident]

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in),formatter_out)
    def modis_chla_monclim(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw",
                "input_nodata": in_nodata}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Anomaly for a given month
    output_sprod = "monanom"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)
    
    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MM[0]}" + out_prod_ident

    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MM[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)
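
# Hypothetical driver (not from the original source): once create_pipeline()
# has registered the ruffus tasks above, the pipeline is typically inspected
# and executed as below, assuming the module-level globals (prod, mapset,
# version, ext, activate_* flags, logger) are defined. 'chla-day' is a
# made-up starting sub-product code.
import sys
from ruffus import pipeline_printout, pipeline_run

create_pipeline('chla-day')
pipeline_printout(sys.stdout, verbose=2)  # dry-run: list the out-of-date jobs
pipeline_run(verbose=1)                   # execute the active tasks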
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # 1. 10d prod stats
    activate_monavg_comput = 1
    activate_monclim_comput = 1
    activate_monanom_comput = 0

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + "*" + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    #   ---------------------------------------------------------------------
    #   Monthly Average for a given month

    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod(
        "monavg",
        "monstats",
        final=False,
        descriptive_name='Monthly average',
        description='Chla Monthly average',
        frequency_id='',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" +
        out_prod_ident
    ]

    @active_if(activate_monavg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        str_date = out_filename[0:6]
        expected_ndays = functions.get_number_days_month(str_date)
        functions.check_output_dir(os.path.dirname(output_file))
        current_ndays = len(input_file)

        # if expected_ndays != current_ndays:
        #     logger.info('Missing days for period: %s. Skip' % str_date)
        # else:
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Climatology for all years

    output_sprod = proc_lists.proc_add_subprod(
        "monclim",
        "monstats",
        final=False,
        descriptive_name='Monthly climatology',
        description='Chla Monthly climatology',
        frequency_id='',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    new_input_subprod = 'monavg'
    new_in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    new_input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, new_input_subprod, 'Derived', version, mapset)

    new_starting_files = new_input_dir + "*" + new_in_prod_ident

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MM[0]}" +
        out_prod_ident
    ]

    @active_if(activate_monclim_comput)
    @collate(new_starting_files, formatter(formatter_in), formatter_out)
    def modis_chla_monclim(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file,
                "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Anomaly for a given month

    output_sprod = proc_lists.proc_add_subprod(
        "monanom",
        "monstats",
        final=False,
        descriptive_name='Monthly anomaly',
        description='Chla Monthly anomaly',
        frequency_id='',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MM>[0-9]{2})" + new_in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MM[0]}" + out_prod_ident

    ancillary_sprod = "monclim"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MM[0]}" + ancillary_sprod_ident

    @active_if(activate_monanom_comput)
    @transform(new_starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def modis_chla_mondiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)

    return proc_lists
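
# Sketch (not from the original source): the returned ProcLists collects the
# sub-products registered above. Product, mapset and version codes here are
# hypothetical.
proc_lists = create_pipeline('modis-chla', 'chla-day', 'SPOTV-Africa-1km', 'v2013.1')
for subprod in proc_lists.list_subprods:
    print(subprod)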
Example #28
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    update_stats=False,
                    nrt_products=True):

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all off
    activate_10dstats_comput = 0  # 10d stats
    activate_10danomalies_comput = 0  # 10d anomalies

    #   switch wrt groups - according to options
    if nrt_products:
        activate_10dcount_comput = 1  # 10d count
        activate_10danomalies_comput = 1  # 10d anomalies

    if update_stats:
        activate_10dstats_comput = 1  # 10d stats

    #   switch wrt single products: not to be changed !!
    activate_10dcount_comput = 1  # 10d count

    activate_10dcountavg_comput = 1
    activate_10dcountmin_comput = 1
    activate_10dcountmax_comput = 1

    activate_10ddiff_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    #   ---------------------------------------------------------------------
    #   10dcount

    output_sprod_group = proc_lists.proc_add_subprod_group("10dcount")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcount",
        "10dcount",
        final=False,
        descriptive_name='10d Count',
        description='Fire Count for dekad',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident_10dcount = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_10dcount = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_10dcount():

        #   Look for all input files in input_dir, and sort them
        input_files = glob.glob(starting_files)
        dekad_list = []

        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for dekad in dekad_list:
            # Exclude the current dekad
            if dekad != dekad_now:
                file_list = []
                my_dekad_str = functions.conv_dekad_2_date(dekad)
                for input_file in input_files:

                    basename = os.path.basename(input_file)
                    mydate_yyyymmdd = functions.get_date_from_path_filename(
                        basename)
                    mydekad_nbr = functions.conv_date_2_dekad(
                        mydate_yyyymmdd[0:8])
                    if mydekad_nbr == dekad:
                        file_list.append(input_file)

                # Build the output name once per dekad (it does not depend on input_file)
                output_file = es_constants.processing_dir + output_subdir_10dcount + os.path.sep + my_dekad_str + out_prod_ident_10dcount

                yield (file_list, output_file)

    @active_if(activate_10dcount_comput)
    @files(generate_parameters_10dcount)
    def std_precip_10dcount(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)
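
    # Sketch (not from the original source): generate_parameters_10dcount uses
    # the ruffus "@files with a generator" pattern, where each yielded
    # (inputs, output) tuple becomes one independent job. Minimal shape:
    #
    #   def generate_parameters():
    #       yield (['day1.tif', 'day2.tif'], 'dekad.tif')
    #
    #   @files(generate_parameters)
    #   def cumulate(input_files, output_file):
    #       ...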

    #   ---------------------------------------------------------------------
    #   10dcountavg

    starting_files_10dcount = es_constants.processing_dir + output_subdir_10dcount + "*" + out_prod_ident_10dcount

    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountavg",
        "10dstats",
        final=False,
        descriptive_name='10d Fire Average',
        description='Average fire for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountavg_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10davg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            'output_type': 'Float32',
            'input_nodata': -32767
        }
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   10dcountmin

    output_sprod = proc_lists.proc_add_subprod(
        "10dcountmin",
        "10dstats",
        final=False,
        descriptive_name='10d Fire Minimum',
        description='Minimum Fire for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmin_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10dmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   10dcountmax
    output_sprod = proc_lists.proc_add_subprod(
        "10dcountmax",
        "10dstats",
        final=False,
        descriptive_name='10d Maximum',
        description='Maximum rainfall for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + out_prod_ident_10dcount
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dcountmax_comput)
    @collate(starting_files_10dcount, formatter(formatter_in), formatter_out)
    def std_precip_10dmax(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    # #   ---------------------------------------------------------------------
    # #   10dDiff
    # output_sprod_group=proc_lists.proc_add_subprod_group("10danomalies")
    # output_sprod=proc_lists.proc_add_subprod("10dcountdiff", "10danomalies", final=False,
    #                                          descriptive_name='10d Absolute Difference',
    #                                          description='10d Absolute Difference vs. LTA',
    #                                          frequency_id='e1dekad',
    #                                          date_format='YYYYMMDD',
    #                                          masked=False,
    #                                          timeseries_role='10d',
    #                                          active_default=True)
    # out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    # output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)
    #
    # #   Starting files + avg
    # formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    # formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident
    #
    # ancillary_sprod = "10davg"
    # ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    # ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset)
    # ancillary_input="{subpath[0][5]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident
    #
    # @follows(std_precip_10davg)
    # @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    # @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    # def std_precip_10ddiff(input_file, output_file):
    #
    #     output_file = functions.list_to_element(output_file)
    #     functions.check_output_dir(os.path.dirname(output_file))
    #     args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
    #     raster_image_math.do_oper_subtraction(**args)

    return proc_lists
Example #29
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):

    # Definitions
    start_season = '0901'
    second_dekad = '0911'
    end_season = '0421'

    #   ---------------------------------------------------------------------
    #   Create lists

    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Activate the onset computation (flag referenced by @active_if below)
    activate_onset_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files (10d)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = glob.glob(input_dir + "*" + in_prod_ident)

    #   ---------------------------------------------------------------------
    #   Define output files (onset)

    output_sprod = proc_lists.proc_add_subprod(
        "rain-onset",
        "none",
        final=False,
        descriptive_name='Rain Onset',
        description='Rainfall Start of the season',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    prod_ident_onset = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_onset = functions.set_path_sub_directory(prod, output_sprod,
                                                    'Derived', version, mapset)

    def generate_parameters_onset():

        starting_files.sort()

        for file_t0 in starting_files:
            # Get current date
            date_t0 = functions.get_date_from_path_full(file_t0)
            # Check if we are in the seasonal range [start < current <= end]
            dekad_t0 = int(date_t0[4:])
            dekad_start = int(start_season)
            dekad_second = int(second_dekad)
            dekad_end = int(end_season)

            # Initialize processing to 0
            do_proc = 0
            in_season = False

            # Check we are within the season -> do_proc
            if dekad_start < dekad_end:
                if dekad_t0 > dekad_start and dekad_t0 <= dekad_end:
                    in_season = True
            else:
                if dekad_t0 > dekad_start or dekad_t0 <= dekad_end:
                    in_season = True
            if in_season and (dekad_t0 == dekad_second):
                do_proc = 1
            if in_season and (dekad_t0 != dekad_second):
                do_proc = 2

            if do_proc:

                output_file = es2_data_dir + subdir_onset + str(
                    date_t0) + prod_ident_onset
                # Get files at t-1 and t-2 (if they exist)
                previous_files = functions.previous_files(file_t0)

                # Check if at least one previous file has been identified
                if do_proc == 1:

                    # Check at least 1 previous file exist
                    if len(previous_files) < 1:
                        print('Error Case 1: no previous file available')
                    else:
                        # Pass two arguments (t0 and t0-1)
                        input_files = [file_t0, previous_files[0]]
                        yield (input_files, output_file)

                elif do_proc == 2:

                    error = False
                    # Check 2 previous files exist
                    if len(previous_files) < 2:
                        print('Error Case 2: a previous file is missing')
                        error = True

                    # Look for previous output
                    previous_outputs = functions.previous_files(output_file)

                    if len(previous_outputs) < 1:
                        print('Error Case 2: the previous output is missing')
                        error = True

                    # Pass four arguments (t0, t0-1, t0-2 and output-1)
                    if not error:
                        previous_output = previous_outputs[0]
                        if os.path.isfile(previous_output):
                            input_files = [
                                file_t0, previous_files[0], previous_files[1],
                                previous_output
                            ]
                            yield (input_files, output_file)

    @active_if(activate_onset_comput)
    @files(generate_parameters_onset)
    def rain_onset(input_files, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        # Need to define the current_dekad number, wrt begin of season
        current_date = functions.get_date_from_path_full(output_file)
        current_dekad = current_date[4:]
        dekad_number = functions.dekad_nbr_in_season(current_dekad,
                                                     start_season)

        # Call the function
        args = {
            "input_file": input_files,
            "output_file": output_file,
            'input_nodata': None,
            'output_nodata': None,
            'output_type': 'Int16',
            "output_format": 'GTIFF',
            "options": "compress = lzw",
            'current_dekad': dekad_number
        }
        raster_image_math.do_rain_onset(**args)

    return proc_lists
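
# Standalone check (not from the original source) of the season-membership
# test in generate_parameters_onset: the season wraps the year boundary
# (start 0901, end 0421), hence the two-branch comparison.
def in_season(dekad_t0, dekad_start=901, dekad_end=421):
    if dekad_start < dekad_end:
        return dekad_start < dekad_t0 <= dekad_end
    return dekad_t0 > dekad_start or dekad_t0 <= dekad_end

assert in_season(911)       # mid-September: in season
assert in_season(111)       # January: in season (after the year wrap)
assert not in_season(601)   # June: out of season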
Example #30
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    nrt_products=True,
                    logger=None):

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Fall back to a default logger when none is passed in
    if logger is None:
        logger = log.my_logger(__name__)

    # Set DEFAULTS: all ON
    activate_3davg_comput = 1
    activate_1monavg_comput = 1

    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files (chl)
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)
    input_dir = es2_data_dir + functions.set_path_sub_directory(
        prod, starting_sprod, 'Ingest', version, mapset)
    starting_files = input_dir + "*" + in_prod_ident

    # ----------------------------------------------------------------------------------------------------------------
    # 1 . 3davg
    # 3 Day average of the 1 day Chl, re-projected on target mapset
    output_sprod = proc_lists.proc_add_subprod("3dayavg",
                                               prod,
                                               final=False,
                                               descriptive_name='3day Avg',
                                               description='3 day Average',
                                               frequency_id='e1day',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)

    prod_ident_3davg = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    subdir_3davg = functions.set_path_sub_directory(prod, output_sprod,
                                                    'Derived', version, mapset)

    # Use a specific function, to skip the current day
    def generate_parameters_3davg():

        #   Look for all input files in input_dir, and sort them
        if starting_dates is not None:
            input_files = []
            for my_date in starting_dates:
                input_files.append(input_dir + my_date + in_prod_ident)
        else:
            starting_files = input_dir + "*" + in_prod_ident
            input_files = glob.glob(starting_files)

        logger.debug("starting_files %s" % input_files)

        day_list = []

        # Create unique list of all days (as YYYYMMDD strings)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            if mydate_yyyymmdd not in day_list:
                day_list.append(mydate_yyyymmdd)

        day_list = sorted(day_list)

        # Compute the 'julian' dekad for the current day
        today = datetime.today()
        yesterday = today - timedelta(1)
        today_str = today.strftime('%Y%m%d')
        yesterday_str = yesterday.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        for myday in day_list:
            # Note: the exclusion of the current day and yesterday is disabled
            # if myday != today_str or myday != yesterday_str:

            # Select the input file(s) matching the current day
            input_file = [s for s in input_files if myday in s]
            file_list = []

            basename = os.path.basename(input_file[0])
            # Date is in format YYYYMMDD
            mydate_yyyymmdd = functions.get_date_from_path_filename(basename)

            yyyy = int(mydate_yyyymmdd[0:4])
            mm = int(mydate_yyyymmdd[4:6])
            dd = int(mydate_yyyymmdd[6:8])
            day2 = datetime(yyyy, mm, dd) + timedelta(1)
            day2_filepath = input_dir + day2.strftime('%Y%m%d') + in_prod_ident
            if not functions.is_file_exists_in_path(day2_filepath):
                continue

            day3 = datetime(yyyy, mm, dd) + timedelta(2)
            day3_filepath = input_dir + day3.strftime('%Y%m%d') + in_prod_ident
            if not functions.is_file_exists_in_path(day3_filepath):
                continue

            file_list.append(input_file[0])
            file_list.append(day2_filepath)
            file_list.append(day3_filepath)

            output_file = es_constants.processing_dir + subdir_3davg + os.path.sep + mydate_yyyymmdd + prod_ident_3davg
            file_list = sorted(file_list)
            # Check here the number of missing files (for optimization)
            if len(file_list) == 3:
                yield (file_list, output_file)

    @active_if(activate_3davg_comput)
    @files(generate_parameters_3davg)
    def compute_3dayavg(input_file, output_file):

        no_data = int(sds_meta.get_nodata_value(input_file[0]))
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "",
            "input_nodata": no_data,
            "output_nodata": no_data
        }
        raster_image_math.do_avg_image(**args)

    return proc_lists
Example #31
def processing_merge(pipeline_run_level=0, pipeline_printout_level=0,
                     input_products='', output_product='', mapset='', logfile=None):

    if logfile:
        spec_logger = log.my_logger(logfile)
        spec_logger.info("Entering routine %s" % 'processing_merge')

    # Dummy return arguments
    proc_lists = functions.ProcLists()
    list_subprods = proc_lists.list_subprods
    list_subprod_groups = proc_lists.list_subprod_groups

    es2_data_dir = es_constants.processing_dir+os.path.sep

    # Do some checks on the integrity of the inputs

    # Manage output_product data
    out_product_code = output_product[0].productcode
    out_sub_product_code = output_product[0].subproductcode
    out_version = output_product[0].version
    out_mapset = output_product[0].mapsetcode

    out_subdir = functions.set_path_sub_directory(out_product_code, out_sub_product_code,'Ingest', out_version, out_mapset)
    out_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, out_version, ext)
    out_dir = es2_data_dir + out_subdir

    # Check the output product directory
    functions.check_output_dir(out_dir)
    # Fill the processing list -> some fields to be taken from the input products
    output_sprod_group = proc_lists.proc_add_subprod_group("merged")
    output_sprod = proc_lists.proc_add_subprod(out_sub_product_code, "merged", final=False,
                                             descriptive_name='undefined',
                                             description='undefined',
                                             frequency_id='e1dekad',
                                             date_format='YYYYMMDD',
                                             masked=False,
                                             timeseries_role='10d',
                                             active_default=True)

    # Loop over the input products:
    for input in input_products:

        # Extract info from input product
        product_code = input.productcode
        sub_product_code = input.subproductcode
        version = input.version
        start_date = input.start_date
        end_date = input.end_date
        product_info = querydb.get_product_out_info_connect(productcode=product_code,
                                                   subproductcode=sub_product_code,
                                                   version=version)
        prod_type = product_info[0].product_type

        in_subdir = functions.set_path_sub_directory(product_code, sub_product_code, prod_type, version, out_mapset)
        in_prod_ident = functions.set_path_filename_no_date(out_product_code, out_sub_product_code, out_mapset, version, ext)

        # Create the list of dates -> returns empty if start==end==None
        list_dates = proc_functions.get_list_dates_for_dataset(product_code, sub_product_code, version,
                                                               start_date=start_date, end_date=end_date)
        # If list_dates == None, look at all existing files
        if list_dates is None:
            print('To be Done !!!')
        # Otherwise, build list of files from list of dates
        else:
            for my_date in list_dates:
                in_file_path = es2_data_dir + in_subdir + my_date + in_prod_ident
                out_file_path = out_dir+my_date+out_prod_ident

                # Create the link
                status = functions.create_sym_link(in_file_path, out_file_path, force=False)
                if status == 0 and logfile:
                    spec_logger.info("Merged file %s created" % out_file_path)

    return list_subprods, list_subprod_groups
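
# Hypothetical invocation (not from the original source): processing_merge
# expects database-row-like objects for its product arguments; the stand-in
# rows and codes below are made up.
from collections import namedtuple

Row = namedtuple('Row', ['productcode', 'subproductcode', 'version',
                         'mapsetcode', 'start_date', 'end_date'])

output_product = [Row('vgt-ndvi', 'ndv', 'merged-1.0', 'SPOTV-Africa-1km', None, None)]
input_products = [Row('vgt-ndvi', 'ndv', 'sv2-pv2.2', 'SPOTV-Africa-1km',
                      '20200101', '20201221')]

processing_merge(input_products=input_products, output_product=output_product,
                 mapset='SPOTV-Africa-1km', logfile='merge.log')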
def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None,
                    update_stats=False,
                    nrt_products=True):

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    #   switch wrt groups - according to options

    # DEFAULT: ALL off

    activate_10dstats_comput = 0  # 10d stats
    activate_10danomalies_comput = 0  # 10d anomalies

    activate_monthly_comput = 0  # monthly cumulation
    activate_monstats_comput = 0  # monthly stats
    activate_monanomalies_comput = 0  # monthly anomalies

    if nrt_products:
        activate_monthly_comput = 0  # monthly cumulation
        activate_monanomalies_comput = 0  # monthly anomalies
        activate_10danomalies_comput = 1  # 10d anomalies

    if update_stats:
        activate_10dstats_comput = 1  # 10d stats
        activate_monstats_comput = 0  # monthly stats

    #   switch wrt single products: not to be changed !!
    activate_10davg_comput = 1
    activate_10dmin_comput = 1
    activate_10dmax_comput = 1
    activate_10ddiff_comput = 1
    activate_10dperc_comput = 1
    activate_10dnp_comput = 0
    activate_10dratio_comput = 1

    activate_1moncum_comput = 1
    activate_1monavg_comput = 1
    activate_1monmin_comput = 1
    activate_1monmax_comput = 1
    activate_1mondiff_comput = 1
    activate_1monperc_comput = 1
    activate_1monnp_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    #   ---------------------------------------------------------------------
    #   Average
    output_sprod_group = proc_lists.proc_add_subprod_group("10dstats")
    output_sprod = proc_lists.proc_add_subprod(
        "10davg",
        "10dstats",
        final=False,
        descriptive_name='10d Average',
        description='Average dry matter productivity for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10davg_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10davg(input_file, output_file):

        reduced_list = exclude_current_year(input_file)
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_avg_image(**args)
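
    # Sketch (not from the original source): exclude_current_year is not
    # defined in this snippet. A plausible implementation, assuming eStation2
    # file names start with the date (YYYYMMDD):
    #
    #   def exclude_current_year(file_list):
    #       this_year = str(datetime.date.today().year)
    #       return [f for f in file_list
    #               if not os.path.basename(f).startswith(this_year)]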

    #   ---------------------------------------------------------------------
    #   Minimum
    output_sprod = proc_lists.proc_add_subprod(
        "10dmin",
        "10dstats",
        final=False,
        descriptive_name='10d Minimum',
        description='Minimum DMP for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dmin_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10dmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        reduced_list = exclude_current_year(input_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   Maximum
    output_sprod = proc_lists.proc_add_subprod(
        "10dmax",
        "10dstats",
        final=False,
        descriptive_name='10d Maximum',
        description='Maximum DMP for dekad',
        frequency_id='e1dekad',
        date_format='MMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir + "{MMDD[0]}" +
        out_prod_ident
    ]

    @active_if(activate_10dstats_comput, activate_10dmax_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def std_dmp_10dmax(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        reduced_list = exclude_current_year(input_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": reduced_list,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   10dDiff
    output_sprod_group = proc_lists.proc_add_subprod_group("10anomalies")
    output_sprod = proc_lists.proc_add_subprod(
        "10ddiff",
        "10anomalies",
        final=False,
        descriptive_name='10d Absolute Difference',
        description='10d Absolute Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @active_if(activate_10danomalies_comput, activate_10ddiff_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def std_dmp_10ddiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_oper_subtraction(**args)
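
    # Note (added for clarity): @transform + add_inputs delivers the pair
    # (current_file, 10davg_file) as input_file here, and do_oper_subtraction
    # is assumed to unpack the two rasters itself; std_dmp_10dperc below
    # indexes input_file[0]/input_file[1] explicitly instead.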

    #   ---------------------------------------------------------------------
    #   10dperc
    output_sprod = proc_lists.proc_add_subprod(
        "10dperc",
        "10anomalies",
        final=False,
        descriptive_name='10d Percent Difference',
        description='10d Percent Difference vs. LTA',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(
        prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir = functions.set_path_sub_directory(
        prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input = "{subpath[0][5]}" + os.path.sep + ancillary_subdir + "{MMDD[0]}" + ancillary_sprod_ident

    @follows(std_dmp_10davg)
    @active_if(activate_10danomalies_comput, activate_10dperc_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input), formatter_out)
    def std_dmp_10dperc(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "avg_file": input_file[1],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    #   ---------------------------------------------------------------------
    #   10dnp
    output_sprod = proc_lists.proc_add_subprod(
        "10dnp",
        "10anomalies",
        final=False,
        descriptive_name='10d Normalized Anomaly',
        description='10d Normalized Anomaly',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    #   Starting files + min + max
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod_1 = "10dmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1 = functions.set_path_sub_directory(
        prod, ancillary_sprod_1, 'Derived', version, mapset)
    ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

    ancillary_sprod_2 = "10dmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2 = functions.set_path_sub_directory(
        prod, ancillary_sprod_2, 'Derived', version, mapset)
    ancillary_input_2 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_2 + "{MMDD[0]}" + ancillary_sprod_ident_2

    @active_if(activate_10danomalies_comput, activate_10dnp_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
    def std_dmp_10dnp(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file[0],
            "min_file": input_file[1],
            "max_file": input_file[2],
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_make_vci(**args)
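
    # Note (an assumption about do_make_vci, not confirmed in this module):
    # the normalized anomaly is expected to follow the classic VCI formula,
    # 100 * (value - min) / (max - min), computed pixel-wise from the current
    # dekad and the inter-annual minimum/maximum grids passed above.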

    #   ---------------------------------------------------------------------
    #   10dratio
    output_sprod = proc_lists.proc_add_subprod(
        "10dratio",
        "10anomalies",
        final=False,
        descriptive_name='10d Ratio',
        description='10d Ratio (curr/avg)',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='10d',
        active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     mapset)

    #   Starting files + avg
    formatter_in = "(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYY[0]}{MMDD[0]}" + out_prod_ident

    ancillary_sprod_1 = "10davg"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(
        prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1 = functions.set_path_sub_directory(
        prod, ancillary_sprod_1, 'Derived', version, mapset)
    ancillary_input_1 = "{subpath[0][5]}" + os.path.sep + ancillary_subdir_1 + "{MMDD[0]}" + ancillary_sprod_ident_1

    @active_if(activate_10danomalies_comput, activate_10dratio_comput)
    @transform(starting_files, formatter(formatter_in),
               add_inputs(ancillary_input_1), formatter_out)
    def std_dmp_10dratio(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))

        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress = lzw"
        }
        raster_image_math.do_oper_division_perc(**args)

    return proc_lists


def create_pipeline(prod,
                    starting_sprod,
                    mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):

    #   ---------------------------------------------------------------------
    #   Create lists to store definition of the derived products, and their
    #   groups
    #   ---------------------------------------------------------------------

    if proc_lists is None:
        proc_lists = functions.ProcLists()

    #   ---------------------------------------------------------------------
    #   Define and assign the flags to control the individual derived products
    #   and the groups. NOT to be changed by the User
    #   ---------------------------------------------------------------------

    # Set DEFAULTS: all on
    activate_cumul_comput = 1  # cumulated products
    activate_spi_comput = 1  # spi indicators

    # Set DEFAULTS: all on
    activate_cumul_3mon_comput = 1  # cumulated product 3mon
    activate_cumul_6mon_comput = 1  # cumulated product 6mon
    activate_cumul_1year_comput = 1  # cumulated product 1year

    activate_spi_1mon_comput = 1  # spi indicator 1mon
    activate_spi_3mon_comput = 1  # spi indicator 3mon
    activate_spi_6mon_comput = 1  # spi indicator 6mon
    activate_spi_1year_comput = 1  # spi indicator 1year

    #   Base data directory
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files from the starting_sprod and starting_dates arguments
    #   ---------------------------------------------------------------------

    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, mapset, version, ext)

    #logger.debug('Base data directory is: %s' % es2_data_dir)
    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Derived', version, mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            if os.path.isfile(input_dir + my_date + in_prod_ident):
                starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    #   Look for all input files in input_dir, and sort them
    if starting_dates is not None:
        input_files = starting_files
    else:
        input_files = glob.glob(starting_files)
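
    # Note (added for clarity): starting_files is either an explicit list of
    # existing files (when starting_dates is given) or a glob pattern; the
    # pattern is expanded here so that input_files is a concrete list in
    # both cases.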

    #   ---------------------------------------------------------------------
    #   Cumulated products - 3mon
    #   ---------------------------------------------------------------------
    output_sprod_group = proc_lists.proc_add_subprod_group("cumul")
    output_sprod = proc_lists.proc_add_subprod(
        "3mon",
        "cumul",
        final=False,
        descriptive_name='3-monthly Precipitation',
        description='Precipitation for 3 months',
        frequency_id='e3month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_3moncum = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_3moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    def generate_parameters_3moncum():

        # Number of months to consider
        n_mon = 3
        dates_list = []

        # Extract and sort all dates
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            dates_list.append(mydate_yyyymmdd)

        dates_list = sorted(dates_list)
        # loop from the ('n_mon'-1)-th date to the last date - this is the period end-limit
        for date_index in range(n_mon - 1, len(dates_list)):

            mydate = dates_list[date_index]
            prev_date = dates_list[date_index - n_mon + 1]
            file_list = []
            # Get month-date and
            m_1 = datetime.date(int(mydate[0:4]), int(mydate[4:6]), 1)
            m_2 = datetime.date(int(prev_date[0:4]), int(prev_date[4:6]), 1)
            delta = m_1 - m_2
            # Check there are no missing months, i.e. delta <= 31*(n_mon-1) days
            if delta.days <= (31 * (n_mon - 1)):
                for curr_index in range(0, n_mon):
                    curr_date = dates_list[date_index - curr_index]
                    if os.path.isfile(input_dir + curr_date + in_prod_ident):
                        file_list.append(input_dir + curr_date + in_prod_ident)

                output_file = es_constants.processing_dir + output_subdir_3moncum + os.path.sep + mydate + out_prod_ident_3moncum
                yield (file_list, output_file)
            else:
                print(
                    'At least 1 month is missing for period ending {0}'.format(
                        mydate))

    @active_if(activate_cumul_3mon_comput)
    @files(generate_parameters_3moncum)
    def std_precip_3moncum(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw"
        }
        raster_image_math.do_cumulate(**args)
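
    #   Worked example (illustrative, not part of the original pipeline):
    #   the completeness test in generate_parameters_3moncum() keeps a window
    #   only when the month starts of its first and last dates are at most
    #   31*(n_mon-1) days apart.
    def _window_is_complete(mydate, prev_date, n_mon=3):
        m_last = datetime.date(int(mydate[0:4]), int(mydate[4:6]), 1)
        m_first = datetime.date(int(prev_date[0:4]), int(prev_date[4:6]), 1)
        return (m_last - m_first).days <= 31 * (n_mon - 1)

    # _window_is_complete('20200301', '20200101') -> True  (60 <= 62 days)
    # _window_is_complete('20200401', '20200101') -> False (91 > 62: a month missing)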

    # #   ---------------------------------------------------------------------
    # #   Cumulated products - 6mon
    # #   ---------------------------------------------------------------------
    #
    # output_sprod_group=proc_lists.proc_add_subprod_group("cumul")
    # output_sprod=proc_lists.proc_add_subprod("6mon", "cumul", final=False,
    #                                          descriptive_name='6-monthly Precipitation',
    #                                          description='Precipitation for 6 months',
    #                                          frequency_id='e3month',
    #                                          date_format='YYYYMMDD',
    #                                          masked=False,
    #                                          timeseries_role='',
    #                                          active_default=True)
    #
    # out_prod_ident_6moncum = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    # output_subdir_6moncum  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)
    #
    # def generate_parameters_6moncum():
    #
    #     # Number of months to consider
    #     n_mon = 6
    #     dates_list = []
    #
    #     # Extract and sort all dates
    #     for input_file in input_files:
    #         basename=os.path.basename(input_file)
    #         mydate=functions.get_date_from_path_filename(basename)
    #         mydate_yyyymmdd=str(mydate)[0:8]
    #         dates_list.append(mydate_yyyymmdd)
    #
    #     dates_list = sorted(dates_list)
    #     # loop from the 'n_mon'-1 date to the last date - this is the period end-limit
    #     for date_index in range(n_mon-1,len(dates_list)-1):
    #
    #         mydate = dates_list[date_index]
    #         prev_date = dates_list[date_index-n_mon+1]
    #         file_list = []
    #         # Get month-date and
    #         m_1 = datetime.date(int(mydate[0:4]),int(mydate[4:6]),1)
    #         m_2 = datetime.date(int(prev_date[0:4]),int(prev_date[4:6]),1)
    #         delta = m_1 - m_2
    #         # Check there are no missing month, i.e. tot_delta < 155 days
    #         if delta.days <=(31*(n_mon-1)):
    #             for curr_index in range(0,n_mon):
    #                 curr_date = dates_list[date_index-curr_index]
    #                 if os.path.isfile(input_dir+curr_date+in_prod_ident):
    #                     file_list.append(input_dir+curr_date+in_prod_ident)
    #
    #             output_file=es_constants.processing_dir+output_subdir_6moncum+os.path.sep+mydate+out_prod_ident_6moncum
    #             yield (file_list, output_file)
    #         else:
    #             print('At least 1 month is missing for period ending {0}'.format(mydate))
    #
    # @active_if(activate_cumul_6mon_comput)
    # @files(generate_parameters_6moncum)
    # def std_precip_6moncum(input_file, output_file):
    #
    #     output_file = functions.list_to_element(output_file)
    #     functions.check_output_dir(os.path.dirname(output_file))
    #     args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
    #     raster_image_math.do_cumulate(**args)

    # End of pipeline definition
    return proc_lists
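
# ---------------------------------------------------------------------
# Usage sketch (illustrative: the guard and the product/mapset values
# below are assumptions, not part of the original module). Once
# create_pipeline() has registered its ruffus tasks, a run is typically
# driven with ruffus' pipeline_printout()/pipeline_run():
if __name__ == '__main__':
    import sys
    from ruffus import pipeline_printout, pipeline_run

    create_pipeline(prod='fewsnet-rfe', starting_sprod='1moncum',
                    mapset='FEWSNET-Africa-8km', version='2.0')
    pipeline_printout(sys.stdout, verbose=3)  # dry run: show out-of-date tasks
    pipeline_run(verbose=1)                   # execute the tasks defined above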
def create_pipeline(input_products,
                    output_product,
                    logfile=None,
                    nrt_products=True,
                    update_stats=False):
    proc_lists = functions.ProcLists()

    spec_logger = log.my_logger(logfile)
    spec_logger.info("Entering routine %s" % 'processing_modis_pp')

    # Set DEFAULTS: all off
    activate_pp_comput = 0  # PP from Chla, SST, Kd490 and PAR

    activate_stats_comput = 0  # Stats computation (inter-annual clim, min, max)
    activate_anomalies_comput = 0  # Anomalies computation (not yet done!!)

    #   switch wrt groups - according to options
    if nrt_products:
        activate_pp_comput = 1  # PP from Chla, SST, Kd490 and PAR

    if update_stats:
        activate_stats_comput = 1
        activate_anomalies_comput = 1

    activate_pp_prod_comput = 1
    activate_pp_stats_clim_comput = 1
    activate_pp_stats_min_comput = 1
    activate_pp_stats_max_comput = 1

    #   ---------------------------------------------------------------------
    #   Create lists

    # my_date='20160601'
    my_date = ''
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #    Parse the arguments and extract the 4 input variables
    #
    if len(input_products) != 4:
        spec_logger.error('Modis PP computation requires 4 inputs. Exit')
        return 1

    found_chla = False
    found_sst = False
    found_par = False
    found_kd490 = False

    for input_product in input_products:

        if re.search('.*chla.*', input_product.productcode):
            found_chla = True
            chla_prod = input_product.productcode
            chla_version = input_product.version
            chla_sprod = input_product.subproductcode
            chla_mapset = input_product.mapsetcode
            chla_prod_ident = functions.set_path_filename_no_date(
                chla_prod, chla_sprod, chla_mapset, chla_version, ext)
            chla_input_dir = es2_data_dir + \
                             functions.set_path_sub_directory(chla_prod, chla_sprod, 'Derived', chla_version,
                                                              chla_mapset)

        if re.search('.*sst.*', input_product.productcode):
            found_sst = True
            sst_prod = input_product.productcode
            sst_version = input_product.version
            sst_sprod = input_product.subproductcode
            sst_mapset = input_product.mapsetcode
            sst_prod_ident = functions.set_path_filename_no_date(
                sst_prod, sst_sprod, sst_mapset, sst_version, ext)
            sst_input_dir = es2_data_dir + \
                            functions.set_path_sub_directory(sst_prod, sst_sprod, 'Derived', sst_version, sst_mapset)

        if re.search('.*kd490.*', input_product.productcode):
            found_kd490 = True
            kd490_prod = input_product.productcode
            kd490_version = input_product.version
            kd490_sprod = input_product.subproductcode
            kd490_mapset = input_product.mapsetcode
            kd490_prod_ident = functions.set_path_filename_no_date(
                kd490_prod, kd490_sprod, kd490_mapset, kd490_version, ext)
            kd490_input_dir = es2_data_dir + \
                              functions.set_path_sub_directory(kd490_prod, kd490_sprod, 'Derived', kd490_version,
                                                               kd490_mapset)

        if re.search('.*par.*', input_product.productcode):
            found_par = True
            par_prod = input_product.productcode
            par_version = input_product.version
            par_sprod = input_product.subproductcode
            par_mapset = input_product.mapsetcode
            par_prod_ident = functions.set_path_filename_no_date(
                par_prod, par_sprod, par_mapset, par_version, ext)
            par_input_dir = es2_data_dir + \
                            functions.set_path_sub_directory(par_prod, par_sprod, 'Derived', par_version, par_mapset)

    # Check consistency of inputs
    if not (found_chla and found_kd490 and found_par and found_sst):
        spec_logger.error('At least one of the 4 expected inputs is missing. Exit')
        return 1

    if chla_mapset != sst_mapset or chla_mapset != kd490_mapset or chla_mapset != par_mapset:
        spec_logger.error('All 4 input mapsets must be equal. Exit')
        return 1

    # Read input product nodata

    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod,
                                                  subproductcode=chla_sprod,
                                                  version=chla_version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata
    chla_frequency = chla_product_info.frequency_id

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod,
                                                 subproductcode=sst_sprod,
                                                 version=sst_version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd490_prod,
                                                subproductcode=kd490_sprod,
                                                version=kd490_version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod,
                                                 subproductcode=par_sprod,
                                                 version=par_version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    # Define input files
    # if starting_dates is not None:
    #     starting_files = []
    #     for my_date in starting_dates:
    #         starting_files.append(input_dir+my_date+in_prod_ident)
    # else:
    #     starting_files=input_dir+"*"+in_prod_ident

    # Define outputs

    output_nodata = -32767

    old = False

    # NOTE: the prod/mapset/version are taken from the FIRST OUTPUT passed
    #       subprod defined according to the frequency

    output_prod = output_product[0].productcode
    output_version = output_product[0].version
    output_mapset = output_product[0].mapsetcode

    if old:
        # Get the first output -> PP subproduct generated (8daysavg or monavg)
        output_sprod = output_product[0].subproductcode
    else:
        # Define the outputs according to the frequency (method in 'functions' to be created !!)
        if chla_frequency == 'e1month':
            frequency_string = 'monthly'
            output_sprod = 'monavg'
            output_sprod_clim = '1monclim'
            output_sprod_min = '1monmin'
            output_sprod_max = '1monmax'
            sub_product_group = '1monstat'
        elif chla_frequency == 'e1modis8day':
            frequency_string = '8 days'
            output_sprod = '8daysavg'
            activate_pp_stats_clim_comput = 1
            activate_pp_stats_min_comput = 1
            activate_pp_stats_max_comput = 1
            sub_product_group = '8daysstat'
            output_sprod_clim = '8daysclim'
            output_sprod_min = '8daysmin'
            output_sprod_max = '8daysmax'
        else:
            spec_logger.error('Frequency not recognized: %s. Exit!',
                              chla_frequency)
            return 1

    out_prod_ident = functions.set_path_filename_no_date(
        output_prod, output_sprod, output_mapset, output_version, ext)
    output_subdir = functions.set_path_sub_directory(output_prod, output_sprod,
                                                     'Derived', output_version,
                                                     output_mapset)

    # Fixes ES2-36
    def generate_input_files_pp():

        # Take kd490 as starting point
        kd_files = kd490_input_dir + my_date + "*" + kd490_prod_ident
        input_files = sorted(glob.glob(kd_files))

        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)

            ancillary_chla = chla_input_dir + mydate + chla_prod_ident
            ancillary_par = par_input_dir + mydate + par_prod_ident
            ancillary_sst = sst_input_dir + mydate + sst_prod_ident

            do_comp = True
            if not os.path.isfile(ancillary_chla):
                do_comp = False
            if not os.path.isfile(ancillary_par):
                do_comp = False
            if not os.path.isfile(ancillary_sst):
                do_comp = False

            if do_comp is True:
                output_file = es_constants.processing_dir + output_subdir + os.path.sep + mydate + out_prod_ident
                my_inputs = (input_file, ancillary_chla, ancillary_par,
                             ancillary_sst)
                yield (my_inputs, output_file)

    @active_if(activate_pp_comput)
    @files(generate_input_files_pp)
    def modis_pp_comp(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"chla_file": input_file[1], "sst_file": input_file[3], "kd_file": input_file[0],
                "par_file": input_file[2], \
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata, "chla_nodata": chla_nodata, \
                "par_nodata": par_nodata, "output_file": output_file, "output_nodata": output_nodata,
                "output_format": 'GTIFF', \
                "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)
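
    # Note (added for clarity): generate_input_files_pp() yields each job as
    # ((kd_file, chla_file, par_file, sst_file), output_file), which is why
    # the kwargs above index input_file[0]..input_file[3] in that order.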

    #   ---------------------------------------------------------------------
    #   Climatology (inter-annual average)

    prod = output_prod
    mapset = output_mapset
    new_input_subprod = output_sprod
    version = output_version
    in_prod_ident = functions.set_path_filename_no_date(
        prod, new_input_subprod, mapset, version, ext)
    in_prod_subdir = functions.set_path_sub_directory(prod, new_input_subprod,
                                                      'Derived', version,
                                                      mapset)
    starting_files = es2_data_dir + in_prod_subdir + "*" + in_prod_ident
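
    # Note (added for clarity): the statistics below consume the PP product
    # generated above, so prod/mapset/version are re-pointed at the first
    # output product and starting_files globs its 'Derived' directory.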

    output_sprod_group = proc_lists.proc_add_subprod_group(sub_product_group)
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_clim,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Climatology at ' + frequency_string +
        ' frequency',
        description='Inter-annual Climatology at ' + frequency_string +
        ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_clim = functions.set_path_filename_no_date(
        prod, output_sprod, mapset, version, ext)
    output_subdir_clim = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_clim + "{MMDD[0]}" +
        out_prod_ident_clim
    ]

    # Fixes ES2-304
    def generate_input_files_pp_stats():

        # MMDD_nonleap_list = ['0101', '0109', '0117', '0125', '0202', '0210', '0218', '0226', '0306', '0314', '0314',
        #                        '0330', '0407', '0415', '0423', '0501', '0509', '0517', '0525', '0602', '0610', '0618',
        #                        '0626', '0704', '0712', '0720', '0728', '0805', '0813', '0821', '0829', '0906', '0914',
        #                        '0922', '0930', '1008', '1016', '1024', '1101', '1109', '1117', '1125', '1203', '1211',
        #                        '1219', '1227']

        MMDD_nonleap_dict = {
            '0101': '0101',
            '0109': '0109',
            '0117': '0117',
            '0125': '0125',
            '0202': '0202',
            '0210': '0210',
            '0218': '0218',
            '0226': '0226',
            '0306': '0305',
            '0314': '0313',
            '0322': '0321',
            '0330': '0329',
            '0407': '0406',
            '0415': '0414',
            '0423': '0422',
            '0501': '0430',
            '0509': '0508',
            '0517': '0516',
            '0525': '0524',
            '0602': '0601',
            '0610': '0609',
            '0618': '0617',
            '0626': '0625',
            '0704': '0703',
            '0712': '0711',
            '0720': '0719',
            '0728': '0727',
            '0805': '0804',
            '0813': '0812',
            '0821': '0820',
            '0829': '0828',
            '0906': '0905',
            '0914': '0913',
            '0922': '0921',
            '0930': '0929',
            '1008': '1007',
            '1016': '1015',
            '1024': '1023',
            '1101': '1031',
            '1109': '1108',
            '1117': '1116',
            '1125': '1124',
            '1203': '1202',
            '1211': '1210',
            '1219': '1218',
            '1227': '1226'
        }
        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))

            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_clim + os.path.sep + MMDD_nonleap + out_prod_ident_clim
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @files(generate_input_files_pp_stats)
        def std_yearly_clim(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_avg_image(**args)

    else:

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_clim_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_clim(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_avg_image(**args)
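
    # Side note (illustrative, not in the original source): MMDD_nonleap_dict
    # pairs each 8-day composite start date of a non-leap year with its
    # leap-year twin, which falls one calendar day earlier after 28 February.
    # The same 46 pairs can be rebuilt from the 8-day day-of-year grid:
    def _mmdd_leap_pairs():
        import datetime
        pairs = {}
        for doy in range(1, 366, 8):  # composite starts at DOY 1, 9, ..., 361
            nonleap = datetime.date(2001, 1, 1) + datetime.timedelta(days=doy - 1)
            leap = datetime.date(2000, 1, 1) + datetime.timedelta(days=doy - 1)
            pairs[nonleap.strftime('%m%d')] = leap.strftime('%m%d')
        return pairs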

    #   ---------------------------------------------------------------------
    #   Minimum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_min,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Minimum at ' + frequency_string +
        ' frequency',
        description='Inter-annual Minimum at ' + frequency_string +
        ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_min = functions.set_path_filename_no_date(
        prod, output_sprod_min, mapset, version, ext)
    output_subdir_min = functions.set_path_sub_directory(
        prod, output_sprod_min, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_min + "{MMDD[0]}" +
        out_prod_ident_min
    ]

    def generate_input_files_pp_stats_min():

        # MMDD_nonleap_list = ['0101', '0109', '0117', '0125', '0202', '0210', '0218', '0226', '0306', '0314', '0314',
        #                        '0330', '0407', '0415', '0423', '0501', '0509', '0517', '0525', '0602', '0610', '0618',
        #                        '0626', '0704', '0712', '0720', '0728', '0805', '0813', '0821', '0829', '0906', '0914',
        #                        '0922', '0930', '1008', '1016', '1024', '1101', '1109', '1117', '1125', '1203', '1211',
        #                        '1219', '1227']

        MMDD_nonleap_dict = {
            '0101': '0101',
            '0109': '0109',
            '0117': '0117',
            '0125': '0125',
            '0202': '0202',
            '0210': '0210',
            '0218': '0218',
            '0226': '0226',
            '0306': '0305',
            '0314': '0313',
            '0322': '0321',
            '0330': '0329',
            '0407': '0406',
            '0415': '0414',
            '0423': '0422',
            '0501': '0430',
            '0509': '0508',
            '0517': '0516',
            '0525': '0524',
            '0602': '0601',
            '0610': '0609',
            '0618': '0617',
            '0626': '0625',
            '0704': '0703',
            '0712': '0711',
            '0720': '0719',
            '0728': '0727',
            '0805': '0804',
            '0813': '0812',
            '0821': '0820',
            '0829': '0828',
            '0906': '0905',
            '0914': '0913',
            '0922': '0921',
            '0930': '0929',
            '1008': '1007',
            '1016': '1015',
            '1024': '1023',
            '1101': '1031',
            '1109': '1108',
            '1117': '1116',
            '1125': '1124',
            '1203': '1202',
            '1211': '1210',
            '1219': '1218',
            '1227': '1226'
        }
        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))

            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_min + os.path.sep + MMDD_nonleap + out_prod_ident_min
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @files(generate_input_files_pp_stats_min)
        def std_yearly_min(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_min_image(**args)

    else:

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_min_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_min(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   Maximum
    output_sprod = proc_lists.proc_add_subprod(
        output_sprod_max,
        sub_product_group,
        final=False,
        descriptive_name='Inter-annual Maximum at ' + frequency_string +
        ' frequency',
        description='Inter-annual Maximum at ' + frequency_string +
        ' frequency',
        frequency_id=chla_frequency,
        date_format='MMDD',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident_max = functions.set_path_filename_no_date(
        prod, output_sprod_max, mapset, version, ext)
    output_subdir_max = functions.set_path_sub_directory(
        prod, output_sprod_max, 'Derived', version, mapset)

    formatter_in = "[0-9]{4}(?P<MMDD>[0-9]{4})" + in_prod_ident
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_max + "{MMDD[0]}" +
        out_prod_ident_max
    ]

    def generate_input_files_pp_stats_max():

        MMDD_nonleap_dict = {
            '0101': '0101',
            '0109': '0109',
            '0117': '0117',
            '0125': '0125',
            '0202': '0202',
            '0210': '0210',
            '0218': '0218',
            '0226': '0226',
            '0306': '0305',
            '0314': '0313',
            '0322': '0321',
            '0330': '0329',
            '0407': '0406',
            '0415': '0414',
            '0423': '0422',
            '0501': '0430',
            '0509': '0508',
            '0517': '0516',
            '0525': '0524',
            '0602': '0601',
            '0610': '0609',
            '0618': '0617',
            '0626': '0625',
            '0704': '0703',
            '0712': '0711',
            '0720': '0719',
            '0728': '0727',
            '0805': '0804',
            '0813': '0812',
            '0821': '0820',
            '0829': '0828',
            '0906': '0905',
            '0914': '0913',
            '0922': '0921',
            '0930': '0929',
            '1008': '1007',
            '1016': '1015',
            '1024': '1023',
            '1101': '1031',
            '1109': '1108',
            '1117': '1116',
            '1125': '1124',
            '1203': '1202',
            '1211': '1210',
            '1219': '1218',
            '1227': '1226'
        }
        # for MMDD_nonleap in MMDD_nonleap_list:
        for MMDD_nonleap, MMDD_leap in MMDD_nonleap_dict.items():
            formatter_in_nonleap = es2_data_dir + in_prod_subdir + "*" + MMDD_nonleap + in_prod_ident
            nonleap_files = sorted(glob.glob(formatter_in_nonleap))
            formatter_in_leap = es2_data_dir + in_prod_subdir + "*" + MMDD_leap + in_prod_ident
            leap_files = sorted(glob.glob(formatter_in_leap))

            my_inputs = leap_files + nonleap_files
            input_files_unique = list(set(my_inputs))
            output_file = es_constants.processing_dir + output_subdir_max + os.path.sep + MMDD_nonleap + out_prod_ident_max
            yield (input_files_unique, output_file)

    if frequency_string != 'monthly':

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @files(generate_input_files_pp_stats_max)
        def std_yearly_max(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_max_image(**args)

    else:

        @follows(modis_pp_comp)
        @active_if(activate_stats_comput, activate_pp_stats_max_comput)
        @collate(starting_files, formatter(formatter_in), formatter_out)
        def std_yearly_max(input_file, output_file):

            output_file = functions.list_to_element(output_file)
            reduced_list = exclude_current_year(input_file)
            functions.check_output_dir(os.path.dirname(output_file))
            args = {
                "input_file": reduced_list,
                "output_file": output_file,
                "output_format": 'GTIFF',
                "options": "compress=lzw"
            }
            raster_image_math.do_max_image(**args)

    # End of pipeline definition
    return proc_lists
def create_pipeline(starting_sprod):
    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

    input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    starting_files = input_dir+"*"+in_prod_ident

    #   ---------------------------------------------------------------------
    #   Average
    output_sprod="10davg"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10davg_comput)
    @collate(starting_files, formatter(formatter_in),formatter_out)
    def fewsnet_10davg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)


    #   ---------------------------------------------------------------------
    #   Minimum
    output_sprod="10dmin"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10dmin_comput)
    @collate(starting_files, formatter(formatter_in),formatter_out)
    def fewsnet_10dmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   Maximum
    output_sprod="10dmax"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10dmax_comput)
    @collate(starting_files, formatter(formatter_in),formatter_out)
    def fewsnet_10dmax(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   10dDiff
    output_sprod="10ddiff"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset)
    ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(fewsnet_10davg)
    @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10ddiff_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def fewsnet_10ddiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)
    #   ---------------------------------------------------------------------
    #   10dAvgPerc
    output_sprod="10davgperc"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset)
    ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(fewsnet_10ddiff)
    @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10davgperc_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def fewsnet_10davgperc(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_division_perc(**args)
        
    #   ---------------------------------------------------------------------
    #   10dperc
    output_sprod="10dperc"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "10davg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(fewsnet_10davg)
    @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10dperc_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def fewsnet_10dperc(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    #   ---------------------------------------------------------------------
    #   10dnp
    output_sprod="10dnp"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + min + max
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod_1 = "10dmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1      = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived',version, mapset)
    ancillary_input_1="{subpath[0][4]}"+os.path.sep+ancillary_subdir_1+"{MMDD[0]}"+ancillary_sprod_ident_1

    ancillary_sprod_2 = "10dmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2      = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived',version, mapset)
    ancillary_input_2="{subpath[0][4]}"+os.path.sep+ancillary_subdir_2+"{MMDD[0]}"+ancillary_sprod_ident_2

    @follows(fewsnet_10dmin, fewsnet_10dmax)
    @active_if(activate_fewsnet_rfe_comput, activate_10d_comput, activate_10dnp_comput)
    @transform(starting_files, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
    def fewsnet_10dnp(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file[0], "min_file": input_file[1],"max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_make_vci(**args)

    #   ---------------------------------------------------------------------
    #   1moncum
    output_sprod="1moncum"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    # inputs: files from same months
    formatter_in="(?P<YYYYMM>[0-9]{6})(?P<DD>[0-9]{2})"+in_prod_ident
    formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYYMM[0]}"+'01'+out_prod_ident

    # @follows(fewsnet_10davg)
    @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1moncum_comput)
    @collate(starting_files, formatter(formatter_in), formatter_out)
    def fewsnet_1moncum(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file,"output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_cumulate(**args)
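
    # Note (added for clarity): the monthly products below pass the
    # fewsnet_1moncum task object itself to @collate/@transform, so ruffus
    # chains them on the 1moncum outputs instead of re-globbing input paths.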

    #   ---------------------------------------------------------------------
    #   Monthly Average
    new_input_subprod='1moncum'
    in_prod_ident = functions.set_path_filename_no_date(prod, new_input_subprod, mapset, version, ext)

    output_sprod='1monavg'
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monavg_comput)
    @collate(fewsnet_1moncum, formatter(formatter_in),formatter_out)
    def fewsnet_1monavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_avg_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Minimum
    output_sprod="1monmin"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monmin_comput)
    @collate(fewsnet_1moncum, formatter(formatter_in),formatter_out)
    def fewsnet_1monmin(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_min_image(**args)

    #   ---------------------------------------------------------------------
    #   Monthly Maximum
    output_sprod="1monmax"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    formatter_in="[0-9]{4}(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out=["{subpath[0][4]}"+os.path.sep+output_subdir+"{MMDD[0]}"+out_prod_ident]

    @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monmax_comput)
    @collate(fewsnet_1moncum, formatter(formatter_in),formatter_out)
    def fewsnet_1monmax(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_max_image(**args)

    #   ---------------------------------------------------------------------
    #   1monDiff
    output_sprod="1mondiff"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    # inputs
    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "1monavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived', version, mapset)
    ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(fewsnet_1monavg)
    @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1mondiff_comput)
    @transform(fewsnet_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def fewsnet_1mondiff(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        # input_file = [monthly cumulate, long-term average] (paired via add_inputs)
        args = {"input_file1": input_file[0], "input_file2": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_oper_subtraction(**args)

    #   ---------------------------------------------------------------------
    #   1monperc
    output_sprod="1monperc"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    # inputs
    #   Starting files + avg
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod = "1monavg"
    ancillary_sprod_ident = functions.set_path_filename_no_date(prod, ancillary_sprod, mapset, version, ext)
    ancillary_subdir      = functions.set_path_sub_directory(prod, ancillary_sprod, 'Derived',version, mapset)
    ancillary_input="{subpath[0][4]}"+os.path.sep+ancillary_subdir+"{MMDD[0]}"+ancillary_sprod_ident

    @follows(fewsnet_1monavg)
    @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monperc_comput)
    @transform(fewsnet_1moncum, formatter(formatter_in), add_inputs(ancillary_input), formatter_out)
    def fewsnet_1monperc(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file[0], "avg_file": input_file[1], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_compute_perc_diff_vs_avg(**args)

    #   ---------------------------------------------------------------------
    #   1monnp
    output_sprod="1monnp"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory   (prod, output_sprod, 'Derived', version, mapset)

    #   Starting files + min + max
    formatter_in="(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})"+in_prod_ident
    formatter_out="{subpath[0][4]}"+os.path.sep+output_subdir+"{YYYY[0]}{MMDD[0]}"+out_prod_ident

    ancillary_sprod_1 = "1monmin"
    ancillary_sprod_ident_1 = functions.set_path_filename_no_date(prod, ancillary_sprod_1, mapset, version, ext)
    ancillary_subdir_1      = functions.set_path_sub_directory(prod, ancillary_sprod_1, 'Derived',version, mapset)
    ancillary_input_1="{subpath[0][4]}"+os.path.sep+ancillary_subdir_1+"{MMDD[0]}"+ancillary_sprod_ident_1

    ancillary_sprod_2 = "1monmax"
    ancillary_sprod_ident_2 = functions.set_path_filename_no_date(prod, ancillary_sprod_2, mapset, version, ext)
    ancillary_subdir_2      = functions.set_path_sub_directory(prod, ancillary_sprod_2, 'Derived',version, mapset)
    ancillary_input_2="{subpath[0][4]}"+os.path.sep+ancillary_subdir_2+"{MMDD[0]}"+ancillary_sprod_ident_2

    @follows(fewsnet_1monmin, fewsnet_1monmax)
    @active_if(activate_fewsnet_rfe_comput, activate_1month_comput, activate_1monnp_comput)
    @transform(fewsnet_1moncum, formatter(formatter_in), add_inputs(ancillary_input_1, ancillary_input_2), formatter_out)
    def fewsnet_1monnp(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file[0], "min_file": input_file[1],"max_file": input_file[2], "output_file": output_file, "output_format": 'GTIFF', "options": "compress=lzw"}
        raster_image_math.do_make_vci(**args)
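
# ---------------------------------------------------------------------------
# Editorial sketch (an assumption, not part of the original source): a
# minimal, self-contained illustration of the ruffus @collate + formatter
# grouping used by the min/max/avg tasks above. All file names are invented.

from ruffus import collate, formatter, pipeline_run

# Create dummy inputs so the demo pipeline can actually run
for _name in ('20120101_cum.txt', '20130101_cum.txt', '20120111_cum.txt'):
    open(_name, 'a').close()

@collate(['20120101_cum.txt', '20130101_cum.txt', '20120111_cum.txt'],
         formatter("[0-9]{4}(?P<MMDD>[0-9]{4})_cum.txt"),
         "min_{MMDD[0]}.txt")
def demo_min(input_files, output_file):
    # 20120101 and 20130101 share MMDD='0101' and arrive here together;
    # a real task would call raster_image_math.do_min_image() instead.
    with open(output_file, 'w') as out:
        out.write('min of %s' % (input_files,))

if __name__ == '__main__':
    pipeline_run([demo_min])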
def create_pipeline(prod,
                    starting_sprod,
                    native_mapset,
                    target_mapset,
                    version,
                    starting_dates=None,
                    proc_lists=None):

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    # Set DEFAULTS: all ON
    activate_10d30min_comput = 1
    activate_10dcum_comput = 1
    activate_1moncum_comput = 1

    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(
        prod, starting_sprod, native_mapset, version, ext)

    input_dir = es2_data_dir+ \
                functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, native_mapset)

    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident

    #   ---------------------------------------------------------------------
    #   Dekad average for every 30min (mm/h)
    #   NOTE: this product is computed w/o re-projection, i.e. on the 'native' mapset

    output_sprod_group = proc_lists.proc_add_subprod_group("lsasaf-et")
    output_sprod = proc_lists.proc_add_subprod(
        "10d30min",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Average over 30 min',
        description='10day Average computed for every 30 min',
        frequency_id='e30minute',  # TODO: confirm this frequency_id value
        date_format='YYYYMMDDHHMM',
        masked=False,
        timeseries_role='',
        active_default=True)

    out_prod_ident = functions.set_path_filename_no_date(
        prod, output_sprod, native_mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod,
                                                     'Derived', version,
                                                     native_mapset)

    def generate_parameters_10d30min():

        #   Look for all input files (starting_files may be an explicit list
        #   of paths or a glob pattern string)
        if isinstance(starting_files, list):
            input_files = starting_files
        else:
            input_files = glob.glob(starting_files)
        dekad_list = []
        # Create unique list of all dekads (as 'Julian' number)
        for input_file in input_files:
            basename = os.path.basename(input_file)
            mydate = functions.get_date_from_path_filename(basename)
            mydate_yyyymmdd = str(mydate)[0:8]
            mydekad_nbr = functions.conv_date_2_dekad(mydate_yyyymmdd)
            if mydekad_nbr not in dekad_list:
                dekad_list.append(mydekad_nbr)

        dekad_list = sorted(dekad_list)

        # Compute the 'Julian' dekad for the current day
        today = datetime.date.today()
        today_str = today.strftime('%Y%m%d')
        dekad_now = functions.conv_date_2_dekad(today_str)

        # Generate the list of 30 min time in a day
        timelist = [
            datetime.time(h, m).strftime("%H%M")
            for h, m in itertools.product(xrange(0, 24), xrange(0, 60, 30))
        ]
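        # -> ['0000', '0030', '0100', ..., '2330']: 48 half-hour slots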

        for time in timelist:
            files_for_time = glob.glob(input_dir + os.path.sep + '*' + time +
                                       in_prod_ident)
            for dekad in dekad_list:
                # Exclude the current dekad
                if dekad != dekad_now:
                    file_list = []
                    my_dekad_str = functions.conv_dekad_2_date(dekad)
                    output_file = es_constants.processing_dir + output_subdir + os.path.sep + my_dekad_str + time + out_prod_ident

                    for myfile in files_for_time:
                        basename = os.path.basename(myfile)
                        mydate_yyyymmdd = functions.get_date_from_path_filename(
                            basename)
                        mydekad_nbr = functions.conv_date_2_dekad(
                            mydate_yyyymmdd[0:8])
                        if mydekad_nbr == dekad:
                            file_list.append(myfile)

                    yield (file_list, output_file)
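
    # Each (file_list, output_file) pair yielded above defines one job for the
    # @files task below; ruffus re-invokes the generator at pipeline build time.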

    @active_if(activate_10d30min_comput)
    @files(generate_parameters_10d30min)
    def lsasaf_etp_10d30min(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', \
                "options": "compress=lzw", "input_nodata":-32768}

        raster_image_math.do_avg_image(**args)

        # Also do the house-keeping: delete files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, "lsasaf-et", version, native_mapset, 'Ingest',
                         number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    #   10 day Cumulate (mm)
    #   NOTE: this product is computed with re-projection, i.e. on the 'target' mapset

    output_sprod = proc_lists.proc_add_subprod(
        "10dcum",
        "lsasaf-et",
        final=False,
        descriptive_name='10day Cumulate',
        description='10day Cumulate in mm',
        frequency_id='e1dekad',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    out_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, "10dcum", target_mapset, version, ext)
    output_subdir_10dcum = functions.set_path_sub_directory(
        prod, "10dcum", 'Derived', version, target_mapset)

    #   Define input files
    in_prod_10dcum = '10d30min'
    in_prod_ident_10dcum = functions.set_path_filename_no_date(
        prod, in_prod_10dcum, native_mapset, version, ext)

    input_dir_10dcum = es_constants.processing_dir+ \
                functions.set_path_sub_directory(prod, in_prod_10dcum, 'Derived', version, native_mapset)

    starting_files_10dcum = input_dir_10dcum + "*" + in_prod_ident_10dcum

    formatter_in = "(?P<YYYYMMDD>[0-9]{8})[0-9]{4}" + in_prod_ident_10dcum
    formatter_out = [
        "{subpath[0][5]}" + os.path.sep + output_subdir_10dcum +
        "{YYYYMMDD[0]}" + out_prod_ident_10dcum
    ]

    @follows(lsasaf_etp_10d30min)
    @active_if(activate_10dcum_comput)
    @collate(starting_files_10dcum, formatter(formatter_in), formatter_out)
    def lsasaf_etp_10dcum(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        # Get the number of days of that dekad
        basename = os.path.basename(output_file)
        mydate = functions.get_date_from_path_filename(basename)
        nbr_days_dekad = functions.day_per_dekad(mydate)
        # Compute the correcting factor: do_cumulate sums all 48 30-min cycles, so we:
        # - divide by 2 (mm/h sampled every 30 min -> mm per day)
        # - multiply by the number of days in the dekad
        # - divide by 100, so the scale factor changes from 0.0001 (30min) to 0.01
        factor = float(nbr_days_dekad) * 0.005
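        # e.g. a standard 10-day dekad: factor = 10 * 0.005 = 0.05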
        functions.check_output_dir(os.path.dirname(output_file))

        tmpdir = tempfile.mkdtemp(prefix=__name__,
                                  suffix='',
                                  dir=es_constants.base_tmp_dir)

        tmp_output_file = tmpdir + os.path.sep + os.path.basename(output_file)

        args = {
            "input_file": input_file,
            "output_file": tmp_output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "scale_factor": factor,
            "input_nodata": -32768
        }

        raster_image_math.do_cumulate(**args)

        reproject_output(tmp_output_file, native_mapset, target_mapset)

        shutil.rmtree(tmpdir)

        # Also do the house-keeping: delete files older than 6 months
        number_months_keep = 6
        remove_old_files(prod, "10d30min-et", version, native_mapset,
                         'Derived', number_months_keep)

    # ----------------------------------------------------------------------------------------------------------------
    # 1moncum
    output_sprod = proc_lists.proc_add_subprod(
        "1moncum",
        "lsasaf-et",
        final=False,
        descriptive_name='1mon Cumulate',
        description='Monthly Cumulate in mm',
        frequency_id='e1month',
        date_format='YYYYMMDD',
        masked=False,
        timeseries_role='',
        active_default=True)
    output_sprod = '1moncum'
    out_prod_ident_1moncum = functions.set_path_filename_no_date(
        prod, output_sprod, target_mapset, version, ext)
    output_subdir_1moncum = functions.set_path_sub_directory(
        prod, output_sprod, 'Derived', version, target_mapset)
    # Input files
    in_prod_1moncum = '10dcum'
    in_prod_ident_1moncum = functions.set_path_filename_no_date(
        prod, in_prod_1moncum, target_mapset, version, ext)
    input_dir_1moncum = es_constants.processing_dir+ \
                     functions.set_path_sub_directory(prod, in_prod_1moncum, 'Derived', version, target_mapset)

    starting_files_1moncum = input_dir_1moncum + "*" + in_prod_ident_1moncum

    formatter_in_1moncum = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident_1moncum
    formatter_out_1moncum = "{subpath[0][5]}" + os.path.sep + output_subdir_1moncum + "{YYYYMM[0]}" + '01' + out_prod_ident_1moncum
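    # The three dekad files of a month (YYYYMM01/11/21) share YYYYMM and thus
    # collate onto a single monthly output dated YYYYMM + '01'.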
    @follows(lsasaf_etp_10dcum)
    @active_if(activate_1moncum_comput)
    @collate(starting_files_1moncum, formatter(formatter_in_1moncum),
             formatter_out_1moncum)
    def lsasaf_etp_1moncum(input_file, output_file):
        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        args = {
            "input_file": input_file,
            "output_file": output_file,
            "output_format": 'GTIFF',
            "options": "compress=lzw",
            "input_nodata": -32768
        }
        raster_image_math.do_cumulate(**args)

    return proc_lists
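
# ---------------------------------------------------------------------------
# Editorial sketch (an assumption, not part of the original source): how the
# @files-with-generator pattern of generate_parameters_10d30min works. The
# generator yields one (inputs, output) pair per job; file names are invented.

from ruffus import files, pipeline_run

for _name in ('slot_a.txt', 'slot_b.txt'):
    open(_name, 'a').close()

def gen_jobs():
    # A single illustrative job combining both inputs into one output
    yield (['slot_a.txt', 'slot_b.txt'], 'combined.txt')

@files(gen_jobs)
def demo_combine(inputs, output_file):
    with open(output_file, 'w') as out:
        for name in inputs:
            out.write(open(name).read())

if __name__ == '__main__':
    pipeline_run([demo_combine])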
def create_pipeline(starting_sprod):

    #   ---------------------------------------------------------------------
    #   Define input files: Chla is the 'driver', sst,kd and par 'ancillary inputs'

    chla_prod="modis-chla"
    chla_prod_ident = functions.set_path_filename_no_date(chla_prod, starting_sprod, mapset, version, ext)
    chla_input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(chla_prod, starting_sprod, 'Derived', version, mapset)
                
    #chla_files = chla_input_dir+"2014*"+chla_prod_ident

    #   ---------------------------------------------------------------------
    sst_prod="modis-sst"
    sst_prod_ident = functions.set_path_filename_no_date(sst_prod, starting_sprod, mapset, version, ext)
    sst_input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(sst_prod, starting_sprod, 'Derived', version, mapset)

    #   ---------------------------------------------------------------------
    kd_prod="modis-kd490"
    kd_prod_ident = functions.set_path_filename_no_date(kd_prod, starting_sprod, mapset, version, ext)

    kd_input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(kd_prod, starting_sprod, 'Derived', version, mapset)

    kd_files = kd_input_dir+"*"+kd_prod_ident

    #   ---------------------------------------------------------------------
    par_prod="modis-par"
    par_prod_ident = functions.set_path_filename_no_date(par_prod, starting_sprod, mapset, version, ext)

    par_input_dir = es_constants.processing_dir+ \
                functions.set_path_sub_directory(par_prod, starting_sprod, 'Derived', version, mapset)

    # Read input product nodata

    chla_prod_info = querydb.get_product_out_info(productcode=chla_prod, subproductcode="chla-day", version=version)
    chla_product_info = functions.list_to_element(chla_prod_info)
    chla_nodata = chla_product_info.nodata

    sst_prod_info = querydb.get_product_out_info(productcode=sst_prod, subproductcode="sst-day", version=version)
    sst_product_info = functions.list_to_element(sst_prod_info)
    sst_nodata = sst_product_info.nodata

    kd_prod_info = querydb.get_product_out_info(productcode=kd_prod, subproductcode="kd490-day", version=version)
    kd_product_info = functions.list_to_element(kd_prod_info)
    kd_nodata = kd_product_info.nodata

    par_prod_info = querydb.get_product_out_info(productcode=par_prod, subproductcode="par-day", version=version)
    par_product_info = functions.list_to_element(par_prod_info)
    par_nodata = par_product_info.nodata

    #   ---------------------------------------------------------------------
    #   Monthly Primary Productivity from chl-a, sst, kd490 and par monthly data

    output_sprod="1mon"
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir  = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    #   Starting files monthly composites
    formatter_kd="(?P<YYYYMM>[0-9]{6})"+kd_prod_ident
    formatter_out="{subpath[0][5]}"+os.path.sep+output_subdir+"{YYYYMM[0]}"+out_prod_ident

    ancillary_sst = sst_input_dir+"{YYYYMM[0]}"+sst_prod_ident
    ancillary_par = par_input_dir+"{YYYYMM[0]}"+par_prod_ident
    ancillary_chla  = chla_input_dir+"{YYYYMM[0]}"+chla_prod_ident

    @active_if(activate_pp_1mon_comput)
    @transform(kd_files, formatter(formatter_kd), add_inputs(ancillary_chla, ancillary_par, ancillary_sst), formatter_out)
    def modis_pp_1mon(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
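        # add_inputs ordering: input_file = [kd (driver), chla, par, sst]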
        args = {"chla_file": input_file[1], "sst_file": input_file[3], "kd_file": input_file[0],"par_file": input_file[2], \
                "sst_nodata": sst_nodata, "kd_nodata": kd_nodata,\
                "par_nodata": par_nodata, "output_file": output_file, "output_nodata": -9999, "output_format": 'GTIFF',\
                "output_type": None, "options": "compress=lzw"}
        raster_image_math.do_compute_primary_production(**args)
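
# ---------------------------------------------------------------------------
# Editorial sketch (an assumption, not part of the original source): the
# @transform + add_inputs pairing used by modis_pp_1mon and the 1mondiff /
# 1monperc tasks: each driver file is matched with ancillary files built from
# its own regex captures. All file names are invented.

from ruffus import transform, formatter, add_inputs, pipeline_run

for _name in ('20120101_cum.txt', '20130101_cum.txt', '0101_avg.txt'):
    open(_name, 'a').close()

@transform(['20120101_cum.txt', '20130101_cum.txt'],
           formatter("(?P<YYYY>[0-9]{4})(?P<MMDD>[0-9]{4})_cum.txt"),
           add_inputs("{MMDD[0]}_avg.txt"),
           "{YYYY[0]}{MMDD[0]}_diff.txt")
def demo_diff(inputs, output_file):
    # inputs == [driver file, matching long-term average file]
    with open(output_file, 'w') as out:
        out.write('diff of %s' % (inputs,))

if __name__ == '__main__':
    pipeline_run([demo_diff])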
def create_pipeline(prod, starting_sprod, mapset, version, starting_dates=None, proc_lists=None, logger=None):

    #   ---------------------------------------------------------------------
    #   Create lists
    if proc_lists is None:
        proc_lists = functions.ProcLists()

    sds_meta = metadata.SdsMetadata()
    es2_data_dir = es_constants.es2globals['processing_dir'] + os.path.sep

    #   ---------------------------------------------------------------------
    #   Define input files
    in_prod_ident = functions.set_path_filename_no_date(prod, starting_sprod, mapset, version, ext)

    input_dir = es2_data_dir + functions.set_path_sub_directory(prod, starting_sprod, 'Ingest', version, mapset)

    # Filter the input files by starting_dates, when provided
    if starting_dates is not None:
        starting_files = []
        for my_date in starting_dates:
            starting_files.append(input_dir + my_date + in_prod_ident)
    else:
        starting_files = input_dir + "*" + in_prod_ident


    #   ---------------------------------------------------------------------
    #   Monthly Average for a given month

    output_sprod_group = proc_lists.proc_add_subprod_group("monstats")
    output_sprod = proc_lists.proc_add_subprod("monavg", "monstats", final=False,
                                               descriptive_name='Monthly average',
                                               description='Monthly average',
                                               frequency_id='',
                                               date_format='YYYYMMDD',
                                               masked=False,
                                               timeseries_role='',
                                               active_default=True)
    out_prod_ident = functions.set_path_filename_no_date(prod, output_sprod, mapset, version, ext)
    output_subdir = functions.set_path_sub_directory(prod, output_sprod, 'Derived', version, mapset)

    formatter_in = "(?P<YYYYMM>[0-9]{6})[0-9]{2}" + in_prod_ident
    formatter_out = "{subpath[0][5]}" + os.path.sep + output_subdir + "{YYYYMM[0]}" + '01' + out_prod_ident

    @collate(starting_files, formatter(formatter_in), formatter_out)
    def compute_monavg(input_file, output_file):

        output_file = functions.list_to_element(output_file)
        out_filename = os.path.basename(output_file)
        functions.check_output_dir(os.path.dirname(output_file))
        no_data = int(sds_meta.get_nodata_value(input_file[0]))

        str_date = out_filename[0:6]
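        # out_filename starts with 'YYYYMM01', so [0:6] is the month being averaged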
        today = datetime.date.today()
        today_yyyymm = today.strftime('%Y%m')

        # expected_ndays=functions.get_number_days_month(str_date)
        # current_ndays=len(input_file)
        if str_date == today_yyyymm:
            # Skip the current month: it is still incomplete
            if logger is not None:
                logger.info('Do not perform computation for current month {0}. Skip'.format(str_date))
        else:
            args = {"input_file": input_file, "output_file": output_file, "output_format": 'GTIFF', "input_nodata": no_data,
                    "options": "compress=lzw"}
            raster_image_math.do_avg_image(**args)


    return proc_lists