Example #1
 def getResult(self, **kwargs) -> Optional[TaskResult]:
     raiseErrors = kwargs.get("raiseErrors", False)
     result_type = kwargs.get("type", "file")  # renamed from 'type' to avoid shadowing the builtin
     block = kwargs.get("block")
     self._exception = None
     if block: self.waitUntilReady()
     self.status()
     self.logger.info(f"GetResult[{result_type}] -> STATUS: {self._status}, args: {kwargs}")
     if self._status == Status.ERROR:
         self.logger.error(" *** Remote execution error: " + self._statMessage)
         self._exception = Exception(self._statMessage)
         if raiseErrors: raise self._exception
         return None
     elif self._status == Status.COMPLETED:
         if result_type == "file":
             # Derive the cache file name from the last '='-delimited token of the URL
             filePath = self.cacheDir + "/" + self.fileUrl.split('=')[-1] + ".nc"
             self.wpsRequest.downloadFile(filePath, self.fileUrl)
             self.logger.info(f"Downloaded result file using '{self.fileUrl}' to '{filePath}'")
             return TaskResult(dict(file=filePath, rid=self.rid, cid=self.cid))
         else:
             # renamed from 'xarray' to avoid confusion with the xarray package itself
             dataset: xa.Dataset = self.wpsRequest.downloadData(self.dataUrl)
             self.logger.info(f"Downloaded result data using '{self.dataUrl}'")
             return TaskResult({**self._parms, "rid": self.rid, "cid": self.cid}, [dataset])
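
A self-contained sketch of the cache-path construction used above, assuming (as the `split('=')[-1]` implies) that `fileUrl` ends in an `=`-delimited token; `os.path.join` avoids the hand-built `/` concatenation. The URL and directory here are hypothetical:

    import os

    def cache_path(cache_dir: str, file_url: str) -> str:
        # The last '='-delimited token of the URL becomes the NetCDF file name
        token = file_url.split('=')[-1]
        return os.path.join(cache_dir, token + ".nc")

    print(cache_path("/tmp/cache", "http://host/wps?id=abc123"))  # /tmp/cache/abc123.nc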
Example #2
 def getResult(self, timeout=None, block=False) -> Optional[TaskResult]:
     edasResults: List[EDASDataset] = self.results.get(block, timeout)
     for edasResult in edasResults:
         if edasResult.getResultClass() == "METADATA":
             return TaskResult(edasResult.attrs, [])
     xaResults: Iterable[xa.Dataset] = itertools.chain.from_iterable(
         [edasResult.xr for edasResult in edasResults])
     return TaskResult(self._parms, list(xaResults))
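
A self-contained illustration of the flattening idiom above: `itertools.chain.from_iterable` concatenates the per-result dataset lists into one flat list for the `TaskResult`. The strings are stand-ins for the `edasResult.xr` lists:

    import itertools

    per_result = [["ds1", "ds2"], ["ds3"]]  # stand-ins for the edasResult.xr lists
    flat = list(itertools.chain.from_iterable(per_result))
    print(flat)  # ['ds1', 'ds2', 'ds3']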
Example #3
 def getResult(self, **kwargs) -> Optional[TaskResult]:
     block = kwargs.get("block")
     if block: self.waitUntilReady()
     status = self.status()  # query the status after any blocking wait, not before
     header = {}
     if status == Status.ERROR:
         for ex in self.execution.errors:
             header[f"Error-{ex.code}"] = ex.text
             self.logger.error('WPS Execution Error: code=%s, locator=%s, text=%s' %
                               (ex.code, ex.locator, ex.text))
         return TaskResult(header)
     elif status == Status.EXECUTING:
         return None
     elif status == Status.COMPLETED:
         result_type = kwargs.get("type", "file")  # renamed from 'type' to avoid shadowing the builtin
         for output in self.execution.processOutputs:
             output_content = output.retrieveData(
                 self.execution.username,
                 self.execution.password,
                 headers=self.execution.headers,
                 verify=self.execution.verify,
                 cert=self.execution.cert)
             header.update(self.execution.headers)
             header["Reference"] = output.reference
             header["FileName"] = output.fileName
             if result_type == "file":
                 # Fall back to the raw chunks when retrieveData returned nothing;
                 # test bytes for emptiness with truthiness, never with 'is b""'.
                 if not output_content and len(output.data) > 0:
                     output_content = b''.join(output.data)
                 filepath = f"{self.cacheDir}/{output.fileName}"
                 with open(filepath, 'wb') as out:
                     out.write(output_content)
                 self.logger.info('Output written to file: %s' % filepath)
                 # Note: file mode falls through to the final 'return None' below.
             else:
                 if not output_content and len(output.data) > 0:
                     results = [pickle.loads(data, encoding="bytes")
                                for data in output.data]
                 else:
                     results = [pickle.loads(output_content, encoding="bytes")]
                 return TaskResult(header, results)
     return None
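
A minimal, runnable sketch of the chunk-assembly fix above: `b''.join` is the idiomatic (and linear-time) way to concatenate the `output.data` chunks when `retrieveData` comes back empty. The chunk values are stand-ins:

    chunks = [b"abc", b"def"]  # stand-in for output.data
    output_content = b""       # as if retrieveData returned nothing
    if not output_content and chunks:
        output_content = b"".join(chunks)
    print(output_content)  # b'abcdef'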
Example #4
 def getResult(self, **kwargs) -> Optional[TaskResult]:
     timeout = kwargs.get("timeout")
     block = kwargs.get("block")
     rid = kwargs.get("rid")
     if block: self.waitUntilReady(rid, timeout)
     result = self.getMessage("result", dict(rid=rid))
     rtype = result["type"]
     self.active_requests.remove(rid)
     if rtype == "error": raise Exception(result["message"])
     elif rtype == "json":
         return TaskResult({"rid": rid, "cid": self.cid, **result["json"]})
     elif rtype == "data":
         # Note: this branch returns the raw payload rather than a TaskResult,
         # despite the Optional[TaskResult] annotation.
         return result.get("content", None)
     else:
         raise Exception(f"Unrecognized result type: {rtype}")
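
A minimal sketch of the block/timeout contract that `waitUntilReady(rid, timeout)` implies; `is_ready` is a hypothetical predicate standing in for the real status check:

    import time

    def wait_until_ready(is_ready, timeout=None, poll=0.5):
        # Poll until ready, raising if the optional deadline passes first
        deadline = None if timeout is None else time.time() + timeout
        while not is_ready():
            if deadline is not None and time.time() >= deadline:
                raise TimeoutError("result not ready before timeout")
            time.sleep(poll)

    wait_until_ready(lambda: True)  # returns immediately in this toy case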
Example #5
    def execute(self, **kwargs) -> TaskResult:
        """
        Executes the operation.
        Creates an Executable for each analytics operation.
        The operation request is available as self.request.
        The operation inputs are available as self.inputs.

        Returns:
            TaskResult: The result of the operation.
        """
        print(f"Executing request {self.request}")
        inputSpec = self.request.get('input', {})  # accessed by key below, so the default should be a dict
        dset: xa.Dataset = xa.open_dataset(inputSpec['filename'])
        vid = inputSpec['name']
        variable: xa.DataArray = dset[vid]
        result_arrays = self.operate(vid, variable)
        return TaskResult(kwargs, [result_arrays])
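
A toy `operate` in the shape this `execute` expects (take a variable id and a DataArray, return a result array); the time-mean and the `time` dimension name are illustrative assumptions, not the framework's definition:

    import numpy as np
    import xarray as xa

    def operate(vid: str, variable: xa.DataArray) -> xa.DataArray:
        # Reduce over a hypothetical 'time' dimension and rename the result
        return variable.mean(dim="time").rename(f"{vid}_mean")

    da = xa.DataArray(np.ones((3, 2)), dims=("time", "x"), name="tas")
    print(operate("tas", da))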
Example #6
    def execute(self, **kwargs) -> TaskResult:
        """
        Executes the operation.
        Creates an Executable for each analytics operation.
        The operation request is available as self.request.
        The operation inputs are available as self.inputs.

        Returns:
            TaskResult: The result of the operation.
        """
        print(f"Executing request {self.request}")

        inputSpec = self.request.get('input', {})  # accessed by key below, so the default should be a dict
        M03_dir = inputSpec['path1']
        M06_dir = inputSpec['path2']
        # os.path.join works whether or not the configured paths end in a separator
        M03_files = sorted(glob.glob(os.path.join(M03_dir, "MYD03.A2008*")))
        M06_files = sorted(glob.glob(os.path.join(M06_dir, "MYD06_L2.A2008*")))
        cf = self.operate(M03_files, M06_files)
        resultDataset = xa.DataArray(cf.tolist(), name='test')
        return TaskResult(kwargs, [resultDataset])
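
For reference, a runnable sketch of why `os.path.join` replaces the bare string concatenation above (the paths shown are hypothetical):

    import os

    # os.path.join supplies the separator the original concatenation relied on path1/path2 providing
    print(os.path.join("/data/MYD03", "MYD03.A2008*"))   # /data/MYD03/MYD03.A2008*
    print(os.path.join("/data/MYD03/", "MYD03.A2008*"))  # /data/MYD03/MYD03.A2008*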
Example #7
 def getResult(self, **kwargs) -> TaskResult:
     results = []
     for tid, tfuture in self.futures.items():
         result = tfuture.getResult(**kwargs)
         if result is not None: results.append(result)  # test the individual result, not the list
     return TaskResult.merge(results)
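
The real `TaskResult.merge` is not shown in these examples; a guess at its contract (an assumption, not the library's code) is to union the headers and concatenate the data lists. A minimal stand-in:

    class TaskResult:
        """Minimal stand-in with a header dict and a data list."""
        def __init__(self, header, data=None):
            self.header, self.data = header, list(data or [])

        @classmethod
        def merge(cls, results):
            # Union the headers and concatenate the data lists, skipping Nones
            merged_header, merged_data = {}, []
            for r in results:
                if r is None:
                    continue
                merged_header.update(r.header)
                merged_data.extend(r.data)
            return cls(merged_header, merged_data)

    merged = TaskResult.merge([TaskResult({"a": 1}, [1]), None, TaskResult({"b": 2}, [2])])
    print(merged.header, merged.data)  # {'a': 1, 'b': 2} [1, 2]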
Example #8
    def execute(self, **kwargs) -> TaskResult:
        """
        Executes the operation.
        Creates an Executable for each analytics operation.
        The operation request is available as self.request.
        The operation inputs are available as self.inputs.

        Returns:
            TaskResult: The result of the operation.
        """
        print(f"Executing request {self.request}")

        # python3 baseline_series_v8.py data_path.csv 2008/01/01 2008/01/01 [-90,90,-180,180] [1,1] [5] 1 1 1 1 1 1 1 input_file.csv input_Jhist.csv

        # print("usage: python aggre_stats_mpi.py <Data Path> <Start Date> <End Date> \
        #                                         <Polygon boundaries> <Lat & Lon Grid Size > \
        #                                         <Sampling number larger than 0> \
        #                                         <1/0> <1/0> <1/0> \
        #                                         <1/0> <1/0> <1/0> \
        #                                         <1/0> <Variable Input File> <JHist Variable Input File>")

        inputSpec = self.request.get('input', {})  # accessed by key below, so the default should be a dict

        # np.int/np.float are deprecated aliases (removed in NumPy 1.24); use the builtins
        spl_num = int(inputSpec['spl_num'][1:-1])
        poly = np.fromstring(inputSpec['poly'][1:-1], dtype=int, sep=',')
        grid = np.fromstring(inputSpec['grid'][1:-1], dtype=float, sep=',')

        # Define the statistics names for HDF5 output
        sts_name = ['Minimum','Maximum','Mean','Pixel_Counts', \
                    'Standard_Deviation','Histogram_Counts','Jhisto_vs_']

        # Pass system arguments to the function
        # sts_switch = np.array(sys.argv[7:14], dtype=int)
        sts_switch = np.fromstring(inputSpec['sts_switch'], dtype=int, sep=',')
        sts_switch = np.array(sts_switch == 1)  # convert the 1/0 flags to booleans

        # Read the variable names from the variable name list
        text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['varlist'])))
        varnames = text_file[:, 0]

        if sts_switch[5]:
            intervals_1d = text_file[:, 1]  # string arrays of 1D histogram bin edges
        else:
            intervals_1d = [0]

        if sts_switch[6]:
            # Read the joint histogram names from the variable name list
            text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['jvarlist'])))
            histnames = text_file[:, 1]
            var_idx = text_file[:, 2]  # index of the input variable used for the 2D histogram
            intervals_2d = text_file[:, 3]
        else:
            intervals_2d, var_idx = [0], [0]

        #-------------STEP 1: Set up the specific directory --------
        data_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['data_path_file'])))
        # Caution: np.array(...) never returns None, so this check is always true
        # as written and the download fallback below is unreachable.
        if data_path_file is not None:
            MYD06_dir    = data_path_file[0, 0]  #'/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD06_L2/'
            MYD06_prefix = data_path_file[0, 1]  #'MYD06_L2.A'
            MYD03_dir    = data_path_file[1, 0]  #'/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD03/'
            MYD03_prefix = data_path_file[1, 1]  #'MYD03.A'
        else:
            # Auto-download the MYD03 and MYD06 files from the LAADS archive.
            start = time.time()

            M03_source_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD03/2008/001/"
            M06_source_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD06_L2/2008/001/"
            M03_localpath = "../new_data/MYD03"
            M06_localpath = "../new_data/MYD06_L2"

            token = "F43033A6-B1DB-11EA-9C3C-E3E73909D347"

            if not os.path.exists(M03_localpath):
                os.makedirs(M03_localpath)

            if not os.path.exists(M06_localpath):
                os.makedirs(M06_localpath)

            sync(M03_source_url, M03_localpath, token)
            sync(M06_source_url, M06_localpath, token)

            print("\n\nCompleted download in " + str(time.time() - start) +
                  " seconds")

            MYD03_dir = M03_localpath
            MYD03_prefix = "MYD03.A"
            MYD06_dir = M06_localpath
            MYD06_prefix = "MYD06_L2.A"

        fileformat = 'hdf'

        # output_path_file = np.array(pd.read_csv(inputSpec['data_path_file'], header=3, delim_whitespace=True))
        output_path_file = np.array(
            pd.DataFrame.from_dict(json.loads(inputSpec['output_path'])))
        output_dir = output_path_file[0, 0]
        output_prefix = output_path_file[0, 1]

        #-------------STEP 2: Set up spatial and temporal resolution & variable names----------
        NTA_lats = [poly[0], poly[1]]  #[  0,40] #[-90,90]   #[-30,30]
        NTA_lons = [poly[2], poly[3]]  #[-40,60] #[-180,180] #[-60,60]

        gap_x, gap_y = grid[1], grid[0]  #0.5,0.625

        if ((NTA_lons[-1] - NTA_lons[0]) % gap_x != 0) | (
                (NTA_lats[-1] - NTA_lats[0]) % gap_y != 0):
            print("The span of the selected region must be divisible by the grid size.")
            print("If latitude runs from -40 to 40, the latitude grid size must divide 80.")
            print("If longitude runs from -20 to 35, the longitude grid size must divide 55.")
            print("Please try again!")
            sys.exit()

        map_lon = np.arange(NTA_lons[0], NTA_lons[1], gap_x)
        map_lat = np.arange(NTA_lats[0], NTA_lats[1], gap_y)
        Lon, Lat = np.meshgrid(map_lon, map_lat)
        grid_lon = int((NTA_lons[-1] - NTA_lons[0]) / gap_x)
        grid_lat = int((NTA_lats[-1] - NTA_lats[0]) / gap_y)

        #--------------STEP 3: Create arrays for level-3 statistics data-------------------------
        t_grid_data = {}
        bin_num1 = np.zeros(len(varnames)).astype(int)
        bin_num2 = np.zeros(len(varnames)).astype(int)
        key_idx = 0
        for key in varnames:
            if sts_switch[0]:
                # Minimum: start at +inf so any observed value replaces it
                t_grid_data[key + '_' + sts_name[0]] = np.zeros(grid_lat * grid_lon) + np.inf
            if sts_switch[1]:
                # Maximum: start at -inf
                t_grid_data[key + '_' + sts_name[1]] = np.zeros(grid_lat * grid_lon) - np.inf
            if sts_switch[2] | sts_switch[3] | sts_switch[4]:
                t_grid_data[key + '_' + sts_name[2]] = np.zeros(grid_lat * grid_lon)
                t_grid_data[key + '_' + sts_name[3]] = np.zeros(grid_lat * grid_lon)
                t_grid_data[key + '_' + sts_name[4]] = np.zeros(grid_lat * grid_lon)
            if sts_switch[5]:
                bin_interval1 = np.fromstring(intervals_1d[key_idx], dtype=float, sep=',')
                bin_num1[key_idx] = bin_interval1.shape[0] - 1
                t_grid_data[key + '_' + sts_name[5]] = np.zeros(
                    (grid_lat * grid_lon, bin_num1[key_idx]))

                if sts_switch[6]:
                    bin_interval2 = np.fromstring(intervals_2d[key_idx], dtype=float, sep=',')
                    bin_num2[key_idx] = bin_interval2.shape[0] - 1
                    t_grid_data[key + '_' + sts_name[6] + histnames[key_idx]] = np.zeros(
                        (grid_lat * grid_lon, bin_num1[key_idx], bin_num2[key_idx]))

            key_idx += 1

        # Sort the dictionary keys alphabetically
        t_grid_data = OrderedDict(sorted(t_grid_data.items(), key=lambda x: x[0]))
        print("t_grid_data")
        print(t_grid_data)

        print("Output Variables for Level-3 File:")
        for key in t_grid_data:
            print(key)

        # Read the filename list for different time periods
        fname1, fname2 = [], []

        start_date = np.fromstring(inputSpec['start_date'], dtype=int, sep='/')
        end_date = np.fromstring(inputSpec['end_date'], dtype=int, sep='/')
        start = date(start_date[0], start_date[1], start_date[2])
        until = date(end_date[0], end_date[1], end_date[2])

        for dt in rrule(DAILY, interval=1, dtstart=start, until=until):
            year = dt.year
            month = dt.month
            day = dt.day

            data = datetime(year, month, day)
            daynew = data.toordinal()
            yearstart = datetime(year, 1, 1)
            day_yearstart = yearstart.toordinal()
            day_in_year = (daynew - day_yearstart) + 1

            yc = '%04i' % year
            dc = '%03i' % day_in_year

            fname_tmp1 = series.read_filelist(MYD06_dir, MYD06_prefix, yc, dc,
                                              fileformat)
            fname_tmp2 = series.read_filelist(MYD03_dir, MYD03_prefix, yc, dc,
                                              fileformat)
            fname1 = np.append(fname1, fname_tmp1)
            fname2 = np.append(fname2, fname_tmp2)
        print('*********** last processed year/month ************')
        print(year, month)

        filenum = np.arange(len(fname1))
        print(len(fname1))

        #--------------STEP 6: Start Aggregation------------------------------------------------

        print('***********calling operate************')

        xds = self.operate(fname1,fname2,NTA_lats,NTA_lons,grid_lon,grid_lat,gap_x,gap_y,filenum, \
                                    t_grid_data,sts_switch,varnames,intervals_1d,intervals_2d,var_idx, spl_num, \
                                    sts_name, histnames, bin_num1, bin_num2, year, month, map_lat, map_lon, \
                                    output_dir, output_prefix)

        #resultDataset = xa.DataArray(xds, name='test')
        return TaskResult(kwargs, [xds])
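
One caveat in the grid check above: `%` on floats is an exact test, so fractional grid sizes such as 0.625 can fail it through rounding alone. A tolerant check, as a sketch:

    import math

    def divides(span: float, step: float, tol: float = 1e-9) -> bool:
        # True when span is an (approximately) integer multiple of step
        ratio = span / step
        return math.isclose(ratio, round(ratio), abs_tol=tol)

    print(divides(80.0, 0.625))  # True
    print(divides(80.0, 0.7))    # False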
Example #9
 def cacheResult(self, header: Dict, data: Optional[xa.Dataset]):
     self.logger.info("Caching result: " + str(header))
     dataList = [] if data is None else [data]
     self.cached_results.put(TaskResult(header, dataList))
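
A minimal sketch of the queue contract `cacheResult` assumes: a thread-safe `Queue` of results that some consumer later drains. The tuple here stands in for a `TaskResult(header, dataList)`:

    from queue import Queue

    cached_results = Queue()
    cached_results.put(({"rid": "r1"}, []))  # stand-in for TaskResult(header, dataList)
    header, data_list = cached_results.get()
    print(header, data_list)  # {'rid': 'r1'} []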
Example #10
    def execute(self, **kwargs) -> TaskResult:
        """
        Executes the operation.
        Creates an Executable for each analytics operation.
        The operation request is available as self.request.
        The operation inputs are available as self.inputs.

        Returns:
            TaskResult: The result of the operation.
        """
        print(f"Executing request {self.request}")

        # python3 baseline_series_v8.py data_path.csv 2008/01/01 2008/01/01 [-90,90,-180,180] [1,1] [5] 1 1 1 1 1 1 1 input_file.csv input_Jhist.csv

        # print("usage: python aggre_stats_mpi.py <Data Path> <Start Date> <End Date> \
        #                                         <Polygon boundaries> <Lat & Lon Grid Size > \
        #                                         <Sampling number larger than 0> \
        #                                         <1/0> <1/0> <1/0> \
        #                                         <1/0> <1/0> <1/0> \
        #                                         <1/0> <Variable Input File> <JHist Variable Input File>")

        inputSpec = self.request.get('input', {})  # accessed by key below, so the default should be a dict

        # np.int/np.float are deprecated aliases (removed in NumPy 1.24); use the builtins
        num_nodes = int(inputSpec['num_nodes'])
        spl_num = int(inputSpec['spl_num'][1:-1])
        poly = np.fromstring(inputSpec['poly'][1:-1], dtype=int, sep=',')
        grid = np.fromstring(inputSpec['grid'][1:-1], dtype=float, sep=',')
        
        # Define the statistics names for HDF5 output
        sts_name = ['Minimum','Maximum','Mean','Pixel_Counts', \
                    'Standard_Deviation','Histogram_Counts','Jhisto_vs_']

        # Pass system arguments to the function
        # sts_switch = np.array(sys.argv[7:14], dtype=int)
        sts_switch = np.fromstring(inputSpec['sts_switch'], dtype=int, sep=',')
        sts_switch = np.array(sts_switch == 1)  # convert the 1/0 flags to booleans

        # Read the variable names from the variable name list
        text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['varlist'])))
        varnames = text_file[:, 0]

        if sts_switch[5]:
            intervals_1d = text_file[:, 1]  # string arrays of 1D histogram bin edges
        else:
            intervals_1d = [0]

        if sts_switch[6]:
            # Read the joint histogram names from the variable name list
            text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['jvarlist'])))
            histnames = text_file[:, 1]
            var_idx = text_file[:, 2]  # index of the input variable used for the 2D histogram
            intervals_2d = text_file[:, 3]
        else:
            intervals_2d, var_idx = [0], [0]

        #-------------STEP 1: Set up the specific directory --------
        data_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['data_path_file'])))

        # Caution: np.array(...) never returns None, so this check is always true
        # as written and the download fallback below is unreachable.
        if data_path_file is not None:
            MYD06_dir    = data_path_file[0,0] #'/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD06_L2/'
            MYD06_prefix = data_path_file[0,1] #'MYD06_L2.A'
            MYD03_dir    = data_path_file[1,0] #'/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD03/'
            MYD03_prefix = data_path_file[1,1] #'MYD03.A'
        else:
            # Auto-download the MYD03 and MYD06 files from the LAADS archive.
            start = time.time()

            M03_source_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD03/2008/001/"
            M06_source_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD06_L2/2008/001/"
            M03_localpath = "../new_data/MYD03"
            M06_localpath = "../new_data/MYD06_L2"

            token =  "F43033A6-B1DB-11EA-9C3C-E3E73909D347"

            if not os.path.exists(M03_localpath):
                os.makedirs(M03_localpath)

            if not os.path.exists(M06_localpath):
                os.makedirs(M06_localpath)

            sync(M03_source_url, M03_localpath, token)
            sync(M06_source_url, M06_localpath, token)
            
            print("\n\nCompleted download in " + str(time.time() - start) + " seconds")

            MYD03_dir = M03_localpath
            MYD03_prefix = "MYD03.A"
            MYD06_dir = M06_localpath 
            MYD06_prefix = "MYD06_L2.A"

        fileformat = 'hdf'

        # output_path_file = np.array(pd.read_csv(inputSpec['data_path_file'], header=3, delim_whitespace=True))
        output_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['output_path']))) 
        output_dir = output_path_file[0,0]
        output_prefix = output_path_file[0,1]

        #-------------STEP 2: Set up spatial and temporal resolution & variable names----------
        NTA_lats = [poly[0],poly[1]] #[  0,40] #[-90,90]   #[-30,30]
        NTA_lons = [poly[2],poly[3]] #[-40,60] #[-180,180] #[-60,60]

        gap_x, gap_y = grid[1],grid[0] #0.5,0.625

        if ((NTA_lons[-1]-NTA_lons[0]) % gap_x != 0) | ((NTA_lats[-1]-NTA_lats[0]) % gap_y != 0):
            print("## ERROR!!!")
            print("The span of the selected region must be divisible by the grid size.")
            print("If latitude runs from -40 to 40, the latitude grid size must divide 80.")
            print("If longitude runs from -20 to 35, the longitude grid size must divide 55.")
            print("Please try again!")
            sys.exit()

        map_lon = np.arange(NTA_lons[0], NTA_lons[1], gap_x)
        map_lat = np.arange(NTA_lats[0], NTA_lats[1], gap_y)
        Lon, Lat = np.meshgrid(map_lon, map_lat)
        grid_lon = int((NTA_lons[-1]-NTA_lons[0]) / gap_x)
        grid_lat = int((NTA_lats[-1]-NTA_lats[0]) / gap_y)

        #--------------STEP 3: Create arrays for level-3 statistics data-------------------------
        grid_data = {}
        bin_num1 = np.zeros(len(varnames)).astype(int)
        bin_num2 = np.zeros(len(varnames)).astype(int)
        key_idx = 0
        for key in varnames:
            if sts_switch[0]:
                grid_data[key+'_'+sts_name[0]] = np.zeros(grid_lat*grid_lon) + np.inf  # Minimum: start at +inf
            if sts_switch[1]:
                grid_data[key+'_'+sts_name[1]] = np.zeros(grid_lat*grid_lon) - np.inf  # Maximum: start at -inf
            if sts_switch[2] | sts_switch[3] | sts_switch[4]:
                grid_data[key+'_'+sts_name[2]] = np.zeros(grid_lat*grid_lon)
                grid_data[key+'_'+sts_name[3]] = np.zeros(grid_lat*grid_lon)
                grid_data[key+'_'+sts_name[4]] = np.zeros(grid_lat*grid_lon)
            if sts_switch[5]:
                bin_interval1 = np.fromstring(intervals_1d[key_idx], dtype=float, sep=',')
                bin_num1[key_idx] = bin_interval1.shape[0] - 1
                grid_data[key+'_'+sts_name[5]] = np.zeros((grid_lat*grid_lon, bin_num1[key_idx]))

                if sts_switch[6]:
                    bin_interval2 = np.fromstring(intervals_2d[key_idx], dtype=float, sep=',')
                    bin_num2[key_idx] = bin_interval2.shape[0] - 1
                    grid_data[key+'_'+sts_name[6]+histnames[key_idx]] = np.zeros((grid_lat*grid_lon, bin_num1[key_idx], bin_num2[key_idx]))

            key_idx += 1

        #--------------STEP 4: Read the filename list for different time period-------------------
        fname1, fname2 = [], []

        start_date = np.fromstring(inputSpec['start_date'], dtype=int, sep='/')
        end_date   = np.fromstring(inputSpec['end_date'], dtype=int, sep='/')
        start = date(start_date[0], start_date[1], start_date[2])
        until = date(end_date[0], end_date[1], end_date[2])

        for dt in rrule(DAILY, interval=1, dtstart=start, until=until):
            year  = np.array([dt.year])
            month = np.array([dt.month])
            day   = np.array([dt.day])
            hours = np.arange(24)  # renamed from 'time' to avoid shadowing the time module used above

            daynew = dt.toordinal()
            yearstart = datetime(dt.year, 1, 1)            # datetime needs plain ints, not arrays
            yearend   = calendar.monthrange(dt.year, 12)[1]

            day_yearstart = yearstart.toordinal()
            day_yearend = datetime(dt.year, 12, yearend).toordinal()

            day_in_year = np.array([(daynew - day_yearstart) + 1])
            end_in_year = np.array([(day_yearend - day_yearstart) + 1])

            # Extend 3 hours past the End Date to handle the orbit gap/overlap problem
            if (dt.year == until.year) & (dt.month == until.month) & (dt.day == until.day):
                shift_hour = 3
                hours = np.append(np.arange(24), np.arange(shift_hour))
                year = [year[0], year[0]]
                day_in_year = [day_in_year[0], day_in_year[0] + 1]
                if day_in_year[1] > end_in_year:
                    year[1] -= 1
                    yearstart = datetime(year[1], 1, 1)
                    yearend   = datetime(year[1], 12, 31)
                    day_yearstart = yearstart.toordinal()
                    day_yearend   = yearend.toordinal()
                    day_in_year[1] = (day_yearend - day_yearstart) + 1

            # Start reading Level-2 files
            fname_tmp1, fname_tmp2 = series.read_filelist(MYD06_dir, MYD06_prefix, MYD03_dir, MYD03_prefix,
                                                          year, day_in_year, hours, fileformat)
            fname1 = np.append(fname1, fname_tmp1)
            fname2 = np.append(fname2, fname_tmp2)

        filenum = np.arange(len(fname1))

        #--------------STEP 5: Read Attributes of each variable----------------------------------
        unit_list = []
        scale_list = []
        offst_list = []
        longname_list = []
        fillvalue_list = []

        ncfile = Dataset(fname1[0], 'r')

        # Read the user-defined variables from the MYD06 product
        tmp_idx = 0
        for key in varnames:
            if key == 'cloud_fraction':
                name_idx = tmp_idx  # remember where to insert the cloud-fraction attributes below
                continue  # Ignoring Cloud_Fraction from the input file
            else:
                tmp_data, data_dim, lonam, unit, fill, scale, offst = series.readEntry(key, ncfile, spl_num)
                unit_list  = np.append(unit_list, unit)
                scale_list = np.append(scale_list, scale)
                offst_list = np.append(offst_list, offst)
                longname_list = np.append(longname_list, lonam)
                fillvalue_list = np.append(fillvalue_list, fill)
                tmp_idx += 1

        # Add the attributes of cloud fraction at the remembered row
        # (note: name_idx is unset, and the inserts below fail, if 'cloud_fraction'
        # is missing from varnames)
        CM_unit = 'none'
        CM_longname = 'Cloud Fraction from Cloud Mask (cloudy & prob cloudy)'
        CM_fillvalue = -9999
        CM_scale_factor = 0.0001
        CM_add_offset = 0.0
        unit_list      = np.insert(unit_list,      name_idx, CM_unit)
        scale_list     = np.insert(scale_list,     name_idx, CM_scale_factor)
        offst_list     = np.insert(offst_list,     name_idx, CM_add_offset)
        longname_list  = np.insert(longname_list,  name_idx, CM_longname)
        fillvalue_list = np.insert(fillvalue_list, name_idx, CM_fillvalue)

        ncfile.close()
        #--------------STEP 6: Start Aggregation------------------------------------------------


        xds = self.operate(fname1,fname2,day_in_year,shift_hour,NTA_lats,NTA_lons,grid_lon,grid_lat,gap_x,gap_y,filenum, \
                                    grid_data,sts_switch,varnames,intervals_1d,intervals_2d,var_idx, spl_num, \
                                    sts_name, histnames, bin_num1, bin_num2, year, month, map_lat, map_lon, \
                                    unit_list, scale_list, offst_list, longname_list, fillvalue_list,output_dir, output_prefix, num_nodes)


        #resultDataset = xa.DataArray(xds, name='test')
        return TaskResult(kwargs, [xds])
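
Examples 8, 10, and 11 all rely on `np.int`/`np.float`, deprecated aliases removed in NumPy 1.24, and on `np.fromstring`, which is itself deprecated for text input. A forward-compatible sketch of the same parsing:

    import numpy as np

    # Builtin dtypes replace the removed np.int / np.float aliases, and
    # splitting the string replaces the deprecated np.fromstring(..., sep=',')
    poly = np.array("-90,90,-180,180".split(","), dtype=int)
    grid = np.array("1,0.5".split(","), dtype=float)
    print(poly, grid)  # [ -90   90 -180  180] [1.  0.5]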
Example #11
    def execute(self, **kwargs) -> TaskResult:
        """
        Executes the operation.
        Creates an Executable for each analytics operation.
        The operation request is available as self.request.
        The operation inputs are available as self.inputs.

        Returns:
            TaskResult: The result of the operation.
        """
        print(f"Executing request {self.request}")

        # python3 baseline_series_v8.py data_path.csv 2008/01/01 2008/01/01 [-90,90,-180,180] [1,1] [5] 1 1 1 1 1 1 1 input_file.csv input_Jhist.csv

        # print("usage: python aggre_stats_mpi.py <Data Path> <Start Date> <End Date> \
        #                                         <Polygon boundaries> <Lat & Lon Grid Size > \
        #                                         <Sampling number larger than 0> \
        #                                         <1/0> <1/0> <1/0> \
        #                                         <1/0> <1/0> <1/0> \
        #                                         <1/0> <Variable Input File> <JHist Variable Input File>")

        inputSpec = self.request.get('input', {})  # accessed by key below, so the default should be a dict

        # np.int/np.float are deprecated aliases (removed in NumPy 1.24); use the builtins
        spl_num = int(inputSpec['spl_num'][1:-1])
        poly = np.fromstring(inputSpec['poly'][1:-1], dtype=int, sep=',')
        grid = np.fromstring(inputSpec['grid'][1:-1], dtype=float, sep=',')

        # Define the statistics names for HDF5 output
        sts_name = ['Minimum','Maximum','Mean','Pixel_Counts', \
                    'Standard_Deviation','Histogram_Counts','Jhisto_vs_']

        # Pass system arguments to the function
        # sts_switch = np.array(sys.argv[7:14], dtype=int)
        sts_switch = np.fromstring(inputSpec['sts_switch'], dtype=int, sep=',')
        sts_switch = np.array(sts_switch == 1)  # convert the 1/0 flags to booleans

        # Read the variable names from the variable name list
        text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['varlist'])))
        varnames = text_file[:, 0]

        if sts_switch[5]:
            intervals_1d = text_file[:, 1]  # string arrays of 1D histogram bin edges
        else:
            intervals_1d = [0]

        if sts_switch[6]:
            # Read the joint histogram names from the variable name list
            text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['jvarlist'])))
            histnames = text_file[:, 1]
            var_idx = text_file[:, 2]  # index of the input variable used for the 2D histogram
            intervals_2d = text_file[:, 3]
        else:
            intervals_2d, var_idx = [0], [0]

        #-------------STEP 1: Set up the specific directory --------
        data_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['data_path_file'])))
        MYD06_dir    = data_path_file[0, 0]  #'/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD06_L2/'
        MYD06_prefix = data_path_file[0, 1]  #'MYD06_L2.A'
        MYD03_dir    = data_path_file[1, 0]  #'/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD03/'
        MYD03_prefix = data_path_file[1, 1]  #'MYD03.A'
        fileformat = 'hdf'

        # output_path_file = np.array(pd.read_csv(inputSpec['data_path_file'], header=3, delim_whitespace=True))
        output_path_file = np.array(
            pd.DataFrame.from_dict(json.loads(inputSpec['output_path'])))
        output_dir = output_path_file[0, 0]
        output_prefix = output_path_file[0, 1]

        #-------------STEP 2: Set up spatial and temporal resolution & variable names----------
        NTA_lats = [poly[0], poly[1]]  #[  0,40] #[-90,90]   #[-30,30]
        NTA_lons = [poly[2], poly[3]]  #[-40,60] #[-180,180] #[-60,60]

        gap_x, gap_y = grid[1], grid[0]  #0.5,0.625

        if ((NTA_lons[-1] - NTA_lons[0]) % gap_x != 0) | (
                (NTA_lats[-1] - NTA_lats[0]) % gap_y != 0):
            print("The span of the selected region must be divisible by the grid size.")
            print("If latitude runs from -40 to 40, the latitude grid size must divide 80.")
            print("If longitude runs from -20 to 35, the longitude grid size must divide 55.")
            print("Please try again!")
            sys.exit()

        map_lon = np.arange(NTA_lons[0], NTA_lons[1], gap_x)
        map_lat = np.arange(NTA_lats[0], NTA_lats[1], gap_y)
        Lon, Lat = np.meshgrid(map_lon, map_lat)
        grid_lon = int((NTA_lons[-1] - NTA_lons[0]) / gap_x)
        grid_lat = int((NTA_lats[-1] - NTA_lats[0]) / gap_y)

        #--------------STEP 3: Create arrays for level-3 statistics data-------------------------
        grid_data = {}
        bin_num1 = np.zeros(len(varnames)).astype(int)
        bin_num2 = np.zeros(len(varnames)).astype(int)
        key_idx = 0
        for key in varnames:
            if sts_switch[0]:
                grid_data[key + '_' + sts_name[0]] = np.zeros(grid_lat * grid_lon) + np.inf  # Minimum: start at +inf
            if sts_switch[1]:
                grid_data[key + '_' + sts_name[1]] = np.zeros(grid_lat * grid_lon) - np.inf  # Maximum: start at -inf
            if sts_switch[2] | sts_switch[3] | sts_switch[4]:
                grid_data[key + '_' + sts_name[2]] = np.zeros(grid_lat * grid_lon)
                grid_data[key + '_' + sts_name[3]] = np.zeros(grid_lat * grid_lon)
                grid_data[key + '_' + sts_name[4]] = np.zeros(grid_lat * grid_lon)
            if sts_switch[5]:
                bin_interval1 = np.fromstring(intervals_1d[key_idx], dtype=float, sep=',')
                bin_num1[key_idx] = bin_interval1.shape[0] - 1
                grid_data[key + '_' + sts_name[5]] = np.zeros(
                    (grid_lat * grid_lon, bin_num1[key_idx]))

                if sts_switch[6]:
                    bin_interval2 = np.fromstring(intervals_2d[key_idx], dtype=float, sep=',')
                    bin_num2[key_idx] = bin_interval2.shape[0] - 1
                    grid_data[key + '_' + sts_name[6] + histnames[key_idx]] = np.zeros(
                        (grid_lat * grid_lon, bin_num1[key_idx], bin_num2[key_idx]))

            key_idx += 1

        #--------------STEP 4: Read the filename list for different time period-------------------
        fname1, fname2 = [], []

        start_date = np.fromstring(inputSpec['start_date'], dtype=int, sep='/')
        end_date = np.fromstring(inputSpec['end_date'], dtype=int, sep='/')
        start = date(start_date[0], start_date[1], start_date[2])
        until = date(end_date[0], end_date[1], end_date[2])

        for dt in rrule(DAILY, interval=1, dtstart=start, until=until):
            year = dt.year
            month = dt.month
            day = dt.day

            data = datetime(year, month, day)
            daynew = data.toordinal()
            yearstart = datetime(year, 1, 1)
            day_yearstart = yearstart.toordinal()
            day_in_year = (daynew - day_yearstart) + 1  # ordinal day within the year

            yc = '%04i' % year
            dc = '%03i' % day_in_year

            fname_tmp1 = series.read_filelist(MYD06_dir, MYD06_prefix, yc, dc,
                                              fileformat)
            fname_tmp2 = series.read_filelist(MYD03_dir, MYD03_prefix, yc, dc,
                                              fileformat)
            fname1 = np.append(fname1, fname_tmp1)
            fname2 = np.append(fname2, fname_tmp2)
        print('*********** last processed year/month ************')
        print(year, month)

        filenum = np.arange(len(fname1))
        print(len(fname1))

        #--------------STEP 5: Read Attributes of each variable----------------------------------
        unit_list = []
        scale_list = []
        offst_list = []
        longname_list = []
        fillvalue_list = []

        ncfile = Dataset(fname1[0], 'r')

        # Read the user-defined variables from the MYD06 product
        tmp_idx = 0
        for key in varnames:
            if key == 'Cloud_Fraction':
                name_idx = tmp_idx  # remember where to insert the cloud-fraction long name below
                continue  # Ignoring Cloud_Fraction from the input file
            else:
                # This variant of series.readEntry returns only the data and the long name
                tmp_data, lonam = series.readEntry(key, ncfile, spl_num)
                longname_list = np.append(longname_list, lonam)
                tmp_idx += 1

        # Add the long name of cloud fraction at the remembered row
        # (note: name_idx is unset if 'Cloud_Fraction' is missing from varnames)
        CM_longname = 'Cloud Fraction from Cloud Mask (cloudy & prob cloudy)'
        longname_list = np.insert(longname_list, name_idx, CM_longname)

        ncfile.close()
        #--------------STEP 6: Start Aggregation------------------------------------------------

        print('***********calling operate************')

        xds = self.operate(fname1,fname2,NTA_lats,NTA_lons,grid_lon,grid_lat,gap_x,gap_y,filenum, \
                                    grid_data,sts_switch,varnames,intervals_1d,intervals_2d,var_idx, spl_num, \
                                    sts_name, histnames, bin_num1, bin_num2, year, month, map_lat, map_lon, \
                                    longname_list, output_dir, output_prefix)

        #resultDataset = xa.DataArray(xds, name='test')
        return TaskResult(kwargs, [xds])
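
The `yc`/`dc` strings above encode year and day-of-year for the Level-2 file names; the toordinal arithmetic can be expressed directly with `timetuple().tm_yday`. A runnable sketch:

    from datetime import date

    dt = date(2008, 1, 1)
    yc = f"{dt.year:04d}"                 # '2008'
    dc = f"{dt.timetuple().tm_yday:03d}"  # '001', the day of the year
    print(yc, dc)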