def getResult(self, **kwargs) -> Optional[TaskResult]:
    raiseErrors = kwargs.get("raiseErrors", False)
    rtype = kwargs.get("type", "file")  # renamed from 'type' to avoid shadowing the builtin
    block = kwargs.get("block")
    self._exception = None
    if block:
        self.waitUntilReady()
    self.status()
    self.logger.info(f"GetResult[{rtype}]-> STATUS: {self._status}, args: {kwargs}")
    if self._status == Status.ERROR:
        self.logger.error(" *** Remote execution error: " + self._statMessage)
        self._exception = Exception(self._statMessage)
        if raiseErrors:
            raise self._exception
        return None
    elif self._status == Status.COMPLETED:
        if rtype == "file":
            filePath = self.cacheDir + "/" + self.fileUrl.split('=')[-1] + ".nc"
            self.wpsRequest.downloadFile(filePath, self.fileUrl)
            self.logger.info(f"Downloaded result file using '{self.fileUrl}' to '{filePath}'")
            return TaskResult(dict(file=filePath, rid=self.rid, cid=self.cid))
        else:
            xarray: xa.Dataset = self.wpsRequest.downloadData(self.dataUrl)
            self.logger.info(f"Downloaded result data using '{self.dataUrl}'")
            return TaskResult({**self._parms, "rid": self.rid, "cid": self.cid}, [xarray])
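# Usage sketch (hedged): 'future' stands for a hypothetical instance of this
# class, and '.datasets' is an assumed TaskResult attribute, not part of the
# source. Block until the remote job finishes, then fetch the result as
# in-memory data instead of a cached NetCDF file.
result = future.getResult(block=True, type="data", raiseErrors=True)
if result is not None:
    dataset = result.datasets[0]  # assumes TaskResult keeps its xarray datasets in '.datasets'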
def getResult(self, timeout=None, block=False) -> Optional[TaskResult]:
    edasResults: List[EDASDataset] = self.results.get(block, timeout)
    for edasResult in edasResults:
        if edasResult.getResultClass() == "METADATA":
            return TaskResult(edasResult.attrs, [])
    xaResults: Iterable[xa.Dataset] = itertools.chain.from_iterable(
        [edasResult.xr for edasResult in edasResults])
    return TaskResult(self._parms, list(xaResults))
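# Usage sketch (hedged): 'client' is a hypothetical owner of the results queue
# above; wait up to 60 seconds for the EDAS worker to deliver its datasets.
result = client.getResult(timeout=60.0, block=True)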
def getResult(self, **kwargs) -> Optional[TaskResult]:
    header = {}
    block = kwargs.get("block")
    if block:
        self.waitUntilReady()
    # Query the status after any blocking wait so it reflects the final state
    status = self.status()
    if status == Status.ERROR:
        for ex in self.execution.errors:
            header[f"Error-{ex.code}"] = ex.text
            self.logger.error('WPS Execution Error: code=%s, locator=%s, text=%s'
                              % (ex.code, ex.locator, ex.text))
        return TaskResult(header)
    elif status == Status.EXECUTING:
        return None
    elif status == Status.COMPLETED:
        rtype = kwargs.get("type", "file")  # renamed from 'type' to avoid shadowing the builtin
        for output in self.execution.processOutputs:
            output_content = output.retrieveData(self.execution.username,
                                                 self.execution.password,
                                                 headers=self.execution.headers,
                                                 verify=self.execution.verify,
                                                 cert=self.execution.cert)
            header.update(self.execution.headers)
            header["Reference"] = output.reference
            header["FileName"] = output.fileName
            if rtype == "file":
                # Fall back to the chunked payload when retrieveData returns nothing
                if output_content == b'' and len(output.data) > 0:
                    for data in output.data:
                        output_content = output_content + data
                filepath = f"{self.cacheDir}/{output.fileName}"
                with open(filepath, 'wb') as out:
                    out.write(output_content)
                self.logger.info('Output written to file: %s' % filepath)
            else:
                if output_content == b'' and len(output.data) > 0:
                    results = [pickle.loads(data, encoding="bytes") for data in output.data]
                else:
                    results = [pickle.loads(output_content, encoding="bytes")]
                return TaskResult(header, results)
    return None
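# Hedged helper sketch: the fallback above reassembles a chunked WPS output when
# retrieveData returns an empty byte string. The same logic as a standalone
# function ('output' is an owslib process output; the helper name is hypothetical).
def assembleOutputContent(output, content: bytes) -> bytes:
    if content == b'' and len(output.data) > 0:
        content = b''.join(output.data)  # concatenate the chunks in order
    return content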
def getResult(self, **kwargs) -> Optional[TaskResult]:
    timeout = kwargs.get("timeout")
    block = kwargs.get("block")
    rid = kwargs.get("rid")
    if block:
        self.waitUntilReady(rid, timeout)
    result = self.getMessage("result", dict(rid=rid))
    rtype = result["type"]
    self.active_requests.remove(rid)
    if rtype == "error":
        raise Exception(result["message"])
    elif rtype == "json":
        return TaskResult({"rid": rid, "cid": self.cid, **result["json"]})
    elif rtype == "data":
        return result.get("content", None)
    else:
        raise Exception(f"Unrecognized result type: {rtype}")
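# Message-shape sketch (hedged, derived only from the dispatch above): a result
# message is expected to look like one of
#   {"type": "error", "message": "..."}              -> raised as an Exception
#   {"type": "json",  "json": {...}}                 -> wrapped in a TaskResult header
#   {"type": "data",  "content": <TaskResult|None>}  -> returned as-is
# 'stub' is a hypothetical instance of this class.
task = stub.getResult(rid="req-0", block=True, timeout=30.0)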
def execute(self, **kwargs) -> TaskResult:
    """
    Executes the operation. Creates an Executable for each analytics operation.
    The operation request is available as self.request.
    The operation inputs are available as self.inputs.

    Returns:
        TaskResult: The result of the operation.
    """
    print(f"Executing request {self.request}")
    inputSpec = self.request.get('input', {})  # default was [], which cannot be indexed by key
    dset: xa.Dataset = xa.open_dataset(inputSpec['filename'])
    vid = inputSpec['name']
    variable: xa.DataArray = dset[vid]
    result_arrays = self.operate(vid, variable)
    return TaskResult(kwargs, [result_arrays])
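# Hedged sketch of the operate() hook this execute() relies on (inside the same
# class): it receives the variable id and its DataArray and returns a derived
# DataArray. The anomaly computation is illustrative only, not from the source.
def operate(self, vid: str, variable: xa.DataArray) -> xa.DataArray:
    # subtract the time mean to produce an anomaly field
    return (variable - variable.mean(dim="time")).rename(f"{vid}_anomaly")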
def execute(self, **kwargs) -> TaskResult:
    """
    Executes the operation. Creates an Executable for each analytics operation.
    The operation request is available as self.request.
    The operation inputs are available as self.inputs.

    Returns:
        TaskResult: The result of the operation.
    """
    print(f"Executing request {self.request}")
    inputSpec = self.request.get('input', {})  # default was [], which cannot be indexed by key
    M03_dir = inputSpec['path1']
    M06_dir = inputSpec['path2']
    M03_files = sorted(glob.glob(M03_dir + "MYD03.A2008*"))
    M06_files = sorted(glob.glob(M06_dir + "MYD06_L2.A2008*"))
    cf = self.operate(M03_files, M06_files)
    resultDataset = xa.DataArray(cf.tolist(), name='test')
    return TaskResult(kwargs, [resultDataset])
def getResult(self, **kwargs) -> TaskResult:
    results = []
    for tid, tfuture in self.futures.items():
        result = tfuture.getResult(**kwargs)
        if result is not None:  # was 'results is not None', which is always true
            results.append(result)
    return TaskResult.merge(results)
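# Hedged sketch of the TaskResult.merge contract assumed above (inside
# TaskResult): combine headers and concatenate datasets across results. The
# '.header' and '.datasets' attribute names are assumptions, not from the source.
@staticmethod
def merge(results: List["TaskResult"]) -> "TaskResult":
    header, datasets = {}, []
    for r in results:
        header.update(r.header)
        datasets.extend(r.datasets)
    return TaskResult(header, datasets)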
def execute(self, **kwargs) -> TaskResult:
    """
    Executes the operation. Creates an Executable for each analytics operation.
    The operation request is available as self.request.
    The operation inputs are available as self.inputs.

    Returns:
        TaskResult: The result of the operation.
    """
    print(f"Executing request {self.request}")
    # Equivalent CLI invocation of the original script:
    #   python3 baseline_series_v8.py data_path.csv 2008/01/01 2008/01/01 \
    #       [-90,90,-180,180] [1,1] [5] 1 1 1 1 1 1 1 input_file.csv input_Jhist.csv
    # usage: python aggre_stats_mpi.py <Data Path> <Start Date> <End Date>
    #        <Polygon boundaries> <Lat & Lon Grid Size> <Sampling number larger than 0>
    #        <1/0> x 7 <Variable Input File> <JHist Variable Input File>
    inputSpec = self.request.get('input', {})
    spl_num = int(inputSpec['spl_num'][1:-1])  # int()/float() replace the removed np.int/np.float aliases
    poly = np.fromstring(inputSpec['poly'][1:-1], dtype=int, sep=',')
    grid = np.fromstring(inputSpec['grid'][1:-1], dtype=float, sep=',')

    # Define the statistics names for HDF5 output
    sts_name = ['Minimum', 'Maximum', 'Mean', 'Pixel_Counts',
                'Standard_Deviation', 'Histogram_Counts', 'Jhisto_vs_']

    # Statistics switches (formerly sys.argv[7:14]), converted to booleans
    sts_switch = np.fromstring(inputSpec['sts_switch'], dtype=int, sep=',')
    sts_switch = np.array(sts_switch == 1)

    # Read the variable names from the variable name list
    # (formerly read from a CSV file via pd.read_csv)
    text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['varlist'])))
    varnames = text_file[:, 0]
    if sts_switch[5]:
        intervals_1d = text_file[:, 1]  # string interval arrays
    else:
        intervals_1d = [0]
    if sts_switch[6]:
        # Read the joint-histogram names from the variable name list
        text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['jvarlist'])))
        histnames = text_file[:, 1]
        var_idx = text_file[:, 2]  # index of the input variable used for the 2D histogram
        intervals_2d = text_file[:, 3]
    else:
        intervals_2d, var_idx = [0], [0]

    # -------------STEP 1: Set up the specific directory --------
    data_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['data_path_file'])))
    if data_path_file is not None:
        MYD06_dir = data_path_file[0, 0]     # e.g. '/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD06_L2/'
        MYD06_prefix = data_path_file[0, 1]  # 'MYD06_L2.A'
        MYD03_dir = data_path_file[1, 0]     # e.g. '/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD03/'
        MYD03_prefix = data_path_file[1, 1]  # 'MYD03.A'
    else:
        # Auto-download the MYD03 and MYD06 files from the LAADS archive
        start = time.time()
        M03_source_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD03/2008/001/"
        M06_source_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD06_L2/2008/001/"
        M03_localpath = "../new_data/MYD03"
        M06_localpath = "../new_data/MYD06_L2"
        token = "F43033A6-B1DB-11EA-9C3C-E3E73909D347"
        if not os.path.exists(M03_localpath):
            os.makedirs(M03_localpath)
        if not os.path.exists(M06_localpath):
            os.makedirs(M06_localpath)
        sync(M03_source_url, M03_localpath, token)
        sync(M06_source_url, M06_localpath, token)
        print("\n\nCompleted download in " + str(time.time() - start) + " seconds")
        MYD03_dir = M03_localpath
        MYD03_prefix = "MYD03.A"
        MYD06_dir = M06_localpath
        MYD06_prefix = "MYD06_L2.A"
    fileformat = 'hdf'

    output_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['output_path'])))
    output_dir = output_path_file[0, 0]
    output_prefix = output_path_file[0, 1]

    # -------------STEP 2: Set up spatial and temporal resolution & variable names----------
    NTA_lats = [poly[0], poly[1]]    # e.g. [-90, 90]
    NTA_lons = [poly[2], poly[3]]    # e.g. [-180, 180]
    gap_x, gap_y = grid[1], grid[0]  # e.g. 0.5, 0.625
    if ((NTA_lons[-1] - NTA_lons[0]) % gap_x != 0) | ((NTA_lats[-1] - NTA_lats[0]) % gap_y != 0):
        print("The extent of the selected region must be divisible by the grid size.")
        print("If you choose latitudes from -40 to 40, the latitude grid size must divide 80.")
        print("If you choose longitudes from 20 to 35, the longitude grid size must divide 55.")
        print("Please try again!")
        sys.exit()
    map_lon = np.arange(NTA_lons[0], NTA_lons[1], gap_x)
    map_lat = np.arange(NTA_lats[0], NTA_lats[1], gap_y)
    Lon, Lat = np.meshgrid(map_lon, map_lat)
    grid_lon = int((NTA_lons[-1] - NTA_lons[0]) / gap_x)
    grid_lat = int((NTA_lats[-1] - NTA_lats[0]) / gap_y)

    # --------------STEP 3: Create arrays for level-3 statistics data-------------------------
    t_grid_data = {}
    bin_num1 = np.zeros(len(varnames)).astype(int)
    bin_num2 = np.zeros(len(varnames)).astype(int)
    key_idx = 0
    for key in varnames:
        if sts_switch[0]:
            t_grid_data[key + '_' + sts_name[0]] = np.zeros(grid_lat * grid_lon) + np.inf
        if sts_switch[1]:
            t_grid_data[key + '_' + sts_name[1]] = np.zeros(grid_lat * grid_lon) - np.inf
        if sts_switch[2] | sts_switch[3] | sts_switch[4]:
            t_grid_data[key + '_' + sts_name[2]] = np.zeros(grid_lat * grid_lon)
            t_grid_data[key + '_' + sts_name[3]] = np.zeros(grid_lat * grid_lon)
            t_grid_data[key + '_' + sts_name[4]] = np.zeros(grid_lat * grid_lon)
        if sts_switch[5]:
            bin_interval1 = np.fromstring(intervals_1d[key_idx], dtype=float, sep=',')
            bin_num1[key_idx] = bin_interval1.shape[0] - 1
            t_grid_data[key + '_' + sts_name[5]] = np.zeros((grid_lat * grid_lon, bin_num1[key_idx]))
        if sts_switch[6]:
            bin_interval2 = np.fromstring(intervals_2d[key_idx], dtype=float, sep=',')
            bin_num2[key_idx] = bin_interval2.shape[0] - 1
            t_grid_data[key + '_' + sts_name[6] + histnames[key_idx]] = np.zeros(
                (grid_lat * grid_lon, bin_num1[key_idx], bin_num2[key_idx]))
        key_idx += 1

    # Sort the dictionary alphabetically by key
    t_grid_data = OrderedDict(sorted(t_grid_data.items(), key=lambda x: x[0]))
    print("t_grid_data")
    print(t_grid_data)
    print("Output Variables for Level-3 File:")
    for key in t_grid_data:
        print(key)

    # --------------STEP 4: Read the filename list for the selected time period---------------
    fname1, fname2 = [], []
    start_date = np.fromstring(inputSpec['start_date'], dtype=int, sep='/')
    end_date = np.fromstring(inputSpec['end_date'], dtype=int, sep='/')
    start = date(start_date[0], start_date[1], start_date[2])
    until = date(end_date[0], end_date[1], end_date[2])
    for dt in rrule(DAILY, interval=1, dtstart=start, until=until):
        year = int(dt.strftime("%Y"))
        month = int(dt.strftime("%m"))
        day = int(dt.strftime("%d"))
        data = datetime(year, month, day)
        daynew = data.toordinal()
        yearstart = datetime(year, 1, 1)
        day_yearstart = yearstart.toordinal()
        day_in_year = (daynew - day_yearstart) + 1
        yc = '%04i' % year
        dc = '%03i' % day_in_year
        fname_tmp1 = series.read_filelist(MYD06_dir, MYD06_prefix, yc, dc, fileformat)
        fname_tmp2 = series.read_filelist(MYD03_dir, MYD03_prefix, yc, dc, fileformat)
        fname1 = np.append(fname1, fname_tmp1)
        fname2 = np.append(fname2, fname_tmp2)
        print('***********year/month************')
        print(year, month)
    filenum = np.arange(len(fname1))
    print(len(fname1))

    # --------------STEP 6: Start Aggregation------------------------------------------------
    print('***********calling operate************')
    xds = self.operate(fname1, fname2, NTA_lats, NTA_lons, grid_lon, grid_lat, gap_x, gap_y, filenum,
                       t_grid_data, sts_switch, varnames, intervals_1d, intervals_2d, var_idx, spl_num,
                       sts_name, histnames, bin_num1, bin_num2, year, month, map_lat, map_lon,
                       output_dir, output_prefix)
    # resultDataset = xa.DataArray(xds, name='test')
    return TaskResult(kwargs, [xds])
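# Hedged usage sketch: the 'input' mapping this execute() reads. The key names
# are taken from the lookups above; every value is illustrative only, and the
# JSON tables mirror the pd.DataFrame.from_dict(json.loads(...)) parsing.
import json
request = {"input": {
    "spl_num": "[5]",
    "poly": "[-90,90,-180,180]",
    "grid": "[1,1]",
    "sts_switch": "1,1,1,1,1,1,0",
    "varlist": json.dumps({"name": {"0": "Cloud_Fraction"},
                           "intervals_1d": {"0": "0,0.1,1"}}),
    "jvarlist": json.dumps({}),  # only parsed when sts_switch[6] is set
    "data_path_file": json.dumps({"dir": {"0": "../new_data/MYD06_L2/", "1": "../new_data/MYD03/"},
                                  "prefix": {"0": "MYD06_L2.A", "1": "MYD03.A"}}),
    "output_path": json.dumps({"dir": {"0": "../output/"}, "prefix": {"0": "baseline_"}}),
    "start_date": "2008/01/01",
    "end_date": "2008/01/01",
}}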
def cacheResult(self, header: Dict, data: Optional[xa.Dataset]):
    self.logger.info("Caching result: " + str(header))
    dataList = [] if data is None else [data]
    self.cached_results.put(TaskResult(header, dataList))
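# Usage sketch (hedged): cache a metadata-only result by passing data=None;
# 'worker' is a hypothetical owner of the cached_results queue above.
worker.cacheResult({"rid": "req-0", "status": "completed"}, None)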
def execute(self, **kwargs) -> TaskResult:
    """
    Executes the operation. Creates an Executable for each analytics operation.
    The operation request is available as self.request.
    The operation inputs are available as self.inputs.

    Returns:
        TaskResult: The result of the operation.
    """
    print(f"Executing request {self.request}")
    # Equivalent CLI invocation of the original script:
    #   python3 baseline_series_v8.py data_path.csv 2008/01/01 2008/01/01 \
    #       [-90,90,-180,180] [1,1] [5] 1 1 1 1 1 1 1 input_file.csv input_Jhist.csv
    # usage: python aggre_stats_mpi.py <Data Path> <Start Date> <End Date>
    #        <Polygon boundaries> <Lat & Lon Grid Size> <Sampling number larger than 0>
    #        <1/0> x 7 <Variable Input File> <JHist Variable Input File>
    inputSpec = self.request.get('input', {})
    num_nodes = int(inputSpec['num_nodes'])  # int()/float() replace the removed np.int/np.float aliases
    spl_num = int(inputSpec['spl_num'][1:-1])
    poly = np.fromstring(inputSpec['poly'][1:-1], dtype=int, sep=',')
    grid = np.fromstring(inputSpec['grid'][1:-1], dtype=float, sep=',')

    # Define the statistics names for HDF5 output
    sts_name = ['Minimum', 'Maximum', 'Mean', 'Pixel_Counts',
                'Standard_Deviation', 'Histogram_Counts', 'Jhisto_vs_']

    # Statistics switches (formerly sys.argv[7:14]), converted to booleans
    sts_switch = np.fromstring(inputSpec['sts_switch'], dtype=int, sep=',')
    sts_switch = np.array(sts_switch == 1)

    # Read the variable names from the variable name list
    # (formerly read from a CSV file via pd.read_csv)
    text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['varlist'])))
    varnames = text_file[:, 0]
    if sts_switch[5]:
        intervals_1d = text_file[:, 1]  # string interval arrays
    else:
        intervals_1d = [0]
    if sts_switch[6]:
        # Read the joint-histogram names from the variable name list
        text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['jvarlist'])))
        histnames = text_file[:, 1]
        var_idx = text_file[:, 2]  # index of the input variable used for the 2D histogram
        intervals_2d = text_file[:, 3]
    else:
        intervals_2d, var_idx = [0], [0]

    # -------------STEP 1: Set up the specific directory --------
    data_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['data_path_file'])))
    if data_path_file is not None:
        MYD06_dir = data_path_file[0, 0]     # e.g. '/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD06_L2/'
        MYD06_prefix = data_path_file[0, 1]  # 'MYD06_L2.A'
        MYD03_dir = data_path_file[1, 0]     # e.g. '/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD03/'
        MYD03_prefix = data_path_file[1, 1]  # 'MYD03.A'
    else:
        # Auto-download the MYD03 and MYD06 files from the LAADS archive
        start = time.time()
        M03_source_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD03/2008/001/"
        M06_source_url = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD06_L2/2008/001/"
        M03_localpath = "../new_data/MYD03"
        M06_localpath = "../new_data/MYD06_L2"
        token = "F43033A6-B1DB-11EA-9C3C-E3E73909D347"
        if not os.path.exists(M03_localpath):
            os.makedirs(M03_localpath)
        if not os.path.exists(M06_localpath):
            os.makedirs(M06_localpath)
        sync(M03_source_url, M03_localpath, token)
        sync(M06_source_url, M06_localpath, token)
        print("\n\nCompleted download in " + str(time.time() - start) + " seconds")
        MYD03_dir = M03_localpath
        MYD03_prefix = "MYD03.A"
        MYD06_dir = M06_localpath
        MYD06_prefix = "MYD06_L2.A"
    fileformat = 'hdf'

    output_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['output_path'])))
    output_dir = output_path_file[0, 0]
    output_prefix = output_path_file[0, 1]

    # -------------STEP 2: Set up spatial and temporal resolution & variable names----------
    NTA_lats = [poly[0], poly[1]]    # e.g. [-90, 90]
    NTA_lons = [poly[2], poly[3]]    # e.g. [-180, 180]
    gap_x, gap_y = grid[1], grid[0]  # e.g. 0.5, 0.625
    if ((NTA_lons[-1] - NTA_lons[0]) % gap_x != 0) | ((NTA_lats[-1] - NTA_lats[0]) % gap_y != 0):
        print("## ERROR!!!")
        print("The extent of the selected region must be divisible by the grid size.")
        print("If you choose latitudes from -40 to 40, the latitude grid size must divide 80.")
        print("If you choose longitudes from 20 to 35, the longitude grid size must divide 55.")
        print("Please try again!")
        sys.exit()
    map_lon = np.arange(NTA_lons[0], NTA_lons[1], gap_x)
    map_lat = np.arange(NTA_lats[0], NTA_lats[1], gap_y)
    Lon, Lat = np.meshgrid(map_lon, map_lat)
    grid_lon = int((NTA_lons[-1] - NTA_lons[0]) / gap_x)
    grid_lat = int((NTA_lats[-1] - NTA_lats[0]) / gap_y)

    # --------------STEP 3: Create arrays for level-3 statistics data-------------------------
    grid_data = {}
    bin_num1 = np.zeros(len(varnames)).astype(int)
    bin_num2 = np.zeros(len(varnames)).astype(int)
    key_idx = 0
    for key in varnames:
        if sts_switch[0]:
            grid_data[key + '_' + sts_name[0]] = np.zeros(grid_lat * grid_lon) + np.inf
        if sts_switch[1]:
            grid_data[key + '_' + sts_name[1]] = np.zeros(grid_lat * grid_lon) - np.inf
        if sts_switch[2] | sts_switch[3] | sts_switch[4]:
            grid_data[key + '_' + sts_name[2]] = np.zeros(grid_lat * grid_lon)
            grid_data[key + '_' + sts_name[3]] = np.zeros(grid_lat * grid_lon)
            grid_data[key + '_' + sts_name[4]] = np.zeros(grid_lat * grid_lon)
        if sts_switch[5]:
            bin_interval1 = np.fromstring(intervals_1d[key_idx], dtype=float, sep=',')
            bin_num1[key_idx] = bin_interval1.shape[0] - 1
            grid_data[key + '_' + sts_name[5]] = np.zeros((grid_lat * grid_lon, bin_num1[key_idx]))
        if sts_switch[6]:
            bin_interval2 = np.fromstring(intervals_2d[key_idx], dtype=float, sep=',')
            bin_num2[key_idx] = bin_interval2.shape[0] - 1
            grid_data[key + '_' + sts_name[6] + histnames[key_idx]] = np.zeros(
                (grid_lat * grid_lon, bin_num1[key_idx], bin_num2[key_idx]))
        key_idx += 1

    # --------------STEP 4: Read the filename list for the selected time period---------------
    fname1, fname2 = [], []
    start_date = np.fromstring(inputSpec['start_date'], dtype=int, sep='/')
    end_date = np.fromstring(inputSpec['end_date'], dtype=int, sep='/')
    start = date(start_date[0], start_date[1], start_date[2])
    until = date(end_date[0], end_date[1], end_date[2])
    for dt in rrule(DAILY, interval=1, dtstart=start, until=until):
        year = np.array([int(dt.strftime("%Y"))])
        month = np.array([int(dt.strftime("%m"))])
        day = np.array([int(dt.strftime("%d"))])
        hours = np.arange(24)  # renamed from 'time' to avoid shadowing the time module used above
        daynew = dt.toordinal()
        yearstart = datetime(year[0], 1, 1)  # index the one-element arrays for datetime/calendar calls
        yearend = calendar.monthrange(year[0], 12)[1]
        day_yearstart = yearstart.toordinal()
        day_yearend = datetime(year[0], 12, yearend).toordinal()
        day_in_year = np.array([(daynew - day_yearstart) + 1])
        end_in_year = np.array([(day_yearend - day_yearstart) + 1])

        # Extend 3 hours past the end date to handle the orbit gap/overlap problem
        if (dt.year == until.year) & (dt.month == until.month) & (dt.day == until.day):
            shift_hour = 3
            hours = np.append(np.arange(24), np.arange(shift_hour))
            year = [year[0], year[0]]
            day_in_year = [day_in_year[0], day_in_year[0] + 1]
            if day_in_year[1] > end_in_year:
                year[1] -= 1
                yearstart = datetime(year[1], 1, 1)
                yearend = datetime(year[1], 12, 31)
                day_yearstart = yearstart.toordinal()
                day_yearend = yearend.toordinal()
                day_in_year[1] = (day_yearend - day_yearstart) + 1

        # Start reading Level-2 files
        fname_tmp1, fname_tmp2 = series.read_filelist(MYD06_dir, MYD06_prefix, MYD03_dir, MYD03_prefix,
                                                      year, day_in_year, hours, fileformat)
        fname1 = np.append(fname1, fname_tmp1)
        fname2 = np.append(fname2, fname_tmp2)
    # print(fname1.shape, fname2.shape)
    filenum = np.arange(len(fname1))
    # print(len(fname1))

    # --------------STEP 5: Read the attributes of each variable------------------------------
    unit_list = []
    scale_list = []
    offst_list = []
    longname_list = []
    fillvalue_list = []
    ncfile = Dataset(fname1[0], 'r')

    # Read the user-defined variables from the MYD06 product
    tmp_idx = 0
    for key in varnames:
        if key == 'cloud_fraction':
            name_idx = tmp_idx
            continue  # skip cloud_fraction from the input file; its attributes are inserted below
        else:
            tmp_data, data_dim, lonam, unit, fill, scale, offst = series.readEntry(key, ncfile, spl_num)
            unit_list = np.append(unit_list, unit)
            scale_list = np.append(scale_list, scale)
            offst_list = np.append(offst_list, offst)
            longname_list = np.append(longname_list, lonam)
            fillvalue_list = np.append(fillvalue_list, fill)
            tmp_idx += 1

    # Insert the attributes of the cloud-fraction entry at its original position
    CM_unit = 'none'
    CM_longname = 'Cloud Fraction from Cloud Mask (cloudy & prob cloudy)'
    CM_fillvalue = -9999
    CM_scale_factor = 0.0001
    CM_add_offset = 0.0
    unit_list = np.insert(unit_list, name_idx, CM_unit)
    scale_list = np.insert(scale_list, name_idx, CM_scale_factor)
    offst_list = np.insert(offst_list, name_idx, CM_add_offset)
    longname_list = np.insert(longname_list, name_idx, CM_longname)
    fillvalue_list = np.insert(fillvalue_list, name_idx, CM_fillvalue)
    ncfile.close()

    # --------------STEP 6: Start Aggregation------------------------------------------------
    xds = self.operate(fname1, fname2, day_in_year, shift_hour, NTA_lats, NTA_lons, grid_lon, grid_lat,
                       gap_x, gap_y, filenum, grid_data, sts_switch, varnames, intervals_1d, intervals_2d,
                       var_idx, spl_num, sts_name, histnames, bin_num1, bin_num2, year, month,
                       map_lat, map_lon, unit_list, scale_list, offst_list, longname_list,
                       fillvalue_list, output_dir, output_prefix, num_nodes)
    # resultDataset = xa.DataArray(xds, name='test')
    return TaskResult(kwargs, [xds])
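# Minimal worked example (hedged) of the ordinal day-of-year arithmetic used in
# STEP 4 above: 2008-12-31 is day 366 of the 2008 leap year.
from datetime import datetime
assert (datetime(2008, 12, 31).toordinal() - datetime(2008, 1, 1).toordinal()) + 1 == 366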
def execute(self, **kwargs) -> TaskResult:
    """
    Executes the operation. Creates an Executable for each analytics operation.
    The operation request is available as self.request.
    The operation inputs are available as self.inputs.

    Returns:
        TaskResult: The result of the operation.
    """
    print(f"Executing request {self.request}")
    # Equivalent CLI invocation of the original script:
    #   python3 baseline_series_v8.py data_path.csv 2008/01/01 2008/01/01 \
    #       [-90,90,-180,180] [1,1] [5] 1 1 1 1 1 1 1 input_file.csv input_Jhist.csv
    # usage: python aggre_stats_mpi.py <Data Path> <Start Date> <End Date>
    #        <Polygon boundaries> <Lat & Lon Grid Size> <Sampling number larger than 0>
    #        <1/0> x 7 <Variable Input File> <JHist Variable Input File>
    inputSpec = self.request.get('input', {})
    spl_num = int(inputSpec['spl_num'][1:-1])  # int()/float() replace the removed np.int/np.float aliases
    poly = np.fromstring(inputSpec['poly'][1:-1], dtype=int, sep=',')
    grid = np.fromstring(inputSpec['grid'][1:-1], dtype=float, sep=',')

    # Define the statistics names for HDF5 output
    sts_name = ['Minimum', 'Maximum', 'Mean', 'Pixel_Counts',
                'Standard_Deviation', 'Histogram_Counts', 'Jhisto_vs_']

    # Statistics switches (formerly sys.argv[7:14]), converted to booleans
    sts_switch = np.fromstring(inputSpec['sts_switch'], dtype=int, sep=',')
    sts_switch = np.array(sts_switch == 1)

    # Read the variable names from the variable name list
    # (formerly read from a CSV file via pd.read_csv)
    text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['varlist'])))
    varnames = text_file[:, 0]
    if sts_switch[5]:
        intervals_1d = text_file[:, 1]  # string interval arrays
    else:
        intervals_1d = [0]
    if sts_switch[6]:
        # Read the joint-histogram names from the variable name list
        text_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['jvarlist'])))
        histnames = text_file[:, 1]
        var_idx = text_file[:, 2]  # index of the input variable used for the 2D histogram
        intervals_2d = text_file[:, 3]
    else:
        intervals_2d, var_idx = [0], [0]

    # -------------STEP 1: Set up the specific directory --------
    data_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['data_path_file'])))
    MYD06_dir = data_path_file[0, 0]     # e.g. '/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD06_L2/'
    MYD06_prefix = data_path_file[0, 1]  # 'MYD06_L2.A'
    MYD03_dir = data_path_file[1, 0]     # e.g. '/umbc/xfs1/cybertrn/common/Data/Satellite_Observations/MODIS/MYD03/'
    MYD03_prefix = data_path_file[1, 1]  # 'MYD03.A'
    fileformat = 'hdf'

    output_path_file = np.array(pd.DataFrame.from_dict(json.loads(inputSpec['output_path'])))
    output_dir = output_path_file[0, 0]
    output_prefix = output_path_file[0, 1]

    # -------------STEP 2: Set up spatial and temporal resolution & variable names----------
    NTA_lats = [poly[0], poly[1]]    # e.g. [-90, 90]
    NTA_lons = [poly[2], poly[3]]    # e.g. [-180, 180]
    gap_x, gap_y = grid[1], grid[0]  # e.g. 0.5, 0.625
    if ((NTA_lons[-1] - NTA_lons[0]) % gap_x != 0) | ((NTA_lats[-1] - NTA_lats[0]) % gap_y != 0):
        print("The extent of the selected region must be divisible by the grid size.")
        print("If you choose latitudes from -40 to 40, the latitude grid size must divide 80.")
        print("If you choose longitudes from 20 to 35, the longitude grid size must divide 55.")
        print("Please try again!")
        sys.exit()
    map_lon = np.arange(NTA_lons[0], NTA_lons[1], gap_x)
    map_lat = np.arange(NTA_lats[0], NTA_lats[1], gap_y)
    Lon, Lat = np.meshgrid(map_lon, map_lat)
    grid_lon = int((NTA_lons[-1] - NTA_lons[0]) / gap_x)
    grid_lat = int((NTA_lats[-1] - NTA_lats[0]) / gap_y)

    # --------------STEP 3: Create arrays for level-3 statistics data-------------------------
    grid_data = {}
    bin_num1 = np.zeros(len(varnames)).astype(int)
    bin_num2 = np.zeros(len(varnames)).astype(int)
    key_idx = 0
    for key in varnames:
        if sts_switch[0]:
            grid_data[key + '_' + sts_name[0]] = np.zeros(grid_lat * grid_lon) + np.inf
        if sts_switch[1]:
            grid_data[key + '_' + sts_name[1]] = np.zeros(grid_lat * grid_lon) - np.inf
        if sts_switch[2] | sts_switch[3] | sts_switch[4]:
            grid_data[key + '_' + sts_name[2]] = np.zeros(grid_lat * grid_lon)
            grid_data[key + '_' + sts_name[3]] = np.zeros(grid_lat * grid_lon)
            grid_data[key + '_' + sts_name[4]] = np.zeros(grid_lat * grid_lon)
        if sts_switch[5]:
            bin_interval1 = np.fromstring(intervals_1d[key_idx], dtype=float, sep=',')
            bin_num1[key_idx] = bin_interval1.shape[0] - 1
            grid_data[key + '_' + sts_name[5]] = np.zeros((grid_lat * grid_lon, bin_num1[key_idx]))
        if sts_switch[6]:
            bin_interval2 = np.fromstring(intervals_2d[key_idx], dtype=float, sep=',')
            bin_num2[key_idx] = bin_interval2.shape[0] - 1
            grid_data[key + '_' + sts_name[6] + histnames[key_idx]] = np.zeros(
                (grid_lat * grid_lon, bin_num1[key_idx], bin_num2[key_idx]))
        key_idx += 1

    # --------------STEP 4: Read the filename list for the selected time period---------------
    fname1, fname2 = [], []
    start_date = np.fromstring(inputSpec['start_date'], dtype=int, sep='/')
    end_date = np.fromstring(inputSpec['end_date'], dtype=int, sep='/')
    start = date(start_date[0], start_date[1], start_date[2])
    until = date(end_date[0], end_date[1], end_date[2])
    for dt in rrule(DAILY, interval=1, dtstart=start, until=until):
        year = int(dt.strftime("%Y"))
        month = int(dt.strftime("%m"))
        day = int(dt.strftime("%d"))
        data = datetime(year, month, day)
        daynew = data.toordinal()
        yearstart = datetime(year, 1, 1)
        day_yearstart = yearstart.toordinal()
        day_in_year = (daynew - day_yearstart) + 1
        yc = '%04i' % year
        dc = '%03i' % day_in_year
        fname_tmp1 = series.read_filelist(MYD06_dir, MYD06_prefix, yc, dc, fileformat)
        fname_tmp2 = series.read_filelist(MYD03_dir, MYD03_prefix, yc, dc, fileformat)
        fname1 = np.append(fname1, fname_tmp1)
        fname2 = np.append(fname2, fname_tmp2)
        print('***********year/month************')
        print(year, month)
    filenum = np.arange(len(fname1))
    print(len(fname1))

    # --------------STEP 5: Read the attributes of each variable------------------------------
    unit_list = []
    scale_list = []
    offst_list = []
    longname_list = []
    fillvalue_list = []
    ncfile = Dataset(fname1[0], 'r')

    # Read the user-defined variables from the MYD06 product
    tmp_idx = 0
    for key in varnames:
        if key == 'Cloud_Fraction':
            name_idx = tmp_idx
            continue  # skip Cloud_Fraction from the input file; its long name is inserted below
        else:
            # This variant of series.readEntry returns only the data and the long name
            # tmp_data,lonam,unit,fill,scale,offst = series.readEntry(key,ncfile,spl_num)
            tmp_data, lonam = series.readEntry(key, ncfile, spl_num)
            # unit_list = np.append(unit_list, unit)
            # scale_list = np.append(scale_list, scale)
            # offst_list = np.append(offst_list, offst)
            longname_list = np.append(longname_list, lonam)
            # fillvalue_list = np.append(fillvalue_list, fill)
            tmp_idx += 1

    # Insert the long name of the cloud-fraction entry at its original position
    CM_unit = 'none'
    CM_longname = 'Cloud Fraction from Cloud Mask (cloudy & prob cloudy)'
    CM_fillvalue = -9999
    CM_scale_factor = 0.0001
    CM_add_offset = 0.0
    # unit_list = np.insert(unit_list, name_idx, CM_unit)
    # scale_list = np.insert(scale_list, name_idx, CM_scale_factor)
    # offst_list = np.insert(offst_list, name_idx, CM_add_offset)
    longname_list = np.insert(longname_list, name_idx, CM_longname)
    # fillvalue_list = np.insert(fillvalue_list, name_idx, CM_fillvalue)
    ncfile.close()

    # --------------STEP 6: Start Aggregation------------------------------------------------
    print('***********calling operate************')
    xds = self.operate(fname1, fname2, NTA_lats, NTA_lons, grid_lon, grid_lat, gap_x, gap_y, filenum,
                       grid_data, sts_switch, varnames, intervals_1d, intervals_2d, var_idx, spl_num,
                       sts_name, histnames, bin_num1, bin_num2, year, month, map_lat, map_lon,
                       longname_list, output_dir, output_prefix)
    # resultDataset = xa.DataArray(xds, name='test')
    return TaskResult(kwargs, [xds])
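# Usage sketch (hedged): run the aggregation and pull the level-3 dataset out of
# the TaskResult; 'executable' is a hypothetical instance of this class and
# '.datasets' is an assumed TaskResult attribute, not part of the source.
result = executable.execute(rid="agg-0")
level3 = result.datasets[0]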