def compute_indicator(args):
    """
    Fit one latitude index of a longitude chunk to the gamma distribution.

    The module-level ``input_shared_array`` and ``output_shared_array`` are
    viewed as numpy arrays (reshaped to ``input_data_shape`` and
    ``output_data_shape``). For every longitude index in
    ``range(lons_per_chunk)`` the series ``input_data[:, lon, lat]`` is passed
    to ``indices.spi_gamma`` once per entry of ``month_scales`` and the result
    is stored in ``output_data[scale_index, :, lon, lat]``.

    :param args: sequence where ``args[0]`` is the chunk's starting longitude
        index (used only in the log message) and ``args[1]`` is the latitude
        index
    """
    chunk_start, lat_index = args[0], args[1]

    # numpy views over the shared buffers
    input_data = np.ctypeslib.as_array(input_shared_array).reshape(input_data_shape)
    output_data = np.ctypeslib.as_array(output_shared_array).reshape(output_data_shape)

    for lon_index in range(lons_per_chunk):

        series = input_data[:, lon_index, lat_index]

        # a cell is skipped when it is entirely masked, entirely NaN, or has no value above zero
        nothing_to_fit = (isinstance(series, np.ma.MaskedArray) and series.mask.all()) \
            or np.isnan(series).all() \
            or (series <= 0).all()
        if nothing_to_fit:
            continue

        logger.info('Processing longitude/latitude: {}/{}'.format(chunk_start + lon_index, lat_index))

        # one gamma fitting per month scale, written into the matching output slot
        for scale_index, month_scale in enumerate(month_scales):
            fitted = indices.spi_gamma(series, month_scale, valid_min, valid_max)
            output_data[scale_index, :, lon_index, lat_index] = fitted
Esempio n. 2
0
def compute_indicator_by_lons(input_dataset,
                              output_dataset,
                              input_var_name,
                              output_var_name,
                              month_scale,
                              valid_min,
                              valid_max,
                              dim1_index,   # typically lon, for example with gridded datasets
                              dim2_index):  # typically lat, for example with gridded datasets
    """
    Fit every series of one ``dim1_index`` slice to the gamma distribution.

    Reads ``input_dataset.variables[input_var_name][:, dim1_index, :]``, runs
    ``indices.spi_gamma`` on each column that is not a fully masked array, and
    writes the slice to ``output_dataset.variables[output_var_name]`` at the
    same ``dim1_index``.

    The ``dim2_index`` argument is never read: every index along the
    variable's third dimension is processed.
    """
    source_variable = input_dataset.variables[input_var_name]

    # full shape of the input variable, needed to reshape the slice before writing
    original_shape = source_variable.shape

    # period of record for the whole slice at dim1_index
    data = source_variable[:, dim1_index, :]

    for column in range(original_shape[2]):

        series = data[:, column]

        # fully masked columns hold no data, leave them untouched
        if isinstance(series, np.ma.MaskedArray) and series.mask.all():
            continue

        logger.info('Processing x/y {}/{}'.format(dim1_index, column))

        # replace the column with its gamma-fitted values
        data[:, column] = indices.spi_gamma(series, month_scale, valid_min, valid_max)

    # write the slice back, reshaped with an explicit singleton second dimension
    reshaped = np.reshape(data, (original_shape[0], 1, original_shape[2]))
    output_dataset.variables[output_var_name][:, dim1_index, :] = reshaped
def compute_indicator_by_lons(input_dataset,
                              output_dataset,
                              input_var_name,
                              output_var_name,
                              month_scale,
                              valid_min, 
                              valid_max,
                              dim1_index,   # typically lon, for example with gridded datasets
                              dim2_index):  # typically lat, for example with gridded datasets
    """
    Fit every series of one ``dim1_index`` slice to the gamma distribution,
    guarding dataset I/O with the module-level ``lock``.

    Reads ``input_dataset.variables[input_var_name][:, dim1_index, :]`` while
    holding the lock, runs ``indices.spi_gamma`` on each column that is not a
    fully masked array without holding it, then reacquires the lock to write
    the slice into ``output_dataset.variables[output_var_name]``.

    The lock is taken with ``with`` so that it is released even when the
    dataset read or write raises; a bare acquire()/release() pair would leave
    it held and block every other worker.

    The ``dim2_index`` argument is never read (it is kept for interface
    compatibility): every index along the variable's third dimension is
    processed.
    """

    # hold the lock only for the duration of the input I/O
    with lock:

        input_variable = input_dataset.variables[input_var_name]

        # slice out the period of record for the whole dim1_index slice
        data = input_variable[:, dim1_index, :]

        # keep the original data shape, we'll use this to reshape later
        original_shape = input_variable.shape

    # the computation works on the local copy only, so no lock is needed here
    for column_index in range(original_shape[2]):

        series = data[:, column_index]

        # only process non-empty grid cells, i.e. skip columns that are fully masked
        if isinstance(series, np.ma.MaskedArray) and series.mask.all():
            continue

        logger.info('Processing x/y {}/{}'.format(dim1_index, column_index))

        # perform a fitting to gamma
        data[:, column_index] = indices.spi_gamma(series,
                                                  month_scale,
                                                  valid_min,
                                                  valid_max)

    # reacquire the lock for the output I/O
    with lock:

        # assign values for the period of record to the dim1_index slice
        output_dataset.variables[output_var_name][:, dim1_index, :] = \
            np.reshape(data, (original_shape[0], 1, original_shape[2]))
    def compute_indicator(self):
        """
        Worker loop: consume argument lists from ``self.queue`` until a
        ``None`` entry is received.

        Each argument list is ``[index, month_scale, valid_min, valid_max]``.
        ``self.shared_array`` is viewed as a numpy array reshaped to
        ``self.data_shape``; unless the series ``data[:, index]`` is entirely
        masked, entirely NaN, or entirely negative, it is replaced in place by
        the result of ``indices.spi_gamma``.

        Every item taken from the queue, including the ``None`` stop signal
        and items whose processing raises, is acknowledged with
        ``task_done()`` so that a ``join()`` on the queue cannot block forever.
        """

        while True:

            # get a list of arguments from the queue
            arguments = self.queue.get()

            try:

                # a None entry is the signal to stop this worker
                if arguments is None:
                    break

                # process the arguments here
                index = arguments[0]
                month_scale = arguments[1]
                valid_min = arguments[2]
                valid_max = arguments[3]

                # turn the shared array into a numpy array
                data = np.ctypeslib.as_array(self.shared_array)
                data = data.reshape(self.data_shape)

                series = data[:, index]

                # only process non-empty grid cells, i.e. skip series that are
                # entirely masked, entirely NaN, or entirely negative
                if (isinstance(series, np.ma.MaskedArray) and series.mask.all()) \
                        or np.isnan(series).all() or (series < 0).all():
                    continue

                logger.info('Processing latitude: {}'.format(index))

                # perform a fitting to gamma and update the shared array
                data[:, index] = indices.spi_gamma(series,
                                                   month_scale,
                                                   valid_min,
                                                   valid_max)

            finally:

                # indicate that the item has been handled; this runs on the
                # break/continue paths and when an exception propagates
                self.queue.task_done()
def compute_indicator(args):
    """
    Fit a single series of the module-level shared array to the gamma
    distribution, in place.

    ``shared_array`` is viewed as a numpy array reshaped to ``data_shape``.
    The series ``data[:, index]`` is overwritten with the result of
    ``indices.spi_gamma`` unless it is entirely masked, entirely NaN, or has
    no value above zero.

    :param args: sequence whose first element is the index to process
    """
    index = args[0]

    # numpy view over the shared buffer
    data = np.ctypeslib.as_array(shared_array).reshape(data_shape)
    series = data[:, index]

    # nothing to do for cells without usable values
    is_empty = (isinstance(series, np.ma.MaskedArray) and series.mask.all()) \
        or np.isnan(series).all() \
        or (series <= 0).all()
    if is_empty:
        return

    logger.info('Processing latitude: {}'.format(index))

    # gamma fitting, written straight back into the shared array
    fitted = indices.spi_gamma(series, month_scale, valid_min, valid_max)
    data[:, index] = fitted
                spi_gamma_datasets[variable_name_spi_gamma] = spi_gamma_dataset
                
            # loop over the grid cells
            for x in range(precip_dataset.variables[x_dim_name].size):
                for y in range(precip_dataset.variables[y_dim_name].size):

                    logger.info('Processing x/y {}/{}'.format(x, y))
                    
                    # slice out the period of record for the x/y point
                    precip_data = precip_dataset.variables[precip_var_name][:, x, y]
                                           
                    # only process non-empty grid cells, i.e. the data array contains at least some non-NaN values
                    if (isinstance(precip_data, np.ma.MaskedArray)) and precip_data.mask.all():
                        
                            continue
                    
                    else:  # we have some valid values to work with
                        
                        for month_scale_index, month_scale_var_name in enumerate(sorted(spi_gamma_datasets.keys())):

                            # perform the SPI computation (fit to the Gamma distribution) and assign the values into the dataset
                            spi_gamma_datasets[month_scale_var_name].variables[month_scale_var_name][:, x, y] = \
                                indices.spi_gamma(precip_data,
                                                  month_scales[month_scale_index], 
                                                  valid_min, 
                                                  valid_max)
            
    except Exception, e:
        logger.error('Failed to complete', exc_info=True)
        raise
Esempio n. 7
0
import sys
sys.path.insert(0, "/Users/marcoventurini/Downloads/indices_rc1")
import compute
import indices as ind
import pandas as pd
import json
import numpy as np
from scipy.stats import norm

# Read the monthly precipitation record; no index column is requested, so the
# frame gets the default 0..n-1 row index.
df = pd.read_csv(
    '/Users/marcoventurini/Documents/spark-2.0.0-bin-hadoop2.7/data/MonthlyPrp_lat-14_lon35.csv'
)
arrayD = np.asarray(df.PrpSummed)

# Gamma fitting at the 3-month scale. The result is assigned as a whole column
# instead of being copied row by row with df.loc, which also avoids
# pre-filling the column with np.NaN (an alias removed in NumPy 2.0).
# NOTE(review): assumes spi_gamma returns one value per input row - confirm.
arraySPI3 = ind.spi_gamma(arrayD, 3)
df['SPI3'] = arraySPI3

# Same computation at the 12-month scale.
arraySPI12 = ind.spi_gamma(arrayD, 12)
df['SPI12'] = arraySPI12

# Write only the date and index columns, without the row index.
df.to_csv(
    '/Users/marcoventurini/Documents/spark-2.0.0-bin-hadoop2.7/SPIshort.csv',
    columns=['Year', 'Month', 'SPI3', 'SPI12'],
    index=False)
Esempio n. 8
0
def compute_worker(args):
    """
    Compute the requested indicators for one latitude index of the
    module-level shared array, in place.

    ``shared_array`` is viewed as a numpy array reshaped to ``data_shape``,
    expected to be laid out as
    (indicators, distributions, month_scales, times, lats) with

        indicator:    spi = 0, spei = 1
        distribution: gamma = 0, pearson = 1
        month scale:  index 0 is reserved for input data, results start at 1

    ``data[0, 0, 0]`` holds the precipitation input and ``data[1, 0, 0]`` the
    temperature input. Results are written to
    ``data[indicator, distribution, month_index, :, lat_index]``.

    The SPEI calls read temperature from ``data[1, 0, 0]``. Reading it from
    ``data[0, 0, 1]`` would pick up the SPI/gamma output slot of the first
    month scale instead of temperature.

    :param args: sequence whose first element is the latitude index
    :raises ValueError: for an indicator other than 'spi'/'spei' or a
        distribution other than 'gamma'/'pearson'
    """

    # extract the arguments
    lat_index = args[0]

    # turn the shared array into a numpy array
    data = np.ctypeslib.as_array(shared_array)
    data = data.reshape(data_shape)

    # precipitation input for this latitude (a view into the shared array)
    precip = data[0, 0, 0, :, lat_index]

    # only process non-empty grid cells, i.e. skip when the precipitation series
    # is entirely masked, entirely NaN, or has no value above zero
    if (isinstance(precip, np.ma.MaskedArray) and precip.mask.all()) \
            or np.isnan(precip).all() or (precip <= 0).all():
        return

    logger.info('Processing latitude: {}'.format(lat_index))

    for month_index, month_scale in enumerate(month_scales):

        # month index 0 is reserved for the input data, results start at index 1
        if month_index == 0:
            continue

        # loop over all specified indicators
        for indicator in indicators:

            # loop over all specified distributions
            for distribution in distributions:

                if indicator == 'spi':

                    if distribution == 'gamma':
                        # perform a fitting to gamma
                        data[0, 0, month_index, :, lat_index] = indices.spi_gamma(precip,
                                                                                  month_scale,
                                                                                  valid_min,
                                                                                  valid_max)
                    elif distribution == 'pearson':
                        # perform a fitting to Pearson type III
                        data[0, 1, month_index, :, lat_index] = indices.spi_pearson(precip,
                                                                                    month_scale,
                                                                                    valid_min,
                                                                                    valid_max,
                                                                                    data_start_year,
                                                                                    data_end_year,
                                                                                    calibration_start_year,
                                                                                    calibration_end_year)
                    else:
                        # reject unknown distributions here as well, as the spei branch does
                        raise ValueError('Invalid distribution specified: {}'.format(distribution))

                elif indicator == 'spei':

                    # temperature input lives in the spei/gamma slot at month index 0;
                    # looked up only here so an spi-only array is never indexed at [1]
                    temperature = data[1, 0, 0, :, lat_index]

                    if distribution == 'gamma':
                        # perform a fitting to gamma
                        data[1, 0, month_index, :, lat_index] = indices.spei_gamma(precip,
                                                                                   temperature,
                                                                                   data_start_year,
                                                                                   lats_array[lat_index],
                                                                                   month_scale,
                                                                                   valid_min,
                                                                                   valid_max)
                    elif distribution == 'pearson':
                        # perform a fitting to Pearson type III
                        # NOTE(review): argument order differs from the spei_gamma call
                        # (month_scale precedes latitude) - confirm against indices.spei_pearson
                        data[1, 1, month_index, :, lat_index] = indices.spei_pearson(precip,
                                                                                     temperature,
                                                                                     month_scale,
                                                                                     lats_array[lat_index],
                                                                                     valid_min,
                                                                                     valid_max,
                                                                                     data_start_year,
                                                                                     data_end_year,
                                                                                     calibration_start_year,
                                                                                     calibration_end_year)
                    else:
                        raise ValueError('Invalid distribution specified: {}'.format(distribution))

                else:
                    raise ValueError('Invalid indicator specified: {}'.format(indicator))
Esempio n. 9
0
                                                                                                                           calibration_end_year)
            
                                # perform the SPEI computation (fit to the Gamma distribution) and assign the values into the dataset
                                spei_gamma_dataset.variables[variable_name_spei_gamma][:, x, y] = indices.spei_gamma(precip_data,
                                                                                                                     temp_data,
                                                                                                                     data_start_date.year,
                                                                                                                     latitude,
                                                                                                                     month_scale, 
                                                                                                                     valid_min, 
                                                                                                                     valid_max)
                                
                                # perform the SPI computation (fit to the Pearson distribution) and assign the values into the dataset
                                spi_pearson_dataset[variable_name_spi_pearson][:, x, y] = indices.spi_pearson(precip_data, 
                                                                                                              month_scale, 
                                                                                                              valid_min, 
                                                                                                              valid_max, 
                                                                                                              data_start_date.year, 
                                                                                                              data_end_date.year, 
                                                                                                              calibration_start_year, 
                                                                                                              calibration_end_year)

                                # perform the SPI computation (fit to the Gamma distribution) and assign the values into the dataset
                                spi_gamma_dataset.variables[variable_name_spi_gamma][:, x, y] = indices.spi_gamma(precip_data,
                                                                                                                  month_scale, 
                                                                                                                  valid_min, 
                                                                                                                  valid_max)
            
    except Exception, e:
        logger.error('Failed to complete', exc_info=True)
        raise
Esempio n. 10
0
                                        
                    # slice out the period of record for the x/y point
                    precip_data = precip_dataset.variables[precip_var_name][:, x, y]
                                           
                    # only process non-empty grid cells, i.e. data array contains at least some non-NaN values
                    if (isinstance(precip_data, np.ma.MaskedArray)) and precip_data.mask.all():
                        
                            continue
                    
                    else:  # we have some valid values to work with
                        
                        logger.info('Processing x/y {}/{}'.format(x, y))

                        # perform the SPI computation (fit to the Gamma distribution) and assign the values into the dataset
                        data = indices.spi_gamma(precip_data, 
                                                 month_scale, 
                                                 valid_min, 
                                                 valid_max)
                        output_dataset.variables[variable_name][:, x, y] = data
#                         output_dataset.variables[variable_name][:, x, y] = indices.spi_gamma(precip_data, 
#                                                                                              month_scale, 
#                                                                                              valid_min, 
#                                                                                              valid_max)
            
        # report on the elapsed time
        end_datetime = datetime.now()
        logger.info("End time: {}".format(end_datetime, '%x'))
        elapsed = end_datetime - start_datetime
        logger.info("Elapsed time: {}".format(elapsed, '%x'))

    except Exception, e:
        logger.error('Failed to complete', exc_info=True)