Exemple #1
0
def main(start_date, end_date, gridding_method, grid_name, data_path):
    """Grid all OMI NO2 orbits of a period and save the Level 3 product.

    Orbits are read with ``omi.he5.iter_orbits``, clipped to the grid,
    preprocessed with ``preprocessing`` and accumulated on one grid with
    ``omi.psm_grid`` or ``omi.cvm_grid``.  The normalised grid is written
    to ``test_<gridding_method>.he5`` and ``test_<gridding_method>.png``
    (relative paths) and finally reset with ``grid.zero()``.

    Args:
        start_date: start of the period, passed to ``omi.he5.iter_orbits``.
        end_date: end of the period, passed to ``omi.he5.iter_orbits``.
        gridding_method: ``'psm'`` selects ``omi.psm_grid``; every other
            value selects ``omi.cvm_grid``.
        grid_name: name of the grid, resolved by ``omi.Grid.by_name``.
        data_path: root directory handed to ``omi.he5.iter_orbits``.
    """
    use_psm = gridding_method == 'psm'

    # 1. Define a grid
    #    (a) either by giving lower-left and upper-right corner, e.g.
    #        >>> grid = omi.Grid(
    #        ...     llcrnrlat=19.6, urcrnrlat=25.6,
    #        ...     llcrnrlon=108.8, urcrnrlon=117.6, resolution=0.01)
    #    (b) or by reading this data from a JSON file
    #        (the default file can be found in omi/data/gridds.json).
    #    Only (b) is executed: a grid built via (a) was overwritten by (b)
    #    before it was ever used.
    grid = omi.Grid.by_name(grid_name)

    # 2. Define parameter for PSM
    #    - gamma (smoothing parameter)
    #    - rho_est (typical maximum value of distribution)
    rho_est = 4e16
    if use_psm:
        # gamma is computed as function of pixel overlap
        gamma = omi.compute_smoothing_parameter(1.0, 10.0)

    # 3. Define a mapping which maps a key to the path in the HDF file.
    #    The function
    #    >>> omi.he5.create_name2dataset(path, list_of_dataset_names, dict)
    #    can be helpful.
    name2datasets = [NAME2DATASET_NO2, NAME2DATASET_PIXEL]

    # 4a) data in OMI files can be read by
    #     >>> data = omi.he5.read_datasets(filename, name2dataset)
    # 4b) or by iterating over orbits from start to end date at the
    #     following location:
    #         os.path.join(data_path, product, 'level2', year, doy, '*.he5')
    #     (see omi.he5 module for details)
    products = ['OMNO2.003', 'OMPIXCOR.003']
    orbits = omi.he5.iter_orbits(
        start_date, end_date, products, name2datasets, data_path)

    for timestamp, orbit, data in orbits:

        # 5) Check for missing corner coordinates, i.e. the zoom product,
        #    which is currently not supported
        corners_masked = (data['TiledCornerLongitude'].mask.any()
                          or data['TiledCornerLatitude'].mask.any())
        if corners_masked:
            continue

        # 6) Clip orbit to grid domain
        data = omi.clip_orbit(
            grid,
            data['FoV75CornerLongitude'],
            data['FoV75CornerLatitude'],
            data,
            boundary=(2, 2),
        )
        if data['ColumnAmountNO2Trop'].size == 0:
            continue

        # 7) Use a self-written function to preprocess the OMI data and
        #    to create the following arrays MxN:
        #    - measurement values
        #    - measurement errors (currently only CVM grids errors)
        #    - estimate of stddev (used in PSM)
        #    - weight of each measurement
        values, errors, stddev, weights = preprocessing(
            gridding_method, **data)
        # Copy the mask so later changes to ``values`` cannot alter it.
        missing_values = values.mask.copy()

        if np.all(values.mask):
            continue

        # 8) Grid orbit using PSM or CVM.  A single pre-formatted argument
        #    prints the same text under Python 2 and Python 3.
        print('time: %s, orbit: %d' % (timestamp, orbit))
        if use_psm:
            grid = omi.psm_grid(
                grid,
                data['Longitude'], data['Latitude'],
                data['TiledCornerLongitude'], data['TiledCornerLatitude'],
                values, errors, stddev, weights, missing_values,
                data['SpacecraftLongitude'], data['SpacecraftLatitude'],
                data['SpacecraftAltitude'],
                gamma[data['ColumnIndices']],
                rho_est
            )
        else:
            grid = omi.cvm_grid(
                grid,
                data['FoV75CornerLongitude'], data['FoV75CornerLatitude'],
                values, errors, weights, missing_values
            )

    # 9) The distribution of values and errors has to be normalised
    #    with the weight.
    grid.norm()

    # 10) The Level 3 product can be saved as HDF5 file
    #     or converted to an image (requires matplotlib and basemap)
    grid.save_as_he5('test_%s.he5' % gridding_method)
    grid.save_as_image('test_%s.png' % gridding_method, vmin=0, vmax=rho_est)

    # 11) It is possible to set values, errors and weights to zero.
    grid.zero()
def grid_orbit(data,
               grid_info,
               gridded_quantity,
               gridding_method='psm',
               preproc_method='generic',
               verbosity=0):
    """Grid one quantity of a single orbit with PSM or CVM.

    Args:
        data: dict holding the orbit's datasets.  It must contain
            ``gridded_quantity`` and every key returned by
            ``behr_datasets(gridding_method)``.
        grid_info: grid definition handed to ``make_grid``.
        gridded_quantity: key in ``data`` of the field to grid.
        gridding_method: ``'psm'`` or ``'cvm'``.
        preproc_method: ``'behr'``, ``'sp'`` or ``'generic'``
            (compared case-insensitively).
        verbosity: values greater than 1 print progress messages.

    Returns:
        A tuple ``(grid_values, wgrid_values)`` with NaNs replaced by 0 and
        the weights set to 0 wherever the gridded value was invalid.
        ``(None, None)`` is returned when the orbit is skipped: masked tiled
        corner coordinates, no pixel left after clipping, all values masked,
        or a ``QhullError`` during gridding.

    Raises:
        TypeError: ``data`` is not a dict.
        KeyError: ``gridded_quantity`` or an expected dataset is missing.
        NotImplementedError: unknown ``preproc_method`` or
            ``gridding_method``.
    """
    # Input checking
    if not isinstance(data, dict):
        raise TypeError('data must be a dict')
    if gridded_quantity not in data:
        raise KeyError(
            'data does not have a key matching the gridded_quantity "{}"'.
            format(gridded_quantity))
    missing_keys = [
        k for k in behr_datasets(gridding_method) if k not in data
    ]
    if missing_keys:
        raise KeyError(
            'data is missing the following expected keys: {0}'.format(
                ', '.join(missing_keys)))

    grid = make_grid(grid_info)

    # 5) Check for missing corner coordinates, i.e. the zoom product,
    #    which is currently not supported
    if (data['TiledCornerLongitude'].mask.any()
            or data['TiledCornerLatitude'].mask.any()):
        return None, None

    # 6) Clip orbit to grid domain
    lon = data['FoV75CornerLongitude']
    lat = data['FoV75CornerLatitude']
    data = omi.clip_orbit(grid, lon, lat, data, boundary=(2, 2))

    if data['BEHRColumnAmountNO2Trop'].size == 0:
        return None, None

    # Preprocess the BEHR data to create the following arrays MxN:
    #    - measurement values
    #    - measurement errors (used in CVM, not PSM)
    #    - estimate of stddev (used in PSM)
    #    - weight of each measurement
    if verbosity > 1:
        print('    Doing {} preprocessing for {}'.format(
            preproc_method, gridded_quantity))
    preproc_key = preproc_method.lower()
    if preproc_key == 'behr':
        values, errors, stddev, weights = behr_preprocessing(
            gridding_method, **data)
    elif preproc_key == 'sp':
        values, errors, stddev, weights = sp_preprocessing(
            gridding_method, **data)
    elif preproc_key == 'generic':
        # The field is copied before it is handed over: this function is
        # meant to be called several times on the same ``data`` to grid
        # different fields, so the stored values must not be altered.
        values, errors, stddev, weights = generic_preprocessing(
            gridding_method, gridded_quantity, data[gridded_quantity].copy(),
            **data)
    else:
        # Report the argument that is actually unsupported; the message
        # previously printed gridded_quantity under a misleading label.
        raise NotImplementedError(
            'No preprocessing option for preproc_method={}'.format(
                preproc_method))

    missing_values = values.mask.copy()

    if np.all(values.mask):
        # Callers unpack two outputs, so a single None is not enough.
        return None, None

    if verbosity > 1:
        print('    Gridding {}'.format(gridded_quantity))

    is_flag_field = gridded_quantity in flag_fields

    if gridding_method == 'psm':
        # Only PSM grids the weights separately, so the second grid is
        # created here rather than unconditionally.
        wgrid = make_grid(grid_info)

        gamma = omi.compute_smoothing_parameter(40.0, 40.0)
        rho_est = np.max(weights) * 1.2
        if verbosity > 1:
            print('    Gridding weights')

        try:
            # Using 90% of the weight as its std. dev. is a best guess
            # comparing uncertainty over land and sea.
            wgrid = omi.psm_grid(wgrid, data['Longitude'], data['Latitude'],
                                 data['TiledCornerLongitude'],
                                 data['TiledCornerLatitude'], weights,
                                 errors, weights * 0.9, weights,
                                 missing_values, data['SpacecraftLongitude'],
                                 data['SpacecraftLatitude'],
                                 data['SpacecraftAltitude'],
                                 gamma[data['ColumnIndices']], rho_est)
        except QhullError as err:
            print("Cannot interpolate, QhullError: {0}".format(err.args[0]))
            return None, None

        rho_est = np.max(values) * 1.2
        gamma = omi.compute_smoothing_parameter(1.0, 10.0)
        try:
            grid = omi.psm_grid(grid, data['Longitude'], data['Latitude'],
                                data['TiledCornerLongitude'],
                                data['TiledCornerLatitude'], values, errors,
                                stddev, weights, missing_values,
                                data['SpacecraftLongitude'],
                                data['SpacecraftLatitude'],
                                data['SpacecraftAltitude'],
                                gamma[data['ColumnIndices']], rho_est)
        except QhullError as err:
            print("Cannot interpolate, QhullError: {0}".format(err.args[0]))
            return None, None

        # Divide by the weights (up to here the gridded values are still
        # multiplied by them); the weight grid is normalised as well.
        grid.norm()
        wgrid.norm()

        # Clip the weights so that only positive weights are allowed.
        # Converting to np.array removes any masking.
        plain_weights = np.array(wgrid.values)
        wgrid.values = np.clip(plain_weights, 0.01, np.max(plain_weights))

        wgrid_values = np.array(wgrid.values)
        grid_values = np.array(grid.values)
    elif gridding_method == 'cvm':
        try:
            grid = omi.cvm_grid(grid,
                                data['FoV75CornerLongitude'],
                                data['FoV75CornerLatitude'],
                                values,
                                errors,
                                weights,
                                missing_values,
                                is_flag=is_flag_field)
        except QhullError as err:
            print("Cannot interpolate, QhullError: {0}".format(err.args[0]))
            return None, None

        if is_flag_field:
            wgrid_values = np.ones_like(grid.values)
        else:
            # Take the weights before norm() is applied to the grid.
            wgrid_values = grid.weights
            grid.norm()
        grid_values = grid.values
    else:
        raise NotImplementedError(
            'gridding method {0} not understood'.format(gridding_method))

    # nan_to_num zeroes invalid gridded values, but that alone does not zero
    # the matching weights.  They are multiplied by the validity mask so that
    # invalid cells do not reduce the final column when the caller divides by
    # the total weights.
    good_values = ~np.ma.masked_invalid(grid_values).mask
    grid_values = np.nan_to_num(grid_values)
    wgrid_values = np.nan_to_num(wgrid_values) * good_values
    return grid_values, wgrid_values
Exemple #3
0
def main(start_date):
    """Grid one day of OMI NO2 data on a fixed grid with PSM.

    The day starting at ``start_date`` is gridded on a 0.004 degree grid
    (30-46 N, 124-146 E) and saved as
    ``<grid>_<dd>_<mm>_<yyyy>_psm.he5`` in a hard-coded output directory.
    Nothing is done when that file already exists and is larger than
    10e5 bytes.

    Any ``Exception`` raised while processing is reported on stdout and
    swallowed, so a driver looping over many days keeps running.

    Args:
        start_date: ``datetime`` (or ``date``) of the day to process; only
            its year, month and day are used to build the one-day window
            and the file name.
    """
    # Local import: elsewhere this block only relies on the name
    # ``datetime`` being in scope.
    from datetime import timedelta

    gridding_method = 'psm'
    grid_name = 'Japan'
    # Read up front so the failure message can always be formatted.
    month = getattr(start_date, 'month', None)
    day = getattr(start_date, 'day', None)

    try:
        # The window ends at midnight of the following day; timedelta takes
        # care of month and year roll-over.
        end_date = datetime(start_date.year, month, day) + timedelta(days=1)

        name = '/usr/users/annette.schuett/Masterarbeit/omi-master/Japan/%s_%02d_%02d_%04d_%s.he5' % (
            grid_name, day, month, start_date.year, gridding_method)

        # An existing file above the size threshold counts as finished;
        # smaller files are redone.
        if os.path.isfile(name) and os.stat(name).st_size > 10e5:
            return

        print('%s %s' % ('Do_again', name))

        data_path = '/home/zoidberg/OMI'

        # 1. Define a grid by giving lower-left and upper-right corner.
        #    It is created only now, after the existence check, so finished
        #    days do not pay for the allocation.
        grid = omi.Grid(llcrnrlat=30.0,
                        urcrnrlat=46.0,
                        llcrnrlon=124.0,
                        urcrnrlon=146.0,
                        resolution=0.004)

        # 2. Define parameter for PSM
        #    - gamma (smoothing parameter)
        #    - rho_est (typical maximum value of distribution)
        rho_est = 4e15  # !!! normally: 4e16
        # gamma is computed as function of pixel overlap
        gamma = omi.compute_smoothing_parameter(1.0, 10.0)

        # 3. Define a mapping which maps a key to the path in the HDF file.
        name2datasets = [NAME2DATASET_NO2, NAME2DATASET_PIXEL]

        # 4. Iterate over orbits from start to end date at the following
        #    location:
        #        os.path.join(data_path, product, 'level2', year, doy, '*.he5')
        #    (see omi.he5 module for details)
        products = ['OMNO2.003', 'OMPIXCOR.003']
        for timestamp, orbit, data in omi.he5.iter_orbits(
                start_date, end_date, products, name2datasets, data_path):

            # 5) Check for missing corner coordinates, i.e. the zoom
            #    product, which is currently not supported
            if (data['TiledCornerLongitude'].mask.any()
                    or data['TiledCornerLatitude'].mask.any()):
                continue

            # 6) Clip orbit to grid domain
            lon = data['FoV75CornerLongitude']
            lat = data['FoV75CornerLatitude']
            data = omi.clip_orbit(grid, lon, lat, data, boundary=(2, 2))

            if data['ColumnAmountNO2Trop'].size == 0:
                continue

            # 7) Use a self-written function to preprocess the OMI data
            #    and to create the following arrays MxN:
            #    - measurement values
            #    - measurement errors (currently only CVM grids errors)
            #    - estimate of stddev (used in PSM)
            #    - weight of each measurement
            values, errors, stddev, weights = preprocessing(
                gridding_method, **data)
            missing_values = values.mask.copy()

            if np.all(values.mask):
                continue

            # 8) Grid orbit using PSM
            print('time: %s, orbit: %d' % (timestamp, orbit))

            grid = omi.psm_grid(grid, data['Longitude'], data['Latitude'],
                                data['TiledCornerLongitude'],
                                data['TiledCornerLatitude'], values,
                                errors, stddev, weights, missing_values,
                                data['SpacecraftLongitude'],
                                data['SpacecraftLatitude'],
                                data['SpacecraftAltitude'],
                                gamma[data['ColumnIndices']], rho_est)

        # 9) The distribution of values and errors has to be normalised
        #    with the weight.
        grid.norm()

        # 10) Save the Level 3 product as HDF5 file
        grid.save_as_he5(name)

        # 11) Set values, errors and weights to zero.
        grid.zero()
    except Exception as err:
        # Best effort on purpose: report the failing day and carry on.
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        print("Oh je, da funktioniert der Algorithmus noch nicht: Noch Mal anschauen: Monat %s den %s .ten Tag" % (month, day))
        print('%s: %s' % (type(err).__name__, err))
Exemple #4
0
def main(start_date, end_date, grid_name, data_path, save_path):
    """Grid the orbits of a period with PSM and save the weighted mean.

    Every orbit is gridded twice with ``omi.psm_grid``: once for the
    values and once for a weight derived from the errors and the cloud
    radiance fraction.  The per-orbit results are accumulated on a
    collector grid as ``values * weights`` and ``weights``; the collector
    is normalised and saved as ``<filename>.he5``, where ``filename`` comes
    from ``generate_filename(save_path, start_date, grid_name)``.

    Nothing is gridded when the output file already exists.  Any
    ``Exception`` raised while processing is reported on stdout and
    swallowed.

    Args:
        start_date: start of the period; also used for the file name.
        end_date: end of the period, passed to ``omi.he5.iter_orbits``.
        grid_name: grid name resolved by ``gridname2grid``.
        data_path: root directory handed to ``omi.he5.iter_orbits``.
        save_path: output location handed to ``generate_filename``.
    """
    # 1. Define the grids: ``grid`` for a single orbit, ``gridcoll`` to
    #    collect all orbits.
    grid = gridname2grid(grid_name)
    gridcoll = gridname2grid(grid_name)

    grid.values = grid.values * 0.0
    gridcoll.values = gridcoll.values * 0.0
    gridcoll.weights = gridcoll.weights * 0.0

    filename = generate_filename(save_path, start_date, grid_name)
    fname = '%s.he5' % (filename)

    if os.path.isfile(fname):
        print('Existing file:         ', fname)
    else:
        try:
            # 2. Define parameter for PSM
            #    - gamma (smoothing parameter), computed as function of
            #      pixel overlap
            #    - rho_est (typical maximum value of distribution)
            #    Both pairs are the same for every orbit, so they are
            #    computed once here.
            gamma_values = omi.compute_smoothing_parameter(1.0, 10.0)
            rho_est_values = 4e16
            gamma_weights = omi.compute_smoothing_parameter(40.0, 40.0)
            rho_est_weights = 4

            # 3. Define a mapping which maps a key to the path in the
            #    HDF file.
            name2datasets = [NAME2DATASET_NO2, NAME2DATASET_PIXEL]

            # 4. Iterate over orbits from start to end date at the
            #    following location:
            #        os.path.join(data_path, product, 'level2', year, doy, '*.he5')
            #    (see omi.he5 module for details)
            products = ['BEHR-PSM', 'BEHR-PSM']
            for timestamp, orbit, data in omi.he5.iter_orbits(
                    start_date, end_date, products, name2datasets, data_path):
                print('time: %s, orbit: %d' % (timestamp, orbit))
                grid = gridname2grid(grid_name)

                # 5) Check for missing corner coordinates, i.e. the zoom
                #    product, which is currently not supported
                if (data['TiledCornerLongitude'].mask.any()
                        or data['TiledCornerLatitude'].mask.any()):
                    continue

                # 6) Clip orbit to grid domain
                lon = data['FoV75CornerLongitude']
                lat = data['FoV75CornerLatitude']
                data = omi.clip_orbit(grid, lon, lat, data, boundary=(2, 2))

                if data['ColumnAmountNO2Trop'].size == 0:
                    continue

                # 7) Use a self-written function to preprocess the OMI
                #    data and to create the following arrays MxN:
                #    - measurement values
                #    - measurement errors (currently only CVM grids errors)
                #    - estimate of stddev (used in PSM)
                #    - weight of each measurement
                values, errors, stddev, weights = preprocessing(**data)
                missing_values = values.mask.copy()

                if np.all(values.mask):
                    continue

                new_weight = weights / np.sqrt(np.abs(
                    (errors / 1e15) *
                    (1 + 2 * data['CloudRadianceFraction']**2)))

                # 8) Grid the values ...
                grid = omi.psm_grid(grid, data['Longitude'], data['Latitude'],
                                    data['TiledCornerLongitude'],
                                    data['TiledCornerLatitude'], values,
                                    errors, stddev, weights, missing_values,
                                    data['SpacecraftLongitude'],
                                    data['SpacecraftLatitude'],
                                    data['SpacecraftAltitude'],
                                    gamma_values[data['ColumnIndices']],
                                    rho_est_values)

                # ... and the weights.  Using 90% of new_weight as its
                # std. dev. is a best guess comparing uncertainty over
                # land and sea.
                wgrid = gridname2grid(grid_name)
                wgrid = omi.psm_grid(
                    wgrid, data['Longitude'], data['Latitude'],
                    data['TiledCornerLongitude'], data['TiledCornerLatitude'],
                    new_weight, errors, new_weight * 0.9, weights,
                    missing_values, data['SpacecraftLongitude'],
                    data['SpacecraftLatitude'], data['SpacecraftAltitude'],
                    gamma_weights[data['ColumnIndices']], rho_est_weights)

                # Divide by the weights (up to here the gridded values are
                # still multiplied by them).  wgrid is deliberately not
                # normalised: with a normalised wgrid the data is not as
                # smooth as it could be.
                grid.norm()
                wgrid.values = np.nan_to_num(np.array(wgrid.values))
                grid.values = np.nan_to_num(np.array(grid.values))

                gridcoll.values += grid.values * wgrid.values
                gridcoll.weights += wgrid.values
                grid.zero()
                wgrid.zero()

            # 9) The distribution of values and errors has to be
            #    normalised with the weight.
            gridcoll.norm()

            # 10) Save the Level 3 product as HDF5 file
            gridcoll.save_as_he5('%s.he5' % (filename))

        except Exception as err:
            # Best effort on purpose; the cause is printed as well because
            # missing data is not the only possible failure.
            print('No datas available at following day:', start_date)
            print('Reason: %r' % (err,))

    # 11) Set values, errors and weights to zero.
    grid.zero()
def main(start_date, end_date, grid_name, data_path, save_path,
         x_convolution_number, y_convolution_number,
         x_convolution_mid, y_convolution_mid):
    """Grid the orbits of a period with PSM after smoothing isolated spikes.

    For every orbit, values far above their neighbourhood are replaced by
    the output of ``redo_Convolution``.  Values and weights are then each
    gridded with ``omi.psm_grid`` and accumulated on a collector grid,
    which is normalised and saved as ``<filename>_clip_end.he5``.

    Nothing is gridded when ``<filename>.he5`` already exists.  Any
    ``Exception`` raised while processing is reported on stdout and
    swallowed.

    Args:
        start_date: start of the period; also used for the file name.
        end_date: end of the period, passed to ``omi.he5.iter_orbits``.
        grid_name: grid name resolved by ``gridname2grid``.
        data_path: root directory handed to ``omi.he5.iter_orbits``.
        save_path: output location handed to ``generate_filename``.
        x_convolution_number: passed through to ``redo_Convolution``.
        y_convolution_number: passed through to ``redo_Convolution``.
        x_convolution_mid: passed through to ``redo_Convolution``.
        y_convolution_mid: passed through to ``redo_Convolution``.
    """
    # 1. Define the collector grid; the per-orbit grids are created
    #    inside the loop.
    gridcoll = gridname2grid(grid_name)
    gridcoll.values = gridcoll.values * 0.0

    filename = generate_filename(save_path, start_date, grid_name)

    # NOTE(review): this checks '<filename>.he5' while the result is saved
    # as '<filename>_clip_end.he5' -- confirm this is intended.
    fname = '%s.he5' % (filename)
    if os.path.isfile(fname):
        print('%s %s' % ('Existing file:         ', fname))
        return

    try:
        print(filename)

        # 2. Define parameter for PSM
        #    - gamma (smoothing parameter), computed as function of pixel
        #      overlap; identical for every orbit, so computed once
        #    - rho_est (typical maximum value of distribution) is derived
        #      per orbit below
        gamma_weights = omi.compute_smoothing_parameter(40.0, 40.0)
        gamma_values = omi.compute_smoothing_parameter(1.0, 10.0)

        # 3. Define a mapping which maps a key to the path in the HDF file.
        name2datasets = [NAME2DATASET_NO2, NAME2DATASET_PIXEL]

        # 4. Iterate over orbits from start to end date at the following
        #    location:
        #        os.path.join(data_path, product, 'level2', year, doy, '*.he5')
        #    (see omi.he5 module for details)
        products = ['OMNO2.003', 'OMPIXCOR.003']
        for timestamp, orbit, data in omi.he5.iter_orbits(
                start_date, end_date, products, name2datasets, data_path):
            print('time: %s, orbit: %d' % (timestamp, orbit))
            grid = gridname2grid(grid_name)

            # 5) Check for missing corner coordinates, i.e. the zoom
            #    product, which is currently not supported
            if (data['TiledCornerLongitude'].mask.any()
                    or data['TiledCornerLatitude'].mask.any()):
                continue

            # 6) Clip orbit to grid domain
            lon = data['FoV75CornerLongitude']
            lat = data['FoV75CornerLatitude']
            data = omi.clip_orbit(grid, lon, lat, data, boundary=(2, 2))

            if data['ColumnAmountNO2Trop'].size == 0:
                continue

            # 7) Preprocess; the fifth output is not used here.
            values, errors, stddev, weights, _cf = preprocessing(**data)
            missing_values = values.mask.copy()

            # NOTE(review): these look like debugging dumps, overwritten
            # for every orbit -- confirm whether they are still needed.
            values.dump('values.npy')
            weights.dump('weights.npy')

            # For the Gaussian smoothing all values being smoothed must be
            # valid, because the unmasked data is passed to
            # redo_Convolution.  Fill values and NaNs are therefore
            # replaced by a value typical of the area.  NaN never compares
            # equal to anything, so it has to be found with np.isnan.
            values.data[values.data < -1e29] = 1e15
            values.data[np.isnan(values.data)] = 1e15

            # Copy the mask: assigning into ``values`` below changes the
            # live mask, and the original one is restored afterwards.
            valuesmask = values.mask.copy()

            if np.all(values.mask):
                continue

            # Look at every value above 4 standard deviations together
            # with the 3x3 neighbourhood around it.
            spike_rows, spike_cols = None, None
            try:
                spikes = np.where(values > 4 * np.std(values))
                for m, n in zip(*spikes):
                    B = values[m-1:m+2, n-1:n+2]
                    # NOTE(review): the centre is compared with the mean of
                    # the first row of the neighbourhood only -- confirm
                    # that this is the intended reference.
                    if B[1][1] / np.nanmean(B[0]) >= 30:
                        A = values[m-8:m+9, n-1:n+2]
                        amean = np.nanmean(A)
                        replace = redo_Convolution(
                            A.data, x_convolution_number,
                            y_convolution_number, x_convolution_mid,
                            y_convolution_mid)
                        # Rescale so the window keeps its previous mean.
                        replace2 = replace * (amean / np.nanmean(replace))
                        values[m-8:m+9, n-1:n+2] = replace2
            except Exception:
                # E.g. a spike at the border whose neighbourhood is
                # incomplete; the remaining spikes are left untouched.
                print('no std to high')

            # Re-mask what was masked before the smoothing.
            values = ma.array(values, mask=valuesmask)

            print('time: %s, orbit: %d' % (timestamp, orbit))

            # 8) Grid the weights ...
            wgrid = gridname2grid(grid_name)
            rho_est = np.max(weights) * 1.2
            wgrid = omi.psm_grid(
                wgrid,
                data['Longitude'], data['Latitude'],
                data['TiledCornerLongitude'], data['TiledCornerLatitude'],
                weights, errors, weights * 0.9, weights, missing_values,
                data['SpacecraftLongitude'], data['SpacecraftLatitude'],
                data['SpacecraftAltitude'],
                gamma_weights[data['ColumnIndices']],
                rho_est
            )

            print('wgrid vorbei')

            # ... and the values.
            rho_est = np.max(values) * 1.2
            grid = omi.psm_grid(
                grid,
                data['Longitude'], data['Latitude'],
                data['TiledCornerLongitude'], data['TiledCornerLatitude'],
                values, errors, stddev, weights, missing_values,
                data['SpacecraftLongitude'], data['SpacecraftLatitude'],
                data['SpacecraftAltitude'],
                gamma_values[data['ColumnIndices']],
                rho_est
            )

            print('grid vorbei')
            grid.norm()
            wgrid.norm()

            # Only positive weights are allowed.
            plain_weights = np.array(wgrid.values)
            wgrid.values = np.clip(plain_weights, 0.01,
                                   np.max(plain_weights))

            # Cells with an invalid gridded value contribute neither to
            # the weighted sum nor to the total weight.
            valid = ~np.ma.masked_invalid(grid.values).mask
            gridcoll.values += (np.nan_to_num(grid.values)
                                * np.nan_to_num(wgrid.values) * valid)
            gridcoll.weights += np.nan_to_num(wgrid.values) * valid

            grid.zero()
            wgrid.zero()
            print('gridzero, wgridzero')

        # 9) The distribution of values and errors has to be normalised
        #    with the weight.
        print('doof')
        gridcoll.norm()

        print('doof2')

        # 10) Save the Level 3 product as HDF5 file
        print(filename)
        gridcoll.save_as_he5('%s_clip_end.he5' % (filename))
        print('geschafft')
        gridcoll.zero()

    except Exception as err:
        # Best effort on purpose; the cause is printed as well because
        # missing data is not the only possible failure.
        print('%s %s' % ('No datas available at following day:', start_date))
        print('Reason: %r' % (err,))
Exemple #6
0
def main(start_date, end_date, gridding_method, grid_name, data_path):
    """Grid OMI NO2 orbits onto a fixed grid and write the Level 3 result.

    Every orbit yielded by ``omi.he5.iter_orbits`` for the given date range
    is clipped to the grid, preprocessed and accumulated onto the grid.
    Afterwards the grid is normalised, saved as an HDF5 file and as a PNG
    image in the current working directory, and finally reset to zero.

    Parameters
    ----------
    start_date, end_date :
        Forwarded unchanged to ``omi.he5.iter_orbits``.  ``str(start_date)``
        is additionally sliced as ``YYYY-MM-DD`` to build the output file
        names (assumes an ISO-like string representation -- TODO confirm).
    gridding_method :
        ``'psm'`` selects ``omi.psm_grid``; any other value selects
        ``omi.cvm_grid``.  Also forwarded to ``preprocessing`` and used as
        the last component of the output file names.
    grid_name :
        Used only as prefix of the output file names.  The grid itself is
        hard-coded below and is NOT looked up by this name.
    data_path :
        Forwarded unchanged to ``omi.he5.iter_orbits``.
    """
    use_psm = gridding_method == 'psm'

    # 1. Define the grid by its lower-left and upper-right corner.
    #    (Alternative: omi.Grid.by_name(grid_name) reads the definition from
    #    a JSON file instead of hard-coding it here.)
    grid = omi.Grid(
        llcrnrlat=25,
        urcrnrlat=50.05,
        llcrnrlon=-125,
        urcrnrlon=-64.95,
        resolution=0.05,
    )

    # 2. Parameters for PSM:
    #    - rho_est: typical maximum value of the distribution
    #    - gamma:   smoothing parameter, only needed (and only computed)
    #               when PSM is the selected method
    rho_est = 4e16
    if use_psm:
        gamma = omi.compute_smoothing_parameter(1.0, 10.0)

    # 3. Mappings from a key to the dataset path inside the HDF files, one
    #    mapping per product listed in step 4.
    name2datasets = [NAME2DATASET_NO2, NAME2DATASET_PIXEL]

    # 4. Iterate over all orbits between start and end date.  The product
    #    names are part of the directory path searched below data_path
    #    (see the omi.he5 module for details).
    products = ['OMNO2.003', 'OMPIXCOR.003']
    orbits = omi.he5.iter_orbits(
        start_date, end_date, products, name2datasets, data_path
    )
    for timestamp, orbit, data in orbits:

        # Debugging filter: swaths with an early timestamp only barely cross
        # the top of the domain.  They are dropped on purpose, even though
        # they may be illuminated.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('timestamp = %s' % (timestamp,))
        if timestamp.hour < 15:
            continue

        # 5. Skip orbits with missing corner coordinates, i.e. the zoom
        #    product, which is currently not supported.
        corners_missing = (
            data['TiledCornerLongitude'].mask.any()
            or data['TiledCornerLatitude'].mask.any()
        )
        if corners_missing:
            continue

        # 6. Clip the orbit to the grid domain.
        data = omi.clip_orbit(
            grid,
            data['FoV75CornerLongitude'],
            data['FoV75CornerLatitude'],
            data,
            boundary=(2, 2),
        )
        if data['ColumnAmountNO2Trop'].size == 0:
            continue

        # 7. Preprocess the orbit into measurement values, measurement
        #    errors, an estimate of the stddev (used by PSM) and the weight
        #    of each measurement (see the function `preprocessing`).
        values, errors, stddev, weights = preprocessing(
            gridding_method, **data
        )
        if np.all(values.mask):
            # Nothing usable in this orbit.
            continue
        # Copy the mask only for orbits that are actually gridded.
        missing_values = values.mask.copy()

        # 8. Accumulate the orbit onto the grid using PSM or CVM.
        print('time: %s, orbit: %d' % (timestamp, orbit))
        if use_psm:
            grid = omi.psm_grid(
                grid,
                data['Longitude'], data['Latitude'],
                data['TiledCornerLongitude'], data['TiledCornerLatitude'],
                values, errors, stddev, weights, missing_values,
                data['SpacecraftLongitude'], data['SpacecraftLatitude'],
                data['SpacecraftAltitude'],
                gamma[data['ColumnIndices']],
                rho_est,
            )
        else:
            grid = omi.cvm_grid(
                grid,
                data['FoV75CornerLongitude'], data['FoV75CornerLatitude'],
                values, errors, weights, missing_values,
            )

    # 9. The distribution of values and errors has to be normalised with
    #    the weight.
    grid.norm()

    # 10. Save the Level 3 product as HDF5 file and as image.  Both files
    #     share the stem <grid_name>_<DD>_<MM>_<YYYY>_<gridding_method>.
    date_text = str(start_date)
    stem = '%s_%s_%s_%s_%s' % (
        grid_name,
        date_text[8:10],
        date_text[5:7],
        date_text[0:4],
        gridding_method,
    )
    grid.save_as_he5(stem + '.he5')
    grid.save_as_image(stem + '.png', vmin=0, vmax=1e16)

    # 11. Reset values, errors and weights of the grid to zero.
    grid.zero()