Example No. 1
def get_points_from_datasets(dataset_list,
                             wgs84_bounds,
                             variable_name,
                             coordinate_wkt,
                             min_points=None,
                             max_points=None):

    all_coordinates = []
    all_values = []
    for dataset, coordinates, values in read_datasets(dataset_list,
                                                      wgs84_bounds,
                                                      variable_name,
                                                      coordinate_wkt,
                                                      min_points, max_points):

        # Coordinates yielded by the generator are already reprojected to
        # coordinate_wkt, so they can be accumulated directly
        all_coordinates += list(coordinates)
        all_values += list(values)

    print("Converting lists to arrays")
    all_values = np.array(all_values)
    all_coordinates = np.array(all_coordinates)
    assert all_values.shape[0] == all_coordinates.shape[0], \
        'Mismatched coordinate and value counts'
    print("A total of {} points were read from {} line datasets".format(
        all_values.shape[0], len(dataset_list)))

    return all_coordinates, all_values
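
A minimal usage sketch (the file paths, variable name, CRS and bounds below are placeholder assumptions, not values from the source):

# Hypothetical inputs
dataset_list = ['/data/line_survey_1.nc', '/data/line_survey_2.nc']
wgs84_bounds = (115.0, -35.0, 120.0, -30.0)  # (min_lon, min_lat, max_lon, max_lat)
coordinates, values = get_points_from_datasets(dataset_list,
                                               wgs84_bounds,
                                               variable_name='mag_awags',
                                               coordinate_wkt='EPSG:4326',
                                               min_points=100)
print('{} points read'.format(values.shape[0]))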
Example No. 2
    def grid_points(self,
                    coordinates,
                    coordinate_wkt,
                    values,
                    grid_wkt,
                    grid_bounds,
                    grid_resolution,
                    resampling_method='linear',
                    point_step=1):
        '''
        Return interpolated grid, CRS WKT and geotransform computed from the supplied coordinates and values
        '''

        # Determine pixel-centre bounds snapped to the nearest grid_resolution multiple
        pixel_centre_bounds = (
            round((math.floor(grid_bounds[0] / grid_resolution) + 0.5) *
                  grid_resolution, 6),
            round((math.floor(grid_bounds[1] / grid_resolution) + 0.5) *
                  grid_resolution, 6),
            round((math.floor(grid_bounds[2] / grid_resolution) - 0.5) *
                  grid_resolution, 6),
            round((math.floor(grid_bounds[3] / grid_resolution) - 0.5) *
                  grid_resolution, 6))
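        # The +/-0.5-pixel offsets above shift the snapped outer bounds onto
        # pixel centres, so the np.mgrid call below spans cell centres rather
        # than cell edges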

        print("Reprojecting coordinates")
        grid_coordinates = np.array(
            transform_coords(coordinates, coordinate_wkt, grid_wkt))
        #print('grid_coordinates = {}'.format(grid_coordinates))

        # Create grids of Y and X values. Note YX ordering and inverted Y for image
        # Note grid_resolution/2.0 fudge to avoid truncation due to rounding error
        print("Generating grid coordinates")
        grid_y, grid_x = np.mgrid[
            pixel_centre_bounds[3]:pixel_centre_bounds[1] -
            grid_resolution / 2.0:-grid_resolution,
            pixel_centre_bounds[0]:pixel_centre_bounds[2] +
            grid_resolution / 2.0:grid_resolution]

        # Subsample points to reduce memory requirements
        print("Generating point subset mask")
        point_subset_mask = np.zeros(shape=values.shape, dtype=bool)
        point_subset_mask[::point_step] = True  # keep every point_step-th point

        grid_coordinates = grid_coordinates[point_subset_mask]
        values = values[point_subset_mask]

        # Interpolate required values to the grid - Note yx ordering and inverted y for image
        print("Interpolating {} points".format(grid_coordinates.shape[0]))
        grid_array = griddata(grid_coordinates[:, ::-1],
                              values, (grid_y, grid_x),
                              method=resampling_method)

        print("Interpolation complete")
        #  crs:GeoTransform = "109.1002342895272 0.00833333 0 -9.354948067227777 0 -0.00833333 "
        geotransform = [
            pixel_centre_bounds[0] - grid_resolution / 2.0, grid_resolution, 0,
            pixel_centre_bounds[3] + grid_resolution / 2.0, 0, -grid_resolution
        ]

        return grid_array, grid_wkt, geotransform
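
A sketch of a call, assuming an instance `gridder` of the enclosing class; the CRS strings, bounds and resolution are placeholders:

grid_array, crs_wkt, geotransform = gridder.grid_points(
    coordinates=coordinates,        # (N, 2) array of x, y pairs
    coordinate_wkt='EPSG:4326',
    values=values,                  # (N,) array of point values
    grid_wkt='EPSG:4326',
    grid_bounds=(115.0, -35.0, 120.0, -30.0),
    grid_resolution=0.001,
    resampling_method='cubic',      # scipy griddata also accepts 'nearest' and 'linear'
    point_step=10)                  # interpolate from every tenth point only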
Example No. 3
    def reproject_bounds(self, bounds, from_crs_wkt, to_crs_wkt):
        '''
        Function to return orthogonal bounds reprojected to new CRS
        '''
        if from_crs_wkt == to_crs_wkt:  # No change
            return bounds

        bounding_box = ((bounds[0], bounds[1]), (bounds[2], bounds[1]),
                        (bounds[2], bounds[3]), (bounds[0], bounds[3]))

        reprojected_bounding_box = np.array(
            transform_coords(bounding_box, from_crs_wkt, to_crs_wkt))

        reprojected_bounds = (min(reprojected_bounding_box[:, 0]),
                              min(reprojected_bounding_box[:, 1]),
                              max(reprojected_bounding_box[:, 0]),
                              max(reprojected_bounding_box[:, 1]))

        return reprojected_bounds
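
A sketch of a call, assuming an instance `utils` of the enclosing class (the CRS strings are placeholders):

wgs84_bounds = (115.0, -35.0, 120.0, -30.0)  # (xmin, ymin, xmax, ymax)
utm_bounds = utils.reproject_bounds(wgs84_bounds, 'EPSG:4326', 'EPSG:28350')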
def main():
    '''
    Main function
    '''
    def get_xml_text(xml_template_path, metadata_object):
        '''Helper function to perform substitutions on XML template text
        '''
        template_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'templates')
        jinja_environment = Environment(
            loader=FileSystemLoader(template_dir or './'),
            autoescape=select_autoescape(['html', 'xml']))

        xml_template = jinja_environment.get_template(xml_template_path, parent=None)
            
        value_dict = dict(metadata_object.metadata_dict['Template']) # Copy template values
        
        # Convert the comma-separated keyword lists into a single list of {'value': ..., 'code': ...} dicts
        #TODO: Make this slicker
        value_dict['keywords'] = []
        for keyword_list_key in [key for key in value_dict.keys() if re.match(r'^KEYWORD_\w+_LIST$', key)]:
            keywords = [keyword.strip() for keyword in value_dict[keyword_list_key].split(',')]
            keyword_code = value_dict[re.sub('_LIST$', '_CODE', keyword_list_key)]
            
            value_dict['keywords'] += [{'value': keyword,
                                        'code': keyword_code
                                        } for keyword in keywords
                                       ]
        
        # Create dict containing distribution info for DOI if required
        value_dict['distributions'] = []
        dataset_doi = metadata_object.get_metadata(['Calculated', 'DOI'])
        if dataset_doi:
            try:
                distribution_dict = {'formatSpecification': 'html',
                                     'distributor_name': metadata_object.get_metadata(['Template', 'ORGANISATION_NAME']),
                                     'distributor_telephone': metadata_object.get_metadata(['Template', 'ORGANISATION_PHONE']),
                                     'distributor_address': metadata_object.get_metadata(['Template', 'ORGANISATION_ADDRESS']),
                                     'distributor_city': metadata_object.get_metadata(['Template', 'ORGANISATION_CITY']),
                                     'distributor_state': metadata_object.get_metadata(['Template', 'ORGANISATION_STATE']),
                                     'distributor_postcode': metadata_object.get_metadata(['Template', 'ORGANISATION_POSTCODE']),
                                     'distributor_country': metadata_object.get_metadata(['Template', 'ORGANISATION_COUNTRY']),
                                     'distributor_email': metadata_object.get_metadata(['Template', 'ORGANISATION_EMAIL']),
                                     'url': dataset_doi,
                                     'protocol': 'WWW:LINK-1.0-http--link',
                                     'name': 'Digital Object Identifier for dataset %s' % metadata_object.get_metadata(['Calculated', 'UUID']),
                                     'description': 'Dataset DOI'
                                     }
                
                for key, value in distribution_dict.items():
                    assert value, '%s has no value defined' % key
                
                value_dict['distributions'].append(distribution_dict)
            except Exception as e:
                print('WARNING: Unable to create DOI distribution: %s' % e)
        
        return xml_template.render(**value_dict)
    
    def str2datetimelist(multi_datetime_string):
        '''
        Helper function to convert comma-separated string containing dates to a list of datetimes
        '''
        datetime_format_list = ['%d-%b-%y', 
                                '%Y-%m-%dT%H:%M:%S', 
                                '%Y-%m-%dT%H:%M:%S.%f', 
                                '%Y-%m-%dT%H:%M:%S%z', 
                                '%Y-%m-%dT%H:%M:%S.%f%z'
                                ]
        date_list = []
        for datetime_string in multi_datetime_string.split(','):
            for datetime_format in datetime_format_list:
                try:
                    date_list.append(datetime.strptime(datetime_string.strip(), datetime_format))
                    break
                except ValueError:
                    continue
        return date_list

    def str2datelist(multi_date_string):
        '''
        Helper function to convert comma-separated string containing dates to a list of dates
        '''
        return [datetime_value.date() for datetime_value in str2datetimelist(multi_date_string)]
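
    # Example (hypothetical input; formats as listed in datetime_format_list above):
    #   str2datelist('01-Jan-99, 2016-08-29T10:51:42')
    #   -> [datetime.date(1999, 1, 1), datetime.date(2016, 8, 29)]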
    
    # Start of main function
    assert 4 <= len(sys.argv) <= 8, \
        'Usage: %s <json_text_template_path> <xml_template_path> <netcdf_path> [<xml_output_dir>] [<db_user> <db_password> <db_alias>]' % sys.argv[0]
    json_text_template_path = sys.argv[1]
    xml_template_path = sys.argv[2]
    netcdf_path = sys.argv[3]
    if len(sys.argv) >= 5:
        xml_dir = sys.argv[4]
    else:
        xml_dir = '.'
        
    # Optional arguments for DB connection - not required at NCI
    if len(sys.argv) == 8:
        db_user = sys.argv[5]
        db_password = sys.argv[6]
        db_alias = sys.argv[7]
    else:
        db_user = None
        db_password = None
        db_alias = None
        
    xml_path = os.path.abspath(os.path.join(xml_dir, os.path.splitext(os.path.basename(netcdf_path))[0] + '.xml'))
    print(xml_dir, xml_path)

    metadata_object = Metadata()

    netcdf_metadata = NetCDFMetadata(netcdf_path)
    metadata_object.merge_root_metadata_from_object(netcdf_metadata)

    nc_dataset = netCDF4.Dataset(netcdf_path, 'r+') # Allow for updating of netCDF attributes like uuid
    
    # JetCat and Survey metadata can either take a list of survey IDs as source(s) or a filename from which to parse them
    try:
        survey_ids = nc_dataset.survey_id
        print('Survey ID "%s" found in netCDF attributes' % survey_ids)
        source = [int(value_string.strip()) for value_string in survey_ids.split(',') if value_string.strip()]
    except Exception:
        source = netcdf_path

#    jetcat_metadata = JetCatMetadata(source, jetcat_path=jetcat_path)
#    metadata_object.merge_root_metadata_from_object(jetcat_metadata)

    try:
        survey_metadata = SurveyMetadata(source)
        metadata_object.merge_root_metadata_from_object(survey_metadata)
    except Exception as e:
        print('Unable to read from Survey API:\n%s\nAttempting direct Oracle DB read' % e)
        try:
            survey_metadata = ArgusMetadata(db_user, db_password, db_alias, source) # This will fail if we haven't been able to import ArgusMetadata
            metadata_object.merge_root_metadata('Survey', survey_metadata.metadata_dict, overwrite=True) # Fake Survey metadata from DB query
        except Exception as e:
            print('Unable to perform direct Oracle DB read: %s' % e)

    # Add some calculated values to the metadata
    calculated_values = {}
    metadata_object.metadata_dict['Calculated'] = calculated_values
    
    calculated_values['FILENAME'] = os.path.basename(netcdf_path)
    
    try: # Try to treat this as a gridded dataset
        nc_grid_utils = NetCDFGridUtils(nc_dataset)
        print('%s is a gridded dataset' % netcdf_path)
    
        #calculated_values['CELLSIZE'] = str((nc_grid_utils.pixel_size[0] + nc_grid_utils.pixel_size[1]) / 2.0)
        calculated_values['CELLSIZE_M'] = str(int(round((nc_grid_utils.nominal_pixel_metres[0] + nc_grid_utils.nominal_pixel_metres[1]) / 20.0) * 10))
        calculated_values['CELLSIZE_DEG'] = str(round((nc_grid_utils.nominal_pixel_degrees[0] + nc_grid_utils.nominal_pixel_degrees[1]) / 2.0, 8))
        
        WGS84_bbox = transform_coords(nc_grid_utils.native_bbox, nc_grid_utils.crs, 'EPSG:4326')

    except Exception: # Fall back to treating this as a line dataset
        nc_line_utils = NetCDFLineUtils(nc_dataset)
        print('%s is a line dataset' % netcdf_path)
        
        WGS84_bbox = transform_coords(nc_line_utils.native_bbox, nc_line_utils.crs, 'EPSG:4326')
        
    WGS84_extents = [min([coordinate[0] for coordinate in WGS84_bbox]),
                     min([coordinate[1] for coordinate in WGS84_bbox]),
                     max([coordinate[0] for coordinate in WGS84_bbox]),
                     max([coordinate[1] for coordinate in WGS84_bbox])
                     ]
        
    calculated_values['WLON'] = str(WGS84_extents[0])
    calculated_values['SLAT'] = str(WGS84_extents[1])
    calculated_values['ELON'] = str(WGS84_extents[2])
    calculated_values['NLAT'] = str(WGS84_extents[3])
    
    try:
        calculated_values['START_DATE'] = min(str2datelist(str(metadata_object.get_metadata(['Survey', 'STARTDATE'])))).isoformat()
    except ValueError:
        calculated_values['START_DATE'] = None   
    
    try:
        calculated_values['END_DATE'] = max(str2datelist(str(metadata_object.get_metadata(['Survey', 'ENDDATE'])))).isoformat()
    except ValueError:
        calculated_values['END_DATE'] = None 
    
    # Find survey year from end date isoformat string
    try:
        calculated_values['YEAR'] = re.match(r'^(\d{4})-', calculated_values['END_DATE']).group(1)
    except (AttributeError, TypeError): # No match, or END_DATE is None
        calculated_values['YEAR'] = 'UNKNOWN'
    
    #history = "Wed Oct 26 14:34:42 2016: GDAL CreateCopy( /local/el8/axi547/tmp/mWA0769_770_772_773.nc, ... )"
    #date_modified = "2016-08-29T10:51:42"
    try:
        try:
            conversion_datetime_string = re.match('^(.+):.*', str(metadata_object.get_metadata(['NetCDF', 'history']))).group(1)
            conversion_datetime_string = datetime.strptime(conversion_datetime_string, '%a %b %d %H:%M:%S %Y').isoformat()
        except Exception:
            conversion_datetime_string = metadata_object.get_metadata(['NetCDF', 'date_modified']) or 'UNKNOWN'
    except Exception:
        conversion_datetime_string = 'UNKNOWN'
        
    calculated_values['CONVERSION_DATETIME'] = conversion_datetime_string
    
    survey_id = metadata_object.get_metadata(['Survey', 'SURVEYID'])
    try:
        dataset_survey_id = str(nc_dataset.survey_id)
        assert (set([int(value_string.strip()) for value_string in dataset_survey_id.split(',') if value_string.strip()]) ==
                set([int(value_string.strip()) for value_string in survey_id.split(',') if value_string.strip()])), \
            'NetCDF survey ID %s is inconsistent with %s' % (dataset_survey_id, survey_id)
    except Exception:
        nc_dataset.survey_id = str(survey_id)
        nc_dataset.sync()
        print('Survey ID %s written to netCDF file' % survey_id)

    dataset_uuid = metadata_object.get_metadata(['NetCDF', 'uuid'])
    if not dataset_uuid: # Create a new UUID and write it to the netCDF file 
        dataset_uuid = str(uuid.uuid4())
        print(dataset_uuid, type(dataset_uuid))
        nc_dataset.uuid = dataset_uuid
        nc_dataset.sync()
        print('Fresh UUID %s generated and written to netCDF file' % dataset_uuid)
        
    calculated_values['UUID'] = str(dataset_uuid)   
    
    dataset_doi = metadata_object.get_metadata(['NetCDF', 'doi'])
    calculated_values['DOI'] = str(dataset_doi)
    
    # Need template info to mint DOI
    template_metadata_object = TemplateMetadata(json_text_template_path, metadata_object)
    
    if not dataset_doi: #TODO: Mint a new DOI and write it to the netCDF file
        try:
            doi_minter = Minter(DOI_MINTING_MODE)
            doi_success, ecat_id, new_doi = doi_minter.get_a_doi(
                ecatid=template_metadata_object.get_metadata(["ECAT_ID"]),
                author_names=template_metadata_object.list_from_string(template_metadata_object.get_metadata(["DATASET_AUTHOR"])),
                title=template_metadata_object.get_metadata(["DATASET_TITLE"]),
                resource_type='Dataset',
                publisher=template_metadata_object.get_metadata(["ORGANISATION_NAME"]),
                publication_year=datetime.now().year,
                subjects=template_metadata_object.list_from_string(template_metadata_object.get_metadata(["KEYWORD_THEME_LIST"])),
                description=template_metadata_object.get_metadata(["LINEAGE_SOURCE"]),
                record_url=None, # Use default URI format
                output_file_path=None)

            if doi_success:
                dataset_doi = str(new_doi)
                nc_dataset.doi = dataset_doi
                nc_dataset.sync()
                print('Fresh DOI %s generated and written to netCDF file' % dataset_doi)
            else:
                print('WARNING: DOI minting failed with response code %s' % ecat_id)
        except Exception as e:
            print('WARNING: Error minting DOI: %s' % e)
               
    if dataset_doi:
        calculated_values['DOI'] = dataset_doi
        template_metadata_object.metadata_dict['DOI'] = dataset_doi
    else:
        print('WARNING: DOI not defined')
        
    #template_class = None
    template_metadata_object = TemplateMetadata(json_text_template_path, metadata_object)
    metadata_object.merge_root_metadata_from_object(template_metadata_object)

    #pprint(metadata_object.metadata_dict)
    
    xml_text = get_xml_text(xml_template_path, metadata_object)
    #print(xml_text)
    with open(xml_path, 'w') as xml_file:
        xml_file.write(xml_text)
    print('XML written to %s' % xml_path)
Example No. 5
def dataset_point_generator(dataset_list,
                            wgs84_bounds,
                            variable_name,
                            coordinate_wkt,
                            flight_lines_only=True,
                            min_points=None,
                            max_points=None):
    '''
    Generator yielding coordinates and values of the specified variable for all points from the supplied
    dataset list which fall within wgs84_bounds
    '''
    line_dataset_count = 0
    for dataset in dataset_list:
        print('Reading and reprojecting points from line dataset {}'.format(dataset))
        try:
            nc_dataset = Dataset(dataset)
            mag_awags_variable = nc_dataset.variables[variable_name]
            netcdf_line_utils = NetCDFLineUtils(nc_dataset)

            reprojected_bounds = netcdf_line_utils.get_reprojected_bounds(
                wgs84_bounds, WGS84_WKT, netcdf_line_utils.wkt)
            #print netcdf_line_utils.__dict__

            if flight_lines_only:
                print('Excluding tie-lines')
                line_numbers = nc_dataset.variables['line'][
                    nc_dataset.variables['flag_linetype'][:] == 2]
                line_mask = np.zeros(
                    shape=nc_dataset.variables[variable_name].shape,
                    dtype=bool)
                for _line_number, single_line_mask in netcdf_line_utils.get_line_masks(
                        line_numbers):
                    line_mask = np.logical_or(line_mask, single_line_mask)
            else:
                line_mask = np.ones(
                    shape=nc_dataset.variables[variable_name].shape,
                    dtype=bool)

            print('Computing spatial mask')
            selection_indices = np.where(
                np.logical_and(
                    netcdf_line_utils.get_spatial_mask(reprojected_bounds),
                    line_mask))[0]
            print('{}/{} points found in bounding box'.format(
                len(selection_indices), len(mag_awags_variable)))

            # Enforce min/max point counts
            if min_points and len(selection_indices) < min_points:
                print('Skipping dataset with < {} points'.format(min_points))
                continue
            if max_points and len(selection_indices) > max_points:
                print('Skipping dataset with > {} points'.format(max_points))
                continue

            coordinates = np.array(
                transform_coords(netcdf_line_utils.xycoords[selection_indices],
                                 netcdf_line_utils.wkt, coordinate_wkt))
            values = mag_awags_variable[selection_indices]

            mask = np.ma.getmask(values)
            if mask is not np.ma.nomask:
                print('Discarding {} invalid values'.format(np.count_nonzero(mask)))
                values = values[~mask].data
                coordinates = coordinates[~mask]
                print("{} valid points were found".format(values.shape[0]))

            line_dataset_count += 1
            yield dataset, coordinates, values

        except Exception as e:
            print('Unable to read line dataset {}: {}'.format(dataset, e))
        finally:
            netcdf_line_utils = None  # Drop the reference to free cached data
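
A hedged usage sketch of the generator (paths, bounds and variable name are assumptions; WGS84_WKT is expected to be defined at module level):

for dataset, coordinates, values in dataset_point_generator(
        ['/data/line_survey_1.nc', '/data/line_survey_2.nc'],
        wgs84_bounds=(115.0, -35.0, 120.0, -30.0),
        variable_name='mag_awags',
        coordinate_wkt='EPSG:4326',
        flight_lines_only=True,
        min_points=100):
    print(dataset, coordinates.shape, values.shape)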
Example No. 7
    def dataset_value_generator(self,
                                variable_name_list,
                                dataset_list,
                                bounding_box,
                                min_points=None,
                                max_points=None):
        '''
        Generator yielding each dataset path together with a dict of coordinates and the values of the
        specified variables for all points which fall within the bounding box
        '''
        for dataset in dataset_list:
            try:
                try:
                    nc_dataset = Dataset(dataset)
                except:
                    nc_dataset = Dataset(
                        dataset + '#fillmismatch'
                    )  # Note work-around for bad _FillValue: https://github.com/Unidata/netcdf-c/issues/1299

                netcdf_point_utils = NetCDFPointUtils(nc_dataset)

                #print netcdf_point_utils.__dict__
                #print(nc_dataset.variables.keys())
                #print('Computing spatial mask')
                spatial_mask = netcdf_point_utils.get_spatial_mask(
                    bounding_box, self.grid_crs_wkt)
                point_count = np.count_nonzero(spatial_mask)

                print('{}/{} points found in expanded bounding box for {}'.format(
                    point_count, netcdf_point_utils.point_count, dataset))

                if not point_count:
                    continue

                # Enforce min/max point counts
                if min_points and point_count < min_points:
                    print(
                        'Skipping dataset with < {} points'.format(min_points))
                    continue
                if max_points and point_count > max_points:
                    print(
                        'Skipping dataset with > {} points'.format(max_points))
                    continue

                dataset_value_dict = {
                    'coordinates': transform_coords(
                        netcdf_point_utils.xycoords[spatial_mask],
                        get_wkt_from_spatial_ref(get_spatial_ref_from_wkt(netcdf_point_utils.wkt)),
                        self.grid_crs_wkt)
                }

                # Read all variable attributes and values
                for variable_name in variable_name_list:
                    variable = nc_dataset.variables[variable_name]
                    if variable.dimensions[0] != 'point':
                        # Variable is NOT of point dimension - must be a lookup variable
                        dataset_value_dict[variable_name] = netcdf_point_utils.expand_lookup_variable(
                            lookup_variable_name=variable_name,
                            mask=spatial_mask)
                    else:  # 'point' is in variable.dimensions - "normal" variable
                        dataset_value_dict[variable_name] = variable[spatial_mask]

                yield dataset, dataset_value_dict

            except Exception as e:
                print('Unable to read point dataset {}: {}'.format(dataset, e))
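
A hedged usage sketch, assuming an instance `point_utils_helper` of the enclosing class (paths, variable names and bounding box are placeholders):

for dataset, value_dict in point_utils_helper.dataset_value_generator(
        variable_name_list=['mag_awags', 'line'],
        dataset_list=['/data/point_survey_1.nc', '/data/point_survey_2.nc'],
        bounding_box=(115.0, -35.0, 120.0, -30.0),
        min_points=100):
    coordinates = value_dict['coordinates']
    print('{}: {} points'.format(dataset, len(coordinates)))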