def custom_view(self):
    url = self.request.params.get('url')
    if url is None:
        url = self.wizard_state.get('wizard_threddsservice')['url']
    # TODO: back url handling needs to be changed
    back_url = None
    prev = self.request.params.get('prev')
    back_links = self.wizard_state.get('wizard_threddsservice').get('back_links', [])
    if prev:
        back_links.append(prev)
    elif len(back_links) > 0:
        back_links.pop()
    self.wizard_state.get('wizard_threddsservice')['back_links'] = back_links
    self.session.changed()
    if len(back_links) > 0:
        back_url = self.request.route_path('wizard_threddsbrowser', _query=[('url', back_links[-1])])
    logger.debug("wizard state: %s", self.wizard_state.get('wizard_threddsservice'))
    catalog = threddsclient.read_url(url)
    items = []
    items.extend(catalog.flat_references())
    items.extend(catalog.flat_datasets())
    fields = ['name', 'size', 'modified']
    grid = Grid(self.request, items, fields)
    return dict(title=catalog.url, grid=grid, back_url=back_url)
def custom_view(self):
    url = self.request.params.get('url')
    if url is None:
        url = self.wizard_state.get('wizard_threddsservice')['url']
    # TODO: back url handling needs to be changed
    back_url = None
    prev = self.request.params.get('prev')
    back_links = self.wizard_state.get('wizard_threddsservice').get('back_links', [])
    if prev:
        back_links.append(prev)
    elif len(back_links) > 0:
        back_links.pop()
    self.wizard_state.get('wizard_threddsservice')['back_links'] = back_links
    self.session.changed()
    if len(back_links) > 0:
        back_url = self.request.route_path('wizard_threddsbrowser', _query=[('url', back_links[-1])])
    LOGGER.debug("wizard state: %s", self.wizard_state.get('wizard_threddsservice'))
    catalog = threddsclient.read_url(url)
    items = []
    items.extend(catalog.flat_references())
    items.extend(catalog.flat_datasets())
    fields = ['name', 'size', 'modified']
    grid = Grid(self.request, items, fields)
    return dict(title=catalog.url, grid=grid, back_url=back_url)
from threddsclient import read_url


def test_noaa():
    cat = read_url('http://www.esrl.noaa.gov/psd/thredds/catalog.xml')
    assert cat.name == 'THREDDS PSD Test Catalog'
    assert cat.url == 'http://www.esrl.noaa.gov/psd/thredds/catalog.xml'
    assert len(cat.references) == 2
    cat2 = cat.references[0].follow()
    assert cat2.name == 'Datasets'
    assert cat2.url == 'http://www.esrl.noaa.gov/psd/thredds/catalog/Datasets/catalog.xml'
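The test above exercises the core catalog API: read_url() parses a catalog.xml, and each entry in references can be follow()ed into a sub-catalog. A minimal sketch of that traversal pattern, using only the attributes shown in these examples (the invocation URL at the bottom is just an example, any THREDDS catalog.xml works):

from threddsclient import read_url


def list_datasets(catalog_url, max_items=10):
    """Print dataset names and download URLs one level below a catalog (illustrative sketch)."""
    cat = read_url(catalog_url)
    for ref in cat.flat_references():   # sub-catalogs linked from this catalog
        sub = ref.follow()               # fetch and parse the referenced catalog
        for ds in sub.flat_datasets()[:max_items]:
            print(ds.name, ds.download_url())


# Example invocation:
# list_datasets('http://www.esrl.noaa.gov/psd/thredds/catalog.xml')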
def _fetch_thredds_metadata(url, title=None):
    """Fetch capabilities metadata from thredds catalog service and return record dict."""
    # TODO: maybe use thredds siphon
    import threddsclient
    tds = threddsclient.read_url(url)
    title = title or tds.name or "Unknown"
    record = dict(
        type='service',
        title=title,
        abstract="",
        source=url,
        format=THREDDS_TYPE,
        creator='',
        keywords=['thredds'],
        rights='',
        # subjects='',
        references=[])
    return record
def harvest_service(request, url, service_type, service_name=None):
    if service_type == 'thredds_catalog':
        import threddsclient
        tds = threddsclient.read_url(url)
        title = tds.name
        if service_name and len(service_name.strip()) > 2:
            title = service_name
        elif len(title.strip()) == 0:
            title = url
        record = dict(
            title=title,
            abstract="",
            source=url,
            format="THREDDS",
            creator='',
            keywords='thredds',
            rights='')
        publish(request, record)
    else:
        # ogc services
        request.csw.harvest(source=url, resourcetype=service_type)
def thredds_get_resources(url, depth):
    # Nested helper: `self` is expected to be captured from the enclosing service method.
    cat = threddsclient.read_url(url)
    name = self._resource_id(cat)
    if depth == self.max_depth:
        name = self.service_name
    resource_type = 'directory'
    if cat.datasets and cat.datasets[0].content_type != "application/directory":
        resource_type = 'file'
    tree_item = {name: {'children': {}, 'resource_type': resource_type}}
    if depth > 0:
        # Recurse into referenced sub-catalogs until the depth budget is spent.
        for reference in cat.flat_references():
            tree_item[name]['children'].update(
                thredds_get_resources(reference.url, depth - 1))
    return tree_item
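Since the helper above depends on `self` from its enclosing scope, here is a self-contained sketch of the same recursive idea. The naming is an assumption: it uses the catalog's own name instead of the project's _resource_id() logic.

import threddsclient


def build_resource_tree(url, depth):
    """Recursively map a THREDDS catalog to a nested dict (illustrative sketch)."""
    cat = threddsclient.read_url(url)
    resource_type = 'directory'
    if cat.datasets and cat.datasets[0].content_type != "application/directory":
        resource_type = 'file'
    name = cat.name or url  # assumption: plain catalog name rather than self._resource_id(cat)
    tree_item = {name: {'children': {}, 'resource_type': resource_type}}
    if depth > 0:
        for reference in cat.flat_references():
            tree_item[name]['children'].update(build_resource_tree(reference.url, depth - 1))
    return tree_item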
import pytest
import requests

from threddsclient import read_url


def test_invalid_url():
    with pytest.raises(requests.ConnectionError):
        read_url('http://example.invalid')
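As the test shows, read_url() propagates requests.ConnectionError when the host is unreachable, so code that harvests user-supplied URLs may want to catch it. A small sketch, assuming nothing beyond what the test asserts:

import requests

from threddsclient import read_url


def try_read_catalog(url):
    """Return the parsed catalog, or None if the THREDDS server cannot be reached."""
    try:
        return read_url(url)
    except requests.ConnectionError:
        return None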
def download_netcdf_file_from_met(data_source=DATA_SOURCE_NEW):
    archive_url = None
    if data_source == DATA_SOURCE_NEW:
        print("Traversing operational archive")
        archive_url = OPERATIONAL_ARCHIVE_URL
    elif data_source == DATA_SOURCE_OLD:
        print("Traversing historical archive")
        archive_url = HISTORICAL_ARCHIVE_URL
    else:
        raise ValueError("Invalid data source provided")

    data_catalog = threddsclient.read_url(archive_url)
    # Newest YEAR = operational_data_catalog.flat_references()[0]; from the newest year,
    # .flat_references()[0] is the NEWEST MONTH.
    newest_catalog_ref = (
        data_catalog.flat_references()[0].follow().flat_references()[0].follow().flat_references()[0]
        if data_source == DATA_SOURCE_NEW
        else data_catalog.flat_references()[1].follow().flat_references()[0].follow().flat_references()[0]
    )
    download_directory_data = threddsclient.read_url(newest_catalog_ref.url)
    datasets = download_directory_data.datasets[0].datasets

    configuration_file = json.load(open(CONFIG_FILE_NAME)) if isfile(CONFIG_FILE_NAME) else None
    repository_root_path = configuration_file["windData"]["windDataRepositoryDirectory"]
    variables_to_keep = configuration_file["windData"]["windDataVariablesToKeep"]
    existing_files = read_added_files()
    tmp_file_path = "{root_path}{filename}".format(root_path=repository_root_path, filename=TMP_NETCDF_FILE_NAME)

    for year_idx, year in enumerate(data_catalog.flat_references()):
        if year.name == "raw_grid":
            continue
        for month_idx, month in enumerate(
                data_catalog.flat_references()[year_idx].follow().flat_references()):
            for day_idx, day in enumerate(
                    data_catalog.flat_references()[year_idx].follow()
                    .flat_references()[month_idx].follow().flat_references()):
                date_time = year.name + "-" + month.name + "-" + day.name
                if date_time in existing_files:
                    continue
                datasets = (
                    data_catalog.flat_references()[year_idx].follow()
                    .flat_references()[month_idx].follow()
                    .flat_references()[day_idx].follow()
                    .flat_datasets()
                )
                if datasets:
                    file_index = 0  # Which file to download for that given day
                    distance_in_hours_from_12 = 9999
                    for idx, file in enumerate(datasets):  # List of all files for that given day
                        if "forecast" in file.name or "latest" in file.name:
                            continue
                        timestamp = file.name[file.name.rindex("_") + 1:].replace(".nc", "")
                        # Get hours from timestamp
                        hours = timestamp[9:11]
                        current_distance = calculate_distance_to_number(12, int(hours))
                        if current_distance == 0:
                            file_index = idx
                            break
                        if current_distance < distance_in_hours_from_12:
                            distance_in_hours_from_12 = current_distance
                            file_index = idx
                    # Download!
                    download_url = datasets[file_index].download_url()
                    req = requests.get(download_url)
                    if req.status_code == 200:
                        # Store file
                        print("--\nWriting tmp file to disk with location: {root_path}{file_name}".format(
                            root_path=repository_root_path, file_name=TMP_NETCDF_FILE_NAME))
                        with open(tmp_file_path, "wb") as outfile:
                            outfile.write(req.content)
                        file_name = "{file_name}_reduced.nc".format(file_name=datasets[file_index].name[:-3])
                        drop_attributes_and_write_to_disk(
                            data_file_path=tmp_file_path,
                            result_file_path="{root_path}{filename}".format(
                                root_path=repository_root_path, filename=file_name),
                            columns_to_keep=variables_to_keep)
                        with open("added_files", "a+") as myfile:
                            myfile.write(date_time + "\n")
                        existing_files.append(date_time)

    if os.path.exists("{root_path}{file_name}".format(root_path=repository_root_path, file_name=TMP_NETCDF_FILE_NAME)):
        os.remove("{root_path}{file_name}".format(root_path=repository_root_path, file_name=TMP_NETCDF_FILE_NAME))
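The inner loop above picks, per day, the non-forecast file whose timestamp hour is closest to 12:00. The same selection can be written as a min() over candidates; this is only an illustrative sketch, with the hour parsing copied from the slice used above, calculate_distance_to_number replaced by abs() (an assumption about what that helper computes), and None returned when every file is filtered out rather than falling back to index 0.

def pick_file_closest_to_noon(datasets):
    """Return the dataset whose filename timestamp hour is nearest 12 (illustrative sketch)."""
    candidates = []
    for ds in datasets:
        if "forecast" in ds.name or "latest" in ds.name:
            continue
        timestamp = ds.name[ds.name.rindex("_") + 1:].replace(".nc", "")
        hour = int(timestamp[9:11])              # same slice as the loop above
        candidates.append((abs(12 - hour), ds))  # assumption: plain absolute distance to 12
    if not candidates:
        return None
    return min(candidates, key=lambda pair: pair[0])[1]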