Example 1
    def custom_view(self):
        """Show the contents of a THREDDS catalog as a grid, with a back link.

        Reads the catalog URL from the request (falling back to the wizard
        state), maintains a back-link history in the session, and renders
        the catalog's references and datasets in a ``Grid``.
        """
        url = self.request.params.get('url')
        if url is None:
            url = self.wizard_state.get('wizard_threddsservice')['url']
        # TODO: back url handling needs to be changed
        prev = self.request.params.get('prev')
        history = self.wizard_state.get('wizard_threddsservice').get('back_links', [])
        if prev:
            history.append(prev)
        elif history:
            history.pop()
        self.wizard_state.get('wizard_threddsservice')['back_links'] = history
        # Persist the mutated wizard state in the session.
        self.session.changed()
        back_url = None
        if history:
            back_url = self.request.route_path('wizard_threddsbrowser', _query=[('url', history[-1])])

        logger.debug("wizard state: %s", self.wizard_state.get('wizard_threddsservice'))
        catalog = threddsclient.read_url(url)
        # Sub-catalog references first, then the datasets themselves.
        entries = list(catalog.flat_references()) + list(catalog.flat_datasets())
        grid = Grid(self.request, entries, ['name', 'size', 'modified'])
        return dict(title=catalog.url, grid=grid, back_url=back_url)
    def custom_view(self):
        """Render a THREDDS catalog browser page with back-navigation support.

        The catalog URL comes from the request parameters or, if absent,
        from the wizard state; the back-link stack is stored in the wizard
        state and mirrored into ``back_url`` for the template.
        """
        requested = self.request.params.get('url')
        url = (requested if requested is not None
               else self.wizard_state.get('wizard_threddsservice')['url'])
        # TODO: back url handling needs to be changed
        prev = self.request.params.get('prev')
        links = self.wizard_state.get('wizard_threddsservice').get(
            'back_links', [])
        if prev:
            links.append(prev)
        elif links:
            links.pop()
        self.wizard_state.get(
            'wizard_threddsservice')['back_links'] = links
        # Mark the session dirty so the updated history is saved.
        self.session.changed()
        back_url = (self.request.route_path('wizard_threddsbrowser',
                                            _query=[('url', links[-1])])
                    if links else None)

        LOGGER.debug("wizard state: %s",
                     self.wizard_state.get('wizard_threddsservice'))
        catalog = threddsclient.read_url(url)
        rows = []
        for entry in catalog.flat_references():
            rows.append(entry)
        for entry in catalog.flat_datasets():
            rows.append(entry)
        grid = Grid(self.request, rows, ['name', 'size', 'modified'])
        return dict(title=catalog.url, grid=grid, back_url=back_url)
Example 3
def test_noaa():
    """Integration test: read the NOAA PSD catalog and follow its first reference (requires network)."""
    catalog_url = 'http://www.esrl.noaa.gov/psd/thredds/catalog.xml'
    cat = read_url(catalog_url)
    assert cat.name == 'THREDDS PSD Test Catalog'
    assert cat.url == catalog_url
    assert len(cat.references) == 2
    datasets_cat = cat.references[0].follow()
    assert datasets_cat.name == 'Datasets'
    assert datasets_cat.url == 'http://www.esrl.noaa.gov/psd/thredds/catalog/Datasets/catalog.xml'
Example 4
def test_noaa():
    """Check catalog metadata of the NOAA PSD THREDDS service (requires network)."""
    cat = read_url('http://www.esrl.noaa.gov/psd/thredds/catalog.xml')
    assert cat.name == 'THREDDS PSD Test Catalog'
    assert cat.url == 'http://www.esrl.noaa.gov/psd/thredds/catalog.xml'
    assert len(cat.references) == 2
    # Following the first reference should land on the "Datasets" sub-catalog.
    child = cat.references[0].follow()
    assert child.name == 'Datasets'
    assert child.url == ('http://www.esrl.noaa.gov/psd/thredds/'
                         'catalog/Datasets/catalog.xml')
Example 5
def _fetch_thredds_metadata(url, title=None):
    """Fetch capabilities metadata from thredds catalog service and return record dict.

    :param url: URL of the THREDDS catalog to read.
    :param title: optional explicit title; falls back to the catalog name,
        then to ``"Unknown"``.
    :return: a dict describing the service record.
    """
    # TODO: maybe use thredds siphon
    import threddsclient
    catalog = threddsclient.read_url(url)
    return dict(
        type='service',
        title=title or catalog.name or "Unknown",
        abstract="",
        source=url,
        format=THREDDS_TYPE,
        creator='',
        keywords=['thredds'],
        rights='',
        # subjects = '',
        references=[],
    )
Example 6
def harvest_service(request, url, service_type, service_name=None):
    """Harvest a remote service into the catalog.

    THREDDS catalogs are read with ``threddsclient`` and published as a
    record; any other service type is delegated to the CSW harvester.

    :param request: web request carrying the CSW client (``request.csw``).
    :param url: URL of the service to harvest.
    :param service_type: ``'thredds_catalog'`` or an OGC resource type.
    :param service_name: optional explicit title for the record.
    """
    if service_type == 'thredds_catalog':
        import threddsclient
        tds = threddsclient.read_url(url)
        # Prefer an explicit service name; otherwise use the catalog title,
        # falling back to the URL when the title is missing or blank.
        # (tds.name may be None or empty -- guard before calling .strip().)
        title = tds.name
        if service_name and len(service_name.strip()) > 2:
            title = service_name
        elif not title or not title.strip():
            title = url
        record = dict(
            title=title,
            abstract="",
            source=url,
            format="THREDDS",
            creator='',
            keywords='thredds',
            rights='')
        publish(request, record)
    else:  # ogc services
        request.csw.harvest(source=url, resourcetype=service_type)
Example 7
        def thredds_get_resources(url, depth):
            """Recursively collect the catalog at *url* as a nested tree dict.

            Each node maps a resource name to ``{'children': ..., 'resource_type': ...}``;
            recursion stops when *depth* reaches zero.
            """
            catalog = threddsclient.read_url(url)
            node_name = self._resource_id(catalog)
            if depth == self.max_depth:
                # The recursion root is labelled with the service name instead.
                node_name = self.service_name

            # A catalog whose first dataset is not a directory listing is a file node.
            if catalog.datasets and catalog.datasets[0].content_type != "application/directory":
                resource_type = 'file'
            else:
                resource_type = 'directory'

            children = {}
            tree_item = {node_name: {'children': children,
                                     'resource_type': resource_type}}

            if depth > 0:
                for reference in catalog.flat_references():
                    children.update(
                        thredds_get_resources(reference.url, depth - 1))

            return tree_item
Example 8
def test_invalid_url():
    """read_url must raise requests.ConnectionError for an unresolvable host."""
    bad_url = 'http://example.invalid'
    with pytest.raises(requests.ConnectionError):
        read_url(bad_url)
def download_netcdf_file_from_met(data_source=DATA_SOURCE_NEW):
    """Download one NetCDF file per day from the MET THREDDS archive.

    Walks the year/month/day catalog hierarchy, skips days already recorded
    by ``read_added_files()``, picks the file whose timestamp is closest to
    12:00 for each remaining day, strips unwanted variables via
    ``drop_attributes_and_write_to_disk`` and stores the reduced file under
    the configured repository directory.

    :param data_source: ``DATA_SOURCE_NEW`` (operational archive) or
        ``DATA_SOURCE_OLD`` (historical archive).
    :raises ValueError: if *data_source* is not a known source.
    :raises FileNotFoundError: if the configuration file is missing.
    """
    if data_source == DATA_SOURCE_NEW:
        print("Traversing operational archive")
        archive_url = OPERATIONAL_ARCHIVE_URL
    elif data_source == DATA_SOURCE_OLD:
        print("Traversing historical archive")
        archive_url = HISTORICAL_ARCHIVE_URL
    else:
        raise ValueError("Invalid data source provided")

    data_catalog = threddsclient.read_url(archive_url)

    # Fail early with a clear error instead of a TypeError on None below.
    if not isfile(CONFIG_FILE_NAME):
        raise FileNotFoundError(
            "Configuration file not found: {0}".format(CONFIG_FILE_NAME))
    with open(CONFIG_FILE_NAME) as config_fp:  # close the handle deterministically
        configuration_file = json.load(config_fp)
    repository_root_path = configuration_file["windData"]["windDataRepositoryDirectory"]
    variables_to_keep = configuration_file["windData"]["windDataVariablesToKeep"]
    existing_files = read_added_files()
    # BUGFIX: the format string previously contained "(unknown)" instead of
    # the {filename} placeholder, producing a literal "(unknown)" path.
    tmp_file_path = "{root_path}{filename}".format(root_path=repository_root_path,
                                                   filename=TMP_NETCDF_FILE_NAME)

    for year in data_catalog.flat_references():
        if year.name == "raw_grid":
            continue
        # follow() performs a network fetch -- do it once per catalog level
        # instead of re-walking flat_references() on every inner iteration.
        for month in year.follow().flat_references():
            for day in month.follow().flat_references():
                date_time = year.name + "-" + month.name + "-" + day.name
                if date_time in existing_files:
                    continue
                datasets = day.follow().flat_datasets()
                if datasets:
                    file_index = 0  # which file to download for that given day
                    distance_in_hours_from_12 = 9999
                    for idx, dataset in enumerate(datasets):  # all files for that day
                        if "forecast" in dataset.name or "latest" in dataset.name:
                            continue

                        # Timestamp is the chunk after the last "_", minus ".nc".
                        timestamp = dataset.name[dataset.name.rindex("_") + 1:].replace(".nc", "")
                        # Hours live at offset 9-10 of the timestamp.
                        hours = timestamp[9:11]
                        current_distance = calculate_distance_to_number(12, int(hours))
                        if current_distance == 0:
                            file_index = idx
                            break
                        if current_distance < distance_in_hours_from_12:
                            distance_in_hours_from_12 = current_distance
                            file_index = idx

                    # Download!
                    download_url = datasets[file_index].download_url()

                    req = requests.get(download_url)
                    if req.status_code == 200:
                        print("--\nWriting tmp file to disk with location: {root_path}{file_name}".format(
                            root_path=repository_root_path,
                            file_name=TMP_NETCDF_FILE_NAME))
                        with open(tmp_file_path, "wb") as outfile:
                            outfile.write(req.content)
                        file_name = "{file_name}_reduced.nc".format(
                            file_name=datasets[file_index].name[:-3])

                        drop_attributes_and_write_to_disk(
                            data_file_path=tmp_file_path,
                            # BUGFIX: same "(unknown)" placeholder corruption as above.
                            result_file_path="{root_path}{filename}".format(
                                root_path=repository_root_path,
                                filename=file_name),
                            columns_to_keep=variables_to_keep)

                # Record the day regardless of whether a file was downloaded,
                # matching the original best-effort bookkeeping.
                with open("added_files", "a+") as myfile:
                    myfile.write(date_time + "\n")
                existing_files.append(date_time)

    if os.path.exists(tmp_file_path):
        os.remove(tmp_file_path)