def test_when_the_url_returns_a_eula_error_it_returns_a_human_readable_message(
        faker):
    """A 403 with a EULA payload is reported with the resolution URL.

    A POST to the URS ``oauth/tokens/user`` URL is stubbed with a 200, and the
    data URL is stubbed with a 403 whose JSON body describes an
    "EULA Acceptance Failure".  The download must raise ForbiddenException
    with a message that points at the payload's ``resolution_url``, and
    exactly two HTTP calls must have been recorded.
    """
    urs_url = 'https://uat.urs.earthdata.nasa.gov/oauth/tokens/user'
    data_url = 'https://example.com/file.txt'
    eula_payload = (
        '{"status_code":403,"error_description":"EULA Acceptance Failure",'
        '"resolution_url":"https://example.com/approve_app?client_id=foo"}'
    )
    expected_message = (
        'Request could not be completed because you need to agree to the EULA '
        'at https://example.com/approve_app?client_id=foo'
    )

    token = faker.password(length=42, special_chars=False)
    settings = config_fixture()
    responses.add(responses.POST,
                  urs_url,
                  status=200,
                  match_querystring=False)
    responses.add(responses.GET, data_url, status=403, body=eula_payload)

    # builtins.open is mocked for the duration of the download and the checks
    with mock.patch('builtins.open', mock.mock_open()):
        with pytest.raises(ForbiddenException) as raised:
            util.download(data_url,
                          '/tmp',
                          access_token=token,
                          cfg=settings)
        assert raised.value.message == expected_message
        assert len(responses.calls) == 2
def test_when_given_unknown_url_it_raises_exception(faker):
    """Downloading 'msdos:choplifter.bas', a URL treated as unknown, raises."""
    token = faker.password(length=40, special_chars=False)
    settings = config_fixture()
    unknown_url = 'msdos:choplifter.bas'

    # builtins.open is mocked while the download is attempted
    with mock.patch('builtins.open', mock.mock_open()):
        with pytest.raises(Exception):
            util.download(unknown_url,
                          'tmp',
                          access_token=token,
                          cfg=settings)
def test_when_given_an_http_url_it_downloads_the_url(monkeypatch, mocker,
                                                     faker):
    """An https URL results in a call to ``util.http.download``.

    ``util.http.download`` is replaced with a mock, so the test only checks
    that the mock was invoked.
    """
    token = faker.password(length=40, special_chars=False)
    fake_http_download = mocker.Mock()
    monkeypatch.setattr(util.http, 'download', fake_http_download)
    settings = config_fixture()
    target_url = 'https://example.com/file.txt'

    # builtins.open is mocked while the download runs
    with mock.patch('builtins.open', mock.mock_open()):
        util.download(target_url, 'tmp', access_token=token, cfg=settings)

    fake_http_download.assert_called()
def test_when_the_url_returns_a_403_it_throws_a_forbidden_exception(faker):
    """A bare 403 from the data URL raises ForbiddenException.

    The URS ``oauth/tokens/user`` POST is stubbed with a 200 and the data URL
    with a body-less 403.  The raised exception's message must start with
    'Forbidden', and exactly two HTTP calls must have been recorded.
    """
    urs_url = 'https://uat.urs.earthdata.nasa.gov/oauth/tokens/user'
    data_url = 'https://example.com/file.txt'

    token = faker.password(length=41, special_chars=False)
    settings = config_fixture()
    responses.add(responses.POST,
                  urs_url,
                  status=200,
                  match_querystring=False)
    responses.add(responses.GET, data_url, status=403)

    # builtins.open is mocked for the duration of the download and the checks
    with mock.patch('builtins.open', mock.mock_open()):
        with pytest.raises(ForbiddenException) as raised:
            util.download(data_url,
                          '/tmp',
                          access_token=token,
                          cfg=settings)
        assert raised.value.message.startswith('Forbidden')
        assert len(responses.calls) == 2
def test_when_the_url_returns_a_500_it_does_not_raise_a_forbidden_exception_and_does_not_return_details_to_user(
        faker):
    """A 500 from the data URL raises a plain Exception, not ForbiddenException.

    The URS ``oauth/tokens/user`` POST is stubbed with a 200 and the data URL
    with a 500.  The raised exception's type must be exactly ``Exception``,
    and exactly two HTTP calls must have been recorded.
    """
    urs_url = 'https://uat.urs.earthdata.nasa.gov/oauth/tokens/user'
    data_url = 'https://example.com/file.txt'

    token = faker.password(length=43, special_chars=False)
    settings = config_fixture()
    responses.add(responses.POST,
                  urs_url,
                  status=200,
                  match_querystring=False)
    responses.add(responses.GET, data_url, status=500)

    # builtins.open is mocked for the duration of the download and the checks
    with mock.patch('builtins.open', mock.mock_open()):
        with pytest.raises(Exception) as raised:
            util.download(data_url,
                          '/tmp',
                          access_token=token,
                          cfg=settings)
        # Two separate checks so a failure shows which condition was violated
        assert raised.type != ForbiddenException
        assert raised.type == Exception
        assert len(responses.calls) == 2
    def process_item(self, item, source):
        """
        Converts an input STAC Item's data into Zarr, returning an output STAC item

        The item's first asset with the 'data' role is downloaded into a
        temporary directory, converted into a Zarr store located at the
        message's staging location plus a generated ``.zarr`` name, and
        described by a single 'data' asset on a clone of the input item.
        The temporary directory is always removed before returning or raising.

        Parameters
        ----------
        item : pystac.Item
            the item that should be converted
        source : harmony.message.Source
            the input source defining the variables, if any, to subset from the item
            (not read by this implementation)

        Returns
        -------
        pystac.Item
            a STAC item containing the Zarr output

        Raises
        ------
        ZarrException
            if creating the store or converting the file fails; the original
            error is logged and attached as the exception's cause
        StopIteration
            if the item has no asset with the 'data' role
        """
        result = item.clone()
        result.assets = {}

        # Create a temporary dir for processing we may do
        workdir = mkdtemp()
        try:
            # Get the data file: the first asset that carries the 'data' role
            asset = next(v for v in item.assets.values()
                         if 'data' in (v.roles or []))
            input_filename = download(asset.href,
                                      workdir,
                                      logger=self.logger,
                                      access_token=self.message.accessToken,
                                      cfg=self.config)

            name = generate_output_filename(asset.href, ext='.zarr')
            root = self.message.stagingLocation + name

            try:
                store = self.s3.get_mapper(root=root, check=False, create=True)
                netcdf_to_zarr(input_filename, store)
            except Exception as e:
                # Log the real error and convert to user-facing error that's more digestible
                self.logger.error(e, exc_info=True)
                # Last path segment of the href, ignoring any query string
                filename = asset.href.split('?')[0].rstrip('/').split('/')[-1]
                # Chain explicitly so the original failure is kept as the cause
                raise ZarrException(
                    f'Could not convert file to Zarr: {filename}') from e

            # Update the STAC record
            result.assets['data'] = Asset(root,
                                          title=name,
                                          media_type='application/x-zarr',
                                          roles=['data'])

            # Return the STAC record
            return result
        finally:
            # Clean up any intermediate resources
            shutil.rmtree(workdir)
def test_when_given_a_file_path_it_returns_the_file_path(
        monkeypatch, mocker, faker):
    """A ``file://`` URL yields a path under the source file's directory.

    ``util.http.download`` is replaced with a mock.  The returned path must
    start with '/var/logs/example/' and end with '.txt'.
    """
    token = faker.password(length=40, special_chars=False)
    fake_http_download = mocker.Mock()
    monkeypatch.setattr(util.http, 'download', fake_http_download)
    settings = config_fixture()
    file_url = 'file:///var/logs/example/file.txt'

    # builtins.open is mocked for the duration of the download and the checks
    with mock.patch('builtins.open', mock.mock_open()):
        returned_path = util.download(file_url,
                                      '/put/file/here/',
                                      access_token=token,
                                      cfg=settings)

        assert returned_path.startswith('/var/logs/example/')
        assert returned_path.endswith('.txt')
Example #8
0
    def process_item(self, item, source):
        """
        Handles one input item.  Services that do not aggregate multiple input
        files should implement this method in preference to #invoke

        This example copies the input file to the output unchanged and marks the
        "dpi" and "variables" message attributes as processed

        Parameters
        ----------
        item : pystac.Item
            the item that should be processed
        source : harmony.message.Source
            the input source defining the variables, if any, to subset from the item

        Returns
        -------
        pystac.Item
            a clone of the input item whose only asset is the staged output file
        """
        output_item = item.clone()
        output_item.assets = {}

        # Scratch space for the download and the working copy
        scratch_dir = mkdtemp()
        try:
            # Pick the first asset that carries the 'data' role and fetch it
            data_assets = (candidate for candidate in item.assets.values()
                           if 'data' in (candidate.roles or []))
            asset = next(data_assets)
            local_input = download(asset.href,
                                   scratch_dir,
                                   logger=self.logger,
                                   access_token=self.message.accessToken)

            # Flag the fields this service handles so later services skip them
            dpi = self.message.format.process('dpi')
            variables = source.process('variables')

            # The "work": report what was requested, then copy the file as-is
            joined_names = ', '.join([v.name for v in variables])
            print('Processing item %s, DPI=%d, vars=[%s]' %
                  (item.id, dpi, joined_names))
            working_copy = os.path.join(scratch_dir, 'tmp.txt')
            shutil.copyfile(local_input, working_copy)

            # Stage the result under a conventional output filename
            staged_name = generate_output_filename(asset.href,
                                                   ext=None,
                                                   variable_subset=None,
                                                   is_regridded=False,
                                                   is_subsetted=False)
            staged_url = stage(working_copy,
                               staged_name,
                               'text/plain',
                               location=self.message.stagingLocation,
                               logger=self.logger)

            # Record the staged file as the item's single 'data' asset.
            # Other metadata (e.g. bbox, geometry) would be updated here if a
            # spatial subset had been performed.
            output_item.assets['data'] = Asset(staged_url,
                                               title=staged_name,
                                               media_type='text/plain',
                                               roles=['data'])

            return output_item
        finally:
            # Always remove the scratch directory, on success or on error
            shutil.rmtree(scratch_dir)