Example #1
def test_create_symlinks_ASTER():
    file_refs = [
        FileRef('./test/test_data/ASTGTM2_N36W006_dem.tif', None, None,
                'image/tiff'),
        FileRef('./test/test_data/ASTGTM2_N36W007_dem.tif', None, None,
                'image/tiff')
    ]

    expected_sym_link_name_1 = os.path.join(_FOLDER, 'ASTGTM2_N36W006_dem.tif')
    expected_sym_link_name_2 = os.path.join(_FOLDER, 'ASTGTM2_N36W007_dem.tif')
    try:
        assert not os.path.exists(_FOLDER)

        create_sym_links(file_refs, _FOLDER)

        new_list = os.listdir(_FOLDER)
        assert 2 == len(new_list)
        assert 'ASTGTM2_N36W006_dem.tif' in new_list
        assert os.path.islink(expected_sym_link_name_1)
        assert 'ASTGTM2_N36W007_dem.tif' in new_list
        assert os.path.islink(expected_sym_link_name_2)
    finally:
        if os.path.islink('./test/test_data/out/ASTGTM2_N36W006_dem.tif'):
            os.unlink('./test/test_data/out/ASTGTM2_N36W006_dem.tif')
        if os.path.islink('./test/test_data/out/ASTGTM2_N36W007_dem.tif'):
            os.unlink('./test/test_data/out/ASTGTM2_N36W007_dem.tif')
        if os.path.exists(_FOLDER):
            shutil.rmtree(_FOLDER)
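
Every example in this listing constructs FileRef objects from the same four values. The class itself is never shown; a minimal sketch consistent with the positional and keyword arguments used throughout (an assumption, not the project's actual definition) could be:

from typing import NamedTuple, Optional

class FileRef(NamedTuple):
    """A reference to a file: its location, validity period, and MIME type."""
    url: str
    start_time: Optional[str]
    end_time: Optional[str]
    mime_type: str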
Example #2
def test_sort_file_ref_list():
    file_refs = [
        FileRef(url='loc1',
                start_time='2017-06-04',
                end_time='2017-06-07',
                mime_type='unknown mime type'),
        FileRef(url='loc2',
                start_time='2017-06-01',
                end_time='2017-06-06',
                mime_type='unknown mime type'),
        FileRef(url='loc3',
                start_time='2017-06-03',
                end_time='2017-06-10',
                mime_type='unknown mime type'),
        FileRef(url='loc4',
                start_time='2017-06-02',
                end_time='2017-06-09',
                mime_type='unknown mime type'),
        FileRef(url='loc5',
                start_time='2017-06-05',
                end_time='2017-06-08',
                mime_type='unknown mime type')
    ]
    observations_factory = ObservationsFactory()
    observations_factory.sort_file_ref_list(file_refs)
    assert 5 == len(file_refs)
    assert 'loc2' == file_refs[0].url
    assert 'loc4' == file_refs[1].url
    assert 'loc3' == file_refs[2].url
    assert 'loc1' == file_refs[3].url
    assert 'loc5' == file_refs[4].url
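
sort_file_ref_list itself is not shown, but the expected order (loc2, loc4, loc3, loc1, loc5) matches ascending start_time. A minimal sketch under that assumption:

def sort_file_ref_list(file_refs):
    # ISO-8601 date strings sort lexicographically in chronological order,
    # so the raw start_time string works directly as a sort key.
    file_refs.sort(key=lambda file_ref: file_ref.start_time)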
Example #3
    def get(self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
        file_refs = []
        if os.path.exists(data_set_meta_info.identifier):
            mime_type = get_mime_type(data_set_meta_info.identifier)
            file_refs.append(
                FileRef(data_set_meta_info.identifier,
                        data_set_meta_info.start_time,
                        data_set_meta_info.end_time, mime_type))
            return file_refs
        relative_path = (self.path + self.pattern).replace('//', '/')
        relative_path = relative_path.replace(
            '/{}/'.format(_DATA_TYPE_PATTERN),
            '/{}/'.format(data_set_meta_info.data_type))
        if _DAY_PATTERN not in self.pattern and _MONTH_PATTERN not in self.pattern and \
                _YEAR_PATTERN not in self.pattern:
            if os.path.exists(relative_path):
                file_names = glob.glob(relative_path + '/**', recursive=True)
                for file_name in file_names:
                    file_name = file_name.replace('\\', '/')
                    if data_set_meta_info.identifier in file_name and \
                            data_validation.is_valid(file_name, data_set_meta_info.data_type):
                        mime_type = get_mime_type(file_name)
                        file_refs.append(
                            FileRef(file_name, data_set_meta_info.start_time,
                                    data_set_meta_info.end_time, mime_type))
            return file_refs
        if data_set_meta_info.start_time is None and data_set_meta_info.end_time is None:
            mime_type = get_mime_type(relative_path)
            file_refs.append(
                FileRef(relative_path, data_set_meta_info.start_time,
                        data_set_meta_info.end_time, mime_type))
            return file_refs

        # todo consider (weird) case when a start time but no end time is given
        start_time = get_time_from_string(data_set_meta_info.start_time)
        end_time = get_time_from_string(data_set_meta_info.end_time)
        time = start_time
        while time <= end_time:
            path = relative_path
            path = path.replace('/{}/'.format(_YEAR_PATTERN),
                                '/{:04d}/'.format(time.year))
            path = path.replace('/{}/'.format(_MONTH_PATTERN),
                                '/{:02d}/'.format(time.month))
            path = path.replace('/{}/'.format(_DAY_PATTERN),
                                '/{:02d}/'.format(time.day))
            time = self._get_next_time_step(time)
            if not os.path.exists(path):
                continue
            file_names = glob.glob(path + '/**', recursive=True)
            for file_name in file_names:
                file_name = file_name.replace('\\', '/')
                if data_set_meta_info.identifier in file_name and \
                        data_validation.is_valid(file_name, data_set_meta_info.data_type):
                    mime_type = get_mime_type(file_name)
                    file_refs.append(
                        FileRef(file_name, data_set_meta_info.start_time,
                                data_set_meta_info.end_time, mime_type))
        return file_refs
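
The placeholder constants and _get_next_time_step are defined elsewhere in the module. A hedged sketch of how they might look, consistent with the replace calls above (the token values and the one-day step are assumptions):

from datetime import datetime, timedelta

_DATA_TYPE_PATTERN = 'DATA_TYPE'  # hypothetical placeholder tokens
_YEAR_PATTERN = 'YYYY'
_MONTH_PATTERN = 'MM'
_DAY_PATTERN = 'DD'

def _get_next_time_step(time: datetime) -> datetime:
    # Stepping one day at a time visits every candidate /year/month/day/
    # folder between start_time and end_time; coarser patterns would
    # permit larger steps.
    return time + timedelta(days=1)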
Example #4
def test_can_read():
    file_ref = FileRef(url=FAULTY_BASE_FILE, start_time='2017-09-10', end_time='2017-09-10',
                       mime_type='unknown mime type')
    assert not S2ObservationsCreator.can_read(file_ref)
    file_ref = FileRef(url=MISSING_TILE_ID_BASE_FILE, start_time='2017-09-10', end_time='2017-09-10',
                       mime_type='unknown mime type')
    assert not S2ObservationsCreator.can_read(file_ref)
    file_ref = FileRef(url=S2_AWS_BASE_FILE, start_time='2017-09-10', end_time='2017-09-10',
                       mime_type='unknown mime type')
    assert S2ObservationsCreator.can_read(file_ref)
Example #5
def test_create_symlinks_s2_aws_data_type_given():
    file_refs = [
        FileRef('./test/test_data/29/S/QB/2017/9/4/0', None, None,
                'application/x-directory'),
        FileRef('./test/test_data/30/T/XZ/2016/2/2/1/', None, None,
                'application/x-directory')
    ]
    expected_sym_links = [
        os.path.join(_FOLDER, '29/S/QB/2017/9/4/0/'),
        os.path.join(_FOLDER, '30/T/XZ/2016/2/2/1/')
    ]
    try:
        assert not os.path.exists(_FOLDER)

        create_sym_links(file_refs, _FOLDER, 'AWS_S2_L1C')
        for expected_sym_link in expected_sym_links:
            new_list = os.listdir(expected_sym_link)
            assert 15 == len(new_list)
            assert 'B01.jp2' in new_list
            assert os.path.islink('{}/B01.jp2'.format(expected_sym_link))
            assert 'B02.jp2' in new_list
            assert os.path.islink('{}/B02.jp2'.format(expected_sym_link))
            assert 'B03.jp2' in new_list
            assert os.path.islink('{}/B03.jp2'.format(expected_sym_link))
            assert 'B04.jp2' in new_list
            assert os.path.islink('{}/B04.jp2'.format(expected_sym_link))
            assert 'B05.jp2' in new_list
            assert os.path.islink('{}/B05.jp2'.format(expected_sym_link))
            assert 'B06.jp2' in new_list
            assert os.path.islink('{}/B06.jp2'.format(expected_sym_link))
            assert 'B07.jp2' in new_list
            assert os.path.islink('{}/B07.jp2'.format(expected_sym_link))
            assert 'B08.jp2' in new_list
            assert os.path.islink('{}/B08.jp2'.format(expected_sym_link))
            assert 'B09.jp2' in new_list
            assert os.path.islink('{}/B09.jp2'.format(expected_sym_link))
            assert 'B8A.jp2' in new_list
            assert os.path.islink('{}/B8A.jp2'.format(expected_sym_link))
            assert 'B10.jp2' in new_list
            assert os.path.islink('{}/B10.jp2'.format(expected_sym_link))
            assert 'B11.jp2' in new_list
            assert os.path.islink('{}/B11.jp2'.format(expected_sym_link))
            assert 'B12.jp2' in new_list
            assert os.path.islink('{}/B12.jp2'.format(expected_sym_link))
            assert 'metadata.xml' in new_list
            assert os.path.islink('{}/metadata.xml'.format(expected_sym_link))
            assert 'qi' in new_list
            assert os.path.islink('{}/qi/some_file'.format(expected_sym_link))
    finally:
        if os.path.exists(_FOLDER):
            shutil.rmtree(_FOLDER)
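
create_sym_link and create_sym_links are not part of the listing. A rough sketch of the behaviour the tests check: single files get one link, while 'application/x-directory' references get a mirrored tree with every contained file linked individually. (The real target layout preserves the source's relative path, as the 29/S/QB/... assertions show; this sketch simplifies that.)

import os
from typing import Optional, Sequence

def create_sym_link(file_ref, folder: str) -> None:
    source = os.path.abspath(file_ref.url)
    if os.path.isdir(source):
        # Mirror the directory tree and link each contained file,
        # as the per-band islink assertions above require.
        for root, _, names in os.walk(source):
            target_dir = os.path.join(folder, os.path.relpath(root, source))
            os.makedirs(target_dir, exist_ok=True)
            for name in names:
                os.symlink(os.path.join(root, name),
                           os.path.join(target_dir, name))
    else:
        os.makedirs(folder, exist_ok=True)
        os.symlink(source, os.path.join(folder, os.path.basename(source)))

def create_sym_links(file_refs: Sequence, folder: str,
                     data_type: Optional[str] = None) -> None:
    # data_type ('AWS_S2_L1C' above) presumably selects type-specific
    # handling; this sketch treats every reference alike.
    for file_ref in file_refs:
        create_sym_link(file_ref, folder)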
Example #6
 def _get_from_wrapped(
         self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     if data_set_meta_info.data_type not in _DATA_TYPE_PARAMETER_DICTS:
         logging.warning(
             f'Data Type {data_set_meta_info.data_type} not supported by MUNDI DIAS File System '
             f'implementation.')
         return []
     buckets = self._get_bucket_names(data_set_meta_info)
     prefix = self._get_prefix(data_set_meta_info)
     file_refs = []
     for bucket in buckets:
         file_url = _REST_BASE_URL.format(bucket, prefix,
                                          data_set_meta_info.identifier)
         excludes = _DATA_TYPE_PARAMETER_DICTS[
             data_set_meta_info.data_type]['excludes']
         success = self._download_url(file_url,
                                      data_set_meta_info.identifier, bucket,
                                      excludes)
         if success:
             url = glob.glob(
                 f"{self._temp_dir}/{data_set_meta_info.identifier}*")[0]
             file_refs.append(
                 FileRef(url, data_set_meta_info.start_time,
                         data_set_meta_info.end_time, get_mime_type(url)))
             logging.info('Downloaded {}'.format(
                 data_set_meta_info.identifier))
             break
     return file_refs
Example #7
def test_create_sym_link_s2_aws_data():
    file_ref = FileRef('./test/test_data/29/S/QB/2017/9/4/0', None, None,
                       'application/x-directory')
    expected_sym_link_name = os.path.join(_FOLDER, '29/S/QB/2017/9/4/0/')
    try:
        assert not os.path.exists(_FOLDER)

        create_sym_link(file_ref, _FOLDER)

        new_list = os.listdir(expected_sym_link_name)
        assert 15 == len(new_list)
        assert 'B01.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B01.jp2')
        assert 'B02.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B02.jp2')
        assert 'B03.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B03.jp2')
        assert 'B04.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B04.jp2')
        assert 'B05.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B05.jp2')
        assert 'B06.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B06.jp2')
        assert 'B07.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B07.jp2')
        assert 'B08.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B08.jp2')
        assert 'B09.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B09.jp2')
        assert 'B8A.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B8A.jp2')
        assert 'B10.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B10.jp2')
        assert 'B11.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B11.jp2')
        assert 'B12.jp2' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/B12.jp2')
        assert 'metadata.xml' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/metadata.xml')
        assert 'qi' in new_list
        assert os.path.islink(
            './test/test_data/out/29/S/QB/2017/9/4/0/qi/some_file')
    finally:
        if os.path.exists('./test/test_data/out/29/S/QB/2017/9/4/0/'):
            shutil.rmtree('./test/test_data/out/')
Example #8
 def _get_from_wrapped(self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     file_refs = []
     file_url = _DOWNLOAD_URL.format(data_set_meta_info.referenced_data)
     request = urllib2.Request(file_url)
     authorization = base64.encodebytes(str.encode('{}:{}'.format(self._username, self._password))). \
         replace(b'\n', b'').decode()
     request.add_header('Authorization', 'Basic {}'.format(authorization))
     try:
         cj = CookieJar()
         opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
         remote_file = opener.open(request)
         status = remote_file.status
         while status != 200:
             logging.info(f"Request '{file_url}' awaiting status")
             time.sleep(10)
             remote_file = opener.open(request)
             status = remote_file.status
         total_size_in_bytes = int(remote_file.info()['Content-Length'])
         # todo check in advance whether there is enough disk space left
         file_name = data_set_meta_info.identifier
         cdheader = remote_file.getheader('content-disposition')
         if cdheader:
             split_header = cdheader.split('"')
             if file_name in cdheader and len(split_header) > 1:
                 file_name = split_header[-2]
         temp_url = f'{self._temp_dir}/{file_name}'
         logging.info('Downloading {}'.format(data_set_meta_info.identifier))
         with open(temp_url, 'wb') as temp_file:
             one_percent = total_size_in_bytes / 100
             downloaded_bytes = 0
             next_threshold = one_percent
             length = 1024 * 1024
             buf = remote_file.read(length)
             while buf:
                 temp_file.write(buf)
                 downloaded_bytes += 1024 * 1024
                 if downloaded_bytes > next_threshold:
                     stdout.write('\r{} %'.format(int(next_threshold / one_percent)))
                     stdout.flush()
                     next_threshold += one_percent
                 buf = remote_file.read(length)
         remote_file.close()  # the with-block already closed temp_file
         logging.info('Downloaded {}'.format(data_set_meta_info.identifier))
         if _DATA_TYPE_PARAMETER_DICTS[data_set_meta_info.data_type]['unzip'] and file_name.endswith('.zip'):
             with ZipFile(temp_url) as zipfile:
                 zipfile.extractall(self._temp_dir)
             os.remove(temp_url)
         temp_content = glob.glob(f'{self._temp_dir}/*')
         if len(temp_content) > 0:
             downloaded_path = temp_content[0]  # 'id' would shadow the builtin
             file_refs.append(FileRef(downloaded_path, data_set_meta_info.start_time, data_set_meta_info.end_time,
                                      get_mime_type(temp_url)))
         opener.close()
     except HTTPError as e:
         logging.info(f"Could not download from url '{file_url}'. {e.reason}")
     return file_refs
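
The filename recovery above depends on the server quoting the name in the Content-Disposition header; split_header[-2] is the quoted value. A worked example (the header value is hypothetical):

cdheader = 'attachment; filename="S2A_MSIL1C_20170910_example.zip"'
split_header = cdheader.split('"')
# ['attachment; filename=', 'S2A_MSIL1C_20170910_example.zip', '']
file_name = split_header[-2]  # 'S2A_MSIL1C_20170910_example.zip'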
Example #9
 def _get_from_wrapped(
         self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     file_refs = []
     if data_set_meta_info.identifier == 'some_wrapped_file':
         file_refs.append(
             FileRef(path_to_wrapped_file, data_set_meta_info.start_time,
                     data_set_meta_info.end_time,
                     get_mime_type(path_to_wrapped_file)))
     return file_refs
Example #10
def test_bands_per_observation():
    destination_srs = osr.SpatialReference()
    destination_srs.ImportFromWkt(EPSG_32232_WKT)
    bounds_srs = osr.SpatialReference()
    bounds_srs.SetWellKnownGeogCS('EPSG:4326')
    bounds = [7.8, 53.5, 8.8, 53.8]
    reprojection = Reprojection(bounds=bounds, x_res=50, y_res=100, destination_srs=destination_srs,
                                bounds_srs=bounds_srs, resampling_mode=None)
    file_ref = FileRef(url=S2_AWS_BASE_FILE, start_time='2017-09-10', end_time='2017-09-10',
                       mime_type='unknown mime type')
    s2_observations = S2Observations(file_ref, reprojection, emulator_folder=EMULATOR_FOLDER)

    assert s2_observations.bands_per_observation == 10
Example #11
 def _get_from_wrapped(
         self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     from com.obs.client.obs_client import ObsClient
     if data_set_meta_info.data_type not in _DATA_TYPE_PARAMETER_DICTS:
         logging.warning(
             f'Data Type {data_set_meta_info.data_type} not supported by MUNDI DIAS File System '
             f'implementation.')
         return []
     buckets = self._get_bucket_names(data_set_meta_info)
     prefix = self._get_prefix(data_set_meta_info)
     obs_client = ObsClient(access_key_id=self._access_key_id,
                            secret_access_key=self._secret_access_key,
                            server=_MUNDI_SERVER)
     keys = []
     excludes = _DATA_TYPE_PARAMETER_DICTS[
         data_set_meta_info.data_type]['excludes']
     right_bucket = None
     for bucket in buckets:
         right_bucket = bucket
         objects = obs_client.listObjects(bucketName=bucket, prefix=prefix)
         if objects.status < 300:
             for content in objects.body.contents:
                 if data_set_meta_info.identifier in content.key:
                     move_on = False
                     for exclude in excludes:
                         if content.key.endswith(exclude):
                             move_on = True
                     if not move_on:
                         keys.append(content.key)
             if len(keys) > 0:
                 break
     if len(keys) == 0:
         return []
     data_set_id = data_set_meta_info.identifier
     for key in keys:
         relative_path_to_file = key.split(data_set_meta_info.identifier)[1]
         target_file = f'{self._temp_dir}/{data_set_meta_info.identifier}{relative_path_to_file}'
         if len(keys) == 1:
             data_set_id = f'{data_set_meta_info.identifier}{relative_path_to_file}'
         resp = obs_client.getObject(right_bucket,
                                     key,
                                     downloadPath=target_file)
         if resp.status >= 300:
             return []
     obs_client.close()
     file_ref = FileRef(f'{self._temp_dir}/{data_set_id}',
                        data_set_meta_info.start_time,
                        data_set_meta_info.end_time,
                        get_mime_type(data_set_meta_info.identifier))
     return [file_ref]
Example #12
 def _get_from_wrapped(
         self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     file_refs = []
     file_name = data_set_meta_info.identifier.split('/')[-1]
     url = '{}/{}'.format(self._url, file_name)
     logging.info(f"Attempting to download from {url}")
     success = self._download_url(url, self._temp_dir, file_name)
     if success:
         destination = os.path.join(self._temp_dir, file_name)
         file_refs.append(
             FileRef(destination, data_set_meta_info.start_time,
                     data_set_meta_info.end_time, get_mime_type(file_name)))
         logging.info('Downloaded {}'.format(file_name))
     return file_refs
Example #13
 def get(self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     if data_set_meta_info.referenced_data is None:
         return []
     required_datasets = []
     referenced_data_sets = data_set_meta_info.referenced_data.split(';')
     for data_set in referenced_data_sets:
         # coverage is wrong here. We leave it as it makes no difference.
         file_refs = self._file_system.get(DataSetMetaInfo(data_set_meta_info.coverage, None, None,
                                                           self._encapsulated_data_type, data_set))
          for file_ref in file_refs:
              url = file_ref.url.replace('//', '/')
              if url not in required_datasets:
                  required_datasets.append(url)
     vrt_dataset = gdal.BuildVRT(self._path_to_vrt_file, required_datasets)
     vrt_dataset.SetMetadataItem('COVERAGE', data_set_meta_info.coverage)
     vrt_dataset.FlushCache()
     self._set_absolute_sources(required_datasets)
     return [FileRef(self._path_to_vrt_file, None, None, get_mime_type(self._path_to_vrt_file))]
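
gdal.BuildVRT stitches the referenced files into a single virtual dataset without copying pixels. A minimal standalone usage sketch (the output path and COVERAGE value are hypothetical; the input tiles are the DEM files from Example #1):

from osgeo import gdal

vrt = gdal.BuildVRT('/tmp/dem.vrt',
                    ['./test/test_data/ASTGTM2_N36W006_dem.tif',
                     './test/test_data/ASTGTM2_N36W007_dem.tif'])
vrt.SetMetadataItem('COVERAGE', 'POLYGON((-7 36, -5 36, -5 37, -7 37, -7 36))')
vrt.FlushCache()  # writes the .vrt XML to disk
vrt = None  # closing the dataset releases the file handle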
Example #14
def test_create_sym_link_single_file():
    file_ref = FileRef('./test/test_data/ASTGTM2_N36W006_dem.tif', None, None,
                       'image/tiff')
    expected_sym_link_name = os.path.join(_FOLDER, 'ASTGTM2_N36W006_dem.tif')
    try:
        assert not os.path.exists(_FOLDER)

        create_sym_link(file_ref, _FOLDER)

        new_list = os.listdir(_FOLDER)
        assert 1 == len(new_list)
        assert 'ASTGTM2_N36W006_dem.tif' in new_list
        assert os.path.islink(expected_sym_link_name)
    finally:
        if os.path.islink('./test/test_data/out/ASTGTM2_N36W006_dem.tif'):
            os.unlink('./test/test_data/out/ASTGTM2_N36W006_dem.tif')
        if os.path.exists(_FOLDER):
            shutil.rmtree(_FOLDER)
Example #15
 def _get_from_wrapped(
         self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     file_refs = []
     time = get_time_from_string(data_set_meta_info.start_time)
     file_url = '{}/{}/{}/{}.{:02d}.{:02d}/{}'.format(
         _BASE_URL, _PLATFORM, data_set_meta_info.data_type, time.year,
         time.month, time.day, data_set_meta_info.identifier)
     request = urllib2.Request(file_url)
     authorization = base64.encodebytes(str.encode('{}:{}'.format(self._username, self._password))). \
         replace(b'\n', b'').decode()
     request.add_header('Authorization', 'Basic {}'.format(authorization))
     remote_file = self._opener.open(request)
     temp_url = '{}/{}'.format(self._temp_dir,
                               data_set_meta_info.identifier)
     logging.info('Downloading {}'.format(data_set_meta_info.identifier))
     with open(temp_url, 'wb') as temp_file:
         total_size_in_bytes = int(remote_file.info()['Content-Length'])
         one_percent = total_size_in_bytes / 100
         downloaded_bytes = 0
         next_threshold = one_percent
         length = 1024
         buf = remote_file.read(length)
         while buf:
             temp_file.write(buf)
             buf = remote_file.read(length)
             downloaded_bytes += 1024
             if downloaded_bytes > next_threshold:
                 stdout.write('\r{} %'.format(
                     int(next_threshold / one_percent)))
                 stdout.flush()
                 next_threshold += one_percent
     logging.info('Downloaded {}'.format(data_set_meta_info.identifier))
     file_refs.append(
         FileRef(temp_url, data_set_meta_info.start_time,
                 data_set_meta_info.end_time, get_mime_type(temp_url)))
     return file_refs
Example #16
def test_get_band_data():
    destination_srs = osr.SpatialReference()
    destination_srs.ImportFromWkt(EPSG_32232_WKT)
    bounds_srs = osr.SpatialReference()
    bounds_srs.SetWellKnownGeogCS('EPSG:4326')
    bounds = [7.8, 53.5, 8.8, 53.8]
    reprojection = Reprojection(bounds=bounds, x_res=50, y_res=100, destination_srs=destination_srs,
                                bounds_srs=bounds_srs, resampling_mode=None)
    file_ref = FileRef(url=S2_AWS_BASE_FILE, start_time='2017-09-10', end_time='2017-09-10',
                       mime_type='unknown mime type')
    s2_observations = S2Observations(file_ref, reprojection, emulator_folder=EMULATOR_FOLDER)
    s2_observation_data = s2_observations.get_band_data(3)
    assert (327, 1328) == s2_observation_data.observations.shape
    assert 4 == len(s2_observation_data.metadata.keys())
    assert 'sza' in s2_observation_data.metadata.keys()
    assert 61.3750584241536 == s2_observation_data.metadata['sza']
    assert 'saa' in s2_observation_data.metadata.keys()
    assert 160.875894634785 == s2_observation_data.metadata['saa']
    assert 'vza' in s2_observation_data.metadata.keys()
    assert 2.776727292381147 == s2_observation_data.metadata['vza']
    assert 'vaa' in s2_observation_data.metadata.keys()
    assert 177.40153095962427 == s2_observation_data.metadata['vaa']
    assert (327, 1328) == s2_observation_data.mask.shape
    assert (434256, 434256) == s2_observation_data.uncertainty.shape
Example #17
 def _get_file_ref(self,
                   data_set_meta_info: DataSetMetaInfo,
                   bands=None,
                   metafiles=None) -> Optional[FileRef]:
     """auxiliary method to delimit the number of downloaded files for testing"""
     if not self._is_valid_identifier(data_set_meta_info.identifier):
         # consider throwing an exception
         return None
     from sentinelhub import AwsTileRequest
     tile_name = self._get_tile_name(data_set_meta_info.identifier)
     start_time_as_datetime = get_time_from_string(
         data_set_meta_info.start_time)
     time = start_time_as_datetime.strftime('%Y-%m-%d')
     aws_index = self._get_aws_index(data_set_meta_info.identifier)
     request = AwsTileRequest(tile=tile_name,
                              time=time,
                              aws_index=aws_index,
                              bands=bands,
                              metafiles=metafiles,
                              data_folder=self._temp_dir)
     year = start_time_as_datetime.year
     month = start_time_as_datetime.month
     day = start_time_as_datetime.day
     logging.info('Downloading S2 Data from {}-{}-{}'.format(
         month, day, year))
     request.save_data()
     saved_dir = '{}/{},{}-{:02d}-{:02d},{}/'.format(
         self._temp_dir, tile_name, year, month, day, aws_index)
     new_dir = '{0}/{1}/{2}/{3}/{4}/{5}/{6}/{7}/'.format(
         self._temp_dir, tile_name[0:2], tile_name[2:3], tile_name[3:5],
         year, month, day, aws_index)
     copy_tree(saved_dir, new_dir)
     logging.info('Downloaded S2 Data from {}-{}-{}'.format(
         month, day, year))
     return FileRef(new_dir, data_set_meta_info.start_time,
                    data_set_meta_info.end_time, get_mime_type(new_dir))
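
The slicing of tile_name splits an MGRS tile id into UTM zone, latitude band, and grid square, which is exactly the directory layout the AWS-style symlink tests above expect. For instance:

tile_name = '29SQB'
parts = (tile_name[0:2], tile_name[2:3], tile_name[3:5])
# ('29', 'S', 'QB') -> the 29/S/QB/... path seen in the earlier tests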
Example #18
def test_group_file_refs_by_date():
    file_refs = [
        FileRef('1', '1999-01-01', '1999-01-01', 'image/tiff'),
        FileRef('2', '1999-01-01', '1999-01-01', 'image/tiff'),
        FileRef('3', '1999-01-01', '1999-01-01', 'image/tiff'),
        FileRef('4', '1999-01-02', '1999-01-02', 'image/tiff'),
        FileRef('5', '1999-01-03', '1999-01-03', 'image/tiff'),
        FileRef('6', '1999-01-03', '1999-01-03', 'image/tiff'),
    ]
    file_refs_by_date = _group_file_refs_by_date(file_refs)

    assert file_refs_by_date is not None
    assert 3 == len(file_refs_by_date)
    assert '1999-01-01' in file_refs_by_date
    assert '1999-01-02' in file_refs_by_date
    assert '1999-01-03' in file_refs_by_date
    assert 3 == len(file_refs_by_date['1999-01-01'])
    assert 1 == len(file_refs_by_date['1999-01-02'])
    assert 2 == len(file_refs_by_date['1999-01-03'])
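
_group_file_refs_by_date is not shown; a minimal sketch consistent with these assertions, keyed on the start_time string (an assumption; the real implementation might derive the date differently):

from typing import Dict, List, Sequence

def _group_file_refs_by_date(file_refs: Sequence) -> Dict[str, List]:
    # Collect references under their start_time; the test above
    # uses day-granular ISO dates.
    file_refs_by_date: Dict[str, List] = {}
    for file_ref in file_refs:
        file_refs_by_date.setdefault(file_ref.start_time, []).append(file_ref)
    return file_refs_by_date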
Example #19
def test_create_observations():
    class DummyObservations(ProductObservations):
        def get_band_data_by_name(
                self,
                band_name: str,
                retrieve_uncertainty: bool = True) -> ObservationData:
            return ObservationData(observations=np.array([0.5]),
                                   uncertainty=sp.lil_matrix((1, 1)),
                                   mask=np.array([0]),
                                   metadata={},
                                   emulator=None)

        def get_band_data(
                self,
                band_index: int,
                retrieve_uncertainty: bool = True) -> ObservationData:
            return ObservationData(observations=np.array([0.5]),
                                   uncertainty=sp.lil_matrix((1, 1)),
                                   mask=np.array([0]),
                                   metadata={},
                                   emulator=None)

        @property
        def bands_per_observation(self):
            return 15

        @property
        def data_type(self):
            return 'dummy_type'

        def set_no_data_value(self, band: Union[str, int],
                              no_data_value: float):
            pass

    class DummyObservationsCreator(ProductObservationsCreator):
        DUMMY_PATTERN = 'dfghztm_[0-9]{4}_dvfgbh'
        DUMMY_PATTERN_MATCHER = re.compile('dfghztm_[0-9]{4}_dvfgbh')

        @classmethod
        def can_read(cls, file_ref: FileRef) -> bool:
            if os.path.exists(file_ref.url):
                with open(file_ref.url, 'r') as file:
                    return cls.DUMMY_PATTERN_MATCHER.search(file.name) is not None
            return False

        @classmethod
        def create_observations(
                cls, file_ref: FileRef, reprojection: Optional[Reprojection],
                emulator_folder: Optional[str]) -> ProductObservations:
            if cls.can_read(file_ref):
                return DummyObservations()

    observations_factory = ObservationsFactory()
    observations_factory.add_observations_creator_to_registry(
        DummyObservationsCreator())

    start_time = '2017-06-04'
    file_refs = [
        FileRef(url=DUMMY_FILE,
                start_time=start_time,
                end_time='2017-06-07',
                mime_type='unknown mime type'),
        FileRef(url='tzzg',
                start_time='2017-06-07',
                end_time='2017-06-10',
                mime_type='unknown mime type')
    ]
    observations_wrapper = observations_factory.create_observations(
        file_refs, None, '')

    assert 1 == observations_wrapper.get_num_observations()
    assert 15 == observations_wrapper.bands_per_observation(0)
    start_time = get_time_from_string(start_time)
    data = observations_wrapper.get_band_data(start_time, 0)
    assert 1 == len(data.observations)
    assert 0.5 == data.observations[0]
    other_data = observations_wrapper.get_band_data_by_name(start_time, 'name')
    assert 1 == len(other_data.observations)
    assert 0.5 == other_data.observations[0]
    assert 'dummy_type' == observations_wrapper.get_data_type(start_time)