def test_vrt_file_system_get():
    parameters = {
        'path_to_vrt_file': PATH_TO_NON_EXISTENT_VRT_FILE,
        'encapsulated_data_type': DataTypeConstants.ASTER,
        'accessed_file_system': 'LocalFileSystem',
        'path': './test/test_data/',
        'pattern': '/dt/'
    }
    file_system = VrtFileSystemAccessor.create_from_parameters(parameters)

    try:
        data_set_meta_info = DataSetMetaInfo('of no concern here', None, None,
                                             DataTypeConstants.ASTER,
                                             PATH_TO_NON_EXISTENT_VRT_FILE,
                                             'ASTGTM2_N36W005_dem.tif')
        file_refs = file_system.get(data_set_meta_info)
        assert 1 == len(file_refs)
        assert PATH_TO_NON_EXISTENT_VRT_FILE == file_refs[0].url
        assert file_refs[0].start_time is None
        assert file_refs[0].end_time is None
        assert 'x-world/x-vrt' == file_refs[0].mime_type

        assert os.path.exists(PATH_TO_NON_EXISTENT_VRT_FILE)
    finally:
        if os.path.exists(PATH_TO_NON_EXISTENT_VRT_FILE):
            os.remove(PATH_TO_NON_EXISTENT_VRT_FILE)
 def _get_data_set_meta_infos_for_tile_description(
         self, tile_description: TileDescription, start_time: datetime,
         end_time: datetime) -> List[DataSetMetaInfo]:
     data_set_meta_infos = []
     current_time = start_time
     while current_time < end_time:
         aws_index = 0
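         # granules for the same tile and day are numbered 0, 1, ...; probe until a request fails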
         while aws_index >= 0:
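             # a tile id like '30SWJ' splits into UTM zone '30', latitude band 'S' and grid square 'WJ'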
             id = _ID_PATTERN.format(tile_description.tile_id[0:2],
                                     tile_description.tile_id[2:3],
                                     tile_description.tile_id[3:5],
                                     current_time.year, current_time.month,
                                     current_time.day, aws_index)
             tile_info_url = _AWS_BASE_TILE_INFO_URL.format(id)
             request = requests.get(tile_info_url)
             if request.status_code == 200:
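                 # keep only 'YYYY-MM-DDTHH:MM:SS': strip the trailing five characters (fractional seconds and 'Z')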
                 time = json.loads(request.text)['timestamp'][:-5]
                 data_set_meta_infos.append(
                     DataSetMetaInfo(tile_description.coverage, time, time,
                                     DataTypeConstants.AWS_S2_L1C, id))
                 aws_index += 1
             else:
                 aws_index = -1
                 current_time += timedelta(days=1)
     return data_set_meta_infos
Example #3
 def _create_data_set_meta_info(self, path: str, manifest_file):
     manifest = XML(manifest_file)
     coverage = self._extract_coverage(manifest)
     start_time = self._extract_start_time(manifest)
     end_time = self._extract_stop_time(manifest)
     id = path.split('/')[-1]
     return DataSetMetaInfo(identifier=id,
                            coverage=coverage,
                            start_time=start_time,
                            end_time=end_time,
                            data_type=DataTypeConstants.S1_SLC)
Example #4
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
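     # MODIS paths encode the year, day of year and the h/v tile indices at fixed character offsets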
     h = int(path[-27:-25])
     v = int(path[-24:-22])
     tile_coverage = get_tile_coverage(h, v).wkt
     year = int(path[-36:-32])
     doy = int(path[-32:-29])
     start_time = get_time_from_year_and_day_of_year(year, doy)
     end_time = self._get_end_time(year, doy)
     return DataSetMetaInfo(tile_coverage,
                            start_time.strftime('%Y-%m-%d %H:%M:%S'),
                            end_time.strftime('%Y-%m-%d %H:%M:%S'),
                            self.name(), path[path.find('MCD'):])
 def query_local(self, query_string: str) -> List[DataSetMetaInfo]:
     if self._provided_data_type not in self.get_data_types_from_query_string(query_string):
         return []
     roi = self.get_roi_from_query_string(query_string)
     coverages, referenced_data = self._get_coverages_from_local_meta_info_provider()
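     # merge the individual tile footprints into a single coverage geometry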
     coverage = cascaded_union(coverages)
     if not roi.within(coverage):
         return []
     referenced_data = ';'.join(referenced_data)
     data_set_meta_info = DataSetMetaInfo(coverage.wkt, None, None, self._provided_data_type,
                                          self._path_to_vrt_file, referenced_data)
     return [data_set_meta_info]
Example #6
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
     # the hemisphere letter sits just before the two latitude digits, e.g. 'N' in 'ASTGTM2_N36W005'
     path_lat_id = path[-15:-14]
     path_lat = float(path[-14:-12])
     if path_lat_id == 'S':
         path_lat *= -1
     path_lon_id = path[-12:-11]
     path_lon = float(path[-11:-8])
     if path_lon_id == 'W':
         path_lon *= -1
     coverage = Polygon([[path_lon, path_lat], [path_lon, path_lat + 1],
                         [path_lon + 1, path_lat + 1],
                         [path_lon + 1, path_lat]])
     return DataSetMetaInfo(coverage.wkt, None, None,
                            DataTypeConstants.ASTER, path)
def test_notify_copied_to_local():
    parameters = {'path': TEMP_DIR, 'pattern': '', 'url': EMUS_TEST_URL, 'temp_dir': TEMP_DIR}
    file_system = HttpFileSystemAccessor.create_from_parameters(parameters)

    path_to_file = './test/test_data/some_file'
    try:
        # create an empty dummy file and close the handle right away
        open(path_to_file, 'w+').close()
        data_set_meta_info = DataSetMetaInfo('ctfvgb', '2017-09-04', '2017-09-04', 'some_format',
                                             'some_file')
        file_system._notify_copied_to_local(data_set_meta_info)

        assert not os.path.exists(path_to_file)
    finally:
        if os.path.exists(path_to_file):
            os.remove(path_to_file)
Example #8
 def _query_wrapped_meta_info_provider(self, query_string: str,
                                       local_data_set_meta_infos: List[DataSetMetaInfo]) -> List[DataSetMetaInfo]:
     roi = dumps(self.get_roi_from_query_string(query_string))
     data_types = self.get_data_types_from_query_string(query_string)
     start_time = datetime.strftime(self.get_start_time_from_query_string(query_string), "%Y-%m-%dT%H:%M:%SZ")
     end_time = datetime.strftime(self.get_end_time_from_query_string(query_string), "%Y-%m-%dT%H:%M:%SZ")
     data_set_meta_infos = []
     for data_type in data_types:
         if self.provides_data_type(data_type):
             run = 0
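             # page through the SciHub results; querying stops once a page yields no entries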
             continue_checking_for_data_sets = True
             while continue_checking_for_data_sets:
                 scihub_query = self._create_scihub_query(roi, data_type, start_time, end_time, run)
                 run += 1
                 response = requests.get(scihub_query, auth=(self._username, self._password))
                 response_xml = XML(response.content)
                 continue_checking_for_data_sets = False
                 for child in response_xml:
                     if child.tag == '{http://www.w3.org/2005/Atom}entry':
                         data_set_meta_info_id = ""
                         data_set_meta_info_start_time = ""
                         data_set_meta_info_end_time = ""
                         data_set_meta_info_coverage = ""
                         data_set_meta_info_reference = ""
                         for child2 in child:
                             if child2.tag == '{http://www.w3.org/2005/Atom}id':
                                 data_set_meta_info_reference = child2.text
                             elif child2.tag == '{http://www.w3.org/2005/Atom}title':
                                 data_set_meta_info_id = child2.text
                             elif child2.tag == '{http://www.w3.org/2005/Atom}date' and 'name' in child2.attrib \
                                     and child2.attrib['name'] == 'beginposition':
                                 data_set_meta_info_start_time = child2.text
                             elif child2.tag == '{http://www.w3.org/2005/Atom}date' and 'name' in child2.attrib \
                                     and child2.attrib['name'] == 'endposition':
                                 data_set_meta_info_end_time = child2.text
                             elif child2.tag == '{http://www.w3.org/2005/Atom}str' and 'name' in child2.attrib \
                                     and child2.attrib['name'] == 'footprint':
                                 data_set_meta_info_coverage = child2.text
                         data_set_meta_info = \
                             DataSetMetaInfo(data_set_meta_info_coverage, data_set_meta_info_start_time,
                                             data_set_meta_info_end_time, data_type, data_set_meta_info_id,
                                             data_set_meta_info_reference)
                         if not self._is_provided_locally(data_set_meta_info, local_data_set_meta_infos):
                             data_set_meta_infos.append(data_set_meta_info)
                         continue_checking_for_data_sets = True
                 response.close()
     return data_set_meta_infos
Example #9
 def _query_wrapped_meta_info_provider(self, query_string: str, local_data_set_meta_infos: List[DataSetMetaInfo]) -> \
         List[DataSetMetaInfo]:
     roi = dumps(self.get_roi_from_query_string(query_string))
     data_types = self.get_data_types_from_query_string(query_string)
     start_time = datetime.strftime(
         self.get_start_time_from_query_string(query_string),
         "%Y-%m-%dT%H:%M:%SZ")
     end_time = datetime.strftime(
         self.get_end_time_from_query_string(query_string),
         "%Y-%m-%dT%H:%M:%SZ")
     data_set_meta_infos = []
     for data_type in data_types:
         if self.provides_data_type(data_type):
             run = 0
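             # page through the Mundi OpenSearch results until a page returns no more entries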
             continue_checking_for_data_sets = True
             while continue_checking_for_data_sets:
                 mundi_query = _create_mundi_query(roi, data_type,
                                                   start_time, end_time,
                                                   run)
                 run += 1
                 response = requests.get(mundi_query)
                 response_xml = XML(response.content)
                 # close the response once its content has been read
                 response.close()
                 continue_checking_for_data_sets = False
                 for child in response_xml:
                     if child.tag == '{http://www.w3.org/2005/Atom}entry':
                         data_set_meta_info_id = ""
                         data_set_meta_info_time = ""
                         data_set_meta_info_coverage = ""
                         for child2 in child:
                             if child2.tag == '{http://www.w3.org/2005/Atom}id':
                                 data_set_meta_info_id = child2.text
                             elif child2.tag == '{http://www.georss.org/georss}polygon':
                                 data_set_meta_info_coverage = _convert_mundi_coverage(
                                     child2.text)
                             elif child2.tag == '{http://tas/DIAS}sensingStartDate':
                                 data_set_meta_info_time = child2.text
                         data_set_meta_info = DataSetMetaInfo(
                             data_set_meta_info_coverage,
                             data_set_meta_info_time,
                             data_set_meta_info_time, data_type,
                             data_set_meta_info_id)
                         if not self._is_provided_locally(
                                 data_set_meta_info,
                                 local_data_set_meta_infos):
                             data_set_meta_infos.append(data_set_meta_info)
                         continue_checking_for_data_sets = True
     return data_set_meta_infos
 def get(self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     if data_set_meta_info.referenced_data is None:
         return []
     required_datasets = []
     referenced_data_sets = data_set_meta_info.referenced_data.split(';')
     for data_set in referenced_data_sets:
         # coverage is wrong here. We leave it as it makes no difference.
         file_refs = self._file_system.get(DataSetMetaInfo(data_set_meta_info.coverage, None, None,
                                                           self._encapsulated_data_type, data_set))
         for file_ref in file_refs:
             # normalize double slashes before the duplicate check so equal paths compare equal
             url = file_ref.url.replace('//', '/')
             if url not in required_datasets:
                 required_datasets.append(url)
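     # assemble the referenced files into a single virtual raster (VRT)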
     vrt_dataset = gdal.BuildVRT(self._path_to_vrt_file, required_datasets)
     vrt_dataset.SetMetadataItem('COVERAGE', data_set_meta_info.coverage)
     vrt_dataset.FlushCache()
     self._set_absolute_sources(required_datasets)
     return [FileRef(self._path_to_vrt_file, None, None, get_mime_type(self._path_to_vrt_file))]
 def query(self, query_string: str) -> List[DataSetMetaInfo]:
     if self._provided_data_type not in self.get_data_types_from_query_string(query_string):
         return []
     roi = self.get_roi_from_query_string(query_string)
     coverages, referenced_data = self._get_coverages_from_local_meta_info_provider()
     coverage = cascaded_union(coverages)
     if not roi.within(coverage):
         additional_coverages, additional_files = self._get_coverages_from_wrapped_meta_info_provider(query_string)
         for i in range(len(additional_files)):
             if additional_files[i] not in referenced_data:
                 referenced_data.append(additional_files[i])
                 coverages.append(additional_coverages[i])
         coverage = cascaded_union(coverages)
     referenced_data = ';'.join(referenced_data)
     data_set_meta_info = DataSetMetaInfo(coverage.wkt, None, None, self._provided_data_type,
                                          self._path_to_vrt_file, referenced_data)
     return [data_set_meta_info]
def test_aws_s2_file_system_get_file_ref():
    try:
        parameters = {'temp_dir': OUTPUT_DIR, 'path': './test/test_data/aws_s2_data/', 'pattern': ''}
        aws_s2_file_system = AwsS2FileSystemAccessor.create_from_parameters(parameters)
        data_set_meta_info = DataSetMetaInfo('doesnt matter here', '2016-04-01', '2016-04-01',
                                             DataTypeConstants.AWS_S2_L1C,
                                             '30/S/WJ/2016/4/1/0')
        metafiles = ['metadata', 'tileInfo']
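        # bands is empty, so only the two metadata files are fetched, no band data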
        file_ref = aws_s2_file_system._get_file_ref(data_set_meta_info, bands=[], metafiles=metafiles)
        assert '{}/30SWJ,2016-04-01,0/30/S/WJ/2016/4/1/0/'.format(OUTPUT_DIR) == file_ref.url
        assert '2016-04-01' == file_ref.start_time
        assert '2016-04-01' == file_ref.end_time
        assert 'application/x-directory' == file_ref.mime_type
    finally:
        path = '{}/30SWJ,2016-04-01,0/'.format(OUTPUT_DIR)
        if os.path.exists(path):
            shutil.rmtree(path)
def test_notify_copied_to_local():
    dir_to_be_deleted = '{}/24CBS,2017-10-16,1/'.format(OUTPUT_DIR)
    other_dir_to_be_deleted = '{}/24/C/BS/2017/10/16/1/'.format(OUTPUT_DIR)
    try:
        parameters = {'temp_dir': OUTPUT_DIR, 'path': './test/test_data/aws_s2_data/', 'pattern': ''}
        aws_s2_file_system = AwsS2FileSystemAccessor.create_from_parameters(parameters)
        if not os.path.exists(dir_to_be_deleted):
            os.mkdir(dir_to_be_deleted)
        if not os.path.exists(other_dir_to_be_deleted):
            os.makedirs(other_dir_to_be_deleted)
        data_set_meta_info = DataSetMetaInfo('something', '2017-10-16', '2017-10-16', 'AWS_S2_L1C',
                                             '24/C/BS/2017/10/16/1')
        aws_s2_file_system._notify_copied_to_local(data_set_meta_info)
        assert not os.path.exists(dir_to_be_deleted)
        assert not os.path.exists(other_dir_to_be_deleted)
    finally:
        if os.path.exists(dir_to_be_deleted):
            shutil.rmtree(dir_to_be_deleted)
        if os.path.exists(other_dir_to_be_deleted):
            shutil.rmtree(other_dir_to_be_deleted)
Example #14
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
     id = path.split('/')[-1]
     dataset = xarray.open_dataset(path)
     if 'lat' in dataset.coords and 'lon' in dataset.coords:
         lat_min = dataset.lat.min().values.item(0)
         lat_max = dataset.lat.max().values.item(0)
         lon_min = dataset.lon.min().values.item(0)
         lon_max = dataset.lon.max().values.item(0)
         coverage = f'POLYGON(({lon_min} {lat_max}, {lon_max} {lat_max}, {lon_max} {lat_min}, ' \
                    f'{lon_min} {lat_min}, {lon_min} {lat_max}))'
     else:
         # without lat/lon coordinates no footprint can be derived from the file
         coverage = None
     dataset.close()
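     # the Sentinel-1 identifier encodes start and stop timestamps at fixed character offsets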
     start_time = get_time_from_string(
         id[17:32]).strftime('%Y-%m-%d %H:%M:%S')
     end_time = get_time_from_string(
         id[33:48]).strftime('%Y-%m-%d %H:%M:%S')
     return DataSetMetaInfo(identifier=id,
                            coverage=coverage,
                            start_time=start_time,
                            end_time=end_time,
                            data_type=DataTypeConstants.S1_SPECKLED)
Example #15
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
     return DataSetMetaInfo(GLOBAL, None, None,
                            DataTypeConstants.WV_EMULATOR, path)
Example #16
 def _query_wrapped_meta_info_provider(self, query_string: str, local_data_set_meta_infos: List[DataSetMetaInfo]) \
         -> List[DataSetMetaInfo]:
     requested_data_types = []
     query_data_types = self.get_data_types_from_query_string(query_string)
     for supported_data_type in self._supported_data_types:
         if supported_data_type in query_data_types:
             requested_data_types.append(supported_data_type)
     if len(requested_data_types) == 0:
         return []
     roi = self.get_roi_from_query_string(query_string)
     tile_coverages = []
     for v in range(18):
         for h in range(36):
             tile_coverage = get_tile_coverage(h, v)
             if tile_coverage is not None and tile_coverage.intersects(roi):
                 tile_coverages.append((h, v, tile_coverage.wkt))
     start_time = self.get_start_time_from_query_string(query_string)
     if start_time is None:
         start_time = get_time_from_string(FIRST_DAY)
     end_time = self.get_end_time_from_query_string(query_string)
     if end_time is None:
         end_time = datetime.datetime.now()
     data_set_meta_infos = []
     try:
         for requested_data_type in requested_data_types:
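             # step current_time back onto the product's compositing grid via its offset and interval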
             start_doy = start_time.timetuple().tm_yday
             current_time = start_time - datetime.timedelta(
                 days=(start_doy - _DATA_OFFSETS[requested_data_type]) %
                 _DATA_INTERVALS[requested_data_type])
             while current_time < end_time:
                 current_time_str = current_time.strftime(
                     '%Y-%m-%d %H:%M:%S')
                 current_tile_coverages = []
                 for h, v, tile_coverage in tile_coverages:
                     add_to_current = True
                     for local_data_set_meta_info in local_data_set_meta_infos:
                         if local_data_set_meta_info.coverage == tile_coverage and \
                                 local_data_set_meta_info.start_time == current_time_str:
                             add_to_current = False
                             break
                     if add_to_current:
                         current_tile_coverages.append(
                             (h, v, tile_coverage))
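                 # a data set is considered valid until one second before the next composite begins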
                 next_time = current_time + datetime.timedelta(
                     days=_DATA_INTERVALS[requested_data_type])
                 next_time -= datetime.timedelta(seconds=1)
                 if len(current_tile_coverages) > 0:
                     date_dir_url = '{}/{}/{}/{}.{:02d}.{:02d}/'.format(
                         _BASE_URL, _PLATFORM, requested_data_type,
                         current_time.year, current_time.month,
                         current_time.day)
                     date_page = urllib2.urlopen(
                         date_dir_url).read().decode('utf-8')
                     for h, v, tile_coverage in current_tile_coverages:
                         file_regex = '.hdf">{}.A{}{:03d}.h{:02d}v{:02d}.006.*.hdf'. \
                             format(requested_data_type.split('.')[0], current_time.year,
                                    current_time.timetuple().tm_yday, h, v)
                         available_files = re.findall(file_regex, date_page)
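                         # matches still include the leading '.hdf">' from the link markup; file[6:] strips it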
                         for file in available_files:
                             current_time_str = current_time.strftime(
                                 '%Y-%m-%d %H:%M:%S')
                             logging.info('Found {} data set for {}'.format(
                                 requested_data_type, current_time_str))
                             data_set_meta_infos.append(
                                 DataSetMetaInfo(
                                     tile_coverage, current_time_str,
                                     next_time.strftime(
                                         '%Y-%m-%d %H:%M:%S'),
                                     requested_data_type, file[6:]))
                 current_time = next_time + datetime.timedelta(seconds=1)
     except URLError as e:
         logging.warning(
             'Could not access NASA Land Processes Distributed Active Archive Center: {}'
             .format(e.reason))
     return data_set_meta_infos
Example #17
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
     relative_path = get_relative_path(path, DataTypeConstants.CAMS_TIFF)
     return DataSetMetaInfo(GLOBAL, relative_path.replace('_', '-'),
                            relative_path.replace('_', '-'),
                            DataTypeConstants.CAMS_TIFF, relative_path)
Example #18
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
     return DataSetMetaInfo(GLOBAL, path[-13:-3], path[-13:-3],
                            DataTypeConstants.CAMS, path)
Example #19
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
     coverage = self._extract_coverage(path)
     start_time = self._extract_start_time(path)
     end_time = self._extract_end_time(path)
     return DataSetMetaInfo(coverage, start_time, end_time, self.name(),
                            path)
Example #20
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
     coverage = self._extract_coverage(path)
     time = self._extract_time_from_metadata_file(path)
     return DataSetMetaInfo(coverage, time, time, self.name(), path)