def test_set_self_href_does_not_break_asset_hrefs(self) -> None:
    # After an item receives a new self HREF, asset HREFs that were
    # relative beforehand are expected to be absolute.
    catalog = TestCases.test_case_2()
    for item in catalog.get_all_items():
        # Start from a known state: turn every absolute asset HREF into a
        # relative one that only keeps the file name.
        for asset in item.assets.values():
            if not is_absolute_href(asset.href):
                continue
            asset.href = f"./{os.path.basename(asset.href)}"

        item.set_self_href("http://example.com/item.json")

        for asset in item.assets.values():
            self.assertTrue(is_absolute_href(asset.href))
def test_set_self_href_none_ignores_relative_asset_hrefs(self) -> None:
    # Clearing the self HREF (passing None) is expected to leave relative
    # asset HREFs relative.
    catalog = TestCases.test_case_2()
    for item in catalog.get_all_items():
        # Start from a known state: turn every absolute asset HREF into a
        # relative one that only keeps the file name.
        for asset in item.assets.values():
            if not is_absolute_href(asset.href):
                continue
            asset.href = f"./{os.path.basename(asset.href)}"

        item.set_self_href(None)

        for asset in item.assets.values():
            self.assertFalse(is_absolute_href(asset.href))
def check_all_absolute(cat):
    """Assert that every link found while walking ``cat`` is absolute.

    For each ``(root, catalogs, items)`` tuple yielded by ``cat.walk()``,
    every link of ``root`` and of each item must have
    ``link_type == LinkType.ABSOLUTE`` and an absolute HREF.

    Args:
        cat: Object exposing ``walk()``; each visited object exposes
            ``links``.
    """
    def assert_links_absolute(stac_object):
        for link in stac_object.links:
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the check fails.
            self.assertEqual(link.link_type, LinkType.ABSOLUTE)
            self.assertTrue(is_absolute_href(link.get_href()))

    for root, _, items in cat.walk():
        assert_links_absolute(root)
        for item in items:
            assert_links_absolute(item)
def test_set_self_href_doesnt_break_asset_hrefs(self):
    # Assets that start out with relative HREFs must end up with absolute
    # HREFs that still point at existing files once the item is given a
    # new self HREF.
    cat = TestCases.test_case_6()
    for item in cat.get_all_items():
        # Precondition: every asset HREF is relative. Checked with a
        # unittest assertion so it is not stripped under ``python -O``.
        for asset in item.assets.values():
            self.assertFalse(is_absolute_href(asset.href))

        item.set_self_href('http://example.com/item.json')

        for asset in item.assets.values():
            self.assertTrue(is_absolute_href(asset.href))
            self.assertTrue(os.path.exists(asset.href))
def check_all_relative(cat):
    """Assert that every non-``self`` link found while walking ``cat`` is relative.

    For each ``(root, catalogs, items)`` tuple yielded by ``cat.walk()``,
    every link of ``root`` and of each item whose ``rel`` is not ``'self'``
    must have ``link_type == LinkType.RELATIVE`` and a non-absolute HREF.

    Args:
        cat: Object exposing ``walk()``; each visited object exposes
            ``links``.
    """
    def assert_links_relative(stac_object):
        for link in stac_object.links:
            # 'self' links are exempt from the relative-link requirement.
            if link.rel == 'self':
                continue
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the check fails.
            self.assertEqual(link.link_type, LinkType.RELATIVE)
            self.assertFalse(is_absolute_href(link.get_href()))

    for root, _, items in cat.walk():
        assert_links_relative(root)
        for item in items:
            assert_links_relative(item)
def test_make_all_asset_hrefs_relative(self):
    # Round trip: making asset HREFs absolute and then relative again must
    # restore the original relative HREF.
    cat = TestCases.test_case_2()
    item = cat.get_item('cf73ec1a-d790-4b59-b077-e101738571ed',
                        recursive=True)
    asset = item.assets['cf73ec1a-d790-4b59-b077-e101738571ed']
    original_href = asset.href

    cat.make_all_asset_hrefs_absolute()
    # Precondition for the real check below; a unittest assertion is used
    # so it is not stripped under ``python -O``.
    self.assertTrue(is_absolute_href(asset.href))

    cat.make_all_asset_hrefs_relative()
    self.assertFalse(is_absolute_href(asset.href))
    self.assertEqual(asset.href, original_href)
def from_file(cls, href):
    """Load a STACObject implementation from the JSON file at ``href``.

    Args:
        href (str): Location of the file to read. A non-absolute HREF is
            converted with ``make_absolute_href`` before reading.

    Returns:
        The object built from the JSON that was read. When called on
        ``STACObject`` itself the concrete class is chosen by
        ``STAC_IO.stac_object_from_dict``; otherwise ``cls.from_dict``
        is used.
    """
    if not is_absolute_href(href):
        href = make_absolute_href(href)
    stac_dict = STAC_IO.read_json(href)

    stac_object = (STAC_IO.stac_object_from_dict(stac_dict, href=href)
                   if cls == STACObject else
                   cls.from_dict(stac_dict, href=href))

    # Only fill in the self HREF when the object does not carry one yet.
    if stac_object.get_self_href() is None:
        stac_object.set_self_href(href)

    # An unresolved root link that points back at this very file means the
    # object is its own root.
    root_link = stac_object.get_root_link()
    points_at_itself = (root_link is not None
                        and not root_link.is_resolved()
                        and root_link.get_absolute_href() == href)
    if points_at_itself:
        stac_object.set_root(stac_object, link_type=root_link.link_type)

    return stac_object
def test_make_all_asset_hrefs_absolute(self):
    # After make_all_asset_hrefs_absolute(), the sampled asset must have an
    # absolute HREF.
    catalog = TestCases.test_case_2()
    catalog.make_all_asset_hrefs_absolute()

    sample_item = catalog.get_item('cf73ec1a-d790-4b59-b077-e101738571ed',
                                   recursive=True)
    sample_asset = sample_item.assets['cf73ec1a-d790-4b59-b077-e101738571ed']

    self.assertTrue(is_absolute_href(sample_asset.href))
def set_self_href(self, href):
    """Sets the absolute HREF that is represented by the ``rel == 'self'``
    :class:`~pystac.Link`.

    Changing the self HREF of the item will ensure that all asset HREFs
    remain valid. If asset HREFs are relative, the HREFs will change
    to point to the same location based on the new item self HREF,
    either by making them relative to the new location or making them
    absolute links if the new location does not share the same protocol
    as the old location.

    Args:
        href (str): The absolute HREF of this object. If the given HREF
            is not absolute, it will be transformed to an absolute
            HREF based on the current working directory. If this is None
            the call will clear the self HREF link; asset HREFs are left
            untouched in that case.
    """
    prev_href = self.get_self_href()
    super().set_self_href(href)
    new_href = self.get_self_href()  # May have been made absolute.

    # Relative asset HREFs can only be re-based when both the previous and
    # the new self HREF are known. Without a new self HREF (href=None)
    # there is nothing to make them relative to, so leave them as they are.
    if prev_href is None or new_href is None:
        return

    # Make sure relative asset links remain valid.
    for asset in self.assets.values():
        asset_href = asset.href
        if not is_absolute_href(asset_href):
            abs_href = make_absolute_href(asset_href, prev_href)
            asset.href = make_relative_href(abs_href, new_href)
def from_dict(d):
    """Deserializes a Link from a dict.

    The ``rel`` and ``href`` keys are required; ``type`` and ``title`` are
    optional. Any remaining keys are passed on as the link's extra
    ``properties``. The input dict is not modified (a shallow copy is
    consumed instead).

    Args:
        d (dict): The dict that represents the Link in JSON

    Returns:
        Link: Link instance constructed from the dict.

    Raises:
        KeyError: If ``rel`` or ``href`` is missing from ``d``.
    """
    d = copy(d)
    rel = d.pop('rel')
    href = d.pop('href')
    media_type = d.pop('type', None)
    title = d.pop('title', None)

    # Whatever is left over are extra properties. Test for emptiness
    # directly: ``any(d)`` would inspect the truthiness of the *keys* and
    # drop properties stored under falsy keys such as ''.
    properties = d if d else None

    # A 'self' link is always treated as absolute.
    if rel == 'self' or is_absolute_href(href):
        link_type = LinkType.ABSOLUTE
    else:
        link_type = LinkType.RELATIVE

    return Link(rel=rel,
                target=href,
                media_type=media_type,
                title=title,
                properties=properties,
                link_type=link_type)
def determine_type(stac_json):
    """Infer the catalog type from the links of a STAC JSON dict.

    Only applies to Catalogs or Collections.

    Args:
        stac_json (dict): The STAC JSON dict to inspect; its ``'links'``
            entries are read for ``'rel'`` and ``'href'``.

    Returns:
        str or None: ``RELATIVE_PUBLISHED`` or ``ABSOLUTE_PUBLISHED`` when a
        ``self`` link exists (depending on whether any other link is
        relative), ``SELF_CONTAINED`` when there is no ``self`` link but
        some link is relative, and None otherwise.
    """
    self_link = None
    has_relative_link = False
    for link in stac_json['links']:
        if link['rel'] == 'self':
            self_link = link
        elif not is_absolute_href(link['href']):
            has_relative_link = True

    if not self_link:
        # Without a self link only the self-contained layout is
        # recognisable.
        return CatalogType.SELF_CONTAINED if has_relative_link else None

    if has_relative_link:
        return CatalogType.RELATIVE_PUBLISHED
    return CatalogType.ABSOLUTE_PUBLISHED
def determine_type(cls, stac_json: Dict[str, Any]) -> Optional["CatalogType"]:
    """Infer the catalog type from the links of a STAC JSON dict.

    Only applies to Catalogs or Collections.

    Args:
        stac_json : The STAC JSON dict to inspect; its ``"links"`` entries
            are read for ``"rel"`` and ``"href"``.

    Returns:
        Optional[CatalogType]: ``RELATIVE_PUBLISHED`` or
        ``ABSOLUTE_PUBLISHED`` when a self link exists (depending on
        whether any other link is relative), ``SELF_CONTAINED`` when there
        is no self link but some link is relative, and None if the type
        cannot be determined.
    """
    self_link = None
    any_relative = False
    for link in stac_json["links"]:
        if link["rel"] == pystac.RelType.SELF:
            self_link = link
            continue
        if not is_absolute_href(link["href"]):
            any_relative = True

    if self_link and any_relative:
        return cls.RELATIVE_PUBLISHED
    if self_link:
        return cls.ABSOLUTE_PUBLISHED
    if any_relative:
        return cls.SELF_CONTAINED
    return None
def test_copy_to_relative(self):
    # Copy an ABSOLUTE_PUBLISHED catalog with the 'copy' command using
    # '-t SELF_CONTAINED -a' and check that every asset HREF in the copy is
    # relative and resolves to a location under its item's directory.
    cat = TestCases.planet_disaster()

    with TemporaryDirectory() as tmp_dir:
        cat.make_all_asset_hrefs_absolute()
        cat.normalize_hrefs(tmp_dir)
        cat.save(catalog_type=pystac.CatalogType.ABSOLUTE_PUBLISHED)

        cat2_dir = os.path.join(tmp_dir, 'second')

        command = [
            'copy', '-t', 'SELF_CONTAINED', '-a',
            cat.get_self_href(), cat2_dir
        ]
        self.run_command(command)

        cat2 = pystac.read_file(os.path.join(cat2_dir, 'collection.json'))
        for item in cat2.get_all_items():
            item_href = item.get_self_href()
            item_dir = os.path.dirname(item_href)
            for asset in item.assets.values():
                href = asset.href
                self.assertFalse(is_absolute_href(href))
                common_path = os.path.commonpath(
                    [item_dir, make_absolute_href(href, item_href)])
                # assertEqual, not assertTrue: assertTrue would treat the
                # second argument as the failure message and never compare.
                self.assertEqual(common_path, item_dir)
def normalize_hrefs(self, root_href):
    """Move this object's self HREF to ``<root_href>/<id>.json``.

    Relative asset HREFs are rewritten so that they keep pointing at the
    same location after the move. That rewrite needs the previous self
    HREF; when there was none, relative asset HREFs are left unchanged.

    Args:
        root_href (str): Directory HREF for the new self HREF. A
            non-absolute value is made absolute against the current
            working directory.
    """
    if not is_absolute_href(root_href):
        root_href = make_absolute_href(root_href,
                                       os.getcwd(),
                                       start_is_dir=True)

    previous_href = self.get_self_href()
    updated_href = os.path.join(root_href, '{}.json'.format(self.id))
    self.set_self_href(updated_href)

    for asset in self.assets.values():
        relative_href = asset.href
        # Absolute HREFs need no fixing, and relative ones cannot be
        # re-based without knowing where the object used to live.
        if is_absolute_href(relative_href) or previous_href is None:
            continue
        absolute_href = make_absolute_href(relative_href, previous_href)
        asset.href = make_relative_href(absolute_href, updated_href)
def test_is_absolute_href(self):
    # Each entry pairs an HREF with the expected is_absolute_href() result.
    expectations = [
        ('item.json', False),
        ('./item.json', False),
        ('../item.json', False),
        ('/item.json', True),
        ('http://stacspec.org/item.json', True),
    ]

    for candidate, should_be_absolute in expectations:
        result = is_absolute_href(candidate)
        self.assertEqual(result, should_be_absolute)
def validate_item_link_type(
    href: str, link_type: str, should_include_self: bool
) -> None:
    # Reads the item at `href` both as raw JSON and as a pystac.Item, then
    # checks (a) that link HREFs are relative/absolute as implied by
    # `link_type` and (b) whether the raw JSON carries a "self" link.
    raw_item = pystac.StacIO.default().read_json(href)
    item = pystac.Item.from_file(href)

    # Link rels that are expected to be relative when link_type is
    # "RELATIVE".
    relative_rels = [
        *HIERARCHICAL_LINKS,
        *pystac.EXTENSION_HOOKS.get_extended_object_links(item),
    ]

    for link in item.get_links():
        if link.rel == "self":
            continue
        if link_type == "RELATIVE" and link.rel in relative_rels:
            self.assertFalse(is_absolute_href(link.href))
        else:
            self.assertTrue(is_absolute_href(link.href))

    raw_rels = {raw_link["rel"] for raw_link in raw_item["links"]}
    self.assertEqual("self" in raw_rels, should_include_self)
def test_make_asset_href_relative_is_noop_on_relative_hrefs(self) -> None:
    # make_asset_hrefs_relative() must not change where an already-relative
    # asset HREF resolves to.
    catalog = TestCases.test_case_2()
    first_item = next(iter(catalog.get_all_items()))
    first_asset = list(first_item.assets.values())[0]

    # Precondition: the sampled asset starts out relative.
    assert not is_absolute_href(first_asset.href)
    href_before = first_asset.get_absolute_href()

    first_item.make_asset_hrefs_relative()

    self.assertEqual(first_asset.get_absolute_href(), href_before)
def merge_items(source_item: pystac.Item,
                target_item: pystac.Item,
                move_assets: bool = False,
                ignore_conflicts: bool = False) -> None:
    """Merge the assets and footprint of ``source_item`` into ``target_item``.

    Each source asset whose key is not yet present on the target is cloned
    and added to the target. Afterwards the target's geometry becomes the
    union of both geometries and its bbox the bounds of that union.

    Args:
        source_item (pystac.Item): The Item whose assets are merged into
            ``target_item``. No attribute of this item is assigned here.
        target_item (pystac.Item): The Item that receives the assets; its
            assets, geometry and bbox are modified.
        move_assets (bool): If true, asset files are relocated with
            ``move_asset_file_to_item`` and the HREF it returns is used for
            the new asset.
        ignore_conflicts (bool): If True, an asset whose key already exists
            on the target is skipped, and the flag is forwarded to
            ``move_asset_file_to_item``. If False, a duplicate key raises.

    Raises:
        ValueError: If the target item has no self HREF, or a source asset
            has no absolute HREF.
        Exception: If an asset key already exists on the target and
            ``ignore_conflicts`` is False.
    """
    target_item_href = target_item.get_self_href()
    if target_item_href is None:
        raise ValueError(
            f"Target Item {target_item.id} must have an HREF for merge")

    for key, asset in source_item.assets.items():
        if key in target_item.assets:
            if not ignore_conflicts:
                raise Exception(
                    'Target item {} already has asset with key {}, '
                    'cannot merge asset in from {}'.format(
                        target_item, key, source_item))
            continue

        asset_href = asset.get_absolute_href()
        if asset_href is None:
            raise ValueError(
                f"Asset {asset.title} must have an HREF for merge")

        if move_assets:
            merged_href = move_asset_file_to_item(
                target_item, asset_href, ignore_conflicts=ignore_conflicts)
        elif is_absolute_href(asset.href):
            merged_href = asset_href
        else:
            # Keep relative assets relative, but relative to the target.
            merged_href = make_relative_href(asset_href, target_item_href)

        merged_asset = asset.clone()
        merged_asset.href = merged_href
        target_item.add_asset(key, merged_asset)

    combined_geom = shape(source_item.geometry).union(
        shape(target_item.geometry)).buffer(0)
    target_item.geometry = mapping(combined_geom)
    target_item.bbox = list(combined_geom.bounds)
def test_create_item(self):
    # Runs `sentinel2 create-item` for each test granule and checks the
    # single JSON item it writes: it validates, its asset HREFs are
    # absolute, its bands match SENTINEL_BANDS, and its footprint lies
    # (almost entirely) within the reprojected raster bounds.
    granule_names = [
        'S2A_MSIL2A_20190212T192651_N0212_R013_T07HFE_20201007T160857.SAFE',
        'S2B_MSIL2A_20191228T210519_N0212_R071_T01CCV_20201003T104658.SAFE',
        'esa_S2B_MSIL2A_20210122T133229_N0214_R081_T22HBD_20210122T155500.SAFE'
    ]
    granule_hrefs = [
        TestData.get_path(f'data-files/sentinel2/{x}') for x in granule_names
    ]

    def check_proj_bbox(item):
        raster_bbox = item.ext.projection.get_bbox(item.assets['visual-10m'])
        raster_geom = shape(
            reproject_geom(f'epsg:{item.ext.projection.epsg}', 'epsg:4326',
                           mapping(box(*raster_bbox))))
        footprint = shape(item.geometry)

        outside_area = footprint.difference(raster_geom).area
        raster_area = raster_geom.area

        # The footprint should sit inside the raster bounds, so whatever
        # falls outside should be tiny and only due to reprojection.
        self.assertLess(outside_area / raster_area, 0.005)

    for granule_href in granule_hrefs:
        with self.subTest(granule_href), TemporaryDirectory() as tmp_dir:
            self.run_command(
                ['sentinel2', 'create-item', granule_href, tmp_dir])

            json_names = [
                name for name in os.listdir(tmp_dir)
                if name.endswith('.json')
            ]
            self.assertEqual(len(json_names), 1)

            item = pystac.read_file(os.path.join(tmp_dir, json_names[0]))
            item.validate()

            bands_seen = set()
            for asset in item.assets.values():
                self.assertTrue(is_absolute_href(asset.href))
                bands = item.ext.eo.get_bands(asset)
                if bands is not None:
                    bands_seen.update(band.name for band in bands)

            self.assertEqual(bands_seen, set(SENTINEL_BANDS.keys()))

            check_proj_bbox(item)
def test_is_absolute_href_windows(self):
    # Point utils._pathlib at ntpath for the duration of the checks
    # (presumably so is_absolute_href applies Windows path rules), and
    # always restore os.path afterwards.
    utils._pathlib = ntpath
    try:
        # Each entry pairs an HREF with the expected result.
        expectations = [
            ('item.json', False),
            ('.\\item.json', False),
            ('..\\item.json', False),
            ('c:\\item.json', True),
            ('http://stacspec.org/item.json', True),
        ]

        for candidate, should_be_absolute in expectations:
            result = is_absolute_href(candidate)
            self.assertEqual(result, should_be_absolute)
    finally:
        utils._pathlib = os.path
def test_is_absolute_href(self) -> None:
    # Each entry pairs an HREF with the expected is_absolute_href() result.
    expectations = [
        ("item.json", False),
        ("./item.json", False),
        ("../item.json", False),
        ("/item.json", True),
        ("http://stacspec.org/item.json", True),
    ]

    for candidate, should_be_absolute in expectations:
        result = is_absolute_href(candidate)
        self.assertEqual(result, should_be_absolute)
def get_absolute_href(self):
    """Return the absolute HREF of this asset when it can be determined.

    An already-absolute HREF is returned unchanged. A relative HREF is
    resolved against the owner's self HREF; without an owner there is
    nothing to resolve against, so the relative HREF is returned as is.

    Returns:
        str: The absolute HREF of this asset, or the relative HREF if an
        absolute HREF cannot be determined.
    """
    if is_absolute_href(self.href) or self.owner is None:
        return self.href
    return make_absolute_href(self.href, self.owner.get_self_href())
def test_create_item(self):
    # Runs `landsat create-item` for each test MTL file and checks the
    # single JSON item it writes: it validates, its asset HREFs are
    # absolute, its bands match the expected band set for its processing
    # level, and its projected bbox agrees with its geographic bbox.
    def check_proj_bbox(item):
        geographic_box = box(*item.bbox)
        projected_box = box(*item.ext.projection.bbox)
        reprojected_box = shape(
            reproject_geom(f"epsg:{item.ext.projection.epsg}", "epsg:4326",
                           mapping(projected_box)))

        # Area of the reprojected box not covered by the item bbox must be
        # a very small fraction of the reprojected box.
        self.assertLess((reprojected_box - geographic_box).area,
                        0.0001 * reprojected_box.area)

    for mtl_path in TEST_MTL_PATHS:
        with self.subTest(mtl_path), TemporaryDirectory() as tmp_dir:
            self.run_command([
                'landsat', 'create-item', '--mtl', mtl_path, '--output',
                tmp_dir
            ])

            json_names = [
                name for name in os.listdir(tmp_dir)
                if name.endswith('.json')
            ]
            self.assertEqual(len(json_names), 1)

            item = pystac.read_file(os.path.join(tmp_dir, json_names[0]))
            item.validate()

            bands_seen = set()
            for asset in item.assets.values():
                self.assertTrue(is_absolute_href(asset.href))
                bands = item.ext.eo.get_bands(asset)
                if bands is not None:
                    bands_seen.update(band.name for band in bands)

            if item.properties['landsat:processing_level'] == 'L2SP':
                expected_bands = (set(L8_SR_BANDS.keys())
                                  | set(L8_SP_BANDS.keys()))
            else:
                expected_bands = set(L8_SR_BANDS.keys())
            self.assertEqual(bands_seen, expected_bands)

            check_proj_bbox(item)
def make_asset_hrefs_relative(self):
    """Rewrite every absolute asset HREF relative to this item's self HREF.

    Assets whose HREF is already relative are left untouched. The self
    HREF is only looked up once an absolute asset HREF is encountered.

    Returns:
        Item: self

    Raises:
        STACError: If an absolute asset HREF is found but this item has no
            self HREF.
    """
    base_href = None
    for asset in self.assets.values():
        if not is_absolute_href(asset.href):
            continue

        # Fetch the self HREF lazily, the first time it is needed.
        if base_href is None:
            base_href = self.get_self_href()
            if base_href is None:
                raise STACError('Cannot make asset HREFs relative '
                                'if no self_href is set.')

        asset.href = make_relative_href(asset.href, base_href)

    return self
def from_file(cls,
              href: str,
              stac_io: Optional[pystac.StacIO] = None) -> "ItemCollection":
    """Build an :class:`ItemCollection` from the JSON file at ``href``.

    Arguments:
        href : Path to the file. A non-absolute HREF is converted with
            ``make_absolute_href`` before reading.
        stac_io : A :class:`~pystac.StacIO` instance to use for file I/O.
            Defaults to ``pystac.StacIO.default()``.
    """
    reader = pystac.StacIO.default() if stac_io is None else stac_io
    absolute_href = href if is_absolute_href(href) else make_absolute_href(
        href)

    return cls.from_dict(reader.read_json(absolute_href),
                         preserve_dict=False)
def get_absolute_href(self) -> Optional[str]:
    """Return the absolute HREF of this asset, if it can be determined.

    An already-absolute HREF is returned unchanged. A relative HREF is
    resolved against the owner's self HREF; when the asset has no owner
    there is nothing to resolve against and None is returned.

    Returns:
        str: The absolute HREF of this asset, or None if an absolute HREF
        could not be determined.
    """
    if utils.is_absolute_href(self.href):
        return self.href
    if self.owner is None:
        return None
    return utils.make_absolute_href(self.href, self.owner.get_self_href())
def resolve_stac_object(self, root=None):
    """Resolves a STAC object from the HREF of this link, if the link is not
    already resolved.

    Args:
        root (Catalog or Collection): Optional root of the catalog for this link.
            If provided, the root's resolved object cache is used to search for
            previously resolved instances of the STAC object.

    Returns:
        Link: This link, with ``target`` set to the resolved object.

    Raises:
        STACError: If the target is a relative HREF and this link has no
            owner, or the owner has no self HREF to resolve it against.
    """
    # A string target is an unresolved HREF; anything else is treated as an
    # already-resolved object.
    if isinstance(self.target, str):
        target_href = self.target

        # If it's a relative link, base it off the parent.
        if not is_absolute_href(target_href):
            if self.owner is None:
                raise STACError('Relative path {} encountered '
                                'without owner or start_href.'.format(target_href))
            start_href = self.owner.get_self_href()

            if start_href is None:
                raise STACError('Relative path {} encountered '
                                'without owner "self" link set.'.format(target_href))

            target_href = make_absolute_href(target_href, start_href)
        obj = None

        # Prefer an instance the root has already resolved for this HREF.
        if root is not None:
            obj = root._resolved_objects.get_by_href(target_href)

        # Nothing cached: read the object and record where it came from.
        if obj is None:
            obj = STAC_IO.read_stac_object(target_href, root=root)
            obj.set_self_href(target_href)
            if root is not None:
                # get_or_cache may hand back a different instance than the
                # one just read, so rebind obj before setting its root.
                obj = root._resolved_objects.get_or_cache(obj)
                obj.set_root(root, link_type=self.link_type)
    else:
        obj = self.target

    self.target = obj

    # For 'child' and 'item' links the owner of the link becomes the
    # parent of the resolved target.
    if self.owner and self.rel in ['child', 'item']:
        self.target.set_parent(self.owner, link_type=self.link_type)

    return self
def get_href(self):
    """Return the HREF for this link.

    Returns:
        str: For a link of type LinkType.RELATIVE that has an owner, an
        absolute target HREF is converted to one relative to the owner's
        self HREF. For any other link type the absolute HREF is returned.
    """
    if self.link_type != LinkType.RELATIVE:
        return self.get_absolute_href()

    # A resolved target is an object carrying its own self HREF; an
    # unresolved target is used as the HREF directly.
    href = self.target.get_self_href() if self.is_resolved() else self.target

    can_relativize = (href and is_absolute_href(href)
                      and self.owner is not None)
    if can_relativize:
        href = make_relative_href(href, self.owner.get_self_href())

    return href
def make_asset_hrefs_absolute(self):
    """Rewrite every relative asset HREF as an absolute HREF.

    Relative HREFs are resolved against this item's self HREF; HREFs that
    are already absolute are left untouched. The self HREF is only looked
    up once a relative asset HREF is encountered.

    Returns:
        Item: self

    Raises:
        STACError: If a relative asset HREF is found but this item has no
            self HREF.
    """
    base_href = None
    for asset in self.assets.values():
        if is_absolute_href(asset.href):
            continue

        # Fetch the self HREF lazily, the first time it is needed.
        if base_href is None:
            base_href = self.get_self_href()
            if base_href is None:
                raise STACError('Cannot make relative asset HREFs absolute '
                                'if no self_href is set.')

        asset.href = make_absolute_href(asset.href, base_href)

    return self
def normalize_hrefs(self, root_href):
    """Lay out this catalog and everything below it under ``root_href``.

    Each child is normalized into ``<root_href>/<child id>/``, each item
    into ``<root_href>/<item id>``, and finally this object's self HREF is
    set to ``<root_href>/<DEFAULT_FILE_NAME>``.

    Args:
        root_href (str): Directory HREF to normalize under. A non-absolute
            value is made absolute against the current working directory.

    Returns:
        self
    """
    # Everything below is computed from an absolute root.
    if not is_absolute_href(root_href):
        root_href = make_absolute_href(root_href,
                                       os.getcwd(),
                                       start_is_dir=True)

    # Resolve the whole tree up front: unresolved links with relative
    # paths can otherwise lead to linking issues.
    self.fully_resolve()

    for child in self.get_children():
        child.normalize_hrefs(
            os.path.join(root_href, '{}/'.format(child.id)))

    for item in self.get_items():
        item.normalize_hrefs(os.path.join(root_href, '{}'.format(item.id)))

    own_href = os.path.join(root_href, self.DEFAULT_FILE_NAME)
    self.set_self_href(own_href)

    return self