def parse(self):
    """
    Parse the CPL XML file at ``self.path`` and populate this object.

    Sets ``cpl_ns``, ``id``, ``content_title_text``, ``annotation_text``,
    ``issue_date``, ``issuer``, ``creator`` and ``content_kind`` from the
    document, appends one ``Reel`` per child of every ``ReelList`` element to
    ``self.reels`` and mirrors each reel's assets into ``self.assets``.

    Raises:
        CPLError: if reading the XML, extracting the namespace or
            ``self.validate()`` raises any exception.
    """
    try:
        tree = ET.parse(self.path)
        root = tree.getroot()
        # ElementTree prepends the namespace to all elements, so we need to
        # extract it so that we can perform sensible searching on elements.
        self.cpl_ns = get_namespace(root.tag)
        self.validate()
    except Exception as e:
        raise CPLError(e)

    # The Id text is split on ":" and the third field kept (the part after a
    # two-field prefix such as "urn:uuid").
    self.id = get_element_text(root, "Id", self.cpl_ns).split(":")[2]
    self.content_title_text = get_element_text(
        root, "ContentTitleText", self.cpl_ns)
    self.annotation_text = get_element_text(
        root, "AnnotationText", self.cpl_ns)
    self.issue_date = parse_date(
        get_element_text(root, "IssueDate", self.cpl_ns))
    self.issuer = get_element_text(root, "Issuer", self.cpl_ns)
    self.creator = get_element_text(root, "Creator", self.cpl_ns)
    self.content_kind = get_element_text(root, "ContentKind", self.cpl_ns)

    # Get each of the parts of the CPL, i.e. the Reels :)
    for reel_list_elem in get_element_iterator(root, "ReelList", self.cpl_ns):
        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9, while direct iteration works on every version.
        for reel_elem in reel_list_elem:
            reel = Reel(reel_elem, self.cpl_ns, assetmap=self.assetmap)

            # Add this in as a convenience for working with assets.
            # items() (not the Python 2-only iteritems()) works on 2 and 3.
            for asset_id, asset in reel.assets.items():
                self.assets[asset_id] = asset

            self.reels.append(reel)
def _parse(self, catalog_str):
    """
    Parses a KDM bundle catalog XML string.

    Sets ``id``, ``annotation_text`` and ``creator`` from the root element,
    then fills four parallel lists -- ``cpl_ids``, ``kdm_paths``,
    ``start_dates`` and ``end_dates`` -- with one entry per child of every
    ``KDMFileList`` element. The date values are stored exactly as returned
    by ``get_element_text`` (they are not parsed here).

    Args:
        catalog_str: the catalog XML document as a string.
    """
    root = ET.fromstring(catalog_str)
    cat_ns = get_namespace(root.tag)

    self.id = strip_urn(get_element_text(root, 'Id', cat_ns))
    self.annotation_text = get_element_text(root, 'AnnotationText', cat_ns)
    self.creator = get_element_text(root, 'Creator', cat_ns)

    # Reset the per-KDM lists so that index i of each list describes the
    # same catalog entry.
    self.cpl_ids = []
    self.kdm_paths = []
    self.start_dates = []
    self.end_dates = []

    for kdm_list_el in get_element_iterator(root, 'KDMFileList', cat_ns):
        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9, while direct iteration works on every version.
        for kdm_el in kdm_list_el:
            self.cpl_ids.append(
                strip_urn(get_element_text(kdm_el, 'CPLId', cat_ns)))
            self.kdm_paths.append(
                get_element_text(kdm_el, 'FilePath', cat_ns))
            self.start_dates.append(
                get_element_text(kdm_el, 'ContentKeysNotValidBefore', cat_ns))
            self.end_dates.append(
                get_element_text(kdm_el, 'ContentKeysNotValidAfter', cat_ns))
def parse(self):
    """
    Parse the ASSETMAP.

    Calls ``self.validate()`` first, then reads the file at ``self.path`` and
    extracts the map's id, annotation text, volume count, issue date, issuer
    and creator. For each child of ``AssetList`` it records the path, volume
    index, offset and length of its chunk in ``self.assets``, keyed by the
    asset id.

    Raises:
        AssetmapError: if ``self.validate()`` raises any exception.
    """
    try:
        self.validate()
    except Exception as e:
        raise AssetmapError(e)

    tree = ET.parse(self.path)
    root = tree.getroot()
    # ElementTree prepends the namespace to all elements, so we need to
    # extract it so that we can perform sensible searching on elements.
    assetmap_ns = get_namespace(root.tag)

    # The Id text is split on ":" and the third field kept (the part after a
    # two-field prefix such as "urn:uuid").
    self.id = get_element_text(root, "Id", assetmap_ns).split(":")[2]
    self.annotation_text = get_element_text(
        root, "AnnotationText", assetmap_ns)
    self.volume_count = int(
        get_element_text(root, "VolumeCount", assetmap_ns))
    self.issue_date = parse_date(
        get_element_text(root, "IssueDate", assetmap_ns))
    self.issuer = get_element_text(root, "Issuer", assetmap_ns)
    self.creator = get_element_text(root, "Creator", assetmap_ns)

    asset_list = get_element(root, "AssetList", assetmap_ns)
    # Get the data from the ASSETMAP file.
    # Elements are iterated directly: Element.getchildren() was removed in
    # Python 3.9, while direct iteration works on every version.
    for asset in asset_list:
        asset_id = get_element_text(asset, "Id", assetmap_ns).split(":")[2]
        for chunklist in get_element_iterator(asset, "ChunkList", assetmap_ns):
            # The code below assumes that there will only ever be one chunk
            # in a chunklist. Chunking is used to split files up into smaller
            # parts, usually in order to provide compatibility with older
            # filesystems, which is not applicable for our uses. If several
            # chunks are present, the last one overwrites the earlier ones.
            for chunk in chunklist:
                volume_index = get_element_text(
                    chunk, "VolumeIndex", assetmap_ns)
                offset = get_element_text(chunk, "Offset", assetmap_ns)
                length = get_element_text(chunk, "Length", assetmap_ns)

                # Numeric fields stay None when the lookup returns None.
                asset_fields = {
                    "path": get_element_text(chunk, "Path", assetmap_ns),
                    "volume_index": (
                        int(volume_index) if volume_index is not None
                        else None),
                    "offset": int(offset) if offset is not None else None,
                    "length": int(length) if length is not None else None,
                }

                self.assets[asset_id] = AssetData(**asset_fields)
def parse(self):
    """
    Parse the ASSETMAP.

    Runs ``self.validate()``, then loads the XML at ``self.path`` and stores
    the map-level fields (id, annotation text, volume count, issue date,
    issuer, creator) on this object. Every child of ``AssetList`` contributes
    an ``AssetData`` entry (path, volume index, offset, length) to
    ``self.assets`` under its asset id.

    Raises:
        AssetmapError: if ``self.validate()`` raises any exception.
    """
    try:
        self.validate()
    except Exception as e:
        raise AssetmapError(e)

    tree = ET.parse(self.path)
    root = tree.getroot()
    # ElementTree prepends the namespace to all elements, so we need to
    # extract it so that we can perform sensible searching on elements.
    assetmap_ns = get_namespace(root.tag)

    # Keep the third ":"-separated field of the Id text (what follows a
    # two-field prefix such as "urn:uuid").
    self.id = get_element_text(root, "Id", assetmap_ns).split(":")[2]
    self.annotation_text = get_element_text(
        root, "AnnotationText", assetmap_ns)
    self.volume_count = int(
        get_element_text(root, "VolumeCount", assetmap_ns))
    self.issue_date = parse_date(
        get_element_text(root, "IssueDate", assetmap_ns))
    self.issuer = get_element_text(root, "Issuer", assetmap_ns)
    self.creator = get_element_text(root, "Creator", assetmap_ns)

    asset_list = get_element(root, "AssetList", assetmap_ns)
    # Get the data from the ASSETMAP file.
    # Element.getchildren() no longer exists as of Python 3.9, so child
    # elements are obtained by iterating the parent element itself.
    for asset in asset_list:
        asset_id = get_element_text(asset, "Id", assetmap_ns).split(":")[2]
        for chunklist in get_element_iterator(asset, "ChunkList", assetmap_ns):
            # The code below assumes that there will only ever be one chunk
            # in a chunklist. Chunking is used to split files up into smaller
            # parts, usually in order to provide compatibility with older
            # filesystems, which is not applicable for our uses. With more
            # than one chunk, only the last one is kept for the asset.
            for chunk in chunklist:
                volume_index = get_element_text(
                    chunk, "VolumeIndex", assetmap_ns)
                offset = get_element_text(chunk, "Offset", assetmap_ns)
                length = get_element_text(chunk, "Length", assetmap_ns)

                # A None lookup result is passed through unchanged rather
                # than converted with int().
                asset_fields = {
                    "path": get_element_text(chunk, "Path", assetmap_ns),
                    "volume_index": (
                        int(volume_index) if volume_index is not None
                        else None),
                    "offset": int(offset) if offset is not None else None,
                    "length": int(length) if length is not None else None,
                }

                self.assets[asset_id] = AssetData(**asset_fields)
def _parse(self, catalog_str):
    """
    Parses a KDM bundle catalog XML string.

    Reads ``Id``, ``AnnotationText`` and ``Creator`` from the root element
    into ``id``, ``annotation_text`` and ``creator``. Each child of every
    ``KDMFileList`` element then adds one entry to each of the parallel lists
    ``cpl_ids``, ``kdm_paths``, ``start_dates`` and ``end_dates``. Dates are
    kept as the text returned by ``get_element_text``; no date parsing is
    done here.

    Args:
        catalog_str: the catalog XML document as a string.
    """
    root = ET.fromstring(catalog_str)
    cat_ns = get_namespace(root.tag)

    self.id = strip_urn(get_element_text(root, 'Id', cat_ns))
    self.annotation_text = get_element_text(root, 'AnnotationText', cat_ns)
    self.creator = get_element_text(root, 'Creator', cat_ns)

    # Start from empty lists; entries at the same index belong together.
    self.cpl_ids = []
    self.kdm_paths = []
    self.start_dates = []
    self.end_dates = []

    for kdm_list_el in get_element_iterator(root, 'KDMFileList', cat_ns):
        # Element.getchildren() no longer exists as of Python 3.9, so child
        # elements are obtained by iterating the parent element itself.
        for kdm_el in kdm_list_el:
            self.cpl_ids.append(
                strip_urn(get_element_text(kdm_el, 'CPLId', cat_ns)))
            self.kdm_paths.append(
                get_element_text(kdm_el, 'FilePath', cat_ns))
            self.start_dates.append(
                get_element_text(kdm_el, 'ContentKeysNotValidBefore', cat_ns))
            self.end_dates.append(
                get_element_text(kdm_el, 'ContentKeysNotValidAfter', cat_ns))
def _parse(self, root):
    """
    Populate this CPL from an already-parsed XML root element.

    Reads the playlist metadata (``id``, ``content_title_text``,
    ``annotation_text``, ``issue_date``, ``issuer``, ``creator``,
    ``content_kind``) using the namespace in ``self.cpl_ns``, then builds a
    ``Reel`` for each child of every ``ReelList`` element, appending it to
    ``self.reels`` and copying its assets into ``self.assets``.

    Args:
        root: root element of the CPL document.
    """
    # The Id text is split on ":" and the third field kept (the part after a
    # two-field prefix such as "urn:uuid").
    self.id = get_element_text(root, "Id", self.cpl_ns).split(":")[2]
    self.content_title_text = get_element_text(
        root, "ContentTitleText", self.cpl_ns)
    self.annotation_text = get_element_text(
        root, "AnnotationText", self.cpl_ns)
    self.issue_date = parse_date(
        get_element_text(root, "IssueDate", self.cpl_ns))
    self.issuer = get_element_text(root, "Issuer", self.cpl_ns)
    self.creator = get_element_text(root, "Creator", self.cpl_ns)
    self.content_kind = get_element_text(root, "ContentKind", self.cpl_ns)

    # Get each of the parts of the CPL, i.e. the Reels :)
    for reel_list_elem in get_element_iterator(root, "ReelList", self.cpl_ns):
        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9, while direct iteration works on every version.
        for reel_elem in reel_list_elem:
            reel = Reel(reel_elem, self.cpl_ns, assetmap=self.assetmap)

            # Add this in as a convenience for working with assets.
            for asset_id, asset in reel.assets.items():
                self.assets[asset_id] = asset

            self.reels.append(reel)
def parse(self):
    """
    Open the CPL asset at ``self.path`` and load its playlist information.

    Stores the document namespace in ``self.cpl_ns``, fills in ``id``,
    ``content_title_text``, ``annotation_text``, ``issue_date``, ``issuer``,
    ``creator`` and ``content_kind``, and creates a ``Reel`` for each child of
    every ``ReelList`` element. Reels are appended to ``self.reels`` and their
    assets are also registered in ``self.assets``.

    Raises:
        CPLError: if reading the XML, extracting the namespace or
            ``self.validate()`` raises any exception.
    """
    try:
        tree = ET.parse(self.path)
        root = tree.getroot()
        # ElementTree prepends the namespace to all elements, so we need to
        # extract it so that we can perform sensible searching on elements.
        self.cpl_ns = get_namespace(root.tag)
        self.validate()
    except Exception as e:
        raise CPLError(e)

    # Keep the third ":"-separated field of the Id text (what follows a
    # two-field prefix such as "urn:uuid").
    self.id = get_element_text(root, "Id", self.cpl_ns).split(":")[2]
    self.content_title_text = get_element_text(
        root, "ContentTitleText", self.cpl_ns)
    self.annotation_text = get_element_text(
        root, "AnnotationText", self.cpl_ns)
    self.issue_date = parse_date(
        get_element_text(root, "IssueDate", self.cpl_ns))
    self.issuer = get_element_text(root, "Issuer", self.cpl_ns)
    self.creator = get_element_text(root, "Creator", self.cpl_ns)
    self.content_kind = get_element_text(root, "ContentKind", self.cpl_ns)

    # Get each of the parts of the CPL, i.e. the Reels :)
    for reel_list_elem in get_element_iterator(root, "ReelList", self.cpl_ns):
        # Element.getchildren() no longer exists as of Python 3.9, so child
        # elements are obtained by iterating the parent element itself.
        for reel_elem in reel_list_elem:
            reel = Reel(reel_elem, self.cpl_ns, assetmap=self.assetmap)

            # Add this in as a convenience for working with assets.
            # dict.iteritems() exists only on Python 2; items() is portable.
            for asset_id, asset in reel.assets.items():
                self.assets[asset_id] = asset

            self.reels.append(reel)
def _parse(self, root):
    """
    Extract the playlist information from a parsed CPL root element.

    Using the namespace held in ``self.cpl_ns``, sets ``id``,
    ``content_title_text``, ``annotation_text``, ``issue_date``, ``issuer``,
    ``creator`` and ``content_kind``. Each child of every ``ReelList`` element
    becomes a ``Reel`` that is appended to ``self.reels``; the reel's assets
    are additionally stored in ``self.assets`` by asset id.

    Args:
        root: root element of the CPL document.
    """
    # Keep the third ":"-separated field of the Id text (what follows a
    # two-field prefix such as "urn:uuid").
    self.id = get_element_text(root, "Id", self.cpl_ns).split(":")[2]
    self.content_title_text = get_element_text(
        root, "ContentTitleText", self.cpl_ns)
    self.annotation_text = get_element_text(
        root, "AnnotationText", self.cpl_ns)
    self.issue_date = parse_date(
        get_element_text(root, "IssueDate", self.cpl_ns))
    self.issuer = get_element_text(root, "Issuer", self.cpl_ns)
    self.creator = get_element_text(root, "Creator", self.cpl_ns)
    self.content_kind = get_element_text(root, "ContentKind", self.cpl_ns)

    # Get each of the parts of the CPL, i.e. the Reels :)
    for reel_list_elem in get_element_iterator(root, "ReelList", self.cpl_ns):
        # Element.getchildren() no longer exists as of Python 3.9, so child
        # elements are obtained by iterating the parent element itself.
        for reel_elem in reel_list_elem:
            reel = Reel(reel_elem, self.cpl_ns, assetmap=self.assetmap)

            # Add this in as a convenience for working with assets.
            for asset_id, asset in reel.assets.items():
                self.assets[asset_id] = asset

            self.reels.append(reel)