def dirpath_to_lid(dirpath: str) -> LID:
    """
    Build the LID for a directory path in a single-versioned filesystem.

    Only the path components that end in "$" contribute.  Each one is
    used with its trailing "$" removed, in the order it appears in the
    path.
    """
    segments = []
    for component in iteratepath(dirpath):
        if component.endswith("$"):
            # Drop the "$" marker; what is left is the LID part.
            segments.append(component[:-1])
    return LID.create_from_parts(segments)
def __iter__(self) -> Iterator[LIDVID]:
    """
    Yield one LIDVID for every directory whose final path part starts
    with "v$".

    The text after "v$" becomes the VID.  The path parts between the
    first part and that final part become the LID parts.
    """
    for dirpath in self.fs.walk.dirs():
        path_parts = fs.path.parts(dirpath)
        leaf = path_parts[-1]
        if not leaf.startswith("v$"):
            continue

        # Skip the first part returned by fs.path.parts() and the
        # version directory itself; everything in between names the LID.
        lid_segments = [str(segment) for segment in path_parts[1:-1]]
        version_text = str(leaf[2:])

        lid = LID.create_from_parts(lid_segments)
        yield LIDVID.create_from_lid_and_vid(lid, VID(version_text))
def update_from_single_version(self, is_new: IS_NEW_TEST, single_version_fs: FS) -> bool:
    """
    Bring this object up to date with the contents of a
    single-versioned filesystem.

    Starting from the one bundle segment found at the root of
    single_version_fs, each component is visited depth-first: children
    are updated before their parent, and each component's contents are
    handed to add_contents_if() together with is_new.

    Returns True if the bundle's LIDVID after the update differs from
    the one latest_lidvid() reported before it, False otherwise.

    Raises ValueError unless the root of single_version_fs contains
    exactly one name that is_segment() accepts.
    """
    # TODO This import is circular; that's why I have it here
    # inside the function.  But there must be a better way to
    # structure.
    from pdart.fs.multiversioned.version_view import (
        is_segment,
        strip_segment,
        vv_lid_path,
    )

    # TODO Note that this makes assumptions about the source
    # filesystem format.  Document them.

    def update_from_lid(lid: LID) -> LIDVID:
        """Update the component named by lid and return its LIDVID."""
        # Find the path corresponding to this LID.
        path = vv_lid_path(lid)

        # First, update all the children recursively.  Get their
        # LIDs by extending this LID with the names of the
        # subdirectories of path.  That handles directories.
        child_lidvids: Set[LIDVID] = {
            update_from_lid(lid.extend_lid(strip_segment(name)))
            for name in single_version_fs.listdir(path)
            if is_segment(name)
        }

        # Now look at files.  We create a VersionContents object
        # from the set of new LIDVIDs and all the files contained
        # in the component's directory.
        contents = VersionContents.create_from_lidvids_and_dirpath(
            child_lidvids, single_version_fs, path
        )

        # Now we ask the Multiversioned to insert these contents
        # as a new version if needed.  It returns the new LIDVID
        # if a new LIDVID is needed, otherwise it returns the old
        # one.
        # NOTE(review): the meaning of the final False argument is not
        # visible from here -- confirm against add_contents_if().
        return self.add_contents_if(is_new, lid, contents, False)

    bundle_segs = [
        strip_segment(name)
        for name in single_version_fs.listdir("/")
        if is_segment(name)
    ]

    # TODO I can't see any reason why there wouldn't be exactly a
    # single segment, but I'm throwing in a check to let me know
    # if I'm wrong.  The check also fires when no segment is found,
    # so the message reports the actual count.
    if len(bundle_segs) != 1:
        raise ValueError(
            f"expected exactly one bundle segment but found "
            f"{len(bundle_segs)}: {bundle_segs}"
        )

    changed = False
    for bundle_seg in bundle_segs:
        lid = LID.create_from_parts([str(bundle_seg)])
        # Capture the latest LIDVID before updating so we can tell
        # whether the update produced a new version.
        orig_lidvid: Optional[LIDVID] = self.latest_lidvid(lid)
        new_lidvid: LIDVID = update_from_lid(lid)
        changed = changed or new_lidvid != orig_lidvid

    return changed
def _run(self) -> None:
    """
    Populate the bundle database from the changes dict and the
    primary-deltas view of the archive.

    Before populating, the "schema" collection is recorded in the
    changes dict at VID 1.0 and the dict is written back.  The database
    tables are created only when the database file did not exist before
    this call.

    Raises ValueError if the deliverable directory already exists, if
    the database object is falsy afterwards, or if the database file is
    missing afterwards.
    """
    working_dir: str = self.working_dir()
    archive_dir: str = self.archive_dir()
    archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()

    if os.path.isdir(self.deliverable_dir()):
        raise ValueError(
            f"{self.deliverable_dir()} cannot exist for PopulateDatabase.")

    # Record the schema collection.  It is marked as changed only when
    # the bundle itself is at version 1.0.
    changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
    changes_dict = read_changes_dict(changes_path)
    bundle_lid = LID.create_from_parts([self._bundle_segment])
    first_round = changes_dict.vid(bundle_lid) == VID("1.0")
    schema_collection_lid = LID.create_from_parts(
        [self._bundle_segment, "schema"])
    changes_dict.set(schema_collection_lid, VID("1.0"), first_round)
    write_changes_dict(changes_dict, changes_path)

    # Check for the file before opening the database, so we know
    # whether the tables still need to be created.
    db_filepath = os.path.join(working_dir, _BUNDLE_DB_NAME)
    db_preexisting = os.path.isfile(db_filepath)
    bundle_db = create_bundle_db_from_os_filepath(db_filepath)

    with make_osfs(archive_dir) as archive_osfs:
        with make_version_view(
                archive_osfs, self._bundle_segment) as version_view:
            with make_sv_deltas(
                    version_view, archive_primary_deltas_dir) as sv_deltas:
                if not db_preexisting:
                    bundle_db.create_tables()

                documents_dir = f"/{self._bundle_segment}$/document$/phase2$"
                docs = set(sv_deltas.listdir(documents_dir))
                # Pass this to create citation info db in
                # _populate_citation_info
                info_param: Tuple = (sv_deltas, documents_dir, docs)

                _populate_bundle(changes_dict, bundle_db)
                _populate_collections(changes_dict, bundle_db)
                _populate_products(changes_dict, bundle_db, sv_deltas)
                _populate_target_identification(
                    changes_dict, bundle_db, sv_deltas)
                _populate_citation_info(changes_dict, bundle_db, info_param)

    # Sanity checks on the way out.
    if not bundle_db:
        raise ValueError("db doesn't exist.")

    if not os.path.isfile(db_filepath):
        raise ValueError(f"{db_filepath} is not a file.")
def _run(self) -> None:
    """
    Build the deliverable directory for the bundle.

    The deliverable is the bundle's version view combined with two
    synthesized files, "/checksum.manifest.txt" and
    "/transfer.manifest.txt", both generated from the bundle database
    and stored UTF-8 encoded.  The combined view is copied into a newly
    created deliverable directory.

    A ValueError is raised inside the logged section if the deliverable
    directory already exists.  Every exception raised inside that
    section is handed to PDS_LOGGER.exception(); whether it propagates
    further depends on that method.  The logger is always closed.
    """
    working_dir: str = self.working_dir()
    archive_dir: str = self.archive_dir()
    deliverable_dir: str = self.deliverable_dir()
    # Not used below.  NOTE(review): the call is kept in case the
    # accessor does more than return a path -- confirm, then drop it.
    manifest_dir: str = self.manifest_dir()

    try:
        PDS_LOGGER.open("Create deliverable directory")
        if os.path.isdir(deliverable_dir):
            raise ValueError(
                f"{deliverable_dir} cannot exist for MakeDeliverable.")

        changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
        changes_dict = read_changes_dict(changes_path)

        with make_osfs(archive_dir) as archive_osfs, make_multiversioned(
                archive_osfs) as mv:
            bundle_segment = self._bundle_segment
            bundle_lid = LID.create_from_parts([bundle_segment])
            bundle_vid = changes_dict.vid(bundle_lid)
            bundle_lidvid = LIDVID.create_from_lid_and_vid(
                bundle_lid, bundle_vid)
            version_view = VersionView(mv, bundle_lidvid)

            # open the database
            db_filepath = fs.path.join(working_dir, _BUNDLE_DB_NAME)
            bundle_db = create_bundle_db_from_os_filepath(db_filepath)

            # Files that exist only in the deliverable, keyed by path.
            bundle_lidvid_str = str(bundle_lidvid)
            synth_files: Dict[str, bytes] = dict()
            cm = make_checksum_manifest(
                bundle_db, bundle_lidvid_str, short_lidvid_to_dirpath)
            synth_files["/checksum.manifest.txt"] = cm.encode("utf-8")
            tm = make_transfer_manifest(
                bundle_db, bundle_lidvid_str, short_lidvid_to_dirpath)
            synth_files["/transfer.manifest.txt"] = tm.encode("utf-8")

            deliverable_view = DeliverableView(version_view, synth_files)

            os.mkdir(deliverable_dir)
            # Close the destination filesystem once the copy is done
            # instead of leaving it open for the rest of the process.
            with OSFS(deliverable_dir) as deliverable_osfs:
                copy_fs(deliverable_view, deliverable_osfs)

            PDS_LOGGER.log("info", f"Deliverable: {deliverable_dir}")
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()
def _munge_lidvid(product_lidvid: str, suffix: str, new_basename: str) -> str:
    """
    Derive the LIDVID string of a related product from product_lidvid.

    The bundle part is kept.  The new collection part is the collection
    type for suffix, followed by the original collection part from its
    first underscore up to (but not including) its last three
    characters, followed by the lower-cased suffix.  The new product
    part is the first nine characters of new_basename.  The VID is
    always 1.0.

    product_lidvid must have exactly three LID parts, and its
    collection part must contain an underscore (str.index raises
    ValueError otherwise).
    """
    bundle_id, collection_id, _product_id = (
        LIDVID(product_lidvid).lid().parts())

    # TODO This is a hack
    collection_type = get_collection_type(suffix=suffix)
    underscore_at = collection_id.index("_")
    middle = collection_id[underscore_at:-3]
    new_collection_id = collection_type + middle + suffix.lower()

    # TODO This is a hack
    new_product_id = new_basename[0:9]

    new_lid = LID.create_from_parts(
        [bundle_id, new_collection_id, new_product_id])

    # TODO This is a hack.  Fix it.
    new_lidvid = LIDVID.create_from_lid_and_vid(new_lid, VID("1.0"))
    return str(new_lidvid)
def test_create_lid_from_parts(self) -> None:
    """
    LID.create_from_parts() accepts one to three parts and rejects
    zero or four.
    """
    # Too few: an empty list is an error.
    no_parts: List[str] = []
    with self.assertRaises(ValueError):
        LID.create_from_parts(no_parts)

    # Bundle, collection and product LIDs.
    self.assertEqual(LID("urn:nasa:pds:b"),
                     LID.create_from_parts(["b"]))
    self.assertEqual(LID("urn:nasa:pds:b:c"),
                     LID.create_from_parts(["b", "c"]))
    self.assertEqual(LID("urn:nasa:pds:b:c:p"),
                     LID.create_from_parts(["b", "c", "p"]))

    # Too many: a fourth part is an error.
    with self.assertRaises(ValueError):
        LID.create_from_parts(["b", "c", "p", "x"])
def _run(self) -> None:
    """
    Create the PDS4 labels for the bundle.

    Three delta filesystems are stacked on top of the bundle's version
    view (primary, then browse, then label), and the topmost one is
    passed to create_pds4_labels() along with the bundle database, the
    changes dict and the citation information read from the database.
    The changes dict is written back afterwards.

    Raises ValueError if the deliverable directory already exists.
    Exceptions from create_pds4_labels() are handed to
    PDS_LOGGER.exception(); whether they propagate depends on that
    method.
    """
    working_dir: str = self.working_dir()
    archive_dir: str = self.archive_dir()
    archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()
    archive_browse_deltas_dir: str = self.archive_browse_deltas_dir()
    archive_label_deltas_dir: str = self.archive_label_deltas_dir()

    if os.path.isdir(self.deliverable_dir()):
        raise ValueError(
            f"{self.deliverable_dir()} cannot exist for BuildLabels.")

    changes_path = fs.path.join(working_dir, CHANGES_DICT_NAME)
    changes_dict = read_changes_dict(changes_path)

    with make_osfs(archive_dir) as archive_osfs, \
            make_version_view(
                archive_osfs, self._bundle_segment) as version_view, \
            make_sv_deltas(
                version_view, archive_primary_deltas_dir) as sv_deltas, \
            make_sv_deltas(
                sv_deltas, archive_browse_deltas_dir) as browse_deltas, \
            make_sv_deltas(
                browse_deltas, archive_label_deltas_dir) as label_deltas:
        # NOTE(review): the changes dict was already read just before
        # this block; this second read looks redundant.  Kept so that
        # behavior is unchanged -- confirm and remove.
        changes_path = fs.path.join(working_dir, CHANGES_DICT_NAME)
        changes_dict = read_changes_dict(changes_path)

        # Open the bundle database.
        db_filepath = fs.path.join(working_dir, _BUNDLE_DB_NAME)
        bundle_db = create_bundle_db_from_os_filepath(db_filepath)

        # Work out which bundle version we are labeling.
        bundle_lid = LID.create_from_parts([self._bundle_segment])
        bundle_vid = changes_dict.vid(bundle_lid)
        bundle_lidvid = LIDVID.create_from_lid_and_vid(
            bundle_lid, bundle_vid)

        # NOTE(review): docs is not used below.  The listing is kept
        # because the listdir() call itself may fail when the directory
        # is absent -- confirm whether that check is intended.
        documents_dir = f"/{self._bundle_segment}$/document$/phase2$"
        docs = set(sv_deltas.listdir(documents_dir))

        # Rebuild the citation information from its database record.
        # Authors are stored comma-separated and the abstract is stored
        # newline-separated.
        row = bundle_db.get_citation(str(bundle_lidvid))
        citation = Citation_Information(
            row.filename,
            row.propno,
            row.category,
            row.cycle,
            row.authors.split(","),
            row.title,
            row.submission_year,
            row.timing_year,
            row.abstract.split("\n"),
        )
        citation.set_publication_year(PUBLICATION_YEAR)

        try:
            PDS_LOGGER.open("BuildLabels")
            # changes_dict may be modified by this call: the context
            # collection is created if it doesn't exist.
            create_pds4_labels(
                working_dir,
                bundle_db,
                bundle_lidvid,
                changes_dict,
                label_deltas,
                citation,
            )
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()

        write_changes_dict(changes_dict, changes_path)
def _build_browse_collection(
    db: BundleDB,
    changes_dict: ChangesDict,
    browse_deltas: COWFS,
    bundle_lidvid: LIDVID,
    data_collection_lidvid: LIDVID,
    bundle_path: str,
) -> None:
    """
    Create the browse collection that parallels a data collection and
    fill it with browse products.

    The browse collection takes its LID from the data collection's
    to_browse_lid() and its VID from the data collection.  It is
    recorded as changed in changes_dict, its directory is created in
    browse_deltas, and it is registered in the database and linked to
    the bundle.

    Each "$"-marked entry in the data collection's directory is then
    treated as a product.  For a product that changes_dict reports as
    changed, a browse product is recorded (as changed), its directory
    created, and picmaker is run on every file in the data product's
    directory; the resulting ".jpg" file is recorded in the database.
    For an unchanged product, only the changes_dict entry (as
    unchanged) and the collection/product link are recorded.

    bundle_path is used as a prefix for directory paths, so it is
    expected to end with "/".

    Raises Exception if picmaker raises IndexError for a file; the
    message names the file and includes the traceback text.
    """
    bundle_segment = bundle_lidvid.lid().parts()[0]
    collection_segment = data_collection_lidvid.lid().parts()[1]

    browse_collection_lid = data_collection_lidvid.lid().to_browse_lid()
    collection_path = f"{bundle_path}{collection_segment}$/"
    browse_collection_segment = browse_collection_lid.collection_id
    browse_collection_path = f"{bundle_path}{browse_collection_segment}$/"
    # The browse collection shares the data collection's version.
    browse_collection_vid = data_collection_lidvid.vid()
    browse_collection_lidvid = LIDVID.create_from_lid_and_vid(
        browse_collection_lid, browse_collection_vid)

    # Record the browse collection: changes dict, filesystem, database.
    changes_dict.set(browse_collection_lid, browse_collection_vid, True)

    browse_deltas.makedirs(browse_collection_path, recreate=True)

    db.create_other_collection(str(browse_collection_lidvid),
                               str(bundle_lidvid))
    db.create_bundle_collection_link(str(bundle_lidvid),
                                     str(browse_collection_lidvid))

    # NOTE(review): the filter accepts a "$" anywhere in the name, while
    # the slice assumes it is the final character -- confirm that
    # segment names always end in "$".
    product_segments = [
        str(prod[:-1])
        for prod in browse_deltas.listdir(collection_path)
        if "$" in prod
    ]
    for product_segment in product_segments:
        # These product_segments are from the data_collection
        product_lid = LID.create_from_parts(
            [bundle_segment, collection_segment, product_segment])
        product_vid = changes_dict.vid(product_lid)

        product_path = f"{collection_path}{product_segment}$/"
        browse_product_path = f"{browse_collection_path}{product_segment}$/"

        # The browse product uses the data product's VID.
        # NOTE(review): _extend_lidvid() appears to return a LIDVID
        # string (it is wrapped in LIDVID() below and passed to the
        # database without str()) -- confirm.
        browse_product_lidvid = _extend_lidvid(browse_collection_lid,
                                               product_vid, product_segment)

        if changes_dict.changed(product_lid):
            # NOTE(review): this uses the data collection's VID, not
            # product_vid, for the FITS product -- confirm it is
            # intended.
            fits_product_lidvid = _extend_lidvid(
                data_collection_lidvid.lid(),
                data_collection_lidvid.vid(),
                product_segment,
            )

            bpl = LIDVID(browse_product_lidvid)
            changes_dict.set(bpl.lid(), bpl.vid(), True)

            browse_deltas.makedirs(browse_product_path, recreate=True)
            db.create_browse_product(
                browse_product_lidvid,
                fits_product_lidvid,
                str(browse_collection_lidvid),
            )
            db.create_collection_product_link(str(browse_collection_lidvid),
                                              browse_product_lidvid)

            for fits_file in browse_deltas.listdir(product_path):
                fits_filepath = fs.path.join(product_path, fits_file)
                fits_os_filepath = browse_deltas.getsyspath(fits_filepath)

                # The browse file has the same base name with a ".jpg"
                # extension.
                browse_file = fs.path.splitext(fits_file)[0] + ".jpg"
                browse_filepath = fs.path.join(browse_product_path,
                                               browse_file)

                # In a COWFS, a directory does not have a
                # syspath, only files.  So we write a stub
                # file into the directory, find its syspath
                # and its directory's syspath.  Then we remove
                # the stub file.
                browse_deltas.touch(browse_filepath)
                browse_product_os_filepath = browse_deltas.getsyspath(
                    browse_filepath)
                browse_deltas.remove(browse_filepath)
                browse_product_os_dirpath = fs.path.dirname(
                    browse_product_os_filepath)

                # Picmaker expects a list of strings.  If you give it
                # str, it'll index into it and complain about '/'
                # not being a file.  So don't do that!
                try:
                    picmaker.ImagesToPics(
                        [str(fits_os_filepath)],
                        browse_product_os_dirpath,
                        filter="None",
                        percentiles=(1, 99),
                    )
                except IndexError as e:
                    # Re-raise with the file name and traceback text in
                    # the message so the failing file can be identified.
                    tb = traceback.format_exc()
                    message = f"File {fits_file}: {e}\n{tb}"
                    raise Exception(message)

                # picmaker is expected to have written browse_file into
                # the directory; os.stat() fails if it did not.
                browse_os_filepath = fs.path.join(browse_product_os_dirpath,
                                                  browse_file)
                size = os.stat(browse_os_filepath).st_size
                db.create_browse_file(browse_os_filepath, browse_file,
                                      browse_product_lidvid, size)
        else:
            # Unchanged product: record it as unchanged and keep the
            # collection/product link; no files are generated.
            bpl = LIDVID(browse_product_lidvid)
            changes_dict.set(bpl.lid(), bpl.vid(), False)
            db.create_collection_product_link(str(browse_collection_lidvid),
                                              browse_product_lidvid)
def _run(self) -> None:
    """
    Build browse collections for every collection in the bundle that
    requires one.

    Collections are found by listing the bundle directory in the
    browse-deltas filesystem.  For each one that
    _requires_browse_collection() accepts, a changed collection gets a
    freshly built browse collection and an unchanged one is filled in
    from existing information.  The changes dict is written back once
    all collections have been handled.

    A ValueError is raised inside the logged section if the deliverable
    directory already exists.  Every exception raised inside that
    section is handed to PDS_LOGGER.exception(); whether it propagates
    further depends on that method.  The logger is always closed.
    """
    try:
        PDS_LOGGER.open("BuildBrowse")
        PDS_LOGGER.log("info", "Entering BuildBrowse.")

        working_dir: str = self.working_dir()
        archive_dir: str = self.archive_dir()
        archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()
        archive_browse_deltas_dir: str = self.archive_browse_deltas_dir()

        if os.path.isdir(self.deliverable_dir()):
            raise ValueError(f"{self.deliverable_dir()} cannot exist " +
                             "for BuildBrowse.")

        changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
        changes_dict = read_changes_dict(changes_path)

        db_filepath = os.path.join(working_dir, _BUNDLE_DB_NAME)
        bundle_db = create_bundle_db_from_os_filepath(db_filepath)

        bundle_lid = LID.create_from_parts([self._bundle_segment])
        bundle_lidvid = LIDVID.create_from_lid_and_vid(
            bundle_lid, changes_dict.vid(bundle_lid))

        with make_osfs(archive_dir) as archive_osfs, \
                make_version_view(
                    archive_osfs, self._bundle_segment) as version_view, \
                make_sv_deltas(
                    version_view, archive_primary_deltas_dir) as sv_deltas, \
                make_sv_deltas(
                    sv_deltas, archive_browse_deltas_dir) as browse_deltas:
            bundle_path = f"/{self._bundle_segment}$/"

            # Take the listing up front: building a browse collection
            # adds directories under bundle_path.
            collection_segments = []
            for entry in browse_deltas.listdir(bundle_path):
                if "$" in entry:
                    collection_segments.append(str(entry[:-1]))

            for collection_segment in collection_segments:
                collection_lid = LID.create_from_parts(
                    [self._bundle_segment, collection_segment])
                if _requires_browse_collection(collection_segment):
                    collection_lidvid = LIDVID.create_from_lid_and_vid(
                        collection_lid, changes_dict.vid(collection_lid))
                    if not changes_dict.changed(collection_lid):
                        # Unchanged: fill in from what already exists.
                        _fill_in_old_browse_collection(
                            bundle_db,
                            changes_dict,
                            bundle_lidvid,
                            collection_lidvid,
                        )
                    else:
                        PDS_LOGGER.log(
                            "info", f"Making browse for {collection_lidvid}")
                        _build_browse_collection(
                            bundle_db,
                            changes_dict,
                            browse_deltas,
                            bundle_lidvid,
                            collection_lidvid,
                            bundle_path,
                        )

            write_changes_dict(changes_dict, changes_path)

        PDS_LOGGER.log("info", "Leaving BuildBrowse.")
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()
def make_sub_lidvid(seg: str, vid_part: str) -> LIDVID:
    """
    Build the LIDVID of a child of the enclosing scope's lidvid.

    The child's LID is the parent's LID parts followed by seg.  Its VID
    is built from vid_part.
    """
    segments = lidvid.lid().parts()
    segments.append(seg)
    child_lid = LID.create_from_parts(segments)
    return LIDVID.create_from_lid_and_vid(child_lid, VID(vid_part))