def _fill_in_old_browse_collection(
    db: BundleDB,
    changes_dict: ChangesDict,
    bundle_lidvid: LIDVID,
    data_collection_lidvid: LIDVID,
) -> None:
    """
    Carry an existing browse collection forward as unchanged.

    The browse collection's LID is derived from the data collection's
    LID with to_browse_lid(), and it is given the data collection's
    VID.  The browse collection is recorded in changes_dict with its
    changed flag set to False and is linked to the bundle in the
    database.  Each product the database returns for the browse
    collection is then also recorded in changes_dict as unchanged.

    Exceptions raised while iterating over the products are handed to
    PDS_LOGGER.exception(); the logger is always closed afterwards.
    """
    browse_collection_lid = data_collection_lidvid.lid().to_browse_lid()
    browse_collection_vid = data_collection_lidvid.vid()
    browse_collection_lidvid = LIDVID.create_from_lid_and_vid(
        browse_collection_lid, browse_collection_vid)

    changes_dict.set(browse_collection_lid, browse_collection_vid, False)
    db.create_bundle_collection_link(str(bundle_lidvid),
                                     str(browse_collection_lidvid))

    try:
        PDS_LOGGER.open("Fill in old browse collection")
        PDS_LOGGER.log(
            "info", f"Created link and change for {browse_collection_lidvid}")
        for product in db.get_collection_products(
                str(browse_collection_lidvid)):
            product_lidvid = LIDVID(product.lidvid)
            # Only changes_dict is updated for products; no database
            # link is created in this function.
            changes_dict.set(product_lidvid.lid(), product_lidvid.vid(),
                             False)
            PDS_LOGGER.log("info",
                           f"Created link and change for {product_lidvid}")
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()
def _populate_bundle(changes_dict: ChangesDict, db: BundleDB) -> LIDVID:
    """
    Create the database entry for the changed bundle and return its LIDVID.

    Scans changes_dict for the first entry that is both marked changed
    and a bundle LID.  Raises RuntimeError when no such entry exists.
    """
    for lid, (vid, changed) in changes_dict.items():
        if not changed or not lid.is_bundle_lid():
            continue
        bundle_lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        db.create_bundle(str(bundle_lidvid))
        # Only one bundle is expected, so the first match is the answer.
        return bundle_lidvid
    raise RuntimeError("No changed bundle LID in changes_dict.")
def __iter__(self) -> Iterator[LIDVID]:
    """
    Yield a LIDVID for each directory in self.fs whose last path
    segment starts with "v$".

    The text after "v$" is used as the VID; the path segments between
    the first and the last are used as the LID parts.
    """
    for dirpath in self.fs.walk.dirs():
        segments = fs.path.parts(dirpath)
        version_segment = segments[-1]
        if not version_segment.startswith("v$"):
            continue
        lid = LID.create_from_parts([str(seg) for seg in segments[1:-1]])
        vid = VID(str(version_segment[2:]))
        yield LIDVID.create_from_lid_and_vid(lid, vid)
def parent_lidvid(self, lidvid: LIDVID) -> LIDVID:
    """
    Return the LIDVID of the parent of the given LIDVID.

    The parent's VID is looked up with self.vid().  Raises KeyError if
    either the LID itself or its parent LID is not in
    self.changes_dict.
    """
    known = self.changes_dict
    lid = lidvid.lid()
    if lid not in known:
        raise KeyError(f"lid={lid} not in changes_dict.")

    parent_lid = lid.parent_lid()
    if parent_lid not in known:
        raise KeyError(f"parent_lid={parent_lid} not in changes_dict.")

    return LIDVID.create_from_lid_and_vid(parent_lid, self.vid(parent_lid))
def _populate_schema_collection(db: BundleDB, bundle_lidvid: str) -> None:
    """
    Create the bundle's "schema" collection at version 1.0 in the
    database, followed by the three fixed schema products.
    """
    # TODO We're assuming here that there will only ever be one schema
    # collection.  I'm not sure that's true.
    schema_lid = LIDVID(bundle_lidvid).lid().extend_lid("schema")
    schema_lidvid = LIDVID.create_from_lid_and_vid(schema_lid, VID("1.0"))
    db.create_schema_collection(str(schema_lidvid), bundle_lidvid)

    # TODO Hardcoded here.  Is this what we want to do?
    for schema_product_lidvid in (DISP_LIDVID, HST_LIDVID, PDS4_LIDVID):
        db.create_schema_product(schema_product_lidvid)
def create_deliverable_view(
        bundle_db: BundleDB,
        mv: Multiversioned,
        lid: LID,
        vid: Optional[VID] = None) -> "DeliverableView":
    """
    Wrap a version view of the given LID in a DeliverableView.

    With an explicit vid, the view is a VersionView of exactly that
    LIDVID; without one, the view comes from mv.create_version_view(lid).
    The bundle_db argument is not used by this function.
    """
    if vid is not None:
        version_view = VersionView(
            mv, LIDVID.create_from_lid_and_vid(lid, vid))
    else:
        version_view = mv.create_version_view(lid)
    return DeliverableView(version_view)
def _run(self) -> None:
    """
    Build the deliverable directory for the bundle.

    Reads the changes dict from the working directory to find the
    bundle's current VID, opens a VersionView of that bundle LIDVID on
    the archive, synthesizes the checksum and transfer manifests from
    the bundle database, and copies the resulting DeliverableView into
    a newly created deliverable directory.

    A ValueError is raised (inside the logged section) if the
    deliverable directory already exists.  Any exception is handed to
    PDS_LOGGER.exception(); the logger is always closed.
    """
    working_dir: str = self.working_dir()
    archive_dir: str = self.archive_dir()
    deliverable_dir: str = self.deliverable_dir()
    try:
        PDS_LOGGER.open("Create deliverable directory")
        if os.path.isdir(deliverable_dir):
            raise ValueError(
                f"{deliverable_dir} cannot exist for MakeDeliverable.")

        changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
        changes_dict = read_changes_dict(changes_path)

        with make_osfs(archive_dir) as archive_osfs, make_multiversioned(
                archive_osfs) as mv:
            bundle_segment = self._bundle_segment
            bundle_lid = LID.create_from_parts([bundle_segment])
            bundle_vid = changes_dict.vid(bundle_lid)
            bundle_lidvid = LIDVID.create_from_lid_and_vid(
                bundle_lid, bundle_vid)
            version_view = VersionView(mv, bundle_lidvid)

            # open the database
            db_filepath = fs.path.join(working_dir, _BUNDLE_DB_NAME)
            bundle_db = create_bundle_db_from_os_filepath(db_filepath)

            bundle_lidvid_str = str(bundle_lidvid)

            # Files that exist only in the deliverable, keyed by path.
            synth_files: Dict[str, bytes] = {}
            cm = make_checksum_manifest(bundle_db, bundle_lidvid_str,
                                        short_lidvid_to_dirpath)
            synth_files["/checksum.manifest.txt"] = cm.encode("utf-8")
            tm = make_transfer_manifest(bundle_db, bundle_lidvid_str,
                                        short_lidvid_to_dirpath)
            synth_files["/transfer.manifest.txt"] = tm.encode("utf-8")

            deliverable_view = DeliverableView(version_view, synth_files)

            os.mkdir(deliverable_dir)
            # Close the destination filesystem once the copy is done,
            # even if the copy fails.
            with OSFS(deliverable_dir) as deliverable_osfs:
                copy_fs(deliverable_view, deliverable_osfs)

            PDS_LOGGER.log("info", f"Deliverable: {deliverable_dir}")
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()
def visit_bundle(self, bundle: Bundle, post: bool) -> None:
    """
    Visitor hook for a bundle; all work happens when post is true.

    For a bundle at version 1.0, the context and schema collections
    are created through this object's own helper methods.  For any
    other version, a "context" collection at version 1.0 is registered
    in the database, recorded in changes_dict as unchanged, and linked
    to the bundle.  Finally self._post_visit_bundle() is called.

    NOTE(review): bundle_db, changes_dict and bundle_lidvid are neither
    parameters nor attributes of self here; presumably they come from
    an enclosing scope -- confirm.
    NOTE(review): the nesting of the final _post_visit_bundle() call
    was reconstructed from unindented source; it is assumed to run for
    every post-visit, not only in the else branch -- confirm.
    """
    if post:
        first_bundle = LIDVID(bundle.lidvid).vid() == VID("1.0")
        if first_bundle:
            self._create_context_collection(bundle)
            self._create_schema_collection(bundle)
        else:
            # Later bundle versions refer to the context collection
            # at the fixed version 1.0 rather than creating a new one.
            context_collection_lid = (LIDVID(
                bundle.lidvid).lid().extend_lid("context"))
            context_collection_lidvid = LIDVID.create_from_lid_and_vid(
                context_collection_lid, VID("1.0"))
            bundle_db.create_context_collection(
                str(context_collection_lidvid), bundle.lidvid)
            # False marks the context collection as unchanged.
            changes_dict.set(context_collection_lid, VID("1.0"), False)
            bundle_db.create_bundle_collection_link(
                str(bundle_lidvid), str(context_collection_lidvid))
        self._post_visit_bundle(bundle)
def _populate_collections(changes_dict: ChangesDict, db: BundleDB) -> None:
    """
    Register every collection listed in changes_dict with the database.

    A changed collection is created according to its collection_id:
    "document" and "schema" have dedicated paths, everything else is
    created as an "other" collection.  An unchanged collection is only
    linked to its bundle, and only when the bundle itself has changed.
    """
    for lid, (vid, changed) in changes_dict.items():
        if not lid.is_collection_lid():
            continue

        collection_lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        bundle_lidvid = changes_dict.parent_lidvid(collection_lidvid)

        if not changed:
            if changes_dict.changed(bundle_lidvid.lid()):
                db.create_bundle_collection_link(str(bundle_lidvid),
                                                 str(collection_lidvid))
            continue

        collection_id = lid.collection_id
        if collection_id == "document":
            db.create_document_collection(str(collection_lidvid),
                                          str(bundle_lidvid))
        elif collection_id == "schema":
            # The schema collection has its own creation routine.
            _populate_schema_collection(db, str(bundle_lidvid))
        else:
            db.create_other_collection(str(collection_lidvid),
                                       str(bundle_lidvid))
def _munge_lidvid(product_lidvid: str, suffix: str,
                  new_basename: str) -> str:
    """
    Derive a new product LIDVID string from an existing one.

    The new collection id is the collection type for the suffix,
    followed by the old collection id from its first underscore up to
    (but excluding) its last three characters, followed by the
    lower-cased suffix.  The new product id is the first nine
    characters of new_basename.  The version is always 1.0.
    """
    bundle_id, collection_id, _old_product_id = (
        LIDVID(product_lidvid).lid().parts())

    # TODO This is a hack
    collection_type = get_collection_type(suffix=suffix)
    underscore_at = collection_id.index("_")
    retained = collection_id[underscore_at:-3]
    new_collection_id = "".join([collection_type, retained, suffix.lower()])

    # TODO This is a hack
    new_product_id = new_basename[:9]

    munged_lid = LID.create_from_parts(
        [bundle_id, new_collection_id, new_product_id])

    # TODO This is a hack.  Fix it.
    return str(LIDVID.create_from_lid_and_vid(munged_lid, VID("1.0")))
def _populate_target_identification(changes_dict: ChangesDict, db: BundleDB,
                                    sv_deltas: COWFS) -> None:
    """
    Create target-identification records for every changed product.

    For each changed product LID, the product directory in sv_deltas
    is listed and every ".fits" file whose base name is accepted by
    has_suffix_shm_spt_shf() is passed, as a system path, to
    db.create_target_identification().
    """

    def _is_wanted(filename: str) -> bool:
        # A ".fits" file (case-insensitive) whose lower-cased base name
        # passes the SHM/SPT/SHF suffix test.
        base, ext = fs.path.splitext(filename)
        return ext.lower() == ".fits" and has_suffix_shm_spt_shf(base.lower())

    for lid, (vid, changed) in changes_dict.items():
        if not (changed and lid.is_product_lid()):
            continue

        lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        product_path = lid_to_dirpath(lidvid.lid())

        # Select the files first, then record each one in the database.
        wanted = [
            name for name in sv_deltas.listdir(product_path)
            if _is_wanted(name)
        ]
        for name in wanted:
            os_path = sv_deltas.getsyspath(fs.path.join(product_path, name))
            db.create_target_identification(os_path)
def _populate_products(changes_dict: ChangesDict, db: BundleDB,
                       sv_deltas: COWFS) -> None:
    """
    Register every product listed in changes_dict with the database.

    A changed product in a "document" collection becomes a document
    product, with one document-file record per file whose extension is
    in DOCUMENT_SUFFIXES.  Any other changed product becomes a FITS
    product, and each of its ".fits" files is loaded with
    populate_database_from_fits_file().  An unchanged product is only
    linked to its collection, and only when that collection changed.
    """
    for lid, (vid, changed) in changes_dict.items():
        if not lid.is_product_lid():
            continue

        lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        collection_lidvid = changes_dict.parent_lidvid(lidvid)

        if not changed:
            if changes_dict.changed(collection_lidvid.lid()):
                db.create_collection_product_link(str(collection_lidvid),
                                                  str(lidvid))
            continue

        product_path = lid_to_dirpath(lidvid.lid())

        if collection_lidvid.lid().collection_id != "document":
            db.create_fits_product(str(lidvid), str(collection_lidvid))
            fits_names = [
                name for name in sv_deltas.listdir(product_path)
                if fs.path.splitext(name)[1].lower() == ".fits"
            ]
            for name in fits_names:
                fits_os_path = sv_deltas.getsyspath(
                    fs.path.join(product_path, name))
                populate_database_from_fits_file(db, fits_os_path,
                                                 str(lidvid))
            continue

        db.create_document_product(str(lidvid), str(collection_lidvid))
        doc_names = [
            name for name in sv_deltas.listdir(product_path)
            if fs.path.splitext(name)[1].lower() in DOCUMENT_SUFFIXES
        ]
        for name in doc_names:
            sys_filepath = sv_deltas.getsyspath(
                fs.path.join(product_path, name))
            db.create_document_file(sys_filepath, name, str(lidvid))
def test_lidvid_lid_vid_properties(self, lid: LID, vid: VID) -> None:
    # A LIDVID built from a LID and a VID must hand back both
    # components unchanged.
    combined = LIDVID.create_from_lid_and_vid(lid, vid)
    recovered_lid = combined.lid()
    self.assertEqual(recovered_lid, lid)
    recovered_vid = combined.vid()
    self.assertEqual(recovered_vid, vid)
def test_create_from_lid_and_vid(self) -> None:
    # Combining a LID and a VID must equal the LIDVID parsed from the
    # "<lid>::<vid>" string.
    expected = LIDVID("urn:nasa:pds:ssc01.hirespc.cruise:browse::2.5")
    actual = LIDVID.create_from_lid_and_vid(
        LID("urn:nasa:pds:ssc01.hirespc.cruise:browse"), VID("2.5"))
    self.assertEqual(expected, actual)
def test_is_next_minor_lidvid_property(self, lidvid: LIDVID) -> None:
    # next_minor_lidvid() must keep the LID and advance only the VID
    # to its next minor version.
    bumped_vid = lidvid.vid().next_minor_vid()
    expected = LIDVID.create_from_lid_and_vid(lidvid.lid(), bumped_vid)
    self.assertEqual(expected, lidvid.next_minor_lidvid())
def make_sub_lidvid(seg: str, vid_part: str) -> LIDVID:
    """
    Return the LIDVID one level below `lidvid`.

    The new LID is the parts of `lidvid`'s LID followed by `seg`; the
    new VID is built from `vid_part`.

    NOTE(review): `lidvid` is a free name here, presumably bound in an
    enclosing scope -- confirm.
    """
    # Build a fresh list instead of appending to the one parts()
    # returns, so the source LID cannot be altered if parts() hands
    # out its internal list.
    lid_parts = [*lidvid.lid().parts(), seg]
    return LIDVID.create_from_lid_and_vid(
        LID.create_from_parts(lid_parts), VID(vid_part))
def _run(self) -> None:
    """
    Build (or carry forward) browse collections for the bundle.

    Every entry containing "$" in the bundle directory of the stacked
    deltas filesystem is treated as a collection segment.  For each
    one that requires a browse collection, a changed collection gets
    its browse products built, while an unchanged one has its existing
    browse collection recorded as unchanged.  The changes dict is
    written back afterwards.

    A ValueError is raised (inside the logged section) if the
    deliverable directory exists.  Any exception is handed to
    PDS_LOGGER.exception(); the logger is always closed.
    """
    try:
        PDS_LOGGER.open("BuildBrowse")
        PDS_LOGGER.log("info", "Entering BuildBrowse.")

        working_dir: str = self.working_dir()
        archive_dir: str = self.archive_dir()
        archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()
        archive_browse_deltas_dir: str = self.archive_browse_deltas_dir()

        if os.path.isdir(self.deliverable_dir()):
            raise ValueError(f"{self.deliverable_dir()} cannot exist " +
                             "for BuildBrowse.")

        changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
        changes_dict = read_changes_dict(changes_path)

        db = create_bundle_db_from_os_filepath(
            os.path.join(working_dir, _BUNDLE_DB_NAME))

        bundle_lid = LID.create_from_parts([self._bundle_segment])
        bundle_lidvid = LIDVID.create_from_lid_and_vid(
            bundle_lid, changes_dict.vid(bundle_lid))

        with make_osfs(archive_dir) as archive_osfs, make_version_view(
                archive_osfs,
                self._bundle_segment) as version_view, make_sv_deltas(
                    version_view,
                    archive_primary_deltas_dir) as sv_deltas, make_sv_deltas(
                        sv_deltas,
                        archive_browse_deltas_dir) as browse_deltas:
            bundle_path = f"/{self._bundle_segment}$/"

            # Directory names carry a trailing marker character that
            # is dropped to recover the collection segment.
            collection_segments = [
                str(entry[:-1])
                for entry in browse_deltas.listdir(bundle_path)
                if "$" in entry
            ]

            for collection_segment in collection_segments:
                collection_lid = LID.create_from_parts(
                    [self._bundle_segment, collection_segment])
                if not _requires_browse_collection(collection_segment):
                    continue

                collection_vid = changes_dict.vid(collection_lid)
                collection_lidvid = LIDVID.create_from_lid_and_vid(
                    collection_lid, collection_vid)

                if not changes_dict.changed(collection_lid):
                    _fill_in_old_browse_collection(db, changes_dict,
                                                   bundle_lidvid,
                                                   collection_lidvid)
                    continue

                PDS_LOGGER.log("info",
                               f"Making browse for {collection_lidvid}")
                _build_browse_collection(
                    db,
                    changes_dict,
                    browse_deltas,
                    bundle_lidvid,
                    collection_lidvid,
                    bundle_path,
                )

            write_changes_dict(changes_dict, changes_path)
        PDS_LOGGER.log("info", "Leaving BuildBrowse.")
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()
def _extend_lidvid(lid: LID, vid: VID, segment: str) -> str:
    """
    Return, as a string, the LIDVID made of `lid` extended by
    `segment` and paired with `vid`.
    """
    return str(LIDVID.create_from_lid_and_vid(lid.extend_lid(segment), vid))
def _populate_citation_info(changes_dict: ChangesDict, db: BundleDB,
                            info_param: Tuple) -> None:
    """
    Store the citation info in the database for every changed bundle
    LID found in changes_dict.
    """
    for lid, (vid, changed) in changes_dict.items():
        if not changed or not lid.is_bundle_lid():
            continue
        bundle_lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        db.create_citation(str(bundle_lidvid), info_param)
def next_minor_lidvid(self, lid: LID) -> LIDVID:
    """
    Return the LIDVID that follows the latest one for `lid` by a minor
    version, or version 1.0 of `lid` when self.latest_lidvid() returns
    a falsy value.
    """
    current = self.latest_lidvid(lid)
    if not current:
        return LIDVID.create_from_lid_and_vid(lid, VID("1.0"))
    return current.next_minor_lidvid()
def _extend_lidvid(lidvid_str: str, segment: str) -> str:
    """
    Parse `lidvid_str`, extend its LID by `segment`, keep its VID, and
    return the resulting LIDVID as a string.
    """
    parsed = LIDVID(lidvid_str)
    extended_lid = parsed.lid().extend_lid(segment)
    return str(LIDVID.create_from_lid_and_vid(extended_lid, parsed.vid()))
def _run(self) -> None:
    """
    Create the PDS4 labels for the bundle.

    Raises ValueError if the deliverable directory already exists.
    Otherwise the changes dict is read from the working directory, the
    primary, browse and label deltas are stacked on a version view of
    the archive, the citation information is fetched from the bundle
    database, and create_pds4_labels() is run.  Exceptions from label
    creation are handed to PDS_LOGGER.exception(); the logger is always
    closed and the changes dict is written back afterwards.
    """
    working_dir: str = self.working_dir()
    archive_dir: str = self.archive_dir()
    archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()
    archive_browse_deltas_dir: str = self.archive_browse_deltas_dir()
    archive_label_deltas_dir: str = self.archive_label_deltas_dir()

    if os.path.isdir(self.deliverable_dir()):
        raise ValueError(
            f"{self.deliverable_dir()} cannot exist for BuildLabels.")

    # The changes dict is loaded once, here; nothing below modifies the
    # file before write_changes_dict() at the end.
    changes_path = fs.path.join(working_dir, CHANGES_DICT_NAME)
    changes_dict = read_changes_dict(changes_path)

    with make_osfs(archive_dir) as archive_osfs, make_version_view(
            archive_osfs,
            self._bundle_segment) as version_view, make_sv_deltas(
                version_view,
                archive_primary_deltas_dir) as sv_deltas, make_sv_deltas(
                    sv_deltas,
                    archive_browse_deltas_dir
                ) as browse_deltas, make_sv_deltas(
                    browse_deltas, archive_label_deltas_dir) as label_deltas:
        # open the database
        db_filepath = fs.path.join(working_dir, _BUNDLE_DB_NAME)
        db = create_bundle_db_from_os_filepath(db_filepath)

        # create labels
        bundle_lid = LID.create_from_parts([self._bundle_segment])
        bundle_vid = changes_dict.vid(bundle_lid)
        bundle_lidvid = LIDVID.create_from_lid_and_vid(
            bundle_lid, bundle_vid)

        documents_dir = f"/{self._bundle_segment}$/document$/phase2$"
        # NOTE(review): the listing result is not used anywhere; the
        # call is kept because it fails when the directory is missing,
        # which may be relied on as a check -- confirm.
        sv_deltas.listdir(documents_dir)

        # fetch citation info from database
        citation_info_from_db = db.get_citation(str(bundle_lidvid))
        info = Citation_Information(
            citation_info_from_db.filename,
            citation_info_from_db.propno,
            citation_info_from_db.category,
            citation_info_from_db.cycle,
            citation_info_from_db.authors.split(","),
            citation_info_from_db.title,
            citation_info_from_db.submission_year,
            citation_info_from_db.timing_year,
            citation_info_from_db.abstract.split("\n"),
        )
        info.set_publication_year(PUBLICATION_YEAR)

        try:
            PDS_LOGGER.open("BuildLabels")
            # create_pds4_labels() may change changes_dict, because we
            # create the context collection if it doesn't exist.
            create_pds4_labels(working_dir, db, bundle_lidvid,
                               changes_dict, label_deltas, info)
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()
        write_changes_dict(changes_dict, changes_path)
def _build_browse_collection(
    db: BundleDB,
    changes_dict: ChangesDict,
    browse_deltas: COWFS,
    bundle_lidvid: LIDVID,
    data_collection_lidvid: LIDVID,
    bundle_path: str,
) -> None:
    """
    Build the browse collection that accompanies a changed data
    collection.

    The browse collection's LID comes from to_browse_lid() on the data
    collection's LID and it takes the data collection's VID.  It is
    recorded in changes_dict as changed, its directory is created in
    browse_deltas, and it is registered in the database and linked to
    the bundle.

    Each entry containing "$" in the data collection's directory is
    treated as a product.  For a changed product, a browse product is
    created and one ".jpg" browse file is generated per file in the
    product directory using picmaker.  For an unchanged product, the
    browse product is only recorded as unchanged and linked to the
    browse collection.

    Raises Exception (chained from the original IndexError) when
    picmaker fails with an IndexError on a file.
    """
    bundle_segment = bundle_lidvid.lid().parts()[0]
    collection_segment = data_collection_lidvid.lid().parts()[1]

    browse_collection_lid = data_collection_lidvid.lid().to_browse_lid()
    collection_path = f"{bundle_path}{collection_segment}$/"
    browse_collection_segment = browse_collection_lid.collection_id
    browse_collection_path = f"{bundle_path}{browse_collection_segment}$/"
    browse_collection_vid = data_collection_lidvid.vid()
    browse_collection_lidvid = LIDVID.create_from_lid_and_vid(
        browse_collection_lid, browse_collection_vid)

    changes_dict.set(browse_collection_lid, browse_collection_vid, True)

    browse_deltas.makedirs(browse_collection_path, recreate=True)

    db.create_other_collection(str(browse_collection_lidvid),
                               str(bundle_lidvid))
    db.create_bundle_collection_link(str(bundle_lidvid),
                                     str(browse_collection_lidvid))

    # Directory names carry a trailing marker character that is
    # dropped to recover the product segment.
    product_segments = [
        str(prod[:-1]) for prod in browse_deltas.listdir(collection_path)
        if "$" in prod
    ]
    for product_segment in product_segments:
        # These product_segments are from the data_collection
        product_lid = LID.create_from_parts(
            [bundle_segment, collection_segment, product_segment])
        product_vid = changes_dict.vid(product_lid)

        product_path = f"{collection_path}{product_segment}$/"
        browse_product_path = f"{browse_collection_path}{product_segment}$/"

        browse_product_lidvid = _extend_lidvid(browse_collection_lid,
                                               product_vid, product_segment)

        if changes_dict.changed(product_lid):
            # NOTE(review): the FITS product LIDVID is built with the
            # data collection's VID rather than product_vid -- confirm
            # that this is intended.
            fits_product_lidvid = _extend_lidvid(
                data_collection_lidvid.lid(),
                data_collection_lidvid.vid(),
                product_segment,
            )

            bpl = LIDVID(browse_product_lidvid)
            changes_dict.set(bpl.lid(), bpl.vid(), True)

            browse_deltas.makedirs(browse_product_path, recreate=True)
            db.create_browse_product(
                browse_product_lidvid,
                fits_product_lidvid,
                str(browse_collection_lidvid),
            )
            db.create_collection_product_link(str(browse_collection_lidvid),
                                              browse_product_lidvid)

            for fits_file in browse_deltas.listdir(product_path):
                fits_filepath = fs.path.join(product_path, fits_file)
                fits_os_filepath = browse_deltas.getsyspath(fits_filepath)

                browse_file = fs.path.splitext(fits_file)[0] + ".jpg"
                browse_filepath = fs.path.join(browse_product_path,
                                               browse_file)

                # In a COWFS, a directory does not have a
                # syspath, only files.  So we write a stub
                # file into the directory, find its syspath
                # and its directory's syspath.  Then we remove
                # the stub file.
                browse_deltas.touch(browse_filepath)
                browse_product_os_filepath = browse_deltas.getsyspath(
                    browse_filepath)
                browse_deltas.remove(browse_filepath)

                browse_product_os_dirpath = fs.path.dirname(
                    browse_product_os_filepath)

                # Picmaker expects a list of strings.  If you give it
                # str, it'll index into it and complain about '/'
                # not being a file.  So don't do that!
                try:
                    picmaker.ImagesToPics(
                        [str(fits_os_filepath)],
                        browse_product_os_dirpath,
                        filter="None",
                        percentiles=(1, 99),
                    )
                except IndexError as e:
                    tb = traceback.format_exc()
                    message = f"File {fits_file}: {e}\n{tb}"
                    # Chain explicitly so the original IndexError is
                    # recorded as the cause of the new exception.
                    raise Exception(message) from e

                browse_os_filepath = fs.path.join(browse_product_os_dirpath,
                                                  browse_file)
                size = os.stat(browse_os_filepath).st_size
                db.create_browse_file(browse_os_filepath, browse_file,
                                      browse_product_lidvid, size)
        else:
            bpl = LIDVID(browse_product_lidvid)
            changes_dict.set(bpl.lid(), bpl.vid(), False)
            db.create_collection_product_link(str(browse_collection_lidvid),
                                              browse_product_lidvid)