Beispiel #1
0
def dirpath_to_lid(dirpath: str) -> LID:
    """
    Find the LID corresponding to a directory in a single-versioned
    filesystem.
    """
    parts = [part[:-1] for part in iteratepath(dirpath) if part.endswith("$")]
    return LID.create_from_parts(parts)
Beispiel #2
0
 def __iter__(self) -> Iterator[LIDVID]:
     for dir in self.fs.walk.dirs():
         parts = fs.path.parts(dir)
         if parts[-1].startswith("v$"):
             vid_part = str(parts[-1][2:])
             lid_parts = [str(p) for p in parts[1:-1]]
             yield LIDVID.create_from_lid_and_vid(
                 LID.create_from_parts(lid_parts), VID(vid_part))
Beispiel #3
0
    def update_from_single_version(self, is_new: IS_NEW_TEST,
                                   single_version_fs: FS) -> bool:
        # TODO This import is circular; that's why I have it here
        # inside the function.  But there must be a better way to
        # structure.
        from pdart.fs.multiversioned.version_view import (
            is_segment,
            strip_segment,
            vv_lid_path,
        )

        # TODO Note that this makes assumptions about the source
        # filesystem format.  Document them.

        def update_from_lid(lid: LID) -> LIDVID:
            # Find the path corresponding to this LID.
            path = vv_lid_path(lid)

            # First, update all the children recursively.  Get their
            # LIDs by extending this LID with the names of the
            # subdirectories of path.  That handles directories.
            child_lidvids: Set[LIDVID] = {
                update_from_lid(lid.extend_lid(strip_segment(name)))
                for name in single_version_fs.listdir(path) if is_segment(name)
            }

            # Now look at files.  We create a VersionContents object
            # from the set of new LIDVIDs and all the files contained
            # in the component's directory.
            contents = VersionContents.create_from_lidvids_and_dirpath(
                child_lidvids, single_version_fs, path)

            # Now we ask the Multiversioned to insert these contents
            # as a new version if needed.  It returns the new LIDVID
            # if a new LIDVID is needed, otherwise it returns the old
            # one.
            return self.add_contents_if(is_new, lid, contents, False)

        bundle_segs = [
            strip_segment(name) for name in single_version_fs.listdir("/")
            if is_segment(name)
        ]

        # TODO I can't see any reason why there wouldn't be exactly a
        # single segment, but I'm throwing in an assert to let me know
        # if I'm wrong.
        if len(bundle_segs) != 1:
            raise ValueError(f"bundle_segs: {bundle_segs} is more than 1.")

        changed = False

        for bundle_seg in bundle_segs:
            lid = LID.create_from_parts([str(bundle_seg)])
            orig_lidvid: Optional[LIDVID] = self.latest_lidvid(lid)
            new_lidvid: LIDVID = update_from_lid(lid)
            changed = changed or new_lidvid != orig_lidvid

        return changed
Beispiel #4
0
    def _run(self) -> None:
        working_dir: str = self.working_dir()
        archive_dir: str = self.archive_dir()
        archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()

        if os.path.isdir(self.deliverable_dir()):
            raise ValueError(
                f"{self.deliverable_dir()} cannot exist for PopulateDatabase.")

        changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
        changes_dict = read_changes_dict(changes_path)
        bundle_lid = LID.create_from_parts([self._bundle_segment])
        first_round = changes_dict.vid(bundle_lid) == VID("1.0")
        schema_collection_lid = LID.create_from_parts(
            [self._bundle_segment, "schema"])
        changes_dict.set(schema_collection_lid, VID("1.0"), first_round)
        write_changes_dict(changes_dict, changes_path)

        db_filepath = os.path.join(working_dir, _BUNDLE_DB_NAME)
        db_exists = os.path.isfile(db_filepath)
        db = create_bundle_db_from_os_filepath(db_filepath)

        with make_osfs(archive_dir) as archive_osfs, make_version_view(
                archive_osfs,
                self._bundle_segment) as version_view, make_sv_deltas(
                    version_view, archive_primary_deltas_dir) as sv_deltas:
            if not db_exists:
                db.create_tables()

            documents_dir = f"/{self._bundle_segment}$/document$/phase2$"
            docs = set(sv_deltas.listdir(documents_dir))
            # Pass this to create citation info db in _populate_citation_info
            info_param: Tuple = (sv_deltas, documents_dir, docs)

            bundle_lidvid = _populate_bundle(changes_dict, db)
            _populate_collections(changes_dict, db)
            _populate_products(changes_dict, db, sv_deltas)
            _populate_target_identification(changes_dict, db, sv_deltas)
            _populate_citation_info(changes_dict, db, info_param)

        if not db:
            raise ValueError("db doesn't exist.")

        if not os.path.isfile(db_filepath):
            raise ValueError(f"{db_filepath} is not a file.")
Beispiel #5
0
    def _run(self) -> None:
        working_dir: str = self.working_dir()
        archive_dir: str = self.archive_dir()
        deliverable_dir: str = self.deliverable_dir()
        manifest_dir: str = self.manifest_dir()
        try:
            PDS_LOGGER.open("Create deliverable directory")
            if os.path.isdir(deliverable_dir):
                raise ValueError(
                    f"{deliverable_dir} cannot exist for MakeDeliverable.")

            changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
            changes_dict = read_changes_dict(changes_path)

            with make_osfs(archive_dir) as archive_osfs, make_multiversioned(
                    archive_osfs) as mv:
                bundle_segment = self._bundle_segment
                bundle_lid = LID.create_from_parts([bundle_segment])
                bundle_vid = changes_dict.vid(bundle_lid)
                bundle_lidvid = LIDVID.create_from_lid_and_vid(
                    bundle_lid, bundle_vid)
                version_view = VersionView(mv, bundle_lidvid)

                synth_files: Dict[str, bytes] = dict()

                # open the database
                db_filepath = fs.path.join(working_dir, _BUNDLE_DB_NAME)
                bundle_db = create_bundle_db_from_os_filepath(db_filepath)

                bundle_lidvid_str = str(bundle_lidvid)
                synth_files = dict()
                cm = make_checksum_manifest(bundle_db, bundle_lidvid_str,
                                            short_lidvid_to_dirpath)
                synth_files["/checksum.manifest.txt"] = cm.encode("utf-8")
                tm = make_transfer_manifest(bundle_db, bundle_lidvid_str,
                                            short_lidvid_to_dirpath)
                synth_files["/transfer.manifest.txt"] = tm.encode("utf-8")

                deliverable_view = DeliverableView(version_view, synth_files)

                os.mkdir(deliverable_dir)
                deliverable_osfs = OSFS(deliverable_dir)
                copy_fs(deliverable_view, deliverable_osfs)
                PDS_LOGGER.log("info", f"Deliverable: {deliverable_dir}")
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()
Beispiel #6
0
def _munge_lidvid(product_lidvid: str, suffix: str, new_basename: str) -> str:
    bundle_id, collection_id, product_id = LIDVID(product_lidvid).lid().parts()

    # TODO This is a hack
    collection_type = get_collection_type(suffix=suffix)
    first_underscore_idx = collection_id.index("_")
    new_collection_id = (collection_type +
                         collection_id[first_underscore_idx:-3] +
                         suffix.lower())
    # TODO This is a hack
    new_product_id = new_basename[0:9]

    new_lid = LID.create_from_parts(
        [bundle_id, new_collection_id, new_product_id])
    # TODO This is a hack.  Fix it.
    vid = VID("1.0")
    new_lidvid = LIDVID.create_from_lid_and_vid(new_lid, vid)
    return str(new_lidvid)
Beispiel #7
0
    def test_create_lid_from_parts(self) -> None:
        parts: List[str] = []
        with self.assertRaises(ValueError):
            LID.create_from_parts(parts)

        parts = ["b"]
        self.assertEqual(LID("urn:nasa:pds:b"), LID.create_from_parts(parts))

        parts = ["b", "c"]
        self.assertEqual(LID("urn:nasa:pds:b:c"), LID.create_from_parts(parts))

        parts = ["b", "c", "p"]
        self.assertEqual(LID("urn:nasa:pds:b:c:p"), LID.create_from_parts(parts))

        parts = ["b", "c", "p", "x"]
        with self.assertRaises(ValueError):
            LID.create_from_parts(parts)
Beispiel #8
0
    def _run(self) -> None:
        working_dir: str = self.working_dir()
        archive_dir: str = self.archive_dir()
        archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()
        archive_browse_deltas_dir: str = self.archive_browse_deltas_dir()
        archive_label_deltas_dir: str = self.archive_label_deltas_dir()

        if os.path.isdir(self.deliverable_dir()):
            raise ValueError(
                f"{self.deliverable_dir()} cannot exist for BuildLabels.")

        changes_path = fs.path.join(working_dir, CHANGES_DICT_NAME)
        changes_dict = read_changes_dict(changes_path)

        with make_osfs(archive_dir) as archive_osfs, make_version_view(
                archive_osfs,
                self._bundle_segment) as version_view, make_sv_deltas(
                    version_view,
                    archive_primary_deltas_dir) as sv_deltas, make_sv_deltas(
                        sv_deltas, archive_browse_deltas_dir
                    ) as browse_deltas, make_sv_deltas(
                        browse_deltas,
                        archive_label_deltas_dir) as label_deltas:
            changes_path = fs.path.join(working_dir, CHANGES_DICT_NAME)
            changes_dict = read_changes_dict(changes_path)

            # open the database
            db_filepath = fs.path.join(working_dir, _BUNDLE_DB_NAME)
            db = create_bundle_db_from_os_filepath(db_filepath)

            # create labels
            bundle_lid = LID.create_from_parts([self._bundle_segment])
            bundle_vid = changes_dict.vid(bundle_lid)
            bundle_lidvid = LIDVID.create_from_lid_and_vid(
                bundle_lid, bundle_vid)

            documents_dir = f"/{self._bundle_segment}$/document$/phase2$"
            docs = set(sv_deltas.listdir(documents_dir))

            # fetch citation info from database
            citation_info_from_db = db.get_citation(str(bundle_lidvid))
            info = Citation_Information(
                citation_info_from_db.filename,
                citation_info_from_db.propno,
                citation_info_from_db.category,
                citation_info_from_db.cycle,
                citation_info_from_db.authors.split(","),
                citation_info_from_db.title,
                citation_info_from_db.submission_year,
                citation_info_from_db.timing_year,
                citation_info_from_db.abstract.split("\n"),
            )
            info.set_publication_year(PUBLICATION_YEAR)

            try:
                PDS_LOGGER.open("BuildLabels")
                # create_pds4_labels() may change changes_dict, because we
                # create the context collection if it doesn't exist.
                create_pds4_labels(working_dir, db, bundle_lidvid,
                                   changes_dict, label_deltas, info)
            except Exception as e:
                PDS_LOGGER.exception(e)
            finally:
                PDS_LOGGER.close()
            write_changes_dict(changes_dict, changes_path)
Beispiel #9
0
def _build_browse_collection(
    db: BundleDB,
    changes_dict: ChangesDict,
    browse_deltas: COWFS,
    bundle_lidvid: LIDVID,
    data_collection_lidvid: LIDVID,
    bundle_path: str,
) -> None:
    bundle_segment = bundle_lidvid.lid().parts()[0]
    collection_segment = data_collection_lidvid.lid().parts()[1]

    browse_collection_lid = data_collection_lidvid.lid().to_browse_lid()
    collection_path = f"{bundle_path}{collection_segment}$/"
    browse_collection_segment = browse_collection_lid.collection_id
    browse_collection_path = f"{bundle_path}{browse_collection_segment}$/"
    browse_collection_vid = data_collection_lidvid.vid()
    browse_collection_lidvid = LIDVID.create_from_lid_and_vid(
        browse_collection_lid, browse_collection_vid)

    changes_dict.set(browse_collection_lid, browse_collection_vid, True)

    browse_deltas.makedirs(browse_collection_path, recreate=True)

    db.create_other_collection(str(browse_collection_lidvid),
                               str(bundle_lidvid))
    db.create_bundle_collection_link(str(bundle_lidvid),
                                     str(browse_collection_lidvid))

    product_segments = [
        str(prod[:-1]) for prod in browse_deltas.listdir(collection_path)
        if "$" in prod
    ]
    for product_segment in product_segments:
        # These product_segments are from the data_collection
        product_lid = LID.create_from_parts(
            [bundle_segment, collection_segment, product_segment])
        product_vid = changes_dict.vid(product_lid)

        product_path = f"{collection_path}{product_segment}$/"
        browse_product_path = f"{browse_collection_path}{product_segment}$/"

        browse_product_lidvid = _extend_lidvid(browse_collection_lid,
                                               product_vid, product_segment)

        if changes_dict.changed(product_lid):
            fits_product_lidvid = _extend_lidvid(
                data_collection_lidvid.lid(),
                data_collection_lidvid.vid(),
                product_segment,
            )

            bpl = LIDVID(browse_product_lidvid)
            changes_dict.set(bpl.lid(), bpl.vid(), True)

            browse_deltas.makedirs(browse_product_path, recreate=True)
            db.create_browse_product(
                browse_product_lidvid,
                fits_product_lidvid,
                str(browse_collection_lidvid),
            )
            db.create_collection_product_link(str(browse_collection_lidvid),
                                              browse_product_lidvid)

            for fits_file in browse_deltas.listdir(product_path):
                fits_filepath = fs.path.join(product_path, fits_file)
                fits_os_filepath = browse_deltas.getsyspath(fits_filepath)

                browse_file = fs.path.splitext(fits_file)[0] + ".jpg"
                browse_filepath = fs.path.join(browse_product_path,
                                               browse_file)

                # In a COWFS, a directory does not have a
                # syspath, only files.  So we write a stub
                # file into the directory, find its syspath
                # and its directory's syspath.  Then we remove
                # the stub file.
                browse_deltas.touch(browse_filepath)
                browse_product_os_filepath = browse_deltas.getsyspath(
                    browse_filepath)
                browse_deltas.remove(browse_filepath)

                browse_product_os_dirpath = fs.path.dirname(
                    browse_product_os_filepath)

                # Picmaker expects a list of strings.  If you give it
                # str, it'll index into it and complain about '/'
                # not being a file.  So don't do that!
                try:
                    picmaker.ImagesToPics(
                        [str(fits_os_filepath)],
                        browse_product_os_dirpath,
                        filter="None",
                        percentiles=(1, 99),
                    )
                except IndexError as e:
                    tb = traceback.format_exc()
                    message = f"File {fits_file}: {e}\n{tb}"
                    raise Exception(message)

                browse_os_filepath = fs.path.join(browse_product_os_dirpath,
                                                  browse_file)
                size = os.stat(browse_os_filepath).st_size
                db.create_browse_file(browse_os_filepath, browse_file,
                                      browse_product_lidvid, size)
        else:
            bpl = LIDVID(browse_product_lidvid)
            changes_dict.set(bpl.lid(), bpl.vid(), False)
            db.create_collection_product_link(str(browse_collection_lidvid),
                                              browse_product_lidvid)
Beispiel #10
0
    def _run(self) -> None:
        try:
            PDS_LOGGER.open("BuildBrowse")
            PDS_LOGGER.log("info", "Entering BuildBrowse.")
            working_dir: str = self.working_dir()
            archive_dir: str = self.archive_dir()
            archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()
            archive_browse_deltas_dir: str = self.archive_browse_deltas_dir()

            if os.path.isdir(self.deliverable_dir()):
                raise ValueError(f"{self.deliverable_dir()} cannot exist " +
                                 "for BuildBrowse.")

            changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
            changes_dict = read_changes_dict(changes_path)

            db_filepath = os.path.join(working_dir, _BUNDLE_DB_NAME)
            db = create_bundle_db_from_os_filepath(db_filepath)

            bundle_lid = LID.create_from_parts([self._bundle_segment])
            bundle_vid = changes_dict.vid(bundle_lid)
            bundle_lidvid = LIDVID.create_from_lid_and_vid(
                bundle_lid, bundle_vid)

            with make_osfs(archive_dir) as archive_osfs, make_version_view(
                    archive_osfs,
                    self._bundle_segment) as version_view, make_sv_deltas(
                        version_view, archive_primary_deltas_dir
                    ) as sv_deltas, make_sv_deltas(
                        sv_deltas, archive_browse_deltas_dir) as browse_deltas:
                bundle_path = f"/{self._bundle_segment}$/"
                collection_segments = [
                    str(coll[:-1])
                    for coll in browse_deltas.listdir(bundle_path)
                    if "$" in coll
                ]
                for collection_segment in collection_segments:
                    collection_lid = LID.create_from_parts(
                        [self._bundle_segment, collection_segment])
                    if _requires_browse_collection(collection_segment):
                        collection_vid = changes_dict.vid(collection_lid)
                        collection_lidvid = LIDVID.create_from_lid_and_vid(
                            collection_lid, collection_vid)
                        if changes_dict.changed(collection_lid):
                            PDS_LOGGER.log(
                                "info",
                                f"Making browse for {collection_lidvid}")
                            _build_browse_collection(
                                db,
                                changes_dict,
                                browse_deltas,
                                bundle_lidvid,
                                collection_lidvid,
                                bundle_path,
                            )
                        else:
                            _fill_in_old_browse_collection(
                                db, changes_dict, bundle_lidvid,
                                collection_lidvid)

                write_changes_dict(changes_dict, changes_path)
            PDS_LOGGER.log("info", "Leaving BuildBrowse.")
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()
Beispiel #11
0
 def make_sub_lidvid(seg: str, vid_part: str) -> LIDVID:
     lid_parts = lidvid.lid().parts()
     lid_parts.append(seg)
     return LIDVID.create_from_lid_and_vid(
         LID.create_from_parts(lid_parts), VID(vid_part))