Beispiel #1
0
def _fill_in_old_browse_collection(
    db: BundleDB,
    changes_dict: ChangesDict,
    bundle_lidvid: LIDVID,
    data_collection_lidvid: LIDVID,
) -> None:
    """
    Register the browse collection that corresponds to a data
    collection, together with the products the database already
    lists for it.

    The browse collection's LID is derived from the data collection's
    LID and it reuses the data collection's VID.  The browse
    collection and each of its products are recorded in
    ``changes_dict`` with ``False`` as the last argument to ``set``
    (presumably meaning "not changed" -- confirm against
    ChangesDict), and the browse collection is linked to the bundle
    in ``db``.

    Exceptions raised while logging or while walking the products are
    passed to ``PDS_LOGGER.exception``; the log is always closed.
    """
    # NOTE(review): bundle_segment, collection_segment and
    # browse_collection_segment are never read.  They are kept because
    # the indexing/attribute access may be acting as an implicit
    # sanity check on the arguments -- confirm before deleting.
    bundle_segment = bundle_lidvid.lid().parts()[0]
    data_collection_lid = data_collection_lidvid.lid()
    collection_segment = data_collection_lid.parts()[1]

    browse_lid = data_collection_lid.to_browse_lid()
    browse_collection_segment = browse_lid.collection_id
    browse_vid = data_collection_lidvid.vid()
    browse_collection_lidvid = LIDVID.create_from_lid_and_vid(
        browse_lid, browse_vid)

    changes_dict.set(browse_lid, browse_vid, False)
    db.create_bundle_collection_link(
        str(bundle_lidvid), str(browse_collection_lidvid))
    try:
        PDS_LOGGER.open("Fill in old browse collection")
        PDS_LOGGER.log(
            "info", f"Created link and change for {browse_collection_lidvid}")
        browse_products = db.get_collection_products(
            str(browse_collection_lidvid))
        for db_product in browse_products:
            product_lidvid = LIDVID(db_product.lidvid)
            changes_dict.set(product_lidvid.lid(), product_lidvid.vid(), False)
            PDS_LOGGER.log("info",
                           f"Created link and change for {product_lidvid}")
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()
Beispiel #2
0
 def _copy_docs_files(self, bundle_segment: str, documents_dir: str,
                      primary_files_dir: str) -> None:
     """
     Copy every file found under ``documents_dir`` into the bundle's
     ``document``/``phase2`` segment directory inside the
     primary-files filesystem.

     Files are copied under their basename only, so any subdirectory
     structure below ``documents_dir`` is not reproduced.

     Raises ValueError if ``documents_dir`` is not an existing
     directory.  Exceptions raised during the copy are passed to
     ``PDS_LOGGER.exception``; the log is always closed.
     """
     if not os.path.isdir(documents_dir):
         raise ValueError(f"{documents_dir} doesn't exist.")
     try:
         PDS_LOGGER.open("Copy docs files to document directory")
         with make_osfs(documents_dir) as documents_fs, make_sv_osfs(
                 primary_files_dir) as primary_files_fs:
             # Paths inside a PyFilesystem object always use "/" as the
             # separator, so they are built with fs.path rather than
             # the platform-dependent os.path.
             new_dir_path = fs.path.join(
                 to_segment_dir(bundle_segment),
                 to_segment_dir("document"),
                 to_segment_dir("phase2"),
             )
             primary_files_fs.makedirs(new_dir_path)
             for file in documents_fs.walk.files():
                 file_basename = fs.path.basename(file)
                 new_file_path = fs.path.join(new_dir_path, file_basename)
                 PDS_LOGGER.log("info",
                                f"Copy {file_basename} to {new_file_path}")
                 fs.copy.copy_file(documents_fs, file, primary_files_fs,
                                   new_file_path)
     except Exception as e:
         PDS_LOGGER.exception(e)
     finally:
         PDS_LOGGER.close()
Beispiel #3
0
def download_product_documents(proposal_id: int, download_dir: str) -> Set[str]:
    """
    Try to fetch each known documentation file for a proposal ID.

    Each (URL, local basename) pair in the table is handed to
    ``_retrieve_doc`` together with the destination path inside
    ``download_dir``; a truthy result counts as a successful download.

    Returns the set of basenames that were retrieved.  Exceptions are
    passed to ``PDS_LOGGER.exception``; the log is always closed.
    """
    table: List[Tuple[str, str]] = [
        (f"https://www.stsci.edu/hst/phase2-public/{proposal_id}.apt", "phase2.apt"),
        (f"https://www.stsci.edu/hst/phase2-public/{proposal_id}.pdf", "phase2.pdf"),
        (f"https://www.stsci.edu/hst/phase2-public/{proposal_id}.pro", "phase2.pro"),
        (f"https://www.stsci.edu/hst/phase2-public/{proposal_id}.prop", "phase2.prop"),
    ]
    retrieved: Set[str] = set()
    try:
        PDS_LOGGER.open("Download product documents")
        for url, basename in table:
            destination = fs.path.join(download_dir, basename)
            if not _retrieve_doc(url, destination):
                # Nothing was fetched from this URL; try the next one.
                continue
            PDS_LOGGER.log("info", f"Retrieve {basename} from {url}")
            retrieved.add(basename)
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()

    return retrieved
Beispiel #4
0
 def _delete_directory() -> None:
     """
     Remove the first directory found in ``mast_fs`` whose path has
     exactly three components according to ``fs.path.parts``.

     Raises RuntimeError if no such directory exists.  The log opened
     here is closed on every exit path, including when removal fails
     or the RuntimeError is raised.
     """
     PDS_LOGGER.open("Delete directory")
     try:
         for path in mast_fs.walk.dirs():
             if len(fs.path.parts(path)) == 3:
                 PDS_LOGGER.log("info", f"REMOVED {path}")
                 mast_fs.removetree(path)
                 # Return right away: the walk must not continue over
                 # a tree that has just been modified.
                 return
         raise RuntimeError(
             "Fell off the end of delete_directory in ChangeFiles.")
     finally:
         PDS_LOGGER.close()
Beispiel #5
0
    def next_stage(self, phase: str) -> Optional[Stage]:
        """
        Return the stage that follows the one named ``phase``.

        ``self.stages`` is searched as a sequence of (name, stage)
        pairs.  The name of the following stage is logged and the
        stage itself returned; if ``phase`` names the last entry, None
        is returned.

        Raises ValueError if no entry is named ``phase``.
        """
        for position, (stage_name, _) in enumerate(self.stages):
            if stage_name == phase:
                break
        else:
            raise ValueError(f"unknown phase {phase}.")

        successor = position + 1
        try:
            PDS_LOGGER.log("info", f"{self.stages[successor][0]}")
            return self.stages[successor][1]
        except IndexError:
            # ``phase`` was the final stage, so nothing follows it.
            return None
Beispiel #6
0
 def _change_fits_file() -> None:
     """
     Apply ``change_fits_file`` to one FITS file in ``mast_fs``.

     ``which_file`` selects the file by its zero-based position in
     the walk; it is fixed at 0 here, so the first ``*.fits`` file
     found is the one changed.

     Raises RuntimeError if the walk ends before the selected file is
     reached.  The log opened here is closed on every exit path,
     including when the change fails or the RuntimeError is raised.
     """
     which_file = 0
     PDS_LOGGER.open("Change fits file")
     try:
         for path in mast_fs.walk.files(filter=["*.fits"]):
             # change only the n-th FITS file then return
             if which_file == 0:
                 change_fits_file(path)
                 PDS_LOGGER.log("info", f"CHANGED {path}")
                 return
             which_file = which_file - 1
         raise RuntimeError(
             "Fell off the end of change_fits_file in ChangeFiles.")
     finally:
         PDS_LOGGER.close()
Beispiel #7
0
    def _copy_fits_files(self, bundle_segment: str, mast_downloads_dir: str,
                         primary_files_dir: str) -> None:
        """
        Copy each ``*.fits`` file under ``mast_downloads_dir`` into the
        primary-files filesystem, filed by bundle, collection and
        product.

        The collection and product names are derived from the
        lowercased filename via ``HstFilename`` and
        ``get_collection_type``.  Every FITS path is expected to have
        exactly three components.

        Raises ValueError if ``mast_downloads_dir`` is not a directory.
        Errors raised inside the try (including a wrong path depth or
        a missing ``<primary_files_dir>-sv`` directory afterwards) are
        passed to ``PDS_LOGGER.exception``; the log is always closed.
        """
        if not os.path.isdir(mast_downloads_dir):
            raise ValueError(f"{mast_downloads_dir} doesn't exist.")
        try:
            PDS_LOGGER.open("Copy fits files to corresponding directories")
            with make_osfs(mast_downloads_dir) as mast_downloads_fs, \
                    make_sv_osfs(primary_files_dir) as primary_files_fs:

                fits_paths = mast_downloads_fs.walk.files(filter=["*.fits"])
                for filepath in fits_paths:
                    parts = fs.path.iteratepath(filepath)
                    if len(parts) != 3:
                        raise ValueError(f"{parts} length is not 3.")

                    # The product name comes from the filename itself,
                    # not from the directories above it.
                    filename = parts[2].lower()
                    parsed_name = HstFilename(filename)
                    product_segment = parsed_name.rootname()
                    instrument_name = parsed_name.instrument_name()
                    suffix = parsed_name.suffix()

                    collection_type = get_collection_type(
                        suffix=suffix, instrument_id=instrument_name)
                    collection_segment = f"{collection_type}_{instrument_name.lower()}_{suffix}"

                    new_path = fs.path.join(
                        to_segment_dir(bundle_segment),
                        to_segment_dir(collection_segment),
                        to_segment_dir(product_segment),
                        filename,
                    )
                    target_dir, filename = fs.path.split(new_path)
                    primary_files_fs.makedirs(target_dir)
                    PDS_LOGGER.log("info", f"Copy {filename} to {new_path}")
                    fs.copy.copy_file(mast_downloads_fs, filepath,
                                      primary_files_fs, new_path)

            if not os.path.isdir(primary_files_dir + "-sv"):
                raise ValueError(f"{primary_files_dir + '-sv'} doesn't exist.")
            # Removing mast_downloads_dir at this point was once
            # considered safe; that cleanup is currently disabled.
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()
Beispiel #8
0
        def change_fits_file(rel_path: str) -> None:
            """
            Run ``touch_fits`` on one file in the MAST downloads
            directory.

            ``rel_path`` is made relative with ``fs.path.relpath`` and
            joined onto ``self.mast_downloads_dir()`` to form the path
            that is touched.  Exceptions from the touch are passed to
            ``PDS_LOGGER.exception``; the log is always closed.
            """
            downloads_root = self.mast_downloads_dir()
            relative = fs.path.relpath(rel_path)
            abs_path = fs.path.join(downloads_root, relative)

            # Imported here rather than at module level, as in the
            # original code.
            from touch_fits import touch_fits

            try:
                PDS_LOGGER.open("Change fits file")
                PDS_LOGGER.log("info", f"Touching {abs_path}")
                touch_fits(abs_path)
            except Exception as e:
                PDS_LOGGER.exception(e)
            finally:
                PDS_LOGGER.close()
Beispiel #9
0
    def _run(self) -> None:
        """
        Build the deliverable directory for the current bundle version.

        Reads the changes dict from the working directory to find the
        bundle's VID, opens a view of that version of the archive,
        adds synthesized checksum and transfer manifests, and copies
        the resulting view into a newly created deliverable directory.

        A pre-existing deliverable directory is treated as an error
        (ValueError raised inside the try).  Exceptions raised inside
        the try are passed to ``PDS_LOGGER.exception``; the log is
        always closed.
        """
        working_dir: str = self.working_dir()
        archive_dir: str = self.archive_dir()
        deliverable_dir: str = self.deliverable_dir()
        # NOTE(review): manifest_dir is never read below; the call is
        # kept in case the accessor is relied on for a side effect --
        # confirm and remove if not.
        manifest_dir: str = self.manifest_dir()
        try:
            PDS_LOGGER.open("Create deliverable directory")
            if os.path.isdir(deliverable_dir):
                raise ValueError(
                    f"{deliverable_dir} cannot exist for MakeDeliverable.")

            changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
            changes_dict = read_changes_dict(changes_path)

            with make_osfs(archive_dir) as archive_osfs, make_multiversioned(
                    archive_osfs) as mv:
                bundle_segment = self._bundle_segment
                bundle_lid = LID.create_from_parts([bundle_segment])
                bundle_vid = changes_dict.vid(bundle_lid)
                bundle_lidvid = LIDVID.create_from_lid_and_vid(
                    bundle_lid, bundle_vid)
                version_view = VersionView(mv, bundle_lidvid)

                # open the database
                db_filepath = fs.path.join(working_dir, _BUNDLE_DB_NAME)
                bundle_db = create_bundle_db_from_os_filepath(db_filepath)

                # Files that exist only in the deliverable, keyed by
                # their path in the deliverable view.
                bundle_lidvid_str = str(bundle_lidvid)
                synth_files: Dict[str, bytes] = {}
                cm = make_checksum_manifest(bundle_db, bundle_lidvid_str,
                                            short_lidvid_to_dirpath)
                synth_files["/checksum.manifest.txt"] = cm.encode("utf-8")
                tm = make_transfer_manifest(bundle_db, bundle_lidvid_str,
                                            short_lidvid_to_dirpath)
                synth_files["/transfer.manifest.txt"] = tm.encode("utf-8")

                deliverable_view = DeliverableView(version_view, synth_files)

                os.mkdir(deliverable_dir)
                # Use the target filesystem as a context manager so it
                # is closed even if the copy fails.
                with OSFS(deliverable_dir) as deliverable_osfs:
                    copy_fs(deliverable_view, deliverable_osfs)
                PDS_LOGGER.log("info", f"Deliverable: {deliverable_dir}")
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()
Beispiel #10
0
 def files_match(dirpath: str) -> bool:
     """
     Report whether the component at ``dirpath`` has the same files,
     with the same contents, in ``primary_fs`` and
     ``latest_version_fs``.

     Returns False as soon as the two file listings differ or any
     file's bytes differ, logging what was detected.  Returns True
     otherwise.  The log is closed exactly once, by the ``finally``
     clause, on every exit path.
     """
     # All files in subcomponents will have a "$" in their path (it
     # comes after the name of the subcomponent), so by filtering
     # them out, we get only the files for this component.  PDS4
     # *does* allow directories in a component (that aren't part of
     # a subcomponent), so we use walk instead of listdir() to get
     # *all* the files, not just the top-level ones.
     primary_files = filter_to_primary_files(
         dirpath,
         (
             relpath(filepath)
             for filepath in SubFS(primary_fs, dirpath).walk.files()
             if "$" not in filepath
         ),
     )
     latest_files = filter_to_primary_files(
         dirpath,
         (
             relpath(filepath)
             for filepath in SubFS(latest_version_fs, dirpath).walk.files()
             if "$" not in filepath
         ),
     )
     try:
         PDS_LOGGER.open("File changes detected")
         if primary_files != latest_files:
             PDS_LOGGER.log(
                 "info",
                 f"CHANGE DETECTED IN {dirpath}: {primary_files} != {latest_files}",
             )
             # No explicit close here: the finally clause below runs
             # on return and closes the log.
             return False
         for filename in primary_files:
             filepath = join(dirpath, relpath(filename))
             if primary_fs.getbytes(filepath) != latest_version_fs.getbytes(
                 filepath
             ):
                 PDS_LOGGER.log(
                     "info", f"CHANGE DETECTED IN {filepath}; DIRPATH = {dirpath}"
                 )
                 return False
     except Exception as e:
         # NOTE(review): if PDS_LOGGER.exception returns normally,
         # control falls through to ``return True`` and a failed
         # comparison is reported as a match -- confirm this is
         # intended.
         PDS_LOGGER.exception(e)
     finally:
         PDS_LOGGER.close()
     return True
Beispiel #11
0
    def _do_downloads(
        self,
        working_dir: str,
        mast_downloads_dir: str,
        proposal_id: int,
    ) -> None:
        """
        Download the data files for ``proposal_id`` into
        ``working_dir``.

        ``mast_downloads_dir`` must not exist beforehand and must exist
        once the download has finished; ``working_dir`` is created if
        it is missing.  The proposal ID must be among those reported
        by the ``MastSlice``.  All failures are raised inside the try
        and passed to ``PDS_LOGGER.exception``; the log is always
        closed.
        """
        try:
            PDS_LOGGER.open("Download datafiles")
            # On a first pass <working_dir> shouldn't exist at all; on
            # a second pass <working_dir>/mastDownload shouldn't.
            downloads_already_present = os.path.isdir(mast_downloads_dir)
            if downloads_already_present:
                raise ValueError(
                    "<working_dir>/mastDownload should not exist.")

            # TODO These dates are wrong; they potentially collect too
            # much.  Does the date range need to be narrowed?
            mast_slice = MastSlice((1900, 1, 1), (2025, 1, 1), proposal_id)
            proposal_ids = mast_slice.get_proposal_ids()
            if proposal_id not in proposal_ids:
                raise KeyError(f"{proposal_id} not in {proposal_ids}")

            # Files are taken from the full list of ACCEPTED_SUFFIXES.
            products = mast_slice.to_product_set(proposal_id)
            if not os.path.isdir(working_dir):
                os.makedirs(working_dir)

            # TODO The documents should be downloaded here as well.
            products.download(working_dir)

            # TODO This might fail if there are no files, which might
            # not be a bad thing.
            PDS_LOGGER.log(
                "info", f"::::::::::mast_downloads_dir: {mast_downloads_dir}")
            PDS_LOGGER.log("info",
                           f"Download datafiles to {mast_downloads_dir}")
            if not os.path.isdir(mast_downloads_dir):
                raise ValueError(f"{mast_downloads_dir} doesn't exist.")
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()
Beispiel #12
0
 def dirs_match(dirpath: str) -> bool:
     """
     Report whether the subcomponent directories under ``dirpath``
     are the same in ``primary_fs`` and ``latest_version_fs`` and
     none of them is recorded as changed in ``result``.

     Only directories whose path contains "$" are considered.
     Returns False, logging the reason, if the two sets of
     directories differ or if ``result.changed`` is true for any of
     their LIDs; returns True otherwise.

     Raises KeyError if a directory's LID is missing from
     ``result.changes_dict``.  The log opened here is closed on every
     exit path, including when that KeyError is raised.
     """
     primary_dirs = filter_to_primary_dirs(
         dirpath,
         (
             relpath(subdir)
             for subdir in SubFS(primary_fs, dirpath).walk.dirs()
             if "$" in subdir
         ),
     )
     latest_dirs = filter_to_primary_dirs(
         dirpath,
         (
             relpath(subdir)
             for subdir in SubFS(latest_version_fs, dirpath).walk.dirs()
             if "$" in subdir
         ),
     )
     PDS_LOGGER.open("Directory changes detected")
     try:
         if primary_dirs == latest_dirs:
             for subdir in primary_dirs:
                 full_dirpath = join(dirpath, relpath(subdir))
                 lid = dirpath_to_lid(full_dirpath)
                 if lid not in result.changes_dict:
                     raise KeyError(f"{lid} not in changes_dict.")
                 if result.changed(lid):
                     PDS_LOGGER.log(
                         "info", f"CHANGE DETECTED in {dirpath}: {lid} changed"
                     )
                     return False
             return True
         else:
             # list of dirs does not match
             added = primary_dirs - latest_dirs
             removed = latest_dirs - primary_dirs
             if added and removed:
                 PDS_LOGGER.log(
                     "info",
                     f"CHANGE DETECTED IN {dirpath}: added {added}; removed {removed}",
                 )
             elif added:
                 PDS_LOGGER.log("info", f"CHANGE DETECTED IN {dirpath}: added {added}")
             else:  # removed
                 PDS_LOGGER.log(
                     "info", f"CHANGE DETECTED IN {dirpath}: removed {removed}"
                 )
             return False
     finally:
         PDS_LOGGER.close()
Beispiel #13
0
    def _run(self) -> None:
        """
        Clear out the working directory, keeping only the documents,
        MAST downloads and archive directories plus any entry whose
        path ends in ``.tar.gz`` or ``.db``.

        Does nothing if the working directory does not exist.
        Afterwards the remaining contents are logged; the log opened
        for that is closed even if listing or logging fails.
        """
        working_dir: str = self.working_dir()
        documents_dir: str = self.documents_dir()
        mast_downloads_dir: str = self.mast_downloads_dir()
        archive_dir: str = self.archive_dir()

        if not os.path.isdir(working_dir):
            return

        preserved_paths = (documents_dir, mast_downloads_dir, archive_dir)
        preserved_suffixes = (".tar.gz", ".db")
        for entry in os.listdir(working_dir):
            fullpath = os.path.join(working_dir, entry)
            if fullpath in preserved_paths or fullpath.endswith(
                    preserved_suffixes):
                continue
            if os.path.isdir(fullpath):
                shutil.rmtree(fullpath)
            else:
                os.unlink(fullpath)

        PDS_LOGGER.open("Re-reset pipeline")
        try:
            PDS_LOGGER.log(
                "info",
                f"contents of working_dir after re-reset: {os.listdir(working_dir)}"
            )
        finally:
            PDS_LOGGER.close()
Beispiel #14
0
    def _run(self) -> None:
        """
        Merge the primary files into a deltas layer over the current
        archive version.

        Calls ``_merge_primaries`` with the changes dict, the
        primary-files filesystem and the primary-deltas layer, then
        removes the ``<primary_files_dir>-sv`` directory and checks
        that the archive directory, the
        ``<archive_primary_deltas_dir>-deltas-sv`` directory and the
        changes-dict file all exist.

        A pre-existing deliverable directory is treated as an error.
        All failures are raised inside the try and passed to
        ``PDS_LOGGER.exception``; the log is always closed.
        """
        working_dir: str = self.working_dir()
        primary_files_dir: str = self.primary_files_dir()
        archive_dir: str = self.archive_dir()
        archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()
        try:
            PDS_LOGGER.open(
                "Create a directory for a new version of the bundle")
            if os.path.isdir(self.deliverable_dir()):
                raise ValueError(
                    f"{self.deliverable_dir()} cannot exist for InsertChanges."
                )

            changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
            with make_osfs(archive_dir) as archive_osfs, make_version_view(
                    archive_osfs, self._bundle_segment
            ) as version_view, make_sv_osfs(
                    primary_files_dir) as primary_files_osfs, make_sv_deltas(
                        version_view, archive_primary_deltas_dir) as sv_deltas:
                changes_dict = read_changes_dict(changes_path)
                _merge_primaries(changes_dict, primary_files_osfs, sv_deltas)

            shutil.rmtree(primary_files_dir + "-sv")
            if not os.path.isdir(archive_dir):
                raise ValueError(f"{archive_dir} doesn't exist.")
            dirpath = archive_primary_deltas_dir + "-deltas-sv"
            PDS_LOGGER.log("info", f"Directory for the new version: {dirpath}")
            if not os.path.isdir(dirpath):
                raise ValueError(f"{dirpath} doesn't exist.")
            if not os.path.isfile(changes_path):
                raise ValueError(f"{changes_path} is not a file.")
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()
Beispiel #15
0
def log_label(tag: str, lidvid: str) -> None:
    """
    Write an info-level log line of the form
    "<tag> label for <lidvid>".
    """
    message = f"{tag} label for {lidvid}"
    PDS_LOGGER.log("info", message)
Beispiel #16
0
    def _run(self) -> None:
        """
        Build or carry forward the browse collection for every
        collection in the bundle that requires one.

        The collections are the entries containing "$" in the bundle
        directory of the browse-deltas layer.  For each one that
        ``_requires_browse_collection`` accepts, a changed collection
        gets ``_build_browse_collection`` and an unchanged one gets
        ``_fill_in_old_browse_collection``.  The changes dict is
        written back afterwards.

        A pre-existing deliverable directory is treated as an error.
        All failures are raised inside the try and passed to
        ``PDS_LOGGER.exception``; the log is always closed.
        """
        try:
            PDS_LOGGER.open("BuildBrowse")
            PDS_LOGGER.log("info", "Entering BuildBrowse.")
            working_dir: str = self.working_dir()
            archive_dir: str = self.archive_dir()
            archive_primary_deltas_dir: str = self.archive_primary_deltas_dir()
            archive_browse_deltas_dir: str = self.archive_browse_deltas_dir()

            if os.path.isdir(self.deliverable_dir()):
                raise ValueError(f"{self.deliverable_dir()} cannot exist " +
                                 "for BuildBrowse.")

            changes_path = os.path.join(working_dir, CHANGES_DICT_NAME)
            changes_dict = read_changes_dict(changes_path)

            db = create_bundle_db_from_os_filepath(
                os.path.join(working_dir, _BUNDLE_DB_NAME))

            bundle_lid = LID.create_from_parts([self._bundle_segment])
            bundle_lidvid = LIDVID.create_from_lid_and_vid(
                bundle_lid, changes_dict.vid(bundle_lid))

            with make_osfs(archive_dir) as archive_osfs, \
                    make_version_view(
                        archive_osfs, self._bundle_segment) as version_view, \
                    make_sv_deltas(
                        version_view,
                        archive_primary_deltas_dir) as sv_deltas, \
                    make_sv_deltas(
                        sv_deltas,
                        archive_browse_deltas_dir) as browse_deltas:
                bundle_path = f"/{self._bundle_segment}$/"
                # Directory names carry one trailing character after
                # the segment name; strip it to recover the segment.
                collection_segments = [
                    str(dir_name[:-1])
                    for dir_name in browse_deltas.listdir(bundle_path)
                    if "$" in dir_name
                ]
                for collection_segment in collection_segments:
                    collection_lid = LID.create_from_parts(
                        [self._bundle_segment, collection_segment])
                    if not _requires_browse_collection(collection_segment):
                        continue

                    collection_lidvid = LIDVID.create_from_lid_and_vid(
                        collection_lid, changes_dict.vid(collection_lid))
                    if not changes_dict.changed(collection_lid):
                        _fill_in_old_browse_collection(
                            db, changes_dict, bundle_lidvid,
                            collection_lidvid)
                        continue

                    PDS_LOGGER.log(
                        "info", f"Making browse for {collection_lidvid}")
                    _build_browse_collection(
                        db,
                        changes_dict,
                        browse_deltas,
                        bundle_lidvid,
                        collection_lidvid,
                        bundle_path,
                    )

                write_changes_dict(changes_dict, changes_path)
            PDS_LOGGER.log("info", "Leaving BuildBrowse.")
        except Exception as e:
            PDS_LOGGER.exception(e)
        finally:
            PDS_LOGGER.close()