Example 1
 def rewrite(self, filename, uniqname):
     """Add a FITS checksum to `filename.`"""
     with data_file.fits_open(filename, mode="readonly", checksum=self.args.verify_file, do_not_scale_image_data=True) as hdus:
         verify_mode = "fix+warn" if not self.args.fits_errors else "fix+exception"
         if self.args.verify_file:
             hdus.verify(verify_mode)
         basefile = os.path.basename(filename)
         baseuniq = os.path.basename(uniqname)
         if self.args.add_keywords:
             now = datetime.datetime.utcnow()
             hdus[0].header["FILENAME"] = baseuniq
             hdus[0].header["ROOTNAME"] = os.path.splitext(baseuniq)[0].upper()
             hdus[0].header["HISTORY"] = "{0} renamed to {1} on {2} {3} {4}".format(
                 basefile, baseuniq, MONTHS[now.month - 1], now.day, now.year)
         if self.args.output_path:
             uniqname = os.path.join(self.args.output_path, baseuniq)
         try:
             log.info("Rewriting", self.format_file(filename), "-->", self.format_file(uniqname))
             hdus.writeto(uniqname, output_verify=verify_mode, checksum=self.args.add_checksum)
         except Exception as exc:
             if os.path.exists(uniqname):
                 os.remove(uniqname)
             if "buffer is too small" in str(exc):
                 raise CrdsError(
                     "Failed to rename/rewrite", repr(basefile),
                     "as", repr(baseuniq), ":", 
                     "probable file truncation", ":", str(exc)) from exc
             else:
                 raise CrdsError("Failed to rename/rewrite", repr(basefile),
                                 "as", repr(baseuniq), ":",
                                 str(exc)) from exc
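
Example 1 depends on CRDS helpers (data_file.fits_open, log, CrdsError).  A minimal
standalone sketch of the same verify-then-rewrite-with-checksum pattern, using
astropy.io.fits directly and hypothetical file names:

import os
from astropy.io import fits

def rewrite_with_checksum(filename, uniqname, verify_mode="fix+warn"):
    """Verify `filename`, then rewrite it as `uniqname` with FITS checksums added."""
    with fits.open(filename, mode="readonly", checksum=True,
                   do_not_scale_image_data=True) as hdus:
        hdus.verify(verify_mode)
        hdus[0].header["FILENAME"] = os.path.basename(uniqname)
        try:
            hdus.writeto(uniqname, output_verify=verify_mode, checksum=True)
        except Exception:
            # Mirror Example 1: never leave a partially written output file behind.
            if os.path.exists(uniqname):
                os.remove(uniqname)
            raise

# rewrite_with_checksum("old_flat.fits", "new_flat.fits")   # hypothetical files
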
Example 2
 def check_exptypes(self):
     """Based on EXP_TYPEs defined by CAL schema and the specified instrument
     contexts, print out log info on missing or unexpected coverage.
     """
     for imap_name in self.contexts:
         i_loaded = crds.get_cached_mapping(imap_name)
         s_exp_types = self.locator.get_exptypes(i_loaded.instrument)
         for exp_type in s_exp_types:
             reftypes = self.locator.get_reftypes(exp_type)
             for filekind in i_loaded.selections:
                 ufilekind = (i_loaded.instrument.upper(), filekind.upper())
                 rmap_name = i_loaded.selections[filekind]
                 if rmap_name == 'N/A':
                     if filekind in reftypes:
                         log.verbose("Reftype rmap", repr(ufilekind), "is defined as N/A for", repr(exp_type))
                 else:
                     r_loaded = i_loaded.get_rmap(filekind)
                     r_exp_types = r_loaded.get_parkey_map().get("META.EXPOSURE.TYPE", None)
                     if r_exp_types is None:   # ???
                         log.verbose("Reftype", repr(ufilekind), "does not match using EXP_TYPE.")
                     elif exp_type in r_exp_types:
                         if filekind in reftypes:
                             log.verbose("Reftype", repr(ufilekind), "explicitly mentions", repr(exp_type))
                         else:
                             log.warning("Reftype", repr(ufilekind), "has unexpected coverage for", repr(exp_type))
                     elif "ANY" in r_exp_types or "N/A" in r_exp_types:
                         log.verbose("Reftype", repr(ufilekind), "is satisfied by ANY or N/A for", repr(exp_type))
                     elif filekind in reftypes:
                         log.info("Reftype", repr(ufilekind), "is missing coverage for", repr(exp_type))
                     else:
                         log.verbose("Reftype", repr(ufilekind), "has no expected coverage for", repr(exp_type))
Example 3
def wfpc2_flatfile_filter(kmap):
    log.info("Hacking WFPC2 Flatfile.")
    # :  ('MODE', 'FILTER1', 'FILTER2', 'IMAGETYP', 'FILTNAM1', 'FILTNAM2', 'LRFWAVE'), ('DATE-OBS', 'TIME-OBS')),
    kmap[('*',    '*',       '*',       'EXT',       'FR*',     '*',      '# >3000 and <=4200 #')] = \
           [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c10045u.r4h', comment='')]
    kmap[('*',    '*',       '*',       'EXT',       '*',     'FR*',      '# >3000 and <=4200 #')] = \
           [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c10045u.r4h', comment='')]
    
    kmap[('*',    '*',       '*',       'EXT',       'FR*',     '*',      '# >4200 and <=5800 #')] = \
           [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c1004fu.r4h', comment='')]
    kmap[('*',    '*',       '*',       'EXT',       '*',     'FR*',      '# >4200 and <=5800 #')] = \
           [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c1004fu.r4h', comment='')]
    
    kmap[('*',    '*',       '*',       'EXT',       'FR*',     '*',      '# >5800 and <=7600 #')] = \
           [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c1004nu.r4h', comment='')]
    kmap[('*',    '*',       '*',       'EXT',       '*',     'FR*',      '# >5800 and <=7600 #')] = \
           [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c1004nu.r4h', comment='')]
    
    kmap[('*',    '*',       '*',       'EXT',       'FR*',     '*',      '# >7600 and <=10000 #')] = \
           [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c10052u.r4h', comment='')]
    kmap[('*',    '*',       '*',       'EXT',       '*',     'FR*',      '# >7600 and <=10000 #')] = \
           [rmap.Filemap(date='1990-01-01 00:00:00', file='m3c10052u.r4h', comment='')]

    header_additions = [
        ("hooks", {
            "fallback_header" : "fallback_header_wfpc2_flatfile_v1",
        }),
    ]
    
    return kmap, header_additions
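
The eight kmap assignments in Example 3 differ only in the LRFWAVE range and the
flat file; a data-driven sketch of the same table, assuming the same rmap.Filemap
import used above:

def wfpc2_flatfile_filter_compact(kmap):
    """Equivalent of Example 3's repeated kmap assignments, written as a loop."""
    lrf_ranges = [
        ('# >3000 and <=4200 #', 'm3c10045u.r4h'),
        ('# >4200 and <=5800 #', 'm3c1004fu.r4h'),
        ('# >5800 and <=7600 #', 'm3c1004nu.r4h'),
        ('# >7600 and <=10000 #', 'm3c10052u.r4h'),
    ]
    for lrfwave, flatfile in lrf_ranges:
        for filtnam1, filtnam2 in (('FR*', '*'), ('*', 'FR*')):
            kmap[('*', '*', '*', 'EXT', filtnam1, filtnam2, lrfwave)] = \
                [rmap.Filemap(date='1990-01-01 00:00:00', file=flatfile, comment='')]
    return kmap
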
Example 4
    def ingest_files(self):
        """Copy self.files into the user's ingest directory on the CRDS server."""
        stats = self._start_stats()
        destination = self.submission_info.ingest_dir
        host, path = destination.split(":")
        self.ensure_ingest_exists(host, path)
        total_size = utils.total_size(self.files)

        ingest_info = self.get_ingested_files()

        self.scan_for_nonsubmitted_ingests(ingest_info)

        remaining_files = self.keep_existing_files(ingest_info, self.files) \
            if self.args.keep_existing_files else self.files

        for i, filename in enumerate(remaining_files):
            file_size = utils.file_size(filename)
            log.info("Copy started", repr(filename), "[", i+1, "/", len(self.files), " files ]",
                     "[", utils.human_format_number(file_size), 
                     "/", utils.human_format_number(total_size), " bytes ]")
            self.copy_file(filename, path, destination)
            stats.increment("bytes", file_size)
            stats.increment("files", 1)
            stats.log_status("files", "Copy complete", len(self.files))
            stats.log_status("bytes", "Copy complete", total_size)

        log.divider(func=log.verbose)
        stats.report()
        log.divider(char="=")
Example 5
 def polled(self):
     """Output the latest affected datasets taken from the history starting item onward.
     Since the history drives and ultimately precedes any affected datasets computation,  there's
     no guarantee that every history item is available.
     """
     assert 0 <= self.history_start < len(self.history), \
         "Invalid history interval with starting index " + repr(self.history_start)
     assert 0 <= self.history_stop < len(self.history), \
         "Invalid history interval with stopping index " + repr(self.history_stop)
     assert self.history_start <= self.history_stop, \
         "Invalid history interval,  start > stop."
     effects = []
     for i in range(self.history_start, self.history_stop):
         old_context = self.history[i][1]
         new_context = self.history[i + 1][1]
         if old_context > new_context:  # skip over backward transitions,  no output.
             continue
         log.info("Fetching effects for", (i, ) + self.history[i + 1])
         affected = self.get_affected(old_context, new_context)
         if affected:
             effects.append((i, affected))
     return effects
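
Example 5 pairs each history entry with its successor by index; the same walk can
be written with zip, avoiding the off-by-one bookkeeping.  A toy sketch with
hypothetical history entries:

history = [("2023-01-01", "jwst_0100.pmap"),
           ("2023-02-01", "jwst_0101.pmap"),
           ("2023-03-01", "jwst_0099.pmap")]

for i, ((_, old_context), (_, new_context)) in enumerate(zip(history, history[1:])):
    if old_context > new_context:   # skip backward transitions, as in Example 5
        continue
    print(i, old_context, "-->", new_context)
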
Example 6
 def sync_datasets(self):
     """Sync mappings and references for datasets with respect to `self.contexts`."""
     if not self.contexts:
         log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.""")
         sys.exit(-1)
     active_references = []
     for context in self.contexts:
         if self.args.dataset_ids:
             if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                 with open(self.args.dataset_ids[0][1:]) as pfile:
                     self.args.dataset_ids = pfile.read().splitlines()
             with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                 id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
         for dataset in self.args.dataset_files or self.args.dataset_ids:
             log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
             with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                 if self.args.dataset_files:
                     headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                 else:
                     headers = { dataset_id : header for (dataset_id, header) in id_headers.items() if
                                 dataset.upper() in dataset_id }
                 for assc_dataset, header in headers.items():
                     with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset),
                                                 "under context", repr(context)):
                         bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory,
                                                            ignore_cache=self.args.ignore_cache)
                         log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                         active_references.extend(bestrefs.values())
     active_references = [ ref for ref in active_references if not ref.startswith("NOT FOUND") ]
     log.verbose("Syncing references:", repr(active_references))
     return list(set(active_references))
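
Example 6 supports an @-file convention: a lone --dataset-ids argument starting
with "@" names a file containing one dataset id per line.  The expansion step in
isolation:

def expand_at_file(dataset_ids):
    """Replace a lone '@path' argument with the lines of the named file."""
    if len(dataset_ids) == 1 and dataset_ids[0].startswith("@"):
        with open(dataset_ids[0][1:]) as pfile:
            return pfile.read().splitlines()
    return dataset_ids

# expand_at_file(["@ids.txt"]) --> contents of ids.txt, one id per line (hypothetical file)
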
Example 7
 def verify_context_change(self, old_context):
     """Verify that the starting and post-sync contexts are different,  or issue an error."""
     new_context = heavy_client.load_server_info(self.observatory).operational_context
     if old_context == new_context:
         log.error("Expected operational context switch but starting and post-sync contexts are both", repr(old_context))
     else:
         log.info("Operational context updated from", repr(old_context), "to",  repr(new_context))
Example 8
 def wipe_files(self):
     """Delete all files from the user's ingest directory on the CRDS server."""
     log.divider(name="wipe files", char="=")
     ingest_info = self.get_ingested_files()
     for basename in ingest_info:
         log.info("Wiping file", repr(basename))
         self.connection.get(ingest_info[basename]["deleteUrl"])
Example 9
    def ingest_files(self):
        """Upload self.files to the user's ingest directory on the CRDS server."""
        stats = self._start_stats()
        total_size = utils.total_size(self.files)

        ingest_info = self.get_ingested_files()

        self.scan_for_nonsubmitted_ingests(ingest_info)

        remaining_files = self.keep_existing_files(ingest_info, self.files) \
            if self.args.keep_existing_files else self.files

        for i, filename in enumerate(remaining_files):
            file_size = utils.file_size(filename)
            log.info("Upload started", repr(filename), "[", i + 1, "/",
                     len(self.files), " files ]", "[",
                     utils.human_format_number(file_size), "/",
                     utils.human_format_number(total_size), " bytes ]")
            self.connection.upload_file("/upload/new/", filename)
            stats.increment("bytes", file_size)
            stats.increment("files", 1)
            stats.log_status("files", "Upload complete", len(self.files))
            stats.log_status("bytes", "Upload complete", total_size)

        log.divider(func=log.verbose)
        stats.report()
        log.divider(char="=")
Example 10
 def download_files(self, downloads, localpaths):
     """Serial file-by-file download."""
     download_metadata = get_download_metadata()
     self.info_map = {}
     for filename in downloads:
         self.info_map[filename] = download_metadata.get(
             filename, "NOT FOUND unknown to server")
     if config.writable_cache_or_verbose(
             "Readonly cache, skipping download of (first 5):",
             repr(downloads[:5]),
             verbosity=70):
         bytes_so_far = 0
         total_files = len(downloads)
         total_bytes = get_total_bytes(self.info_map)
         for nth_file, name in enumerate(downloads):
             try:
                 if "NOT FOUND" in self.info_map[name]:
                     raise CrdsDownloadError(
                         "file is not known to CRDS server.")
                 bytes, path = self.catalog_file_size(
                     name), localpaths[name]
                 log.info(
                     file_progress("Fetching", name, path, bytes,
                                   bytes_so_far, total_bytes, nth_file,
                                   total_files))
                 self.download(name, path)
                 bytes_so_far += os.stat(path).st_size
             except Exception as exc:
                 if self.raise_exceptions:
                     raise
                 else:
                     log.error("Failure downloading file", repr(name), ":",
                               str(exc))
         return bytes_so_far
     return 0
Example 11
    def main(self):
        """Generate names corrsponding to files listed on the command line."""
        if self.args.standard:
            self.args.add_keywords = True
            self.args.verify_file = True

        if not self.args.files:
            return

        for filename in self.files:
            assert config.is_reference(filename), \
                "File " + repr(filename) + " does not appear to be a reference file.  Only references can be renamed."
            uniqname = naming.generate_unique_name(filename, self.observatory)
            if self.args.dry_run:
                log.info("Would rename", self.format_file(filename), "-->",
                         self.format_file(uniqname))
            else:
                self.rewrite(filename, uniqname)
                if self.args.remove_original:
                    os.remove(filename)

        # XXXX script returns filename result not suitable as program exit status
        # XXXX filename result is insufficient if multiple files are specified.
        # XXXX filename result supports embedded use on web server returning new name.
        return uniqname
Example 12
def set_rmap_substitution(rmapping, new_filename, parameter_name, old_text, new_text, *args, **keys):
    """Add a substitution for `parameter_name` mapping `old_text` --> `new_text`
    to the rmap header and write the result to `new_filename`."""
    log.info("Adding substitution for", srepr(parameter_name), 
             "from", srepr(old_text), "to", srepr(new_text), "in", srepr(rmapping.basename))
    new_mapping = rmapping.copy()
    if "substitutions" not in new_mapping.header:
        new_mapping.header["substitutions"] = {}
    new_mapping.header["substitutions"][parameter_name] = { old_text : new_text }
    new_mapping.write(new_filename)
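
The header edit in Example 12 builds a nested dict; a quick sketch of the
resulting structure using plain dicts (parameter name and values hypothetical),
where setdefault replaces the explicit membership test:

header = {}   # stands in for rmapping.header
header.setdefault("substitutions", {})["DETECTOR"] = {"OLD_NAME": "NEW_NAME"}
print(header)   # {'substitutions': {'DETECTOR': {'OLD_NAME': 'NEW_NAME'}}}
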
Example 13
 def push_context(self):
     """Push the final context recorded in the local cache to the CRDS server so it can be displayed
     as the operational state of a pipeline.
     """
     info = heavy_client.load_server_info(self.observatory)
     with log.error_on_exception("Failed pushing cached operational context name to CRDS server"):
         api.push_remote_context(self.observatory, "operational", self.args.push_context, info.operational_context)
         log.info("Pushed cached operational context name", repr(info.operational_context), "to CRDS server")
Example 14
 def error_and_repair(self, file, *args, **keys):
     """Issue an error message and repair `file` if requested by command line args."""
     log.error(*args, **keys)
     if self.args.repair_files:
         if config.writable_cache_or_info("Skipping remove and re-download of", repr(file)):
             log.info("Repairing file", repr(file))
             utils.remove(file, observatory=self.observatory)
             self.dump_files(self.default_context, [file])
Example 15
 def get_ingested_files(self):
     """Return the server-side JSON info on the files already in the submitter's ingest directory."""
     log.info("Determining existing files.")
     result = self.connection.get('/upload/list/').json()
     log.verbose("JSON info on existing ingested files:\n", log.PP(result))
     if "files" in result and isinstance(result["files"], list):
         return { info["name"] : info for info in result["files"] }
     else:
         return { info["name"] : info for info in result }
Example 16
 def _start_stats(self):
     """Helper method to initialize stats keeping for ingest."""
     total_bytes = utils.total_size(self.files)
     stats = utils.TimingStats(output=log.verbose)
     stats.start()
     log.divider(name="ingest files", char="=")
     log.info("Copying", len(self.files), "file(s) totalling", utils.human_format_number(total_bytes), "bytes")
     log.divider(func=log.verbose)
     return stats
Example 17
def save_json_specs(specs, combined_specs_path):
    """Write out the specs dictionary returned by _load_specs() as .json in one combined file."""
    specs_json = json.dumps(specs,
                            indent=4,
                            sort_keys=True,
                            separators=(',', ':'))
    with open(combined_specs_path, "w+") as specs_file:
        specs_file.write(specs_json)
        log.info("Saved combined type specs to", repr(combined_specs_path))
Example 18
 def _start_stats(self):
     """Helper method to initialize stats keeping for ingest."""
     total_bytes = utils.total_size(self.files)
     stats = utils.TimingStats(output=log.verbose)
     stats.start()
     log.divider(name="ingest files", char="=")
     log.info("Uploading", len(self.files), "file(s) totalling", utils.human_format_number(total_bytes), "bytes")
     log.divider(func=log.verbose)
     return stats
Example 19
 def clear_pickles(self):
     """Remove all pickles."""
     log.info(
         "Removing all context pickles.  Use --save-pickles to recreate for specified contexts."
     )
     for path in rmap.list_pickles("*.pmap",
                                   self.observatory,
                                   full_path=True):
         if os.path.exists(path):
             utils.remove(path, self.observatory)
Example 20
 def _submission(self, relative_url):
     """Do a generic submission re-post to the specified relative_url."""
     assert self.args.description is not None, "You must supply a --description for this function."
     self.ingest_files()
     log.info("Posting web request for", srepr(relative_url))
     submission_args = self.get_submission_args()
     completion_args = self.connection.repost_start(relative_url, **submission_args)
     # give POST time to complete send, not response
     time.sleep(10)
     return completion_args
Example 21
def update_header_names(name_map):
    """Update the .name and .derived_from fields in mapping new_path.header
    to reflect derivation from old_path and name new_path.
    """
    for old_path, new_path in sorted(name_map.items()):
        old_base, new_base = os.path.basename(old_path), os.path.basename(new_path)
        refactor.update_derivation(new_path, old_base)
        log.info("Adjusting name", repr(new_base), "derived_from", repr(old_base), 
                 "in", repr(new_path))
    return name_map # no change
Example 22
def hack_in_new_maps(old, new, updated_maps):
    """Given mapping named `old`,  create a modified copy named `new` which
    installs each map of `updated_maps` in place of its predecessor.
    """
    copy_mapping(old, new)  
    for mapping in sorted(updated_maps):
        key, replaced, replacement = insert_mapping(new, mapping)
        if replaced:
            log.info("Replaced", repr(replaced), "with", repr(replacement), "for", repr(key), "in", repr(old), "producing", repr(new))
        else:
            log.info("Added", repr(replacement), "for", repr(key), "in", repr(old), "producing", repr(new))
Example 23
def get_data_model_flat_dict(filepath):
    """Get the header from `filepath` using the jwst data model."""
    datamodels = get_datamodels()
    log.info("Checking JWST datamodels.")
    # with log.error_on_exception("JWST Data Model (jwst.datamodels)"):
    try:
        with datamodels.open(filepath) as d_model:
            flat_dict = d_model.to_flat_dict(include_arrays=False)
    except Exception as exc:
        raise exceptions.ValidationError("JWST Data Models:", str(exc).replace("u'","'")) from exc
    return flat_dict
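
A short usage sketch for Example 23, assuming the jwst package is installed and a
hypothetical input file; to_flat_dict keys are dotted lowercase paths such as
"meta.instrument.name":

# flat = get_data_model_flat_dict("my_file.fits")   # hypothetical JWST product
# print(flat.get("meta.instrument.name"))
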
Example 24
def del_rmap_parameter(rmapping, new_filename, parameter, *args, **keys):
    """Delete `parameter_name` from the parkey item of the `types` of the specified
    `instruments` in `context`.
    """
    log.info("Deleting parameter", repr(parameter), "from",repr(rmapping.basename))
    parkey = rmapping.parkey
    i, j = get_parameter_index(parkey, parameter)
    del_parkey = parkey[:i] +  ((parkey[i][:j] + parkey[i][j+1:]),)  + parkey[i+1:]
    log.verbose("Replacing", srepr(parkey), "with", srepr(del_parkey), "in", srepr(rmapping.basename))
    rmapping.header["parkey"] = del_parkey
    rmapping.selector.delete_match_param(parameter)
    rmapping.write(new_filename)
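
The tuple surgery in Example 24 removes element j of nested tuple i while keeping
the parkey immutable.  A worked toy case (parkey values hypothetical):

parkey = (("META.INSTRUMENT.DETECTOR", "META.INSTRUMENT.FILTER"),
          ("META.OBSERVATION.DATE", "META.OBSERVATION.TIME"))
i, j = 0, 1   # drop "META.INSTRUMENT.FILTER"
del_parkey = parkey[:i] + ((parkey[i][:j] + parkey[i][j+1:]),) + parkey[i+1:]
print(del_parkey)
# (('META.INSTRUMENT.DETECTOR',), ('META.OBSERVATION.DATE', 'META.OBSERVATION.TIME'))
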
Example 25
 def wipe_files(self):
     """Copy self.files into the user's ingest directory on the CRDS server."""
     destination = self.submission_info.ingest_dir
     log.divider(name="wipe files", char="=")
     log.info("Wiping files at", repr(destination))
     host, path = destination.split(":")
     if destination.startswith(socket.gethostname()):
         output = pysh.out_err("rm -vf  ${path}/*")
     else:
         output = pysh.out_err("ssh ${host} rm -vf ${path}/*")
     if output:
         log.verbose(output)
Example 26
def remove_checksum(file_):
    """Remove checksums from `file_`."""
    log.info("Removing checksum for", repr(file_))
    if config.is_reference(file_):
        data_file.remove_checksum(file_)
    elif rmap.is_mapping(file_):
        raise exceptions.CrdsError("Mapping checksums cannot be removed for:",
                                   repr(file_))
    else:
        raise exceptions.CrdsError(
            "File", repr(file_),
            "does not appear to be a CRDS reference or mapping file.")
Example 27
 def main(self):
     """Check files for availability from the archive."""
     self.require_server_connection()
     log.info("Mapping URL:", repr(self.mapping_url))
     log.info("Reference URL:", repr(self.reference_url))
     stats = utils.TimingStats()
     self.init_files(self.files)
     for filename in self.files:
         self.verify_archive_file(filename)
         stats.increment("files")
     self.print_files()
     stats.report_stat("files")
     log.standard_status()
Example 28
def add_checksum(file_):
    """Add checksums to file_."""
    log.info("Adding checksum for", repr(file_))
    if config.is_reference(file_):
        with log.error_on_exception("Failed updating checksum for",
                                    repr(file_)):
            data_file.add_checksum(file_)
    elif rmap.is_mapping(file_):
        update_mapping_checksum(file_)
    else:
        raise exceptions.CrdsError(
            "File", repr(file_),
            "does not appear to be a CRDS reference or mapping file.")
Example 29
 def _setup_source_context(self):
     """Default the --source-context if necessary and then translate any symbolic name to a literal .pmap
     name.  e.g.  jwst-edit -->  jwst_0109.pmap.   Then optionally sync the files to a local cache.
     """
     if self.args.source_context is None:
         self.source_context = self.observatory + "-edit"
         log.info("Defaulting --source-context to", srepr(self.source_context))
     else:
         self.source_context = self.args.source_context
     self.source_context = self.resolve_context(self.source_context)
     if self.args.sync_files:
         errs = sync.SyncScript("crds.sync --contexts {}".format(self.source_context))()
         assert not errs, "Errors occurred while syncing all rules to CRDS cache."
Example 30
 def remove_dir(self, instrument):
     """Remove an instrument cache directory and any associated legacy link."""
     if config.writable_cache_or_info("Skipping remove instrument", repr(instrument), "directory."):
         crds_refpath = config.get_crds_refpath(self.observatory)
         prefix = self.locator.get_env_prefix(instrument)
         rootdir = os.path.join(crds_refpath, instrument)
         refdir = os.path.join(crds_refpath, prefix[:-1])
         if len(glob.glob(os.path.join(rootdir, "*"))):
             log.info("Residual files in '{}'. Not removing.".format(rootdir))
             return
         if os.path.exists(refdir):   # skip crds://  vs.  oref
             utils.remove(refdir, observatory=self.observatory)
         utils.remove(rootdir, observatory=self.observatory)
Example 31
 def handle_done(self, message):
     """Generic "done" handler issue info() message and stops monitoring / exits."""
     status = message.data["status"]
     result = message.data.get("result", None)
     if status == 0:
         log.info(self.format_remote("COMPLETED:", result))
     elif status == 1:
         log.error(self.format_remote("FAILED:", result))
     elif status == 2:
         log.error(self.format_remote("CANCELLED:", result))
     else:
         log.info(self.format_remote("DONE:", result))
     self.result = result
     return result
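
The status branching in Example 31 can also be phrased as a lookup table, keeping
each status code next to its label and log level.  An illustrative sketch with
print standing in for the log calls:

HANDLERS = {0: ("COMPLETED:", "info"),
            1: ("FAILED:", "error"),
            2: ("CANCELLED:", "error")}

def handle_done(message):
    """Table-driven version of Example 31's status dispatch."""
    status = message["status"]
    result = message.get("result")
    label, level = HANDLERS.get(status, ("DONE:", "info"))
    print(level.upper(), label, result)   # stands in for log.info / log.error
    return result

handle_done({"status": 1, "result": "bestrefs failed"})   # ERROR FAILED: bestrefs failed
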
Example 32
 def handle_done(self, message):
     """Generic "done" handler issue info() message and stops monitoring / exits."""
     status = message.data["status"]
     result = message.data.get("result", None)
     if status == 0:
         log.info(self.format_remote("COMPLETED:", result))
     elif status == 1:
         log.fatal_error(self.format_remote("FAILED:", result))
     elif status == 2:
         log.error(self.format_remote("CANCELLED:", result))
     else:
         log.info(self.format_remote("DONE:", result))
     self.result = result
     return result
Example 33
def mapping_check_diffs(mapping, derived_from):
    """Issue warnings for *deletions* in self relative to parent derived_from
    mapping.  Issue warnings for *reversions*,  defined as replacements
    where the replacement is older than the original,  as defined by the names.
    
    This is intended to check for missing modes and for inadvertent reversions
    to earlier versions of files.   For speed and simplicity,  file time order
    is currently determined by the names themselves,  not file contents, file
    system,  or database info.
    """
    mapping = rmap.asmapping(mapping, cached="readonly")
    derived_from = rmap.asmapping(derived_from, cached="readonly")
    log.info("Checking diffs from", repr(derived_from.basename), "to", repr(mapping.basename))
    diffs = derived_from.difference(mapping)
    mapping_check_diffs_core(diffs)
Example 34
def check_sha1sums(filepaths, observatory=None):
    """Given a list of filepaths which nominally will be submitted
    to CRDS for project `observatory`,  check to see if any are
    bit-for-bit identical with existing files as determined by
    the CRDS server's catalog and sha1sum matching.
    
    filepaths  [str, ...]   paths of files to be checked for preexistence
    observatory   str    e.g. 'hst' or 'jwst'

    Returns    count of duplicate files
    """
    log.info("Checking local file sha1sums vs. CRDS server to identify files already in CRDS.")
    sha1sums = get_all_sha1sums(observatory)
    for filepath in filepaths:
        check_sha1sum(filepath, sha1sums, observatory)
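
Example 34 compares local sha1sums with the server catalog; computing the local
digest needs only hashlib.  A sketch with a hypothetical path:

import hashlib

def local_sha1sum(filepath, blocksize=2**20):
    """Return the hex sha1 digest of `filepath`, reading in 1 MB blocks."""
    digest = hashlib.sha1()
    with open(filepath, "rb") as handle:
        for block in iter(lambda: handle.read(blocksize), b""):
            digest.update(block)
    return digest.hexdigest()

# local_sha1sum("my_reference.fits") --> 40-character hex digest string
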
Example 35
    def __init__(self, context, datasets, datasets_since):
        """"Contact the CRDS server and get headers for the list of `datasets` ids with respect to `context`."""
        super(DatasetHeaderGenerator, self).__init__(context, datasets, datasets_since)
        server = api.get_crds_server()
        log.info("Dumping dataset parameters from CRDS server at", repr(server), "for", repr(datasets))
        self.headers = api.get_dataset_headers_by_id(context, datasets)
        log.info("Dumped", len(self.headers), "of", len(datasets), "datasets from CRDS server at", repr(server))

        # every command line id should correspond to 1 or more headers
        for source in self.sources:
            if self.matching_two_part_id(source) not in self.headers.keys():
                log.warning("Dataset", repr(source), "isn't represented by downloaded parameters.")

        # Process according to downloaded 2-part ids,  not command line ids.
        self.sources = sorted(self.headers.keys())
Example 36
def set_rmap_parkey(rmapping, new_filename, parkey, *args, **keys):
    """Set the parkey of `rmapping` to `parkey` and write out to `new_filename`.
    """
    log.info("Setting parkey, removing all references from", srepr(rmapping.basename))
    pktuple = eval(parkey)
    required_keywords = tuple(utils.flatten(pktuple))
    refnames = rmapping.reference_names()
    references_headers = { refname : get_refactoring_header(rmapping.filename, refname, required_keywords)
                           for refname in refnames }
    rmapping = rmap_delete_references(rmapping.filename, new_filename, refnames)
    log.info("Setting parkey", srepr(parkey), "in", srepr(rmapping.basename))
    rmapping.header["parkey"] = pktuple
    rmapping.write(new_filename)
    rmapping = rmap.load_mapping(new_filename)
    rmapping = rmap_insert_references_by_matches(new_filename, new_filename, references_headers)
    return rmapping
Example 37
 def _process_rmap(self, func, rmapping, *args, **keys):
     """Execute `func` on a single `rmapping` passing along *args and **keys"""
     keywords = dict(keys)
     rmapping_org = rmapping
     new_filename  = rmapping.filename if self.args.inplace else os.path.join(".", rmapping.basename)
     if os.path.exists(new_filename):
         log.info("Continuing refactoring from local copy", srepr(new_filename))
         rmapping = rmap.load_mapping(new_filename)
     keywords.update(locals())
     fixers = self.args.fixers
     if fixers:
         rmapping = rmap.load_mapping(rmapping.filename)
         keywords.update(locals())
         apply_rmap_fixers(*args, **keywords)
     func(*args, **keywords)
     return new_filename
Example 38
 def get_affected(self, old_context, new_context):
     """Return the affected datasets Struct for the transition from old_context to new_context,  
     or None if the results aren't ready yet.   
     """
     try:
         affected = api.get_affected_datasets(self.observatory, old_context, new_context)
     except Exception as exc:
         if "No precomputed affected datasets results exist" in str(exc):
             if self.args.ignore_missing_results:
                 log.info("No results for", old_context, "-->", new_context, "ignoring and proceeding.")
                 affected = None
             else:
                 self.fatal_error("Results for", old_context, "-->", new_context, "don't exist or are not yet complete.")
         else:
             self.fatal_error("get_affected_datasets failed: ", str(exc).replace("OtherError:",""))
     return affected
Example 39
def cat_rmap(rmapping, new_filename, header_key, *args, **keys):
    """Cat/print rmapping's source text or the value of `header_key` in the rmap header."""
    if header_key is not None:
        log.info("In", srepr(rmapping.basename), "parameter", srepr(header_key), "=", srepr(rmapping.header[header_key]))
    else:
        log.info("-"*80)
        log.info("Rmap", srepr(rmapping.basename), "is:")
        log.info("-"*80)
        log.write(str(rmapping))
Example 40
 def insert_references(self):
     """Insert files specified by --references into the appropriate rmaps identified by --source-context."""
     self._setup_source_context()
     categorized = self.categorize_files(self.args.references)
     pmap = crds.get_pickled_mapping(self.source_context)  # reviewed
     self.args.rmaps = []
     for (instrument, filekind) in categorized:
         try:
             self.args.rmaps.append(pmap.get_imap(instrument).get_rmap(filekind).filename)
         except crexc.CrdsError:
             log.info("Existing rmap for", (instrument, filekind), "not found.  Trying empty spec.")
             spec_file = os.path.join(
                 os.path.dirname(self.obs_pkg.__file__), "specs", instrument + "_" + filekind + ".rmap")
             rmapping = rmap.asmapping(spec_file)
             log.info("Loaded spec file from", repr(spec_file))
             self.args.rmaps.append(spec_file)
     self.rmap_apply(insert_rmap_references, categorized=categorized)
Example 41
 def polled(self):
     """Output the latest affected datasets taken from the history starting item onward.
     Since the history drives and ultimately precedes any affected datasets computation,  there's
     no guarantee that every history item is available.
     """
     assert 0 <= self.history_start < len(self.history), "Invalid history interval with starting index " + repr(self.history_start)
     assert 0 <= self.history_stop  < len(self.history), "Invalid history interval with stopping index " + repr(self.history_stop)
     assert self.history_start <= self.history_stop, "Invalid history interval,  start > stop."
     effects = []
     for i in range(self.history_start, self.history_stop):
         log.info("Fetching effects for", (i,) + self.history[i+1])
         old_context = self.history[i][1]
         new_context = self.history[i+1][1]
         affected = self.get_affected(old_context, new_context)
         if affected:
             effects.append((i, affected))
     return effects
Example 42
 def _submission(self, relative_url):
     """Do a generic submission re-post to the specified relative_url."""
     assert self.args.description is not None, "You must supply a --description for this function."
     self.ingest_files()
     log.info("Posting web request for", srepr(relative_url))
     completion_args = self.connection.repost_start(
         relative_url,
         pmap_mode = self.pmap_mode,
         pmap_name = self.pmap_name,
         instrument = self.instrument,
         change_level=self.args.change_level,
         creator=self.args.creator,
         description=self.args.description,
         auto_rename=not self.args.dont_auto_rename,
         compare_old_reference=not self.args.dont_compare_old_reference,
         )
     # give POST time to complete send, not response
     time.sleep(10)
     return completion_args
Example 43
    def handle_misc_switches(self):
        """Handle command line switches with simple side-effects that should precede
        other sync operations.
        """
        if self.args.dry_run:
            config.set_cache_readonly(True)

        if self.args.repair_files:
            self.args.check_files = True

        if self.args.output_dir:
            os.environ["CRDS_MAPPATH_SINGLE"] = self.args.output_dir
            os.environ["CRDS_REFPATH_SINGLE"] = self.args.output_dir
            os.environ["CRDS_CFGPATH_SINGLE"] = self.args.output_dir
            os.environ["CRDS_PICKLEPATH_SINGLE"] = self.args.output_dir

        if self.readonly_cache:
            log.info("Syncing READONLY cache,  only checking functions are enabled.")
            log.info("All cached updates, context changes, and file downloads are inhibited.")
Example 44
 def log_all_ids(self, effects, ids):
     """PLUGIN: Summary output after all contexts processed."""
     if self.args.quiet:
         return 
     if not effects:
         log.info("No new results are available.")
     else:
         if not ids:
             log.info("No ids were affected.")
         print("#"*100, file=sys.stderr)
         log.info("Contributing context switches =", len(effects))
         log.info("Total products affected =", len(ids))
     log.standard_status()