Ejemplo n.º 1
0
 def dump_dataset_headers(self):
     """Print the matching parameters for the datasets selected on the
     command line.

     Headers are fetched for every context in `self.contexts`, either by
     dataset id (`self.args.datasets`) or, failing that, by instrument
     (`self.args.instrument`).  Results are grouped by dataset id so that the
     headers of one dataset under all contexts are printed together.

     When neither selection is given there is nothing to dump and the method
     returns without output instead of failing on an unbound `headers`.
     """
     if not (self.args.datasets or self.args.instrument):
         return
     multi_context_headers = defaultdict(list)
     for context in self.contexts:
         if self.args.datasets:
             headers = api.get_dataset_headers_by_id(
                 context, self.args.datasets)
         else:  # guaranteed by the guard above: --instrument was given
             headers = api.get_dataset_headers_by_instrument(
                 context, self.args.instrument)
         for dataset_id, header in headers.items():
             multi_context_headers[dataset_id].append((context, header))
     # The context column is only printed when more than one context is used.
     single_context = len(self.contexts) == 1
     for dataset_id, context_headers in multi_context_headers.items():
         for (context, header) in context_headers:
             if self.args.condition_values:
                 header = utils.condition_header(header)
             if self.args.minimize_headers:
                 header = crds.get_cached_mapping(context).minimize_header(
                     header)
             if single_context:
                 print(dataset_id, ":", log.format_parameter_list(header))
             else:
                 print(dataset_id, ":", context, ":",
                       log.format_parameter_list(header))
Ejemplo n.º 2
0
 def list_dataset_headers(self):
     """Show the matching parameters of the datasets in self.args.dataset_headers.

     For each context in self.contexts the headers are downloaded with
     api.get_dataset_headers_by_id().  Every requested id is then compared,
     as a case-insensitive substring, against the returned ids in sorted order.
     A string "header" is reported as an error (the string is the reason).
     Otherwise either the expanded id is printed (--id-expansions-only) or
     the header is logged, optionally minimized, without its REFTYPE key and
     with the returned id added under "dataset_id".
     """
     for context in self.contexts:
         with log.error_on_exception("Failed fetching dataset parameters with repect to", repr(context),
                                     "for", repr(self.args.dataset_headers)):
             pars = api.get_dataset_headers_by_id(context, self.args.dataset_headers)
             pmap = crds.get_cached_mapping(context)
             for requested_id in self.args.dataset_headers:
                 for returned_id in sorted(pars):
                     if requested_id.upper() not in returned_id.upper():
                         continue
                     header = pars[returned_id]
                     if isinstance(header, python23.string_types):
                         # The server returned a failure reason rather than a header.
                         log.error("No header for", repr(returned_id), ":", repr(header))
                         continue
                     if self.args.id_expansions_only:
                         context_label = context if len(self.contexts) > 1 else ""
                         print(returned_id, context_label)
                     else:
                         header2 = (pmap.minimize_header(header)
                                    if self.args.minimize_headers else dict(header))
                         header2.pop("REFTYPE", None)
                         header2["dataset_id"] = returned_id
                         log.info("Dataset pars for", repr(returned_id), "with respect to", repr(context) + ":\n",
                                  log.PP(header2))
                     if self.args.first_id_expansion_only:
                         break  # stop after the first usable expansion of this id
Ejemplo n.º 3
0
 def sync_datasets(self):
     """Collect the references needed by the requested datasets under `self.contexts`.

     Datasets come from `self.args.dataset_files` (headers read from the files)
     or from `self.args.dataset_ids` (headers downloaded from the server).  A
     single id of the form "@path" is replaced by the lines of that file.
     Best references are computed for every matching header under every context.

     Returns a list of the unique reference names, omitting values that start
     with "NOT FOUND".  Logs an error and exits with status -1 when no contexts
     are defined.
     """
     if not self.contexts:
         log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.")
         sys.exit(-1)
     active_references = []
     for context in self.contexts:
         # Reset for each context: if the download below fails (and the failure
         # is only logged) or no ids were given, the lookup further down must
         # not hit an unbound name or reuse the previous context's headers.
         id_headers = {}
         if self.args.dataset_ids:
             if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                 with open(self.args.dataset_ids[0][1:]) as pfile:
                     self.args.dataset_ids = pfile.read().splitlines()
             with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                 id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
         for dataset in self.args.dataset_files or self.args.dataset_ids:
             log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
             with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                 if self.args.dataset_files:
                     headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                 else:
                     # Compare case-insensitively on both sides so the requested
                     # id matches however the returned id is capitalized.
                     wanted = dataset.upper()
                     headers = { dataset_id : header for (dataset_id, header) in id_headers.items() if
                                 wanted in dataset_id.upper() }
                 for assc_dataset, header in headers.items():
                     with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset),
                                                 "under context", repr(context)):
                         bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory,
                                                            ignore_cache=self.args.ignore_cache)
                         log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                         active_references.extend(bestrefs.values())
     active_references = [ ref for ref in active_references if not ref.startswith("NOT FOUND") ]
     log.verbose("Syncing references:", repr(active_references))
     return list(set(active_references))
Ejemplo n.º 4
0
 def sync_datasets(self):
     """Collect the references needed by the requested datasets under `self.contexts`.

     Datasets come from `self.args.dataset_files` (headers read from the files)
     or from `self.args.dataset_ids` (headers downloaded from the server).  A
     single id of the form "@path" is replaced by the lines of that file.
     Best references are computed for every matching header under every context.

     Returns a list of the unique reference names, omitting values that start
     with "NOT FOUND".  Logs an error and exits with status -1 when no contexts
     are defined.
     """
     if not self.contexts:
         log.error("Define --contexts under which references are fetched for --dataset-files or --dataset-ids.")
         sys.exit(-1)
     active_references = []
     for context in self.contexts:
         # Reset for each context: if the download below fails (and the failure
         # is only logged) or no ids were given, the lookup further down must
         # not hit an unbound name or reuse the previous context's headers.
         id_headers = {}
         if self.args.dataset_ids:
             if len(self.args.dataset_ids) == 1 and self.args.dataset_ids[0].startswith("@"):
                 with open(self.args.dataset_ids[0][1:]) as pfile:
                     self.args.dataset_ids = pfile.read().splitlines()
             with log.error_on_exception("Failed to get matching parameters for", self.args.dataset_ids):
                 id_headers = api.get_dataset_headers_by_id(context, self.args.dataset_ids)
         for dataset in self.args.dataset_files or self.args.dataset_ids:
             log.info("Syncing context '%s' dataset '%s'." % (context, dataset))
             with log.error_on_exception("Failed to get matching parameters from", repr(dataset)):
                 if self.args.dataset_files:
                     headers = { dataset : data_file.get_conditioned_header(dataset, observatory=self.observatory) }
                 else:
                     # Compare case-insensitively on both sides so the requested
                     # id matches however the returned id is capitalized.
                     wanted = dataset.upper()
                     headers = { dataset_id : header for (dataset_id, header) in id_headers.items() if
                                 wanted in dataset_id.upper() }
                 for assc_dataset, header in headers.items():
                     with log.error_on_exception("Failed syncing references for dataset", repr(assc_dataset),
                                                 "under context", repr(context)):
                         bestrefs = crds.getrecommendations(header, context=context, observatory=self.observatory,
                                                            ignore_cache=self.args.ignore_cache)
                         log.verbose("Best references for", repr(assc_dataset), "are", bestrefs)
                         active_references.extend(bestrefs.values())
     active_references = [ ref for ref in active_references if not ref.startswith("NOT FOUND") ]
     log.verbose("Syncing references:", repr(active_references))
     return list(set(active_references))
Ejemplo n.º 5
0
Archivo: list.py Proyecto: nden/crds
 def list_dataset_headers(self):
     """Show the matching parameters of the datasets in self.args.dataset_headers.

     For each context in self.contexts the headers are downloaded with
     api.get_dataset_headers_by_id().  Every requested id is then compared,
     as a case-insensitive substring, against the returned ids in sorted order.
     A string "header" is reported as an error (the string is the reason).
     Otherwise either the expanded id is printed (--id-expansions-only) or
     the header is logged, optionally minimized, without its REFTYPE key and
     with the returned id added under "dataset_id".
     """
     for context in self.contexts:
         with log.error_on_exception("Failed fetching dataset parameters with repect to", repr(context),
                                     "for", repr(self.args.dataset_headers)):
             pars = api.get_dataset_headers_by_id(context, self.args.dataset_headers)
             pmap = crds.get_cached_mapping(context)
             for requested_id in self.args.dataset_headers:
                 for returned_id in sorted(pars):
                     if requested_id.upper() not in returned_id.upper():
                         continue
                     header = pars[returned_id]
                     if isinstance(header, python23.string_types):
                         # The server returned a failure reason rather than a header.
                         log.error("No header for", repr(returned_id), ":", repr(header))
                         continue
                     if self.args.id_expansions_only:
                         context_label = context if len(self.contexts) > 1 else ""
                         print(returned_id, context_label)
                     else:
                         header2 = (pmap.minimize_header(header)
                                    if self.args.minimize_headers else dict(header))
                         header2.pop("REFTYPE", None)
                         header2["dataset_id"] = returned_id
                         log.info("Dataset pars for", repr(returned_id), "with respect to", repr(context) + ":\n",
                                  log.PP(header2))
                     if self.args.first_id_expansion_only:
                         break  # stop after the first usable expansion of this id
Ejemplo n.º 6
0
    def __init__(self, context, datasets, datasets_since):
        """Download the headers for the `datasets` ids from the CRDS server with respect to `context`.

        The downloaded headers are stored on `self.headers`.  A warning is logged
        for each entry of `self.sources` whose two-part id is not among the
        downloaded ids, after which `self.sources` is replaced by the sorted
        downloaded ids.
        """
        super(DatasetHeaderGenerator, self).__init__(context, datasets, datasets_since)
        server_url = api.get_crds_server()
        log.info("Dumping dataset parameters from CRDS server at", repr(server_url), "for", repr(datasets))
        self.headers = api.get_dataset_headers_by_id(context, datasets)
        log.info("Dumped", len(self.headers), "of", len(datasets), "datasets from CRDS server at", repr(server_url))

        # Each id given on the command line is expected to map to at least one header.
        for source in self.sources:
            two_part_id = self.matching_two_part_id(source)
            if two_part_id not in self.headers:
                log.warning("Dataset", repr(source), "isn't represented by downloaded parameters.")

        # From here on iterate over the downloaded 2-part ids rather than the command line ids.
        self.sources = sorted(self.headers)
Ejemplo n.º 7
0
 def list_datasets(self):
     """Log the minimized matching parameters of self.args.datasets for each context.

     Headers are downloaded with api.get_dataset_headers_by_id() for every
     context in self.contexts.  A string in place of a header is logged as an
     error (the string is the reason); any other header is minimized with the
     context's cached mapping and logged without its REFTYPE key.
     """
     for context in self.contexts:
         with log.error_on_exception("Failed fetching dataset parameters with repect to", repr(context),
                                     "for", repr(self.args.datasets)):
             pars = api.get_dataset_headers_by_id(context, self.args.datasets)
             pmap = rmap.get_cached_mapping(context)
             for dataset_id, header in pars.items():
                 if isinstance(header, python23.string_types):
                     # The server returned a failure reason rather than a header.
                     log.error("No header for", repr(dataset_id), ":", repr(header))
                 else:
                     minimized = pmap.minimize_header(header)
                     minimized.pop("REFTYPE", None)
                     log.info("Dataset pars for", repr(dataset_id), "with respect to", repr(context) + ":\n",
                              log.PP(minimized))
Ejemplo n.º 8
0
 def fetch_source_segment(self, source):
     """Download the headers for the segment of dataset ids containing `source`.

     `self.sources` is treated as consecutive segments of `self.segment_size`
     ids.  The headers of the segment holding `source` are fetched with
     api.get_dataset_headers_by_id() and stored on `self.headers`: merged into
     the existing headers when `self.save_pickles` is set, otherwise replacing
     them.  Nothing is returned.

     Raises CrdsError if `source` is not in `self.sources`.
     """
     try:
         position = self.sources.index(source)
     except ValueError as exc:
         raise CrdsError("Unknown dataset id " + repr(source)) from exc
     # Round the position down to the start of its segment.
     lower = (position // self.segment_size) * self.segment_size
     upper = lower + self.segment_size
     segment_ids = self.sources[lower:upper]
     log.verbose("Dumping", len(segment_ids), "datasets from indices", lower, "to",
                 lower + len(segment_ids), verbosity=20)
     dumped_headers = api.get_dataset_headers_by_id(self.context, segment_ids)
     log.verbose("Dumped", len(dumped_headers), "datasets", verbosity=20)
     if not self.save_pickles:
         # Conserve memory by keeping only the most recently dumped segment.
         self.headers = dumped_headers
     else:
         # Keep all headers;  causes memory problems with multiple instruments on ~8G ram.
         self.headers.update(dumped_headers)
Ejemplo n.º 9
0
 def dump_dataset_headers(self):
     """Print the matching parameters for the datasets selected on the
     command line.

     Headers are fetched for every context in `self.contexts`, either by
     dataset id (`self.args.datasets`) or, failing that, by instrument
     (`self.args.instrument`).  Results are grouped by dataset id so that the
     headers of one dataset under all contexts are printed together.

     When neither selection is given there is nothing to dump and the method
     returns without output instead of failing on an unbound `headers`.
     """
     if not (self.args.datasets or self.args.instrument):
         return
     multi_context_headers = defaultdict(list)
     for context in self.contexts:
         if self.args.datasets:
             headers = api.get_dataset_headers_by_id(context, self.args.datasets)
         else:  # guaranteed by the guard above: --instrument was given
             headers = api.get_dataset_headers_by_instrument(context, self.args.instrument)
         for dataset_id, header in headers.items():
             multi_context_headers[dataset_id].append((context, header))
     # The context column is only printed when more than one context is used.
     single_context = len(self.contexts) == 1
     for dataset_id, context_headers in multi_context_headers.items():
         for (context, header) in context_headers:
             if self.args.condition_values:
                 header = utils.condition_header(header)
             if self.args.minimize_headers:
                 header = rmap.get_cached_mapping(context).minimize_header(header)
             if single_context:
                 print(dataset_id, ":", log.format_parameter_list(header))
             else:
                 print(dataset_id, ":", context, ":", log.format_parameter_list(header))