def mock_get_data_label(uri):
    """Return the data label of the mock metadata record matching ``uri``.

    The file name is taken from the third element returned by
    ``mc.decompose_uri``, and its extensions are removed with
    ``GemName.remove_extensions`` to get the file id. The records returned
    by ``mock_get_obs_metadata`` for that file id are then searched in
    order.

    :param uri: URI that names the file of interest.
    :return: the ``data_label`` value of the first record whose
        extension-stripped ``filename`` equals the file id, or None when
        no record matches.
    """
    _, _, file_name = mc.decompose_uri(uri)
    wanted_id = GemName.remove_extensions(file_name)
    for record in mock_get_obs_metadata(wanted_id):
        record_id = obs_file_relationship.remove_extensions(
            record.get('filename')
        )
        if record_id == wanted_id:
            # first match wins
            return record.get('data_label')
    return None
def add(self, metadata, file_id):
    """Remember the metadata for ``file_id`` and make it the current id.

    :param metadata: either a single metadata entry, which is stored as-is,
        or a list of entries (the json summary results), in which case
        only the first entry whose extension-stripped ``filename`` equals
        ``file_id`` is stored. Nothing is stored when no list entry
        matches.
    :param file_id: key under which the entry is kept in ``self.lookup``;
        also assigned to ``self.current``.
    """
    if not isinstance(metadata, list):
        self.lookup[file_id] = metadata
    else:
        # the json summary results are a list, keep the entry in the list
        # which has the information for a particular file_id
        for candidate in metadata:
            candidate_id = GemName.remove_extensions(
                candidate.get('filename')
            )
            if candidate_id is None or not (file_id == candidate_id):
                continue
            self.lookup[file_id] = candidate
            break
    self.current = file_id
def _get_index(self, file_id):
    """Find the position in ``self.current`` of the entry for ``file_id``.

    Each entry's ``filename`` is read with ``mc.response_lookup``; entries
    without a file name are skipped. The remaining names are compared to
    ``file_id`` after their extensions are removed.

    :param file_id: file id to search for.
    :return: index of the first matching entry.
    :raises mc.CadcException: when no entry matches.
    """
    for position, entry in enumerate(self.current):
        f_name = mc.response_lookup(entry, 'filename')
        if f_name is None:
            continue
        if GemName.remove_extensions(f_name) == file_id:
            return position

    # TODO - set obs id?
    raise mc.CadcException(
        'JSON Summary: unrecognized file_id {} in obs_id {}'.format(
            file_id, ''))
def _do_prev(file_id, working_dir, plane, cadc_client, stream):
    """Build a 256x256 thumbnail from the preview of ``file_id``.

    The preview is fetched with ``mc.data_get`` when it is not already in
    ``working_dir``. Any existing thumbnail file is removed, and a new one
    is made by running the ``convert`` command through ``mc.exec_cmd``.
    The thumbnail is then passed to ``_augment`` for ``plane`` and, when a
    client is available, stored with ``mc.data_put``.

    :param file_id: file id used to build the ``GemName`` ('<file_id>.jpg').
    :param working_dir: directory holding the preview and thumbnail files.
    :param plane: plane handed to ``_augment`` for the thumbnail artifact.
    :param cadc_client: client for data transfers; the thumbnail is only
        stored when this is not None.
    :param stream: stream passed to ``mc.data_put``.
    :return: 1, always.
    """
    name = GemName('{}.jpg'.format(file_id))
    preview_file = name.prev
    preview_path = os.path.join(working_dir, preview_file)
    thumb_file = name.thumb
    thumb_path = os.path.join(working_dir, thumb_file)

    preview_on_disk = os.access(preview_path, os.F_OK)
    if not preview_on_disk:
        mc.data_get(cadc_client, working_dir, preview_file, ARCHIVE)

    # start from a clean slate, so the thumbnail is always regenerated
    if os.access(thumb_path, os.F_OK):
        os.remove(thumb_path)
    mc.exec_cmd(
        'convert -resize 256x256 {} {}'.format(preview_path, thumb_path)
    )

    _augment(plane, name.thumb_uri, thumb_path, ProductType.THUMBNAIL)
    if cadc_client is not None:
        mc.data_put(cadc_client, working_dir, thumb_file, ARCHIVE, stream)
    return 1
def run_single():
    """
    Run the processing for a single entry, then exit the interpreter.

    ``sys.argv[1]`` is the file name to process. ``sys.argv[2]`` is the
    proxy: when ``config.features.run_in_airflow`` is set, its value is
    written to a temporary file and that file's name is used as the proxy,
    otherwise the value itself is used as the proxy.

    This function does not return; it calls ``sys.exit`` with the value
    returned by ``ec.run_single``: 0 if successful, -1 if there's any sort
    of failure. Return status is used by airflow for task instance
    management and reporting.

    :raises mc.CadcException: when ``config.features.use_file_names`` is
        not set, because there is no code to run by obs id.
    """
    config = mc.Config()
    config.get_executors()
    config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'

    # The temporary proxy file has to outlive ec.run_single, but it should
    # be closed (and so removed) deterministically afterwards, including
    # when the run raises.
    proxy_file = None
    try:
        if config.features.run_in_airflow:
            proxy_file = tempfile.NamedTemporaryFile()
            mc.write_to_file(proxy_file.name, sys.argv[2])
            config.proxy = proxy_file.name
        else:
            config.proxy = sys.argv[2]
        config.stream = 'default'
        if config.features.use_file_names:
            storage_name = GemName(file_name=sys.argv[1])
        else:
            raise mc.CadcException(
                'No code to handle running GEM by obs id.')
        result = ec.run_single(
            config, storage_name, APPLICATION, meta_visitors, data_visitors
        )
    finally:
        if proxy_file is not None:
            proxy_file.close()
    sys.exit(result)
def _do_prev(obs_id, working_dir, plane, cadc_client, stream, observable):
    """Retrieve the preview file, so that a thumbnail can be made,
    store the preview if necessary, and the thumbnail, to ad.
    Then augment the CAOM observation with the two additional artifacts.

    :param obs_id: observation id, used to build the ``GemName`` and in
        log messages.
    :param working_dir: directory holding the preview and thumbnail files.
    :param plane: plane to augment; its ``product_id`` is the file id.
    :param cadc_client: client for data transfers. When None, nothing is
        retrieved from, or stored to, CADC.
    :param stream: stream passed to ``mc.data_put``.
    :param observable: provides ``rejected`` (the record of previews known
        not to exist) and ``metrics``.
    :return: the value from ``_check_for_delete`` when the preview is
        recorded as non-existent; otherwise 2 when the preview file is
        available and both artifacts were augmented, else 0.
    :raises mc.CadcException: when the preview file exists but cannot be
        opened for reading.
    """
    count = 0
    gem_name = GemName(obs_id=obs_id, file_id=plane.product_id)
    preview = gem_name.prev
    if observable.rejected.is_no_preview(preview):
        logging.info(
            f'Stopping visit because no preview exists for {preview} in '
            f'observation {obs_id}.'
        )
        observable.rejected.record(mc.Rejected.NO_PREVIEW, preview)
        count += _check_for_delete(
            preview, gem_name.prev_uri, observable, plane
        )
    else:
        preview_fqn = os.path.join(working_dir, preview)
        thumb = gem_name.thumb
        thumb_fqn = os.path.join(working_dir, thumb)
        new_retrieval = False

        # Get the file - try disk first, then CADC, then Gemini.
        # Only try to retrieve from Gemini if the eventual purpose is
        # storage, though
        if not os.access(preview_fqn, 0) and cadc_client is not None:
            try:
                mc.data_get(
                    cadc_client,
                    working_dir,
                    preview,
                    ARCHIVE,
                    observable.metrics,
                )
            except mc.CadcException:
                new_retrieval = _retrieve_from_gemini(
                    gem_name,
                    observable,
                    plane,
                    preview_fqn,
                )

        if os.path.exists(preview_fqn):
            # in case TaskType == SCRAPE + MODIFY
            # always generate the thumbnails, but only store it if it's a
            # new retrieval from archive.gemini.edu
            try:
                # Readability probe only: the handle is released right
                # away instead of being left open for the rest of the call.
                with open(preview_fqn, 'r'):
                    pass
            except PermissionError as e:
                raise mc.CadcException(
                    f'Should not have reached this point in thumbnail '
                    f'generation for {plane.product_id}'
                ) from e

            logging.debug(f'Generate thumbnail for file id {plane.product_id}')
            if os.access(thumb_fqn, 0):
                os.remove(thumb_fqn)
            try:
                image.thumbnail(preview_fqn, thumb_fqn, scale=0.25)
            except ValueError:
                # probably the jpg did not transfer properly from
                # archive.gemini.edu, so try to retrieve it one more time,
                # but ignore the count, because the count before this is
                # wrong
                #
                # have a retry here, because otherwise there's no way to
                # update the file in CADC storage without intervention
                # from Ops - i.e. the file will retrieve from CADC, so
                # there will be no succeeding attempt to retrieve from Gemini
                # that might otherwise fix the value
                logging.debug(traceback.format_exc())
                logging.warning(
                    f'matplotlib error handling {gem_name.prev}.Try to '
                    f'retrieve from {PREVIEW_URL} one more time.'
                )
                new_retrieval = _retrieve_from_gemini(
                    gem_name,
                    observable,
                    plane,
                    preview_fqn,
                )
                image.thumbnail(preview_fqn, thumb_fqn, scale=0.25)

            # Only files newly retrieved from Gemini are stored, and only
            # when there is a client to store them with.
            store_files = cadc_client is not None and new_retrieval

            _augment(
                plane, gem_name.prev_uri, preview_fqn, ProductType.PREVIEW
            )
            count = 1

            if store_files:
                # if the thumbnail could be generated from the preview,
                # the preview is probably not corrupted
                mc.data_put(
                    cadc_client,
                    working_dir,
                    gem_name.prev,
                    ARCHIVE,
                    stream,
                    MIME_TYPE,
                    mime_encoding=None,
                    metrics=observable.metrics,
                )
            _augment(
                plane, gem_name.thumb_uri, thumb_fqn, ProductType.THUMBNAIL
            )
            if store_files:
                mc.data_put(
                    cadc_client,
                    working_dir,
                    thumb,
                    ARCHIVE,
                    stream,
                    MIME_TYPE,
                    mime_encoding=None,
                    metrics=observable.metrics,
                )
            count += 1
    return count
def reset_index(self, uri):
    """Reset the index using the file id derived from ``uri``.

    The file name is taken from ``ec.CaomName(uri).file_name``, its
    extensions are removed with ``GemName.remove_extensions``, and the
    result is handed to ``self._reset_index``.

    :param uri: URI naming the file of interest.
    """
    caom_name = ec.CaomName(uri)
    self._reset_index(GemName.remove_extensions(caom_name.file_name))