def test_get_file_meta():
    """Exercise ``mc.get_file_meta`` with unusable input and an empty file."""
    # Both None and a path to a non-existent file must be rejected.
    missing_fqn = os.path.join(TESTDATA_DIR, 'abc.txt')
    for bad_input in (None, missing_fqn):
        with pytest.raises(mc.CadcException):
            mc.get_file_meta(bad_input)

    # The empty file is reported with a size of zero.
    empty_fqn = os.path.join(TESTDATA_DIR, 'todo.txt')
    meta = mc.get_file_meta(empty_fqn)
    assert meta['size'] == 0, meta['size']
def test_get_file_meta():
    """Check ``mc.get_file_meta`` for None, a missing file, and an empty file."""
    # None is rejected.
    with pytest.raises(mc.CadcException):
        mc.get_file_meta(None)

    # A non-existent file is rejected as well.
    absent = os.path.join(tc.TEST_DATA_DIR, 'abc.txt')
    with pytest.raises(mc.CadcException):
        mc.get_file_meta(absent)

    # Remove any leftover copy, then create a zero-length file to inspect.
    empty = os.path.join(tc.TEST_DATA_DIR, 'todo.txt')
    if os.path.exists(empty):
        os.unlink(empty)
    with open(empty, 'w'):
        pass
    info = mc.get_file_meta(empty)
    assert info['size'] == 0, info['size']
def si_client_put(client, fqn, storage_name, metrics):
    """
    Make a copy of a locally available file by writing it to CADC. Assumes
    file and directory locations are correct.

    The md5sum of the file on disk is passed to ``client.cadcput`` as
    ``md5_checksum``. The working directory is changed to the directory of
    ``fqn`` for the duration of the call and always restored afterwards.

    :param client: Client for write access to CADC storage.
    :param fqn: str fully-qualified name from which the file will be stored.
    :param storage_name: Artifact URI - the label for storing the file.
    :param metrics: Tracking success execution times, and failure counts.
        May be None, in which case nothing is recorded.
    :raises mc.CadcException: if any step of storing the file fails.
    """
    start = current()
    cwd = os.getcwd()
    try:
        cadc_meta = si_client_info(client, storage_name)
        os.chdir(os.path.dirname(fqn))
        local_meta = mc.get_file_meta(fqn)
        client.cadcput(
            storage_name,
            src=fqn,
            # Only request a replace when CADC already knows the file.
            replace=cadc_meta is not None,
            file_type=local_meta.get('type'),
            file_encoding='',
            md5_checksum=local_meta.get('md5sum'),
        )
    except Exception as e:
        # Guard the metrics call so a missing metrics object cannot mask
        # the original failure with an AttributeError.
        if metrics is not None:
            metrics.observe_failure('cadcput', 'si', os.path.basename(fqn))
        logging.debug(traceback.format_exc())
        raise mc.CadcException(f'Failed to store data with {e}') from e
    finally:
        os.chdir(cwd)
    if metrics is not None:
        end = current()
        metrics.observe(
            start,
            end,
            local_meta.get('size'),
            'cadcput',
            'si',
            os.path.basename(fqn),
        )
def _update_fits_artifact_metadata(plane, science_fqn, science_file):
    """
    Copy the type, size and md5 checksum reported by ``mc.get_file_meta`` for
    ``science_fqn`` onto the first artifact in ``plane.artifacts`` whose URI
    has ``science_file`` as its second '/'-separated component.

    :param plane: object whose ``artifacts`` mapping is searched.
    :param science_fqn: path handed to ``mc.get_file_meta``.
    :param science_file: name compared against the artifact URI component.
    """
    logging.debug('Begin _update_fits_artifact_metadata')

    meta = mc.get_file_meta(science_fqn)

    for candidate in plane.artifacts.values():
        if candidate.uri.split('/')[1] != science_file:
            continue
        # Only the first matching artifact is updated.
        candidate.content_type = meta['type']
        candidate.content_length = meta['size']
        candidate.content_checksum = ChecksumURI(
            'md5:{}'.format(meta['md5sum']))
        break

    logging.debug('Done _update_fits_artifact_metadata.')
def si_client_get(client, fqn, source, metrics):
    """
    Retrieve a local copy of a file available from CADC using the
    StorageInventory client. Assumes the working directory location exists
    and is writeable.

    Checks that the md5sum of the retrieved file is the same as the md5sum
    of the file at CADC.

    :param client: The Client for read access to CADC storage.
    :param fqn: str fully-qualified name to which the retrieved file
        will be written.
    :param source: Artifact URI - where to retrieve the file from.
    :param metrics: track success execution times, and failure counts.
        May be None, in which case nothing is recorded.
    :raises mc.CadcException: if the retrieval fails, the file is not on
        disk afterwards, no CADC metadata is available for the comparison,
        or the checksums differ.
    """
    start = current()
    try:
        client.cadcget(source, dest=fqn)
        if not os.path.exists(fqn):
            raise mc.CadcException(f'Retrieve failed. {fqn} does not exist.')
        local_meta = mc.get_file_meta(fqn)
        cadc_meta = si_client_info(client, source)
        # Without remote metadata there is nothing to compare against; say
        # so explicitly rather than failing on an attribute lookup of None.
        if cadc_meta is None:
            raise mc.CadcException(
                f'No CADC metadata found for {source}, cannot verify the '
                f'MD5 checksum.'
            )
        if local_meta.get('md5sum') != cadc_meta.md5sum:
            raise mc.CadcException(
                f'Wrong MD5 checksum {local_meta.get("md5sum")} retrieved for '
                f'{source}.'
            )
    except Exception as e:
        if metrics is not None:
            metrics.observe_failure('cadcget', 'si', os.path.basename(fqn))
        logging.debug(traceback.format_exc())
        raise mc.CadcException(f'Did not retrieve {fqn} because {e}') from e
    if metrics is not None:
        end = current()
        metrics.observe(
            start,
            end,
            local_meta.get('size'),
            'cadcget',
            'si',
            os.path.basename(fqn),
        )