Esempio n. 1
0
def get_cadc_headers(uri):
    """
    Fetch the FITS headers of a CADC file without downloading its data.

    The function takes advantage of the fhead feature of the CADC
    storage service and retrieves just the headers and no data, minimizing
    the transfer time.

    The file must be public, because the header retrieval is done as an
    anonymous user.

    :param uri: CADC URI whose path has the form ``<archive>/<file_id>``
    :return: the FITS header text (keyword/value pairs), decoded as ASCII.
    :raises ValueError: if the URI path does not split into exactly two
        parts (archive and file id).
    """
    file_url = parse.urlparse(uri)
    # a Subject built without any credentials is used for the request
    subject = net.Subject()
    client = CadcDataClient(subject)
    archive, file_id = file_url.path.split('/')
    # The context manager releases the buffer even when the fhead request
    # raises, which the explicit close() call did not guarantee.
    with BytesIO() as b:
        b.name = uri
        # do a fhead on the file
        client.get_file(archive, file_id, b, fhead=True)
        return b.getvalue().decode('ascii')
Esempio n. 2
0
def get_file(archive, file_name, cutout=None, destination=None):
    """
    Retrieve a file from a CADC archive using a credential-less subject.

    :param archive: name of the archive holding the file
    :param file_name: name of the file within the archive
    :param cutout: optional cutout specification, passed through to the
        data client unchanged
    :param destination: optional destination, passed through to the data
        client unchanged
    :return: whatever CadcDataClient.get_file returns
    """
    options = {'cutout': cutout, 'destination': destination}
    return CadcDataClient(net.Subject()).get_file(
        archive, file_name, **options)
Esempio n. 3
0
 def __init__(
     self,
     subject,
     using_storage_inventory=True,
     resource_id='ivo://cadc.nrc.ca/uvic/minoc',
     metrics=None,
 ):
     """
     Select and build the client used for file operations at CADC.

     :param subject: net.Subject instance for authentication and
         authorization
     :param using_storage_inventory: when True a StorageInventoryClient
         is created, otherwise a CadcDataClient is created.
     :param resource_id: str identifying the StorageInventoryClient
         endpoint. Ignored when using_storage_inventory is False.
     :param metrics: optional metrics collector, stored as-is. Defaults
         to None.
     """
     if not using_storage_inventory:
         chosen_client = CadcDataClient(subject=subject)
     else:
         chosen_client = StorageInventoryClient(
             subject=subject, resource_id=resource_id
         )
     self._cadc_client = chosen_client
     self._use_si = using_storage_inventory
     self._metrics = metrics
     # the logger is named after the concrete class
     self._logger = logging.getLogger(type(self).__name__)
Esempio n. 4
0
    def update(self, observation, **kwargs):
        """
        Refresh the file metadata of every artifact in an observation.

        For each artifact of each plane, the file information is fetched
        from the CADC data web service and the artifact uri, content
        checksum, content length and content type are overwritten with the
        values returned. The old and the updated values are printed.

        :param observation: the Observation to update in place
        :param kwargs: may contain 'subject', the subject used to create
            the data web service client. When absent, a subject without
            credentials is used.
        :raises AssertionError: if observation is not an Observation
        :raises ValueError: if an artifact uri does not use the 'ad'
            scheme, or its path is not of the form <archive>/<file>
        """
        assert isinstance(observation, Observation), (
            "observation {} is not an Observation".format(observation))

        # The data web service client does not depend on the artifact, so
        # it is created once rather than once per artifact.
        if 'subject' in kwargs:
            client = CadcDataClient(kwargs['subject'])
        else:
            client = CadcDataClient(net.Subject())

        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                url = urlparse(artifact.uri)
                if url.scheme != 'ad':
                    raise ValueError('Unsupported schema in uri: {}'.format(
                        artifact.uri))
                archive, file_name = url.path.split('/')

                metadata = client.get_file_info(archive, file_name)
                # the service may know the file under a different name
                uri = artifact.uri.replace('/{}'.format(file_name),
                                           '/{}'.format(metadata['name']))
                checksum = ChecksumURI('md5:{}'.format(metadata['md5sum']))
                print("old - uri({}), encoding({}), size({}), type({})".format(
                    artifact.uri, artifact.content_checksum,
                    artifact.content_length, artifact.content_type))
                artifact.uri = uri
                artifact.content_checksum = checksum
                artifact.content_length = int(metadata['size'])
                artifact.content_type = str(metadata['type'])
                print("updated - uri({}), encoding({}), size({}), type({})".
                      format(artifact.uri, artifact.content_checksum,
                             artifact.content_length, artifact.content_type))
Esempio n. 5
0
def get_cadc_meta(netrc_fqn, collection, fname):
    """
    Look up the file information CADC holds for one archived file.

    :param netrc_fqn: netrc file providing the user credentials
    :param collection: archive the file has been stored to
    :param fname: name of the file in the archive
    :return: the result of CadcDataClient.get_file_info for the file
    """
    credentials = net.Subject(
        username=None, certificate=None, netrc=netrc_fqn)
    return CadcDataClient(credentials).get_file_info(collection, fname)
Esempio n. 6
0
def put_cadc_file(filename,
                  stream,
                  subject,
                  mime_type=None,
                  mime_encoding=None):
    """
    Transfers a file to the CADC archive and logs the outcome.

    The result of the transfer (path, size in MiB, success flag and either
    the time/speed or the error message) is always written to the transfer
    log as JSON, whether the transfer succeeds or fails.

    :param filename -- name of the file
    :param stream -- the name of archive stream at the CADC
    :param subject -- subject (type cadcutils.net.Subject) executing the
    command.
    :param mime_type -- file MIME type
    :param mime_encoding - file MIME encoding
    :raises ProcError: if the archive name is not configured or the
        transfer fails for any reason.
    """
    size = os.stat(filename).st_size
    # float division so the value is the same under Python 2 and 3
    size_mb = size / 1024.0 / 1024.0
    transfer_result = {
        'path': filename,
        'size': round(size_mb, 2)
    }
    try:
        archive = etrans_config.get('etransfer', 'archive')
        if not archive:
            raise RuntimeError('Name of archive not found')

        start = time.time()
        data_client = CadcDataClient(subject)

        data_client.put_file(archive,
                             filename,
                             archive_stream=stream,
                             mime_type=mime_type,
                             mime_encoding=mime_encoding)
        duration = time.time() - start
        transfer_result['success'] = True
        transfer_result['time'] = duration
        # A coarse clock can report a zero duration; dividing by it used to
        # raise ZeroDivisionError and turn a successful transfer into a
        # reported failure. The speed is left undefined (null) instead.
        if duration > 0:
            transfer_result['speed'] = round(size_mb / duration, 2)
        else:
            transfer_result['speed'] = None
    except Exception as e:
        transfer_result['success'] = False
        transfer_result['message'] = str(e)
        raise ProcError('Error transferring file: ' + str(e))
    finally:
        # runs on both paths so that every attempt is recorded
        _get_transfer_log().info('{} - {}'.format(LOG_PUT_LABEL,
                                                  json.dumps(transfer_result)))
Esempio n. 7
0
def fetch_cadc_file_info(filename, subject):
    """
    Look up the information CADC holds about a file.

    :param filename -- name of the file
    :param subject -- subject (type cadcutils.net.Subject) executing the
    command.
    :return: the file information reported by the data client, or None
        when the client reports that the file is not found.
    :raises ProcError: for any other failure, including a missing
        archive name in the configuration.
    """
    try:
        archive_name = etrans_config.get('etransfer', 'archive')
        if archive_name:
            return CadcDataClient(subject).get_file_info(
                archive_name, filename)
        # raised inside the try so it is re-reported as a ProcError below
        raise RuntimeError('Name of archive not found')
    except exceptions.NotFoundException:
        return None
    except Exception as err:
        raise ProcError('Error fetching CADC file info: ' + str(err))
Esempio n. 8
0
 def __init__(self, config):
     """
     Set up the builder from its configuration.

     The data and repository clients are only created when the
     configuration does not ask for local files; otherwise they stay None.

     :param config: configuration object; use_local_files and
         resource_id are read from it here.
     """
     super(CFHTBuilder, self).__init__()
     self._config = config
     # both clients start out unset
     self._data_client = self._repo_client = None
     self._metrics = mc.Metrics(self._config)
     needs_remote_clients = not self._config.use_local_files
     if needs_remote_clients:
         credentials = mc.define_subject(self._config)
         self._data_client = CadcDataClient(credentials)
         self._repo_client = CAOM2RepoClient(
             credentials, resource_id=self._config.resource_id)
     self._logger = logging.getLogger(__name__)
Esempio n. 9
0
    def update(self, observation, **kwargs):
        """
        Refresh the file metadata of every artifact in an observation.

        For each artifact of each plane, the file information is fetched
        from the CADC data web service and the artifact uri, content
        checksum, content length and content type are overwritten with the
        values returned. The old and the updated values are printed.

        :param observation: the Observation to update in place
        :param kwargs: may contain 'subject', the subject used to create
            the data web service client. When absent, a subject without
            credentials is used.
        :raises AssertionError: if observation is not an Observation
        :raises ValueError: if an artifact uri does not use the 'ad'
            scheme, or its path is not of the form <archive>/<file>
        """
        # The message previously mixed a %s placeholder with str.format(),
        # so the offending object never appeared in it.
        assert isinstance(observation, Observation), (
            "observation {} is not an Observation".format(observation))

        # The data web service client does not depend on the artifact, so
        # it is created once rather than once per artifact.
        if 'subject' in kwargs:
            client = CadcDataClient(kwargs['subject'])
        else:
            client = CadcDataClient(net.Subject())

        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                url = urlparse(artifact.uri)
                if url.scheme != 'ad':
                    raise ValueError(
                        'Unsupported schema in uri: {}'.format(artifact.uri))
                archive, file_name = url.path.split('/')

                metadata = client.get_file_info(archive, file_name)
                # the service may know the file under a different name
                uri = artifact.uri.replace('/{}'.format(file_name), '/{}'.
                                           format(metadata['name']))
                checksum = ChecksumURI('md5:{}'.format(metadata['md5sum']))
                print("old - uri({}), encoding({}), size({}), type({})".
                      format(artifact.uri,
                             artifact.content_checksum,
                             artifact.content_length,
                             artifact.content_type))
                artifact.uri = uri
                artifact.content_checksum = checksum
                artifact.content_length = int(metadata['size'])
                artifact.content_type = str(metadata['type'])
                print("updated - uri({}), encoding({}), size({}), type({})".
                      format(artifact.uri,
                             artifact.content_checksum,
                             artifact.content_length,
                             artifact.content_type))
Esempio n. 10
0
def test_info_file(basews_mock):
    """
    Check that get_file_info maps the HTTP response headers of a HEAD
    request onto the keys of the returned info dictionary.

    :param basews_mock: mock standing in for the web service client class;
        its head() call is configured to return the canned response.
    """
    client = CadcDataClient(auth.Subject())
    # test an info
    # the name reported by the service differs from the name requested
    file_name = 'myfile.txt'
    archive = 'TEST'
    size = '123'
    md5sum = '0x123'
    content_type = 'txt'
    encoding = 'gzip'
    lastmod = '11/11/11T11:11:11.000'
    usize = '1234'
    umd5sum = '0x1234'

    response = Mock()
    response.headers = {
        'Content-Disposition': 'inline; filename={}'.format(file_name),
        'Content-Length': size,
        'Content-MD5': md5sum,
        'Content-Type': content_type,
        'Content-Encoding': encoding,
        'Last-Modified': lastmod,
        'X-Uncompressed-Length': usize,
        'X-Uncompressed-MD5': umd5sum,
    }
    basews_mock.return_value.head.return_value = response
    info = client.get_file_info(archive, 'myfile')
    assert archive == info['archive']
    assert file_name == info['name']
    assert size == info['size']
    assert md5sum == info['md5sum']
    assert content_type == info['type']
    assert encoding == info['encoding']
    assert lastmod == info['lastmod']
    assert usize == info['usize']
    assert umd5sum == info['umd5sum']
Esempio n. 11
0
def test_put_file(basews_mock):
    """
    Exercise CadcDataClient.put_file against a mocked web service.

    Covers: rejection of an unauthenticated put, the headers sent by
    default, the headers sent when MAGIC_WARN is set, overriding the
    stored file name together with an archive stream, and explicitly
    supplied MIME type/encoding.

    :param basews_mock: mock standing in for the web service client class;
        its put() is replaced so the calls can be inspected.
    """
    client = CadcDataClient(auth.Subject())
    # test a put
    file_name = '/tmp/putfile.txt'
    file_content = 'ABCDEFGH12345'
    # expected md5 of the content, compared against the Content-MD5 header
    hash_md5 = hashlib.md5()
    hash_md5.update(file_content.encode())
    hash_md5 = hash_md5.hexdigest()
    # write the file
    with open(file_name, 'w') as f:
        f.write(file_content)
    put_mock = Mock()
    basews_mock.return_value.put = put_mock
    # the subject above carries no credentials, so the put is refused
    with pytest.raises(exceptions.UnauthorizedException):
        client.put_file('TEST', 'putfile', file_name)
    client._data_client.subject.anon = False  # authenticate the user
    transf_end_point = 'http://test.ca/endpoint'

    # stand-in for transfer negotiation: a single protocol whose endpoint
    # is built from the requested file name
    def mock_get_trans_protocols(archive, file_name, is_get, headers):
        protocol = Mock()
        protocol.endpoint = '{}/{}'.format(transf_end_point, file_name)
        return [protocol]

    client._get_transfer_protocols = mock_get_trans_protocols
    client.put_file('TEST', file_name)
    # Note Content* headers automatically created by cadc-data except when
    # MAGIC_WARN is set -- libmagic not present
    put_mock.assert_called_with('{}/{}'.format(transf_end_point,
                                               os.path.basename(file_name)),
                                data=ANY,
                                headers={
                                    'Content-Type': 'text/plain',
                                    'Content-Encoding': 'us-ascii',
                                    'Content-MD5': '{}'.format(hash_md5)
                                })

    # mimic libmagic missing
    cadcdata.core.MAGIC_WARN = 'Some warning'
    put_mock.reset_mock()
    client.put_file('TEST', file_name)
    # NOTE(review): the literal below is presumably the md5 of file_content
    # (i.e. equal to hash_md5) -- confirm
    put_mock.assert_called_with(
        '{}/{}'.format(transf_end_point, os.path.basename(file_name)),
        data=ANY,
        headers={'Content-MD5': '835e7e6cd54e18ae21d50af963b0c32b'})
    # restore the module-level flag for the remaining checks
    cadcdata.core.MAGIC_WARN = None

    # specify an archive stream and override the name of the file
    input_name = 'abc'
    client.put_file('TEST',
                    file_name,
                    archive_stream='default',
                    input_name=input_name)
    put_mock.assert_called_with('{}/{}'.format(transf_end_point, input_name),
                                data=ANY,
                                headers={
                                    'Content-Encoding': 'us-ascii',
                                    'X-CADC-Stream': 'default',
                                    'Content-Type': 'text/plain',
                                    'Content-MD5': '{}'.format(hash_md5)
                                })
    # specify the mime types
    client.put_file('TEST',
                    file_name,
                    archive_stream='default',
                    mime_type='ASCII',
                    mime_encoding='GZIP')
    put_mock.assert_called_with('{}/{}'.format(transf_end_point,
                                               os.path.basename(file_name)),
                                data=ANY,
                                headers={
                                    'Content-Encoding': 'GZIP',
                                    'X-CADC-Stream': 'default',
                                    'Content-Type': 'ASCII',
                                    'Content-MD5': '{}'.format(hash_md5)
                                })
    # NOTE(review): not reached when an assertion above fails, which leaves
    # the file behind in /tmp
    os.remove(file_name)
Esempio n. 12
0
def test_get_file(trans_reader_mock, basews_mock):
    """
    Exercise CadcDataClient.get_file against mocked web service responses.

    Covers: failure when transfer negotiation returns no URLs, a plain
    download to a file name and to an open file object, decompression with
    a good and a bad md5, the process_bytes callback, and the parameters
    sent for the wcs, fhead and cutout options.

    :param trans_reader_mock: mock whose read() supplies the negotiated
        transfer
    :param basews_mock: mock standing in for the web service client class
    """
    # test a simple get - no decompress
    file_name = '/tmp/afile.txt'
    # the trailing empty chunk signals the end of the stream
    file_chunks = ['aaaa'.encode(), 'bbbb'.encode(), ''.encode()]
    response = Mock()
    hash_md5 = hashlib.md5()
    for i in file_chunks:
        hash_md5.update(i)
    response.headers.get.return_value = \
        'filename={}'.format('orig_file_name')
    response.raw.read.side_effect = file_chunks  # returns multiple blocks
    basews_mock.return_value.get.return_value = response
    client = CadcDataClient(auth.Subject())
    with pytest.raises(exceptions.HttpException):
        # no URLs returned in the transfer negotiations
        client.get_file('TEST', 'afile', destination=file_name)
    t = transfer.Transfer('ad:TEST/afile', 'pullFromVoSpace')
    # NOTE(review): this binds the Protocol class itself, not an instance,
    # so the endpoint assignments below set a class attribute -- confirm
    # this is intended
    p = transfer.Protocol
    p.endpoint = Mock()
    t.protocols = [p]
    trans_reader_mock.return_value.read.return_value = t
    client.get_file('TEST', 'afile', destination=file_name, md5_check=False)
    expected_content = \
        (''.join([c.decode() for c in file_chunks])).encode()
    with open(file_name, 'rb') as f:
        assert expected_content == f.read()
    os.remove(file_name)
    # do it again with the file now open
    response = Mock()
    response.headers = {
        'filename': 'orig_file_name',
        'content-MD5': hash_md5.hexdigest()
    }
    response.raw.read.side_effect = file_chunks
    basews_mock.return_value.get.return_value = response
    with open(file_name, 'wb') as f:
        client.get_file('TEST', 'afile', destination=f)
    with open(file_name, 'rb') as f:
        assert expected_content == f.read()
    os.remove(file_name)

    # test a get with decompress and md5 check enabled
    file_name = 'bfile.txt'
    file_content = 'aaaabbbb'
    # the md5 is computed over the content as read, not as decoded
    hash_md5 = hashlib.md5()
    hash_md5.update(file_content.encode())
    file_chunks = [file_content.encode(), ''.encode()]
    decoded_file_content = 'MNOPRST6789'
    decoded_file_chunks = [decoded_file_content.encode(), ''.encode()]
    response = Mock()
    response.headers = \
        {'content-MD5': '{}'.format(hash_md5.hexdigest()),
         'filename': file_name}
    response.raw.read.side_effect = file_chunks
    response.raw._decode.side_effect = decoded_file_chunks
    basews_mock.return_value.get.return_value = response
    client = CadcDataClient(auth.Subject())
    # no destination given: the file is read back from file_name in the
    # current directory below
    client.get_file('TEST',
                    file_name=file_name,
                    decompress=True,
                    md5_check=True)
    with open(file_name, 'r') as f:
        # note the check against the decoded content
        assert decoded_file_content == f.read()
    os.remove(file_name)

    # repeat test with a bad md5
    file_name = 'bfile.txt'
    file_content = 'ABCDEFGH12345'
    file_chunks = [file_content.encode(), ''.encode()]
    decoded_file_content = 'MNOPRST6789'
    decoded_file_chunks = [decoded_file_content.encode(), ''.encode()]
    response = Mock()
    response.headers = {'content-MD5': 'abc', 'filename': file_name}
    response.raw.read.side_effect = file_chunks
    response.raw._decode.side_effect = decoded_file_chunks
    basews_mock.return_value.get.return_value = response
    client = CadcDataClient(auth.Subject())
    with pytest.raises(exceptions.HttpException):
        client.get_file('TEST',
                        file_name=file_name,
                        decompress=True,
                        md5_check=True)

    # test process_bytes and send the content to /dev/null after.
    # Use no decompress
    # NOTE(review): relies on a module-level `mycontent` that is not
    # defined in this function -- verify it is initialised elsewhere
    def concatenate_chunks(chunk):
        global mycontent
        mycontent = '{}{}'.format(mycontent, chunk.decode())

    file_name = 'bfile.txt'
    file_content = 'ABCDEFGH12345'
    # NOTE(review): xrange only exists on Python 2 or through a
    # compatibility import made elsewhere in the module -- confirm
    file_chunks = [
        file_content[i:i + 5].encode() for i in xrange(0, len(file_content), 5)
    ]
    # NOTE(review): this appends a str while the other chunks are bytes
    file_chunks.append('')  # last chunk is empty
    response = Mock()
    response.headers = {'filename': '{}.gz'.format(file_name)}
    response.raw.read.side_effect = file_chunks
    basews_mock.return_value.get.return_value = response
    client = CadcDataClient(auth.Subject())
    client.logger.setLevel(logging.INFO)
    # md5_check does not take place because no content-MD5 received
    # from server
    client.get_file('TEST',
                    'afile',
                    destination='/dev/null',
                    process_bytes=concatenate_chunks)
    assert file_content == mycontent

    # failed md5 checksum
    response = Mock()
    response.headers = {
        'filename': '{}.gz'.format(file_name),
        'content-MD5': '33'
    }
    response.raw.read.side_effect = file_chunks
    basews_mock.return_value.get.return_value = response
    client = CadcDataClient(auth.Subject())
    client.logger.setLevel(logging.INFO)
    # this time a content-MD5 is received from the server and it does not
    # match the content, so the get is expected to fail
    with pytest.raises(exceptions.HttpException):
        client.get_file('TEST',
                        'afile',
                        destination='/dev/null',
                        process_bytes=concatenate_chunks)

    # test get fhead
    response = Mock()
    response.headers.get.return_value = 'filename={}.gz'.format(file_name)
    response.raw.read.side_effect = file_chunks
    response.history = []
    response.status_code = 200
    response.url = 'someurl'
    # from here on the POST used for transfer negotiation is inspected
    post_mock = Mock(return_value=response)
    basews_mock.return_value.post = post_mock
    file_name = 'getfile'
    archive = 'TEST'
    p.endpoint = 'http://someurl/transfer/{}/{}'.format(archive, file_name)
    client.get_file('TEST',
                    'getfile',
                    decompress=True,
                    wcs=True,
                    md5_check=False)
    # transfer document expected in the body of every negotiation request
    trans_doc = \
        ('<vos:transfer xmlns:'
         'vos="http://www.ivoa.net/xml/VOSpace/v2.0">\n  '
         '<vos:target>ad:TEST/getfile</vos:target>\n  '
         '<vos:direction>pullFromVoSpace</vos:direction>\n  '
         '<vos:protocol uri="ivo://ivoa.net/vospace/core#httpget"/>\n'
         '  <vos:protocol uri="ivo://ivoa.net/vospace/core#httpsget"/>\n'
         '</vos:transfer>\n').encode()
    post_mock.assert_called_with(resource=(TRANSFER_RESOURCE_ID, None),
                                 params={'wcs': True},
                                 data=trans_doc,
                                 headers={'Content-Type': 'text/xml'})
    # the side_effect list is consumed by each get, so it is re-armed
    # before every call
    response.raw.read.side_effect = file_chunks
    post_mock.reset_mock()
    client.get_file('TEST',
                    'getfile',
                    decompress=True,
                    fhead=True,
                    md5_check=False)
    post_mock.assert_called_with(resource=(TRANSFER_RESOURCE_ID, None),
                                 params={'fhead': True},
                                 data=trans_doc,
                                 headers={'Content-Type': 'text/xml'})
    response.raw.read.side_effect = file_chunks
    post_mock.reset_mock()
    client.get_file('TEST',
                    'getfile',
                    decompress=True,
                    cutout='[1:1]',
                    md5_check=False)
    post_mock.assert_called_with(resource=(TRANSFER_RESOURCE_ID, None),
                                 params={'cutout': '[1:1]'},
                                 data=trans_doc,
                                 headers={'Content-Type': 'text/xml'})
    response.raw.read.side_effect = file_chunks
    post_mock.reset_mock()
    client.get_file('TEST',
                    'getfile',
                    decompress=True,
                    cutout='[[1:1], 2]',
                    md5_check=False)
    post_mock.assert_called_with(resource=(TRANSFER_RESOURCE_ID, None),
                                 params={'cutout': '[[1:1], 2]'},
                                 data=trans_doc,
                                 headers={'Content-Type': 'text/xml'})
Esempio n. 13
0
from cadcdata import CadcDataClient
from cadcutils import net

# Text file with one entry per line; the product id is cut out of each line.
fname = "cadcUrlList.txt"

with open(fname) as f:
    txt = [line.strip() for line in f.readlines()]

print(len(txt))

# keep only the fixed-position slice of every line
txt = [entry[73:81] for entry in txt]

for pid in txt:
    # when the slice contains a '.', its last character is dropped
    if "." in pid:
        pid = pid[:-1]
    try:
        client = CadcDataClient(net.Subject())
        client.get_file('CFHT', pid + '.fits.fz')
        print(pid)
    except Exception as e:
        # best effort: report the failure and move on to the next id
        print(e)
Esempio n. 14
0
# Script fragment: ops_client, ops_query, ops_buffer, caom_client, archive,
# collection and subject are defined before this point.

# Run the query and parse the CSV written to the buffer into a table.
ops_client.query(ops_query, output_file=ops_buffer, data_only=True, response_format='csv')
ops_table = Table.read(ops_buffer.getvalue().split('\n'), format='csv')
# Exactly one row is required; any other row count ends the script.
if len(ops_table) == 1:
    obs_id = ops_table[0]['observationID']
    uri = ops_table[0]['uri']
    # only the file name part of the uri is used
    ignore_scheme, ignore_path, f_name = mc.decompose_uri(uri)
    print(f':::Looking for {obs_id} and {f_name}')
else:
    print(f':::No observation records found for collection {archive}')
    sys.exit(-1)

# Read the observation and write it to a file named after its id.
obs = caom_client.read(archive, obs_id)
obs_fqn = f'/usr/src/app/expected.{obs_id}.xml'
mc.write_obs_to_file(obs, obs_fqn)

print(f':::2 - Get {f_name}')
config = mc.Config()
config.get_executors()
data_client = CadcDataClient(subject)
metrics = mc.Metrics(config)
# NOTE(review): `collection` is used here while `archive` is used above --
# confirm both are defined and refer to the intended values
mc.data_get(data_client, '/usr/src/app', f_name, collection, metrics)

print(':::3 - Update config.yml to say task types are scrape and modify, and use local files.')
config.task_types = [mc.TaskType.SCRAPE, mc.TaskType.MODIFY]
config.use_local_files = True
config.logging_level = logging.INFO
mc.Config.write_to_file(config)

# Step 4 is only announced; the script exits without running anything.
print(':::4 - Run the application.')
sys.exit(0)
Esempio n. 15
0
class StorageClientWrapper:
    """
    Wrap the choice between CadcDataClient and StorageInventoryClient.

    Every public method delegates to whichever client was selected at
    construction time and, when a metrics object was supplied, records the
    outcome of the call.
    """

    def __init__(
        self,
        subject,
        using_storage_inventory=True,
        resource_id='ivo://cadc.nrc.ca/uvic/minoc',
        metrics=None,
    ):
        """
        :param subject: net.Subject instance for authentication and
            authorization
        :param using_storage_inventory: if True will use
            StorageInventoryClient for file operations at CADC. If False will
            use CadcDataClient.
        :param resource_id: str identifies the StorageInventoryClient
            endpoint. If using_storage_inventory is set to False, it's
            unnecessary.
        :param metrics: caom2pipe.manage_composable.Metrics instance. If set,
            will track execution times, by action, from the beginning of
            the method invocation to the end of the method invocation,
            success or failure. Defaults to None, because fits2caom2 is
            a stand-alone application.
        """
        if using_storage_inventory:
            self._cadc_client = StorageInventoryClient(
                subject=subject, resource_id=resource_id
            )
        else:
            self._cadc_client = CadcDataClient(subject=subject)
        self._use_si = using_storage_inventory
        self._metrics = metrics
        self._logger = logging.getLogger(self.__class__.__name__)

    def _add_fail_metric(self, action, name):
        """Single location for the check for a self._metrics member in the
        failure case."""
        if self._metrics is not None:
            client_name = 'si' if self._use_si else 'data'
            self._metrics.observe_failure(action, client_name, name)

    def _add_metric(self, action, name, start, value):
        """Single location for the check for a self._metrics member in the
        success case."""
        if self._metrics is not None:
            client_name = 'si' if self._use_si else 'data'
            self._metrics.observe(
                start,
                StorageClientWrapper._current(),
                value,
                action,
                client_name,
                name,
            )

    def get(self, working_directory, uri):
        """
        Retrieve data.
        :param working_directory: str where the file will be retrieved to.
            Assumes the same machine as this function is being called from.
        :param uri: str this is an Artifact URI, representing the file to
            be retrieved.
        :raises exceptions.UnexpectedException: if the retrieval fails for
            any reason; the original exception is chained as the cause.
        """
        self._logger.debug(f'Begin get for {uri} in {working_directory}')
        start = StorageClientWrapper._current()
        try:
            archive, f_name = self._decompose(uri)
            fqn = path.join(working_directory, f_name)
            if self._use_si:
                self._cadc_client.cadcget(uri, dest=fqn)
            else:
                self._cadc_client.get_file(archive, f_name, destination=fqn)
        except Exception as e:
            self._add_fail_metric('get', uri)
            self._logger.debug(traceback.format_exc())
            raise exceptions.UnexpectedException(
                f'Did not retrieve {uri} because {e}'
            ) from e
        # the size of the retrieved file is the value recorded on success
        self._add_metric('get', uri, start, stat(fqn).st_size)
        self._logger.debug('End get')

    def get_head(self, uri):
        """
        Retrieve FITS file header data.
        :param uri: str that is an Artifact URI, representing the file for
            which to retrieve headers
        :return: list of fits.Header instances
        :raises exceptions.UnexpectedException: if the headers cannot be
            retrieved or parsed; the original exception is chained as the
            cause.
        """
        self._logger.debug(f'Begin get_head for {uri}')
        start = StorageClientWrapper._current()
        try:
            # the context manager releases the buffer on failure as well
            with BytesIO() as b:
                b.name = uri
                if self._use_si:
                    self._cadc_client.cadcget(uri, b, fhead=True)
                else:
                    archive, f_name = StorageClientWrapper._decompose(uri)
                    self._cadc_client.get_file(archive, f_name, b, fhead=True)
                fits_header = b.getvalue().decode('ascii')
            self._add_metric('get_head', uri, start, len(fits_header))
            temp = make_headers_from_string(fits_header)
            self._logger.debug('End get_head')
            return temp
        except Exception as e:
            # NOTE(review): the failure is recorded under 'get_header' while
            # the success is recorded under 'get_head'; left unchanged in
            # case consumers of the metrics rely on the label -- confirm
            self._add_fail_metric('get_header', uri)
            self._logger.debug(traceback.format_exc())
            self._logger.error(e)
            raise exceptions.UnexpectedException(
                f'Did not retrieve {uri} header because {e}'
            ) from e

    def info(self, uri):
        """
        Retrieve the descriptive metadata associated with a file.
        :param uri: str that is an Artifact URI, representing the file for
            which to retrieve metadata
        :return: cadcdata.FileInfo instance, no scheme for md5sum, or None
            if the file is not found
        """
        self._logger.debug(f'Begin info for {uri}')
        try:
            if self._use_si:
                result = self._cadc_client.cadcinfo(uri)
                # make the result look like the other possible ways to
                # obtain metadata
                result.md5sum = result.md5sum.replace('md5:', '')
            else:
                archive, f_name = StorageClientWrapper._decompose(uri)
                temp = self._cadc_client.get_file_info(archive, f_name)
                # a missing md5sum is passed through as None instead of
                # failing on the attribute access
                md5sum = temp.get('md5sum')
                if md5sum is not None:
                    md5sum = md5sum.replace('md5:', '')
                result = FileInfo(
                    id=uri,
                    size=temp.get('size'),
                    file_type=temp.get('type'),
                    md5sum=md5sum,
                )
        except exceptions.NotFoundException:
            self._logger.info(f'cadcinfo:: {uri} not found')
            result = None
        self._logger.debug('End info')
        return result

    def put(self, working_directory, uri, stream='default'):
        """
        Store a file at CADC.
        :param working_directory: str fully-qualified name of where to find
            the file on the local machine
        :param uri: str that is an Artifact URI, representing the file to
            be stored at CADC.
        :param stream: str representing the namespace used by the
            CadcDataClient. Not required if using the StorageInventoryClient.
            'default' is default name for a lately-created ad archive.
        :raises exceptions.UnexpectedException: if the file cannot be
            stored; the original exception is chained as the cause.
        """
        self._logger.debug(f'Begin put for {uri} in {working_directory}')
        start = self._current()
        cwd = getcwd()
        archive, f_name = StorageClientWrapper._decompose(uri)
        fqn = path.join(working_directory, f_name)
        # the CadcDataClient put is given the bare file name, so the call is
        # made from the directory holding the file
        chdir(working_directory)
        try:
            local_meta = get_local_file_info(fqn)
            encoding = get_file_encoding(fqn)
            if self._use_si:
                # replace only when the file already exists at CADC
                cadc_meta = self.info(uri)
                replace = cadc_meta is not None
                self._logger.debug(
                    f'uri {uri} src {fqn} replace {replace} file_type '
                    f'{local_meta.file_type} encoding {encoding} md5_checksum '
                    f'{local_meta.md5sum}'
                )
                self._cadc_client.cadcput(
                    uri,
                    src=fqn,
                    replace=replace,
                    file_type=local_meta.file_type,
                    file_encoding=encoding,
                    md5_checksum=local_meta.md5sum,
                )
            else:
                # libmagic does a worse job with guessing file types
                # than ad for .fits.gz => it will say 'binary'
                self._logger.debug(
                    f'archive {archive} f_name {f_name} archive_stream '
                    f'{stream} mime_type {local_meta.file_type} '
                    f'mime_encoding {encoding} md5_check True '
                )
                self._cadc_client.put_file(
                    archive,
                    f_name,
                    archive_stream=stream,
                    mime_type=local_meta.file_type,
                    mime_encoding=encoding,
                    md5_check=True,
                )
            self._logger.info(f'Stored {fqn} at CADC.')
        except Exception as e:
            self._add_fail_metric('put', uri)
            self._logger.debug(traceback.format_exc())
            self._logger.error(e)
            raise exceptions.UnexpectedException(
                f'Failed to store data with {e}'
            ) from e
        finally:
            # always restore the caller's working directory
            chdir(cwd)
        self._add_metric('put', uri, start, local_meta.size)
        self._logger.debug('End put')

    def remove(self, uri):
        """
        Delete a file from CADC storage.
        :param uri: str that is an Artifact URI, representing the file to
            be removed from CADC.
        :raises NotImplementedError: when the wrapper uses CadcDataClient
        :raises exceptions.UnexpectedException: if the removal fails; the
            original exception is chained as the cause.
        """
        self._logger.debug(f'Begin remove for {uri}')
        start = StorageClientWrapper._current()
        if self._use_si:
            try:
                self._cadc_client.cadcremove(uri)
            except Exception as e:
                self._add_fail_metric('remove', uri)
                self._logger.debug(traceback.format_exc())
                self._logger.error(e)
                raise exceptions.UnexpectedException(
                    f'Did not remove {uri} because {e}'
                ) from e
        else:
            raise NotImplementedError(
                'No remove functionality for CadcDataClient'
            )
        self._add_metric('remove', uri, start, value=None)
        self._logger.debug('End remove')

    @staticmethod
    def _current():
        """Encapsulate returning UTC now in microsecond resolution."""
        return datetime.now(tz=timezone.utc).timestamp()

    @staticmethod
    def _decompose(uri):
        """Split an Artifact URI into the directory part and the file name
        of its path, e.g. 'ad:TEST/abc.fits' gives ('TEST', 'abc.fits')."""
        temp = urlparse(uri)
        return path.dirname(temp.path), path.basename(temp.path)