Example #1
0
    def list(self, what, start=None, end=None, where=None, work_id=None):
        '''Iterate over the metadata records of archived files.

        Args:
          what: the kind of file to list (e.g., syslog, nginx).

          start: only list files after this time. Several input types are
          accepted: datetimes are used directly; strings such as
          `2015-12-21` or `2015-12-21T09:11:14.08Z` are converted to
          datetimes; floats are taken as UTC seconds since the epoch;
          integers are taken as milliseconds since the epoch.

          end: only list files before this time. Interpreted like start.

          where: only list files from this host.

          work_id: only list files carrying this work id.

        This is a generator. Each yielded record has the form:
            {
                'url': <url>,
                'metadata': <metadata>,
            }

        Result pages are fetched one at a time: the page's 'next' value is
        requested only after all of the page's records have been yielded.
        '''
        url = self.http_url + '/v0/archive/files/'
        query = {
            'what': what,
            'start': (Metadata.normalize_date(start)
                      if start is not None else None),
            'end': (Metadata.normalize_date(end)
                    if end is not None else None),
            'where': where,
            'work_id': work_id,
        }
        reply = self._requests_get(url, params=query)

        while True:
            # Every page is validated before its body is decoded.
            self._check_http_response(reply)
            page = reply.json()
            for entry in page['records']:
                yield entry
            next_url = page['next']
            if not next_url:
                # A falsy 'next' marks the final page.
                return
            reply = self._requests_get(next_url)
Example #2
0
    def tester(start, end):
        '''List with the given start/end and check the single record returned.

        The normalized start and end are written into random_metadata, a
        one-record response is registered through prepare_response, and
        archive.list is then called with the raw start and end values.
        '''
        for field, raw in (('start', start), ('end', end)):
            random_metadata[field] = Metadata.normalize_date(raw)

        expected_record = {
            'url': 's3://bucket/file',
            'metadata': random_metadata,
        }
        payload = {'records': [expected_record], 'next': None}

        prepare_response(
            payload,
            what=random_metadata['what'],
            start=random_metadata['start'],
            end=random_metadata['end'],
        )
        listed = list(
            archive.list(random_metadata['what'], start=start, end=end)
        )
        assert len(listed) == 1
        only = listed[0]
        assert only['url'] == 's3://bucket/file'
        assert only['metadata'] == random_metadata
def test_normalize_garbage(basic_metadata):
    '''normalize_date must raise InvalidDatalakeMetadata for garbage text.'''
    garbage = 'bleeblaaablooo'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata.normalize_date(garbage)
def test_normalize_date_with_datetime(basic_metadata):
    '''normalize_date maps the parsed 2015-03-20T00:00:00Z to 1426809600000.'''
    expected = 1426809600000
    parsed = dateparse('2015-03-20T00:00:00Z')
    assert Metadata.normalize_date(parsed) == expected
Example #5
0
 def _get_create_time(cls, key):
     '''Return key.last_modified passed through Metadata.normalize_date.'''
     normalize = Metadata.normalize_date
     return normalize(key.last_modified)