def list(self, what, start=None, end=None, where=None, work_id=None):
    '''Iterate over the metadata records of matching archived files.

    Args:
        what: what kind of file to list (e.g., syslog, nginx)

        start: List only files after this time. This argument is
            polymorphic. datetimes are accepted. Strings will be
            converted to datetimes, so inputs like `2015-12-21` and
            `2015-12-21T09:11:14.08Z` are acceptable. Floats will be
            interpreted as UTC seconds since the epoch. Integers will be
            interpreted as milliseconds since the epoch.

        end: List only files before this time. Same semantics as start.

        where: List only files from this host.

        work_id: Show only files with this work id.

    Returns:
        A generator that yields records of the form:

            {
                'url': <url>,
                'metadata': <metadata>,
            }

        Result pages are fetched one at a time, following each page's
        'next' link until it is falsy.
    '''
    def _normalized(when):
        # An omitted bound stays None; anything else goes through
        # Metadata.normalize_date.
        return None if when is None else Metadata.normalize_date(when)

    url = self.http_url + '/v0/archive/files/'
    query = {
        'what': what,
        'start': _normalized(start),
        'end': _normalized(end),
        'where': where,
        'work_id': work_id,
    }

    reply = self._requests_get(url, params=query)
    while True:
        self._check_http_response(reply)
        page = reply.json()
        for record in page['records']:
            yield record
        next_url = page['next']
        if not next_url:
            break
        # The 'next' link is requested as-is, without extra params.
        reply = self._requests_get(next_url)
def tester(start, end):
    '''Check that archive.list returns the single prepared record.

    The shared random_metadata dict is updated in place with the
    normalized start and end. A one-record response with no next page is
    then prepared for the query derived from that metadata, and the raw
    start/end values are passed to archive.list.
    '''
    random_metadata['start'] = Metadata.normalize_date(start)
    random_metadata['end'] = Metadata.normalize_date(end)

    record = {
        'url': 's3://bucket/file',
        'metadata': random_metadata,
    }
    page = {
        'records': [record],
        'next': None,
    }
    prepare_response(
        page,
        what=random_metadata['what'],
        start=random_metadata['start'],
        end=random_metadata['end'],
    )

    listing = list(
        archive.list(random_metadata['what'], start=start, end=end)
    )
    assert len(listing) == 1
    assert listing[0]['url'] == 's3://bucket/file'
    assert listing[0]['metadata'] == random_metadata
def test_normalize_garbage(basic_metadata):
    '''An unparseable date string raises InvalidDatalakeMetadata.'''
    garbage = 'bleeblaaablooo'
    with pytest.raises(InvalidDatalakeMetadata):
        Metadata.normalize_date(garbage)
def test_normalize_date_with_datetime(basic_metadata):
    '''A parsed datetime normalizes to the expected integer value.'''
    expected_ms = 1426809600000
    parsed = dateparse('2015-03-20T00:00:00Z')
    assert Metadata.normalize_date(parsed) == expected_ms
def _get_create_time(cls, key):
    '''Return the creation time of key as a normalized date.

    The key's last_modified attribute is passed through
    Metadata.normalize_date and that result is returned as-is.
    '''
    normalize = Metadata.normalize_date
    return normalize(key.last_modified)