def test_read_gunzip_file(self):
    """Stream a gzipped S3 object and check its first decompressed line.

    The object's body is wrapped in ``BotoStreamBody``, buffered,
    gunzipped and decoded as text.  The reader layers are closed when
    the test finishes, whether or not the assertion passes.
    """
    bucket, myfile = s3grep._parse_url(TestBotoStream.url)
    s3_object = boto3.resource('s3').Object(bucket, myfile)
    response = s3_object.get()
    raw_stream = boto_stream.BotoStreamBody(response['Body'])

    # GzipFile.close() leaves a caller-supplied fileobj open, so the
    # buffered reader needs its own context manager.
    with io.BufferedReader(raw_stream) as buffered:
        gunzipped = gzip.GzipFile(fileobj=buffered, mode='rb')
        # Closing the text wrapper also closes the GzipFile beneath it.
        with io.TextIOWrapper(gunzipped) as reader:
            # check the first line
            self.assertEqual(next(reader),
                             "common-crawl/crawl-data/CC-MAIN-2015-40/segments/"
                             "1443736672328.14/\n")
def test_grep_a_file(self):
    """Grep one S3 object and check the single ``key:line`` match written."""
    bucket_name, object_key = s3grep._parse_url(TestS3Grep.url)
    expected = ("common-crawl/crawl-data/CC-MAIN-2015-40/"
                "segment.paths.gz:"
                "common-crawl/crawl-data/CC-MAIN-2015-40/segments/"
                "1443737929054.69/\n")

    # Collect the grep output in memory so it can be compared as a whole.
    sink = io.StringIO()
    s3grep._grep_a_file(
        bucketstr=bucket_name,
        key=object_key,
        regex=r'.*1443737929054.*',
        output=sink,
    )

    self.assertEqual(sink.getvalue(), expected)
def test_read_full_binary_file(self):
    """Check that streaming an object yields the same bytes as a download.

    The object is fetched twice: once through ``BotoStreamBody`` wrapped
    in a buffered reader, and once with the regular boto3
    ``download_file`` call.  Both byte strings must be equal.
    """
    import os  # local import: only needed to build the scratch path

    bucket, myfile = s3grep._parse_url(TestBotoStream.url)
    resource = boto3.resource('s3')
    response = resource.Object(bucket, myfile).get()
    botostream = boto_stream.BotoStreamBody(body=response['Body'])

    # use the regular boto3 api
    # Download into a scratch directory rather than reopening a
    # still-open NamedTemporaryFile by name, which is not portable.
    with tempfile.TemporaryDirectory() as scratch_dir:
        local_path = os.path.join(scratch_dir, 'download')
        resource.Object(bucket, myfile).download_file(local_path)
        with open(local_path, 'rb') as rtfile:
            downloaded = rtfile.read()

    # Close the buffered reader once the comparison is done.
    with io.BufferedReader(botostream) as reader:
        self.assertEqual(downloaded, reader.read())
def test_parse_url(self):
    """An ``s3://bucket/key`` URL splits into its bucket and key parts."""
    bucket, key = s3grep._parse_url("s3://mybucket/myfile")
    # Compare both components at once; either mismatch fails the test.
    self.assertEqual((bucket, key), ("mybucket", "myfile"))