# NOTE: reconstructed scaffolding -- the imports, class name, and setUp below
# are assumptions filled in so these excerpted tests run; the model import
# path in particular depends on the project layout.
from datetime import date

import boto
from boto.s3.key import Key
from django.test import TestCase

import fetch_referrer_logs
from models import AccessLogAggregate, FilenameLog  # import path assumed


class ReferrerLogTest(TestCase):

    def setUp(self):
        # boto picks up AWS credentials from the environment or its config file.
        self.conn = boto.connect_s3()

    def test_can_download_keys(self):
        bucket = self.conn.get_bucket("edx-s3-logs", validate=False)
        # Get only today's keys to reduce search time
        accessible_keys = fetch_referrer_logs.get_accessible_keys(
            bucket,
            "edx-static-cloudfront/E32IHGJJSQ4SLL." + date.today().strftime('%Y-%m-%d'))
        self.assertIsNotNone(accessible_keys)
        self.assertIsInstance(accessible_keys[0], Key)

    def test_can_unzip_one_file(self):
        bucket = self.conn.get_bucket("edx-s3-logs", validate=False)
        # Get only today's keys to reduce search time
        accessible_keys = fetch_referrer_logs.get_accessible_keys(
            bucket,
            "edx-static-cloudfront/E32IHGJJSQ4SLL." + date.today().strftime('%Y-%m-%d'))
        num_files_processed = fetch_referrer_logs.process_keys([accessible_keys[0]])
        self.assertEqual(num_files_processed, 1)
        self.assertEqual(FilenameLog.objects.count(), 1)
        # A queryset is never None, so assert that aggregates were actually created.
        self.assertTrue(AccessLogAggregate.objects.exists())

    def test_todays_logs(self):
        bucket = self.conn.get_bucket("edx-s3-logs", validate=False)
        # Get only today's keys to reduce search time
        accessible_keys = fetch_referrer_logs.get_accessible_keys(
            bucket,
            "edx-static-cloudfront/E32IHGJJSQ4SLL." + date.today().strftime('%Y-%m-%d'))
        # Process only the first 10 files to save time
        num_files_processed = fetch_referrer_logs.process_keys(accessible_keys[:10])
        self.assertEqual(num_files_processed, 10)
        self.assertEqual(FilenameLog.objects.count(), 10)
        self.assertTrue(AccessLogAggregate.objects.exists())

    def test_no_duplicate_files_are_processed(self):
        bucket = self.conn.get_bucket("edx-s3-logs", validate=False)
        # Get only today's keys to reduce search time
        accessible_keys = fetch_referrer_logs.get_accessible_keys(
            bucket,
            "edx-static-cloudfront/E32IHGJJSQ4SLL." + date.today().strftime('%Y-%m-%d'))
        num_files_processed = fetch_referrer_logs.process_keys(accessible_keys[:3])
        self.assertEqual(num_files_processed, 3)
        self.assertEqual(FilenameLog.objects.count(), 3)
        self.assertTrue(AccessLogAggregate.objects.exists())
        # Feed the same 3 files back in with one extra; only the extra should be processed
        num_files_processed = fetch_referrer_logs.process_keys(accessible_keys[:4])
        self.assertEqual(num_files_processed, 1)
        self.assertEqual(FilenameLog.objects.count(), 4)
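
# The CloudFront key prefix is rebuilt identically in every test above. A
# module-level helper along these lines (hypothetical; not part of the
# original suite) would keep the distribution ID and the date format in one
# place, e.g. fetch_referrer_logs.get_accessible_keys(bucket, todays_log_prefix()):

def todays_log_prefix(distribution="E32IHGJJSQ4SLL"):
    """Return the key prefix for today's CloudFront logs under edx-static-cloudfront."""
    return "edx-static-cloudfront/%s.%s" % (
        distribution, date.today().strftime('%Y-%m-%d'))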