def test_initializer_preexisting_report(self):
    """Test getting a new accessor stats on a preexisting report."""
    saver = ReportStatsDBAccessor('myreport')
    saver.update(cursor_position=33,
                 last_completed_datetime='1/1/2011 11:11:11',
                 last_started_datetime='2/2/22 22:22:22',
                 etag='myetag')
    saver.commit()
    self.assertIsNotNone(saver._session)

    # Get another accessor for the same report and verify we get back
    # the right information.
    saver2 = ReportStatsDBAccessor('myreport')
    last_completed = saver2.get_last_completed_datetime()

    self.assertEqual(last_completed.year, 2011)
    self.assertEqual(last_completed.month, 1)
    self.assertEqual(last_completed.day, 1)
    self.assertEqual(last_completed.hour, 11)
    self.assertEqual(last_completed.minute, 11)
    self.assertEqual(last_completed.second, 11)

    # Fix: read the etag through the *second* accessor — the point of this
    # test is that a fresh accessor sees the preexisting row's data.
    self.assertEqual(saver2.get_etag(), 'myetag')

    saver.remove()
    saver.commit()
    saver.close_session()
    saver2.close_session()
def test_remove_temp_cur_files(self):
    """Test to remove temporary cost usage files."""
    cur_dir = tempfile.mkdtemp()

    manifest_data = {"assemblyId": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5"}
    manifest = '{}/{}'.format(cur_dir, 'koku-Manifest.json')
    with open(manifest, 'w') as outfile:
        json.dump(manifest_data, outfile)

    # Files whose name starts with the manifest's assemblyId, or that have
    # no processed_date, must be kept; everything else is eligible for removal.
    file_list = [{'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-koku-1.csv.gz',
                  'processed_date': datetime.datetime(year=2018, month=5, day=3)},
                 {'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-koku-2.csv.gz',
                  'processed_date': datetime.datetime(year=2018, month=5, day=3)},
                 {'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-koku-1.csv.gz',
                  'processed_date': datetime.datetime(year=2018, month=5, day=2)},
                 {'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-koku-1.csv.gz',
                  'processed_date': datetime.datetime(year=2018, month=5, day=1)},
                 {'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-koku-1.csv.gz',
                  'processed_date': None}]
    expected_delete_list = []
    for item in file_list:
        path = '{}/{}'.format(cur_dir, item['file'])
        # Use a context manager so the file handle is closed even if the
        # stats accessor below raises (original left the handle open on error).
        with open(path, 'w'):
            pass
        obj = self.manifest_accessor.get_manifest(self.assembly_id,
                                                  self.provider_id)
        stats = ReportStatsDBAccessor(item['file'], obj.id)
        stats.update(last_completed_datetime=item['processed_date'])
        stats.commit()
        stats.close_session()
        if not item['file'].startswith(manifest_data.get('assemblyId')) and item['processed_date']:
            expected_delete_list.append(path)

    removed_files = self.processor.remove_temp_cur_files(cur_dir)
    self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
    shutil.rmtree(cur_dir)
def download_report(self, date_time):
    """
    Download CUR for a given date.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        ([{}]) List of dictionaries containing file path and compression.

    """
    manifest = self._get_manifest(date_time)
    reports = manifest.get('reportKeys')

    cur_reports = []
    for report in reports:
        report_dictionary = {}
        local_s3_filename = utils.get_local_file_name(report)
        stats_recorder = ReportStatsDBAccessor(local_s3_filename)
        stored_etag = stats_recorder.get_etag()
        report_path = self.bucket_path + '/' + report
        LOG.info('Downloading %s with credential %s', report_path, self.credential)
        file_name, etag = self.download_file(report_path, stored_etag)
        stats_recorder.update(etag=etag)
        stats_recorder.commit()
        # Fix: close the session once we are done with this report's stats;
        # the sibling download_report implementation does this, and omitting
        # it leaks one DB session per report file.
        stats_recorder.close_session()

        report_dictionary['file'] = file_name
        report_dictionary['compression'] = 'GZIP'
        cur_reports.append(report_dictionary)
    return cur_reports
def download_report(self, date_time):
    """
    Download CUR for a given date.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        ([{}]) List of dictionaries containing file path and compression.

    """
    LOG.info('Current date is %s. Attempting to get manifest...', str(date_time))
    report_keys = self._get_manifest(date_time).get('reportKeys')

    downloaded = []
    for report_key in report_keys:
        local_name = utils.get_local_file_name(report_key)

        # Look up any previously stored etag so download_file can skip
        # unchanged reports, then record the etag we actually fetched.
        stats = ReportStatsDBAccessor(local_name)
        known_etag = stats.get_etag()
        file_name, etag = self.download_file(report_key, known_etag)
        stats.update(etag=etag)
        stats.commit()
        stats.close_session()

        downloaded.append({
            'file': file_name,
            'compression': self.report.get('Compression'),
        })
    return downloaded
def test_initializer(self):
    """Test that a fresh accessor opens a usable session."""
    accessor = ReportStatsDBAccessor('myreport')
    self.assertIsNotNone(accessor._session)

    # Tidy up the row this accessor created.
    accessor.remove()
    accessor.commit()
    accessor.close_session()
def test_log_last_completed_datetime(self):
    """Test convenience function for last completed processing time."""
    saver = ReportStatsDBAccessor('myreport', self.manifest_id)
    saver.log_last_completed_datetime()
    saver.commit()
    # Resolves the FIXME: verify the convenience method actually stamped
    # a completion time on the row.
    self.assertIsNotNone(saver.get_last_completed_datetime())
    saver.delete()
    saver.commit()
    saver.close_session()
def test_add_remove(self):
    """Test basic add/remove logic."""
    accessor = ReportStatsDBAccessor('myreport')
    accessor.commit()

    # Creating the accessor inserts a row for the report.
    self.assertTrue(accessor.does_db_entry_exist())
    self.assertEqual(accessor._get_db_obj_query().first().report_name,
                     'myreport')

    # Removing it deletes the row.
    accessor.remove()
    accessor.commit()
    self.assertIsNone(accessor._get_db_obj_query().first())

    accessor.close_session()
def test_remove_temp_cur_files(self):
    """Test to remove temporary usage report files."""
    insights_local_dir = tempfile.mkdtemp()

    manifest_data = {"uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5"}
    manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
    with open(manifest, 'w') as outfile:
        json.dump(manifest_data, outfile)

    # Files whose name starts with the manifest uuid, or with no
    # processed_date, must be kept; everything else should be removed.
    file_list = [
        {
            'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-1.csv.gz',
            'processed_date': datetime.datetime(year=2018, month=5, day=3),
        },
        {
            'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-2.csv.gz',
            'processed_date': datetime.datetime(year=2018, month=5, day=3),
        },
        {
            'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-ocp-1.csv.gz',
            'processed_date': datetime.datetime(year=2018, month=5, day=2),
        },
        {
            'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-ocp-1.csv.gz',
            'processed_date': datetime.datetime(year=2018, month=5, day=1),
        },
        {
            'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-ocp-1.csv.gz',
            'processed_date': None,
        },
    ]
    expected_delete_list = []
    for item in file_list:
        path = '{}/{}'.format(insights_local_dir, item['file'])
        # Use a context manager so the handle is closed even if the stats
        # accessor below raises (original left the handle open on error).
        with open(path, 'w'):
            pass
        stats = ReportStatsDBAccessor(item['file'], None)
        stats.update(last_completed_datetime=item['processed_date'])
        stats.commit()
        stats.close_session()
        if (not item['file'].startswith(manifest_data.get('uuid'))
                and item['processed_date']):
            expected_delete_list.append(path)

    removed_files = self.ocp_processor.remove_temp_cur_files(
        insights_local_dir, manifest_id=None)
    self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
    shutil.rmtree(insights_local_dir)
def test_update(self):
    """Test updating an existing row."""
    accessor = ReportStatsDBAccessor('myreport', self.manifest_id)
    accessor.commit()
    self.assertEqual(accessor._get_db_obj_query().first().report_name,
                     'myreport')

    # Update every column at once and confirm each stuck.
    accessor.update(
        cursor_position=33,
        last_completed_datetime='1/1/2011 11:11:11',
        last_started_datetime='2/2/22 22:22:22',
        etag='myetag',
    )
    accessor.commit()
    self.assertEqual(accessor.get_cursor_position(), 33)

    # Datetime strings should parse into full datetime objects.
    completed = accessor.get_last_completed_datetime()
    for attr, expected in (('year', 2011), ('month', 1), ('day', 1),
                           ('hour', 11), ('minute', 11), ('second', 11)):
        self.assertEqual(getattr(completed, attr), expected)

    started = accessor.get_last_started_datetime()
    for attr, expected in (('year', 2022), ('month', 2), ('day', 2),
                           ('hour', 22), ('minute', 22), ('second', 22)):
        self.assertEqual(getattr(started, attr), expected)

    # Dedicated setter path.
    accessor.set_cursor_position(42)
    accessor.commit()
    self.assertEqual(accessor.get_cursor_position(), 42)
    self.assertEqual(accessor.get_etag(), 'myetag')

    # Partial update leaves other columns untouched.
    accessor.update(cursor_position=100)
    accessor.commit()
    self.assertEqual(accessor.get_cursor_position(), 100)

    accessor.delete()
    accessor.commit()
    self.assertIsNone(accessor._get_db_obj_query().first())
    accessor.close_session()
def _process_report_file(schema_name, report_path, compression):
    """
    Task to process a Report.

    Args:
        schema_name (String) db schema name
        report_path (String) path to downloaded reports
        compression (String) 'PLAIN' or 'GZIP'

    Returns:
        None

    """
    stmt = ('Processing Report:'
            ' schema_name: {},'
            ' report_path: {},'
            ' compression: {}')
    log_statement = stmt.format(schema_name, report_path, compression)
    LOG.info(log_statement)
    mem = psutil.virtual_memory()
    # Fixed typo in log message: 'Avaiable' -> 'Available'.
    mem_msg = 'Available memory: {} bytes ({}%)'.format(mem.free, mem.percent)
    LOG.info(mem_msg)

    # path.basename is the idiomatic (and platform-aware) way to get the
    # file name; avoids hand-splitting on '/'.
    file_name = path.basename(report_path)

    # Record when processing started, process, then record completion so
    # the stats row brackets the actual work.
    stats_recorder = ReportStatsDBAccessor(file_name)
    stats_recorder.log_last_started_datetime()
    stats_recorder.commit()

    processor = ReportProcessor(schema_name=schema_name,
                                report_path=report_path,
                                compression=compression)
    processor.process()

    stats_recorder.log_last_completed_datetime()
    stats_recorder.commit()
    stats_recorder.close_session()

    files = remove_files.remove_temp_cur_files(path.dirname(report_path))
    LOG.info('Temporary files removed: %s', str(files))
def test_log_last_completed_datetime(self):
    """Test the convenience helper that stamps last completed processing time."""
    accessor = ReportStatsDBAccessor('myreport')
    accessor.commit()

    # Exercise the helper; it should not raise on a freshly created row.
    accessor.log_last_completed_datetime()
    accessor.commit()

    # Tidy up.
    accessor.remove()
    accessor.commit()
    accessor.close_session()
def test_remove_temp_cur_files(self):
    """Test to remove temporary usage report files."""
    insights_local_dir = tempfile.mkdtemp()
    cluster_id = 'my-ocp-cluster'
    manifest_date = "2018-05-01"

    manifest_data = {
        "uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5",
        "cluster_id": cluster_id,
        "date": manifest_date
    }
    manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
    with open(manifest, 'w') as outfile:
        json.dump(manifest_data, outfile)

    # Files whose name starts with the manifest uuid, or with no
    # processed_date, must be kept; everything else should be removed.
    file_list = [{
        'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-1.csv.gz',
        'processed_date': datetime.datetime(year=2018, month=5, day=3)
    }, {
        'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-2.csv.gz',
        'processed_date': datetime.datetime(year=2018, month=5, day=3)
    }, {
        'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-ocp-1.csv.gz',
        'processed_date': datetime.datetime(year=2018, month=5, day=2)
    }, {
        'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-ocp-1.csv.gz',
        'processed_date': datetime.datetime(year=2018, month=5, day=1)
    }, {
        'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-ocp-1.csv.gz',
        'processed_date': None
    }]
    expected_delete_list = []
    for item in file_list:
        path = '{}/{}'.format(insights_local_dir, item['file'])
        # Use a context manager so the handle is closed even if the stats
        # accessor below raises (original left the handle open on error).
        with open(path, 'w'):
            pass
        obj = self.manifest_accessor.get_manifest(self.assembly_id,
                                                  self.provider_id)
        stats = ReportStatsDBAccessor(item['file'], obj.id)
        stats.update(last_completed_datetime=item['processed_date'])
        stats.commit()
        stats.close_session()
        if not item['file'].startswith(
                manifest_data.get('uuid')) and item['processed_date']:
            expected_delete_list.append(path)

    # Point the processor at a fake destination dir for this cluster/month.
    fake_dir = tempfile.mkdtemp()
    with patch.object(Config, 'INSIGHTS_LOCAL_REPORT_DIR', fake_dir):
        destination_dir = '{}/{}/{}'.format(
            fake_dir,
            cluster_id,
            month_date_range(parser.parse(manifest_date)))
        os.makedirs(destination_dir, exist_ok=True)
        removed_files = self.ocp_processor.remove_temp_cur_files(
            insights_local_dir)
        self.assertEqual(sorted(removed_files), sorted(expected_delete_list))
    shutil.rmtree(insights_local_dir)
    shutil.rmtree(fake_dir)
def test_download_current_report(self):
    """End-to-end download test against a mocked (moto) S3 bucket.

    Seeds a bucket with random CSV report files and a manifest that points
    at one of them, runs download_current_report, and verifies the expected
    local file path and that an etag was recorded for the download.
    """
    # Current billing period: first of this month through first of next.
    fake_report_date = datetime.today().replace(day=1)
    fake_report_end_date = fake_report_date + relativedelta(months=+1)
    report_range = '{}-{}'.format(fake_report_date.strftime('%Y%m%d'),
                                  fake_report_end_date.strftime('%Y%m%d'))

    # Moto setup
    conn = boto3.resource('s3', region_name=self.selected_region)
    conn.create_bucket(Bucket=self.fake_bucket_name)

    # push mocked csvs into Moto env
    fake_csv_files = {}
    for x in range(0, random.randint(2, 10)):
        csv_filename = '{}.csv'.format('-'.join(
            self.fake.words(random.randint(2, 5))))

        # mocked report file definition
        fake_report_file = '{}/{}/{}/{}/{}'.format(self.fake_bucket_prefix,
                                                   self.fake_report_name,
                                                   report_range,
                                                   uuid.uuid4(),
                                                   csv_filename)
        fake_csv_files[csv_filename] = fake_report_file

        fake_csv_body = ','.join(self.fake.words(random.randint(5, 10)))
        conn.Object(self.fake_bucket_name,
                    fake_report_file).put(Body=fake_csv_body)
        # Sanity check: the object round-trips through moto intact.
        key = conn.Object(self.fake_bucket_name, fake_report_file).get()
        self.assertEqual(fake_csv_body, str(key['Body'].read(), 'utf-8'))

    # mocked Manifest definition — references exactly one of the CSVs.
    selected_csv = random.choice(list(fake_csv_files.keys()))
    fake_object = '{}/{}/{}/{}-Manifest.json'.format(self.fake_bucket_prefix,
                                                     self.fake_report_name,
                                                     report_range,
                                                     self.fake_report_name)
    fake_object_body = {'reportKeys': [fake_csv_files[selected_csv]]}

    # push mocked manifest into Moto env
    conn.Object(self.fake_bucket_name,
                fake_object).put(Body=json.dumps(fake_object_body))
    key = conn.Object(self.fake_bucket_name, fake_object).get()
    self.assertEqual(fake_object_body, json.load(key['Body']))

    # actual test
    out = self.report_downloader.download_current_report()
    files_list = []
    for cur_dict in out:
        files_list.append(cur_dict['file'])
        self.assertIsNotNone(cur_dict['compression'])

    # Only the manifest-referenced CSV should have been downloaded, to the
    # expected local path under DATA_DIR.
    report_key = fake_object_body.get('reportKeys').pop()
    expected_assembly_id = utils.get_assembly_id_from_cur_key(report_key)
    expected_csv = '{}/{}/aws/{}/{}-{}'.format(DATA_DIR,
                                               self.fake_customer_name,
                                               self.fake_bucket_name,
                                               expected_assembly_id,
                                               selected_csv)
    self.assertEqual(files_list, [expected_csv])

    # Verify etag is stored
    for cur_dict in out:
        cur_file = cur_dict['file']
        file_name = cur_file.split('/')[-1]
        stats_recorder = ReportStatsDBAccessor(file_name)
        self.assertIsNotNone(stats_recorder.get_etag())

        # Cleanup: removing the row means a fresh accessor sees no etag.
        stats_recorder.remove()
        stats_recorder.commit()
        stats_recorder2 = ReportStatsDBAccessor(file_name)
        self.assertIsNone(stats_recorder2.get_etag())
        stats_recorder.close_session()
        stats_recorder2.close_session()