def _run_crawler(self, config):
    """Runs the crawler with a specific InventoryConfig.

    The mocked storage download is redirected to the mock CAI dump files
    located under TEST_RESOURCE_DIR_PATH.

    Args:
        config (InventoryConfig): The configuration to test.

    Returns:
        dict: the resource counts returned by the crawler.
    """

    # Mock download to return correct test data file
    def _fake_download(full_bucket_path, output_file):
        """Writes the mock dump selected by the bucket path to output_file.

        Args:
            full_bucket_path (str): Path requested by the code under test.
            output_file (file): Writable binary file object to fill.

        Raises:
            AssertionError: If the path matches none of the known dumps.
        """
        if 'resource' in full_bucket_path:
            dump_name = 'mock_cai_resources.dump'
        elif 'iam_policy' in full_bucket_path:
            dump_name = 'mock_cai_iam_policies.dump'
        else:
            # Previously this fell through and crashed with an opaque
            # UnboundLocalError; fail with a message naming the path instead.
            raise AssertionError(
                'Unexpected download path in test: %s' % full_bucket_path)

        fake_file = os.path.join(TEST_RESOURCE_DIR_PATH, dump_name)
        with open(fake_file, 'rb') as f:
            output_file.write(f.read())

    with MemoryStorage() as storage:
        progresser = NullProgresser()
        with gcp_api_mocks.mock_gcp() as gcp_mocks:
            gcp_mocks.mock_storage.download.side_effect = _fake_download
            run_crawler(storage,
                        progresser,
                        config,
                        parallel=False,
                        threads=1)

        self.assertEqual(0,
                         progresser.errors,
                         'No errors should have occurred')

        return self._get_resource_counts_from_storage(storage)
def _run_crawler(self, config, has_org_access=True):
    """Runs the crawler with a specific InventoryConfig.

    The crawl is executed serially against mocked GCP APIs and an in-memory
    storage, and must finish without the progresser recording any error.

    Args:
        config (InventoryConfig): The configuration to test.
        has_org_access (bool): True if crawler has access to the org
            resource.

    Returns:
        dict: the resource counts returned by the crawler.
    """
    with MemoryStorage() as mem_storage:
        progress = NullProgresser()
        api_mocks = gcp_api_mocks.mock_gcp(has_org_access=has_org_access)
        with api_mocks:
            run_crawler(
                mem_storage, progress, config, parallel=False, threads=1)

        self.assertEqual(
            0, progress.errors, 'No errors should have occurred')

        counts = self._get_resource_counts_from_storage(mem_storage)
        return counts
def _run_crawler(self, config, has_org_access=True, session=None):
    """Runs the crawler with a specific InventoryConfig.

    The crawl is executed in parallel mode against mocked GCP APIs and must
    finish without the progresser recording any error.

    Args:
        config (InventoryConfig): The configuration to test.
        has_org_access (bool): True if crawler has access to the org
            resource.
        session (object): An existing sql session, required for testing
            Cloud Asset API integration.

    Returns:
        dict: the resource counts returned by the crawler.
    """
    with MemoryStorage(session=session) as mem_storage:
        progress = NullProgresser()
        api_mocks = gcp_api_mocks.mock_gcp(has_org_access=has_org_access)
        with api_mocks:
            run_crawler(mem_storage, progress, config, parallel=True)

        self.assertEqual(
            0, progress.errors, 'No errors should have occurred')

        counts = self._get_resource_counts_from_storage(mem_storage)
        return counts
def main():
    """Create CAI dump files from fake data.

    Crawls the mocked GCP APIs into an in-memory storage, converts every
    stored item into its resource / IAM policy asset, appends the lines of
    the additional asset files (lines starting with '#' are skipped) and
    writes both collections to their dump files.
    """
    logger.enable_console_log()

    config = InventoryConfig(
        gcp_api_mocks.ORGANIZATION_ID, '', {}, '', {'enabled': False})
    config.set_service_config(TestServiceConfig('sqlite', config))

    resources = []
    iam_policies = []

    with MemoryStorage() as storage:
        progresser = NullProgresser()
        with gcp_api_mocks.mock_gcp():
            run_crawler(storage, progresser, config, parallel=False)

            for stored_item in storage.mem.values():
                resource, iam_policy = convert_item_to_assets(stored_item)
                if resource:
                    resources.append(resource)
                if iam_policy:
                    iam_policies.append(iam_policy)

    # Hand-maintained extra assets are appended after the crawled ones.
    extra_sources = (
        (ADDITIONAL_RESOURCES_FILE, resources),
        (ADDITIONAL_IAM_POLCIIES_FILE, iam_policies),
    )
    for extra_path, assets in extra_sources:
        with open(extra_path, 'r') as extra_file:
            assets.extend(
                line.strip()
                for line in extra_file
                if not line.startswith('#'))

    write_data(resources, RESOURCE_DUMP_FILE)
    write_data(iam_policies, IAM_POLICY_DUMP_FILE)
def run_inventory(service_config, queue, session, progresser, background):
    """Runs the inventory given the environment configuration.

    The progresser is tagged with the inventory index id and pushed onto the
    queue before crawling starts. The storage is committed when the crawl
    succeeds and rolled back when it raises.

    Args:
        service_config (object): Service configuration.
        queue (object): Queue to push status updates into.
        session (object): Database session.
        progresser (object): Progresser implementation to use.
        background (bool): whether to run the inventory in background

    Returns:
        QueueProgresser: Returns the result of the crawl.

    Raises:
        Exception: Reraises any exception.
    """
    storage_factory = service_config.get_storage_class()

    with storage_factory(session) as storage:
        try:
            progresser.inventory_index_id = storage.inventory_index.id
            progresser.final_message = bool(background)
            queue.put(progresser)
            inventory_config = service_config.get_inventory_config()
            crawl_result = run_crawler(storage, progresser, inventory_config)
        except Exception as e:
            LOGGER.exception(e)
            storage.rollback()
            raise

        # Only reached when the crawl finished without raising.
        storage.commit()
        return crawl_result
def test_crawl_cai_data_with_asset_types(self):
    """Validate including asset_types in the CAI inventory config works."""
    asset_types = [
        'cloudresourcemanager.googleapis.com/Folder',
        'cloudresourcemanager.googleapis.com/Organization',
        'cloudresourcemanager.googleapis.com/Project'
    ]
    inventory_config = InventoryConfig(gcp_api_mocks.ORGANIZATION_ID,
                                       '',
                                       {},
                                       0,
                                       {'enabled': True,
                                        'gcs_path': 'gs://test-bucket',
                                        'asset_types': asset_types})
    inventory_config.set_service_config(FakeServerConfig('fake_engine'))

    def _filter_dump(dump_name):
        """Returns the lines of a mock dump that name a filtered asset type.

        Args:
            dump_name (str): File name under TEST_RESOURCE_DIR_PATH.

        Returns:
            str: The matching lines, concatenated in file order.
        """
        quoted_types = ['"%s"' % asset_type for asset_type in asset_types]
        dump_path = os.path.join(TEST_RESOURCE_DIR_PATH, dump_name)
        with open(dump_path, 'r') as dump:
            return ''.join(
                line for line in dump
                if any(quoted in line for quoted in quoted_types))

    # Create subsets of the mock resource dumps that only contain the
    # filtered asset types
    filtered_assets = _filter_dump('mock_cai_resources.dump')
    filtered_iam = _filter_dump('mock_cai_iam_policies.dump')
    filtered_org = _filter_dump('mock_cai_org_policies.dump')
    filtered_access = _filter_dump('mock_cai_access_policies.dump')

    with unittest_utils.create_temp_file(filtered_assets) as resources:
        with unittest_utils.create_temp_file(filtered_iam) as iam_policies:
            with unittest_utils.create_temp_file(
                    filtered_org) as org_policies:
                with unittest_utils.create_temp_file(
                        filtered_access) as access_policies:

                    # Mock download to return correct test data file
                    def _fake_download(full_bucket_path, output_file):
                        """Writes the filtered dump matching the path.

                        Raises:
                            AssertionError: If the path matches none of
                                the known content types.
                        """
                        if 'resource' in full_bucket_path:
                            fake_file = resources
                        elif 'iam_policy' in full_bucket_path:
                            fake_file = iam_policies
                        elif 'org_policy' in full_bucket_path:
                            fake_file = org_policies
                        elif 'access_policy' in full_bucket_path:
                            fake_file = access_policies
                        else:
                            # Previously fell through to an opaque
                            # UnboundLocalError on fake_file.
                            raise AssertionError(
                                'Unexpected download path in test: %s'
                                % full_bucket_path)
                        with open(fake_file, 'rb') as f:
                            output_file.write(f.read())

                    with MemoryStorage() as storage:
                        progresser = NullProgresser()
                        with gcp_api_mocks.mock_gcp() as gcp_mocks:
                            gcp_mocks.mock_storage.download.side_effect = (
                                _fake_download)
                            run_crawler(storage,
                                        progresser,
                                        inventory_config)

                            # Validate export_assets called with asset_types
                            expected_calls = [
                                mock.call(gcp_api_mocks.ORGANIZATION_ID,
                                          output_config=mock.ANY,
                                          content_type=content_type,
                                          asset_types=asset_types,
                                          blocking=mock.ANY,
                                          timeout=mock.ANY)
                                for content_type in ('RESOURCE',
                                                     'IAM_POLICY',
                                                     'ORG_POLICY',
                                                     'ACCESS_POLICY')
                            ]
                            (gcp_mocks.mock_cloudasset.export_assets.
                             assert_has_calls(expected_calls, any_order=True))

                        self.assertEqual(0,
                                         progresser.errors,
                                         'No errors should have occurred')

                        result_counts = (
                            self._get_resource_counts_from_storage(storage))

    expected_counts = {
        'crm_access_level': {'resource': 3},
        'crm_access_policy': {'resource': 1},
        'crm_org_policy': {'resource': 3},
        'crm_service_perimeter': {'resource': 1},
        'folder': {'iam_policy': 3, 'resource': 3},
        'gsuite_group': {'resource': 4},
        'gsuite_group_member': {'resource': 1},
        'gsuite_groups_settings': {'resource': 4},
        'gsuite_user': {'resource': 4},
        'gsuite_user_member': {'resource': 3},
        'lien': {'resource': 1},
        'organization': {'iam_policy': 1, 'resource': 1},
        'project': {'billing_info': 4,
                    'enabled_apis': 4,
                    'iam_policy': 4,
                    'resource': 4},
        'role': {'resource': 18},
        'sink': {'resource': 6},
    }
    self.assertEqual(expected_counts, result_counts)
def test_crawl_cai_data_with_asset_types(self):
    """Validate including asset_types in the CAI inventory config works."""
    asset_types = [
        'cloudresourcemanager.googleapis.com/Folder',
        'cloudresourcemanager.googleapis.com/Organization',
        'cloudresourcemanager.googleapis.com/Project'
    ]
    cai_settings = {
        'enabled': True,
        'gcs_path': 'gs://test-bucket',
        'asset_types': asset_types
    }
    inventory_config = InventoryConfig(
        gcp_api_mocks.ORGANIZATION_ID, '', {}, 0, cai_settings)
    inventory_config.set_service_config(FakeServerConfig(self.engine))

    def _filter_dump(dump_name):
        """Returns the lines of a mock dump that name a filtered asset type.

        Args:
            dump_name (str): File name under TEST_RESOURCE_DIR_PATH.

        Returns:
            str: The matching lines, concatenated in file order.
        """
        kept_lines = []
        dump_path = os.path.join(TEST_RESOURCE_DIR_PATH, dump_name)
        with open(dump_path, 'r') as dump:
            for dump_line in dump:
                if any('"%s"' % asset_type in dump_line
                       for asset_type in asset_types):
                    kept_lines.append(dump_line)
        return ''.join(kept_lines)

    # Create subsets of the mock resource dumps that only contain the
    # filtered asset types
    filtered_assets = _filter_dump('mock_cai_resources.dump')
    filtered_iam = _filter_dump('mock_cai_iam_policies.dump')

    with unittest_utils.create_temp_file(filtered_assets) as resources, \
            unittest_utils.create_temp_file(filtered_iam) as iam_policies:

        def _copy_file_from_gcs(file_path, *args, **kwargs):
            """Fake copy_file_from_gcs."""
            del args, kwargs
            if 'resource' in file_path:
                return resources
            if 'iam_policy' in file_path:
                return iam_policies
            # Any other path yields None, as before.
            return None

        self.mock_copy_file_from_gcs.side_effect = _copy_file_from_gcs

        with MemoryStorage(session=self.session) as storage:
            progresser = NullProgresser()
            with gcp_api_mocks.mock_gcp() as gcp_mocks:
                run_crawler(storage, progresser, inventory_config)

                # Validate export_assets called with asset_types
                expected_calls = [
                    mock.call(gcp_api_mocks.ORGANIZATION_ID,
                              mock.ANY,
                              content_type=content_type,
                              asset_types=asset_types,
                              blocking=mock.ANY,
                              timeout=mock.ANY)
                    for content_type in ('RESOURCE', 'IAM_POLICY')
                ]
                export_assets = gcp_mocks.mock_cloudasset.export_assets
                export_assets.assert_has_calls(expected_calls,
                                               any_order=True)

            self.assertEqual(0,
                             progresser.errors,
                             'No errors should have occurred')

            result_counts = self._get_resource_counts_from_storage(storage)

    expected_counts = {
        'crm_org_policy': {'resource': 5},
        'folder': {'iam_policy': 3, 'resource': 3},
        'gsuite_group': {'resource': 4},
        'gsuite_group_member': {'resource': 1},
        'gsuite_groups_settings': {'resource': 4},
        'gsuite_user': {'resource': 4},
        'gsuite_user_member': {'resource': 3},
        'kubernetes_cluster': {'resource': 1, 'service_config': 1},
        'lien': {'resource': 1},
        'organization': {'iam_policy': 1, 'resource': 1},
        'project': {'billing_info': 4,
                    'enabled_apis': 4,
                    'iam_policy': 4,
                    'resource': 4},
        'role': {'resource': 18},
        'sink': {'resource': 6},
    }
    self.assertEqual(expected_counts, result_counts)