async def file_status_check_async(self) -> None:
    """Refresh the modified-time bookkeeping of this declaration and its children.

    Computes the last modified time for ``entity_path``, runs the file status
    check on every data partition pattern and then on every data partition,
    stamps ``last_file_status_check_time`` with the current UTC time, stores
    the newer of the computed and the previously known modified time, and
    finally reports that time through ``report_most_recent_time_async``.
    """
    storage = self.ctx.corpus.storage
    # NOTE(review): the adapter is used without a None check here - confirm
    # that fetch_adapter always resolves for this document's namespace.
    cache_context = storage.fetch_adapter(
        self.in_document._namespace).create_file_query_cache_context()

    try:
        absolute_path = storage.create_absolute_corpus_path(
            self.entity_path, self.in_document)
        computed_time = await self.ctx.corpus._compute_last_modified_time_async(
            absolute_path, self)

        # Patterns are visited before the individual partitions.
        for partition_pattern in self.data_partition_patterns:
            await partition_pattern.file_status_check_async()

        for data_partition in self.data_partitions:
            await data_partition.file_status_check_async()

        self.last_file_status_check_time = datetime.now(timezone.utc)
        newest_time = time_utils._max_time(
            computed_time, self.last_file_modified_time)
        self.set_last_file_modified_time(newest_time)

        await self.report_most_recent_time_async(self.last_file_modified_time)
    finally:
        # The cache context is disposed even when a check above raises.
        cache_context.dispose()
async def test_loads_and_sets_times_correctly(self):
    """Test modified times for manifest and files beneath it"""
    test_input_path = TestHelper.get_input_folder_path(
        self.tests_subpath, 'test_loads_and_sets_times_correctly')
    time_before_load = datetime.now(timezone.utc)

    # Mount the same input folder under two namespaces and make 'local' the default.
    corpus = self.get_corpus()
    storage = corpus.storage
    storage.mount('someNamespace', LocalAdapter(test_input_path))
    storage.mount('local', LocalAdapter(test_input_path))
    storage.unmount('cdm')
    storage.default_namespace = 'local'

    manifest = await corpus.fetch_object_async(
        'someNamespace:/default.manifest.cdm.json')
    status_time_at_load = manifest.last_file_status_check_time

    # hard coded because the time comes from inside the file
    formatted_load_status_time = time_utils._get_formatted_date_string(
        status_time_at_load)
    self.assertEqual(formatted_load_status_time, '2019-02-01T15:36:19.410Z')

    self.assertIsNotNone(manifest._file_system_modified_time)
    self.assertGreater(time_before_load, manifest._file_system_modified_time)

    # Wait so the status check time below is strictly later than the load.
    time.sleep(1)

    await manifest.file_status_check_async()

    # The check must have refreshed the status time on the manifest ...
    self.assertGreater(manifest.last_file_status_check_time, time_before_load)
    self.assertGreater(manifest.last_file_status_check_time, status_time_at_load)

    # ... and on its single sub-manifest.
    self.assertEqual(1, len(manifest.sub_manifests))
    self.assertGreater(
        manifest.sub_manifests[0].last_file_status_check_time,
        time_before_load)

    self.assertEqual(1, len(manifest.entities))
    self.assertEqual(1, len(manifest.entities[0].data_partitions))

    # The manifest's child time must equal the newer of its two children.
    first_entity = manifest.entities[0]
    first_sub_manifest = manifest.sub_manifests[0]
    expected_child_time = time_utils._max_time(
        first_entity.last_file_modified_time,
        first_sub_manifest.last_file_modified_time)
    self.assertEqual(
        time_utils._get_formatted_date_string(
            manifest.last_child_file_modified_time),
        time_utils._get_formatted_date_string(expected_child_time))
async def file_status_check_async(self) -> None:
    """Refresh the modified-time bookkeeping of this manifest and its children.

    Nothing is done when no storage adapter is found for the namespace of the
    owning document. Otherwise the manifest's last modified time is fetched,
    the manifest is reloaded if that time differs from the recorded file-system
    time, and every entity and sub-manifest is checked in turn.
    """
    with logger._enter_scope(self._TAG, self.ctx, self.file_status_check_async.__name__):
        adapter = self.ctx.corpus.storage.fetch_adapter(
            self.in_document.namespace)
        if not adapter:
            return

        cache_context = adapter.create_file_query_cache_context()
        try:
            fetched_time = await self.ctx.corpus._fetch_last_modified_time_from_object_async(
                self)

            self.last_file_status_check_time = datetime.now(timezone.utc)

            # Seed the modified time from the file-system time when it is unset.
            if not self.last_file_modified_time:
                self.last_file_modified_time = self._file_system_modified_time

            # Reload the manifest if it has been updated in the file system.
            recorded_fs_time = self._file_system_modified_time
            if fetched_time and recorded_fs_time and fetched_time != recorded_fs_time:
                await self._reload_async()
                # Read last_file_modified_time only after the reload finished.
                self.last_file_modified_time = time_utils._max_time(
                    fetched_time, self.last_file_modified_time)
                self._file_system_modified_time = self.last_file_modified_time

            for entity_declaration in self.entities:
                await entity_declaration.file_status_check_async()

            for child_manifest in self.sub_manifests:
                await child_manifest.file_status_check_async()
        finally:
            # The cache context is disposed even when a check above raises.
            cache_context.dispose()
async def file_status_check_async(self) -> None:
    """Refresh the modified-time bookkeeping of this object.

    Computes the last modified time for the corpus path built from
    ``definition``, stamps ``last_file_status_check_time`` with the current UTC
    time, keeps the newer of the computed and previously known modified time,
    and reports that time through ``report_most_recent_time_async``.
    """
    absolute_path = self.ctx.corpus.storage.create_absolute_corpus_path(
        self.definition, self.in_document)

    # cast() only informs type checkers; the corpus object itself is unchanged.
    corpus = cast('CdmCorpusDefinition', self.ctx.corpus)
    computed_time = await corpus._compute_last_modified_time_async(
        absolute_path, self)

    self.last_file_status_check_time = datetime.now(timezone.utc)
    self.last_file_modified_time = time_utils._max_time(
        computed_time, self.last_file_modified_time)

    await self.report_most_recent_time_async(self.last_file_modified_time)
async def report_most_recent_time_async(self, child_time: datetime) -> None:
    """Record a child's modified time and pass the newest time on to the owner.

    ``last_child_file_modified_time`` is overwritten with ``child_time``. The
    newer of ``child_time`` and this object's own ``last_file_modified_time``
    is then forwarded to ``owner``, provided the owner is a ``CdmFileStatus``
    and that newest time is truthy.
    """
    self.last_child_file_modified_time = child_time

    newest_here = time_utils._max_time(child_time, self.last_file_modified_time)

    parent = self.owner
    if not isinstance(parent, CdmFileStatus) or not newest_here:
        # Nothing to propagate: no file-status owner, or no time to report.
        return

    await cast('CdmFileStatus', parent).report_most_recent_time_async(newest_here)
async def file_status_check_async(self) -> None:
    """Refresh the modified-time bookkeeping of this partition.

    Fetches the last modified time for the corpus path built from
    ``location``, stamps ``last_file_status_check_time`` with the current UTC
    time, keeps the newer of the fetched and previously known modified time,
    and reports that time through ``report_most_recent_time_async``.
    """
    corpus = self.ctx.corpus
    partition_path = corpus.storage.create_absolute_corpus_path(
        self.location, self.in_document)
    fetched_time = await corpus._fetch_last_modified_time_from_partition_path_async(
        partition_path)

    # Update modified times.
    self.last_file_status_check_time = datetime.now(timezone.utc)
    newest_time = time_utils._max_time(
        fetched_time, self.last_file_modified_time)
    self.last_file_modified_time = newest_time

    await self.report_most_recent_time_async(self.last_file_modified_time)
async def file_status_check_async(
        self,
        partition_file_status_check_type: Optional['PartitionFileStatusCheckType'] = PartitionFileStatusCheckType.FULL,
        incremental_type: Optional['CdmIncrementalPartitionType'] = CdmIncrementalPartitionType.NONE
) -> None:
    """
    Refresh the modified-time bookkeeping of this manifest and its children.

    Nothing is done when no storage adapter is found for the namespace of the
    owning document. Otherwise the manifest's last modified time is fetched,
    the manifest is reloaded if that time differs from the recorded file-system
    time, and every entity and sub-manifest is checked in turn.

    :param partition_file_status_check_type: forwarded unchanged to local
        entity declarations.
    :param incremental_type: forwarded unchanged to local entity declarations.
    """
    with logger._enter_scope(self._TAG, self.ctx, self.file_status_check_async.__name__):
        adapter = self.ctx.corpus.storage.fetch_adapter(
            self.in_document._namespace)
        if not adapter:
            return

        cache_context = adapter.create_file_query_cache_context()
        try:
            fetched_time = await self.ctx.corpus._get_last_modified_time_from_object_async(
                self)

            self.last_file_status_check_time = datetime.now(timezone.utc)

            # Seed the modified time from the file-system time when it is unset.
            if not self.last_file_modified_time:
                self.last_file_modified_time = self._file_system_modified_time

            # Reload the manifest if it has been updated in the file system.
            recorded_fs_time = self._file_system_modified_time
            if fetched_time and recorded_fs_time and fetched_time != recorded_fs_time:
                await self._reload_async()
                # Read last_file_modified_time only after the reload finished.
                self.last_file_modified_time = time_utils._max_time(
                    fetched_time, self.last_file_modified_time)
                self._file_system_modified_time = self.last_file_modified_time

            for entity_declaration in self.entities:
                from cdm.objectmodel import CdmLocalEntityDeclarationDefinition

                if isinstance(entity_declaration, CdmReferencedEntityDeclarationDefinition):
                    # Referenced declarations take no partition options.
                    await entity_declaration.file_status_check_async()
                    continue

                if isinstance(entity_declaration, CdmLocalEntityDeclarationDefinition):
                    local_declaration = cast(
                        CdmLocalEntityDeclarationDefinition, entity_declaration)
                    await local_declaration.file_status_check_async(
                        partition_file_status_check_type, incremental_type)

            for child_manifest in self.sub_manifests:
                await child_manifest.file_status_check_async()
        finally:
            # The cache context is disposed even when a check above raises.
            cache_context.dispose()
async def file_status_check_async(self) -> None:
    """Refresh the modified-time bookkeeping of this declaration and its children.

    Computes the last modified time for ``entity_path``, runs the file status
    check on every data partition and then on every data partition pattern,
    stamps ``last_file_status_check_time`` with the current UTC time, keeps the
    newer of the computed and previously known modified time, and reports that
    time through ``report_most_recent_time_async``.
    """
    corpus = self.ctx.corpus
    absolute_path = corpus.storage.create_absolute_corpus_path(
        self.entity_path, self.in_document)
    computed_time = await corpus._compute_last_modified_time_async(
        absolute_path, self)

    # Partitions are visited before the patterns in this variant.
    for data_partition in self.data_partitions:
        await data_partition.file_status_check_async()

    for partition_pattern in self.data_partition_patterns:
        await partition_pattern.file_status_check_async()

    self.last_file_status_check_time = datetime.now(timezone.utc)
    newest_time = time_utils._max_time(
        computed_time, self.last_file_modified_time)
    self.last_file_modified_time = newest_time

    await self.report_most_recent_time_async(self.last_file_modified_time)
async def file_status_check_async(self) -> None:
    """Refresh the modified-time bookkeeping of this manifest and its children.

    The manifest's last modified time is fetched first, then every entity and
    sub-manifest is checked, and only afterwards are the manifest's own
    timestamps updated. The manifest is reloaded if the fetched time differs
    from the recorded file-system time.
    """
    fetched_time = await self.ctx.corpus._fetch_last_modified_time_from_object_async(
        self)

    for entity_declaration in self.entities:
        await entity_declaration.file_status_check_async()

    for child_manifest in self.sub_manifests:
        await child_manifest.file_status_check_async()

    self.last_file_status_check_time = datetime.now(timezone.utc)

    # Seed the modified time from the file-system time when it is unset.
    if not self.last_file_modified_time:
        self.last_file_modified_time = self._file_system_modified_time

    # Reload the manifest if it has been updated in the file system.
    recorded_fs_time = self._file_system_modified_time
    if fetched_time and recorded_fs_time and fetched_time != recorded_fs_time:
        await self._reload_async()
        # Read last_file_modified_time only after the reload finished.
        self.last_file_modified_time = time_utils._max_time(
            fetched_time, self.last_file_modified_time)
        self._file_system_modified_time = self.last_file_modified_time
async def report_most_recent_time_async(self, child_time: datetime) -> None:
    """Fold a child's modified time into ``last_child_file_modified_time``.

    A falsy ``child_time`` is ignored. Otherwise the stored value becomes the
    newer of ``child_time`` and the previously stored child time.
    """
    if not child_time:
        return

    previous_child_time = self.last_child_file_modified_time
    self.last_child_file_modified_time = time_utils._max_time(
        child_time, previous_child_time)
async def file_status_check_async(
        self,
        partition_file_status_check_type: Optional['PartitionFileStatusCheckType'] = PartitionFileStatusCheckType.FULL,
        incremental_type: Optional['CdmIncrementalPartitionType'] = CdmIncrementalPartitionType.NONE
) -> None:
    """Check the modified time for this object and any children.

    :param partition_file_status_check_type: selects which children are
        checked. ``FULL`` and ``FULL_AND_INCREMENTAL`` check the regular data
        partition patterns and data partitions; ``INCREMENTAL`` and
        ``FULL_AND_INCREMENTAL`` check the incremental partition patterns and
        incremental partitions. Any other value (including ``None``) checks no
        children and only updates this object's own times.
    :param incremental_type: passed to ``_should_call_file_status_check`` to
        decide, per incremental pattern/partition, whether it is checked.

    Regular patterns/partitions that are flagged ``is_incremental`` are not
    checked; an ERR_UNEXPECTED_INCREMENTAL_PARTITION_TRAIT error is logged for
    each of them instead.
    """
    # Compare against the enum class rather than looking the members up on the
    # argument value: the latter raises AttributeError for None and depends on
    # member-from-member enum access.
    check_full = partition_file_status_check_type in (
        PartitionFileStatusCheckType.FULL,
        PartitionFileStatusCheckType.FULL_AND_INCREMENTAL)
    check_incremental = partition_file_status_check_type in (
        PartitionFileStatusCheckType.INCREMENTAL,
        PartitionFileStatusCheckType.FULL_AND_INCREMENTAL)

    # NOTE(review): the adapter is used without a None check here - confirm
    # that fetch_adapter always resolves for this document's namespace.
    context = self.ctx.corpus.storage.fetch_adapter(
        self.in_document._namespace).create_file_query_cache_context()

    try:
        full_path = self.ctx.corpus.storage.create_absolute_corpus_path(
            self.entity_path, self.in_document)
        modified_time = await self.ctx.corpus._compute_last_modified_time_async(
            full_path, self)

        # check patterns first as this is a more performant way of querying file modification times
        # from ADLS and we can cache the times for reuse in the individual partition checks below
        if check_full:
            from cdm.objectmodel import CdmDataPartitionDefinition, CdmDataPartitionPatternDefinition

            for pattern in self.data_partition_patterns:
                if pattern.is_incremental:
                    logger.error(
                        pattern.ctx, self._TAG,
                        self.file_status_check_async.__name__,
                        pattern.at_corpus_path,
                        CdmLogCode.ERR_UNEXPECTED_INCREMENTAL_PARTITION_TRAIT,
                        CdmDataPartitionPatternDefinition.__name__,
                        pattern.fetch_object_definition_name(),
                        Constants._INCREMENTAL_TRAIT_NAME,
                        CdmLocalEntityDeclarationDefinition.data_partition_patterns.fget.__name__)
                else:
                    await pattern.file_status_check_async()

            for partition in self.data_partitions:
                if partition.is_incremental:
                    logger.error(
                        partition.ctx, self._TAG,
                        self.file_status_check_async.__name__,
                        partition.at_corpus_path,
                        CdmLogCode.ERR_UNEXPECTED_INCREMENTAL_PARTITION_TRAIT,
                        CdmDataPartitionDefinition.__name__,
                        partition.fetch_object_definition_name(),
                        Constants._INCREMENTAL_TRAIT_NAME,
                        CdmLocalEntityDeclarationDefinition.data_partitions.fget.__name__)
                else:
                    await partition.file_status_check_async()

        if check_incremental:
            for pattern in self.incremental_partition_patterns:
                if self._should_call_file_status_check(incremental_type, True, pattern):
                    await pattern.file_status_check_async()

            for partition in self.incremental_partitions:
                if self._should_call_file_status_check(incremental_type, False, partition):
                    await partition.file_status_check_async()

        # update modified times
        self.last_file_status_check_time = datetime.now(timezone.utc)
        self.set_last_file_modified_time(
            time_utils._max_time(modified_time, self.last_file_modified_time))

        await self.report_most_recent_time_async(self.last_file_modified_time)
    finally:
        # The cache context is disposed even when a check above raises.
        context.dispose()