Ejemplo n.º 1
0
    async def file_status_check_async(self) -> None:
        """Refresh the file-status timestamps of this object and its children.

        Computes the last-modified time for the entity path, runs the status
        check on every data partition pattern and then on every data
        partition, stamps the check time (UTC) and passes the resulting
        modified time to report_most_recent_time_async. The adapter's file
        query cache context is disposed on exit, even if a check raises.
        """
        corpus = self.ctx.corpus
        # NOTE(review): the adapter is dereferenced without a None check --
        # confirm fetch_adapter always returns one for this namespace.
        adapter = corpus.storage.fetch_adapter(self.in_document._namespace)
        cache_context = adapter.create_file_query_cache_context()
        try:
            entity_corpus_path = corpus.storage.create_absolute_corpus_path(
                self.entity_path, self.in_document)
            newest_on_disk = await corpus._compute_last_modified_time_async(
                entity_corpus_path, self)

            # Patterns are checked before the individual partitions.
            for partition_pattern in self.data_partition_patterns:
                await partition_pattern.file_status_check_async()

            for data_partition in self.data_partitions:
                await data_partition.file_status_check_async()

            self.last_file_status_check_time = datetime.now(timezone.utc)
            latest = time_utils._max_time(newest_on_disk,
                                          self.last_file_modified_time)
            self.set_last_file_modified_time(latest)

            await self.report_most_recent_time_async(
                self.last_file_modified_time)
        finally:
            cache_context.dispose()
Ejemplo n.º 2
0
    async def test_loads_and_sets_times_correctly(self):
        """Verify modified/check times on a manifest and the objects beneath it.

        Loads a manifest from the test input folder, checks the times recorded
        at load, runs a file status check and then verifies that the check
        times moved forward and that the manifest's child modified time is the
        newer of its entity's and sub-manifest's modified times.
        """
        fixture_dir = TestHelper.get_input_folder_path(
            self.tests_subpath, 'test_loads_and_sets_times_correctly')
        started_at = datetime.now(timezone.utc)

        corpus = self.get_corpus()
        storage = corpus.storage
        storage.mount('someNamespace', LocalAdapter(fixture_dir))
        storage.mount('local', LocalAdapter(fixture_dir))
        storage.unmount('cdm')
        storage.default_namespace = 'local'

        manifest = await corpus.fetch_object_async(
            'someNamespace:/default.manifest.cdm.json')
        check_time_at_load = manifest.last_file_status_check_time
        # Hard coded because this time comes from inside the manifest file.
        self.assertEqual(
            time_utils._get_formatted_date_string(check_time_at_load),
            '2019-02-01T15:36:19.410Z')

        self.assertIsNotNone(manifest._file_system_modified_time)
        self.assertGreater(started_at, manifest._file_system_modified_time)

        # Pause before re-checking the file status.
        time.sleep(1)

        await manifest.file_status_check_async()

        self.assertGreater(manifest.last_file_status_check_time, started_at)
        self.assertGreater(manifest.last_file_status_check_time,
                           check_time_at_load)
        self.assertEqual(1, len(manifest.sub_manifests))
        self.assertGreater(
            manifest.sub_manifests[0].last_file_status_check_time, started_at)
        self.assertEqual(1, len(manifest.entities))
        self.assertEqual(1, len(manifest.entities[0].data_partitions))

        first_entity = manifest.entities[0]
        first_sub_manifest = manifest.sub_manifests[0]
        newest_child_time = time_utils._max_time(
            first_entity.last_file_modified_time,
            first_sub_manifest.last_file_modified_time)
        actual_text = time_utils._get_formatted_date_string(
            manifest.last_child_file_modified_time)
        expected_text = time_utils._get_formatted_date_string(
            newest_child_time)
        self.assertEqual(actual_text, expected_text)
Ejemplo n.º 3
0
    async def file_status_check_async(self) -> None:
        """Refresh file-status timestamps for this object and its children.

        Does nothing when no storage adapter is found for the document's
        namespace. Otherwise stamps the check time (UTC), reloads this object
        if its modified time differs from the recorded file-system time, and
        then runs the status check on every entity and every sub-manifest.
        The adapter's file query cache context is always disposed.
        """
        with logger._enter_scope(self._TAG, self.ctx,
                                 self.file_status_check_async.__name__):
            storage_adapter = self.ctx.corpus.storage.fetch_adapter(
                self.in_document.namespace)
            if not storage_adapter:
                # Without an adapter nothing is checked or updated.
                return

            cache_context = storage_adapter.create_file_query_cache_context()
            try:
                corpus = self.ctx.corpus
                time_on_disk = await corpus._fetch_last_modified_time_from_object_async(
                    self)

                self.last_file_status_check_time = datetime.now(timezone.utc)
                if not self.last_file_modified_time:
                    self.last_file_modified_time = self._file_system_modified_time

                # Reload the manifest if it has been updated in the file system.
                recorded_time = self._file_system_modified_time
                if time_on_disk and recorded_time and time_on_disk != recorded_time:
                    await self._reload_async()
                    self.last_file_modified_time = time_utils._max_time(
                        time_on_disk, self.last_file_modified_time)
                    self._file_system_modified_time = self.last_file_modified_time

                for entity_declaration in self.entities:
                    await entity_declaration.file_status_check_async()

                for child_manifest in self.sub_manifests:
                    await child_manifest.file_status_check_async()
            finally:
                cache_context.dispose()
Ejemplo n.º 4
0
    async def file_status_check_async(self) -> None:
        """Refresh the file-status timestamps of this object.

        Resolves self.definition to an absolute corpus path, asks the corpus
        for its last-modified time, stamps the check time (UTC), keeps the
        later of the computed and stored modified times, and passes that value
        to report_most_recent_time_async.
        """
        storage = self.ctx.corpus.storage
        definition_path = storage.create_absolute_corpus_path(
            self.definition, self.in_document)

        corpus = cast('CdmCorpusDefinition', self.ctx.corpus)
        computed_time = await corpus._compute_last_modified_time_async(
            definition_path, self)

        self.last_file_status_check_time = datetime.now(timezone.utc)
        self.last_file_modified_time = time_utils._max_time(
            computed_time, self.last_file_modified_time)

        await self.report_most_recent_time_async(self.last_file_modified_time)
Ejemplo n.º 5
0
    async def report_most_recent_time_async(self,
                                            child_time: datetime) -> None:
        """Record a child's modified time and forward the newest time to the owner.

        Stores child_time as last_child_file_modified_time, then takes the
        later of child_time and this object's own last_file_modified_time.
        That value is forwarded only when the owner is a CdmFileStatus and the
        value is truthy.
        """
        self.last_child_file_modified_time = child_time
        newest_here = time_utils._max_time(child_time,
                                           self.last_file_modified_time)

        # Stop when there is no file-status owner or no time to report.
        if not isinstance(self.owner, CdmFileStatus) or not newest_here:
            return

        parent = cast('CdmFileStatus', self.owner)
        await parent.report_most_recent_time_async(newest_here)
Ejemplo n.º 6
0
    async def file_status_check_async(self) -> None:
        """Refresh the file-status timestamps of this object.

        Resolves self.location to an absolute corpus path, fetches the
        last-modified time for that partition path, stamps the check time
        (UTC), keeps the later of the fetched and stored modified times, and
        passes that value to report_most_recent_time_async.
        """
        corpus = self.ctx.corpus
        partition_path = corpus.storage.create_absolute_corpus_path(
            self.location, self.in_document)
        fetched_time = await corpus._fetch_last_modified_time_from_partition_path_async(
            partition_path)

        # Update modified times.
        self.last_file_status_check_time = datetime.now(timezone.utc)
        newest = time_utils._max_time(fetched_time,
                                      self.last_file_modified_time)
        self.last_file_modified_time = newest

        await self.report_most_recent_time_async(self.last_file_modified_time)
Ejemplo n.º 7
0
    async def file_status_check_async(
        self,
        partition_file_status_check_type: Optional[
            'PartitionFileStatusCheckType'] = PartitionFileStatusCheckType.FULL,
        incremental_type: Optional[
            'CdmIncrementalPartitionType'] = CdmIncrementalPartitionType.NONE
    ) -> None:
        """
        Refresh file-status timestamps for this object and its children.

        Does nothing when no storage adapter is found for the document's
        namespace. Otherwise stamps the check time (UTC), reloads this object
        if its modified time differs from the recorded file-system time, and
        then runs the status check on every entity and every sub-manifest.

        :param partition_file_status_check_type: forwarded to local entity
            declarations only.
        :param incremental_type: forwarded to local entity declarations only.
        """
        with logger._enter_scope(self._TAG, self.ctx,
                                 self.file_status_check_async.__name__):
            storage_adapter = self.ctx.corpus.storage.fetch_adapter(
                self.in_document._namespace)
            if not storage_adapter:
                # Without an adapter nothing is checked or updated.
                return

            cache_context = storage_adapter.create_file_query_cache_context()
            try:
                corpus = self.ctx.corpus
                time_on_disk = await corpus._get_last_modified_time_from_object_async(
                    self)

                self.last_file_status_check_time = datetime.now(timezone.utc)
                if not self.last_file_modified_time:
                    self.last_file_modified_time = self._file_system_modified_time

                # Reload the manifest if it has been updated in the file system.
                recorded_time = self._file_system_modified_time
                if time_on_disk and recorded_time and time_on_disk != recorded_time:
                    await self._reload_async()
                    self.last_file_modified_time = time_utils._max_time(
                        time_on_disk, self.last_file_modified_time)
                    self._file_system_modified_time = self.last_file_modified_time

                for entity_declaration in self.entities:
                    # Imported inside the loop, as in the original code.
                    from cdm.objectmodel import CdmLocalEntityDeclarationDefinition
                    if isinstance(entity_declaration,
                                  CdmReferencedEntityDeclarationDefinition):
                        await entity_declaration.file_status_check_async()
                    elif isinstance(entity_declaration,
                                    CdmLocalEntityDeclarationDefinition):
                        # Only local declarations take the partition options.
                        local_declaration = cast(
                            CdmLocalEntityDeclarationDefinition,
                            entity_declaration)
                        await local_declaration.file_status_check_async(
                            partition_file_status_check_type,
                            incremental_type)

                for child_manifest in self.sub_manifests:
                    await child_manifest.file_status_check_async()
            finally:
                cache_context.dispose()
    async def file_status_check_async(self) -> None:
        """Refresh the file-status timestamps of this object and its children.

        Computes the last-modified time for the entity path, runs the status
        check on every data partition and then on every data partition
        pattern, stamps the check time (UTC), keeps the later of the computed
        and stored modified times, and passes that value to
        report_most_recent_time_async.
        """
        corpus = self.ctx.corpus
        entity_corpus_path = corpus.storage.create_absolute_corpus_path(
            self.entity_path, self.in_document)
        computed_time = await corpus._compute_last_modified_time_async(
            entity_corpus_path, self)

        # Partitions are checked before patterns in this variant.
        for data_partition in self.data_partitions:
            await data_partition.file_status_check_async()

        for partition_pattern in self.data_partition_patterns:
            await partition_pattern.file_status_check_async()

        self.last_file_status_check_time = datetime.now(timezone.utc)
        newest = time_utils._max_time(computed_time,
                                      self.last_file_modified_time)
        self.last_file_modified_time = newest

        await self.report_most_recent_time_async(self.last_file_modified_time)
    async def file_status_check_async(self) -> None:
        """Refresh file-status timestamps for this object and its children.

        Fetches this object's last-modified time, runs the status check on
        every entity and every sub-manifest, stamps the check time (UTC), and
        reloads this object when the fetched time differs from the recorded
        file-system time.
        """
        time_on_disk = await self.ctx.corpus._fetch_last_modified_time_from_object_async(
            self)

        # Children are checked before this object's own times are updated.
        for entity_declaration in self.entities:
            await entity_declaration.file_status_check_async()

        for child_manifest in self.sub_manifests:
            await child_manifest.file_status_check_async()

        self.last_file_status_check_time = datetime.now(timezone.utc)
        if not self.last_file_modified_time:
            self.last_file_modified_time = self._file_system_modified_time

        # Reload the manifest if it has been updated in the file system.
        recorded_time = self._file_system_modified_time
        if time_on_disk and recorded_time and time_on_disk != recorded_time:
            await self._reload_async()
            self.last_file_modified_time = time_utils._max_time(
                time_on_disk, self.last_file_modified_time)
            self._file_system_modified_time = self.last_file_modified_time
Ejemplo n.º 10
0
 async def report_most_recent_time_async(self,
                                         child_time: datetime) -> None:
     """Keep the newest child modified time seen so far.

     A falsy child_time is ignored; otherwise last_child_file_modified_time
     becomes the later of child_time and its current value.
     """
     if not child_time:
         return

     newest_child_time = time_utils._max_time(
         child_time, self.last_child_file_modified_time)
     self.last_child_file_modified_time = newest_child_time
    async def file_status_check_async(
        self,
        partition_file_status_check_type: Optional[
            'PartitionFileStatusCheckType'] = PartitionFileStatusCheckType.
        FULL,
        incremental_type: Optional[
            'CdmIncrementalPartitionType'] = CdmIncrementalPartitionType.NONE
    ) -> None:
        """Check the modified time for this object and any children.

        Computes the last-modified time for the entity path, runs the status
        check on the children selected by partition_file_status_check_type,
        stamps the check time (UTC) and passes the resulting modified time to
        report_most_recent_time_async. The adapter's file query cache context
        is disposed on exit, even if a check raises.

        :param partition_file_status_check_type: FULL checks the data
            partition patterns and data partitions, INCREMENTAL checks the
            incremental patterns and partitions, FULL_AND_INCREMENTAL checks
            both. Any other value (including None) checks no children.
        :param incremental_type: passed to _should_call_file_status_check to
            decide which incremental patterns/partitions are checked.
        """

        context = self.ctx.corpus.storage.fetch_adapter(
            self.in_document._namespace).create_file_query_cache_context()
        try:
            full_path = self.ctx.corpus.storage.create_absolute_corpus_path(
                self.entity_path, self.in_document)
            modified_time = await self.ctx.corpus._compute_last_modified_time_async(
                full_path, self)

            # Compare against the enum class itself. Looking the members up
            # through the argument fails when the argument is None and relies
            # on discouraged member-through-member enum access.
            check_full = partition_file_status_check_type in (
                PartitionFileStatusCheckType.FULL,
                PartitionFileStatusCheckType.FULL_AND_INCREMENTAL)
            check_incremental = partition_file_status_check_type in (
                PartitionFileStatusCheckType.INCREMENTAL,
                PartitionFileStatusCheckType.FULL_AND_INCREMENTAL)

            # check patterns first as this is a more performant way of querying file modification times
            # from ADLS and we can cache the times for reuse in the individual partition checks below

            if check_full:
                from cdm.objectmodel import CdmDataPartitionDefinition, CdmDataPartitionPatternDefinition
                for pattern in self.data_partition_patterns:
                    if pattern.is_incremental:
                        # Incremental patterns in the full list are logged
                        # as errors and skipped.
                        logger.error(
                            pattern.ctx, self._TAG,
                            self.file_status_check_async.__name__,
                            pattern.at_corpus_path, CdmLogCode.
                            ERR_UNEXPECTED_INCREMENTAL_PARTITION_TRAIT,
                            CdmDataPartitionPatternDefinition.__name__,
                            pattern.fetch_object_definition_name(),
                            Constants._INCREMENTAL_TRAIT_NAME,
                            CdmLocalEntityDeclarationDefinition.
                            data_partition_patterns.fget.__name__)
                    else:
                        await pattern.file_status_check_async()

                for partition in self.data_partitions:
                    if partition.is_incremental:
                        # Incremental partitions in the full list are logged
                        # as errors and skipped.
                        logger.error(
                            partition.ctx, self._TAG,
                            self.file_status_check_async.__name__,
                            partition.at_corpus_path, CdmLogCode.
                            ERR_UNEXPECTED_INCREMENTAL_PARTITION_TRAIT,
                            CdmDataPartitionDefinition.__name__,
                            partition.fetch_object_definition_name(),
                            Constants._INCREMENTAL_TRAIT_NAME,
                            CdmLocalEntityDeclarationDefinition.
                            data_partitions.fget.__name__)
                    else:
                        await partition.file_status_check_async()

            if check_incremental:
                for pattern in self.incremental_partition_patterns:
                    if self._should_call_file_status_check(
                            incremental_type, True, pattern):
                        await pattern.file_status_check_async()

                for partition in self.incremental_partitions:
                    if self._should_call_file_status_check(
                            incremental_type, False, partition):
                        await partition.file_status_check_async()

            # update modified times
            self.last_file_status_check_time = datetime.now(timezone.utc)
            self.set_last_file_modified_time(
                time_utils._max_time(modified_time,
                                     self.last_file_modified_time))

            await self.report_most_recent_time_async(
                self.last_file_modified_time)
        finally:
            context.dispose()