예제 #1
0
    def to_data(instance: 'CdmDataPartitionDefinition',
                res_opt: 'ResolveOptions',
                options: 'CopyOptions') -> 'DataPartition':
        """Copy a data partition definition into a new ``DataPartition`` data object.

        Location, name and specialized schema are copied as-is. The two
        timestamps are rendered with ``time_utils._get_formatted_date_string``
        and the exhibited traits are copied with
        ``copy_data_utils._array_copy_data``.

        ``instance.arguments`` maps an argument name to a list of values; each
        value becomes its own ``Argument`` carrying that name. ``arguments`` is
        assigned on the result only when at least one such entry exists.

        :param instance: the partition definition to persist.
        :param res_opt: resolve options forwarded to the trait copy.
        :param options: copy options forwarded to the trait copy.
        :return: the populated ``DataPartition``.
        """
        def _as_argument(name, value):
            # One persisted Argument per (name, value) pair.
            entry = Argument()
            entry.name = name
            entry.value = value
            return entry

        result = DataPartition()

        result.location = instance.location
        result.name = instance.name
        result.specializedSchema = instance.specialized_schema

        status_checked = instance.last_file_status_check_time
        result.lastFileStatusCheckTime = time_utils._get_formatted_date_string(status_checked)
        file_modified = instance.last_file_modified_time
        result.lastFileModifiedTime = time_utils._get_formatted_date_string(file_modified)

        result.exhibitsTraits = copy_data_utils._array_copy_data(
            res_opt, instance.exhibits_traits, options)

        flattened = []
        if instance.arguments:
            for name, values in instance.arguments.items():
                flattened.extend(_as_argument(name, value) for value in values)

        if flattened:
            result.arguments = flattened

        return result
    def to_data(instance: CdmLocalEntityDeclarationDefinition,
                res_opt: 'ResolveOptions', options: 'CopyOptions'):
        """Build the ``LocalEntityDeclaration`` data object for a local entity declaration.

        Scalar fields are copied directly, the three timestamps are rendered
        with ``time_utils._get_formatted_date_string`` and every collection is
        copied with ``copy_data_utils._array_copy_data``. The regular partition
        collections additionally receive the value returned by
        ``LocalEntityDeclarationPersistence.ensure_non_incremental(instance)``;
        the incremental ones receive the value returned by
        ``ensure_incremental(instance)``.

        :param instance: the local entity declaration to persist.
        :param res_opt: resolve options forwarded to every collection copy.
        :param options: copy options forwarded to every collection copy.
        :return: the populated ``LocalEntityDeclaration``.
        """
        format_time = time_utils._get_formatted_date_string
        copy_array = copy_data_utils._array_copy_data
        persistence = LocalEntityDeclarationPersistence

        declaration = LocalEntityDeclaration()

        # Plain scalar fields.
        declaration.entityName = instance.entity_name
        declaration.explanation = instance.explanation
        declaration.entityPath = instance.entity_path

        # Timestamps, rendered as strings.
        declaration.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
        declaration.lastFileModifiedTime = format_time(instance.last_file_modified_time)
        declaration.lastChildFileModifiedTime = format_time(instance.last_child_file_modified_time)

        declaration.exhibitsTraits = copy_array(res_opt, instance.exhibits_traits, options)

        # Regular (non-incremental) partitions and patterns.
        declaration.dataPartitions = copy_array(
            res_opt, instance.data_partitions, options,
            persistence.ensure_non_incremental(instance))
        declaration.dataPartitionPatterns = copy_array(
            res_opt, instance.data_partition_patterns, options,
            persistence.ensure_non_incremental(instance))

        # Incremental partitions and patterns.
        declaration.incrementalPartitions = copy_array(
            res_opt, instance.incremental_partitions, options,
            persistence.ensure_incremental(instance))
        declaration.incrementalPartitionPatterns = copy_array(
            res_opt, instance.incremental_partition_patterns, options,
            persistence.ensure_incremental(instance))

        return declaration
예제 #3
0
    async def test_loads_and_sets_times_correctly(self):
        """Test modified times for manifest and files beneath it

        Loads ``default.manifest.cdm.json`` through the ``someNamespace`` mount,
        checks the status-check time stored in the file, waits one second, runs
        ``file_status_check_async`` and then verifies that the status-check
        times moved forward and that the manifest's last child modified time
        equals the later of the entity's and the sub manifest's modified times.
        """
        # Function-scope import so this coroutine does not depend on a
        # module-level asyncio import being present in the test file.
        import asyncio

        input_path = TestHelper.get_input_folder_path(self.tests_subpath, 'test_loads_and_sets_times_correctly')
        time_before_load = datetime.now(timezone.utc)

        cdm_corpus = self.get_corpus()
        cdm_corpus.storage.mount('someNamespace', LocalAdapter(input_path))
        cdm_corpus.storage.mount('local', LocalAdapter(input_path))
        cdm_corpus.storage.unmount('cdm')
        cdm_corpus.storage.default_namespace = 'local'
        cdm_manifest = await cdm_corpus.fetch_object_async('someNamespace:/default.manifest.cdm.json')
        status_time_at_load = cdm_manifest.last_file_status_check_time
        # hard coded because the time comes from inside the file
        self.assertEqual(time_utils._get_formatted_date_string(status_time_at_load), '2019-02-01T15:36:19.410Z')

        self.assertIsNotNone(cdm_manifest._file_system_modified_time)
        self.assertGreater(time_before_load, cdm_manifest._file_system_modified_time)

        # Wait without blocking the event loop: a plain time.sleep() inside a
        # coroutine stalls every other task scheduled on the same loop.
        # Presumably the pause exists so the status check below gets a strictly
        # later timestamp than the load above.
        await asyncio.sleep(1)

        await cdm_manifest.file_status_check_async()

        self.assertGreater(cdm_manifest.last_file_status_check_time, time_before_load)
        self.assertGreater(cdm_manifest.last_file_status_check_time, status_time_at_load)
        self.assertEqual(1, len(cdm_manifest.sub_manifests))
        self.assertGreater(cdm_manifest.sub_manifests[0].last_file_status_check_time, time_before_load)
        self.assertEqual(1, len(cdm_manifest.entities))
        self.assertEqual(1, len(cdm_manifest.entities[0].data_partitions))

        # The manifest's child-modified time must match the newest child.
        entity = cdm_manifest.entities[0]
        sub_manifest = cdm_manifest.sub_manifests[0]
        max_time = time_utils._max_time(entity.last_file_modified_time, sub_manifest.last_file_modified_time)
        self.assertEqual(time_utils._get_formatted_date_string(cdm_manifest.last_child_file_modified_time), time_utils._get_formatted_date_string(max_time))
    def to_data(instance: CdmManifestDefinition, res_opt: ResolveOptions,
                options: CopyOptions) -> ManifestContent:
        """Serialize a manifest definition into a ``ManifestContent`` data object.

        Name, schema, version and explanation fields are copied directly. The
        three timestamps are rendered with
        ``time_utils._get_formatted_date_string``; traits, entities, sub
        manifests, imports and relationships are each copied with
        ``copy_data_utils._array_copy_data``.

        :param instance: the manifest definition to persist.
        :param res_opt: resolve options forwarded to every collection copy.
        :param options: copy options forwarded to every collection copy.
        :return: the populated ``ManifestContent``.
        """
        format_time = time_utils._get_formatted_date_string
        copy_array = copy_data_utils._array_copy_data

        content = ManifestContent()

        # Identification and versioning.
        content.manifestName = instance.manifest_name
        content.schema = instance.schema
        content.jsonSchemaSemanticVersion = instance.json_schema_semantic_version
        content.documentVersion = instance.document_version

        # File timestamps.
        content.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
        content.lastFileModifiedTime = format_time(instance.last_file_modified_time)
        content.lastChildFileModifiedTime = format_time(instance.last_child_file_modified_time)

        content.explanation = instance.explanation

        # Collections.
        content.exhibitsTraits = copy_array(res_opt, instance.exhibits_traits, options)
        content.entities = copy_array(res_opt, instance.entities, options)
        content.subManifests = copy_array(res_opt, instance.sub_manifests, options)
        content.imports = copy_array(res_opt, instance.imports, options)
        content.relationships = copy_array(res_opt, instance.relationships, options)

        return content
예제 #5
0
    def create_database_propertybags(instance: CdmManifestDefinition, res_opt: ResolveOptions, options: CopyOptions):
        """Collect the manifest's ``cdm:``-prefixed properties into a dictionary.

        The three timestamps (rendered with
        ``time_utils._get_formatted_date_string``), the schema, the document
        version and the JSON schema semantic version are included only when
        they are not ``None``. Imports and traits are included only when the
        respective collection is not ``None`` and not empty; both are copied
        with ``copy_data_utils._array_copy_data``.

        :param instance: the manifest definition to read from.
        :param res_opt: resolve options forwarded to the collection copies.
        :param options: copy options forwarded to the collection copies.
        :return: dict of property name to value.
        """
        as_text = time_utils._get_formatted_date_string

        # Order matters: it determines the key order of the resulting dict.
        scalar_candidates = (
            ("cdm:lastFileStatusCheckTime", as_text(instance.last_file_status_check_time)),
            ("cdm:lastFileModifiedTime", as_text(instance.last_file_modified_time)),
            ("cdm:lastChildFileModifiedTime", as_text(instance.last_child_file_modified_time)),
            ("cdm:schema", instance.schema),
            ("cdm:documentVersion", instance.document_version),
            ("cdm:jsonSchemaSemanticVersion", instance.json_schema_semantic_version),
        )
        bag = {key: value for key, value in scalar_candidates if value is not None}

        imports = instance.imports
        if imports is not None and len(imports) > 0:
            bag["cdm:imports"] = copy_data_utils._array_copy_data(res_opt, imports, options)

        traits = instance.exhibits_traits
        if traits is not None and len(traits) > 0:
            bag["cdm:traits"] = copy_data_utils._array_copy_data(res_opt, traits, options)

        return bag
    def to_data(instance: CdmManifestDeclarationDefinition,
                res_opt: ResolveOptions,
                options: CopyOptions) -> ManifestDeclaration:
        """Serialize a manifest declaration into a ``ManifestDeclaration`` data object.

        Copies the manifest name, definition and explanation, and renders the
        two timestamps with ``time_utils._get_formatted_date_string``.
        ``res_opt`` and ``options`` are accepted but not used here.

        :return: the populated ``ManifestDeclaration``.
        """
        format_time = time_utils._get_formatted_date_string

        declaration = ManifestDeclaration()
        declaration.manifestName = instance.manifest_name
        declaration.definition = instance.definition
        declaration.explanation = instance.explanation
        declaration.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
        declaration.lastFileModifiedTime = format_time(instance.last_file_modified_time)
        return declaration
예제 #7
0
 def test_load_folder_with_no_entity_folders(self):
     """Load a manifest with no entities and no sub manifests and verify each loaded field."""
     test_name = 'test_load_folder_with_no_entity_folders'
     cdm_corpus = self.get_corpus()
     raw_manifest = TestHelper.get_input_file_content(
         self.tests_subpath, test_name, 'empty.manifest.cdm.json')
     manifest = ManifestPersistence.from_object(
         cdm_corpus.ctx, 'cdmTest', 'someNamespace', '/',
         ManifestContent().decode(raw_manifest))

     # Header fields.
     self.assertEqual(manifest.schema, 'CdmManifestDefinition.cdm.json')
     self.assertEqual(manifest.manifest_name, 'cdmTest')
     self.assertEqual(manifest.json_schema_semantic_version, '1.0.0')

     modified_text = time_utils._get_formatted_date_string(
         manifest.last_file_modified_time)
     self.assertEqual(modified_text, '2008-09-15T23:53:23.000Z')
     self.assertEqual(manifest.explanation,
                      'test cdm folder for cdm version 1.0+')

     # Collections: one import and one trait, nothing else.
     self.assertEqual(1, len(manifest.imports))
     first_import = manifest.imports[0]
     self.assertEqual(first_import.corpus_path, '/primitives.cdm.json')
     self.assertEqual(0, len(manifest.entities))
     self.assertEqual(1, len(manifest.exhibits_traits))
     self.assertEqual(0, len(manifest.sub_manifests))
예제 #8
0
def _log(level: 'CdmStatusLevel', ctx: 'CdmCorpusContext', tag: str,
         message: str, path: str, default_status_event: Callable) -> None:
    """
    Report a message at the given status level.

    Nothing happens unless ``level`` is at least ``ctx.report_at_level``.
    Otherwise, when ``ctx.events.is_recording`` is set, an entry holding the
    timestamp, level name, tag, message and path (plus the correlation id when
    there is one) is appended to ``ctx.events``. The formatted message is then
    passed to ``ctx.status_event`` when that is set, or to
    ``default_status_event`` when it is not.
    """
    # Below the reporting threshold: neither record nor report.
    if not level >= ctx.report_at_level:
        return

    # NOTE(review): datetime.utcnow() returns a naive datetime and is deprecated
    # since Python 3.12 - confirm how _get_formatted_date_string treats naive values.
    timestamp = time_utils._get_formatted_date_string(datetime.utcnow())

    # Building the record is skipped entirely while recording is off.
    if ctx.events.is_recording:
        record = {
            'timestamp': timestamp,
            'level': level.name,
            'tag': tag,
            'message': message,
            'path': path
        }
        correlation_id = ctx.correlation_id
        if correlation_id is not None:
            record['correlationId'] = correlation_id
        ctx.events.append(record)

    text = _format_message(tag, message, path, ctx.correlation_id)

    # NOTE(review): ctx has already been dereferenced above, so the truthiness
    # check only matters for a context object that evaluates as false.
    handler = ctx.status_event if ctx else None
    if handler:
        handler(level, text)
    else:
        default_status_event(text)
    def to_data(instance: CdmReferencedEntityDeclarationDefinition,
                res_opt: ResolveOptions,
                options: CopyOptions) -> ReferencedEntityDeclaration:
        """Serialize a referenced entity declaration into its data object.

        Copies the entity name, explanation and entity path, renders the two
        timestamps with ``time_utils._get_formatted_date_string`` and copies
        the exhibited traits with ``copy_data_utils._array_copy_data``.

        :param instance: the referenced entity declaration to persist.
        :param res_opt: resolve options forwarded to the trait copy.
        :param options: copy options forwarded to the trait copy.
        :return: the populated ``ReferencedEntityDeclaration``.
        """
        format_time = time_utils._get_formatted_date_string

        declaration = ReferencedEntityDeclaration()
        declaration.entityName = instance.entity_name
        declaration.explanation = instance.explanation
        declaration.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
        declaration.lastFileModifiedTime = format_time(instance.last_file_modified_time)
        declaration.entityPath = instance.entity_path

        traits = instance.exhibits_traits
        declaration.exhibitsTraits = copy_data_utils._array_copy_data(res_opt, traits, options)
        return declaration
예제 #10
0
    def to_data(instance: 'CdmDataPartitionDefinition',
                obj: 'StorageDescriptor', res_opt: 'ResolveOptions',
                options: 'CopyOptions') -> 'StorageDescriptor':
        """Fill a ``StorageDescriptor`` from a data partition definition.

        ``obj.properties`` is reset to a fresh dict and populated with the
        partition name, the two formatted timestamps and the copied traits,
        each only when present. The ``is.partition.format.CSV`` trait is left
        out of the copied traits: it is removed from
        ``instance.exhibits_traits`` for the duration of the copy and appended
        back afterwards.

        The format information is then built from
        ``DataPartitionPersistence.fill_property_bag_from_csv_trait(instance)``.

        :param instance: the partition definition to read from.
        :param obj: the storage descriptor to fill; it is mutated in place.
        :param res_opt: resolve options forwarded to the trait copy.
        :param options: copy options forwarded to the trait copy.
        :return: ``obj`` on success, or ``None`` when no CSV property bag could
            be produced (``obj.properties`` has already been replaced by then).
        """
        obj.properties = {}

        if instance.name is not None:
            obj.properties['cdm:name'] = instance.name
        if instance.last_file_status_check_time is not None:
            obj.properties[
                'cdm:lastFileStatusCheckTime'] = time_utils._get_formatted_date_string(
                    instance.last_file_status_check_time)
        if instance.last_file_modified_time is not None:
            obj.properties[
                'cdm:lastFileModifiedTime'] = time_utils._get_formatted_date_string(
                    instance.last_file_modified_time)
        if instance.exhibits_traits is not None:
            tpm = TraitToPropertyMap(instance)
            csv_trait = tpm._fetch_trait_reference('is.partition.format.CSV')
            # Temporarily drop the CSV trait so it is not copied into 'cdm:traits'.
            if csv_trait is not None:
                instance.exhibits_traits.remove('is.partition.format.CSV')
            try:
                if len(instance.exhibits_traits) > 0:
                    obj.properties[
                        'cdm:traits'] = copy_data_utils._array_copy_data(
                            res_opt, instance.exhibits_traits, options)
            finally:
                # Always put the trait back, even when the copy raises, so the
                # caller's instance is never left without its CSV trait.
                if csv_trait is not None:
                    instance.exhibits_traits.append(csv_trait)

        properties = DataPartitionPersistence.fill_property_bag_from_csv_trait(
            instance)

        if properties is None:
            # No CSV property bag could be built: signal failure to the caller.
            return None

        obj.format = FormatInfo(
            input_format=InputFormat.
            orgapachehadoopmapred_sequence_file_input_format,
            output_format=OutputFormat.
            orgapachehadoophiveqlio_hive_sequence_file_output_format,
            serialize_lib=SerializeLib.
            orgapachehadoophiveserde2lazy_lazy_simple_ser_de,
            format_type=FormatType.csv,
            properties=properties)

        return obj
예제 #11
0
    def to_data(instance: CdmDataPartitionPatternDefinition,
                res_opt: ResolveOptions,
                options: CopyOptions) -> DataPartitionPattern:
        """Serialize a data partition pattern definition into its data object.

        Copies the name, explanation, root location, regular expression,
        parameters and specialized schema directly, renders the two timestamps
        with ``time_utils._get_formatted_date_string`` and copies the exhibited
        traits with ``copy_data_utils._array_copy_data``.

        :param instance: the partition pattern definition to persist.
        :param res_opt: resolve options forwarded to the trait copy.
        :param options: copy options forwarded to the trait copy.
        :return: the populated ``DataPartitionPattern``.
        """
        format_time = time_utils._get_formatted_date_string

        pattern = DataPartitionPattern()
        pattern.name = instance.name
        pattern.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
        pattern.lastFileModifiedTime = format_time(instance.last_file_modified_time)

        # Pattern description fields are copied verbatim.
        pattern.explanation = instance.explanation
        pattern.rootLocation = instance.root_location
        pattern.regularExpression = instance.regular_expression
        pattern.parameters = instance.parameters
        pattern.specializedSchema = instance.specialized_schema

        traits = instance.exhibits_traits
        pattern.exhibitsTraits = copy_data_utils._array_copy_data(res_opt, traits, options)
        return pattern
예제 #12
0
파일: logger.py 프로젝트: rt112000/CDM
def _log(level: 'CdmStatusLevel',
         ctx: 'CdmCorpusContext',
         class_name: str,
         message: str,
         method: str,
         default_status_event: Callable,
         corpus_path: str,
         code: 'CdmLogCode',
         ingest_telemetry: Optional[bool] = False) -> None:
    """
    Log to the specified status level by using the status event on the corpus context (if it exists) or to the default logger.
    The log level, class_name, message and path values are also added as part of a new entry to the log recorder.

    Nothing is logged when ``code`` is in ``ctx.suppressed_log_codes`` or when
    ``level`` is below ``ctx.report_at_level``. Otherwise:

    * when ``ctx.events.is_recording`` is set, an event dict is appended to
      ``ctx.events`` (with ``code`` for errors and warnings, ``cid`` when a
      correlation id exists and ``path`` when ``corpus_path`` is given);
    * the formatted message goes to ``ctx.status_event`` when that is set,
      otherwise to ``default_status_event``;
    * when ``ctx.corpus.telemetry_client`` is set, the entry is also queued
      for telemetry ingestion.
    """
    # Suppressed codes are dropped before any other work is done.
    if code in ctx.suppressed_log_codes:
        return

    # Below the reporting threshold: neither record nor report.
    if not level >= ctx.report_at_level:
        return

    # NOTE(review): datetime.utcnow() returns a naive datetime and is deprecated
    # since Python 3.12 - confirm how _get_formatted_date_string treats naive
    # values before switching to an aware datetime.
    timestamp = time_utils._get_formatted_date_string(datetime.utcnow())

    # Store a record of the event.
    # Save some dict init and string formatting cycles by checking
    # whether the recording is actually enabled.
    if ctx.events.is_recording:
        event = {
            'timestamp': timestamp,
            'level': level.name,
            'class': class_name,
            'message': message,
            'method': method
        }
        # Only errors and warnings carry their log code.
        if level in (CdmStatusLevel.ERROR, CdmStatusLevel.WARNING):
            event['code'] = code.name

        if ctx.correlation_id is not None:
            event['cid'] = ctx.correlation_id

        if corpus_path is not None:
            event['path'] = corpus_path
        ctx.events.append(event)

    formatted_message = _format_message(class_name, message, method,
                                        ctx.correlation_id, corpus_path)

    # NOTE(review): ctx has already been dereferenced above, so the truthiness
    # check only matters for a context object that evaluates as false.
    if ctx and ctx.status_event:
        ctx.status_event(level, formatted_message)
    else:
        default_status_event(formatted_message)

    # Ingest the logs into telemetry database
    if ctx.corpus.telemetry_client:
        ctx.corpus.telemetry_client.add_to_ingestion_queue(
            timestamp, level, class_name, method, corpus_path, message,
            ingest_telemetry, code)
예제 #13
0
    async def test_maximum_timeout_and_retries(self):
        """
        Test retry policy with max timeout set to be a small value

        The telemetry client's timeout is set to one millisecond before an
        inline ingestion query is posted; any exception raised by the post must
        mention 'error timed out'.
        """
        # Initialize credentials
        corpus = self._initialize_client_with_default_database(
        )  # type: CdmCorpusDefinition

        # Set timeout to 1 millisecond so the function will reach max retries and fail
        cast('TelemetryKustoClient',
             corpus.telemetry_client).timeout_milliseconds = 1

        timestamp = time_utils._get_formatted_date_string(datetime.utcnow())
        query = '.ingest inline into table infoLogs<|\n{0},'.format(timestamp) \
            + 'class name,method name,some message,None,corpus path,correlation id,api correlation id,app id,property'

        # NOTE(review): the test passes silently when the post does not raise at
        # all - confirm whether a missing timeout should fail the test.
        try:
            await cast('TelemetryKustoClient',
                       corpus.telemetry_client).post_kusto_query(query)
        except Exception as ex:
            # assertIn reports both operands on failure, unlike assertTrue(... in ...).
            self.assertIn('error timed out', str(ex))
예제 #14
0
 def test_manifest_with_blank_fields(self):
     """Load a manifest whose schema, name and similar fields are blank or empty and verify the result."""
     test_name = 'test_manifest_with_blank_fields'
     cdm_corpus = self.get_corpus()
     raw_manifest = TestHelper.get_input_file_content(
         self.tests_subpath, test_name, 'blank.manifest.cdm.json')
     manifest = ManifestPersistence.from_object(
         cdm_corpus.ctx, 'cdmTest', 'someNamespace', '/',
         ManifestContent().decode(raw_manifest))

     # Blank header fields load as None.
     self.assertIsNone(manifest.schema)
     self.assertIsNone(manifest.document_version)

     modified_text = time_utils._get_formatted_date_string(
         manifest.last_file_modified_time)
     self.assertEqual(modified_text, '2008-09-15T23:53:23.000Z')
     self.assertEqual(manifest.explanation,
                      'test cdm folder for cdm version 1.0+')

     # Collections: one import and one trait, nothing else.
     self.assertEqual(1, len(manifest.imports))
     first_import = manifest.imports[0]
     self.assertEqual(first_import.corpus_path, '/primitives.cdm.json')
     self.assertEqual(0, len(manifest.entities))
     self.assertEqual(1, len(manifest.exhibits_traits))
     self.assertEqual(0, len(manifest.sub_manifests))