def to_data(instance: 'CdmDataPartitionDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> 'DataPartition':
    """Build the ``DataPartition`` persistence object for a data partition definition.

    Scalar fields are copied directly, both file timestamps are rendered
    through ``time_utils._get_formatted_date_string`` and the exhibited
    traits are copied through ``copy_data_utils._array_copy_data``.

    ``instance.arguments`` (a mapping of argument name to a list of values)
    is flattened into one ``Argument`` per (name, value) pair. The
    ``arguments`` attribute is only set when at least one pair exists.

    :param instance: the partition definition to convert.
    :param res_opt: resolve options forwarded to the trait copy.
    :param options: copy options forwarded to the trait copy.
    :return: the populated ``DataPartition``.
    """
    format_time = time_utils._get_formatted_date_string

    def _build_argument(name, value):
        # Argument is created empty and filled attribute by attribute.
        built = Argument()
        built.name = name
        built.value = value
        return built

    result = DataPartition()
    result.location = instance.location
    result.name = instance.name
    result.specializedSchema = instance.specialized_schema
    result.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
    result.lastFileModifiedTime = format_time(instance.last_file_modified_time)
    result.exhibitsTraits = copy_data_utils._array_copy_data(
        res_opt, instance.exhibits_traits, options)

    # A missing or empty mapping simply yields no arguments.
    flattened = [
        _build_argument(name, value)
        for name, values in (instance.arguments or {}).items()
        for value in values
    ]
    if flattened:
        result.arguments = flattened

    return result
def to_data(instance: CdmLocalEntityDeclarationDefinition, res_opt: 'ResolveOptions', options: 'CopyOptions'):
    """Build the ``LocalEntityDeclaration`` persistence object for a local entity declaration.

    Names, explanation and path are copied directly; the three timestamps
    are rendered through ``time_utils._get_formatted_date_string``.
    Traits and the four partition collections are copied through
    ``copy_data_utils._array_copy_data``. For the partition collections the
    result of ``ensure_non_incremental(instance)`` or
    ``ensure_incremental(instance)`` is forwarded as the fourth argument.

    :param instance: the local entity declaration to convert.
    :param res_opt: resolve options forwarded to every collection copy.
    :param options: copy options forwarded to every collection copy.
    :return: the populated ``LocalEntityDeclaration``.
    """
    format_time = time_utils._get_formatted_date_string
    copy_list = copy_data_utils._array_copy_data
    persistence = LocalEntityDeclarationPersistence

    declaration = LocalEntityDeclaration()
    declaration.entityName = instance.entity_name
    declaration.explanation = instance.explanation
    declaration.entityPath = instance.entity_path
    declaration.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
    declaration.lastFileModifiedTime = format_time(instance.last_file_modified_time)
    declaration.lastChildFileModifiedTime = format_time(instance.last_child_file_modified_time)
    declaration.exhibitsTraits = copy_list(res_opt, instance.exhibits_traits, options)

    # Regular partitions and patterns.
    declaration.dataPartitions = copy_list(
        res_opt, instance.data_partitions, options,
        persistence.ensure_non_incremental(instance))
    declaration.dataPartitionPatterns = copy_list(
        res_opt, instance.data_partition_patterns, options,
        persistence.ensure_non_incremental(instance))

    # Incremental partitions and patterns.
    declaration.incrementalPartitions = copy_list(
        res_opt, instance.incremental_partitions, options,
        persistence.ensure_incremental(instance))
    declaration.incrementalPartitionPatterns = copy_list(
        res_opt, instance.incremental_partition_patterns, options,
        persistence.ensure_incremental(instance))

    return declaration
async def test_loads_and_sets_times_correctly(self):
    """Check the status-check and modified times of a manifest and its children."""
    data_root = TestHelper.get_input_folder_path(self.tests_subpath, 'test_loads_and_sets_times_correctly')
    load_started_at = datetime.now(timezone.utc)

    corpus = self.get_corpus()
    storage = corpus.storage
    storage.mount('someNamespace', LocalAdapter(data_root))
    storage.mount('local', LocalAdapter(data_root))
    storage.unmount('cdm')
    storage.default_namespace = 'local'

    manifest = await corpus.fetch_object_async('someNamespace:/default.manifest.cdm.json')
    check_time_on_load = manifest.last_file_status_check_time

    # The expected value is fixed because the time is read from the file itself.
    self.assertEqual(time_utils._get_formatted_date_string(check_time_on_load), '2019-02-01T15:36:19.410Z')

    self.assertIsNotNone(manifest._file_system_modified_time)
    self.assertGreater(load_started_at, manifest._file_system_modified_time)

    time.sleep(1)

    await manifest.file_status_check_async()

    # The status check must have moved the check time past both reference points.
    self.assertGreater(manifest.last_file_status_check_time, load_started_at)
    self.assertGreater(manifest.last_file_status_check_time, check_time_on_load)

    self.assertEqual(1, len(manifest.sub_manifests))
    child_manifest = manifest.sub_manifests[0]
    self.assertGreater(child_manifest.last_file_status_check_time, load_started_at)

    self.assertEqual(1, len(manifest.entities))
    first_entity = manifest.entities[0]
    self.assertEqual(1, len(first_entity.data_partitions))

    # The manifest's child-modified time should equal the newest child time.
    newest_child_time = time_utils._max_time(
        first_entity.last_file_modified_time, child_manifest.last_file_modified_time)
    self.assertEqual(
        time_utils._get_formatted_date_string(manifest.last_child_file_modified_time),
        time_utils._get_formatted_date_string(newest_child_time))
def to_data(instance: CdmManifestDefinition, res_opt: ResolveOptions, options: CopyOptions) -> ManifestContent:
    """Build the ``ManifestContent`` persistence object for a manifest definition.

    Name, schema and version fields are copied directly, the three
    timestamps are rendered through ``time_utils._get_formatted_date_string``
    and each collection (traits, entities, sub-manifests, imports,
    relationships) is copied through ``copy_data_utils._array_copy_data``.

    :param instance: the manifest definition to convert.
    :param res_opt: resolve options forwarded to every collection copy.
    :param options: copy options forwarded to every collection copy.
    :return: the populated ``ManifestContent``.
    """
    format_time = time_utils._get_formatted_date_string

    def copy_list(collection):
        # Every collection is copied with the same resolve and copy options.
        return copy_data_utils._array_copy_data(res_opt, collection, options)

    content = ManifestContent()
    content.manifestName = instance.manifest_name
    content.schema = instance.schema
    content.jsonSchemaSemanticVersion = instance.json_schema_semantic_version
    content.documentVersion = instance.document_version
    content.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
    content.lastFileModifiedTime = format_time(instance.last_file_modified_time)
    content.lastChildFileModifiedTime = format_time(instance.last_child_file_modified_time)
    content.explanation = instance.explanation
    content.exhibitsTraits = copy_list(instance.exhibits_traits)
    content.entities = copy_list(instance.entities)
    content.subManifests = copy_list(instance.sub_manifests)
    content.imports = copy_list(instance.imports)
    content.relationships = copy_list(instance.relationships)
    return content
def create_database_propertybags(instance: CdmManifestDefinition, res_opt: ResolveOptions, options: CopyOptions):
    """Collect the manifest's metadata into a ``cdm:``-prefixed property dictionary.

    Scalar entries (the three formatted timestamps, schema, document version
    and JSON schema semantic version) are added only when not ``None``.
    Imports and traits are added only when the collection is present and
    non-empty, and are copied through ``copy_data_utils._array_copy_data``.

    :param instance: the manifest definition to read from.
    :param res_opt: resolve options forwarded to the collection copies.
    :param options: copy options forwarded to the collection copies.
    :return: a dict mapping property names to their values.
    """
    format_time = time_utils._get_formatted_date_string

    # Listed in the order the keys must be inserted.
    scalar_entries = (
        ("cdm:lastFileStatusCheckTime", format_time(instance.last_file_status_check_time)),
        ("cdm:lastFileModifiedTime", format_time(instance.last_file_modified_time)),
        ("cdm:lastChildFileModifiedTime", format_time(instance.last_child_file_modified_time)),
        ("cdm:schema", instance.schema),
        ("cdm:documentVersion", instance.document_version),
        ("cdm:jsonSchemaSemanticVersion", instance.json_schema_semantic_version),
    )
    bag = {}
    for key, value in scalar_entries:
        if value is not None:
            bag[key] = value

    collection_entries = (
        ("cdm:imports", instance.imports),
        ("cdm:traits", instance.exhibits_traits),
    )
    for key, collection in collection_entries:
        if collection is not None and len(collection) > 0:
            bag[key] = copy_data_utils._array_copy_data(res_opt, collection, options)

    return bag
def to_data(instance: CdmManifestDeclarationDefinition, res_opt: ResolveOptions, options: CopyOptions) -> ManifestDeclaration:
    """Build the ``ManifestDeclaration`` persistence object for a manifest declaration.

    Name, definition and explanation are copied directly; both timestamps
    are rendered through ``time_utils._get_formatted_date_string``.

    :param instance: the manifest declaration to convert.
    :param res_opt: resolve options (not used by this conversion).
    :param options: copy options (not used by this conversion).
    :return: the populated ``ManifestDeclaration``.
    """
    format_time = time_utils._get_formatted_date_string

    declaration = ManifestDeclaration()
    declaration.manifestName = instance.manifest_name
    declaration.definition = instance.definition
    declaration.explanation = instance.explanation
    declaration.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
    declaration.lastFileModifiedTime = format_time(instance.last_file_modified_time)
    return declaration
def test_load_folder_with_no_entity_folders(self):
    """Load a manifest that has no entities and no sub-manifests and check its fields."""
    test_name = 'test_load_folder_with_no_entity_folders'
    corpus = self.get_corpus()
    raw_manifest = TestHelper.get_input_file_content(self.tests_subpath, test_name, 'empty.manifest.cdm.json')
    decoded_manifest = ManifestContent().decode(raw_manifest)
    manifest = ManifestPersistence.from_object(
        corpus.ctx, 'cdmTest', 'someNamespace', '/', decoded_manifest)

    # Identity and versioning.
    self.assertEqual(manifest.schema, 'CdmManifestDefinition.cdm.json')
    self.assertEqual(manifest.manifest_name, 'cdmTest')
    self.assertEqual(manifest.json_schema_semantic_version, '1.0.0')

    modified_text = time_utils._get_formatted_date_string(manifest.last_file_modified_time)
    self.assertEqual(modified_text, '2008-09-15T23:53:23.000Z')
    self.assertEqual(manifest.explanation, 'test cdm folder for cdm version 1.0+')

    # Collections: one import and one trait, nothing else.
    self.assertEqual(1, len(manifest.imports))
    self.assertEqual(manifest.imports[0].corpus_path, '/primitives.cdm.json')
    self.assertEqual(0, len(manifest.entities))
    self.assertEqual(1, len(manifest.exhibits_traits))
    self.assertEqual(0, len(manifest.sub_manifests))
def _log(level: 'CdmStatusLevel', ctx: 'CdmCorpusContext', tag: str, message: str, path: str, default_status_event: Callable) -> None: """ Log to the specified status level by using the status event on the corpus context (if it exists) or to the default logger. The log level, tag, message and path values are also added as part of a new entry to the log recorder. """ # Write message to the configured logger if level >= ctx.report_at_level: timestamp = time_utils._get_formatted_date_string(datetime.utcnow()) # Store a record of the event. # Save some dict init and string formatting cycles by checking # whether the recording is actually enabled. if ctx.events.is_recording: event = { 'timestamp': timestamp, 'level': level.name, 'tag': tag, 'message': message, 'path': path } if ctx.correlation_id is not None: event['correlationId'] = ctx.correlation_id ctx.events.append(event) formatted_message = _format_message(tag, message, path, ctx.correlation_id) if ctx and ctx.status_event: ctx.status_event(level, formatted_message) else: default_status_event(formatted_message)
def to_data(instance: CdmReferencedEntityDeclarationDefinition, res_opt: ResolveOptions, options: CopyOptions) -> ReferencedEntityDeclaration:
    """Build the ``ReferencedEntityDeclaration`` persistence object for a referenced entity.

    Name, explanation and path are copied directly, both timestamps are
    rendered through ``time_utils._get_formatted_date_string`` and the
    exhibited traits are copied through ``copy_data_utils._array_copy_data``.

    :param instance: the referenced entity declaration to convert.
    :param res_opt: resolve options forwarded to the trait copy.
    :param options: copy options forwarded to the trait copy.
    :return: the populated ``ReferencedEntityDeclaration``.
    """
    format_time = time_utils._get_formatted_date_string

    declaration = ReferencedEntityDeclaration()
    declaration.entityName = instance.entity_name
    declaration.explanation = instance.explanation
    declaration.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
    declaration.lastFileModifiedTime = format_time(instance.last_file_modified_time)
    declaration.entityPath = instance.entity_path
    declaration.exhibitsTraits = copy_data_utils._array_copy_data(
        res_opt, instance.exhibits_traits, options)
    return declaration
def to_data(instance: 'CdmDataPartitionDefinition', obj: 'StorageDescriptor', res_opt: 'ResolveOptions', options: 'CopyOptions') -> 'StorageDescriptor':
    """Fill a ``StorageDescriptor`` from a data partition definition.

    ``obj.properties`` is reset to a fresh dict and populated with the
    partition name, the formatted file timestamps and the exhibited traits
    (excluding ``is.partition.format.CSV``). ``obj.format`` is then set from
    the property bag returned by
    ``DataPartitionPersistence.fill_property_bag_from_csv_trait``.

    :param instance: the partition definition to read from.
    :param obj: the storage descriptor to populate; modified in place.
    :param res_opt: resolve options forwarded to the trait copy.
    :param options: copy options forwarded to the trait copy.
    :return: ``obj``, or ``None`` when no CSV property bag could be built.
    """
    obj.properties = {}

    if instance.name is not None:
        obj.properties['cdm:name'] = instance.name

    if instance.last_file_status_check_time is not None:
        obj.properties['cdm:lastFileStatusCheckTime'] = time_utils._get_formatted_date_string(
            instance.last_file_status_check_time)

    if instance.last_file_modified_time is not None:
        obj.properties['cdm:lastFileModifiedTime'] = time_utils._get_formatted_date_string(
            instance.last_file_modified_time)

    if instance.exhibits_traits is not None:
        tpm = TraitToPropertyMap(instance)
        csv_trait = tpm._fetch_trait_reference('is.partition.format.CSV')

        # Take the CSV trait out while the remaining traits are copied, so it
        # is not written to 'cdm:traits'.
        if csv_trait is not None:
            instance.exhibits_traits.remove('is.partition.format.CSV')
        try:
            if len(instance.exhibits_traits) > 0:
                obj.properties['cdm:traits'] = copy_data_utils._array_copy_data(
                    res_opt, instance.exhibits_traits, options)
        finally:
            # Restore the trait even if the copy raises, so the caller's
            # definition is never left without it.
            if csv_trait is not None:
                instance.exhibits_traits.append(csv_trait)

    properties = DataPartitionPersistence.fill_property_bag_from_csv_trait(instance)

    if properties is not None:
        obj.format = FormatInfo(
            input_format=InputFormat.orgapachehadoopmapred_sequence_file_input_format,
            output_format=OutputFormat.orgapachehadoophiveqlio_hive_sequence_file_output_format,
            serialize_lib=SerializeLib.orgapachehadoophiveserde2lazy_lazy_simple_ser_de,
            format_type=FormatType.csv,
            properties=properties)
    else:
        # Error case: no property bag was returned, so no format can be set.
        return None

    return obj
def to_data(instance: CdmDataPartitionPatternDefinition, res_opt: ResolveOptions, options: CopyOptions) -> DataPartitionPattern:
    """Build the ``DataPartitionPattern`` persistence object for a partition pattern.

    Scalar fields are copied directly, both timestamps are rendered through
    ``time_utils._get_formatted_date_string`` and the exhibited traits are
    copied through ``copy_data_utils._array_copy_data``.

    :param instance: the partition pattern definition to convert.
    :param res_opt: resolve options forwarded to the trait copy.
    :param options: copy options forwarded to the trait copy.
    :return: the populated ``DataPartitionPattern``.
    """
    format_time = time_utils._get_formatted_date_string

    pattern = DataPartitionPattern()
    pattern.name = instance.name
    pattern.lastFileStatusCheckTime = format_time(instance.last_file_status_check_time)
    pattern.lastFileModifiedTime = format_time(instance.last_file_modified_time)
    pattern.explanation = instance.explanation
    pattern.rootLocation = instance.root_location
    pattern.regularExpression = instance.regular_expression
    pattern.parameters = instance.parameters
    pattern.specializedSchema = instance.specialized_schema
    pattern.exhibitsTraits = copy_data_utils._array_copy_data(
        res_opt, instance.exhibits_traits, options)
    return pattern
def _log(level: 'CdmStatusLevel', ctx: 'CdmCorpusContext', class_name: str, message: str, method: str, default_status_event: Callable, corpus_path: str, code: 'CdmLogCode', ingest_telemetry: Optional[bool] = False) -> None: """ Log to the specified status level by using the status event on the corpus context (if it exists) or to the default logger. The log level, class_name, message and path values are also added as part of a new entry to the log recorder. """ if ctx.suppressed_log_codes.__contains__(code): return # Write message to the configured logger if level >= ctx.report_at_level: timestamp = time_utils._get_formatted_date_string(datetime.utcnow()) # Store a record of the event. # Save some dict init and string formatting cycles by checking # whether the recording is actually enabled. if ctx.events.is_recording: event = { 'timestamp': timestamp, 'level': level.name, 'class': class_name, 'message': message, 'method': method } if CdmStatusLevel.ERROR == level or CdmStatusLevel.WARNING == level: event['code'] = code.name if ctx.correlation_id is not None: event['cid'] = ctx.correlation_id if corpus_path is not None: event['path'] = corpus_path ctx.events.append(event) formatted_message = _format_message(class_name, message, method, ctx.correlation_id, corpus_path) if ctx and ctx.status_event: ctx.status_event(level, formatted_message) else: default_status_event(formatted_message) # Ingest the logs into telemetry database if ctx.corpus.telemetry_client: ctx.corpus.telemetry_client.add_to_ingestion_queue( timestamp, level, class_name, method, corpus_path, message, ingest_telemetry, code)
async def test_maximum_timeout_and_retries(self):
    """Exercise the retry policy when the maximum timeout is set to a very small value."""
    # Initialize credentials
    corpus = self._initialize_client_with_default_database()  # type: CdmCorpusDefinition

    # With a 1 millisecond timeout the call should reach max retries and fail.
    kusto_client = cast('TelemetryKustoClient', corpus.telemetry_client)
    kusto_client.timeout_milliseconds = 1

    timestamp_text = time_utils._get_formatted_date_string(datetime.utcnow())
    ingest_header = '.ingest inline into table infoLogs<|\n{0},'.format(timestamp_text)
    row_fields = 'class name,method name,some message,None,corpus path,correlation id,api correlation id,app id,property'
    query = ingest_header + row_fields

    try:
        await cast('TelemetryKustoClient', corpus.telemetry_client).post_kusto_query(query)
    except Exception as ex:
        # Only the message of a raised exception is checked.
        self.assertTrue('error timed out' in str(ex))
def test_manifest_with_blank_fields(self):
    """Load a manifest whose schema, version and similar fields are blank or empty."""
    test_name = 'test_manifest_with_blank_fields'
    corpus = self.get_corpus()
    raw_manifest = TestHelper.get_input_file_content(self.tests_subpath, test_name, 'blank.manifest.cdm.json')
    decoded_manifest = ManifestContent().decode(raw_manifest)
    manifest = ManifestPersistence.from_object(
        corpus.ctx, 'cdmTest', 'someNamespace', '/', decoded_manifest)

    # Blank values are expected to load as None.
    self.assertIsNone(manifest.schema)
    self.assertIsNone(manifest.document_version)

    modified_text = time_utils._get_formatted_date_string(manifest.last_file_modified_time)
    self.assertEqual(modified_text, '2008-09-15T23:53:23.000Z')
    self.assertEqual(manifest.explanation, 'test cdm folder for cdm version 1.0+')

    # Collections: one import and one trait, nothing else.
    self.assertEqual(1, len(manifest.imports))
    self.assertEqual(manifest.imports[0].corpus_path, '/primitives.cdm.json')
    self.assertEqual(0, len(manifest.entities))
    self.assertEqual(1, len(manifest.exhibits_traits))
    self.assertEqual(0, len(manifest.sub_manifests))