def _create_new_projection_attribute_state_set(
    proj_ctx: 'ProjectionContext',
    proj_output_set: 'ProjectionAttributeStateSet',
    new_res_attr_FK: 'ResolvedAttribute',
    ref_attr_name: str
) -> 'ProjectionAttributeStateSet':
    """Add a projection attribute state for the new foreign key attribute to the output set.

    :param proj_ctx: Projection context; its directive supplies the resolve options
        and the original source entity attribute name.
    :param proj_output_set: Output set that receives the new state. It is returned
        unchanged when no leaf list is found for ``ref_attr_name``.
    :param new_res_attr_FK: Resolved attribute that represents the foreign key.
    :param ref_attr_name: Name of the referenced attribute whose leaf states become
        the previous states of the new foreign key state.
    :return: ``proj_output_set`` (the same object that was passed in).
    """
    owner_name = CdmOperationReplaceAsForeignKey.__name__
    method_name = CdmOperationReplaceAsForeignKey._create_new_projection_attribute_state_set.__name__

    pas_list = ProjectionResolutionCommonUtil._get_leaf_list(proj_ctx, ref_attr_name)
    directive = proj_ctx._projection_directive
    source_entity = directive._original_source_entity_attribute_name

    if not source_entity:
        logger.warning(proj_output_set._ctx, owner_name, method_name, None,
                       CdmLogCode.WARN_PROJ_FK_WITHOUT_SOURCE_ENTITY, ref_attr_name)

    if pas_list is None:
        # Log error & return proj_output_set without any change
        logger.error(proj_output_set._ctx, owner_name, method_name, None,
                     CdmLogCode.ERR_PROJ_REF_ATTR_STATE_FAILURE, ref_attr_name)
        return proj_output_set

    # update the new foreign key resolved attribute with trait param with reference details
    reqd_trait = new_res_attr_FK.resolved_traits.find(directive._res_opt, 'is.linkedEntity.identifier')
    if reqd_trait and source_entity:
        trait_param_ent_ref = \
            ProjectionResolutionCommonUtil._create_foreign_key_linked_entity_identifier_trait_parameter(
                directive, proj_output_set._ctx.corpus, pas_list)
        reqd_trait.parameter_values.update_parameter_value(
            directive._res_opt, 'entityReferences', trait_param_ent_ref)

    # Create new output projection attribute state for the FK and link the leaf list as its previous states
    fk_state = ProjectionAttributeState(proj_output_set._ctx)
    fk_state._current_resolved_attribute = new_res_attr_FK
    fk_state._previous_state_list = pas_list
    proj_output_set._add(fk_state)

    return proj_output_set
def _add_new_artifact_attribute_state(self, proj_ctx: 'ProjectionContext', proj_output_set: 'ProjectionAttributeStateSet',
                                      attr_ctx: 'CdmAttributeContext') -> 'ProjectionAttributeStateSet':
    """Append a projection attribute state for the artifact attribute configured on this operation.

    A new attribute context is always created for the operation itself. Only type
    attributes are turned into a new resolved attribute; entity attributes and
    attribute groups log a warning, and anything else logs an error.

    :param proj_ctx: Projection context; its directive supplies the resolve options.
    :param proj_output_set: Output set that receives the new attribute state.
    :param attr_ctx: Attribute context under which the operation context is created.
    :return: ``proj_output_set`` (the same object that was passed in).
    """
    from cdm.objectmodel import CdmTypeAttributeDefinition, CdmEntityAttributeDefinition, CdmAttributeGroupReference

    # NOTE(review): the log calls below report `_append_projection_attribute_state` as the
    # method name, as the original code did - confirm that is the intended label.
    method_name = CdmOperationAddArtifactAttribute._append_projection_attribute_state.__name__

    # Create a new attribute context for the operation
    attr_ctx_op_add_artifact_attr_param = AttributeContextParameters()
    attr_ctx_op_add_artifact_attr_param._under = attr_ctx
    attr_ctx_op_add_artifact_attr_param._type = CdmAttributeContextType.OPERATION_ADD_ARTIFACT_ATTRIBUTE
    attr_ctx_op_add_artifact_attr_param._name = 'operation/index{}/{}'.format(self._index, self.get_name())
    attr_ctx_op_add_artifact_attr = CdmAttributeContext._create_child_under(
        proj_ctx._projection_directive._res_opt, attr_ctx_op_add_artifact_attr_param)

    if isinstance(self.new_attribute, CdmTypeAttributeDefinition):
        # Create a new attribute context for the new artifact attribute we will create
        attr_ctx_new_attr_param = AttributeContextParameters()
        attr_ctx_new_attr_param._under = attr_ctx_op_add_artifact_attr
        attr_ctx_new_attr_param._type = CdmAttributeContextType.ADDED_ATTRIBUTE_NEW_ARTIFACT
        attr_ctx_new_attr_param._name = self.new_attribute.fetch_object_definition_name()
        attr_ctx_new_attr = CdmAttributeContext._create_child_under(
            proj_ctx._projection_directive._res_opt, attr_ctx_new_attr_param)

        new_res_attr = self._create_new_resolved_attribute(proj_ctx, attr_ctx_new_attr, self.new_attribute)

        # Create a new projection attribute state for the new artifact attribute and add it to the output set
        # There is no previous state for the newly created attribute
        new_PAS = ProjectionAttributeState(self.ctx)
        new_PAS._current_resolved_attribute = new_res_attr
        proj_output_set._add(new_PAS)
    elif isinstance(self.new_attribute, (CdmEntityAttributeDefinition, CdmAttributeGroupReference)):
        type_str = 'an entity attribute' if isinstance(self.new_attribute, CdmEntityAttributeDefinition) \
            else 'an attribute group'
        logger.warning(self.ctx, self._TAG, method_name, self.at_corpus_path,
                       CdmLogCode.WARN_PROJ_ADD_ARTIFACT_ATTR_NOT_SUPPORTED, type_str)
    else:
        # new_attribute may be unset (None); read object_type defensively so the
        # error is logged instead of raising AttributeError.
        unsupported_type = getattr(self.new_attribute, 'object_type', None)
        logger.error(self.ctx, self._TAG, method_name, self.at_corpus_path,
                     CdmLogCode.ERR_PROJ_UNSUPPORTED_SOURCE, str(unsupported_type), self.get_name())

    return proj_output_set
async def to_data(instance: 'CdmDataPartitionDefinition', res_opt: 'ResolveOptions',
                  options: 'CopyOptions') -> Optional['Partition']:
    """Convert a data partition definition into its ``Partition`` data object.

    :param instance: The data partition definition to convert.
    :param res_opt: Resolve options (not used by this function).
    :param options: Copy options (not used by this function).
    :return: The populated ``Partition``, or None when the CSV format trait
        cannot be converted into format settings.
    """
    result = Partition()
    result.name = instance.name
    result.description = instance.description

    storage = instance.ctx.corpus.storage
    result.location = storage.corpus_path_to_adapter_path(
        storage.create_absolute_corpus_path(instance.location, instance.in_document))

    result.refreshTime = instance.refresh_time
    result.lastFileModifiedTime = utils.get_formatted_date_string(instance.last_file_modified_time)
    result.lastFileStatusCheckTime = utils.get_formatted_date_string(instance.last_file_status_check_time)

    if not result.location:
        logger.warning(DataPartitionPersistence.__name__, instance.ctx,
                       'Couldn\'t find data partition\'s location for partition {}.'.format(result.name),
                       DataPartitionPersistence.to_data.__name__)

    # filter description since it is mapped to a property
    exhibits_traits = (trait for trait in instance.exhibits_traits
                       if trait.named_reference != 'is.localized.describedAs')
    await utils.process_annotations_to_data(instance.ctx, result, exhibits_traits)

    t2pm = TraitToPropertyMap(instance)

    # isHidden is either True or left as None, never False
    result.isHidden = True if t2pm.fetch_trait_reference('is.hidden') else None

    csv_trait = t2pm.fetch_trait_reference('is.partition.format.CSV')
    if not csv_trait:
        return result

    csv_format_settings = utils.create_csv_format_settings(csv_trait)
    if not csv_format_settings:
        logger.error(DataPartitionPersistence.__name__, instance.ctx,
                     'There was a problem while processing csv format trait inside data partition.')
        return None

    result.fileFormatSettings = csv_format_settings
    result.fileFormatSettings.type = 'CsvFormatSettings'
    return result
def _construct_resolved_attributes(
    self,
    res_opt: 'ResolveOptions',
    under: Optional['CdmAttributeContext']
) -> 'ResolvedAttributeSetBuilder':
    """Build the resolved attribute set for the object this reference points at.

    :param res_opt: Resolve options passed to the definition lookup and resolution.
    :param under: Optional attribute context; stored on the builder's set and,
        when given, used as the parent of a pass-through context.
    :return: A builder that holds a copy of the definition's resolved attributes,
        or an empty builder when the definition cannot be resolved or has none.
    """
    # find and cache the complete set of attributes
    from cdm.resolvedmodel import ResolvedAttributeSetBuilder
    from cdm.utilities import AttributeContextParameters

    rasb = ResolvedAttributeSetBuilder()
    rasb.ras.attribute_context = under

    definition = self.fetch_object_definition(res_opt)
    if not definition:
        def_name = self.fetch_object_definition_name()
        logger.warning(self._TAG, self.ctx,
                       'unable to resolve an object from the reference \'{}\''.format(def_name))
        return rasb

    # ask for a 'pass through' context, that is, no new context at this level
    acp_ref = AttributeContextParameters(under=under, type=CdmAttributeContextType.PASS_THROUGH) \
        if under else None

    res_atts = definition._fetch_resolved_attributes(res_opt, acp_ref)
    if res_atts and res_atts._set:
        rasb.merge_attributes(res_atts.copy())
        rasb.remove_requested_atts()

    return rasb
def process_traits_and_annotations_to_data(ctx: 'CdmCorpusContext', entity_object: 'MetadataObject',
                                           traits: 'CdmTraitCollection'):
    """Copy traits onto a data object as annotations, extensions and trait references.

    Traits named ``is.extension.*`` are handed to the extension helper. The
    ``is.modelConversion.otherAnnotations`` trait is unpacked into annotations.
    Every remaining trait that is not filtered out is converted into a trait
    reference data object.

    :param ctx: Corpus context used when logging.
    :param entity_object: Object that receives ``annotations`` and ``traits``;
        each attribute is only assigned when at least one item was collected.
    :param traits: Traits to process; nothing happens when this is None.
    """
    if traits is None:
        return

    annotations = []
    extensions = []

    for trait in traits:
        trait_name = trait.named_reference

        if trait_name.startswith('is.extension.'):
            extension_helper.process_extension_trait_to_object(trait, entity_object)
        elif trait_name == 'is.modelConversion.otherAnnotations':
            for annotation in trait.arguments[0].value:
                if isinstance(annotation, dict) and annotation.get('name'):
                    annotations.append(annotation)
                else:
                    logger.warning(_TAG, ctx, 'Unsupported annotation type.')
        else:
            # Skip ignored traits, data format traits and traits that only mirror a property.
            if trait_name in ignored_traits or trait_name.startswith('is.dataFormat'):
                continue
            if trait_name in model_json_property_traits and trait.is_from_property:
                continue
            extensions.append(TraitReferencePersistence.to_data(trait, None, None))

    if annotations:
        entity_object.annotations = annotations

    if extensions:
        entity_object.traits = extensions
async def from_data(ctx: 'CdmCorpusContext', data_obj: 'ReferenceEntity',
                    location: str) -> 'CdmReferencedEntityDeclarationDefinition':
    """Create a referenced entity declaration from its data object.

    :param ctx: Corpus context used to create objects and to log.
    :param data_obj: The reference entity data to read from.
    :param location: Adapter path that is converted into a corpus path.
    :return: The new declaration, or None when ``location`` cannot be converted
        into a corpus path.
    """
    corpus = ctx.corpus

    referenced_entity = corpus.make_object(CdmObjectType.REFERENCED_ENTITY_DECLARATION_DEF, data_obj.name)
    referenced_entity.entity_name = data_obj.name

    corpus_path = corpus.storage.adapter_path_to_corpus_path(location)
    if corpus_path is None:
        logger.error(ctx, _TAG, "from_data", None,
                     CdmLogCode.ERR_PERSIST_MODEL_JSON_REF_ENTITY_INVALID_LOCATION,
                     location, referenced_entity.entity_name)
        return None

    referenced_entity.entity_path = '{}/{}'.format(corpus_path, data_obj.source)
    referenced_entity.explanation = data_obj.get('description')

    # Timestamps are only parsed when present in the data.
    timestamp_fields = (
        ('lastFileStatusCheckTime', 'last_file_status_check_time'),
        ('lastFileModifiedTime', 'last_file_modified_time'),
    )
    for json_key, attribute_name in timestamp_fields:
        raw_value = data_obj.get(json_key)
        if raw_value:
            setattr(referenced_entity, attribute_name, dateutil.parser.parse(raw_value))

    await utils.process_annotations_from_data(ctx, data_obj, referenced_entity.exhibits_traits)

    if data_obj.get('isHidden'):
        is_hidden_trait = corpus.make_ref(CdmObjectType.TRAIT_REF, 'is.hidden', True)
        is_hidden_trait.is_from_property = True
        referenced_entity.exhibits_traits.append(is_hidden_trait)

    # The model id is carried on a property-content trait as its 'modelId' argument.
    properties_trait = corpus.make_ref(CdmObjectType.TRAIT_REF, 'is.propertyContent.multiTrait', False)
    properties_trait.is_from_property = True

    model_id_argument = corpus.make_object(CdmObjectType.ARGUMENT_DEF, 'modelId')
    model_id_argument.value = data_obj.modelId
    properties_trait.arguments.append(model_id_argument)
    referenced_entity.exhibits_traits.append(properties_trait)

    extension_trait_def_list = []  # type: List[CdmTraitDefinition]
    extension_traits = CdmTraitCollection(ctx, referenced_entity)
    extension_helper.process_extension_from_json(ctx, data_obj, extension_traits, extension_trait_def_list)

    if extension_trait_def_list:
        logger.warning(ctx, _TAG, ReferencedEntityDeclarationPersistence.from_data.__name__, None,
                       CdmLogCode.WARN_PERSIST_CUSTOM_EXT_NOT_SUPPORTED)

    return referenced_entity
def _compare_json_semantic_version(ctx: 'CdmCorpusContext', document_semantic_version: str) -> int:
    """Compares the document version with the json semantic version supported.

    :param ctx: Corpus context used when logging an invalid version.
    :param document_semantic_version: Version string expected in the form
        ``<major>.<minor>.<patch>``.
    :return:
        1 => if document_semantic_version > json_semantic_version
        0 => if document_semantic_version == json_semantic_version or if document_semantic_version is invalid
        -1 => if document_semantic_version < json_semantic_version
    """
    curr_semantic_version_split = [int(x) for x in DocumentPersistence.json_semantic_version.split('.')]

    try:
        doc_semantic_version_split = [int(x) for x in document_semantic_version.split('.')]
    except (ValueError, AttributeError):
        # ValueError: a component is not an integer.
        # AttributeError: the version is not a string at all (e.g. None).
        # Both are "invalid" and, per the contract above, compare as 0.
        doc_semantic_version_split = None

    if doc_semantic_version_split is None or len(doc_semantic_version_split) != 3:
        logger.warning(ctx, _TAG, DocumentPersistence._compare_json_semantic_version.__name__, None,
                       CdmLogCode.WARN_PERSIST_JSON_SEM_VER_INVALID_FORMAT)
        return 0

    # The first differing component (major, then minor, then patch) decides the result.
    for doc_part, curr_part in zip(doc_semantic_version_split, curr_semantic_version_split):
        if doc_part != curr_part:
            return -1 if doc_part < curr_part else 1

    return 0
def _construct_resolved_attributes(
    self,
    res_opt: 'ResolveOptions',
    under: Optional['CdmAttributeContext']
) -> 'ResolvedAttributeSetBuilder':
    """Build the resolved attribute set for the object this reference points at.

    :param res_opt: Resolve options passed to the definition lookup and resolution.
    :param under: Optional attribute context; stored on the builder's set and,
        when given, used as the parent of a pass-through context.
    :return: A builder holding the definition's resolved attributes, or an empty
        builder when the definition cannot be resolved or has no attributes.
    """
    # find and cache the complete set of attributes
    from cdm.resolvedmodel import ResolvedAttributeSetBuilder
    from cdm.utilities import AttributeContextParameters

    rasb = ResolvedAttributeSetBuilder()
    rasb._resolved_attribute_set.attribute_context = under

    definition = self.fetch_object_definition(res_opt)
    if definition:
        acp_ref = None
        if under:
            # ask for a 'pass through' context, that is, no new context at this level
            acp_ref = AttributeContextParameters(under=under, type=CdmAttributeContextType.PASS_THROUGH)

        res_atts = definition._fetch_resolved_attributes(res_opt, acp_ref)
        if res_atts and res_atts._set:
            # No defensive copy here: the original author's note says the set is
            # already copied when it comes out of the cache.
            rasb.merge_attributes(res_atts)
            rasb.remove_requested_atts()
    else:
        # Report this method as the source of the warning (it was previously
        # attributed to _construct_resolved_traits).
        logger.warning(self.ctx, self._TAG, self._construct_resolved_attributes.__name__,
                       self.at_corpus_path, CdmLogCode.WARN_RESOLVE_OBJECT_FAILED,
                       self.fetch_object_definition_name())

    return rasb
def _compare_json_semantic_version(ctx: 'CdmCorpusContext', document_semantic_version: str) -> int:
    """Compare a document's semantic version against the supported json semantic version.

    :param ctx: Corpus context used when logging an invalid version.
    :param document_semantic_version: Version string expected in the form
        ``<major>.<minor>.<patch>``.
    :return: 1 when the document version is newer than the supported one, -1 when
        it is older, and 0 when both are equal or the document version is invalid.
    """
    supported_parts = [int(part) for part in DocumentPersistence.json_semantic_version.split('.')]
    error_message = 'jsonSemanticVersion must be set using the format <major>.<minor>.<patch>.'

    try:
        document_parts = [int(part) for part in document_semantic_version.split('.')]
    except ValueError:
        document_parts = None

    # A non-numeric component and a wrong component count are reported the same way.
    if document_parts is None or len(document_parts) != 3:
        logger.warning('DocumentPersistence', ctx, error_message, '_compare_json_semantic_version')
        return 0

    for position in range(3):
        document_part = document_parts[position]
        supported_part = supported_parts[position]
        if document_part < supported_part:
            return -1
        if document_part > supported_part:
            return 1

    return 0
async def to_data(document_object_or_path: Union[CdmDocumentDefinition, str], manifest: 'CdmManifestDefinition',
                  res_opt: 'ResolveOptions', options: 'CopyOptions',
                  ctx: 'CdmCorpusContext') -> Optional['LocalEntity']:
    """Convert the entity found at a corpus path into a local entity data object.

    :param document_object_or_path: Corpus path of the entity. Passing a document
        object is not implemented and yields None.
    :param manifest: Manifest the path is fetched relative to; import paths are
        rewritten to be relative to it.
    :param res_opt: Resolve options forwarded to the persistence helpers.
    :param options: Copy options forwarded to the persistence helpers.
    :param ctx: Corpus context used to fetch objects and to log.
    :return: The entity data object, or None when the entity cannot be fetched.
    """
    if not isinstance(document_object_or_path, str):
        # TODO: Do something else when document_object_or_path is an object.
        return None

    # Fetch the document from entity schema.
    cdm_entity = await ctx.corpus.fetch_object_async(document_object_or_path, manifest)
    if not cdm_entity:
        logger.error(DocumentPersistence.__name__, ctx, 'There was an error while trying to fetch cdm entity doc.')
        return None

    entity = await EntityPersistence.to_data(cdm_entity, res_opt, options, ctx)

    owner = cdm_entity.owner
    if not (owner and owner.object_type == CdmObjectType.DOCUMENT_DEF):
        logger.warning(DocumentPersistence.__name__, ctx,
                       'Entity {} is not inside a document or its owner is not a document.'.format(
                           cdm_entity.get_name()))
        return entity

    document = owner  # type: CdmDocumentDefinition
    storage = ctx.corpus.storage

    for element in document.imports:
        imp = CdmImportPersistence.to_data(element, res_opt, options)

        # the corpus path in the imports are relative to the document where it was defined.
        # when saving in model.json the documents are flattened to the manifest level
        # so it is necessary to recalculate the path to be relative to the manifest.
        absolute_path = storage.create_absolute_corpus_path(imp.corpusPath, document)

        namespace = document.namespace
        if namespace:
            namespace_prefix = namespace + ':'
            if absolute_path.startswith(namespace_prefix):
                absolute_path = absolute_path[len(namespace_prefix):]

        imp.corpusPath = storage.create_relative_corpus_path(absolute_path, manifest)
        entity.imports.append(imp)

    return entity
async def _fetch_document_from_folder_path_async(self, document_path: str, adapter: 'StorageAdapterBase', force_reload: bool, res_opt: Optional['ResolveOptions'] = None) -> 'CdmDocumentDefinition': """Gets the document from folder path. arguments: path: The path. adapter: The storage adapter where the document can be found.""" doc_name = None first = document_path.find('/') if first < 0: doc_name = document_path else: doc_name = document_path[0: first] # got that doc? doc = None # type: Optional[CdmDocumentDefinition] if doc_name in self._document_lookup: doc = self._document_lookup[doc_name] if not force_reload: return doc # remove them from the caches since they will be back in a moment if doc._is_dirty: logger.warning(self._TAG, self.ctx, 'discarding changes in document: {}'.format(doc.name)) self.documents.remove(doc_name) # go get the doc doc = await self._corpus.persistence._load_document_from_path_async(self, doc_name, doc, res_opt) return doc
def from_object(ctx: CdmCorpusContext, name: str, namespace: str, path: str,
                data: 'DocumentContent') -> 'CdmDocumentDefinition':
    """Create a document definition from parsed document content.

    :param ctx: Corpus context used to create objects and to log.
    :param name: Name given to the new document.
    :param namespace: Namespace stored on the document.
    :param path: Folder path stored on the document.
    :param data: Parsed document content; when falsy, the document is returned
        with only its name, folder path and namespace set.
    :return: The new document definition.
    """
    document = ctx.corpus.make_object(CdmObjectType.DOCUMENT_DEF, name)
    document._folder_path = path
    document._namespace = namespace

    if not data:
        return document

    if data.get('schema'):
        document.schema = data.schema

    # support old model syntax
    if data.get('schemaVersion'):
        document.json_schema_semantic_version = data.schemaVersion

    if data.get('documentVersion'):
        document.document_version = data.documentVersion

    if data.get('imports'):
        for import_obj in data.imports:
            document.imports.append(ImportPersistence.from_data(ctx, import_obj))

    if data.get('definitions') and isinstance(data.definitions, List):
        # The first key that is present on a definition selects the persistence class that reads it.
        definition_readers = (
            ('dataTypeName', DataTypePersistence),
            ('purposeName', PurposePersistence),
            ('attributeGroupName', AttributeGroupPersistence),
            ('traitName', TraitPersistence),
            ('traitGroupName', TraitGroupPersistence),
            ('entityShape', ConstantEntityPersistence),
            ('entityName', EntityPersistence),
        )
        for definition in data.definitions:
            for marker_key, persistence in definition_readers:
                if definition.get(marker_key):
                    document.definitions.append(persistence.from_data(ctx, definition))
                    break

    is_resolved_doc = False
    if len(document.definitions) == 1 and isinstance(document.definitions[0], CdmEntityDefinition):
        entity = document.definitions[0]  # type: CdmEntityDefinition
        resolved_trait = entity.exhibits_traits.item('has.entitySchemaAbstractionLevel')
        # Tries to figure out if the document is in resolved form by looking for the schema abstraction trait
        # or the presence of the attribute context.
        is_resolved_doc = (resolved_trait and resolved_trait.arguments[0].value == 'resolved') \
            or entity.attribute_context

    if not data.jsonSchemaSemanticVersion:
        logger.warning(ctx, _TAG, DocumentPersistence.from_data.__name__, document.at_corpus_path,
                       CdmLogCode.WARN_PERSIST_JSON_SEM_VER_MANDATORY)
        return document

    document.json_schema_semantic_version = data.jsonSchemaSemanticVersion
    if DocumentPersistence._compare_json_semantic_version(ctx, document.json_schema_semantic_version) > 0:
        # A newer version than supported is a warning for resolved documents and an error otherwise.
        if is_resolved_doc:
            logger.warning(ctx, _TAG, DocumentPersistence.from_data.__name__, None,
                           CdmLogCode.WARN_PERSIST_UNSUPPORTED_JSON_SEM_VER,
                           DocumentPersistence.json_semantic_version, document.json_schema_semantic_version)
        else:
            logger.error(ctx, _TAG, DocumentPersistence.from_data.__name__, None,
                         CdmLogCode.ERR_PERSIST_UNSUPPORTED_JSON_SEM_VER,
                         DocumentPersistence.json_semantic_version, document.json_schema_semantic_version)

    return document
async def from_data(
    ctx: 'CdmCorpusContext',
    data: 'Partition',
    extension_trait_def_list: List['CdmTraitDefinition'],
    local_extension_trait_def_list: List['CdmTraitDefinition'],
    document_folder: 'CdmFolderDefinition'
) -> Optional['CdmDataPartitionDefinition']:
    """Create a data partition definition from its ``Partition`` data object.

    :param ctx: Corpus context used to create objects and to log.
    :param data: The partition data to read from.
    :param extension_trait_def_list: Passed through to the extension helper.
    :param local_extension_trait_def_list: Passed through to the extension helper.
    :param document_folder: Folder the partition location is made relative to.
    :return: The new data partition, or None when the CSV format settings
        cannot be converted into a trait.
    """
    corpus = ctx.corpus
    data_partition = corpus.make_object(CdmObjectType.DATA_PARTITION_DEF,
                                        data.name if data.get('name') else None)

    # Whitespace-only descriptions are ignored.
    description = data.get('description')
    if description and not description.isspace():
        data_partition.description = description

    data_partition.location = corpus.storage.create_relative_corpus_path(
        corpus.storage.adapter_path_to_corpus_path(data.location), document_folder)

    if not data_partition.location:
        logger.warning(DataPartitionPersistence.__name__, ctx,
                       'Couldn\'t find data partition\'s location for partition {}.'.format(data_partition.name),
                       DataPartitionPersistence.from_data.__name__)

    if data.get('refreshTime'):
        data_partition.refresh_time = data.refreshTime

    timestamp_fields = (
        ('lastFileModifiedTime', 'last_file_modified_time'),
        ('lastFileStatusCheckTime', 'last_file_status_check_time'),
    )
    for json_key, attribute_name in timestamp_fields:
        raw_value = data.get(json_key)
        if raw_value:
            setattr(data_partition, attribute_name, dateutil.parser.parse(raw_value))

    if data.get('isHidden'):
        is_hidden_trait = corpus.make_ref(CdmObjectType.TRAIT_REF, 'is.hidden', True)
        is_hidden_trait.is_from_property = True
        data_partition.exhibits_traits.append(is_hidden_trait)

    await utils.process_annotations_from_data(ctx, data, data_partition.exhibits_traits)

    file_format_settings = data.get('fileFormatSettings')
    if file_format_settings and file_format_settings.type == 'CsvFormatSettings':
        csv_format_trait = utils.create_csv_trait(file_format_settings, ctx)
        if not csv_format_trait:
            logger.error(DataPartitionPersistence.__name__, ctx,
                         'There was a problem while processing csv format settings inside data partition.')
            return None
        data_partition.exhibits_traits.append(csv_format_trait)

    extension_helper.process_extension_from_json(
        ctx, data, data_partition.exhibits_traits, extension_trait_def_list, local_extension_trait_def_list)

    return data_partition
def add_to_ingestion_queue(self, timestamp: str, level: 'CdmStatusLevel', class_name: str, method: str,
                           corpus_path: str, message: str, require_ingestion: bool,
                           code: 'CdmLogCode') -> None:
    """
    Enqueue the request queue with the information to be logged.

    :param timestamp: The log timestamp.
    :param level: Logging status level.
    :param class_name: Usually the class that is calling the method.
    :param method: Usually denotes method calling this method.
    :param corpus_path: Usually denotes corpus path of document.
    :param message: Informational message.
    :param require_ingestion: Whether the log needs to be ingested.
    :param code: Error or warning code.
    """
    # Nothing to do until both the Kusto config and the request queue exist
    if self._config is None or self._request_queue is None:
        return

    # Logs coming from the telemetry client itself are dropped to avoid cycling
    if class_name == TelemetryKustoClient.__name__:
        return

    # Progress logs that do not require ingestion are kept only when they report
    # execution time for one of the tracked methods
    if level == CdmStatusLevel.PROGRESS and not require_ingestion:
        if method not in self.LOG_EXEC_TIME_METHODS:
            return
        if not message.startswith('Leaving scope. Time elapsed:'):
            return

    # Configured in case no user-created content can be ingested into Kusto due to compliance issue
    # Note: The remove_user_content property could be deleted if the compliance issue gets resolved
    if self._config.remove_user_content:
        corpus_path = None
        if level in (CdmStatusLevel.WARNING, CdmStatusLevel.ERROR):
            message = None

    log_entry = self._process_log_entry(
        timestamp, class_name, method, message, code, corpus_path,
        self._ctx.correlation_id, self._ctx.events.api_correlation_id, self._ctx.corpus.app_id)

    # Queue the status level together with the entry; a full queue is only reported
    try:
        self._request_queue.put_nowait((level, log_entry))
    except Full:
        logger.warning(self._ctx, TelemetryKustoClient.__name__, self.add_to_ingestion_queue.__name__,
                       None, CdmLogCode.WARN_TELEMETRY_INGESTION_FAILED, 'The request queue is full.')
def from_data(ctx: CdmCorpusContext, data: TypeAttribute,
              entity_name: Optional[str] = None) -> CdmTypeAttributeDefinition:
    """Create a type attribute definition from its data object.

    :param ctx: Corpus context used to create objects and to log.
    :param data: The type attribute data to read from.
    :param entity_name: Name of the owning entity; needed to record the attribute
        as primary key when ``isPrimaryKey`` is set.
    :return: The new type attribute definition.
    """
    to_bool = TypeAttributePersistence._property_from_data_to_bool
    to_int = TypeAttributePersistence._property_from_data_to_int
    to_string = TypeAttributePersistence._property_from_data_to_string

    type_attribute = ctx.corpus.make_object(CdmObjectType.TYPE_ATTRIBUTE_DEF, data.get('name'))

    type_attribute.purpose = PurposeReferencePersistence.from_data(ctx, data.get('purpose'))
    type_attribute.data_type = DataTypeReferencePersistence.from_data(ctx, data.get('dataType'))
    type_attribute.attribute_context = AttributeContextReferencePersistence.from_data(
        ctx, data.get('attributeContext'))
    type_attribute.resolution_guidance = AttributeResolutionGuidancePersistence.from_data(
        ctx, data.get('resolutionGuidance'))

    trait_references = utils.create_trait_reference_array(ctx, data.get('appliedTraits'))
    type_attribute.applied_traits.extend(trait_references)

    if data.get('isPrimaryKey') and entity_name:
        primary_key_path = entity_name + '/(resolvedAttributes)/' + type_attribute.name
        TraitToPropertyMap(type_attribute)._update_property_value('isPrimaryKey', primary_key_path)

    type_attribute.explanation = data.get('explanation')

    # Boolean and numeric properties are copied whenever they are present (even when falsy);
    # text properties are copied only when non-empty.
    if data.get('isReadOnly') is not None:
        type_attribute.is_read_only = to_bool(data.isReadOnly)

    if data.get('isNullable') is not None:
        type_attribute.is_nullable = to_bool(data.isNullable)

    if data.get('sourceName'):
        type_attribute.source_name = to_string(data.sourceName)

    if data.get('sourceOrdering') is not None:
        type_attribute.source_ordering = to_int(data.sourceOrdering)

    if data.get('displayName'):
        type_attribute.display_name = to_string(data.displayName)

    if data.get('description'):
        type_attribute.description = to_string(data.description)

    if data.get('valueConstrainedToList') is not None:
        type_attribute.value_constrained_to_list = to_bool(data.valueConstrainedToList)

    if data.get('maximumLength') is not None:
        type_attribute.maximum_length = to_int(data.maximumLength)

    if data.get('maximumValue') is not None:
        type_attribute.maximum_value = to_string(data.maximumValue)

    if data.get('minimumValue') is not None:
        type_attribute.minimum_value = to_string(data.minimumValue)

    if data.get('dataFormat') is not None:
        try:
            type_attribute.data_format = CdmDataFormat(data.dataFormat)
        except ValueError:
            logger.warning(TypeAttributePersistence.__name__, ctx,
                           'Couldn\'t find an enum value for {}.'.format(data.dataFormat),
                           TypeAttributePersistence.from_data.__name__)

    if data.get('defaultValue') is not None:
        type_attribute.default_value = data.defaultValue

    return type_attribute
async def to_data(document_object_or_path: Union[CdmDocumentDefinition, str], manifest: 'CdmManifestDefinition',
                  res_opt: 'ResolveOptions', options: 'CopyOptions',
                  ctx: 'CdmCorpusContext') -> Optional['LocalEntity']:
    """Convert the entity found at a corpus path into a local entity data object.

    :param document_object_or_path: Corpus path of the entity. Passing a document
        object is not implemented and yields None.
    :param manifest: Manifest the path is fetched relative to; import paths are
        rewritten to be relative to it.
    :param res_opt: Resolve options forwarded to the persistence helpers.
    :param options: Copy options forwarded to the persistence helpers.
    :param ctx: Corpus context used to fetch objects and to log.
    :return: The entity data object, or None when the object cannot be fetched
        or is not an entity definition.
    """
    if isinstance(document_object_or_path, str):
        # Fetch the document from entity schema.
        cdm_entity = await ctx.corpus.fetch_object_async(document_object_or_path, manifest)

        # Check for a failed fetch first: None is not an instance of anything, so
        # testing the type first would report a missing object as an invalid cast
        # and leave the fetch-error branch unreachable.
        if cdm_entity is None:
            logger.error(ctx, DocumentPersistence.__name__, DocumentPersistence.to_data.__name__,
                         manifest.at_corpus_path, CdmLogCode.ERR_PERSIST_CDM_ENTITY_FETCH_ERROR)
            return None

        if not isinstance(cdm_entity, CdmEntityDefinition):
            logger.error(ctx, DocumentPersistence.__name__, DocumentPersistence.to_data.__name__,
                         manifest.at_corpus_path, CdmLogCode.ERR_INVALID_CAST,
                         document_object_or_path, 'CdmEntityDefinition')
            return None

        entity = await EntityPersistence.to_data(cdm_entity, res_opt, options, ctx)

        if cdm_entity.owner and cdm_entity.owner.object_type == CdmObjectType.DOCUMENT_DEF:
            document = cdm_entity.owner  # type: CdmDocumentDefinition
            entity.imports = []
            for element in document.imports:
                imp = CdmImportPersistence.to_data(element, res_opt, options)
                # the corpus path in the imports are relative to the document where it was defined.
                # when saving in model.json the documents are flattened to the manifest level
                # so it is necessary to recalculate the path to be relative to the manifest.
                absolute_path = ctx.corpus.storage.create_absolute_corpus_path(imp.corpusPath, document)

                if not StringUtils.is_blank_by_cdm_standard(document._namespace) \
                        and absolute_path.startswith(document._namespace + ':'):
                    absolute_path = absolute_path[len(document._namespace) + 1:]

                imp.corpusPath = ctx.corpus.storage.create_relative_corpus_path(absolute_path, manifest)
                entity.imports.append(imp)
        else:
            logger.warning(ctx, _TAG, DocumentPersistence.to_data.__name__, manifest.at_corpus_path,
                           CdmLogCode.WARN_PERSIST_ENTITY_MISSING, cdm_entity.get_name())

        return entity
    else:
        # TODO: Do something else when document_object_or_path is an object.
        pass
def create_absolute_corpus_path(self, object_path: str, obj: 'CdmObject' = None) -> Optional[str]:
    """Takes a corpus path (relative or absolute) and creates a valid absolute path with namespace

    :param object_path: Corpus path, optionally prefixed with a namespace.
    :param obj: Optional object that supplies the folder path and namespace a
        relative path is resolved against (taken from the object itself when it
        has ``namespace`` and ``folder_path`` attributes, otherwise from its
        ``in_document``).
    :return: The absolute corpus path, or None when the input is empty, uses an
        unsupported format, or names a namespace that differs from the object's.
    """
    if not object_path:
        logger.error(self._TAG, self._ctx, 'The namespace cannot be null or empty.',
                     StorageManager.create_absolute_corpus_path.__name__)
        return None

    if self._contains_unsupported_path_format(object_path):
        # Already called status_rpt when checking for unsupported path format.
        return None

    path_tuple = StorageUtils.split_namespace_path(object_path)
    if not path_tuple:
        logger.error(self._TAG, self._ctx, 'The object path cannot be null or empty.',
                     self.create_absolute_corpus_path.__name__)
        return None

    final_namespace = ''
    prefix = None
    namespace_from_obj = None

    if obj and hasattr(obj, 'namespace') and hasattr(obj, 'folder_path'):
        prefix = obj.folder_path
        namespace_from_obj = obj.namespace
    elif obj:
        prefix = obj.in_document.folder_path
        namespace_from_obj = obj.in_document.namespace

    if prefix and self._contains_unsupported_path_format(prefix):
        # Already called status_rpt when checking for unsupported path format.
        return None

    if prefix and prefix[-1] != '/':
        logger.warning(self._TAG, self._ctx,
                       'Expected path prefix to end in /, but it didn\'t. Appended the /', prefix)
        prefix += '/'

    # check if this is a relative path
    # startswith (rather than indexing the first character) keeps an empty path
    # portion, e.g. "namespace:", from raising IndexError; it is treated as relative.
    if not path_tuple[1].startswith('/'):
        if not obj:
            # relative path and no other info given, assume default and root
            prefix = '/'

        if path_tuple[0] and path_tuple[0] != namespace_from_obj:
            logger.warning(self._TAG, self._ctx,
                           'The namespace "{}" found on the path does not match the namespace found on the object'
                           .format(path_tuple[0]))
            return None

        # NOTE(review): prefix is still None here when obj is given but has no folder path - confirm callers.
        path_tuple = (path_tuple[0], prefix + path_tuple[1])
        final_namespace = namespace_from_obj or path_tuple[0] or self.default_namespace
    else:
        final_namespace = path_tuple[0] or namespace_from_obj or self.default_namespace

    return '{}:{}'.format(final_namespace, path_tuple[1]) if final_namespace else path_tuple[1]
def from_data(ctx: CdmCorpusContext, data: TypeAttribute) -> CdmTypeAttributeDefinition:
    """Create a type attribute definition from its data object.

    Boolean, numeric and default-value properties are copied whenever they are
    present in ``data`` (not None), so explicit values such as ``False``, ``0``
    or ``''`` are preserved rather than silently dropped. Text properties
    (source name, display name, description) and the data format are copied
    only when non-empty.

    :param ctx: Corpus context used to create objects and to log.
    :param data: The type attribute data to read from.
    :return: The new type attribute definition.
    """
    type_attribute = ctx.corpus.make_object(CdmObjectType.TYPE_ATTRIBUTE_DEF, data.name)
    type_attribute.purpose = PurposeReferencePersistence.from_data(ctx, data.get('purpose'))
    type_attribute.data_type = DataTypeReferencePersistence.from_data(ctx, data.get('dataType'))
    type_attribute.attribute_context = AttributeContextReferencePersistence.from_data(
        ctx, data.get('attributeContext'))
    type_attribute.resolution_guidance = AttributeResolutionGuidancePersistence.from_data(
        ctx, data.get('resolutionGuidance'))

    applied_traits = utils.create_trait_reference_array(ctx, data.get('appliedTraits'))
    type_attribute.applied_traits.extend(applied_traits)

    type_attribute.explanation = data.get('explanation')

    # "is not None" instead of truthiness: False / 0 are legitimate values here.
    if data.get('isReadOnly') is not None:
        type_attribute.is_read_only = data.isReadOnly

    if data.get('isNullable') is not None:
        type_attribute.is_nullable = data.isNullable

    if data.get('sourceName'):
        type_attribute.source_name = data.sourceName

    if data.get('sourceOrdering') is not None:
        type_attribute.source_ordering = data.sourceOrdering

    if data.get('displayName'):
        type_attribute.display_name = data.displayName

    if data.get('description'):
        type_attribute.description = data.description

    if data.get('valueConstrainedToList') is not None:
        type_attribute.value_constrained_to_list = data.valueConstrainedToList

    if data.get('maximumLength') is not None:
        type_attribute.maximum_length = data.maximumLength

    if data.get('maximumValue') is not None:
        type_attribute.maximum_value = data.maximumValue

    if data.get('minimumValue') is not None:
        type_attribute.minimum_value = data.minimumValue

    if data.get('dataFormat'):
        try:
            type_attribute.data_format = CdmDataFormat(data.dataFormat)
        except ValueError:
            logger.warning(TypeAttributePersistence.__name__, ctx,
                           'Couldn\'t find an enum value for {}.'.format(data.dataFormat),
                           TypeAttributePersistence.from_data.__name__)

    if data.get('defaultValue') is not None:
        type_attribute.default_value = data.defaultValue

    return type_attribute
async def file_status_check_async(self) -> None:
    """Check the modified time for this object and any children.

    Lists the files under ``root_location`` through the adapter of the owning
    document's namespace, matches each path (with the root stripped) against
    ``regular_expression`` and, when the owner is a
    ``CdmLocalEntityDeclarationDefinition``, asks the owner to create a partition for
    every full match. ``last_file_status_check_time`` is refreshed afterwards.

    When no adapter is registered for the namespace an error is logged and the method
    returns without touching the timestamp. When listing the files fails a warning is
    logged and no partitions are created.
    """
    namespace = self.in_document.namespace
    adapter = self.ctx.corpus.storage.fetch_adapter(namespace)

    if adapter is None:
        logger.error(self._TAG, self.ctx, 'Adapter not found for the document {}'.format(self.in_document.name),
                     self.file_status_check_async.__name__)
        # Nothing can be listed without an adapter. Falling through used to end in an
        # UnboundLocalError on the file list below.
        return

    # make sure the root is a good full corpus path.
    root_cleaned = (self.root_location or '').rstrip('/')
    root_corpus = self.ctx.corpus.storage.create_absolute_corpus_path(root_cleaned, self.in_document)

    # Bound up front so a failed listing leaves a defined (empty) result.
    file_info_list = None
    try:
        # get a list of all corpus_paths under the root.
        file_info_list = await adapter.fetch_all_files_async(root_corpus)
    except Exception:
        logger.warning(self._TAG, self.ctx,
                       'The folder location \'{}\' described by a partition pattern does not exist'.format(root_corpus),
                       self.file_status_check_async.__name__)

    if file_info_list is not None:
        # remove root of the search from the beginning of all paths so anything in the root is not found by regex.
        file_info_list = [(namespace + ':' + fi)[len(root_corpus):] for fi in file_info_list]

        reg = regex.compile(self.regular_expression)

        if isinstance(self.owner, CdmLocalEntityDeclarationDefinition):
            for fi in file_info_list:
                m = reg.fullmatch(fi)
                if m:
                    # create a map of arguments out of capture groups.
                    args = defaultdict(list)  # type: Dict[str, List[str]]
                    i_param = 0
                    for i in range(1, reg.groups + 1):
                        captures = m.captures(i)
                        if captures and self.parameters and i_param < len(self.parameters):
                            # to be consistent with other languages, if a capture group captures
                            # multiple things, only use the last thing that was captured
                            single_capture = captures[-1]

                            current_param = self.parameters[i_param]
                            args[current_param].append(single_capture)
                            i_param += 1
                        else:
                            break

                    # put the original but cleaned up root back onto the matched doc as the location stored in the partition.
                    location_corpus_path = root_cleaned + fi
                    last_modified_time = await adapter.compute_last_modified_time_async(
                        adapter.create_adapter_path(location_corpus_path))
                    cast('CdmLocalEntityDeclarationDefinition', self.owner)._create_partition_from_pattern(
                        location_corpus_path, self.exhibits_traits, args, self.specialized_schema, last_modified_time)

    # update modified times.
    self.last_file_status_check_time = datetime.now(timezone.utc)
async def from_data(ctx: 'CdmCorpusContext', data_obj: 'ReferenceEntity',
                    location: str) -> 'CdmReferencedEntityDeclarationDefinition':
    """Create a referenced entity declaration from its persisted form.

    :param ctx: Corpus context whose corpus creates the objects and references.
    :param data_obj: Persisted referenced entity.
    :param location: Adapter path that is converted to a corpus path and combined with
        ``data_obj.source`` to form the entity path.
    :return: The populated referenced entity declaration.
    """
    corpus = ctx.corpus
    declaration = corpus.make_object(CdmObjectType.REFERENCED_ENTITY_DECLARATION_DEF, data_obj.name)

    corpus_path = corpus.storage.adapter_path_to_corpus_path(location)

    declaration.entity_name = data_obj.name
    declaration.entity_path = '{}/{}'.format(corpus_path, data_obj.source)
    declaration.explanation = data_obj.get('description')

    # Timestamps are parsed only when they are present.
    status_check_time = data_obj.get('lastFileStatusCheckTime')
    if status_check_time:
        declaration.last_file_status_check_time = dateutil.parser.parse(status_check_time)

    modified_time = data_obj.get('lastFileModifiedTime')
    if modified_time:
        declaration.last_file_modified_time = dateutil.parser.parse(modified_time)

    await utils.process_annotations_from_data(ctx, data_obj, declaration.exhibits_traits)

    if data_obj.get('isHidden'):
        hidden_trait = corpus.make_ref(CdmObjectType.TRAIT_REF, 'is.hidden', True)
        hidden_trait.is_from_property = True
        declaration.exhibits_traits.append(hidden_trait)

    # The model id is carried as an argument of a property-content trait.
    model_id_argument = corpus.make_object(CdmObjectType.ARGUMENT_DEF, 'modelId')
    model_id_argument.value = data_obj.modelId

    content_trait = corpus.make_ref(CdmObjectType.TRAIT_REF, 'is.propertyContent.multiTrait', False)
    content_trait.is_from_property = True
    content_trait.arguments.append(model_id_argument)
    declaration.exhibits_traits.append(content_trait)

    # Extensions are processed into a scratch collection; any extension trait
    # definitions found only produce a warning.
    extension_trait_def_list = []  # type: List[CdmTraitDefinition]
    scratch_traits = CdmTraitCollection(ctx, declaration)
    extension_helper.process_extension_from_json(ctx, data_obj, scratch_traits, extension_trait_def_list)

    if extension_trait_def_list:
        logger.warning(_TAG, ctx, 'Custom extensions are not supported in referenced entity.')

    return declaration
def _fetch_resolved_reference(self, res_opt: 'ResolveOptions') -> 'CdmObject':
    """Return the object this reference points at.

    :param res_opt: Resolve options. When ``None``, options are built from this
        reference and the corpus' default resolution directives.
    :return: ``explicit_reference`` when it is set; otherwise the object found for
        ``named_reference``. ``None`` is returned when there is no context, when the
        entity of an attribute promise cannot be resolved, or when the promised
        attribute is not among the entity's resolved attributes.
    """
    if self.explicit_reference:
        return self.explicit_reference

    if not self.ctx:
        return None

    # The default options need self.ctx.corpus, so they are built only after the
    # guards above; building them first failed for a reference without a context.
    if res_opt is None:
        res_opt = ResolveOptions(self, self.ctx.corpus.default_resolution_directives)

    res = None

    # if this is a special request for a resolved attribute, look that up now
    seek_res_att = CdmObjectReference._offset_attribute_promise(self.named_reference)
    if seek_res_att >= 0:
        ent_name = self.named_reference[:seek_res_att]
        att_name = self.named_reference[seek_res_att + len(RES_ATT_TOKEN):]

        # get the entity
        ent = self.ctx.corpus._resolve_symbol_reference(
            res_opt, self.in_document, ent_name, CdmObjectType.ENTITY_DEF, True)
        if not ent:
            logger.warning(self.ctx, self._TAG, CdmObjectReference._fetch_resolved_reference.__name__,
                           self.at_corpus_path, CdmLogCode.WARN_RESOLVE_ENTITY_FAILED,
                           ent_name, self.named_reference)
            return None

        # get the resolved attribute
        ras = ent._fetch_resolved_attributes(res_opt)
        ra = None
        if ras is not None:
            ra = ras.get(att_name)
        if ra:
            res = ra.target
        else:
            logger.warning(self.ctx, self._TAG, CdmObjectReference._fetch_resolved_reference.__name__,
                           self.at_corpus_path, CdmLogCode.WARN_RESOLVE_ATTR_FAILED, self.named_reference)
    else:
        # normal symbolic reference, look up from the corpus, it knows where everything is
        res = self.ctx.corpus._resolve_symbol_reference(
            res_opt, self.in_document, self.named_reference, self.object_type, True)

    return res
def _resolve_object_definitions(self, res_opt: 'ResolveOptions') -> None:
    """Try to resolve every reference object held in ``_internal_objects``.

    While the loop runs, ``res_opt._indexing_doc`` points at this document; it is reset
    to ``None`` afterwards. A reference that cannot be resolved is logged as a warning
    when ``res_opt.shallow_validation`` is set and as an error otherwise. Parameter
    definitions get their constant type checked instead.

    :param res_opt: Resolve options used for every lookup.
    """
    ctx = self.ctx
    res_opt._indexing_doc = self

    reference_kinds = {
        CdmObjectType.ATTRIBUTE_REF,
        CdmObjectType.ATTRIBUTE_GROUP_REF,
        CdmObjectType.ATTRIBUTE_CONTEXT_REF,
        CdmObjectType.DATA_TYPE_REF,
        CdmObjectType.ENTITY_REF,
        CdmObjectType.PURPOSE_REF,
        CdmObjectType.TRAIT_REF,
    }

    for item in self._internal_objects:
        kind = item.object_type

        if kind in reference_kinds:
            ctx._relative_path = item._declared_path

            # Attribute promises are not looked up here.
            if item._offset_attribute_promise(item.named_reference) >= 0:
                continue

            if item.fetch_object_definition(res_opt):
                logger.info(self.ctx, self._TAG, self._resolve_object_definitions.__name__,
                            self.at_corpus_path, 'resolved \'{}\''.format(item.named_reference))
            elif res_opt.shallow_validation:
                # it's okay if references can't be resolved when shallow validation is enabled.
                logger.warning(self.ctx, self._TAG, self._resolve_object_definitions.__name__,
                               self.at_corpus_path, CdmLogCode.WARN_RESOLVE_REFERENCE_FAILURE,
                               item.named_reference)
            else:
                logger.error(self.ctx, self._TAG, self._resolve_object_definitions.__name__,
                             self.at_corpus_path, CdmLogCode.ERR_RESOLVE_REFERENCE_FAILURE,
                             item.named_reference)
        elif kind == CdmObjectType.PARAMETER_DEF:
            # when a parameter has a datatype that is a cdm object, validate that any
            # default value is the right kind of object
            item._const_type_check(res_opt, self, None)

    res_opt._indexing_doc = None
async def process_traits_and_annotations_to_data(ctx: 'CdmCorpusContext', entity_object: 'MetadataObject',
                                                 traits: 'CdmTraitCollection'):
    """Convert traits into annotations and extension traits on a persisted object.

    Each trait is handled by the first rule that applies to its ``named_reference``:
    extension traits are written onto ``entity_object`` by the extension helper, the
    "other annotations" trait contributes the entries of its first argument, traits
    listed in ``trait_to_annotation_map`` become one annotation each, and the
    remaining traits that are not filtered out are persisted as trait references.

    :param ctx: Corpus context, used for logging.
    :param entity_object: Object that receives ``annotations`` and ``traits``; each is
        assigned only when at least one entry was produced.
    :param traits: Traits to convert; ``None`` makes the call a no-op.
    """
    if traits is None:
        return

    annotations = []
    extensions = []

    for trait in traits:
        name = trait.named_reference

        if name.startswith('is.extension.'):
            extension_helper.process_extension_trait_to_object(trait, entity_object)
            continue

        if name == 'is.modelConversion.otherAnnotations':
            for entry in trait.arguments[0].value:
                if isinstance(entry, NameValuePair):
                    converted = Annotation()
                    converted.name = entry.name
                    converted.value = entry.value
                    annotations.append(converted)
                elif isinstance(entry, dict) and entry.get('name'):
                    annotations.append(entry)
                else:
                    logger.warning(ctx, _TAG, process_traits_and_annotations_to_data.__name__, None,
                                   CdmLogCode.WARN_ANNOTATION_TYPE_NOT_SUPPORTED)
            continue

        if isinstance(trait, CdmTraitReference) and name in trait_to_annotation_map:
            converted = await ArgumentPersistence.to_data(trait.arguments[0], None, None)
            converted.name = convert_trait_to_annotation(name)
            annotations.append(converted)
            continue

        # Everything below is a candidate extension trait; skip the filtered ones.
        if name in ignored_traits or name.startswith('is.dataFormat'):
            continue
        if name in model_json_property_traits and trait.object_type == CdmObjectType.TRAIT_REF \
                and trait.is_from_property:
            continue

        persistence = (TraitGroupReferencePersistence
                       if trait.object_type == CdmObjectType.TRAIT_GROUP_REF
                       else TraitReferencePersistence)
        extensions.append(persistence.to_data(trait, None, None))

    if annotations:
        entity_object.annotations = annotations

    if extensions:
        entity_object.traits = extensions
def validate(self) -> bool:
    """Report whether this constant entity has its required ``entity_shape``.

    A missing ``constant_values`` only produces a warning naming the first segment of
    ``_declared_path``; it does not fail validation.

    :return: ``True`` when ``entity_shape`` is set; ``False`` (after logging an error)
        otherwise.
    """
    if self.constant_values is None:
        segments = self._declared_path.split('/')
        entity_name = next(iter(segments), '')
        logger.warning(self._TAG, self.ctx,
                       'constant entity \'{}\' defined without a constant value.'.format(entity_name))

    if self.entity_shape:
        return True

    logger.error(self._TAG, self.ctx, Errors.validate_error_string(self.at_corpus_path, ['entity_shape']))
    return False
def _fetch_resolved_reference(self, res_opt: 'ResolveOptions') -> 'CdmObjectDefinition':
    """Return the definition this reference points at.

    :param res_opt: Resolve options passed to the corpus and entity lookups.
    :return: ``explicit_reference`` when it is set; otherwise the object found for
        ``named_reference``. ``None`` is returned when there is no context, when the
        entity of an attribute promise cannot be resolved, or when the promised
        attribute is not among the entity's resolved attributes.
    """
    if self.explicit_reference:
        return self.explicit_reference

    if not self.ctx:
        return None

    promise_offset = CdmObjectReference._offset_attribute_promise(self.named_reference)

    if promise_offset < 0:
        # normal symbolic reference, look up from the corpus, it knows where everything is
        return self.ctx.corpus._resolve_symbol_reference(
            res_opt, self.in_document, self.named_reference, self.object_type, True)

    # Special request for a resolved attribute: the entity name precedes the token and
    # the attribute name follows it.
    entity_name = self.named_reference[:promise_offset]
    attribute_name = self.named_reference[promise_offset + len(RES_ATT_TOKEN):]

    entity = self.ctx.corpus._resolve_symbol_reference(
        res_opt, self.in_document, entity_name, CdmObjectType.ENTITY_DEF, True)
    if not entity:
        logger.warning(self._TAG, self.ctx,
                       'Unable to resolve an entity named \'{}\' from the reference \'{}\''
                       .format(entity_name, self.named_reference))
        return None

    resolved_attributes = entity._fetch_resolved_attributes(res_opt)
    resolved_attribute = resolved_attributes.get(attribute_name) if resolved_attributes is not None else None
    if resolved_attribute:
        return resolved_attribute.target

    logger.warning(self._TAG, self.ctx,
                   'Could not resolve the attribute promise for \'{}\''.format(self.named_reference),
                   res_opt.wrt_doc.at_corpus_path)
    return None
def from_data(ctx: 'CdmCorpusContext', data: DataPartition) -> CdmDataPartitionDefinition:
    """Create a data partition definition from its persisted form.

    :param ctx: Corpus context whose corpus creates the new object.
    :param data: Persisted partition; read with ``data.get(<name>)`` and attribute access.
    :return: The populated data partition. Argument entries without a key or a value
        are reported with a warning and skipped.
    """
    partition = ctx.corpus.make_object(
        CdmObjectType.DATA_PARTITION_DEF, data.get('name'))  # type: CdmDataPartitionDefinition
    partition.location = data.get('location')

    if data.get('specializedSchema'):
        partition.specialized_schema = data.specializedSchema

    if data.get('lastFileStatusCheckTime'):
        partition.last_file_status_check_time = dateutil.parser.parse(data.lastFileStatusCheckTime)

    if data.get('lastFileModifiedTime'):
        partition.last_file_modified_time = dateutil.parser.parse(data.lastFileModifiedTime)

    if data.get('exhibitsTraits'):
        partition.exhibits_traits.extend(utils.create_trait_reference_array(ctx, data.exhibitsTraits))

    if not data.get('arguments'):
        return partition

    for entry in data.arguments:
        if len(entry) == 1:
            # Single-item form: the only item is the key/value pair itself.
            key, value = next(iter(entry.items()))
        else:
            # Explicit form: the key is stored under 'key' or 'name', the value under 'value'.
            key = entry.get('key') or entry.get('name')
            value = entry.get('value')

        if key is None or value is None:
            logger.warning(ctx, _TAG, DataPartitionPersistence.from_data.__name__, None,
                           CdmLogCode.WARN_PARTITION_INVALID_ARGUMENTS, data.location)
            continue

        # Values that share a key are collected into one list.
        if key not in partition.arguments:
            partition.arguments[key] = [value]
        else:
            partition.arguments[key].append(value)

    return partition
def _construct_resolved_traits(self, rtsb: 'ResolvedTraitSetBuilder', res_opt: 'ResolveOptions') -> None:
    """Feed the traits of the referenced definition and the applied traits into ``rtsb``.

    :param rtsb: Builder that takes the definition's resolved traits by reference and
        then merges in the resolved traits of every applied trait.
    :param res_opt: Resolve options passed to every lookup.
    """
    definition = self.fetch_object_definition(res_opt)

    if definition:
        inherited = definition._fetch_resolved_traits(res_opt)
        # Hand the builder a copy so the definition's own trait set is not the one it holds.
        if inherited:
            inherited = inherited.deep_copy()
        rtsb.take_reference(inherited)
    elif not self.optional:
        logger.warning(self.ctx, self._TAG, CdmObjectReference._construct_resolved_traits.__name__,
                       self.at_corpus_path, CdmLogCode.WARN_RESOLVE_OBJECT_FAILED,
                       self.fetch_object_definition_name())

    for applied in self.applied_traits or ():
        rtsb.merge_traits(applied._fetch_resolved_traits(res_opt))
def unmount(self, namespace: str) -> None:
    """Unregister a storage adapter and its root folder.

    :param namespace: Namespace to unmount. An empty value is reported as an error and
        an unknown namespace as a warning; neither changes any state.
    """
    if not namespace:
        logger.error(self._TAG, self._ctx, 'The namespace cannot be null or empty.',
                     StorageManager.unmount.__name__)
        return None

    if namespace in self._namespace_adapters:
        self._namespace_adapters.pop(namespace, None)
        self._namespace_folders.pop(namespace, None)
        if namespace in self._system_defined_namespaces:
            self._system_defined_namespaces.remove(namespace)

        # The special case, use Resource adapter: 'cdm' is immediately re-mounted.
        if namespace == 'cdm':
            self.mount(namespace, ResourceAdapter())
    else:
        # Report this method as the source; the warning used to name mount().
        logger.warning(self._TAG, self._ctx, 'Cannot remove the adapter from non-existing namespace.',
                       StorageManager.unmount.__name__)
async def _ingest_into_table(self, log_table: str, log_entries: str) -> None:
    """Ingest log entries into the table specified.

    Nothing is sent when ``log_entries`` is empty. A failure while posting the query
    is logged as a warning and not re-raised.

    :param log_table: The table to be ingested into.
    :param log_entries: The batched log entries.
    """
    if not log_entries:
        return

    ingestion_query = '{0}{1} <|\n{2}'.format(self.INGESTION_COMMAND, log_table, log_entries)

    try:
        await self.post_kusto_query(ingestion_query)
    except Exception as ex:
        logger.warning(self._ctx, TelemetryKustoClient.__name__, self._ingest_into_table.__name__, None,
                       CdmLogCode.WARN_TELEMETRY_INGESTION_FAILED, ex)
async def _fetch_document_from_folder_path_async(
        self, document_path: str, force_reload: bool,
        res_opt: Optional['ResolveOptions'] = None) -> 'CdmDocumentDefinition':
    """Get the document named by the first segment of ``document_path``.

    :param document_path: Path whose part before the first ``/`` (or the whole path
        when it has none) is the document name.
    :param force_reload: When false, a document already in the lookup is returned as
        is. When true, it is removed from ``documents`` and loaded again.
    :param res_opt: Optional resolve options passed to the loader.
    :return: The cached document, or whatever the persistence layer loads.
    """
    doc_name = document_path.partition('/')[0]

    # got that doc?
    cached = None  # type: Optional[CdmDocumentDefinition]
    if doc_name in self._document_lookup:
        cached = self._document_lookup[doc_name]
        if not force_reload:
            return cached

        # remove them from the caches since they will be back in a moment
        if cached._is_dirty:
            logger.warning(self.ctx, self._TAG,
                           CdmFolderDefinition._fetch_document_from_folder_path_async.__name__,
                           self.at_corpus_path, CdmLogCode.WARN_DOC_CHANGES_DISCARDED, cached.name)
        self.documents.remove(doc_name)

    # go get the doc; the stale instance (if any) is handed to the loader
    return await self._corpus.persistence._load_document_from_path_async(self, doc_name, cached, res_opt)