def create_table_propertybags(instance: CdmLocalEntityDeclarationDefinition, res_opt: ResolveOptions, options: CopyOptions, properties):
    """Fill (or create) a SyMS table property bag from a local entity declaration.

    Returns the populated dict, or None when the entity path cannot be split
    into a (namespace, path) pair.
    """
    # Idiom fix: 'is None' instead of '== None' throughout.
    if properties is None:
        properties = {}
    if instance.entity_path is not None:
        path_tuple = StorageUtils.split_namespace_path(instance.entity_path)
        if path_tuple is None:
            logger.error(instance.ctx, _TAG, LocalEntityDeclarationPersistence.create_table_propertybags.__name__,
                         instance.at_corpus_path, CdmLogCode.ERR_PERSIST_SYMS_ENTITY_PATH_NULL, instance.entity_name)
            return None
        # Only the path portion is stored; the namespace is implied by the SyMS database.
        properties['cdm:entityPath'] = path_tuple[1]
    t2pm = TraitToPropertyMap(instance)
    is_hidden_trait = t2pm._fetch_trait_reference('is.hidden')
    # Do not clobber a description supplied by the caller.
    if 'cdm:description' not in properties:
        properties['cdm:description'] = instance.explanation
    if instance.last_child_file_modified_time is not None:
        properties['cdm:lastChildFileModifiedTime'] = instance.last_child_file_modified_time
    if instance.last_file_modified_time is not None:
        properties['cdm:lastFileModifiedTime'] = instance.last_file_modified_time
    if instance.last_file_status_check_time is not None:
        properties['cdm:lastFileStatusCheckTime'] = instance.last_file_status_check_time
    if is_hidden_trait is not None:
        properties['cdm:isHidden'] = True
    if instance.exhibits_traits is not None and len(instance.exhibits_traits) > 0:
        properties['cdm:entityDecTraits'] = copy_data_utils._array_copy_data(res_opt, instance.exhibits_traits, options)
    return properties
async def to_data(instance: 'CdmReferencedEntityDeclarationDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> Optional['ReferenceEntity']:
    """Convert a referenced entity declaration into a model.json ReferenceEntity."""
    slash_pos = instance.entity_path.rfind('/')
    if slash_pos == -1:
        instance.ctx.logger.error(
            'Source name is not present in entityDeclaration path. | %s', instance.at_corpus_path)
        return

    result = ReferenceEntity()
    trait_map = TraitToPropertyMap(instance)

    result.type = 'ReferenceEntity'
    result.name = instance.entity_name
    # Everything after the final slash is the source entity name.
    result.source = instance.entity_path[slash_pos + 1:]
    result.description = instance.explanation
    result.lastFileStatusCheckTime = utils.get_formatted_date_string(instance.last_file_status_check_time)
    result.lastFileModifiedTime = utils.get_formatted_date_string(instance.last_file_modified_time)
    # model.json convention: True or absent (None), never False.
    result.isHidden = True if trait_map.fetch_trait_reference('is.hidden') else None

    multi_trait = trait_map.fetch_trait_reference('is.propertyContent.multiTrait')
    if multi_trait:
        result.modelId = multi_trait.arguments[0].value

    await utils.process_annotations_to_data(instance.ctx, result, instance.exhibits_traits)
    return result
def to_data(instance: CdmTypeAttributeDefinition, ctx: CdmCorpusContext, res_opt: ResolveOptions, options: CopyOptions) -> TypeAttribute:
    """Convert a CDM type attribute into a SyMS DataColumn.

    Returns None when the data format cannot be mapped to a SyMS type.
    """
    properties = TypeAttributePersistence.create_properties(instance, res_opt, options)
    origin_data_type_name = TypeInfo(
        type_name='',
        properties=properties,
        is_complex_type=False,
        is_nullable=instance._get_property('isNullable'),
        type_family='cdm')
    t2pm = TraitToPropertyMap(instance)
    numeric_traits = t2pm._fetch_trait_reference('is.data_format.numeric.shaped')
    if numeric_traits is not None:
        # BUG FIX: was 'numeric_traits.argument' (the collection is 'arguments',
        # as used by every other persistence in this file).
        for numeric_traits_arg in numeric_traits.arguments:
            if numeric_traits_arg.name == 'precision':
                origin_data_type_name.precision = numeric_traits_arg.value
            # BUG FIX: was 'numeric_traits_arg.Name' (capital N) — would raise
            # AttributeError; the same object is read as '.name' just above.
            if numeric_traits_arg.name == 'scale':
                origin_data_type_name.scale = numeric_traits_arg.value
    data_format = instance._get_property('dataFormat')
    origin_data_type_name = utils.cdm_data_format_to_syms_data_type(data_format, origin_data_type_name)
    if origin_data_type_name is None:
        return None
    if origin_data_type_name.type_name is None:
        logger.error(ctx, _TAG, 'toData', instance.at_corpus_path,
                     CdmLogCode.ERR_PERSIST_SYMS_UNKNOWN_DATA_FORMAT, instance.display_name)
        return None
    data_col = DataColumn(origin_data_type_name=origin_data_type_name, name=instance.name)
    return data_col
async def to_data(instance: 'CdmLocalEntityDeclarationDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions', ctx: 'CdmCorpusContext') -> 'LocalEntity':
    """Convert a local entity declaration into a model.json LocalEntity.

    Returns None when the entity document or any data partition fails to convert.
    """
    # Fetch the document from entity schema.
    entity = await DocumentPersistence.to_data(instance.entity_path, res_opt, options, ctx)
    if not entity:
        return None
    entity.description = instance.explanation
    entity.lastFileStatusCheckTime = utils.get_formatted_date_string(instance.last_file_status_check_time)
    entity.lastFileModifiedTime = utils.get_formatted_date_string(instance.last_file_modified_time)
    entity.lastChildFileModifiedTime = utils.get_formatted_date_string(instance.last_child_file_modified_time)
    t2pm = TraitToPropertyMap(instance)
    # Find the trait containing the schema info.
    schemas = t2pm.fetch_property_value('cdmSchemas')
    if schemas:
        entity.schemas = schemas
    entity.isHidden = bool(t2pm.fetch_trait_reference('is.hidden')) or None
    if instance.data_partitions:
        entity.partitions = []
        for partition in instance.data_partitions:
            # BUG FIX: the original assigned to a misspelled 'partiton' and then
            # tested the loop variable 'partition' (always truthy), so a failed
            # conversion appended None and the error branch was unreachable.
            partition_data = await DataPartitionPersistence.to_data(partition, res_opt, options, ctx)
            if partition_data is not None:
                entity.partitions.append(partition_data)
            else:
                ctx.logger.error('There was an error while trying to convert cdm data partition to model.json partition.')
                return None
    return entity
def __init__(self, ctx: 'CdmCorpusContext', name: str) -> None:
    """Initialize a data partition definition named *name* under context *ctx*."""
    super().__init__(ctx)
    self._TAG = CdmDataPartitionDefinition.__name__

    # Partition name.
    self.name = name  # type: str
    # Corpus path of the backing data file.
    self.location = None  # type: Optional[str]
    # True when this partition was inferred from a partition pattern.
    self.inferred = False  # type: bool
    # Replacement values captured from the pattern's regular expression, keyed by name.
    self.arguments = {}  # type: Dict[str, List[str]]
    # Corpus path of a specialized schema used only for generated partitions.
    self.specialized_schema = None  # type: Optional[str]
    # When the partition was last refreshed.
    self.refresh_time = None  # type: Optional[datetime]
    # File-status bookkeeping timestamps.
    self.last_file_modified_time = None  # type: Optional[datetime]
    self.last_file_status_check_time = None  # type: Optional[datetime]

    # --- internal ---
    self._ttpm = TraitToPropertyMap(self)
def __init__(self, ctx: 'CdmCorpusContext', name: str) -> None:
    """Initialize a data partition pattern definition named *name* under context *ctx*."""
    super().__init__(ctx)
    self._TAG = CdmDataPartitionPatternDefinition.__name__

    # Pattern name.
    self.name = name  # type: str
    # Root corpus path under which inferred data partitions are searched for.
    self.root_location = None  # type: Optional[str]
    # Glob pattern used to match partition files.
    self.glob_pattern = None  # type: Optional[str]
    # Regular expression used to match partition files.
    self.regular_expression = None  # type: Optional[str]
    # Names for the replacement values captured by the regular expression.
    self.parameters = None  # type: Optional[List[str]]
    # Corpus path of a specialized schema applied to matched partitions.
    self.specialized_schema = None  # type: Optional[str]
    # File-status bookkeeping timestamps.
    self.last_file_status_check_time = None  # type: Optional[datetime]
    self.last_file_modified_time = None  # type: Optional[datetime]

    # --- internal ---
    self._ttpm = TraitToPropertyMap(self)
async def to_data(instance: 'CdmTypeAttributeDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> 'Attribute':
    """Convert a CDM type attribute into a model.json Attribute."""
    # Traits that merely mirror mapped properties are dropped from persistence.
    if instance.applied_traits:
        persisted_traits = [t for t in instance.applied_traits if not t.is_from_property]
    else:
        persisted_traits = None

    result = Attribute()
    result.name = instance.name

    description = instance._fetch_property('description')
    if description:
        result.description = description

    result.dataType = TypeAttributePersistence._data_type_to_data(instance.data_format)
    result.traits = copy_data_utils._array_copy_data(res_opt, persisted_traits, options)

    await utils.process_annotations_to_data(instance.ctx, result, instance.applied_traits)

    trait_map = TraitToPropertyMap(instance)
    # model.json convention: True or absent (None), never False.
    result.isHidden = True if trait_map._fetch_trait_reference('is.hidden') else None
    return result
async def to_data(instance: 'CdmDataPartitionDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> Optional['Partition']:
    """Convert a CDM data partition into a model.json Partition."""
    storage = instance.ctx.corpus.storage

    result = Partition()
    result.name = instance.name
    result.description = instance.description
    result.location = storage.corpus_path_to_adapter_path(
        storage.create_absolute_corpus_path(instance.location, instance.in_document))
    result.refreshTime = instance.refresh_time
    result.lastFileModifiedTime = utils.get_formatted_date_string(instance.last_file_modified_time)
    result.lastFileStatusCheckTime = utils.get_formatted_date_string(instance.last_file_status_check_time)

    if not result.location:
        logger.warning(DataPartitionPersistence.__name__, instance.ctx,
                       'Couldn\'t find data partition\'s location for partition {}.'.format(result.name),
                       DataPartitionPersistence.to_data.__name__)

    # filter description since it is mapped to a property
    remaining_traits = filter(lambda t: t.named_reference != 'is.localized.describedAs', instance.exhibits_traits)
    await utils.process_annotations_to_data(instance.ctx, result, remaining_traits)

    trait_map = TraitToPropertyMap(instance)
    # model.json convention: True or absent (None), never False.
    result.isHidden = True if trait_map.fetch_trait_reference('is.hidden') else None

    csv_trait = trait_map.fetch_trait_reference('is.partition.format.CSV')
    if csv_trait:
        csv_format_settings = utils.create_csv_format_settings(csv_trait)
        if csv_format_settings:
            result.fileFormatSettings = csv_format_settings
            result.fileFormatSettings.type = 'CsvFormatSettings'
        else:
            logger.error(DataPartitionPersistence.__name__, instance.ctx,
                         'There was a problem while processing csv format trait inside data partition.')
            return

    return result
def from_data(ctx: CdmCorpusContext, data: TypeAttribute, entity_name: Optional[str] = None) -> CdmTypeAttributeDefinition:
    """Build a CdmTypeAttributeDefinition from persisted TypeAttribute data.

    NOTE(review): guards use data.get('x') but several reads use attribute access
    (data.isReadOnly, data.defaultValue, ...) — presumably 'data' is a JObject-style
    wrapper where both forms are equivalent; confirm before refactoring.
    """
    type_attribute = ctx.corpus.make_object(CdmObjectType.TYPE_ATTRIBUTE_DEF, data.get('name'))
    type_attribute.purpose = PurposeReferencePersistence.from_data(ctx, data.get('purpose'))
    type_attribute.data_type = DataTypeReferencePersistence.from_data(ctx, data.get('dataType'))
    type_attribute.attribute_context = AttributeContextReferencePersistence.from_data(ctx, data.get('attributeContext'))
    type_attribute.resolution_guidance = AttributeResolutionGuidancePersistence.from_data(ctx, data.get('resolutionGuidance'))
    applied_traits = utils.create_trait_reference_array(ctx, data.get('appliedTraits'))
    type_attribute.applied_traits.extend(applied_traits)
    if data.get('isPrimaryKey') and entity_name:
        # Record the primary key as a trait-backed property pointing at the resolved attribute.
        t2p_map = TraitToPropertyMap(type_attribute)
        t2p_map._update_property_value('isPrimaryKey', entity_name + '/(resolvedAttributes)/' + type_attribute.name)
    type_attribute.explanation = data.get('explanation')
    # Optional scalar properties: only set when present so defaults are preserved.
    if data.get('isReadOnly') is not None:
        type_attribute.is_read_only = TypeAttributePersistence._property_from_data_to_bool(data.isReadOnly)
    if data.get('isNullable') is not None:
        type_attribute.is_nullable = TypeAttributePersistence._property_from_data_to_bool(data.isNullable)
    if data.get('sourceName'):
        type_attribute.source_name = TypeAttributePersistence._property_from_data_to_string(data.sourceName)
    if data.get('sourceOrdering') is not None:
        type_attribute.source_ordering = TypeAttributePersistence._property_from_data_to_int(data.sourceOrdering)
    if data.get('displayName'):
        type_attribute.display_name = TypeAttributePersistence._property_from_data_to_string(data.displayName)
    if data.get('description'):
        type_attribute.description = TypeAttributePersistence._property_from_data_to_string(data.description)
    if data.get('valueConstrainedToList') is not None:
        type_attribute.value_constrained_to_list = TypeAttributePersistence._property_from_data_to_bool(data.valueConstrainedToList)
    if data.get('maximumLength') is not None:
        type_attribute.maximum_length = TypeAttributePersistence._property_from_data_to_int(data.maximumLength)
    if data.get('maximumValue') is not None:
        type_attribute.maximum_value = TypeAttributePersistence._property_from_data_to_string(data.maximumValue)
    if data.get('minimumValue') is not None:
        type_attribute.minimum_value = TypeAttributePersistence._property_from_data_to_string(data.minimumValue)
    if data.get('dataFormat') is not None:
        try:
            type_attribute.data_format = CdmDataFormat(data.dataFormat)
        except ValueError:
            # Unknown format values are logged and skipped rather than failing the whole attribute.
            logger.warning(TypeAttributePersistence.__name__, ctx,
                           'Couldn\'t find an enum value for {}.'.format(data.dataFormat),
                           TypeAttributePersistence.from_data.__name__)
    if data.get('defaultValue') is not None:
        type_attribute.default_value = data.defaultValue
    return type_attribute
def test_trait_to_unknown_data_format(self):
    """Test trait to data format when unknown data format trait is in an attribute."""
    attribute = CdmTypeAttributeDefinition(
        CdmCorpusContext(CdmCorpusDefinition(), None), 'SomeAttribute')
    attribute.applied_traits.append('is.data_format.someRandomDataFormat')

    mapper = TraitToPropertyMap(attribute)
    # An unrecognized data-format trait must resolve to UNKNOWN.
    self.assertEqual(CdmDataFormat.UNKNOWN, mapper._traits_to_data_format(False))
def test_trait_to_json_data_format(self):
    """Test trait to data format when calculated data format should be JSON."""
    attribute = CdmTypeAttributeDefinition(
        CdmCorpusContext(CdmCorpusDefinition(), None), 'SomeAttribute')
    # The array + JSON-content trait pair should be recognized as JSON.
    attribute.applied_traits.append('is.dataFormat.array')
    attribute.applied_traits.append('means.content.text.JSON')

    mapper = TraitToPropertyMap(attribute)
    self.assertEqual(CdmDataFormat.JSON, mapper._traits_to_data_format(False))
def create_data_source(instance: CdmManifestDefinition) -> DataSource:
    """Build a SyMS DataSource from the manifest's database-location trait.

    Returns None (after logging) when the trait is absent/malformed or the
    location cannot be converted to a SyMS path.
    """
    source = DataSource(None)
    t2pm = TraitToPropertyMap(instance)
    source_traits = t2pm._fetch_trait_reference(ManifestPersistence.db_location_trait)
    # The trait must carry exactly one argument with the expected name.
    if source_traits is not None and source_traits.arguments is not None and len(source_traits.arguments) == 1 and \
            source_traits.arguments[0].name == ManifestPersistence.db_location_trait_arg_name:
        source.location = utils.corpus_path_to_syms_path(source_traits.arguments[0].value, instance.ctx.corpus.storage)
    # Idiom fix: 'is None' instead of '== None'.
    if source.location is None:
        logger.error(instance.ctx, _TAG, 'create_data_source', instance.at_corpus_path,
                     CdmLogCode.ERR_PERSIST_SYMS_STORAGE_SOURCE_TRAIT_ERROR,
                     ManifestPersistence.db_location_trait, ManifestPersistence.db_location_trait_arg_name)
        return None
    return source
async def from_data(ctx: 'CdmCorpusContext', document_folder: 'CdmFolderDefinition', data: 'LocalEntity', extension_trait_def_list: List['CdmTraitDefinition'], manifest: 'CdmManifestDefinition') -> 'CdmLocalEntityDeclarationDefinition':
    """Build a local entity declaration (and its entity document) from model.json data.

    Side effects: appends the created entity document to *document_folder* and
    attaches detected extension imports to it. Returns None on any conversion failure.
    """
    local_entity_dec = ctx.corpus.make_object(CdmObjectType.LOCAL_ENTITY_DECLARATION_DEF, data.name)
    # Collects extension traits discovered while converting this one entity.
    local_extension_trait_def_list = []  # type: List[CdmTraitDefinition]
    entity_doc = await DocumentPersistence.from_data(ctx, data, extension_trait_def_list, local_extension_trait_def_list)
    if not entity_doc:
        logger.error(_TAG, ctx, 'There was an error while trying to fetch the entity doc from local entity declaration persistence.')
        return None
    document_folder.documents.append(entity_doc)
    # Entity schema path is the path to the doc containing the entity definition.
    local_entity_dec.entity_path = ctx.corpus.storage.create_relative_corpus_path('{}/{}'.format(entity_doc.at_corpus_path, data.name), manifest)
    local_entity_dec.explanation = data.get('description')
    # Timestamps are persisted as strings; parse only when present.
    if data.get('lastFileStatusCheckTime'):
        local_entity_dec.last_file_status_check_time = dateutil.parser.parse(data.get('lastFileStatusCheckTime'))
    if data.get('lastFileModifiedTime'):
        local_entity_dec.last_file_modified_time = dateutil.parser.parse(data.get('lastFileModifiedTime'))
    if data.get('lastChildFileModifiedTime'):
        local_entity_dec.last_child_file_modified_time = dateutil.parser.parse(data.get('lastChildFileModifiedTime'))
    # Hidden-ness round-trips through the 'is.hidden' trait.
    if data.get('isHidden'):
        is_hidden_trait = ctx.corpus.make_object(CdmObjectType.TRAIT_REF, 'is.hidden', True)
        local_entity_dec.exhibits_traits.append(is_hidden_trait)
    # Add traits for schema entity info.
    if data.get('schemas'):
        t2pm = TraitToPropertyMap(local_entity_dec)
        t2pm.update_property_value('cdmSchemas', data.get('schemas'))
    # Data partitions are part of the local entity, add them here.
    for element in (data.get('partitions') or []):
        data_partition = await DataPartitionPersistence.from_data(ctx, element, extension_trait_def_list, local_extension_trait_def_list, document_folder)
        if data_partition is not None:
            local_entity_dec.data_partitions.append(data_partition)
        else:
            logger.error(_TAG, ctx, 'There was an error while trying to convert model.json partition to cdm local data partition.')
            return None
    # Turn locally collected extension traits into imports on the entity document.
    import_docs = await extension_helper.standard_import_detection(ctx, extension_trait_def_list, local_extension_trait_def_list)  # type: List[CdmImport]
    extension_helper.add_import_docs_to_manifest(ctx, import_docs, entity_doc)
    return local_entity_dec
async def to_data(instance: 'CdmTypeAttributeDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> 'Attribute':
    """Convert a CDM type attribute into a persisted Attribute."""
    result = Attribute()
    result.name = instance.name

    description = instance._get_property('description')
    if description:
        result.description = description

    result.dataType = TypeAttributePersistence._data_type_to_data(instance.data_format)
    # NOTE(review): this helper is not awaited here, unlike the awaited calls in the
    # partition persistence — confirm it is synchronous in this layer.
    utils.process_traits_and_annotations_to_data(instance.ctx, result, instance.applied_traits)

    trait_map = TraitToPropertyMap(instance)
    # Persistence convention: True or absent (None), never False.
    result.isHidden = True if trait_map._fetch_trait_reference('is.hidden') else None
    return result
async def to_data(instance: 'CdmLocalEntityDeclarationDefinition', manifest: 'CdmManifestDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> 'LocalEntity':
    """Convert a local entity declaration into a model.json LocalEntity.

    Returns None when the entity document or any data partition fails to convert.
    """
    # Fetch the document from entity schema.
    entity = await DocumentPersistence.to_data(instance.entity_path, manifest, res_opt, options, instance.ctx)
    if not entity:
        return None
    if not entity.description:
        entity.description = instance.explanation
    entity.lastFileStatusCheckTime = utils.get_formatted_date_string(instance.last_file_status_check_time)
    entity.lastFileModifiedTime = utils.get_formatted_date_string(instance.last_file_modified_time)
    entity.lastChildFileModifiedTime = utils.get_formatted_date_string(instance.last_child_file_modified_time)
    t2pm = TraitToPropertyMap(instance)
    # Find the trait containing the schema info.
    schemas = t2pm._fetch_property_value('cdmSchemas')
    if schemas:
        entity.schemas = schemas
    entity.isHidden = bool(t2pm._fetch_trait_reference('is.hidden')) or None
    if instance.data_partitions:
        entity.partitions = []
        for partition in instance.data_partitions:
            # BUG FIX: the original assigned to a misspelled 'partiton' and tested the
            # loop variable 'partition' (always truthy), so failures appended None and
            # never hit the error branch. Also, the error call referenced an undefined
            # name 'ctx' — the function's parameter is 'instance'.
            partition_data = await DataPartitionPersistence.to_data(partition, res_opt, options)
            if partition_data is not None:
                entity.partitions.append(partition_data)
            else:
                logger.error(instance.ctx, _TAG, "to_data", instance.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_MODELJSON_ENTITY_PARTITION_CONVERSION_ERROR)
                return None
    return entity
def test_update_and_fetch_list_lookup(self):
    """Test update and fetch list lookup default value without attributeValue and displayOrder."""
    corpus = CdmCorpusDefinition()
    attribute = CdmTypeAttributeDefinition(corpus.ctx, 'SomeAttribute')
    mapper = TraitToPropertyMap(attribute)

    # Round-trip a minimal default-value entry through the property map.
    mapper.update_property_value('defaultValue', [{'languageTag': 'en', 'displayText': 'Fax'}])
    result = mapper.fetch_property_value('defaultValue')

    self.assertEqual(1, len(result))
    entry = result[0]
    self.assertEqual('en', entry.get('languageTag'))
    self.assertEqual('Fax', entry.get('displayText'))
    # Fields that were never supplied must stay absent.
    self.assertIsNone(entry.get('attributeValue'))
    self.assertIsNone(entry.get('displayOrder'))
def _trait_to_property_map(self) -> 'TraitToPropertyMap':
    """Lazily build and cache the trait-to-property map for the wrapped target."""
    from cdm.utilities import TraitToPropertyMap
    if self._ttpm is None:
        self._ttpm = TraitToPropertyMap(cast('CdmObject', self.target))
    return self._ttpm
def from_data(ctx: CdmCorpusContext, obj: DataColumn, entity_name: Optional[str] = None) -> CdmTypeAttributeDefinition:
    """Convert a SyMS DataColumn into a CDM type attribute."""
    type_attribute = ctx.corpus.make_object(CdmObjectType.TYPE_ATTRIBUTE_DEF, obj.name)
    properties = obj.origin_data_type_name.properties
    type_attribute.data_format = utils.syms_data_type_to_cdm_data_format(obj.origin_data_type_name)
    if obj.origin_data_type_name.scale != 0 or obj.origin_data_type_name.precision != 0:
        numeric_traits = ctx.corpus.make_ref(CdmObjectType.TRAIT_REF, 'is.data_format.numeric.shaped', True)
        scale_traits_arg = ctx.corpus.make_ref(CdmObjectType.ARGUMENT_DEF, 'scale', False)
        # BUG FIX: was 'scale_traits_arg.Value' (capital V), which set a dead attribute.
        scale_traits_arg.value = obj.origin_data_type_name.scale
        numeric_traits.arguments.append(scale_traits_arg)
        # BUG FIX: the second argument was created as another 'scale' carrying the
        # scale value; it must be 'precision' carrying the precision.
        precision_traits_arg = ctx.corpus.make_ref(CdmObjectType.ARGUMENT_DEF, 'precision', False)
        precision_traits_arg.value = obj.origin_data_type_name.precision
        numeric_traits.arguments.append(precision_traits_arg)
        # NOTE(review): numeric_traits is built but never attached to the attribute
        # here — confirm whether it should be appended to applied_traits.
    if properties is not None:
        if 'cdm:purpose' in properties:
            type_attribute.purpose = properties['cdm:purpose']
        if 'cdm:dataType' in properties:
            type_attribute.data_type = properties['cdm:dataType']
        if 'cdm:traits' in properties:
            utils.add_list_to_cdm_collection(type_attribute.applied_traits,
                                             utils.create_trait_reference_array(ctx, properties['cdm:traits']))
        # CONSISTENCY FIX: the keys were snake_case ('cdm:is_primary_key' /
        # 'is_primary_key') while every other persistence in this file uses the
        # camelCase property name 'isPrimaryKey'.
        if 'cdm:isPrimaryKey' in properties and properties['cdm:isPrimaryKey']:
            t2p_map = TraitToPropertyMap(type_attribute)
            t2p_map._update_property_value('isPrimaryKey', entity_name + '/(resolvedAttributes)/' + type_attribute.name)
        if 'cdm:isReadOnly' in properties:
            type_attribute.is_read_only = properties['cdm:isReadOnly']
        if 'cdm:sourceName' in properties:
            # BUG FIX: the guard checked 'cdm:sourceName' but the read used
            # 'cdm:source_name', guaranteeing a KeyError whenever the guard passed.
            type_attribute.source_name = properties['cdm:sourceName']
        if 'cdm:sourceOrdering' in properties:
            type_attribute.source_ordering = properties['cdm:sourceOrdering']
        if 'cdm:valueConstrainedToList' in properties:
            type_attribute.value_constrained_to_list = properties['cdm:valueConstrainedToList']
        if 'cdm:maximumLength' in properties:
            type_attribute.maximum_length = properties['cdm:maximumLength']
        if 'cdm:maximumValue' in properties:
            type_attribute.maximum_value = properties['cdm:maximumValue']
        if 'cdm:minimumValue' in properties:
            type_attribute.minimum_value = properties['cdm:minimumValue']
        if 'cdm:defaultValue' in properties:
            type_attribute.default_value = properties['cdm:defaultValue']
    return type_attribute
def to_data(instance: 'CdmDataPartitionDefinition', obj: 'StorageDescriptor', res_opt: 'ResolveOptions', options: 'CopyOptions') -> 'StorageDescriptor':
    """Fill a SyMS StorageDescriptor from a CDM data partition.

    Mutates and returns *obj*; returns None when no CSV format trait could be
    turned into format properties.
    """
    obj.properties = {}
    if instance.name is not None:
        obj.properties['cdm:name'] = instance.name
    if instance.last_file_status_check_time is not None:
        obj.properties['cdm:lastFileStatusCheckTime'] = time_utils._get_formatted_date_string(instance.last_file_status_check_time)
    if instance.last_file_modified_time is not None:
        obj.properties['cdm:lastFileModifiedTime'] = time_utils._get_formatted_date_string(instance.last_file_modified_time)
    if instance.exhibits_traits is not None:
        tpm = TraitToPropertyMap(instance)
        csv_trait = tpm._fetch_trait_reference('is.partition.format.CSV')
        # The CSV trait is serialized separately as format info, so temporarily
        # remove it before copying the remaining traits, then restore it.
        if csv_trait is not None:
            instance.exhibits_traits.remove('is.partition.format.CSV')
        if len(instance.exhibits_traits) > 0:
            obj.properties['cdm:traits'] = copy_data_utils._array_copy_data(res_opt, instance.exhibits_traits, options)
        if csv_trait is not None:
            instance.exhibits_traits.append(csv_trait)
    properties = DataPartitionPersistence.fill_property_bag_from_csv_trait(instance)
    if properties is not None:
        # Fixed Hive SerDe/SequenceFile plumbing; only the CSV properties vary.
        obj.format = FormatInfo(
            input_format=InputFormat.orgapachehadoopmapred_sequence_file_input_format,
            output_format=OutputFormat.orgapachehadoophiveqlio_hive_sequence_file_output_format,
            serialize_lib=SerializeLib.orgapachehadoophiveserde2lazy_lazy_simple_ser_de,
            format_type=FormatType.csv,
            properties=properties)
    else:
        #error
        return None
    return obj
def fill_property_bag_from_csv_trait(instance: 'CdmDataPartitionDefinition', properties=None):
    """Translate the partition's 'is.partition.format.CSV' trait arguments into SyMS format properties.

    Returns the (possibly newly created) property dict, or *properties* unchanged
    (i.e. None by default) when the trait is absent.
    """
    tpm = TraitToPropertyMap(instance)
    csv_trait = tpm._fetch_trait_reference('is.partition.format.CSV')
    if csv_trait is not None:
        # Idiom fix: 'is None' instead of '== None'.
        if properties is None:
            properties = {}
        for csv_trait_arg in csv_trait.arguments:
            # Map CDM argument names onto the SyMS/Hive SerDe property names.
            if csv_trait_arg.name == 'columnHeaders':
                key = 'header'
            elif csv_trait_arg.name == 'delimiter':
                key = 'field.delim'
            else:
                # NOTE(review): unmapped arguments are keyed by their VALUE (not their
                # name), producing entries like {'true': 'true'} — confirm intended.
                key = csv_trait_arg.value
            properties[key] = csv_trait_arg.value
    return properties
async def to_data(instance: 'CdmDataPartitionDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> Optional['Partition']:
    """Convert a CDM data partition into a persisted Partition.

    Returns None when the CSV format trait is present but cannot be processed.
    """
    result = Partition()
    result.name = instance.name
    result.description = instance.description
    result.location = instance.ctx.corpus.storage.corpus_path_to_adapter_path(
        instance.ctx.corpus.storage.create_absolute_corpus_path(instance.location, instance.in_document))
    result.refreshTime = instance.refresh_time
    result.lastFileModifiedTime = utils.get_formatted_date_string(instance.last_file_modified_time)
    result.lastFileStatusCheckTime = utils.get_formatted_date_string(instance.last_file_status_check_time)
    if result.name is None:
        logger.warning(instance.ctx, _TAG, DataPartitionPersistence.to_data.__name__, instance.at_corpus_path,
                       CdmLogCode.WARN_PERSIST_PARTITION_NAME_NULL)
        result.name = ''
    if not result.location:
        # BUG FIX: was 'result.Name' (capital N), which would raise AttributeError
        # on this warning path; the attribute is 'name' as used above.
        logger.warning(instance.ctx, _TAG, DataPartitionPersistence.to_data.__name__, instance.at_corpus_path,
                       CdmLogCode.WARN_PERSIST_PARTITION_LOC_MISSING, result.name)
    # filter description since it is mapped to a property
    exhibits_traits = filter(lambda t: t.named_reference != 'is.localized.describedAs', instance.exhibits_traits)
    await utils.process_traits_and_annotations_to_data(instance.ctx, result, exhibits_traits)
    t2pm = TraitToPropertyMap(instance)
    is_hidden_trait = t2pm._fetch_trait_reference('is.hidden')
    result.isHidden = bool(is_hidden_trait) or None
    csv_trait = t2pm._fetch_trait_reference('is.partition.format.CSV')
    if csv_trait:
        csv_format_settings = utils.create_csv_format_settings(csv_trait)
        if csv_format_settings:
            result.fileFormatSettings = csv_format_settings
            result.fileFormatSettings.type = 'CsvFormatSettings'
        else:
            logger.error(instance.ctx, _TAG, DataPartitionPersistence.to_data.__name__, instance.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_CSV_PROCESSING_ERROR)
            return
    return result
def from_data(ctx: CdmCorpusContext, data: TypeAttribute, entity_name: Optional[str] = None) -> CdmTypeAttributeDefinition:
    """Build a CdmTypeAttributeDefinition from persisted TypeAttribute data.

    NOTE(review): guards use data.get(...) while many reads use attribute access
    (data.explanation, data.defaultValue, ...) — presumably 'data' is a JObject-style
    wrapper where both forms are equivalent; confirm before refactoring.
    """
    type_attribute = ctx.corpus.make_object(CdmObjectType.TYPE_ATTRIBUTE_DEF, data.get('name'))
    type_attribute.purpose = PurposeReferencePersistence.from_data(ctx, data.get('purpose'))
    type_attribute.data_type = DataTypeReferencePersistence.from_data(ctx, data.get('dataType'))
    cardinality = utils.cardinality_settings_from_data(data.get('cardinality'), type_attribute)
    if cardinality is not None:
        type_attribute.cardinality = cardinality
    type_attribute.attribute_context = AttributeContextReferencePersistence.from_data(ctx, data.get('attributeContext'))
    utils.add_list_to_cdm_collection(type_attribute.applied_traits, utils.create_trait_reference_array(ctx, data.get('appliedTraits')))
    type_attribute.resolution_guidance = AttributeResolutionGuidancePersistence.from_data(ctx, data.get('resolutionGuidance'))
    if data.get('isPrimaryKey') and entity_name:
        # Record the primary key as a trait-backed property pointing at the resolved attribute.
        t2p_map = TraitToPropertyMap(type_attribute)
        t2p_map._update_property_value('isPrimaryKey', entity_name + '/(resolvedAttributes)/' + type_attribute.name)
    # Scalar properties are converted unconditionally; the helpers tolerate missing values.
    type_attribute.explanation = data.explanation
    type_attribute.is_read_only = utils._property_from_data_to_bool(data.isReadOnly)
    type_attribute.is_nullable = utils._property_from_data_to_bool(data.isNullable)
    type_attribute.source_name = utils._property_from_data_to_string(data.sourceName)
    type_attribute.source_ordering = utils._property_from_data_to_int(data.sourceOrdering)
    type_attribute.display_name = utils._property_from_data_to_string(data.displayName)
    type_attribute.description = utils._property_from_data_to_string(data.description)
    type_attribute.value_constrained_to_list = utils._property_from_data_to_bool(data.valueConstrainedToList)
    type_attribute.maximum_length = utils._property_from_data_to_int(data.maximumLength)
    type_attribute.maximum_value = utils._property_from_data_to_string(data.maximumValue)
    type_attribute.minimum_value = utils._property_from_data_to_string(data.minimumValue)
    type_attribute.default_value = data.defaultValue
    type_attribute.projection = ProjectionPersistence.from_data(ctx, data.projection)
    if data.get('dataFormat') is not None:
        try:
            type_attribute.data_format = TypeAttributePersistence._data_type_from_data(data.dataFormat)
        except ValueError:
            # Unknown format values are logged and skipped rather than failing the whole attribute.
            logger.warning(ctx, _TAG, TypeAttributePersistence.from_data.__name__, None,
                           CdmLogCode.WARN_PERSIST_ENUM_NOT_FOUND, data.dataFormat)
    return type_attribute
def _trait_to_property_map(self) -> 'TraitToPropertyMap':
    """Lazily build and cache the trait-to-property map for this object."""
    if self._ttpm:
        return self._ttpm
    self._ttpm = TraitToPropertyMap(self)
    return self._ttpm
class CdmDataPartitionPatternDefinition(CdmObjectDefinition, CdmFileStatus):
    """A pattern used to discover data partition files under a root location.

    Either a glob pattern or a regular expression is applied to the file paths
    found under root_location; matching files become (incremental) data
    partitions on the owning local entity declaration.
    """

    def __init__(self, ctx: 'CdmCorpusContext', name: str) -> None:
        super().__init__(ctx)
        self._TAG = CdmDataPartitionPatternDefinition.__name__

        # The partition pattern name.
        self.name = name  # type: str
        # The starting location corpus path for searching for inferred data partitions.
        self.root_location = None  # type: Optional[str]
        # The glob pattern to use for searching partitions.
        self.glob_pattern = None  # type: Optional[str]
        # The regular expression to use for searching partitions.
        self.regular_expression = None  # type: Optional[str]
        # the names for replacement values from regular expression.
        self.parameters = None  # type: Optional[List[str]]
        # The corpus path for specialized schema to use for matched pattern partitions.
        self.specialized_schema = None  # type: Optional[str]
        self.last_file_status_check_time = None  # type: Optional[datetime]
        self.last_file_modified_time = None  # type: Optional[datetime]

        # --- internal ---
        self._ttpm = TraitToPropertyMap(self)

    @property
    def object_type(self) -> 'CdmObjectType':
        return CdmObjectType.DATA_PARTITION_PATTERN_DEF

    @property
    def last_child_file_modified_time(self) -> datetime:
        # Patterns have no child files; present only to satisfy the CdmFileStatus interface.
        raise NotImplementedError()

    @last_child_file_modified_time.setter
    def last_child_file_modified_time(self, val: datetime):
        raise NotImplementedError()

    @property
    def is_incremental(self) -> bool:
        """Gets whether the data partition pattern is incremental (trait-backed property)."""
        return cast(bool, self._ttpm._fetch_property_value('isIncremental'))

    def copy(self, res_opt: Optional['ResolveOptions'] = None,
             host: Optional['CdmDataPartitionPatternDefinition'] = None) -> 'CdmDataPartitionPatternDefinition':
        """Return a copy of this pattern; when host is given, copy into it instead of creating a new object."""
        if not res_opt:
            res_opt = ResolveOptions(wrt_doc=self)

        if not host:
            copy = CdmDataPartitionPatternDefinition(self.ctx, self.name)
        else:
            copy = host
            copy.name = self.name

        copy.root_location = self.root_location
        copy.glob_pattern = self.glob_pattern
        copy.regular_expression = self.regular_expression
        # copy the parameter list so the two objects do not share it
        copy.parameters = list(self.parameters) if self.parameters else None
        copy.last_file_status_check_time = self.last_file_status_check_time
        copy.last_file_modified_time = self.last_file_modified_time
        if self.specialized_schema:
            copy.specialized_schema = self.specialized_schema

        self._copy_def(res_opt, copy)
        return copy

    async def file_status_check_async(self) -> None:
        """Check the modified time for this object and any children."""
        with logger._enter_scope(self._TAG, self.ctx, self.file_status_check_async.__name__):
            namespace = None
            adapter = None

            # make sure the root is a good full corpus path.
            root_cleaned = (self.root_location[:-1]
                            if self.root_location and self.root_location.endswith('/')
                            else self.root_location) or ''
            root_corpus = self.ctx.corpus.storage.create_absolute_corpus_path(root_cleaned, self.in_document)

            try:
                # Remove namespace from path
                path_tuple = StorageUtils.split_namespace_path(root_corpus)
                if not path_tuple:
                    logger.error(self.ctx, self._TAG,
                                 CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                                 self.at_corpus_path, CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
                    return

                namespace = path_tuple[0]
                adapter = self.ctx.corpus.storage.fetch_adapter(namespace)

                if adapter is None:
                    logger.error(self.ctx, self._TAG,
                                 CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                                 self.at_corpus_path, CdmLogCode.ERR_DOC_ADAPTER_NOT_FOUND,
                                 self.in_document.name)
                    # FIX: previously fell through and called fetch_all_files_async on None,
                    # turning this error into an AttributeError swallowed by the except below.
                    return

                # get a list of all corpus_paths under the root.
                file_info_list = await adapter.fetch_all_files_async(path_tuple[1])
            except Exception as e:
                file_info_list = None
                logger.warning(self.ctx, self._TAG,
                               CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                               self.at_corpus_path, CdmLogCode.WARN_PARTITION_FILE_FETCH_FAILED,
                               root_corpus, e)

            if file_info_list is not None and namespace is not None:
                # remove root of the search from the beginning of all paths so anything in the root is not found by regex.
                file_info_list = [(namespace + ':' + fi)[len(root_corpus):] for fi in file_info_list]

                if isinstance(self.owner, CdmLocalEntityDeclarationDefinition):
                    local_ent_dec_def_owner = cast('CdmLocalEntityDeclarationDefinition', self.owner)

                    # if both are present log warning and use glob pattern, otherwise use regularExpression
                    if self.glob_pattern and not self.glob_pattern.isspace() \
                            and self.regular_expression and not self.regular_expression.isspace():
                        logger.warning(self.ctx, self._TAG,
                                       CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                                       self.at_corpus_path, CdmLogCode.WARN_PARTITION_GLOB_AND_REGEX_PRESENT,
                                       self.glob_pattern, self.regular_expression)
                    regular_expression = self.glob_pattern_to_regex(self.glob_pattern) \
                        if self.glob_pattern and not self.glob_pattern.isspace() else self.regular_expression

                    # FIX: reg must be pre-initialized; otherwise a failed compile below
                    # raised NameError at the `if reg:` check instead of being handled.
                    reg = None
                    try:
                        reg = regex.compile(regular_expression)
                    except Exception as e:
                        logger.error(self.ctx, self._TAG,
                                     CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                                     self.at_corpus_path, CdmLogCode.ERR_VALDN_INVALID_EXPRESSION,
                                     'glob pattern' if self.glob_pattern and not self.glob_pattern.isspace()
                                     else 'regular expression',
                                     self.glob_pattern if self.glob_pattern and not self.glob_pattern.isspace()
                                     else self.regular_expression,
                                     e)

                    if reg:
                        # sets to detect partitions that already exist on the owner
                        data_partition_path_set = set()
                        if local_ent_dec_def_owner.data_partitions is not None:
                            for data_partition in local_ent_dec_def_owner.data_partitions:
                                data_partition_location_full_path = self.ctx.corpus.storage.create_absolute_corpus_path(
                                    data_partition.location, self.in_document)
                                data_partition_path_set.add(data_partition_location_full_path)

                        incremental_partition_path_hash_set = set()
                        # FIX: guard must check incremental_partitions (not data_partitions),
                        # which is the collection being iterated.
                        if local_ent_dec_def_owner.incremental_partitions is not None:
                            for incremental_partition in local_ent_dec_def_owner.incremental_partitions:
                                incremental_partition_location_full_path = self.ctx.corpus.storage.create_absolute_corpus_path(
                                    incremental_partition.location, self.in_document)
                                incremental_partition_path_hash_set.add(incremental_partition_location_full_path)

                        for fi in file_info_list:
                            m = reg.fullmatch(fi)
                            if m:
                                # create a map of arguments out of capture groups.
                                args = defaultdict(list)  # type: Dict[str, List[str]]
                                i_param = 0
                                for i in range(1, reg.groups + 1):
                                    captures = m.captures(i)
                                    if captures and self.parameters and i_param < len(self.parameters):
                                        # to be consistent with other languages, if a capture group captures
                                        # multiple things, only use the last thing that was captured
                                        single_capture = captures[-1]
                                        current_param = self.parameters[i_param]
                                        args[current_param].append(single_capture)
                                        i_param += 1
                                    else:
                                        break

                                # put the original but cleaned up root back onto the matched doc as the location stored in the partition.
                                location_corpus_path = root_cleaned + fi
                                full_path = root_corpus + fi

                                # Remove namespace from path
                                path_tuple = StorageUtils.split_namespace_path(full_path)
                                if not path_tuple:
                                    logger.error(self.ctx, self._TAG,
                                                 CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                                                 self.at_corpus_path, CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
                                    return

                                last_modified_time = await adapter.compute_last_modified_time_async(path_tuple[1])

                                if self.is_incremental and full_path not in incremental_partition_path_hash_set:
                                    local_ent_dec_def_owner._create_partition_from_pattern(
                                        location_corpus_path, self.exhibits_traits, args,
                                        self.specialized_schema, last_modified_time, True, self.name)
                                    incremental_partition_path_hash_set.add(full_path)
                                if not self.is_incremental and full_path not in data_partition_path_set:
                                    local_ent_dec_def_owner._create_partition_from_pattern(
                                        location_corpus_path, self.exhibits_traits, args,
                                        self.specialized_schema, last_modified_time)
                                    data_partition_path_set.add(full_path)

            # update modified times.
            self.last_file_status_check_time = datetime.now(timezone.utc)

    def glob_pattern_to_regex(self, pattern: str) -> str:
        """Translate a glob pattern ('*', '?', '**') into an equivalent regular expression string."""
        new_pattern = []

        # all patterns should start with a slash
        new_pattern.append("[/\\\\]")

        # if pattern starts with slash, skip the first character. We already added it above
        i = 1 if pattern[0] == '/' or pattern[0] == '\\' else 0
        while i < len(pattern):
            curr_char = pattern[i]

            if curr_char == '.':
                # escape '.' characters
                new_pattern.append('\\.')
            elif curr_char == '\\':
                # convert backslash into slash
                new_pattern.append('[/\\\\]')
            elif curr_char == '?':
                # question mark in glob matches any single character
                new_pattern.append('.')
            elif curr_char == '*':
                next_char = pattern[i + 1] if i + 1 < len(pattern) else None
                if next_char == '*':
                    prev_char = pattern[i - 1] if i - 1 >= 0 else None
                    post_char = pattern[i + 2] if i + 2 < len(pattern) else None

                    # globstar must be at beginning of pattern, end of pattern, or wrapped in separator characters
                    if (prev_char is None or prev_char == '/' or prev_char == '\\') \
                            and (post_char is None or post_char == '/' or post_char == '\\'):
                        new_pattern.append('.*')

                        # globstar can match zero or more subdirectories. If it matches zero, then there should not be
                        # two consecutive '/' characters so make the second one optional
                        if (prev_char == '/' or prev_char == '\\') and (post_char == '/' or post_char == '\\'):
                            new_pattern.append('/?')
                            i = i + 1
                    else:
                        # otherwise, treat the same as '*'
                        new_pattern.append('[^/\\\\]*')
                    i = i + 1
                else:
                    # *
                    new_pattern.append('[^/\\\\]*')
            else:
                new_pattern.append(curr_char)
            i = i + 1

        return ''.join(new_pattern)

    def get_name(self) -> str:
        return self.name

    def is_derived_from(self, base: str, res_opt: Optional['ResolveOptions'] = None) -> bool:
        # Partition patterns do not participate in inheritance.
        return False

    async def report_most_recent_time_async(self, child_time: datetime) -> None:
        """Report most recent modified time (of current or children objects) to the parent object."""
        if isinstance(self.owner, CdmFileStatus) and child_time:
            await cast(CdmFileStatus, self.owner).report_most_recent_time_async(child_time)

    def validate(self) -> bool:
        """A pattern is valid only when it has a root location."""
        if not bool(self.root_location):
            missing_fields = ['root_location']
            logger.error(self.ctx, self._TAG, 'validate', self.at_corpus_path,
                         CdmLogCode.ERR_VALDN_INTEGRITY_CHECK_FAILURE, self.at_corpus_path,
                         ', '.join(map(lambda s: '\'' + s + '\'', missing_fields)))
            return False
        return True

    def visit(self, path_from: str, pre_children: 'VisitCallback', post_children: 'VisitCallback') -> bool:
        path = self._fetch_declared_path(path_from)

        if pre_children and pre_children(self, path):
            return False
        if self._visit_def(path, pre_children, post_children):
            return True
        if post_children and post_children(self, path):
            return True
        return False

    def _fetch_declared_path(self, path_from: str) -> str:
        return '{}{}'.format(path_from, (self.get_name() or 'UNNAMED'))
class CdmDataPartitionDefinition(CdmObjectDefinition, CdmFileStatus):
    """A single data partition: one physical data file that belongs to an entity declaration."""

    def __init__(self, ctx: 'CdmCorpusContext', name: str) -> None:
        super().__init__(ctx)
        self._TAG = CdmDataPartitionDefinition.__name__

        # The name of a data partition.
        self.name = name  # type: str
        # The corpus path for the data file location.
        self.location = None  # type: Optional[str]
        # Indicates whether this partition is inferred.
        self.inferred = False  # type: bool
        # The list of key value pairs to give names for the replacement values from the RegEx.
        self.arguments = {}  # type: Dict[str, List[str]]
        # The path of a specialized schema to use specifically for the partitions generated.
        self.specialized_schema = None  # type: Optional[str]
        # The refresh time of the partition.
        self.refresh_time = None  # type: Optional[datetime]
        self.last_file_modified_time = None  # type: Optional[datetime]
        self.last_file_status_check_time = None  # type: Optional[datetime]

        # --- internal ---
        self._ttpm = TraitToPropertyMap(self)

    @property
    def object_type(self) -> 'CdmObjectType':
        return CdmObjectType.DATA_PARTITION_DEF

    @property
    def description(self) -> str:
        """Trait-backed description property."""
        return cast(str, self._ttpm._fetch_property_value('description'))

    @description.setter
    def description(self, val: str) -> None:
        self._ttpm._update_property_value('description', val)

    @property
    def last_child_file_modified_time(self) -> datetime:
        # Partitions have no child files; present only to satisfy CdmFileStatus.
        raise NotImplementedError()

    @last_child_file_modified_time.setter
    def last_child_file_modified_time(self, val: datetime):
        raise NotImplementedError()

    def copy(self, res_opt: Optional['ResolveOptions'] = None,
             host: Optional['CdmDataPartitionDefinition'] = None) -> 'CdmDataPartitionDefinition':
        """Return a copy of this partition; when host is given, copy into it instead of creating a new object."""
        if not res_opt:
            res_opt = ResolveOptions(wrt_doc=self,
                                     directives=self.ctx.corpus.default_resolution_directives)

        target = host if host else CdmDataPartitionDefinition(self.ctx, self.name)
        target.name = self.name
        target.description = self.description
        target.location = self.location
        target.last_file_status_check_time = self.last_file_status_check_time
        target.last_file_modified_time = self.last_file_modified_time
        target.inferred = self.inferred
        if self.arguments:
            # deep-copy the argument lists so the copy does not share state
            target.arguments = {arg_name: list(values) for arg_name, values in self.arguments.items()}
        target.specialized_schema = self.specialized_schema

        self._copy_def(res_opt, target)
        return target

    async def file_status_check_async(self) -> None:
        """Check the modified time for this object and any children."""
        with logger._enter_scope(self._TAG, self.ctx, self.file_status_check_async.__name__):
            full_path = self.ctx.corpus.storage.create_absolute_corpus_path(self.location, self.in_document)
            partition_time = await self.ctx.corpus._get_last_modified_time_from_partition_path_async(full_path)

            # Update modified times.
            self.last_file_status_check_time = datetime.now(timezone.utc)
            self.last_file_modified_time = time_utils._max_time(partition_time, self.last_file_modified_time)

            await self.report_most_recent_time_async(self.last_file_modified_time)

    def get_name(self) -> str:
        return self.name

    def is_derived_from(self, base: str, res_opt: Optional['ResolveOptions'] = None) -> bool:
        # Partitions do not participate in inheritance.
        return False

    async def report_most_recent_time_async(self, child_time: datetime) -> None:
        """Report most recent modified time (of current or children objects) to the parent object."""
        if isinstance(self.owner, CdmFileStatus) and child_time:
            await cast(CdmFileStatus, self.owner).report_most_recent_time_async(child_time)

    def validate(self) -> bool:
        return True

    def visit(self, path_from: str, pre_children: 'VisitCallback', post_children: 'VisitCallback') -> bool:
        path = self._fetch_declared_path(path_from)
        if pre_children and pre_children(self, path):
            return False
        if self._visit_def(path, pre_children, post_children):
            return True
        if post_children and post_children(self, path):
            return True
        return False

    def _fetch_declared_path(self, path_from: str) -> str:
        return path_from + (self.get_name() or 'UNNAMED')
class CdmDataPartitionDefinition(CdmObjectDefinition, CdmFileStatus):
    """Legacy variant of the data partition definition: one physical data file
    belonging to an entity declaration.

    NOTE(review): this variant differs from the sibling implementation in this
    file — it exposes `last_child_file_modified_time` as a plain attribute, uses
    the non-underscored trait-map accessors, and does not update
    `last_file_status_check_time` during the file status check. Presumably an
    older API surface; confirm before consolidating.
    """

    def __init__(self, ctx: 'CdmCorpusContext', name: str) -> None:
        super().__init__(ctx)
        # The name of a data partition.
        self.name = name  # type: str
        # The corpus path for the data file location.
        self.location = None  # type: Optional[str]
        # Indicates whether this partition is inferred.
        self.inferred = False  # type: bool
        # The list of key value pairs to give names for the replacement values from the RegEx.
        self.arguments = {}  # type: Dict[str, List[str]]
        # The path of a specialized schema to use specifically for the partitions generated.
        self.specialized_schema = None  # type: Optional[str]
        # The refresh time of the partition.
        self.refresh_time = None  # type: Optional[datetime]
        self.last_child_file_modified_time = None  # type: Optional[datetime]
        self.last_file_modified_time = None  # type: Optional[datetime]
        self.last_file_status_check_time = None  # type: Optional[datetime]

        # --- Internal ---
        # Lazy-free trait-to-property map bridging trait state and simple properties.
        self._ttpm = TraitToPropertyMap(self)

    @property
    def object_type(self) -> 'CdmObjectType':
        return CdmObjectType.DATA_PARTITION_DEF

    @property
    def description(self) -> str:
        # Description is stored as a trait-backed property, not a plain attribute.
        return cast(str, self._ttpm.fetch_property_value('description'))

    @description.setter
    def description(self, val: str) -> None:
        self._ttpm.update_property_value('description', val)

    def copy(self, res_opt: Optional['ResolveOptions'] = None,
             host: Optional['CdmDataPartitionDefinition'] = None) -> 'CdmDataPartitionDefinition':
        """Copy this partition definition; when host is given, copy into it instead of a new object."""
        if not res_opt:
            res_opt = ResolveOptions(wrt_doc=self)
        if not host:
            copy = CdmDataPartitionDefinition(self.ctx, self.name)
        else:
            copy = host
            copy.ctx = self.ctx
            copy.name = self.name
        copy.description = self.description
        copy.location = self.location
        copy.last_file_status_check_time = self.last_file_status_check_time
        copy.last_file_modified_time = self.last_file_modified_time
        copy.inferred = self.inferred
        # NOTE(review): shallow assignment — the copy shares the same arguments
        # dict (and its lists) with the original; the sibling implementation in
        # this file deep-copies instead. Confirm whether sharing is intended here.
        copy.arguments = self.arguments
        copy.specialized_schema = self.specialized_schema
        self._copy_def(res_opt, copy)
        return copy

    async def file_status_check_async(self) -> None:
        """Check the modified time for this object and any children."""
        namespace = self.in_document.namespace
        # Prefix the document namespace only when the location is not already a full corpus path.
        full_path = self.location if ':' in self.location else (namespace + ':' + self.location)

        modified_time = await (
            cast('CdmCorpusDefinition', self.ctx.corpus)
        )._fetch_last_modified_time_from_partition_path_async(full_path)

        # Update modified times.
        # NOTE(review): last_file_status_check_time is NOT updated here, unlike
        # the sibling implementation — verify this is intentional.
        self.last_file_modified_time = time_utils.max_time(modified_time, self.last_file_modified_time)

        await self.report_most_recent_time_async(self.last_file_modified_time)

    def get_name(self) -> str:
        return self.name

    def is_derived_from(self, base: str, res_opt: Optional['ResolveOptions'] = None) -> bool:
        # Partitions do not participate in inheritance.
        return False

    async def report_most_recent_time_async(self, child_time: datetime) -> None:
        """Report most recent modified time (of current or children objects) to the parent object."""
        # NOTE(review): this checks the owner's method attribute for truthiness
        # rather than isinstance(owner, CdmFileStatus); an owner of None would
        # raise here. Confirm owner is always set when this is called.
        if cast(CdmFileStatus, self.owner).report_most_recent_time_async and child_time:
            await cast(CdmFileStatus, self.owner).report_most_recent_time_async(child_time)

    def validate(self) -> bool:
        # A partition is valid only when it has a location.
        return bool(self.location)

    def visit(self, path_from: str, pre_children: 'VisitCallback', post_children: 'VisitCallback') -> bool:
        path = ''
        # Cache the declared path unless the corpus forbids declared-path changes.
        if self.ctx.corpus.block_declared_path_changes is False:
            path = self._declared_path
            if not path:
                path = '{}{}'.format(path_from, (self.get_name() or 'UNNAMED'))
                self._declared_path = path
        if pre_children and pre_children(self, path):
            return False
        if self._visit_def(path, pre_children, post_children):
            return True
        # NOTE(review): returns False after post_children, unlike the sibling
        # classes in this file which return True — confirm intentional.
        if post_children and post_children(self, path):
            return False
        return False
def _trait_to_property_map(self) -> 'TraitToPropertyMap':
    """Lazily create and cache the trait-to-property map for this object."""
    # imported locally to avoid a circular import at module load time
    from cdm.utilities import TraitToPropertyMap

    if self._ttpm:
        return self._ttpm
    self._ttpm = TraitToPropertyMap(self)
    return self._ttpm
def from_data(
        ctx: CdmCorpusContext, data: TypeAttribute,
        entity_name: Optional[str] = None) -> CdmTypeAttributeDefinition:
    """Build a CdmTypeAttributeDefinition from a persisted TypeAttribute payload.

    Validates cardinality bounds (logging, not raising, on invalid input),
    copies scalar properties through the shared coercion helpers, and wires the
    isPrimaryKey trait when an owning entity name is supplied.
    """
    type_attribute = ctx.corpus.make_object(
        CdmObjectType.TYPE_ATTRIBUTE_DEF, data.get('name'))
    type_attribute.purpose = PurposeReferencePersistence.from_data(
        ctx, data.get('purpose'))
    type_attribute.data_type = DataTypeReferencePersistence.from_data(
        ctx, data.get('dataType'))

    if data.get('cardinality'):
        # Pull both bounds; each stays None when absent or falsy.
        min_cardinality = None
        if data.get('cardinality').get('minimum'):
            min_cardinality = data.get('cardinality').get('minimum')

        max_cardinality = None
        if data.get('cardinality').get('maximum'):
            max_cardinality = data.get('cardinality').get('maximum')

        # All three checks only log; the cardinality is applied below only when
        # both bounds are present AND valid.
        if not min_cardinality or not max_cardinality:
            logger.error(
                _TAG, ctx,
                'Both minimum and maximum are required for the Cardinality property.'
            )

        if not CardinalitySettings._is_minimum_valid(min_cardinality):
            logger.error(
                _TAG, ctx,
                'Invalid minimum cardinality {}.'.format(min_cardinality))

        if not CardinalitySettings._is_maximum_valid(max_cardinality):
            logger.error(
                _TAG, ctx,
                'Invalid maximum cardinality {}.'.format(max_cardinality))

        if min_cardinality and max_cardinality and CardinalitySettings._is_minimum_valid(
                min_cardinality) and CardinalitySettings._is_maximum_valid(
                max_cardinality):
            type_attribute.cardinality = CardinalitySettings(type_attribute)
            type_attribute.cardinality.minimum = min_cardinality
            type_attribute.cardinality.maximum = max_cardinality

    type_attribute.attribute_context = AttributeContextReferencePersistence.from_data(
        ctx, data.get('attributeContext'))
    type_attribute.resolution_guidance = AttributeResolutionGuidancePersistence.from_data(
        ctx, data.get('resolutionGuidance'))

    applied_traits = utils.create_trait_reference_array(
        ctx, data.get('appliedTraits'))
    type_attribute.applied_traits.extend(applied_traits)

    # Primary-key status is trait-backed and needs the resolved-attribute path,
    # hence the entity_name requirement.
    if data.get('isPrimaryKey') and entity_name:
        t2p_map = TraitToPropertyMap(type_attribute)
        t2p_map._update_property_value(
            'isPrimaryKey',
            entity_name + '/(resolvedAttributes)/' + type_attribute.name)

    # Scalar properties, coerced through the shared helpers.
    type_attribute.explanation = data.explanation
    type_attribute.is_read_only = utils._property_from_data_to_bool(
        data.isReadOnly)
    type_attribute.is_nullable = utils._property_from_data_to_bool(
        data.isNullable)
    type_attribute.source_name = utils._property_from_data_to_string(
        data.sourceName)
    type_attribute.source_ordering = utils._property_from_data_to_int(
        data.sourceOrdering)
    type_attribute.display_name = utils._property_from_data_to_string(
        data.displayName)
    type_attribute.description = utils._property_from_data_to_string(
        data.description)
    type_attribute.value_constrained_to_list = utils._property_from_data_to_bool(
        data.valueConstrainedToList)
    type_attribute.maximum_length = utils._property_from_data_to_int(
        data.maximumLength)
    type_attribute.maximum_value = utils._property_from_data_to_string(
        data.maximumValue)
    type_attribute.minimum_value = utils._property_from_data_to_string(
        data.minimumValue)
    type_attribute.default_value = data.defaultValue
    type_attribute.projection = ProjectionPersistence.from_data(
        ctx, data.projection)

    if data.get('dataFormat') is not None:
        try:
            type_attribute.data_format = TypeAttributePersistence._data_type_from_data(
                data.dataFormat)
        except ValueError:
            # Unknown enum value: warn and leave data_format unset rather than fail the load.
            logger.warning(
                TypeAttributePersistence.__name__, ctx,
                'Couldn\'t find an enum value for {}.'.format(
                    data.dataFormat),
                TypeAttributePersistence.from_data.__name__)
    return type_attribute
async def to_data(instance: 'CdmManifestDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> Optional['Model']:
    """Convert a CdmManifestDefinition into its model.json Model representation.

    Returns None when a relationship fails to convert; entity conversion
    failures are logged per-entity and skipped.
    """
    result = Model()

    # process_traits_and_annotations_to_data also processes extensions.
    # FIX: this helper is a coroutine (the sibling implementation awaits the
    # same call); without await it silently never ran.
    await utils.process_traits_and_annotations_to_data(instance.ctx, result, instance.exhibits_traits)

    result.name = instance.manifest_name
    result.description = instance.explanation
    result.modifiedTime = utils.get_formatted_date_string(instance.last_file_modified_time)
    result.lastChildFileModifiedTime = utils.get_formatted_date_string(instance.last_child_file_modified_time)
    result.lastFileStatusCheckTime = utils.get_formatted_date_string(instance.last_file_status_check_time)
    result.documentVersion = instance.document_version

    t2pm = TraitToPropertyMap(instance)

    # isHidden is only emitted when the trait is present.
    result.isHidden = bool(t2pm._fetch_trait_reference('is.hidden')) or None

    application_trait = t2pm._fetch_trait_reference('is.managedBy')
    if application_trait:
        result.application = application_trait.arguments[0].value

    version_trait = t2pm._fetch_trait_reference('is.modelConversion.modelVersion')
    if version_trait:
        result.version = version_trait.arguments[0].value
    else:
        result.version = '1.0'

    culture_trait = t2pm._fetch_trait_reference('is.partition.culture')
    if culture_trait:
        result.culture = culture_trait.arguments[0].value

    # location -> modelId, and modelId -> location (ordered for stable output)
    reference_entity_locations = {}
    reference_models = OrderedDict()

    reference_models_trait = t2pm._fetch_trait_reference('is.modelConversion.referenceModelMap')
    if reference_models_trait:
        ref_models = reference_models_trait.arguments[0].value
        for element in ref_models:
            reference_models[element.id] = element.location
            reference_entity_locations[element.location] = element.id

    if instance.entities:
        result.entities = []
        # Schedule processing of each entity to be added to the manifest
        for entity in instance.entities:
            element = None
            if entity.object_type == CdmObjectType.LOCAL_ENTITY_DECLARATION_DEF:
                element = await LocalEntityDeclarationPersistence.to_data(entity, instance, res_opt, options)
            elif entity.object_type == CdmObjectType.REFERENCED_ENTITY_DECLARATION_DEF:
                element = await ReferencedEntityDeclarationPersistence.to_data(entity, res_opt, options)
                location = instance.ctx.corpus.storage.corpus_path_to_adapter_path(entity.entity_path)
                if not location:
                    logger.error(_TAG, instance.ctx, 'Invalid entity path set in entity {}'.format(entity.entity_name))
                    element = None
                reference_entity = element  # type: ReferenceEntity
                if reference_entity:
                    # keep only the directory portion of the adapter path
                    location = location[:location.rfind('/')]
                    if reference_entity.modelId:
                        saved_location = reference_models.get(reference_entity.modelId)
                        if saved_location is not None and saved_location != location:
                            logger.error(_TAG, instance.ctx, 'Same ModelId pointing to different locations')
                            element = None
                        elif saved_location is None:
                            reference_models[reference_entity.modelId] = location
                            reference_entity_locations[location] = reference_entity.modelId
                    elif not reference_entity.modelId and location in reference_entity_locations:
                        # reuse the modelId already registered for this location
                        reference_entity.modelId = reference_entity_locations[location]
                    else:
                        # first time this location is seen: mint a new modelId
                        reference_entity.modelId = str(uuid.uuid4())
                        reference_models[reference_entity.modelId] = location
                        reference_entity_locations[location] = reference_entity.modelId

            if element:
                result.entities.append(element)
            else:
                logger.error(_TAG, instance.ctx,
                             'There was an error while trying to convert {}\'s entity declaration to model json format.'.format(entity.entity_name))

    if reference_models:
        result.referenceModels = []
        # reference_models maps modelId -> location (previous loop variables
        # were named backwards, though the emitted values were correct).
        for model_id, model_location in reference_models.items():
            model = ReferenceModel()
            model.id = model_id
            model.location = model_location
            result.referenceModels.append(model)

    if instance.relationships is not None and len(instance.relationships) > 0:
        result.relationships = []  # type: List[SingleKeyRelationship]
        for cdm_relationship in instance.relationships:
            relationship = await RelationshipPersistence.to_data(cdm_relationship, res_opt, options, instance.ctx)
            if relationship is not None:
                result.relationships.append(relationship)
            else:
                # A failed relationship conversion aborts the whole manifest conversion.
                logger.error(_TAG, instance.ctx,
                             'There was an error while trying to convert cdm relationship to model.json relationship.')
                return None

    if instance.imports:
        result.imports = []
        for element in instance.imports:
            import_obj = ImportPersistence.to_data(element, res_opt, options)
            if import_obj:
                result.imports.append(import_obj)

    return result
async def to_data(instance: 'CdmManifestDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> Optional['Model']:
    """Convert a CdmManifestDefinition into its model.json Model representation.

    Always emits an imports list and guarantees foundations.cdm.json is imported
    so traits resolve on the round-tripped manifest.
    """
    result = Model()

    # process_traits_and_annotations_to_data also processes extensions.
    await utils.process_traits_and_annotations_to_data(
        instance.ctx, result, instance.exhibits_traits)

    result.name = instance.manifest_name
    result.description = instance.explanation
    result.modifiedTime = utils.get_formatted_date_string(
        instance.last_file_modified_time)
    result.lastChildFileModifiedTime = utils.get_formatted_date_string(
        instance.last_child_file_modified_time)
    result.lastFileStatusCheckTime = utils.get_formatted_date_string(
        instance.last_file_status_check_time)
    result.documentVersion = instance.document_version

    t2pm = TraitToPropertyMap(instance)

    # isHidden is only emitted when the trait is present.
    result.isHidden = bool(t2pm._fetch_trait_reference('is.hidden')) or None

    application_trait = t2pm._fetch_trait_reference('is.managedBy')
    if application_trait:
        result.application = application_trait.arguments[0].value

    version_trait = t2pm._fetch_trait_reference('is.modelConversion.modelVersion')
    if version_trait:
        result.version = version_trait.arguments[0].value
    else:
        result.version = '1.0'

    culture_trait = t2pm._fetch_trait_reference('is.partition.culture')
    if culture_trait:
        result.culture = culture_trait.arguments[0].value

    # location -> modelId, and modelId -> location (ordered for stable output)
    reference_entity_locations = {}
    reference_models = OrderedDict()

    reference_models_trait = t2pm._fetch_trait_reference('is.modelConversion.referenceModelMap')
    if reference_models_trait:
        ref_models = reference_models_trait.arguments[0].value
        for element in ref_models:
            reference_models[element.id] = element.location
            reference_entity_locations[element.location] = element.id

    if instance.entities:
        result.entities = []
        # Schedule processing of each entity to be added to the manifest
        for entity in instance.entities:
            element = None
            if entity.object_type == CdmObjectType.LOCAL_ENTITY_DECLARATION_DEF:
                element = await LocalEntityDeclarationPersistence.to_data(
                    entity, instance, res_opt, options)
            elif entity.object_type == CdmObjectType.REFERENCED_ENTITY_DECLARATION_DEF:
                element = await ReferencedEntityDeclarationPersistence.to_data(
                    entity, res_opt, options)
                location = instance.ctx.corpus.storage.corpus_path_to_adapter_path(
                    entity.entity_path)
                if StringUtils.is_blank_by_cdm_standard(location):
                    logger.error(instance.ctx, _TAG, 'to_data', instance.at_corpus_path,
                                 CdmLogCode.ERR_PERSIST_MODELJSON_INVALID_ENTITY_PATH,
                                 entity.entity_name)
                    element = None
                reference_entity = element  # type: ReferenceEntity
                if reference_entity:
                    # path separator can differ depending on the adapter, cover the case where path uses '/' or '\'
                    last_slash_location = location.rfind('/') \
                        if location.rfind('/') > location.rfind('\\') else location.rfind('\\')
                    if last_slash_location > 0:
                        location = location[:last_slash_location]

                    if reference_entity.modelId:
                        saved_location = reference_models.get(reference_entity.modelId)
                        if saved_location is not None and saved_location != location:
                            # FIX: argument order was (ctx, method, path, tag, code);
                            # corrected to (ctx, tag, method, path, code) to match
                            # every other logger.error call in this function.
                            logger.error(instance.ctx, _TAG, 'to_data', instance.at_corpus_path,
                                         CdmLogCode.ERR_PERSIST_MODELJSON_MODEL_ID_DUPLICATION)
                            element = None
                        elif saved_location is None:
                            reference_models[reference_entity.modelId] = location
                            reference_entity_locations[location] = reference_entity.modelId
                    elif not reference_entity.modelId and location in reference_entity_locations:
                        # reuse the modelId already registered for this location
                        reference_entity.modelId = reference_entity_locations[location]
                    else:
                        # first time this location is seen: mint a new modelId
                        reference_entity.modelId = str(uuid.uuid4())
                        reference_models[reference_entity.modelId] = location
                        reference_entity_locations[location] = reference_entity.modelId

            if element:
                result.entities.append(element)
            else:
                logger.error(instance.ctx, _TAG, 'to_data', instance.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_MODELJSON_ENTITY_DECLARATION_CONVERSION_ERROR,
                             entity.entity_name)

    if reference_models:
        result.referenceModels = []
        # reference_models maps modelId -> location (previous loop variables
        # were named backwards, though the emitted values were correct).
        for model_id, model_location in reference_models.items():
            model = ReferenceModel()
            model.id = model_id
            model.location = model_location
            result.referenceModels.append(model)

    if instance.relationships is not None and len(instance.relationships) > 0:
        result.relationships = []  # type: List[SingleKeyRelationship]
        for cdm_relationship in instance.relationships:
            relationship = await RelationshipPersistence.to_data(
                cdm_relationship, res_opt, options, instance.ctx)
            if relationship is not None:
                result.relationships.append(relationship)

    result.imports = []
    if instance.imports:
        for element in instance.imports:
            import_obj = ImportPersistence.to_data(element, res_opt, options)
            if import_obj:
                result.imports.append(import_obj)

    # Importing foundations.cdm.json to resolve trait properly on manifest
    if instance.imports is None or instance.imports.item(
            Constants._FOUNDATIONS_CORPUS_PATH, check_moniker=False) is None:
        foundations_import = Import()
        foundations_import.corpusPath = Constants._FOUNDATIONS_CORPUS_PATH
        result.imports.append(foundations_import)

    return result