async def to_data(instance: 'CdmLocalEntityDeclarationDefinition', manifest: 'CdmManifestDefinition', res_opt: 'ResolveOptions', options: 'CopyOptions') -> 'LocalEntity': # Fetch the document from entity schema. entity = await DocumentPersistence.to_data(instance.entity_path, manifest, res_opt, options, instance.ctx) if not entity: return None if not entity.description: entity.description = instance.explanation entity.lastFileStatusCheckTime = utils.get_formatted_date_string( instance.last_file_status_check_time) entity.lastFileModifiedTime = utils.get_formatted_date_string( instance.last_file_modified_time) entity.lastChildFileModifiedTime = utils.get_formatted_date_string( instance.last_child_file_modified_time) t2pm = TraitToPropertyMap(instance) # Find the trait containing the schema info. schemas = t2pm._fetch_property_value('cdmSchemas') if schemas: entity.schemas = schemas entity.isHidden = bool( t2pm._fetch_trait_reference('is.hidden')) or None if instance.data_partitions: entity.partitions = [] for partition in instance.data_partitions: partiton = await DataPartitionPersistence.to_data( partition, res_opt, options) if partition: entity.partitions.append(partiton) else: logger.error( ctx, _TAG, "to_data", instance.at_corpus_path, CdmLogCode. ERR_PERSIST_MODELJSON_ENTITY_PARTITION_CONVERSION_ERROR ) return None return entity
def test_update_and_fetch_list_lookup(self): """Test update and fetch list lookup default value without attributeValue and displayOrder.""" corpus = CdmCorpusDefinition() cdm_attribute = CdmTypeAttributeDefinition(corpus.ctx, 'SomeAttribute') trait_to_property_map = TraitToPropertyMap(cdm_attribute) constant_values = [{'languageTag': 'en', 'displayText': 'Fax'}] trait_to_property_map._update_property_value('defaultValue', constant_values) result = trait_to_property_map._fetch_property_value('defaultValue') self.assertEqual(1, len(result)) self.assertEqual('en', result[0].get('languageTag')) self.assertEqual('Fax', result[0].get('displayText')) self.assertIsNone(result[0].get('attributeValue')) self.assertIsNone(result[0].get('displayOrder'))
class CdmDataPartitionPatternDefinition(CdmObjectDefinition, CdmFileStatus): def __init__(self, ctx: 'CdmCorpusContext', name: str) -> None: super().__init__(ctx) self._TAG = CdmDataPartitionPatternDefinition.__name__ # The partition pattern name. self.name = name # type: str # The starting location corpus path for searching for inferred data partitions. self.root_location = None # type: Optional[str] # The glob pattern to use for searching partitions. self.glob_pattern = None # type: Optional[str] # The regular expression to use for searching partitions. self.regular_expression = None # type: Optional[str] # the names for replacement values from regular expression. self.parameters = None # type: Optional[List[str]] # The corpus path for specialized schema to use for matched pattern partitions. self.specialized_schema = None # type: Optional[str] self.last_file_status_check_time = None # type: Optional[datetime] self.last_file_modified_time = None # type: Optional[datetime] # --- internal --- self._ttpm = TraitToPropertyMap(self) @property def object_type(self) -> 'CdmObjectType': return CdmObjectType.DATA_PARTITION_PATTERN_DEF @property def last_child_file_modified_time(self) -> datetime: raise NotImplementedError() @last_child_file_modified_time.setter def last_child_file_modified_time(self, val: datetime): raise NotImplementedError() @property def is_incremental(self) -> bool: """ Gets whether the data partition pattern is incremental. """ return cast(bool, self._ttpm._fetch_property_value('isIncremental')) def copy( self, res_opt: Optional['ResolveOptions'] = None, host: Optional['CdmDataPartitionPatternDefinition'] = None ) -> 'CdmDataPartitionPatternDefinition': if not res_opt: res_opt = ResolveOptions(wrt_doc=self) if not host: copy = CdmDataPartitionPatternDefinition(self.ctx, self.name) else: copy = host copy.name = self.name copy.root_location = self.root_location copy.glob_pattern = self.glob_pattern copy.regular_expression = self.regular_expression copy.parameters = list(self.parameters) if self.parameters else None copy.last_file_status_check_time = self.last_file_status_check_time copy.last_file_modified_time = self.last_file_modified_time if self.specialized_schema: copy.specialized_schema = self.specialized_schema self._copy_def(res_opt, copy) return copy async def file_status_check_async(self) -> None: """Check the modified time for this object and any children.""" with logger._enter_scope(self._TAG, self.ctx, self.file_status_check_async.__name__): namespace = None adapter = None # make sure the root is a good full corpus path. root_cleaned = (self.root_location[:-1] if self.root_location and self.root_location.endswith('/') else self.root_location) or '' root_corpus = self.ctx.corpus.storage.create_absolute_corpus_path( root_cleaned, self.in_document) try: # Remove namespace from path path_tuple = StorageUtils.split_namespace_path(root_corpus) if not path_tuple: logger.error( self.ctx, self._TAG, CdmDataPartitionPatternDefinition. file_status_check_async.__name__, self.at_corpus_path, CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH) return namespace = path_tuple[0] adapter = self.ctx.corpus.storage.fetch_adapter(namespace) if adapter is None: logger.error( self.ctx, self._TAG, CdmDataPartitionPatternDefinition. file_status_check_async.__name__, self.at_corpus_path, CdmLogCode.ERR_DOC_ADAPTER_NOT_FOUND, self.in_document.name) # get a list of all corpus_paths under the root. file_info_list = await adapter.fetch_all_files_async( path_tuple[1]) except Exception as e: file_info_list = None logger.warning( self.ctx, self._TAG, CdmDataPartitionPatternDefinition. file_status_check_async.__name__, self.at_corpus_path, CdmLogCode.WARN_PARTITION_FILE_FETCH_FAILED, root_corpus, e) if file_info_list is not None and namespace is not None: # remove root of the search from the beginning of all paths so anything in the root is not found by regex. file_info_list = [(namespace + ':' + fi)[len(root_corpus):] for fi in file_info_list] if isinstance(self.owner, CdmLocalEntityDeclarationDefinition): local_ent_dec_def_owner = cast( 'CdmLocalEntityDeclarationDefinition', self.owner) # if both are present log warning and use glob pattern, otherwise use regularExpression if self.glob_pattern and not self.glob_pattern.isspace( ) and self.regular_expression and not self.regular_expression.isspace( ): logger.warning( self.ctx, self._TAG, CdmDataPartitionPatternDefinition. file_status_check_async.__name__, self.at_corpus_path, CdmLogCode.WARN_PARTITION_GLOB_AND_REGEX_PRESENT, self.glob_pattern, self.regular_expression) regular_expression = self.glob_pattern_to_regex( self.glob_pattern ) if self.glob_pattern and not self.glob_pattern.isspace( ) else self.regular_expression try: reg = regex.compile(regular_expression) except Exception as e: logger.error( self.ctx, self._TAG, CdmDataPartitionPatternDefinition. file_status_check_async.__name__, self.at_corpus_path, CdmLogCode.ERR_VALDN_INVALID_EXPRESSION, 'glob pattern' if self.glob_pattern and not self.glob_pattern.isspace() else 'regular expression', self.glob_pattern if self.glob_pattern and not self.glob_pattern.isspace() else self.regular_expression, e) if reg: # a set to check if the data partition exists data_partition_path_set = set() if local_ent_dec_def_owner.data_partitions is not None: for data_partition in local_ent_dec_def_owner.data_partitions: data_partition_location_full_path = self.ctx.corpus.storage.create_absolute_corpus_path( data_partition.location, self.in_document) data_partition_path_set.add( data_partition_location_full_path) incremental_partition_path_hash_set = set() if local_ent_dec_def_owner.data_partitions is not None: for incremental_partition in local_ent_dec_def_owner.incremental_partitions: incremental_partition_location_full_path = self.ctx.corpus.storage.create_absolute_corpus_path( incremental_partition.location, self.in_document) incremental_partition_path_hash_set.add( incremental_partition_location_full_path) for fi in file_info_list: m = reg.fullmatch(fi) if m: # create a map of arguments out of capture groups. args = defaultdict( list) # type: Dict[str, List[str]] i_param = 0 for i in range(1, reg.groups + 1): captures = m.captures(i) if captures and self.parameters and i_param < len( self.parameters): # to be consistent with other languages, if a capture group captures # multiple things, only use the last thing that was captured single_capture = captures[-1] current_param = self.parameters[ i_param] args[current_param].append( single_capture) i_param += 1 else: break # put the original but cleaned up root back onto the matched doc as the location stored in the partition. location_corpus_path = root_cleaned + fi full_path = root_corpus + fi # Remove namespace from path path_tuple = StorageUtils.split_namespace_path( full_path) if not path_tuple: logger.error( self.ctx, self._TAG, CdmDataPartitionPatternDefinition. file_status_check_async.__name__, self.at_corpus_path, CdmLogCode. ERR_STORAGE_NULL_CORPUS_PATH) return last_modified_time = await adapter.compute_last_modified_time_async( path_tuple[1]) if self.is_incremental and full_path not in incremental_partition_path_hash_set: local_ent_dec_def_owner._create_partition_from_pattern( location_corpus_path, self.exhibits_traits, args, self.specialized_schema, last_modified_time, True, self.name) incremental_partition_path_hash_set.add( full_path) if not self.is_incremental and full_path not in data_partition_path_set: local_ent_dec_def_owner._create_partition_from_pattern( location_corpus_path, self.exhibits_traits, args, self.specialized_schema, last_modified_time) data_partition_path_set.add(full_path) # update modified times. self.last_file_status_check_time = datetime.now(timezone.utc) def glob_pattern_to_regex(self, pattern: str) -> str: new_pattern = [] # all patterns should start with a slash new_pattern.append("[/\\\\]") # if pattern starts with slash, skip the first character. We already added it above i = 1 if pattern[0] == '/' or pattern[0] == '\\' else 0 while i < len(pattern): curr_char = pattern[i] if curr_char == '.': # escape '.' characters new_pattern.append('\\.') elif curr_char == '\\': # convert backslash into slash new_pattern.append('[/\\\\]') elif curr_char == '?': # question mark in glob matches any single character new_pattern.append('.') elif curr_char == '*': next_char = pattern[i + 1] if i + 1 < len(pattern) else None if next_char == '*': prev_char = pattern[i - 1] if i - 1 >= 0 else None post_char = pattern[i + 2] if i + 2 < len(pattern) else None # globstar must be at beginning of pattern, end of pattern, or wrapped in separator characters if (prev_char is None or prev_char == '/' or prev_char == '\\') and (post_char is None or post_char == '/' or post_char == '\\'): new_pattern.append('.*') # globstar can match zero or more subdirectories. If it matches zero, then there should not be # two consecutive '/' characters so make the second one optional if (prev_char == '/' or prev_char == '\\') and ( post_char == '/' or post_char == '\\'): new_pattern.append('/?') i = i + 1 else: # otherwise, treat the same as '*' new_pattern.append('[^/\\\\]*') i = i + 1 else: # * new_pattern.append('[^/\\\\]*') else: new_pattern.append(curr_char) i = i + 1 return ''.join(new_pattern) def get_name(self) -> str: return self.name def is_derived_from(self, base: str, res_opt: Optional['ResolveOptions'] = None) -> bool: return False async def report_most_recent_time_async(self, child_time: datetime) -> None: """Report most recent modified time (of current or children objects) to the parent object.""" if isinstance(self.owner, CdmFileStatus) and child_time: await cast(CdmFileStatus, self.owner).report_most_recent_time_async(child_time) def validate(self) -> bool: if not bool(self.root_location): missing_fields = ['root_location'] logger.error( self.ctx, self._TAG, 'validate', self.at_corpus_path, CdmLogCode.ERR_VALDN_INTEGRITY_CHECK_FAILURE, self.at_corpus_path, ', '.join(map(lambda s: '\'' + s + '\'', missing_fields))) return False return True def visit(self, path_from: str, pre_children: 'VisitCallback', post_children: 'VisitCallback') -> bool: path = self._fetch_declared_path(path_from) if pre_children and pre_children(self, path): return False if self._visit_def(path, pre_children, post_children): return True if post_children and post_children(self, path): return True return False def _fetch_declared_path(self, path_from: str) -> str: return '{}{}'.format(path_from, (self.get_name() or 'UNNAMED'))
class CdmDataPartitionDefinition(CdmObjectDefinition, CdmFileStatus): def __init__(self, ctx: 'CdmCorpusContext', name: str) -> None: super().__init__(ctx) self._TAG = CdmDataPartitionDefinition.__name__ # The name of a data partition. self.name = name # type: str # The corpus path for the data file location. self.location = None # type: Optional[str] # Indicates whether this partition is inferred. self.inferred = False # type: bool # The list of key value pairs to give names for the replacement values from the RegEx. self.arguments = {} # type: Dict[str, List[str]] # The path of a specialized schema to use specifically for the partitions generated. self.specialized_schema = None # type: Optional[str] # The refresh time of the partition. self.refresh_time = None # type: Optional[datetime] self.last_file_modified_time = None # type: Optional[datetime] self.last_file_status_check_time = None # type: Optional[datetime] # --- internal --- self._ttpm = TraitToPropertyMap(self) @property def object_type(self) -> 'CdmObjectType': return CdmObjectType.DATA_PARTITION_DEF @property def description(self) -> str: return cast(str, self._ttpm._fetch_property_value('description')) @description.setter def description(self, val: str) -> None: self._ttpm._update_property_value('description', val) @property def last_child_file_modified_time(self) -> datetime: raise NotImplementedError() @last_child_file_modified_time.setter def last_child_file_modified_time(self, val: datetime): raise NotImplementedError() def copy( self, res_opt: Optional['ResolveOptions'] = None, host: Optional['CdmDataPartitionDefinition'] = None ) -> 'CdmDataPartitionDefinition': if not res_opt: res_opt = ResolveOptions( wrt_doc=self, directives=self.ctx.corpus.default_resolution_directives) if not host: copy = CdmDataPartitionDefinition(self.ctx, self.name) else: copy = host copy.name = self.name copy.description = self.description copy.location = self.location copy.last_file_status_check_time = self.last_file_status_check_time copy.last_file_modified_time = self.last_file_modified_time copy.inferred = self.inferred if self.arguments: # deep copy the content copy.arguments = dict() for key in self.arguments.keys(): copy.arguments[key] = list(self.arguments[key]) copy.specialized_schema = self.specialized_schema self._copy_def(res_opt, copy) return copy async def file_status_check_async(self) -> None: """Check the modified time for this object and any children.""" with logger._enter_scope(self._TAG, self.ctx, self.file_status_check_async.__name__): full_path = self.ctx.corpus.storage.create_absolute_corpus_path( self.location, self.in_document) modified_time = await self.ctx.corpus._get_last_modified_time_from_partition_path_async( full_path) # Update modified times. self.last_file_status_check_time = datetime.now(timezone.utc) self.last_file_modified_time = time_utils._max_time( modified_time, self.last_file_modified_time) await self.report_most_recent_time_async( self.last_file_modified_time) def get_name(self) -> str: return self.name def is_derived_from(self, base: str, res_opt: Optional['ResolveOptions'] = None) -> bool: return False async def report_most_recent_time_async(self, child_time: datetime) -> None: """Report most recent modified time (of current or children objects) to the parent object.""" if isinstance(self.owner, CdmFileStatus) and child_time: await cast(CdmFileStatus, self.owner).report_most_recent_time_async(child_time) def validate(self) -> bool: return True def visit(self, path_from: str, pre_children: 'VisitCallback', post_children: 'VisitCallback') -> bool: path = self._fetch_declared_path(path_from) if pre_children and pre_children(self, path): return False if self._visit_def(path, pre_children, post_children): return True if post_children and post_children(self, path): return True return False def _fetch_declared_path(self, path_from: str) -> str: return '{}{}'.format(path_from, (self.get_name() or 'UNNAMED'))