def from_data(ctx: CdmCorpusContext, table: TableEntity, manifest: CdmManifestDefinition, syms_root_path: str) -> CdmLocalEntityDeclarationDefinition:
    table_name = str(table.name)
    local_dec = ctx.corpus.make_object(CdmObjectType.LOCAL_ENTITY_DECLARATION_DEF, table_name)
    local_dec.entity_path = ctx.corpus.storage.create_relative_corpus_path(table_name + '.cdm.json/' + table_name, manifest)

    table_properties = TableProperties(None, None, None).deserialize(table.properties)
    properties = table_properties.properties

    if properties is not None:
        if 'cdm:isHidden' in properties:
            is_hidden_trait = ctx.corpus.make_ref(CdmObjectType.TRAIT_REF, 'is.hidden', True)
            is_hidden_trait.is_from_property = True
            local_dec.exhibits_traits.append(is_hidden_trait)
        if 'cdm:lastChildFileModifiedTime' in properties:
            local_dec.last_child_file_modified_time = dateutil.parser.parse(properties['cdm:lastChildFileModifiedTime'])
        if 'cdm:lastFileModifiedTime' in properties:
            local_dec.last_file_modified_time = dateutil.parser.parse(properties['cdm:lastFileModifiedTime'])
        if 'cdm:lastFileStatusCheckTime' in properties:
            local_dec.last_file_status_check_time = dateutil.parser.parse(properties['cdm:lastFileStatusCheckTime'])
        if 'cdm:explanation' in properties:
            local_dec.explanation = properties['cdm:explanation']
        if 'cdm:entityDecTraits' in properties:
            utils.add_list_to_cdm_collection(local_dec.exhibits_traits,
                                             utils.create_trait_reference_array(ctx, properties['cdm:entityDecTraits']))

    if table_properties.partitioning is not None and table_properties.partitioning.keys is not None:
        # TODO: This is Spark data partitioning, which is not supported.
        logger.error(ctx, _TAG, LocalEntityDeclarationPersistence.from_data.__name__, local_dec.at_corpus_path,
                     CdmLogCode.ERR_PERSIST_SYMS_PARTITION_NOT_SUPPORTED, table_name)
        return None
    else:
        if table_properties.storage_descriptor is not None and table_properties.storage_descriptor.format is not None:
            if table_properties.storage_descriptor.source.location == '':
                logger.error(ctx, _TAG, LocalEntityDeclarationPersistence.from_data.__name__, local_dec.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_SYMS_TABLE_MISSING_DATA_LOCATION, table_name)
                return None

            if table_properties.storage_descriptor.format.format_type == FormatType.csv:
                if table_properties.storage_descriptor.source.location.lower().endswith('.csv'):
                    # The location points to a file: create a data partition.
                    data_partition = DataPartitionPersistence.from_data(ctx, table_properties.storage_descriptor, syms_root_path)
                    local_dec.data_partitions.append(data_partition)
                elif os.path.splitext(table_properties.storage_descriptor.source.location)[1] == '':
                    # The location has no file extension: create a data partition pattern.
                    data_partition_pattern = DataPartitionPatternPersistence.from_data(ctx, table_properties.storage_descriptor,
                                                                                       table.name + 'PartitionPattern', syms_root_path)
                    local_dec.data_partition_patterns.append(data_partition_pattern)
                else:
                    # Restore the data partition pattern if it exists.
                    if properties is not None and 'cdm:data_partition_patterns' in properties:
                        data_partition_pattern = DataPartitionPatternPersistence.from_data(ctx, properties['cdm:data_partition_patterns'],
                                                                                           table.name + 'PartitionPattern', syms_root_path)
                        local_dec.data_partition_patterns.append(data_partition_pattern)
                    else:
                        logger.error(ctx, _TAG, LocalEntityDeclarationPersistence.from_data.__name__, local_dec.at_corpus_path,
                                     CdmLogCode.ERR_PERSIST_SYMS_TABLE_INVALID_DATA_LOCATION, table_name)
                        return None
            elif table_properties.storage_descriptor.format.format_type == FormatType.Parquet:
                # TODO: Parquet and other formats are not supported yet.
                logger.error(ctx, _TAG, LocalEntityDeclarationPersistence.from_data.__name__, local_dec.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_SYMS_TABLE_FORMAT_TYPE_NOT_SUPPORTED, table_name)
                return None
        else:
            logger.error(ctx, _TAG, LocalEntityDeclarationPersistence.from_data.__name__, local_dec.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_SYMS_TABLE_FORMAT_TYPE_NOT_SUPPORTED, table_name)
            return None

    return local_dec
async def to_data_async(instance: CdmLocalEntityDeclarationDefinition, manifest: CdmManifestDefinition, syms_root_path: str,
                        res_opt: 'ResolveOptions', options: 'CopyOptions') -> TableEntity:
    table_entity = await DocumentPersistence.to_data_async(instance.entity_path, manifest, instance.ctx, res_opt, options)

    if table_entity is not None:
        te_properties = table_entity.properties
        properties = LocalEntityDeclarationPersistence.create_table_propertybags(instance, res_opt, options, te_properties.properties)

        if instance.data_partitions is not None and len(instance.data_partitions) > 0:
            paths = []
            for element in instance.data_partitions:
                if element.location is not None:
                    adls_path = instance.ctx.corpus.storage.corpus_path_to_adapter_path(element.location)
                    location = element.location
                    if adls_path is None:
                        logger.error(instance.ctx, _TAG, 'to_data_async', instance.at_corpus_path,
                                     CdmLogCode.ERR_PERSIST_SYMS_ADLS_ADAPTER_MISSING, element.location)
                        return None
                    syms_path = utils.adls_adapter_path_to_syms_path(adls_path)
                    if syms_path is not None:
                        location = syms_path
                    else:
                        path_tuple = StorageUtils.split_namespace_path(element.location)
                        location = utils.create_syms_absolute_path(syms_root_path, path_tuple[1])
                    paths.append(location)

                te_properties.storage_descriptor = DataPartitionPersistence.to_data(element, te_properties.storage_descriptor, res_opt, options)

            # Use the longest common prefix of all partition locations as the common root folder.
            source = DataSource(''.join(c[0] for c in takewhile(lambda x: all(x[0] == y for y in x), zip(*paths))))
            te_properties.storage_descriptor.source = source
        else:
            # Location and format are mandatory for syms.
            source = DataSource(utils.create_syms_absolute_path(syms_root_path, instance.entity_name))
            te_properties.storage_descriptor.source = source

        te_properties.properties = properties

    return table_entity