def __create_tag(cls):
    """Build a sample Tag carrying one field of every value kind.

    The Tag is given a fixed name and template, then populated with a
    boolean, a double, a string, a timestamp and an enum field, each one
    holding a hard-coded value.

    Returns:
        The populated ``datacatalog.Tag``.
    """
    flag = datacatalog.TagField()
    flag.bool_value = True

    number = datacatalog.TagField()
    number.double_value = 1

    text = datacatalog.TagField()
    text.string_value = 'Test String Value'

    # The timestamp is parsed from an RFC 3339 string that carries a
    # -03:00 UTC offset.
    parsed_timestamp = timestamp_pb2.Timestamp()
    parsed_timestamp.FromJsonString('2019-09-06T11:00:00-03:00')
    moment = datacatalog.TagField()
    moment.timestamp_value = parsed_timestamp

    choice = datacatalog.TagField()
    choice.enum_value.display_name = 'Test ENUM Value'

    tag = datacatalog.Tag()
    tag.name = 'tag_template'
    tag.template = 'template'

    fields_by_id = (
        ('bool-field', flag),
        ('double-field', number),
        ('string-field', text),
        ('timestamp-field', moment),
        ('enum-field', choice),
    )
    for field_id, tag_field in fields_by_id:
        tag.fields[field_id] = tag_field

    return tag
def make_fake_tag():
    """Return a fake Tag for tests, with one field per value kind.

    The Tag references the ``test_template`` template and contains
    boolean, double, string, timestamp and enum fields populated with
    fixed values.
    """
    fake_tag = datacatalog.Tag()
    fake_tag.template = 'test_template'

    # Boolean value.
    flag = datacatalog.TagField()
    flag.bool_value = True
    fake_tag.fields['test_bool_field'] = flag

    # Double value.
    number = datacatalog.TagField()
    number.double_value = 1
    fake_tag.fields['test_double_field'] = number

    # String value.
    text = datacatalog.TagField()
    text.string_value = 'Test String Value'
    fake_tag.fields['test_string_field'] = text

    # Timestamp value, parsed from an RFC 3339 string with a UTC offset.
    parsed_timestamp = timestamp_pb2.Timestamp()
    parsed_timestamp.FromJsonString('2019-10-15T01:00:00-03:00')
    moment = datacatalog.TagField()
    moment.timestamp_value = parsed_timestamp
    fake_tag.fields['test_timestamp_field'] = moment

    # Enum value, identified by its display name.
    choice = datacatalog.TagField()
    choice.enum_value.display_name = 'Test ENUM Value'
    fake_tag.fields['test_enum_field'] = choice

    return fake_tag
def _map_related_entry(cls, assembled_entry_data, related_asset_type,
                       source_field_id, target_field_id, id_name_pairs):
    """Add a related-entry URL field to the Tags that reference an asset.

    Every Tag of ``assembled_entry_data`` that contains ``source_field_id``
    is grouped by the asset id stored in that field. For each asset id
    whose ``'<related_asset_type>-<asset id>'`` key is present in
    ``id_name_pairs``, a string field is written at ``target_field_id`` on
    all Tags of the group. Its value is the mapped entry formatted by
    ``cls.__format_related_entry_url``.

    Args:
        assembled_entry_data: Object exposing a ``tags`` attribute, which
            may be empty or ``None``.
        related_asset_type: Prefix used to build the lookup key.
        source_field_id: Id of the Tag field that holds the related asset
            id.
        target_field_id: Id of the Tag field that receives the URL.
        id_name_pairs: Mapping from ``'<type>-<id>'`` keys to the values
            handed to the URL formatter.

    Returns:
        None. The Tags are modified in place.
    """
    # Group Tags by the asset they point at. Each asset id is then handled
    # exactly once, even when several Tags reference the same asset.
    tags_by_asset_id = {}
    for tag in assembled_entry_data.tags or []:
        if source_field_id not in tag.fields:
            continue

        source_field = tag.fields[source_field_id]
        # The id comes from the string value when it is set; otherwise the
        # double value is used, converted to int.
        related_asset_id = (source_field.string_value
                            or int(source_field.double_value))
        tags_by_asset_id.setdefault(related_asset_id, []).append(tag)

    for related_asset_id, relationship_tags in tags_by_asset_id.items():
        related_asset_key = '{}-{}'.format(related_asset_type,
                                           related_asset_id)
        if related_asset_key not in id_name_pairs:
            continue

        # The URL is the same for every Tag of the group: format it once.
        related_entry_url = cls.__format_related_entry_url(
            id_name_pairs[related_asset_key])

        for relationship_tag in relationship_tags:
            string_field = datacatalog.TagField()
            string_field.string_value = related_entry_url
            relationship_tag.fields[target_field_id] = string_field
def __make_fake_tag(cls, string_fields=None, double_fields=None):
    """Build a fake Tag from ``(field id, value)`` pairs.

    Args:
        string_fields: Optional iterable of indexable pairs; item ``[0]``
            is the field id and item ``[1]`` the string value.
        double_fields: Optional iterable of indexable pairs; item ``[0]``
            is the field id and item ``[1]`` the double value.

    Returns:
        A ``datacatalog.Tag`` holding the requested fields. String fields
        are added before double fields.
    """
    fake_tag = datacatalog.Tag()

    # A falsy argument (None or an empty collection) adds no fields.
    for pair in string_fields or ():
        text = datacatalog.TagField()
        text.string_value = pair[1]
        fake_tag.fields[pair[0]] = text

    for pair in double_fields or ():
        number = datacatalog.TagField()
        number.double_value = pair[1]
        fake_tag.fields[pair[0]] = number

    return fake_tag
def __set_tag_field(cls, attribute_defs, enum_types_dict, field_name, tag,
                    tag_value):
    """Set one Tag field, choosing its kind from the attribute definition.

    The field id is ``field_name`` after normalization by
    ``DataCatalogAttributeNormalizer.format_name``. Nothing is set when the
    normalized name belongs to ``cls.__IGNORED_ATTRIBUTES_LIST``.

    Otherwise the attribute definition's ``typeName`` decides the kind:
      - primitive double types are delegated to ``_set_double_field``;
      - primitive boolean types are delegated to ``_set_bool_field`` with
        ``bool(tag_value)``;
      - non-primitive types with a truthy entry in ``enum_types_dict``
        become an enum field whose display name is ``tag_value``;
      - everything else is delegated to ``_set_string_field`` with
        ``str(tag_value)``.

    Args:
        attribute_defs: Definitions searched by
            ``cls.__find_attribute_def``.
        enum_types_dict: Mapping queried with the attribute's type name.
        field_name: Original (non-normalized) attribute name.
        tag: Tag that receives the field.
        tag_value: Value to store.
    """
    normalizer = attr_normalizer.DataCatalogAttributeNormalizer
    formatted_name = normalizer.format_name(field_name)
    if formatted_name in cls.__IGNORED_ATTRIBUTES_LIST:
        return

    attribute_def = cls.__find_attribute_def(attribute_defs, field_name)
    type_name = attribute_def.get('typeName')
    enum_type = enum_types_dict.get(type_name)

    is_primitive = type_name in constant.DATACATALOG_TARGET_PRIMITIVE_TYPES

    if is_primitive and type_name in constant.DATACATALOG_TARGET_DOUBLE_TYPE:
        super()._set_double_field(tag, formatted_name, tag_value)
    elif (is_primitive
          and type_name in constant.DATACATALOG_TARGET_BOOLEAN_TYPE):
        super()._set_bool_field(tag, formatted_name, bool(tag_value))
    elif not is_primitive and enum_type:
        enum_field = datacatalog.TagField()
        enum_field.enum_value.display_name = tag_value
        tag.fields[formatted_name] = enum_field
    else:
        # Remaining primitive types and unknown types are stored as text.
        super()._set_string_field(tag, formatted_name, str(tag_value))
def __make_fake_tag(cls, string_fields=None, double_fields=None):
    """Create a fake Tag populated from ``(field id, value)`` pairs.

    Args:
        string_fields: Optional iterable of indexable pairs, each giving a
            field id at index 0 and a string value at index 1.
        double_fields: Optional iterable of indexable pairs, each giving a
            field id at index 0 and a double value at index 1.

    Returns:
        A ``datacatalog.Tag`` containing the string fields followed by the
        double fields.
    """
    result = datacatalog.Tag()

    # None and empty collections are both treated as "no fields".
    for entry in string_fields or ():
        tag_field = datacatalog.TagField()
        tag_field.string_value = entry[1]
        result.fields[entry[0]] = tag_field

    for entry in double_fields or ():
        tag_field = datacatalog.TagField()
        tag_field.double_value = entry[1]
        result.fields[entry[0]] = tag_field

    return result
def _set_timestamp_field(cls, tag, field_id, value):
    """Store ``value`` as a timestamp field on ``tag``.

    Nothing is set when ``value`` is falsy. Otherwise ``value`` is
    converted with ``Timestamp.FromDatetime`` (so it is presumably a
    ``datetime`` -- confirm against callers) and assigned to
    ``tag.fields[field_id]``.
    """
    if not value:
        return

    converted = timestamp_pb2.Timestamp()
    converted.FromDatetime(value)

    new_field = datacatalog.TagField()
    new_field.timestamp_value = converted
    tag.fields[field_id] = new_field
def __set_tag_fields(cls, tag: Tag, tag_template: TagTemplate,
                     fields: Dict[str, object]):
    """Populate ``tag`` with the valid entries of ``fields``.

    ``cls.__get_valid_tag_fields`` selects which entries of ``fields`` are
    used for ``tag_template``. For each selected entry a new TagField is
    created, filled by ``cls.__set_field_value`` according to the type the
    template declares for that field id, and stored on the Tag under the
    same id.

    Args:
        tag: Tag to populate in place.
        tag_template: Template providing each field's declared type.
        fields: Candidate values keyed by field id.
    """
    accepted = cls.__get_valid_tag_fields(tag_template, fields)

    for accepted_id, accepted_value in accepted.items():
        tag_field = datacatalog.TagField()
        declared_type = tag_template.fields[accepted_id].type_
        cls.__set_field_value(tag_field, declared_type, accepted_value)
        tag.fields[accepted_id] = tag_field
def create_tag(self, entry, tag_template, fields_descriptors):
    """Create a Tag on ``entry`` from a list of field descriptors.

    Args:
        entry: Entry the Tag is attached to; its ``name`` is used as the
            parent of the new Tag.
        tag_template: Template the Tag is based on; its ``name`` is stored
            in the Tag.
        fields_descriptors: Iterable of mappings, each providing the
            ``'id'``, ``'value'`` and ``'primitive_type'`` of one field.

    Returns:
        Whatever the Data Catalog client's ``create_tag`` call returns.
    """
    new_tag = datacatalog.Tag()
    new_tag.template = tag_template.name

    for field_descriptor in fields_descriptors:
        tag_field = datacatalog.TagField()
        self.__set_tag_field_value(tag_field,
                                   field_descriptor['value'],
                                   field_descriptor['primitive_type'])
        new_tag.fields[field_descriptor['id']] = tag_field

    parent_name = entry.name
    return self.__datacatalog.create_tag(parent=parent_name, tag=new_tag)
def datacatalog_tag(datacatalog_table_entry, datacatalog_tag_template):
    """Yield a Tag created on the given table entry, then delete it.

    The Tag is based on ``datacatalog_tag_template`` and carries boolean,
    double, string, timestamp and enum fields with fixed values. It is
    created through ``datacatalog_client`` before the ``yield`` and
    deleted after it. Both calls are followed by a two-second pause.
    """
    flag = datacatalog.TagField()
    flag.bool_value = True

    number = datacatalog.TagField()
    number.double_value = 10.5

    text = datacatalog.TagField()
    text.string_value = 'test'

    parsed_timestamp = timestamp_pb2.Timestamp()
    parsed_timestamp.FromJsonString('2019-10-15T01:00:00-03:00')
    moment = datacatalog.TagField()
    moment.timestamp_value = parsed_timestamp

    choice = datacatalog.TagField()
    choice.enum_value.display_name = 'VALUE 1'

    draft = datacatalog.Tag()
    draft.template = datacatalog_tag_template.name

    fields_by_id = (
        ('boolean_field', flag),
        ('double_field', number),
        ('string_field', text),
        ('timestamp_field', moment),
        ('enum_field', choice),
    )
    for field_id, tag_field in fields_by_id:
        draft.fields[field_id] = tag_field

    created_tag = datacatalog_client.create_tag(
        parent=datacatalog_table_entry.name, tag=draft)

    # Wait a few seconds for Data Catalog's search index sync/update.
    time.sleep(2)

    yield created_tag

    datacatalog_client.delete_tag(name=created_tag.name)

    # Wait a few seconds for Data Catalog's search index sync/update.
    time.sleep(2)
def make_fake_tag(template: Optional[str] = 'template',
                  column: Optional[str] = None,
                  string_fields: List[Tuple[str, str]] = None) -> Tag:
    """Build a fake Tag with optional column and string fields.

    Args:
        template: Value stored in the Tag's ``template`` attribute.
        column: Value stored in the Tag's ``column`` attribute; left
            untouched when falsy.
        string_fields: ``(field id, string value)`` pairs added to the Tag;
            nothing is added when falsy.

    Returns:
        The assembled ``datacatalog.Tag``.
    """
    fake_tag = datacatalog.Tag()
    fake_tag.template = template

    if column:
        fake_tag.column = column

    for pair in string_fields or ():
        text = datacatalog.TagField()
        text.string_value = pair[1]
        fake_tag.fields[pair[0]] = text

    return fake_tag
def _set_string_field(cls, tag, field_id, value):
    """Assign a string value to a Tag field, truncating it when too long.

    The Data Catalog API limits string field values to 2000 chars length
    when encoded in UTF-8, so the check is made on the encoded value
    rather than on the number of characters. UTF-8 characters take from 1
    to 4 bytes (see https://en.wikipedia.org/wiki/UTF-8):
      - 1 byte for the first 128 characters (US-ASCII);
      - 2 bytes for the next 1,920 characters, covering the remainder of
        almost all Latin-script alphabets, plus Greek, Cyrillic, Coptic,
        Armenian, Hebrew, Arabic, Syriac, Thaana and N'Ko alphabets, and
        Combining Diacritical Marks;
      - 3 bytes for the rest of the Basic Multilingual Plane, which holds
        virtually all characters in common use, including most Chinese,
        Japanese and Korean characters;
      - 4 bytes for the other Unicode planes: less common CJK characters,
        various historic scripts, mathematical symbols and emoji.

    When the encoded value is longer than the maximum, it is cut and three
    periods are appended, so users can tell the stored text differs from
    the original. Values that are empty or not strings are ignored.

    Args:
        tag: Tag that receives the field.
        field_id: Id under which the field is stored.
        value: Text to store.
    """
    if not value or not isinstance(value, six.string_types):
        return

    encoding = cls.__UTF8_CHARACTER_ENCODING
    max_length = cls.__STRING_VALUE_UTF8_MAX_LENGTH
    suffix_length = cls.__SUFFIX_CHARS_LENGTH

    raw_bytes = value.encode(encoding)

    if len(raw_bytes) > max_length:
        # Keep room for the suffix. The cut may land in the middle of a
        # multi-byte character; 'ignore' drops such an incomplete tail.
        kept_bytes = raw_bytes[:max_length - suffix_length]
        text = u'{}...'.format(kept_bytes.decode(encoding, 'ignore'))
    else:
        text = raw_bytes.decode(encoding, 'ignore')

    new_field = datacatalog.TagField()
    new_field.string_value = text
    tag.fields[field_id] = new_field
def _set_string_field(cls, tag, field_id, value):
    """Assign a string value to a Tag field, truncating it if needed.

    The Data Catalog API limits string field values to 2000 chars length
    when encoded in UTF-8. Truncation is delegated to
    ``DataCatalogStringsHelper.truncate_string``, which receives
    ``cls.__STRING_VALUE_UTF8_MAX_LENGTH`` as the limit. Values that are
    empty or not strings are ignored.
    """
    if not value or not isinstance(value, six.string_types):
        return

    helper = prepare.DataCatalogStringsHelper
    text = helper.truncate_string(value, cls.__STRING_VALUE_UTF8_MAX_LENGTH)

    new_field = datacatalog.TagField()
    new_field.string_value = text
    tag.fields[field_id] = new_field
def __set_enum_field(cls, tag, field_id, value):
    """Store ``value`` as the display name of an enum field on ``tag``.

    Nothing is set when ``value`` is ``None``.
    """
    if value is None:
        return

    new_field = datacatalog.TagField()
    new_field.enum_value.display_name = value
    tag.fields[field_id] = new_field
def _set_double_field(cls, tag, field_id, value):
    """Store ``value`` as a double field on ``tag``.

    Nothing is set when ``value`` is ``None``; other falsy values such as
    ``0`` are stored.
    """
    if value is None:
        return

    new_field = datacatalog.TagField()
    new_field.double_value = value
    tag.fields[field_id] = new_field
def _set_bool_field(cls, tag, field_id, value):
    """Store ``value`` as a boolean field on ``tag``.

    Nothing is set when ``value`` is ``None``; ``False`` is stored like
    any other value.
    """
    if value is None:
        return

    new_field = datacatalog.TagField()
    new_field.bool_value = value
    tag.fields[field_id] = new_field