def __create_tag(cls):
        """Build a Tag carrying one field of each value kind.

        The Tag gets a fixed ``name`` and ``template`` and five fields:
        boolean, double, string, timestamp and enum.

        Returns:
            The populated ``datacatalog.Tag``.
        """
        result = datacatalog.Tag()
        result.name = 'tag_template'
        result.template = 'template'

        # Boolean field.
        flag = datacatalog.TagField()
        flag.bool_value = True
        result.fields['bool-field'] = flag

        # Double field.
        number = datacatalog.TagField()
        number.double_value = 1
        result.fields['double-field'] = number

        # String field.
        text = datacatalog.TagField()
        text.string_value = 'Test String Value'
        result.fields['string-field'] = text

        # Timestamp field, parsed from an RFC 3339 string with a UTC offset.
        moment = timestamp_pb2.Timestamp()
        moment.FromJsonString('2019-09-06T11:00:00-03:00')
        when = datacatalog.TagField()
        when.timestamp_value = moment
        result.fields['timestamp-field'] = when

        # Enum field; only the display name is set.
        choice = datacatalog.TagField()
        choice.enum_value.display_name = 'Test ENUM Value'
        result.fields['enum-field'] = choice

        return result
def make_fake_tag():
    """Return a Tag for template ``test_template`` with five sample fields.

    The fields cover the boolean, double, string, timestamp and enum value
    kinds.
    """
    flag = datacatalog.TagField()
    flag.bool_value = True

    number = datacatalog.TagField()
    number.double_value = 1

    text = datacatalog.TagField()
    text.string_value = 'Test String Value'

    moment = timestamp_pb2.Timestamp()
    moment.FromJsonString('2019-10-15T01:00:00-03:00')
    when = datacatalog.TagField()
    when.timestamp_value = moment

    choice = datacatalog.TagField()
    choice.enum_value.display_name = 'Test ENUM Value'

    fake_tag = datacatalog.Tag()
    fake_tag.template = 'test_template'

    # Every field is fully populated above; attach them in a single pass.
    populated_fields = (
        ('test_bool_field', flag),
        ('test_double_field', number),
        ('test_string_field', text),
        ('test_timestamp_field', when),
        ('test_enum_field', choice),
    )
    for field_id, field in populated_fields:
        fake_tag.fields[field_id] = field

    return fake_tag
    def _map_related_entry(cls, assembled_entry_data, related_asset_type,
                           source_field_id, target_field_id, id_name_pairs):
        """Add a related-entry field to each Tag that references an asset.

        Every Tag in ``assembled_entry_data.tags`` that has a
        ``source_field_id`` field is grouped by the id stored in that field
        (its string value, or the integer form of its double value when the
        string value is empty). For each id whose
        ``'<related_asset_type>-<id>'`` key exists in ``id_name_pairs``, the
        Tags in the group receive a string field named ``target_field_id``
        holding the result of ``__format_related_entry_url`` for the mapped
        value. Tags are modified in place; nothing is returned.

        Args:
            assembled_entry_data: Object exposing a ``tags`` iterable
                (``None``/empty is accepted).
            related_asset_type: Prefix used to build the lookup key.
            source_field_id: Tag field holding the related asset id.
            target_field_id: Tag field that receives the formatted value.
            id_name_pairs: Mapping from ``'<type>-<id>'`` keys to the value
                passed to ``__format_related_entry_url``.
        """
        # Group tags by related asset id so that each distinct id is
        # resolved exactly once, even when several tags reference it.
        tags_by_related_id = {}
        for tag in assembled_entry_data.tags or []:
            if source_field_id not in tag.fields:
                continue
            source_field = tag.fields[source_field_id]
            # Ids may be stored either as a string or as a number.
            related_asset_id = (source_field.string_value
                                or int(source_field.double_value))
            tags_by_related_id.setdefault(related_asset_id, []).append(tag)

        for related_asset_id, relationship_tags in tags_by_related_id.items():
            related_asset_key = '{}-{}'.format(related_asset_type,
                                               related_asset_id)
            if related_asset_key not in id_name_pairs:
                continue

            # The formatted value depends only on the id, not on the tag.
            related_entry_url = cls.__format_related_entry_url(
                id_name_pairs[related_asset_key])

            for relationship_tag in relationship_tags:
                string_field = datacatalog.TagField()
                string_field.string_value = related_entry_url
                relationship_tag.fields[target_field_id] = string_field
# Example no. 4
# 0
    def __make_fake_tag(cls, string_fields=None, double_fields=None):
        """Build a Tag from optional ``(field_id, value)`` pairs.

        Args:
            string_fields: Pairs added as string-valued fields.
            double_fields: Pairs added as double-valued fields.

        Returns:
            The ``datacatalog.Tag`` holding the requested fields.
        """
        fake_tag = datacatalog.Tag()

        # A missing or empty argument simply contributes no fields.
        for pair in string_fields or ():
            text_field = datacatalog.TagField()
            text_field.string_value = pair[1]
            fake_tag.fields[pair[0]] = text_field

        for pair in double_fields or ():
            number_field = datacatalog.TagField()
            number_field.double_value = pair[1]
            fake_tag.fields[pair[0]] = number_field

        return fake_tag
    def __set_tag_field(cls, attribute_defs, enum_types_dict, field_name, tag,
                        tag_value):
        """Set one field on ``tag``, choosing its kind from the attribute type.

        Nothing happens when the normalized field name is in the ignored
        attributes list. Otherwise the attribute definition's ``typeName``
        decides the field kind:

        - primitive double types -> double field;
        - primitive boolean types -> bool field holding ``bool(tag_value)``;
        - non-primitive types present in ``enum_types_dict`` -> enum field
          whose display name is ``tag_value``;
        - anything else -> string field holding ``str(tag_value)``.
        """
        normalizer = attr_normalizer.DataCatalogAttributeNormalizer
        formatted_name = normalizer.format_name(field_name)

        if formatted_name in cls.__IGNORED_ATTRIBUTES_LIST:
            return

        attribute_def = cls.__find_attribute_def(attribute_defs, field_name)
        type_name = attribute_def.get('typeName')
        enum_type = enum_types_dict.get(type_name)

        is_primitive = \
            type_name in constant.DATACATALOG_TARGET_PRIMITIVE_TYPES

        if is_primitive and \
                type_name in constant.DATACATALOG_TARGET_DOUBLE_TYPE:
            super()._set_double_field(tag, formatted_name, tag_value)
        elif is_primitive and \
                type_name in constant.DATACATALOG_TARGET_BOOLEAN_TYPE:
            super()._set_bool_field(tag, formatted_name, bool(tag_value))
        elif not is_primitive and enum_type:
            enum_field = datacatalog.TagField()
            enum_field.enum_value.display_name = tag_value
            tag.fields[formatted_name] = enum_field
        else:
            # Remaining primitives and unknown types are stored as text.
            super()._set_string_field(tag, formatted_name, str(tag_value))
# Example no. 6
# 0
    def __make_fake_tag(cls, string_fields=None, double_fields=None):
        """Create a Tag populated from ``(field_id, value)`` sequences.

        ``string_fields`` entries become string-valued fields and
        ``double_fields`` entries become double-valued fields. Either
        argument may be omitted or empty.
        """
        fake_tag = datacatalog.Tag()

        if string_fields:
            for entry in string_fields:
                field_id, text = entry[0], entry[1]
                tag_field = datacatalog.TagField()
                tag_field.string_value = text
                fake_tag.fields[field_id] = tag_field

        if double_fields:
            for entry in double_fields:
                field_id, number = entry[0], entry[1]
                tag_field = datacatalog.TagField()
                tag_field.double_value = number
                fake_tag.fields[field_id] = tag_field

        return fake_tag
    def _set_timestamp_field(cls, tag, field_id, value):
        """Store ``value`` as a timestamp field named ``field_id`` on ``tag``.

        ``value`` is converted with ``Timestamp.FromDatetime``. Falsy values
        (such as ``None``) leave the Tag untouched.
        """
        if not value:
            return

        converted = timestamp_pb2.Timestamp()
        converted.FromDatetime(value)

        field = datacatalog.TagField()
        field.timestamp_value = converted
        tag.fields[field_id] = field
 def __set_tag_fields(cls, tag: Tag, tag_template: TagTemplate,
                      fields: Dict[str, object]):
     """Populate ``tag`` with the entries of ``fields`` valid for a template.

     ``__get_valid_tag_fields`` selects the entries to use. Each selected
     value is written into a new TagField through ``__set_field_value``,
     using the type declared for that field id in ``tag_template``.
     """
     accepted = cls.__get_valid_tag_fields(tag_template, fields)
     for field_id in accepted:
         declared_type = tag_template.fields[field_id].type_
         tag_field = datacatalog.TagField()
         cls.__set_field_value(tag_field, declared_type, accepted[field_id])
         tag.fields[field_id] = tag_field
    def create_tag(self, entry, tag_template, fields_descriptors):
        """Create a Tag on ``entry`` using ``tag_template``.

        Args:
            entry: Entry the Tag is attached to; its ``name`` is the parent.
            tag_template: Template whose ``name`` the Tag refers to.
            fields_descriptors: Iterable of dicts with the keys ``'id'``,
                ``'value'`` and ``'primitive_type'``, one per Tag field.

        Returns:
            Whatever the client's ``create_tag`` call returns.
        """
        new_tag = datacatalog.Tag()
        new_tag.template = tag_template.name

        for descriptor in fields_descriptors:
            tag_field = datacatalog.TagField()
            value = descriptor['value']
            primitive_type = descriptor['primitive_type']
            self.__set_tag_field_value(tag_field, value, primitive_type)
            new_tag.fields[descriptor['id']] = tag_field

        created = self.__datacatalog.create_tag(parent=entry.name,
                                                tag=new_tag)
        return created
def datacatalog_tag(datacatalog_table_entry, datacatalog_tag_template):
    """Create a Tag on the given entry, yield it, then delete it.

    The Tag refers to ``datacatalog_tag_template`` and carries one boolean,
    double, string, timestamp and enum field. The function pauses after
    both the creation and the deletion.
    """
    flag = datacatalog.TagField()
    flag.bool_value = True

    number = datacatalog.TagField()
    number.double_value = 10.5

    text = datacatalog.TagField()
    text.string_value = 'test'

    moment = timestamp_pb2.Timestamp()
    moment.FromJsonString('2019-10-15T01:00:00-03:00')
    when = datacatalog.TagField()
    when.timestamp_value = moment

    choice = datacatalog.TagField()
    choice.enum_value.display_name = 'VALUE 1'

    pending_tag = datacatalog.Tag()
    pending_tag.template = datacatalog_tag_template.name

    populated_fields = (
        ('boolean_field', flag),
        ('double_field', number),
        ('string_field', text),
        ('timestamp_field', when),
        ('enum_field', choice),
    )
    for field_id, field in populated_fields:
        pending_tag.fields[field_id] = field

    created_tag = datacatalog_client.create_tag(
        parent=datacatalog_table_entry.name, tag=pending_tag)

    # Wait a few seconds for Data Catalog's search index sync/update.
    time.sleep(2)
    yield created_tag

    datacatalog_client.delete_tag(name=created_tag.name)
    # Wait a few seconds for Data Catalog's search index sync/update.
    time.sleep(2)
def make_fake_tag(template: Optional[str] = 'template',
                  column: Optional[str] = None,
                  string_fields: List[Tuple[str, str]] = None) -> Tag:
    """Build a Tag with optional column and string fields.

    Args:
        template: Value assigned to the Tag's ``template``.
        column: Assigned to the Tag's ``column`` only when truthy.
        string_fields: ``(field_id, value)`` pairs added as string fields.

    Returns:
        The populated Tag.
    """
    fake_tag = datacatalog.Tag()
    fake_tag.template = template

    if column:
        fake_tag.column = column

    if not string_fields:
        return fake_tag

    for pair in string_fields:
        field_id, text = pair[0], pair[1]
        text_field = datacatalog.TagField()
        text_field.string_value = text
        fake_tag.fields[field_id] = text_field

    return fake_tag
# Example no. 12
# 0
    def _set_string_field(cls, tag, field_id, value):
        """
        Assign ``value`` to a string Tag Field, truncating it when too long.

        The Data Catalog API limits string field values to 2000 chars when
        encoded in UTF-8, where a single character may take from 1 to 4
        bytes (https://en.wikipedia.org/wiki/UTF-8 for details):
        - US-ASCII characters (the first 128) take one byte;
        - the next 1,920 characters take two bytes, covering the remainder
          of almost all Latin-script alphabets plus Greek, Cyrillic, Coptic,
          Armenian, Hebrew, Arabic, Syriac, Thaana and N'Ko, as well as
          Combining Diacritical Marks;
        - the rest of the Basic Multilingual Plane, including most Chinese,
          Japanese and Korean characters, takes three bytes;
        - characters in the other Unicode planes (less common CJK
          characters, historic scripts, mathematical symbols, emoji) take
          four bytes.

        The limit is therefore checked against the value's UTF-8 byte size.
        A value over the limit is cut and gets 3 periods appended, so users
        can tell it differs from the original. Empty or non-string values
        are ignored and no field is set.
        """
        if not value or not isinstance(value, six.string_types):
            return

        encoding = cls.__UTF8_CHARACTER_ENCODING
        max_length = cls.__STRING_VALUE_UTF8_MAX_LENGTH
        suffix_length = cls.__SUFFIX_CHARS_LENGTH

        raw_bytes = value.encode(encoding)

        if len(raw_bytes) > max_length:
            # Reserve suffix_length bytes for the trailing periods. The cut
            # may land inside a multi-byte character; 'ignore' drops the
            # incomplete bytes instead of raising.
            kept_bytes = raw_bytes[:max_length - suffix_length]
            text = u'{}...'.format(kept_bytes.decode(encoding, 'ignore'))
        else:
            text = raw_bytes.decode(encoding, 'ignore')

        field = datacatalog.TagField()
        field.string_value = text
        tag.fields[field_id] = field
    def _set_string_field(cls, tag, field_id, value):
        """
        Assign ``value`` to the string Tag Field ``field_id`` of ``tag``.

        The Data Catalog API limits string field values to 2000 chars when
        encoded in UTF-8, so the value goes through
        ``DataCatalogStringsHelper.truncate_string`` before being stored.
        Empty or non-string values are ignored and no field is set.
        """
        if not value or not isinstance(value, six.string_types):
            return

        field = datacatalog.TagField()
        field.string_value = prepare.DataCatalogStringsHelper.truncate_string(
            value, cls.__STRING_VALUE_UTF8_MAX_LENGTH)
        tag.fields[field_id] = field
# Example no. 14
# 0
 def __set_enum_field(cls, tag, field_id, value):
     """Store ``value`` as the display name of enum field ``field_id``.

     A ``None`` value leaves ``tag`` untouched.
     """
     if value is None:
         return

     field = datacatalog.TagField()
     field.enum_value.display_name = value
     tag.fields[field_id] = field
 def _set_double_field(cls, tag, field_id, value):
     """Store ``value`` in the double field ``field_id`` of ``tag``.

     A ``None`` value leaves ``tag`` untouched; ``0`` is still stored.
     """
     if value is None:
         return

     field = datacatalog.TagField()
     field.double_value = value
     tag.fields[field_id] = field
 def _set_bool_field(cls, tag, field_id, value):
     """Store ``value`` in the boolean field ``field_id`` of ``tag``.

     A ``None`` value leaves ``tag`` untouched; ``False`` is still stored.
     """
     if value is None:
         return

     field = datacatalog.TagField()
     field.bool_value = value
     tag.fields[field_id] = field