Example #1
    def __init__(self,
                 description,
                 rule,
                 available_field_names,
                 location=None):
        super(DistinctCountCheck, self).__init__(
            description, rule, available_field_names, location)

        rule_read_line = _compat.token_io_readline(rule)
        tokens = tokenize.generate_tokens(rule_read_line)
        first_token = next(tokens)

        # Obtain and validate field to count.
        if first_token[0] != tokenize.NAME:
            raise errors.InterfaceError(
                _("rule must start with a field name but found: %r") %
                first_token[1], self.location_of_rule)
        self._field_name_to_count = first_token[1]
        fields.field_name_index(self._field_name_to_count,
                                available_field_names, location)
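        # Element [3] of a token tuple is its (row, column) end position.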
        line_where_field_name_ends, column_where_field_name_ends = first_token[3]
        assert column_where_field_name_ends > 0
        assert line_where_field_name_ends == 1

        # Build and test Python expression for validation.
        self._expression = DistinctCountCheck._COUNT_NAME + rule[column_where_field_name_ends:]
        self._distinct_value_to_count_map = None
        self.reset()
        self._eval()
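
A minimal, self-contained sketch (my own, not the project's code) of the token handling above; io.StringIO(rule).readline stands in for _compat.token_io_readline(rule), and the rule text and count placeholder name are assumptions for illustration:

import io
import tokenize

rule = "branch_id < 10"                        # hypothetical rule text
tokens = tokenize.generate_tokens(io.StringIO(rule).readline)
first_token = next(tokens)
assert first_token[0] == tokenize.NAME         # rule starts with a field name
field_name_to_count = first_token[1]           # -> 'branch_id'
_, column_where_field_name_ends = first_token[3]
# Swap the field name for the count placeholder to get a Python expression.
expression = "count" + rule[column_where_field_name_ends:]
print(expression)                              # -> 'count < 10'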
Example #2
def tokenize_without_space(text):
    """
    ``text`` split into tokens with any whitespace tokens removed.
    """
    assert text is not None
    for toky in tokenize.generate_tokens(_compat.token_io_readline(text)):
        toky_type = toky[0]
        toky_text = toky[1]
        if ((toky_type != token.INDENT) and toky_text.strip()) or (toky_type == token.ENDMARKER):
            yield toky
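
For illustration only, here is a self-contained way to exercise the function above, with io.StringIO(text).readline substituted for the project's _compat.token_io_readline helper (an assumption about what that helper returns):

import io
import token
import tokenize

def tokenize_without_space(text):
    """``text`` split into tokens with any whitespace tokens removed."""
    assert text is not None
    for toky in tokenize.generate_tokens(io.StringIO(text).readline):
        if ((toky[0] != token.INDENT) and toky[1].strip()) or (toky[0] == token.ENDMARKER):
            yield toky

for toky in tokenize_without_space("customer_id ,  branch_id"):
    print(token.tok_name[toky[0]], repr(toky[1]))
# Prints roughly: NAME 'customer_id', OP ',', NAME 'branch_id', ENDMARKER ''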
Example #3
    def __init__(self,
                 description,
                 rule,
                 available_field_names,
                 location=None):
        super(IsUniqueCheck, self).__init__(description, rule,
                                            available_field_names, location)

        self._field_names_to_check = []
        self._row_key_to_location_map = None
        self.reset()

        # Extract field names to check from rule.
        rule_read_line = _compat.token_io_readline(rule)
        toky = tokenize.generate_tokens(rule_read_line)
        after_comma = True
        next_token = next(toky)
        unique_field_names = set()
        while (not _tools.is_eof_token(next_token)
               and next_token[0] != tokenize.NEWLINE):
            token_type = next_token[0]
            token_value = next_token[1]
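            # Rule tokens must alternate between field names and separating commas.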
            if after_comma:
                if token_type != tokenize.NAME:
                    raise errors.InterfaceError(
                        _("field name must contain only ASCII letters, numbers and underscores (_) "
                          "but found: %r [token type=%r]") %
                        (token_value, token_type), self.location_of_rule)
                try:
                    fields.field_name_index(token_value, available_field_names,
                                            location)
                    if token_value in unique_field_names:
                        raise errors.InterfaceError(
                            _("duplicate field name for unique check must be removed: %s"
                              ) % token_value, self.location_of_rule)
                    unique_field_names.add(token_value)
                except errors.InterfaceError as error:
                    raise errors.InterfaceError(six.text_type(error))
                self._field_names_to_check.append(token_value)
            elif not _tools.is_comma_token(next_token):
                raise errors.InterfaceError(
                    _("after field name a comma (,) must follow but found: %r")
                    % token_value, self.location_of_rule)
            after_comma = not after_comma
            next_token = next(toky)
        if not self._field_names_to_check:
            raise errors.InterfaceError(
                _("rule must contain at least one field name to check for uniqueness"
                  ), self.location_of_rule)
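
Again purely for illustration (not the project's code), this shows the NAME/comma token sequence the loop above walks over for a rule such as "branch_id, customer_id", with io.StringIO standing in for _compat.token_io_readline:

import io
import token
import tokenize

rule = "branch_id, customer_id"                # hypothetical rule text
for tok in tokenize.generate_tokens(io.StringIO(rule).readline):
    print(token.tok_name[tok[0]], repr(tok[1]))
# Typically prints NAME 'branch_id', OP ',', NAME 'customer_id', then a
# trailing NEWLINE and ENDMARKER, which terminate the loop above.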
Example #4
    def __init__(self, description, rule, available_field_names, location=None):
        super(DistinctCountCheck, self).__init__(description, rule, available_field_names, location)

        rule_read_line = _compat.token_io_readline(rule)
        tokens = tokenize.generate_tokens(rule_read_line)
        first_token = next(tokens)

        # Obtain and validate field to count.
        if first_token[0] != tokenize.NAME:
            raise errors.InterfaceError(
                "rule must start with a field name but found: %r" % first_token[1], self.location_of_rule)
        self._field_name_to_count = first_token[1]
        fields.field_name_index(self._field_name_to_count, available_field_names, location)
        line_where_field_name_ends, column_where_field_name_ends = first_token[3]
        assert column_where_field_name_ends > 0
        assert line_where_field_name_ends == 1

        # Build and test Python expression for validation.
        self._expression = DistinctCountCheck._COUNT_NAME + rule[column_where_field_name_ends:]
        self._distinct_value_to_count_map = None
        self.reset()
        self._eval()
Example #5
    def __init__(self, description, rule, available_field_names, location=None):
        super(IsUniqueCheck, self).__init__(description, rule, available_field_names, location)

        self._field_names_to_check = []
        self._row_key_to_location_map = None
        self.reset()

        # Extract field names to check from rule.
        rule_read_line = _compat.token_io_readline(rule)
        toky = tokenize.generate_tokens(rule_read_line)
        after_comma = True
        next_token = next(toky)
        unique_field_names = set()
        while not _tools.is_eof_token(next_token):
            token_type = next_token[0]
            token_value = next_token[1]
            if after_comma:
                if token_type != tokenize.NAME:
                    raise errors.InterfaceError(
                        "field name must contain only ASCII letters, numbers and underscores (_) "
                        + "but found: %r [token type=%r]" % (token_value, token_type), self.location_of_rule)
                try:
                    fields.field_name_index(token_value, available_field_names, location)
                    if token_value in unique_field_names:
                        raise errors.InterfaceError(
                            "duplicate field name for unique check must be removed: %s" % token_value,
                            self.location_of_rule)
                    unique_field_names.add(token_value)
                except errors.InterfaceError as error:
                    raise errors.InterfaceError(six.text_type(error))
                self._field_names_to_check.append(token_value)
            elif not _tools.is_comma_token(next_token):
                raise errors.InterfaceError(
                    "after field name a comma (,) must follow but found: %r" % token_value, self.location_of_rule)
            after_comma = not after_comma
            next_token = next(toky)
        if not self._field_names_to_check:
            raise errors.InterfaceError(
                "rule must contain at least one field name to check for uniqueness", self.location_of_rule)