コード例 #1
0
    def validate_set(self, line):
        """Validate a word-set line of the form {a, b, ...}.

        The checks run in this order and the first failure is returned:
        the line must end with '}', it must not contain the operators = or
        >, the text between the braces must not be blank, the set may hold
        at most 32 comma-separated elements, no element may be blank, and
        no element may contain a character rejected by
        self.check_characters for
        self.QUERY_EXP_SYNONYMS_SPECIAL_CHARACTERS.

        Args:
          line: the line to check. Its first character is assumed to be
            the opening '{' and is skipped without being inspected.

        Returns:
          None if the line is acceptable, otherwise a
          validatorlib.ValidationError (or whatever
          self.check_characters returned for the offending element).
        """
        if not line.endswith('}'):
            # Set with missing close bracket
            return validatorlib.ValidationError(
                "Missing } for word set.", C.QUERY_EXP_VALIDATION_SET_SYNTAX)

        if line.find("=") >= 0 or line.find(">") >= 0:
            return validatorlib.ValidationError(
                "Sets can not contain operators = and >.",
                C.QUERY_EXP_VALIDATION_SET_SYNTAX)

        inner = line[1:-1]
        # Splitting never produces an empty list (splitting '' yields
        # ['']), so an empty set has to be detected on the text between
        # the braces rather than on the number of elements.
        if inner.strip() == '':
            return validatorlib.ValidationError(
                "Empty word set.", C.QUERY_EXP_VALIDATION_SET_SYNTAX)

        word_set = inner.split(',')
        if len(word_set) > 32:
            return validatorlib.ValidationError(
                "Too many elements in word set. The limit is 32.",
                C.QUERY_EXP_VALIDATION_SET_TOO_BIG)

        for w in word_set:
            # Catches stray commas such as "{a,,b}" or "{a,}".
            if w.strip() == '':
                return validatorlib.ValidationError(
                    "Empty elements in word set.",
                    C.QUERY_EXP_VALIDATION_SET_SYNTAX)
            error = self.check_characters(
                w, self.QUERY_EXP_SYNONYMS_SPECIAL_CHARACTERS)
            if error:
                return error

        return None
コード例 #2
0
    def Upload(self, coll_obj, patch, params, max_errors, contents):
        """Validate uploaded contents and create the entry for them.

        Args:
          coll_obj: collection object for this entry. Its name is used in
            log messages and its Create method is called with
            (patch, params).
          patch: 1 if an existing entry is to be patched (see Create on
            the collection object for details).
          params: dictionary of additional parameters, also passed to
            Create. It must contain C.ENTRY_TYPE; C.ENTRY_COUNT is filled
            in here once validation succeeds.
          max_errors: maximum number of validation errors to collect; it
            is converted with int() before use.
          contents: the contents of the entry.

        Returns:
          validatorlib.VALID_SHORT_CIRCUIT for an unknown entry type, the
          errors reported by the validator if validation fails, or a
          single validatorlib.ValidationError if the entry cannot be
          created.
          NOTE(review): no explicit return for the success path is visible
          in this excerpt, so it falls through -- confirm against the
          complete method.
        """

        name = coll_obj.name
        logging.info("Uploading dictionary %s" % name)
        contents = entconfig.RepairUTF8(contents)

        # Pick the validator that matches the declared entry type.
        entry_type = params[C.ENTRY_TYPE]
        if entry_type == C.QUERY_EXP_FILETYPE_SYNONYMS:
            validator = SynonymsValidator()
        elif entry_type == C.QUERY_EXP_FILETYPE_BLACKLIST:
            validator = BlacklistValidator()
        else:
            logging.error("Unknown entry_type: %s" % entry_type)
            return validatorlib.VALID_SHORT_CIRCUIT

        entry_count, errors = validator.validate(contents, int(max_errors))

        if errors != validatorlib.VALID_OK:
            logging.error("Errors validating query exp upload for %s" % name)
            return errors

        logging.info("Successful validation for query exp entry %s" % name)
        params[C.ENTRY_COUNT] = entry_count

        # Setting "needs apply" could be overzealous if the next stage fails,
        # but we prefer to err on the side of caution
        self.cfg.setGlobalParam(C.QUERY_EXP_STATUS,
                                int(C.QUERY_EXP_STATUS_NEEDS_APPLY))

        # Now we can actually create the object.
        # NOTE(review): QUERYEXP_UNABLE_TO_CREATE_ENTRY is used without the
        # "C." prefix the other constants carry; presumably it is a
        # module-level name -- verify.
        try:
            if not coll_obj.Create(patch, params):
                return validatorlib.ValidationError(
                    "Unable to create query exp entry",
                    QUERYEXP_UNABLE_TO_CREATE_ENTRY)
        except Exception:
            # format_exception returns newline-terminated chunks, so they
            # are concatenated without a separator. The exc_info tuple is
            # passed straight through instead of being stored in locals,
            # which would keep the traceback alive in a reference cycle.
            exc_msg = "".join(traceback.format_exception(*sys.exc_info()))
            logging.error(exc_msg)
            return validatorlib.ValidationError(
                "Unable to create query exp entry",
                QUERYEXP_UNABLE_TO_CREATE_ENTRY)
コード例 #3
0
    def setparams(self, policy_name, encoded_args):
        """Validate and save the scoring-adjustment settings of a policy.

        encoded_args is stripped and handed to config_utils.SafeExec, which
        fills a dictionary keyed by group name. The "patterns" and
        "metadata" groups are validated here, "datebias" is accepted as-is,
        and any other group is only logged. When validation passes, the
        policy is looked up and every group (including unknown ones) is
        written into it, creating the group if it does not exist, and the
        policy is stored back in the global parameters.

        Args:
          policy_name: ScoringAdjustHandler.DEFAULT_POLICY_NAME selects the
            policy stored under C.ENT_SCORING_ADJUST; any other name is
            looked up in C.ENT_SCORING_ADDITIONAL_POLICIES.
          encoded_args: text evaluated by config_utils.SafeExec to obtain
            the settings dictionary.

        Returns:
          The result of admin_handler.formatValidationErrors, applied to
          validatorlib.VALID_OK when everything was saved and to the list
          of validatorlib.ValidationError objects otherwise.
        """

        settings = {}
        # NOTE(review): encoded_args is executed rather than parsed; this
        # relies on config_utils.SafeExec to restrict what the text can do.
        config_utils.SafeExec(encoded_args.strip(), settings)
        errors = []

        # Validate settings for each group. Errors are accumulated across
        # groups: assigning the latest result instead would let a clean
        # group erase the errors found in an earlier one.
        for group in settings.keys():
            group_errors = None
            if group == "patterns":
                # Params should be a list containing the scoring weight, then
                # alternating patterns and adjust levels. We only validate the
                # patterns.
                group_errors = self.validate_patterns(settings[group])
            elif group == "datebias":
                # the only param, weight [0..100] is already validated in the
                # handler code (ScoringAdjustHandler)
                pass
            elif group == "metadata":
                # Params should be a list containing the scoring weight, then
                # alternating name:value metadata information and adjust levels.
                # We only validate the name:value metadata information.
                group_errors = self.validate_metadata(settings[group])
            else:
                logging.info("Ignoring unknown scoring group " + group)
            if group_errors:
                errors.extend(group_errors)

        # If no errors yet, make sure policy is present.
        policy = None
        if not errors:
            if policy_name == ScoringAdjustHandler.DEFAULT_POLICY_NAME:
                policy = self.cfg.getGlobalParam(C.ENT_SCORING_ADJUST)
            else:
                policies = self.cfg.getGlobalParam(
                    C.ENT_SCORING_ADDITIONAL_POLICIES)
                if policies and policy_name in policies:
                    policy = policies[policy_name]
            if policy is None:
                errors.append(
                    validatorlib.ValidationError(
                        policy_name, C.SCORING_ADJUST_POLICY_MISSING))

        # If no errors, now save each group (even unknown ones)
        if not errors:
            for group in settings.keys():
                policy[group] = settings[group]
            if policy_name == ScoringAdjustHandler.DEFAULT_POLICY_NAME:
                self.cfg.setGlobalParam(C.ENT_SCORING_ADJUST, policy)
            else:
                policies = self.cfg.getGlobalParam(
                    C.ENT_SCORING_ADDITIONAL_POLICIES)
                policies[policy_name] = policy
                self.cfg.setGlobalParam(C.ENT_SCORING_ADDITIONAL_POLICIES,
                                        policies)
            errors = validatorlib.VALID_OK

        return admin_handler.formatValidationErrors(errors)
コード例 #4
0
 def check_characters(self, item, special):
     """Report whether item contains anything listed in special.

     Returns a validatorlib.ValidationError carrying
     C.QUERY_EXP_VALIDATION_INVALID_CHAR if at least one element of
     special occurs in item, and None otherwise.
     """
     offending = [ch for ch in special if ch in item]
     if not offending:
         return None
     return validatorlib.ValidationError(
         "Item contains invalid character",
         C.QUERY_EXP_VALIDATION_INVALID_CHAR)
コード例 #5
0
    def validate_line(self, line):
        """Check a single blacklist line.

        The line is rejected if self.whitespace matches anywhere in it;
        otherwise the verdict is whatever self.check_characters returns
        for self.QUERY_EXP_BLACKLIST_SPECIAL_CHARACTERS.

        Returns None when the line is acceptable, or an error in the form
        of a validatorlib.ValidationError object.
        """
        contains_space = self.whitespace.search(line)
        if not contains_space:
            return self.check_characters(
                line, self.QUERY_EXP_BLACKLIST_SPECIAL_CHARACTERS)

        # Only lines without any white space are allowed.
        return validatorlib.ValidationError(
            "Cannot have space in blacklist entry",
            C.QUERY_EXP_VALIDATION_WHITESPACE)
コード例 #6
0
    def validate_metadata(self, params):
        """Validate the name:value metadata pairs found in params.

        The pairs are the elements at the odd indexes of params. Duplicate
        pairs are reported first; only when there are none is each pair
        checked with validatorlib.EnterpriseMetadata for being malformed.

        Returns a list of validatorlib.ValidationError objects, empty when
        nothing is wrong. The detail string of each error is the pair
        itself: the message coming back from the validator cannot be used
        because it is not internationalized.
        """
        pairs = [params[idx] for idx in range(1, len(params), 2)]

        # Count how often each pair occurs.
        occurrences = {}
        for pair in pairs:
            occurrences[pair] = occurrences.get(pair, 0) + 1

        # Any pair seen more than once is a duplicate.
        duplicate_errors = [
            validatorlib.ValidationError(pair,
                                         C.SCORING_ADJUST_DUPLICATE_PAIRS)
            for pair, seen in occurrences.items() if seen > 1]
        if duplicate_errors:
            # Do not proceed with further validation if any duplicates.
            return duplicate_errors

        # Next, check for malformed name:value pairs.
        checker = validatorlib.EnterpriseMetadata()
        malformed_errors = []
        for pair in pairs:
            outcome = checker.validate(pair, None)
            is_failure = (outcome != validatorlib.VALID_OK
                          and outcome != validatorlib.VALID_SHORT_CIRCUIT)
            if not is_failure:
                continue
            logging.info("Errors on pair %s are %s" %
                         (pair, repr(outcome)))
            malformed_errors.append(
                validatorlib.ValidationError(pair,
                                             C.SCORING_ADJUST_BAD_PAIRS))
        return malformed_errors
コード例 #7
0
    def validate_equivalence(self, line, operator):
        """Validate an equivalence line such as a=b or a>b.

        Args:
          line: the line to check.
          operator: index within line of the operator character.

        Returns:
          None if the line is acceptable, otherwise an error in the form
          of a validatorlib.ValidationError object (or whatever
          self.check_characters returned for the offending side).
        """
        this_op = line[operator]
        other_op = '='
        if this_op == '=':
            other_op = '>'

        # Exactly one operator is allowed: the one found must not repeat
        # further along the line, and the other one must not occur at all.
        repeated = line.find(this_op, operator + 1) >= 0
        if repeated or line.find(other_op) >= 0:
            return validatorlib.ValidationError(
                "Line must contain one operator (= or >)",
                C.QUERY_EXP_VALIDATION_OPERATOR)

        lhs = line[:operator].strip()
        if not lhs:
            return validatorlib.ValidationError(
                "Word or phrase missing before operator",
                C.QUERY_EXP_VALIDATION_EMPTY_LEFT)
        problem = self.check_characters(
            lhs, self.QUERY_EXP_SYNONYMS_SPECIAL_CHARACTERS)
        if problem:
            return problem

        rhs = line[operator + 1:].strip()
        if not rhs:
            return validatorlib.ValidationError(
                "Word or phrase missing after operator",
                C.QUERY_EXP_VALIDATION_EMPTY_RIGHT)
        problem = self.check_characters(
            rhs, self.QUERY_EXP_SYNONYMS_SPECIAL_CHARACTERS)
        if problem:
            return problem

        return None
コード例 #8
0
    def validate(self, contents, max_errors):
        """Run self.validate_line over every entry line of contents.

        A leading UTF-8 byte order mark is dropped, then contents is split
        into lines and each line is stripped. Blank lines are skipped.
        Lines starting with '#' are skipped too, but they do count as
        content for the emptiness check. Every other line is an entry and
        is passed to self.validate_line; each error returned gets a 'LINE'
        attribute holding its 1-based line number. Scanning stops once
        max_errors errors have been collected. Contents without any
        non-blank line yield a "File must be non-empty" error.

        Returns a tuple (entry count, errors) where errors is
        validatorlib.VALID_OK if nothing was wrong and a list of
        validatorlib.ValidationError objects otherwise.
        """
        # Skip the BOM if given
        bom = unichr(0xFEFF).encode("utf-8")
        if contents.startswith(bom):
            contents = contents[len(bom):]

        problems = []
        entries = 0
        saw_content = 0
        lineno = 0

        for raw_line in contents.splitlines():
            lineno += 1
            text = raw_line.strip()
            if not text:
                continue

            saw_content = 1
            if text.startswith('#'):
                continue

            entries += 1
            problem = self.validate_line(text)
            if not problem:
                continue

            problem.addAttrib('LINE', lineno)
            problems.append(problem)
            if len(problems) >= max_errors:
                break

        if not saw_content:
            problems.append(
                validatorlib.ValidationError(
                    "File must be non-empty",
                    C.QUERY_EXP_VALIDATION_FILE_EMPTY))

        if problems:
            return (entries, problems)
        return (entries, validatorlib.VALID_OK)
コード例 #9
0
    def validate_line(self, line):
        """Dispatch a synonyms line to the matching validator.

        A line starting with '{' is handled by self.validate_set; a line
        containing = or > is handled by self.validate_equivalence, which
        receives the index of the operator. Anything else is an error.

        Returns None or an error, in the form of a
        validatorlib.ValidationError object.
        """
        if line.startswith('{'):
            return self.validate_set(line)

        # '=' takes precedence: '>' is only searched for when the line
        # contains no '=' at all.
        position = line.find('=')
        if position < 0:
            position = line.find('>')

        if position < 0:
            # No set brackets and not an operator
            return validatorlib.ValidationError(
                "Line must contain an operator (= or >) or be a set",
                C.QUERY_EXP_VALIDATION_OPERATOR)

        return self.validate_equivalence(line, position)
コード例 #10
0
    def setvar(self, userName, varName, varVal):
        """Set one variable on a user's parameters, with validation.

        varVal is stripped and evaluated with config_utils.SafeExec into a
        dictionary; the value stored is the entry named varName in that
        dictionary.

        Returns the result of admin_handler.formatValidationErrors for the
        errors reported by set_var, or for an "Invalid User" error when the
        user's parameters do not exist and cannot be created. Returns 1
        when varName is missing from the evaluated text or set_var raises
        KeyError.
        """
        user = ent_collection.EntUserParam(userName, self.cfg.globalParams)
        # if user has no params yet, create them
        if not user.Exists():
            if not user.Create():
                logging.error("Failed to create userparam %s" % userName)
                user.Delete()
                invalid_user = validatorlib.ValidationError("Invalid User")
                return admin_handler.formatValidationErrors([invalid_user])

        parsed = {}
        config_utils.SafeExec(varVal.strip(), parsed)
        if varName not in parsed:
            return 1

        try:
            errors = user.set_var(varName, parsed[varName], validate=1)
        except KeyError:
            return 1

        return admin_handler.formatValidationErrors(errors)
コード例 #11
0
class QueryExpansionBase:
    """Handles all operations which depend only on the collection object."""

    # Timeout for executing the synonyms compiler. As a data point, on an
    # unloaded one-way, a file of 80000 synonyms compiles in about 5 seconds, so
    # this limit would only be hit if something goes wrong, or on a stupendously
    # huge file or a very heavily loaded machine.
    COMPILER_TIMEOUT = 300

    # Status flag to indicate when an apply is in progress
    applying_changes = 0

    # Status flag to indicate when an upload is in progress
    uploading_dict = 0

    # Languages for which custom dictionaries and blacklists may be uploaded.
    # TODO(dahe): derive from configuration settings
    languages = ('all', 'en', 'pt', 'fr', 'it', 'de', 'es', 'nl')

    def __init__(self, cfg):
        """Initialize with global params (used for storing collection info)"""
        self.cfg = cfg

    def ConstructCollectionObject(self, name):
        """Returns a collection object for the given name."""
        return ent_collection.EntQueryExp(name, self.cfg.globalParams)

    def List(self):
        """Returns all query expansion entry names, one per line.

        The names are joined with newlines and the result always ends with
        a newline.
        """
        names = ent_collection.ListQueryExpEntries(self.cfg.globalParams)
        return "%s\n" % "\n".join(names)

    def Upload(self, coll_obj, patch, params, max_errors, contents):
        """Upload (make) an entry, provided the contents pass validation.

        Args:
          coll_obj: collection object for this entry.
          patch: 1 if an existing entry is to be patched (see Create on
            the collection object for details).
          params: dictionary of additional parameters, also passed to
            Create. It must contain C.ENTRY_TYPE, everything else is
            optional; C.ENTRY_COUNT is filled in here.
          max_errors: maximum number of errors in validation; it is
            converted with int() before use.
          contents: the contents of the entry.

        Returns:
          validatorlib.VALID_OK on success,
          validatorlib.VALID_SHORT_CIRCUIT for an unknown entry type, the
          errors reported by the validator if validation fails, or a
          single validatorlib.ValidationError if the entry cannot be
          created or its contents cannot be stored.
        """

        name = coll_obj.name
        logging.info("Uploading dictionary %s" % name)
        contents = entconfig.RepairUTF8(contents)

        # Pick the validator that matches the declared entry type.
        entry_type = params[C.ENTRY_TYPE]
        if entry_type == C.QUERY_EXP_FILETYPE_SYNONYMS:
            validator = SynonymsValidator()
        elif entry_type == C.QUERY_EXP_FILETYPE_BLACKLIST:
            validator = BlacklistValidator()
        else:
            logging.error("Unknown entry_type: %s" % entry_type)
            return validatorlib.VALID_SHORT_CIRCUIT

        entry_count, errors = validator.validate(contents, int(max_errors))

        if errors != validatorlib.VALID_OK:
            logging.error("Errors validating query exp upload for %s" % name)
            return errors

        logging.info("Successful validation for query exp entry %s" % name)
        params[C.ENTRY_COUNT] = entry_count

        # Setting "needs apply" could be overzealous if the next stage fails,
        # but we prefer to err on the side of caution
        self.cfg.setGlobalParam(C.QUERY_EXP_STATUS,
                                int(C.QUERY_EXP_STATUS_NEEDS_APPLY))

        # Now we can actually create the object.
        # NOTE(review): QUERYEXP_UNABLE_TO_CREATE_ENTRY is used without the
        # "C." prefix the other constants carry; presumably it is a
        # module-level name -- verify.
        try:
            if not coll_obj.Create(patch, params):
                return validatorlib.ValidationError(
                    "Unable to create query exp entry",
                    QUERYEXP_UNABLE_TO_CREATE_ENTRY)
        except Exception:
            # format_exception returns newline-terminated chunks, so they
            # are concatenated without a separator. The exc_info tuple is
            # passed straight through instead of being stored in locals,
            # which would keep the traceback alive in a reference cycle.
            exc_msg = "".join(traceback.format_exception(*sys.exc_info()))
            logging.error(exc_msg)
            return validatorlib.ValidationError(
                "Unable to create query exp entry",
                QUERYEXP_UNABLE_TO_CREATE_ENTRY)

        # Ideally we would set the contents at the same time as the Create
        # TODO(dahe): do this if possible.
        # NOTE(review): the value returned by set_file_var_content is
        # discarded; only a KeyError is treated as a failure -- confirm
        # that this is intended.
        try:
            coll_obj.set_file_var_content(C.CONTENT,
                                          contents,
                                          validate=0)
        except KeyError:
            # Do not leave behind an entry without contents.
            coll_obj.Delete()
            return validatorlib.ValidationError(
                "Unable to create query exp entry",
                QUERYEXP_UNABLE_TO_CREATE_ENTRY)

        return validatorlib.VALID_OK
コード例 #12
0
 def formatError(self, code, msg):
     """Format a single error for return.

     Note the argument order: the validatorlib.ValidationError is built
     from (msg, code), the reverse of this method's parameters.
     """
     single_error = validatorlib.ValidationError(msg, code)
     return admin_handler.formatValidationErrors([single_error])