def validate_set(self, line):
    """Validate a set line (i.e. {a, b, ...}).

    The first character of line is dropped without being re-checked, so the
    caller is expected to pass a line that begins with '{'.

    Returns None when the line is a well-formed set. Otherwise returns a
    validatorlib.ValidationError, or whatever self.check_characters reports
    for the first offending element.
    """
    if not line.endswith('}'):
        # Set with missing close brackets
        return validatorlib.ValidationError(
            "Missing } for word set.",
            C.QUERY_EXP_VALIDATION_SET_SYNTAX)
    if "=" in line or ">" in line:
        return validatorlib.ValidationError(
            "Sets can not contain operators = and >.",
            C.QUERY_EXP_VALIDATION_SET_SYNTAX)

    inner = line[1:-1]
    # Splitting on ',' always yields at least one element (an empty string
    # for "{}"), so an empty set must be detected on the text between the
    # braces; a length check on the split result can never fire.
    if inner.strip() == '':
        return validatorlib.ValidationError(
            "Empty word set.",
            C.QUERY_EXP_VALIDATION_SET_SYNTAX)

    word_set = inner.split(',')
    if len(word_set) > 32:
        return validatorlib.ValidationError(
            "Too many elements in word set. The limit is 32.",
            C.QUERY_EXP_VALIDATION_SET_TOO_BIG)

    for w in word_set:
        # A blank element means two adjacent commas or a leading/trailing
        # comma, e.g. "{a,,b}" or "{a,}".
        if w.strip() == '':
            return validatorlib.ValidationError(
                "Empty elements in word set.",
                C.QUERY_EXP_VALIDATION_SET_SYNTAX)
        error = self.check_characters(
            w, self.QUERY_EXP_SYNONYMS_SPECIAL_CHARACTERS)
        if error:
            return error
    return None
def Upload(self, coll_obj, patch, params, max_errors, contents):
    """Validate contents and create the query expansion entry.

    coll_obj is the collection object for this entry; its name is used for
    logging and its Create method is called with patch and params.
    patch is passed through to coll_obj.Create unchanged.
    params is a dictionary that must contain C.ENTRY_TYPE; C.ENTRY_COUNT is
      filled in once validation has succeeded.
    max_errors is converted to int and handed to the validator.
    contents is the contents of the entry.

    Returns validatorlib.VALID_SHORT_CIRCUIT for an unknown entry type, the
    validator's errors when validation fails, or a single
    validatorlib.ValidationError when the entry cannot be created.
    """
    entry_name = coll_obj.name
    logging.info("Uploading dictionary %s" % entry_name)
    contents = entconfig.RepairUTF8(contents)

    # Pick the validator that matches the kind of file being uploaded.
    entry_type = params[C.ENTRY_TYPE]
    if entry_type == C.QUERY_EXP_FILETYPE_SYNONYMS:
        validator = SynonymsValidator()
    elif entry_type == C.QUERY_EXP_FILETYPE_BLACKLIST:
        validator = BlacklistValidator()
    else:
        logging.error("Unknown entry_type: %s" % entry_type)
        return validatorlib.VALID_SHORT_CIRCUIT

    entry_count, errors = validator.validate(contents, int(max_errors))
    if errors != validatorlib.VALID_OK:
        logging.error("Errors validating query exp upload for %s" % entry_name)
        return errors

    logging.info("Successful validation for query exp entry %s" % entry_name)
    params[C.ENTRY_COUNT] = entry_count

    # Setting "needs apply" could be overzealous if the next stage fails,
    # but we prefer to err on the side of caution
    self.cfg.setGlobalParam(C.QUERY_EXP_STATUS,
                            int(C.QUERY_EXP_STATUS_NEEDS_APPLY))

    # Now we can actually create the object.
    try:
        if not coll_obj.Create(patch, params):
            return validatorlib.ValidationError(
                "Unable to create query exp entry",
                QUERYEXP_UNABLE_TO_CREATE_ENTRY)
    except Exception:
        # Log the full traceback, then report the same generic error.
        exc_type, exc_value, exc_tb = sys.exc_info()
        logging.error(
            " ".join(traceback.format_exception(exc_type, exc_value, exc_tb)))
        return validatorlib.ValidationError(
            "Unable to create query exp entry",
            QUERYEXP_UNABLE_TO_CREATE_ENTRY)
    # NOTE(review): when creation succeeds this version falls off the end
    # and returns None - confirm whether a VALID_OK return is expected here.
def setparams(self, policy_name, encoded_args):
    """Validate and store scoring adjustment settings for one policy.

    policy_name selects the policy: ScoringAdjustHandler.DEFAULT_POLICY_NAME
      is stored under C.ENT_SCORING_ADJUST, any other name is looked up in
      the C.ENT_SCORING_ADDITIONAL_POLICIES dictionary.
    encoded_args is stripped and handed to config_utils.SafeExec together
      with an empty dictionary; the resulting keys are treated as group names
      ("patterns", "datebias", "metadata", ...) mapped to their parameters.

    Errors from every validated group are accumulated. Nothing is saved
    unless all groups validate and the policy already exists.

    Returns admin_handler.formatValidationErrors() applied to the collected
    errors, or to validatorlib.VALID_OK when the settings were saved.
    """
    settings = {}
    # NOTE(review): encoded_args is executed through config_utils.SafeExec;
    # confirm that helper is safe for whatever input reaches this handler.
    config_utils.SafeExec(encoded_args.strip(), settings)

    errors = []
    # Validate settings for each group. Results are accumulated rather than
    # assigned, so a clean result for one group cannot erase the errors
    # found in another.
    for group in settings:
        group_errors = None
        if group == "patterns":
            # Params should be a list containing the scoring weight, then
            # alternating patterns and adjust levels. We only validate the
            # patterns.
            group_errors = self.validate_patterns(settings["patterns"])
        elif group == "datebias":
            # the only param, weight [0..100] is already validated in the
            # handler code (ScoringAdjustHandler)
            pass
        elif group == "metadata":
            # Params should be a list containing the scoring weight, then
            # alternating name:value metadata information and adjust levels.
            # We only validate the name:value metadata information.
            group_errors = self.validate_metadata(settings["metadata"])
        else:
            logging.info("Ignoring unknown scoring group " + group)
        if group_errors:
            # Assumes the validators return a list of errors, as
            # validate_metadata does.
            errors.extend(group_errors)

    # If no errors yet, make sure policy is present.
    policy = None
    if not errors:
        if policy_name == ScoringAdjustHandler.DEFAULT_POLICY_NAME:
            policy = self.cfg.getGlobalParam(C.ENT_SCORING_ADJUST)
        else:
            policies = self.cfg.getGlobalParam(
                C.ENT_SCORING_ADDITIONAL_POLICIES)
            if policies and policy_name in policies:
                policy = policies[policy_name]
        if policy is None:
            errors.append(
                validatorlib.ValidationError(
                    policy_name, C.SCORING_ADJUST_POLICY_MISSING))

    # If no errors, now save each group (even unknown ones)
    if not errors:
        for group in settings:
            policy[group] = settings[group]
        if policy_name == ScoringAdjustHandler.DEFAULT_POLICY_NAME:
            self.cfg.setGlobalParam(C.ENT_SCORING_ADJUST, policy)
        else:
            # Re-read the dictionary, update this policy's entry and write
            # the whole dictionary back.
            policies = self.cfg.getGlobalParam(
                C.ENT_SCORING_ADDITIONAL_POLICIES)
            policies[policy_name] = policy
            self.cfg.setGlobalParam(C.ENT_SCORING_ADDITIONAL_POLICIES,
                                    policies)
        errors = validatorlib.VALID_OK

    return admin_handler.formatValidationErrors(errors)
def check_characters(self, item, special):
    """Look for any element of special inside item.

    Returns a validatorlib.ValidationError with code
    C.QUERY_EXP_VALIDATION_INVALID_CHAR when at least one element of special
    occurs in item, and None otherwise.
    """
    offending = [ch for ch in special if ch in item]
    if not offending:
        return None
    return validatorlib.ValidationError(
        "Item contains invalid character",
        C.QUERY_EXP_VALIDATION_INVALID_CHAR)
def validate_line(self, line):
    """Validate one blacklist line.

    Returns a validatorlib.ValidationError when self.whitespace matches
    anywhere in the line. Otherwise returns the result of checking the line
    against self.QUERY_EXP_BLACKLIST_SPECIAL_CHARACTERS (None when the line
    is clean).
    """
    # Only allow lines with no white space
    whitespace_match = self.whitespace.search(line)
    if not whitespace_match:
        return self.check_characters(
            line, self.QUERY_EXP_BLACKLIST_SPECIAL_CHARACTERS)
    return validatorlib.ValidationError(
        "Cannot have space in blacklist entry",
        C.QUERY_EXP_VALIDATION_WHITESPACE)
def validate_metadata(self, params):
    """Validate the name:value metadata pairs found in params.

    The pairs are the elements at odd indexes (1, 3, 5, ...) of params.
    Duplicated pairs are reported first; only when there are none is each
    pair checked with validatorlib.EnterpriseMetadata.

    Returns a list of validatorlib.ValidationError objects, empty when every
    pair is acceptable.
    """
    pairs = params[1::2]

    # Count how many times each name:value pair occurs.
    occurrences = {}
    for pair in pairs:
        occurrences[pair] = occurrences.get(pair, 0) + 1

    # Any pair seen more than once is a duplicate. For the detail string we
    # use the pair itself: the validator's own message is not
    # internationalized.
    errors = [
        validatorlib.ValidationError(pair, C.SCORING_ADJUST_DUPLICATE_PAIRS)
        for pair, count in occurrences.items()
        if count > 1
    ]
    if errors:
        # Do not proceed with further validation if any duplicates.
        return errors

    # Next, check for malformed name:value pairs.
    validator = validatorlib.EnterpriseMetadata()
    for pair in pairs:
        pair_errors = validator.validate(pair, None)
        malformed = (pair_errors != validatorlib.VALID_OK and
                     pair_errors != validatorlib.VALID_SHORT_CIRCUIT)
        if malformed:
            logging.info("Errors on pair %s are %s" %
                         (pair, repr(pair_errors)))
            errors.append(
                validatorlib.ValidationError(pair, C.SCORING_ADJUST_BAD_PAIRS))
    return errors
def validate_equivalence(self, line, operator):
    """Validate an equivalence line (i.e. a=b or a>b).

    operator is the index in line of the operator character.

    Returns None when the line is valid. Otherwise returns a
    validatorlib.ValidationError, or the error reported by
    self.check_characters for the left or right part.
    """
    op = line[operator]
    other = {'=': '>'}.get(op, '=')

    # The operator must not occur a second time, and the other operator
    # must not occur at all.
    repeated = line.find(op, operator + 1) >= 0
    if repeated or line.find(other) >= 0:
        return validatorlib.ValidationError(
            "Line must contain one operator (= or >)",
            C.QUERY_EXP_VALIDATION_OPERATOR)

    # Both sides get the same treatment: non-empty after stripping, and
    # free of special characters. The left side is checked first.
    sides = (
        (line[:operator],
         "Word or phrase missing before operator",
         C.QUERY_EXP_VALIDATION_EMPTY_LEFT),
        (line[operator + 1:],
         "Word or phrase missing after operator",
         C.QUERY_EXP_VALIDATION_EMPTY_RIGHT),
    )
    for text, message, code in sides:
        part = text.strip()
        if part == '':
            return validatorlib.ValidationError(message, code)
        error = self.check_characters(
            part, self.QUERY_EXP_SYNONYMS_SPECIAL_CHARACTERS)
        if error:
            return error
    return None
def validate(self, contents, max_errors):
    """Run validate_line over every entry line in contents.

    A leading UTF-8 byte order mark is skipped. Each line is stripped; blank
    lines are ignored and lines starting with '#' are skipped without being
    counted as entries. Every error returned by validate_line is tagged with
    a 'LINE' attribute holding its 1-based line number, and checking stops
    once max_errors errors have been collected. Contents with nothing but
    blank lines produce a "File must be non-empty" error.

    Returns a tuple (entry_count, errors), where errors is
    validatorlib.VALID_OK when no error was found and a list of
    validatorlib.ValidationError objects otherwise.
    """
    # Skip the BOM if given
    bom = u'\ufeff'.encode("utf-8")
    if contents.startswith(bom):
        contents = contents[len(bom):]

    errors = []
    entry_count = 0
    line_number = 0
    saw_content = False
    for raw_line in contents.splitlines():
        line_number += 1
        line = raw_line.strip()
        if line == '':
            continue
        # Comment lines count as content, but not as entries.
        saw_content = True
        if line.startswith('#'):
            continue
        entry_count += 1
        error = self.validate_line(line)
        if not error:
            continue
        error.addAttrib('LINE', line_number)
        errors.append(error)
        if len(errors) >= max_errors:
            break

    if not saw_content:
        errors.append(
            validatorlib.ValidationError(
                "File must be non-empty",
                C.QUERY_EXP_VALIDATION_FILE_EMPTY))

    if errors:
        return (entry_count, errors)
    return (entry_count, validatorlib.VALID_OK)
def validate_line(self, line):
    """Validate one synonyms line.

    A line starting with '{' is handed to validate_set. Otherwise the first
    '=' in the line (or, when there is none, the first '>') is taken as the
    operator and the line is handed to validate_equivalence.

    Returns None for a valid line, or an error in the form of a
    validatorlib.ValidationError object.
    """
    if line.startswith('{'):
        return self.validate_set(line)

    operator = line.find('=')
    if operator < 0:
        operator = line.find('>')
    if operator < 0:
        # No setbrackets and not an operator
        return validatorlib.ValidationError(
            "Line must contain an operator (= or >) or be a set",
            C.QUERY_EXP_VALIDATION_OPERATOR)
    return self.validate_equivalence(line, operator)
def setvar(self, userName, varName, varVal):
    """Set one variable on a user's parameters, creating them if needed.

    varVal is stripped and handed to config_utils.SafeExec together with an
    empty dictionary; the value stored is whatever ends up under varName in
    that dictionary.

    Returns 1 when varName is missing from the evaluated dictionary or when
    user.set_var raises KeyError. Otherwise returns
    admin_handler.formatValidationErrors() applied to the errors from
    set_var, or to an "Invalid User" error when the user parameters could
    not be created.
    """
    user = ent_collection.EntUserParam(userName, self.cfg.globalParams)

    # if user has no params yet, create them
    if not (user.Exists() or user.Create()):
        logging.error("Failed to create userparam %s" % userName)
        user.Delete()
        invalid_user = validatorlib.ValidationError("Invalid User")
        return admin_handler.formatValidationErrors([invalid_user])

    evaluated = {}
    config_utils.SafeExec(varVal.strip(), evaluated)
    if varName not in evaluated:
        return 1
    value = evaluated[varName]

    try:
        errors = user.set_var(varName, value, validate=1)
    except KeyError:
        return 1
    return admin_handler.formatValidationErrors(errors)
class QueryExpansionBase:
    """Handles all operations which depend only on the collection object."""

    # Timeout for executing the synonyms compiler. As a data point, on an
    # unloaded one-way, a file of 80000 synonyms compiles in about 5 seconds,
    # so this limit would only be hit if something goes wrong, or on a
    # stupendously huge file or a very heavily loaded machine.
    COMPILER_TIMEOUT = 300

    # Status flag to indicate when an apply is in progress
    applying_changes = 0

    # Status flag to indicate when an upload is in progress
    uploading_dict = 0

    # Languages for which custom dictionaries and blacklists may be uploaded.
    # TODO(dahe): derive from configuration settings
    languages = ('all', 'en', 'pt', 'fr', 'it', 'de', 'es', 'nl')

    def __init__(self, cfg):
        """Keep a reference to cfg, which holds the global params."""
        self.cfg = cfg

    def ConstructCollectionObject(self, name):
        """Build an ent_collection.EntQueryExp object for the given name."""
        global_params = self.cfg.globalParams
        return ent_collection.EntQueryExp(name, global_params)

    def List(self):
        """Return the query expansion entry names, one per line.

        The names are joined with newlines and a final newline is appended.
        """
        names = ent_collection.ListQueryExpEntries(self.cfg.globalParams)
        return '\n'.join(names) + "\n"

    def Upload(self, coll_obj, patch, params, max_errors, contents):
        """Upload (make) an entry, provided the contents pass validation.

        coll_obj is the collection object for this entry.
        patch is passed to coll_obj.Create unchanged (see Create on the
          collection object for details).
        params is a dictionary of additional parameters, also passed to
          Create. It must contain C.ENTRY_TYPE; C.ENTRY_COUNT is filled in
          after successful validation.
        max_errors is the maximum number of errors in validation.
        contents is the contents of the entry.

        Returns validatorlib.VALID_OK on success,
        validatorlib.VALID_SHORT_CIRCUIT for an unknown entry type, the
        validator's errors when validation fails, or a single
        validatorlib.ValidationError when the entry cannot be created.
        """
        entry_name = coll_obj.name
        logging.info("Uploading dictionary %s" % entry_name)
        contents = entconfig.RepairUTF8(contents)

        # Choose the validator matching the uploaded file type.
        entry_type = params[C.ENTRY_TYPE]
        if entry_type == C.QUERY_EXP_FILETYPE_SYNONYMS:
            validator = SynonymsValidator()
        elif entry_type == C.QUERY_EXP_FILETYPE_BLACKLIST:
            validator = BlacklistValidator()
        else:
            logging.error("Unknown entry_type: %s" % entry_type)
            return validatorlib.VALID_SHORT_CIRCUIT

        entry_count, errors = validator.validate(contents, int(max_errors))
        if errors != validatorlib.VALID_OK:
            logging.error(
                "Errors validating query exp upload for %s" % entry_name)
            return errors

        logging.info(
            "Successful validation for query exp entry %s" % entry_name)
        params[C.ENTRY_COUNT] = entry_count

        # Setting "needs apply" could be overzealous if the next stage fails,
        # but we prefer to err on the side of caution
        self.cfg.setGlobalParam(C.QUERY_EXP_STATUS,
                                int(C.QUERY_EXP_STATUS_NEEDS_APPLY))

        # Now we can actually create the object.
        try:
            if not coll_obj.Create(patch, params):
                return validatorlib.ValidationError(
                    "Unable to create query exp entry",
                    QUERYEXP_UNABLE_TO_CREATE_ENTRY)
        except Exception:
            # Log the full traceback and report the generic failure.
            exc_type, exc_value, exc_tb = sys.exc_info()
            logging.error(" ".join(
                traceback.format_exception(exc_type, exc_value, exc_tb)))
            return validatorlib.ValidationError(
                "Unable to create query exp entry",
                QUERYEXP_UNABLE_TO_CREATE_ENTRY)

        # Ideally we would set the contents at the same time as the Create
        # TODO(dahe): do this if possible.
        try:
            coll_obj.set_file_var_content(C.CONTENT, contents, validate=0)
        except KeyError:
            # Remove the entry created above before reporting the failure.
            coll_obj.Delete()
            return validatorlib.ValidationError(
                "Unable to create query exp entry",
                QUERYEXP_UNABLE_TO_CREATE_ENTRY)

        return validatorlib.VALID_OK
def formatError(self, code, msg):
    """Format a single error for return.

    Wraps msg and code in a validatorlib.ValidationError and passes it, as a
    one-element list, to admin_handler.formatValidationErrors.
    """
    error = validatorlib.ValidationError(msg, code)
    return admin_handler.formatValidationErrors([error])