def __init__(self):
    """Initialize the constraint with its entity-proxy and resolver helpers."""
    # NOTE: explicit base call (not super()) — file is Python 2 style and the
    # enclosing class may be old-style.
    ASearchResultConstraint.__init__(self)
    self.__resolver = Resolver()
    self.__stamped  = StampedSource()
class UniqueSearchResultConstraint(ASearchResultConstraint):
    """
    Represents a single uniqueness constraint with respect to entity search
    results, providing the ability to verify that the constraint is satisfied
    via the validate function.
    """

    def __init__(self):
        ASearchResultConstraint.__init__(self)
        self.__stamped  = StampedSource()
        self.__resolver = Resolver()

    def validate(self, results):
        """
        Validates the search result set to ensure that there are no obvious
        duplicate results.

        Parameters:
            results -- sequence of entity search results; each result exposes
                       `sources`, `kind`, `title`, `types`, and item access by
                       source-id key (e.g. result['tmdb_id']).

        Returns True if all results are unique within a fuzzy margin of error
        or False otherwise.
        """
        # Python 2 map() yields a list, so len()/indexing below are valid.
        proxies = map(self.__stamped.proxyFromEntity, results)

        # ensure that no result resolves definitively to any other result in
        # the result set
        for i in xrange(len(proxies)):
            proxy = proxies[i]

            # generator of every other same-kind proxy to resolve against
            def dedup():
                for j in xrange(len(proxies)):
                    proxy2 = proxies[j]

                    if i != j and proxy.kind == proxy2.kind:
                        yield proxy2

            dups = self.__resolver.resolve(proxy, generatorSource(dedup()), count=1)

            # dups[0] is the best match; a 'resolved' flag means the resolver
            # considers it a definitive duplicate of proxy.
            if dups and dups[0][0]['resolved']:
                return False

        seen = defaultdict(set)

        # ensure that there are no obvious duplicate results without using
        # the resolver
        for i in xrange(len(results)):
            result = results[i]
            keys   = [ k for k in result.sources if k.endswith('_id') ]

            # ensure that the same source id doesn't appear twice in the
            # result set (source ids are supposed to be unique)
            for key in keys:
                value = str(result[key])

                if value in seen[key]:
                    return False

                seen[key].add(value)

            # pairwise fuzzy comparison; j starts at i + 1, so each unordered
            # pair is checked once and no self-comparison can occur (the old
            # `i != j` guard here was always true and has been removed)
            for j in xrange(i + 1, len(results)):
                result2 = results[j]

                if self._eq(result.kind, result2.kind) and self._eq(result.title, result2.title):
                    # same kind + fuzzily-equal title + overlapping types
                    # => treat as a duplicate
                    if result.types.intersection(result2.types):
                        utils.log("")
                        utils.log("!" * 80)
                        utils.log("dupe encountered: %s\n%s" % (result, result2))
                        utils.log("!" * 80)
                        utils.log("")
                        return False

        return True