def __proxyToEntity(self, cluster):
    """
    Build one entity from the results of a cluster.

    Results whose dataQuality is below MIN_RESULT_DATA_QUALITY_TO_INCLUDE are
    left out. The remaining results are ordered best-first and all of their
    resolver objects are handed to an EntityProxyContainer, whose built entity
    is then tagged with the (source, key) pair of every contributing result.
    """
    def rankingKey(result):
        # Listings frequently have the same (or nearly the same) data quality,
        # so a tenth of the relevance is mixed in to break ties. Source and key
        # follow as further sort components.
        score = result.dataQuality + (result.relevance / 10.0)
        return (score, result.resolverObject.source, result.resolverObject.key)

    # Second layer of filtering: a result may take part in clustering (where it
    # helps boost cluster scores) and still be withheld from the final entity
    # because its data is not trusted enough to show to users.
    usable = sorted(
        (candidate for candidate in cluster.results
         if candidate.dataQuality >= MIN_RESULT_DATA_QUALITY_TO_INCLUDE),
        key=rankingKey,
        reverse=True)

    # TODO PRELAUNCH: Only use the best result from each source.
    container = EntityProxyContainer()
    populated = container.addAllProxies(candidate.resolverObject for candidate in usable)
    entity = populated.buildEntity()

    for candidate in usable:
        resolverObject = candidate.resolverObject
        entity.addThirdPartyId(resolverObject.source, resolverObject.key)

    return entity
def searchEntities(self, query, coords = None, full = True, local = False, category = None, subcategory = None, offset = 0, limit = 10):
    """
    Run a search and return the matches as built entities.

    category / subcategory are translated into kinds / types through
    _convertCategorySubcategory and, together with the remaining arguments,
    forwarded to self.search.

    coords may be None or an object exposing `lat` and `lng`. It is turned
    into a (lat, lng) tuple when both values are set; otherwise the search
    runs with coords = None.

    Returns the value of Entity.fast_id_dedupe applied to the list of entities
    built from the search items.
    """
    kinds, types = _convertCategorySubcategory(category, subcategory)

    # coords defaults to None. Check for that explicitly so that a normal call
    # without coordinates no longer raises inside the try block and logs a
    # misleading warning on every search.
    if coords is not None:
        try:
            if coords.lat is not None and coords.lng is not None:
                coords = (coords.lat, coords.lng)
            else:
                coords = None
        except Exception as e:
            # Best effort: an unusable coords object must not fail the search.
            logs.warning("Exception (%s) - setting coords to None" % e)
            coords = None

    search = self.search(query, coords = coords, full = full, local = local, offset = offset, limit = limit, kinds = kinds, types = types)

    results = []
    for item in search:
        # The second element of each search item carries the proxy in `.target`.
        target = item[1].target

        # Any source that is not in self._sources_map is treated as 'stamped'.
        source = target.source
        if source not in self._sources_map:
            source = 'stamped'

        entity = EntityProxyContainer().addProxy(target).buildEntity()

        # Hack to make sure entity_id is set (since it's not a part of a group)
        if source == 'stamped':
            entity.entity_id = target.key

        results.append(entity)

    return Entity.fast_id_dedupe(results)