class DuckDuckGoWordOccurrenceClassifier:
    """Classify a term by scoring the text of a DuckDuckGo search for it.

    Scores are looked up in a cache first; only on a miss is a search
    performed, scored by ``WordOccurrenceClassifier`` and stored back
    into the cache.
    """

    def __init__(self):
        """Create the search client, the scorer and the cache.

        ``load()`` is called on the cache as part of construction.
        """
        self.duckduckgo_search = DuckduckgoSearch(False)
        self.word_occurrence_classifier = WordOccurrenceClassifier()
        self.cache = Cache(DUCKDUCK_WORD_OCCURRENCE_CACHE, INPUT_LANGUAGE)
        self.cache.load()

    def classify(self, term):
        """Return the normalized classification result for ``term``.

        On a cache hit, a ``ClassificationResult`` is rebuilt from the
        cached entry's ``Matches`` and normalized. On a miss, the term is
        searched, scored, written to the cache (un-normalized) and the
        normalized result is returned.

        If the search raises any ``Exception``, a ``ClassificationResult``
        with ``-1`` for every key in ``categories`` is returned; that
        sentinel is neither cached nor normalized.
        """
        cache_result = self.cache.search_cache(term)
        if cache_result is not None:
            cached = ClassificationResult(term, cache_result.Matches)
            return self.word_occurrence_classifier.normalize_results(cached)

        try:
            search_result = self.duckduckgo_search.general_search(term)
        except Exception:
            # Best-effort: a failed search yields the -1 sentinel for every
            # category instead of propagating the error to the caller.
            return ClassificationResult(term, {key: -1 for key in categories})

        result = self.word_occurrence_classifier.calculate_score(
            term, search_result)
        # The raw (un-normalized) score is what gets cached; normalization
        # is re-applied on every read.
        self.cache.update_cache(term, result)
        return self.word_occurrence_classifier.normalize_results(result)
class CompanyDuckDuckClassifier:
    """Count how many company-flavoured DuckDuckGo searches for a term hit.

    The term is searched once per entry in ``company_postfix`` (e.g.
    ``"<term> corp"``); every non-empty search result increments the
    ``'company'`` match counter. Results are cached per term.
    """

    def __init__(self):
        """Create the search client, suffix list, cache and hit threshold.

        ``load()`` is called on the cache as part of construction.
        """
        self.duckduckgo_search = DuckduckgoSearch(True)
        self.company_postfix = ['corp', 'corporation', 'company', 'inc',
                                'headquarters']
        self.cache = Cache(DUCKDUCK_COMPANY_CACHE, INPUT_LANGUAGE)
        # Not read in this part of the class; presumably consumed by
        # normalize_results -- TODO confirm.
        self.min_hits_to_match = DUCKDUCK_COMPANY_MIN_HITS_TO_MATCH
        self.cache.load()

    def classify(self, term):
        """Return the normalized company classification for ``term``.

        On a cache hit, a ``ClassificationResult`` is rebuilt from the
        cached entry's ``Matches`` and normalized. On a miss, one search
        per company suffix is issued and the raw counts are cached before
        the normalized result is returned.

        If any search raises an ``Exception``, a ``ClassificationResult``
        with ``-1`` for every key in ``categories`` is returned
        immediately; nothing is cached in that case.
        """
        # NOTE(review): normalize_results is not defined in the visible
        # part of this class -- presumably provided elsewhere; confirm.
        cache_result = self.cache.search_cache(term)
        # Reuse the lookup above instead of querying the cache a second time.
        if cache_result is not None:
            return self.normalize_results(
                ClassificationResult(term, cache_result.Matches))

        result = ClassificationResult(term)
        for company_word in self.company_postfix:
            term_to_search = term + ' ' + company_word
            try:
                if self.duckduckgo_search.general_search(term_to_search) != '':
                    result.Matches['company'] += 1
            except Exception:
                # Best-effort: abort on the first failed search and report
                # the -1 sentinel rather than a partial count.
                return ClassificationResult(
                    term, {key: -1 for key in categories})

        self.cache.update_cache(term, result)
        return self.normalize_results(result)
def __init__(self):
    """Wire up the search client, company suffixes, cache and threshold.

    Collaborators are built in the same order as before; ``load()`` is
    called on the cache once every attribute has been assigned.
    """
    search_client = DuckduckgoSearch(True)
    suffixes = ['corp', 'corporation', 'company', 'inc', 'headquarters']
    company_cache = Cache(DUCKDUCK_COMPANY_CACHE, INPUT_LANGUAGE)
    hit_threshold = DUCKDUCK_COMPANY_MIN_HITS_TO_MATCH

    self.duckduckgo_search = search_client
    self.company_postfix = suffixes
    self.cache = company_cache
    self.min_hits_to_match = hit_threshold

    company_cache.load()
def __init__(self):
    """Wire up the search client, the word-occurrence scorer and the cache.

    Collaborators are built in the same order as before; ``load()`` is
    called on the cache once every attribute has been assigned.
    """
    search_client = DuckduckgoSearch(False)
    scorer = WordOccurrenceClassifier()
    occurrence_cache = Cache(DUCKDUCK_WORD_OCCURRENCE_CACHE, INPUT_LANGUAGE)

    self.duckduckgo_search = search_client
    self.word_occurrence_classifier = scorer
    self.cache = occurrence_cache

    occurrence_cache.load()