Esempio n. 1
0
class DuckDuckGoWordOccurrenceClassifier:
    """Classify a term by scoring the text of a DuckDuckGo search for it.

    Scores are cached per term, so a term that was already classified is
    answered from the cache without running another search.
    """

    def __init__(self):
        """Create the search client and the scorer, then load the cache."""
        # NOTE(review): the meaning of the boolean flag is defined by
        # DuckduckgoSearch and is not visible here -- confirm.
        self.duckduckgo_search = DuckduckgoSearch(False)
        self.word_occurrence_classifier = WordOccurrenceClassifier()
        self.cache = Cache(DUCKDUCK_WORD_OCCURRENCE_CACHE, INPUT_LANGUAGE)
        self.cache.load()

    def classify(self, term):
        """Return the normalized classification of ``term``.

        A cached entry, when present, is wrapped in a new
        ``ClassificationResult`` and normalized without searching.
        Otherwise the term is searched and scored; the raw (not yet
        normalized) score is stored in the cache and its normalized form
        is returned.

        If the search raises, a ``ClassificationResult`` holding ``-1`` for
        every key in ``categories`` is returned instead. That failure
        result is neither cached nor normalized.
        """
        cache_result = self.cache.search_cache(term)
        if cache_result is not None:
            return self.word_occurrence_classifier.normalize_results(
                ClassificationResult(term, cache_result.Matches))

        try:
            search_result = self.duckduckgo_search.general_search(term)
        except Exception:
            # Deliberate best-effort: any search failure is reported as -1
            # for each category rather than propagated to the caller.
            return ClassificationResult(term, {key: -1 for key in categories})

        result = self.word_occurrence_classifier.calculate_score(term, search_result)
        # Cache the raw score; normalization is re-applied on every read.
        self.cache.update_cache(term, result)
        return self.word_occurrence_classifier.normalize_results(result)
Esempio n. 2
0
class CompanyDuckDuckClassifier:
    def __init__(self):
        """Set up the search client, the company suffixes and the cache.

        The cache is loaded last, after every attribute has been assigned.
        """
        # NOTE(review): the meaning of the boolean flag is defined by
        # DuckduckgoSearch and is not visible here -- confirm.
        search_client = DuckduckgoSearch(True)
        # Words appended to a term, one at a time, when searching for it.
        suffixes = ['corp', 'corporation', 'company', 'inc', 'headquarters']
        term_cache = Cache(DUCKDUCK_COMPANY_CACHE, INPUT_LANGUAGE)

        self.duckduckgo_search = search_client
        self.company_postfix = suffixes
        self.cache = term_cache
        self.min_hits_to_match = DUCKDUCK_COMPANY_MIN_HITS_TO_MATCH
        term_cache.load()

    def classify(self, term):
        """Return the normalized company classification of ``term``.

        A cached entry, when present, is wrapped in a new
        ``ClassificationResult`` and normalized without searching.
        Otherwise ``term`` is searched once per word in
        ``self.company_postfix`` (as ``"<term> <word>"``), and every search
        whose result is not the empty string adds one to
        ``result.Matches['company']``. The raw result is stored in the
        cache and its normalized form is returned.

        If anything inside the search step raises, a
        ``ClassificationResult`` holding ``-1`` for every key in
        ``categories`` is returned immediately. That failure result is
        neither cached nor normalized, and the remaining suffixes are not
        tried.
        """
        cache_result = self.cache.search_cache(term)
        # Reuse the lookup above instead of querying the cache a second time.
        if cache_result is not None:
            return self.normalize_results(ClassificationResult(term, cache_result.Matches))
        result = ClassificationResult(term)
        for company_word in self.company_postfix:
            term_to_search = term + ' ' + company_word
            try:
                if self.duckduckgo_search.general_search(term_to_search) != '':
                    result.Matches['company'] += 1
            except Exception:
                # Deliberate best-effort: a failure is reported as -1 for
                # each category rather than propagated to the caller.
                return ClassificationResult(term, {key: -1 for key in categories})
        # Cache the raw hit count; normalization is re-applied on every read.
        self.cache.update_cache(term, result)
        return self.normalize_results(result)