class CompanyDuckDuckClassifier:
    """Score a term as a company by searching DuckDuckGo with company suffixes.

    For every suffix in ``company_postfix`` the classifier searches for
    ``"<term> <suffix>"`` and counts a hit under the ``'company'`` key whenever
    the search returns a non-empty string. Results are stored in a cache that
    is loaded when the classifier is constructed.
    """

    def __init__(self):
        # NOTE(review): the meaning of the boolean flag is defined by
        # DuckduckgoSearch, which is not visible here -- confirm before changing.
        self.duckduckgo_search = DuckduckgoSearch(True)
        # Words appended to the term, one search per word.
        self.company_postfix = ['corp', 'corporation', 'company', 'inc', 'headquarters']
        self.cache = Cache(DUCKDUCK_COMPANY_CACHE, INPUT_LANGUAGE)
        # NOTE(review): not read anywhere in the visible class body; presumably
        # consumed by normalize_results -- confirm.
        self.min_hits_to_match = DUCKDUCK_COMPANY_MIN_HITS_TO_MATCH
        self.cache.load()

    def classify(self, term):
        """Return the classification result for ``term``.

        :param term: the text to classify; it is concatenated with each
            company suffix to build the search queries.
        :returns: ``self.normalize_results(...)`` applied to either the cached
            matches or the freshly counted ones. If any search raises, a
            ``ClassificationResult`` mapping every key of ``categories`` to
            ``-1`` is returned instead; that result is neither normalized nor
            written to the cache.
        """
        # NOTE(review): normalize_results is not defined in the visible class
        # body; it is assumed to be provided elsewhere -- confirm.
        cache_result = self.cache.search_cache(term)
        # Reuse the lookup made above instead of querying the cache twice.
        if cache_result is not None:
            return self.normalize_results(ClassificationResult(term, cache_result.Matches))

        result = ClassificationResult(term)
        for company_word in self.company_postfix:
            term_to_search = term + ' ' + company_word
            try:
                if self.duckduckgo_search.general_search(term_to_search) != '':
                    result.Matches['company'] += 1
            except Exception:
                # Deliberate best-effort: a failure while searching or counting
                # aborts the whole classification with the -1 sentinel, and the
                # partial counts are not cached.
                return ClassificationResult(term, {key: -1 for key in categories})

        self.cache.update_cache(term, result)
        return self.normalize_results(result)
class DuckDuckGoWordOccurrenceClassifier:
    """Classify a term by scoring the text DuckDuckGo returns for it.

    The search result for the term is handed to a ``WordOccurrenceClassifier``
    for scoring and normalization. Results are stored in a cache that is
    loaded when the classifier is constructed.
    """

    def __init__(self):
        # NOTE(review): the meaning of the boolean flag is defined by
        # DuckduckgoSearch, which is not visible here -- confirm before changing.
        self.duckduckgo_search = DuckduckgoSearch(False)
        self.word_occurrence_classifier = WordOccurrenceClassifier()
        self.cache = Cache(DUCKDUCK_WORD_OCCURRENCE_CACHE, INPUT_LANGUAGE)
        self.cache.load()

    def classify(self, term):
        """Return the classification result for ``term``.

        :param term: the text to search for and classify.
        :returns: the word-occurrence classifier's ``normalize_results(...)``
            applied to either the cached matches or the freshly calculated
            score. If the search raises, a ``ClassificationResult`` mapping
            every key of ``categories`` to ``-1`` is returned instead; that
            result is neither normalized nor written to the cache.
        """
        cache_result = self.cache.search_cache(term)
        if cache_result is not None:
            cached = ClassificationResult(term, cache_result.Matches)
            return self.word_occurrence_classifier.normalize_results(cached)

        try:
            search_result = self.duckduckgo_search.general_search(term)
        except Exception:
            # Deliberate best-effort: a failed search yields the -1 sentinel
            # rather than propagating the error to the caller.
            return ClassificationResult(term, {key: -1 for key in categories})

        result = self.word_occurrence_classifier.calculate_score(term, search_result)
        self.cache.update_cache(term, result)
        return self.word_occurrence_classifier.normalize_results(result)
class FacebookSearch: company = {"company", "non-profit organization","organization", "professional services", "product/service", "government organization", "church/religious organization"} place = {"country", "city", "landmark", "public places", "historical place", "tours/sightseeing", "travel/leisure", "national park", "neighborhood"} person = {"politician", "public figure", "government official", "writer", "athlete", "artist", "musician/band", "news personality", "entertainer", "actor/director", "author", "comedian"} def __init__(self): ##https://developers.facebook.com/tools/explorer/ user_token = "CAACEdEose0cBAO9m1ZChz3qqjqcS3HBlerOSd3wteZC7EqQYlcbngZCmQtvhKQmMmE1sORZAzZA07PboTUXgskIcZBFKeA05FpASH2hEoZCW4im9ZCuNLAlgnOHc00YM5tZByxLCZBo33JkGmq4aPFDNil7FnJGCI7dxgJVwd1ZApkurkCB8WGdZBSJTdNc6zFoCUXFHAKZCZC8OAF6RSV0ljZAw9TA" extended_access_token = "CAABfbgndG3ABAOGqO5oQ1HwqVOYrlZB6CofIOciVPgMFC4zIRRk7wJvjrZBTIpFlJ3eTZA72fs4UKmyPgasMZA6MEtAaCSegvAju2zsUXAgaTRCxAfFjwrh9x8ZBLJ4lRlEHVWg0m6ZAWk9mMWdYpAVc27cIZCwi5IXO4t0U2fWkVzbAn0UrWJS" self.graph = GraphAPI(extended_access_token) self.fb_cache = Cache(FACEBOOK_CACHE,INPUT_LANGUAGE) self.fb_cache.load() self.sleep_count = 0 def shutdown(self): self.fb_cache.save() def search_Facebook(self, term): cache_result = self.fb_cache.search_cache(term) if cache_result is None: self.sleep_count +=1 if self.sleep_count == 5: time.sleep(random.randint(1,10)) self.sleep_count = 0 cnt = [0, 0, 0, 0] try: cnt = self.search_user(term, cnt) cnt = self.search_page(term, cnt) cnt = self.search_place(term, cnt) except Exception,e: #print e cnt = [-1, -1, -1, -1] self.fb_cache.update_cache_from_list(term,cnt) else: