Exemple #1
0
class CompanyDuckDuckClassifier:
    """Score a term as a company by probing DuckDuckGo with company suffixes.

    The term is searched once per suffix in ``company_postfix`` (as
    ``"<term> <suffix>"``); every search that returns a non-empty answer adds
    one hit to the ``'company'`` entry of the result.  Results are cached per
    term.
    """

    def __init__(self):
        self.duckduckgo_search = DuckduckgoSearch(True)
        # Words appended to the term, one search per word.
        self.company_postfix = ['corp', 'corporation', 'company', 'inc', 'headquarters']
        self.cache = Cache(DUCKDUCK_COMPANY_CACHE, INPUT_LANGUAGE)
        # Not read by any method visible here; presumably consumed by
        # normalize_results -- TODO confirm.
        self.min_hits_to_match = DUCKDUCK_COMPANY_MIN_HITS_TO_MATCH
        self.cache.load()

    def classify(self, term):
        """Return the normalized classification result for ``term``.

        A cached entry is reused when present.  Otherwise one search is issued
        per company suffix, the hit count is stored in the cache and the
        normalized result is returned.

        If any search raises, a result holding ``-1`` for every category is
        returned immediately; that failure result is neither cached nor
        normalized.

        NOTE(review): ``normalize_results`` is not defined in the visible part
        of this class -- confirm where it is provided.
        """
        # Look the term up once and reuse the answer instead of querying the
        # cache a second time in the condition.
        cache_result = self.cache.search_cache(term)
        if cache_result is not None:
            return self.normalize_results(ClassificationResult(term, cache_result.Matches))

        result = ClassificationResult(term)
        for company_word in self.company_postfix:
            term_to_search = term + ' ' + company_word
            try:
                if self.duckduckgo_search.general_search(term_to_search) != '':
                    result.Matches['company'] += 1
            except Exception:
                # Deliberate best-effort: any search failure aborts the whole
                # classification with the -1 sentinel for every category.
                return ClassificationResult(term, {key: -1 for key in categories})
        self.cache.update_cache(term, result)
        return self.normalize_results(result)
Exemple #2
0
class DuckDuckGoWordOccurrenceClassifier:
    """Classify a term by scoring the text DuckDuckGo returns for it.

    The search answer for the term is handed to a ``WordOccurrenceClassifier``
    for scoring and normalization.  Scored results are cached per term.
    """

    def __init__(self):
        self.duckduckgo_search = DuckduckgoSearch(False)
        self.word_occurrence_classifier = WordOccurrenceClassifier()
        self.cache = Cache(DUCKDUCK_WORD_OCCURRENCE_CACHE, INPUT_LANGUAGE)
        self.cache.load()

    def classify(self, term):
        """Return the normalized classification result for ``term``.

        A cached entry is reused when present.  Otherwise the term is searched,
        the answer is scored, the score is stored in the cache and the
        normalized result is returned.

        If the search raises, a result holding ``-1`` for every category is
        returned; that failure result is neither cached nor normalized.
        """
        cache_result = self.cache.search_cache(term)
        if cache_result is not None:
            return self.word_occurrence_classifier.normalize_results(
                ClassificationResult(term, cache_result.Matches))

        try:
            search_result = self.duckduckgo_search.general_search(term)
        except Exception:
            # Deliberate best-effort: a failed search yields the -1 sentinel
            # for every category instead of propagating the error.
            return ClassificationResult(term, {key: -1 for key in categories})

        result = self.word_occurrence_classifier.calculate_score(term, search_result)
        self.cache.update_cache(term, result)
        return self.word_occurrence_classifier.normalize_results(result)
Exemple #3
0
class FacebookSearch:
    # Lower-case category labels grouped by the kind of entity they indicate.
    # NOTE(review): presumably compared against the category of Facebook
    # search hits -- confirm against the search_* helpers.
    company = {
        "company",
        "non-profit organization",
        "organization",
        "professional services",
        "product/service",
        "government organization",
        "church/religious organization",
    }
    place = {
        "country",
        "city",
        "landmark",
        "public places",
        "historical place",
        "tours/sightseeing",
        "travel/leisure",
        "national park",
        "neighborhood",
    }
    person = {
        "politician",
        "public figure",
        "government official",
        "writer",
        "athlete",
        "artist",
        "musician/band",
        "news personality",
        "entertainer",
        "actor/director",
        "author",
        "comedian",
    }

    def __init__(self, access_token=None):
        """Create the Graph API client and load the Facebook result cache.

        :param access_token: optional Graph API access token.  When omitted,
            the token embedded below is used, so existing ``FacebookSearch()``
            callers keep working unchanged.
        """
        # Tokens can be generated at https://developers.facebook.com/tools/explorer/
        # NOTE(review): a credential committed to source control must be
        # treated as leaked; prefer passing ``access_token`` from configuration
        # and retire this fallback.
        if access_token is None:
            access_token = "CAABfbgndG3ABAOGqO5oQ1HwqVOYrlZB6CofIOciVPgMFC4zIRRk7wJvjrZBTIpFlJ3eTZA72fs4UKmyPgasMZA6MEtAaCSegvAju2zsUXAgaTRCxAfFjwrh9x8ZBLJ4lRlEHVWg0m6ZAWk9mMWdYpAVc27cIZCwi5IXO4t0U2fWkVzbAn0UrWJS"
        self.graph = GraphAPI(access_token)
        self.fb_cache = Cache(FACEBOOK_CACHE, INPUT_LANGUAGE)
        self.fb_cache.load()
        # Number of uncached lookups since the last throttling pause; read and
        # reset by search_Facebook.
        self.sleep_count = 0

    def shutdown(self):
        """Save the Facebook result cache by calling its ``save`` method."""
        cache = self.fb_cache
        cache.save()

    def search_Facebook(self, term):
        cache_result = self.fb_cache.search_cache(term)
        if cache_result is None:
            self.sleep_count +=1
            if self.sleep_count == 5:
                time.sleep(random.randint(1,10))
                self.sleep_count = 0
            cnt = [0, 0, 0, 0]
            try:
                cnt = self.search_user(term, cnt)
                cnt = self.search_page(term, cnt)
                cnt = self.search_place(term, cnt)
            except Exception,e:
                #print e
                cnt = [-1, -1, -1, -1]
            self.fb_cache.update_cache_from_list(term,cnt)
        else: