Exemple #1
0
    def saveResult(self, tweet, query):
        """Store one tweet as a ``SearchResult`` record tagged with *query*.

        Copies the author's screen name and id, the tweet id and the tweet
        language onto a new ``SearchResult`` and calls ``save()``.  If the
        save fails with an exception whose first argument is ``1366``, the
        record's ``text`` is set to ``None`` and the save is retried once.
        Any other exception propagates unchanged.

        Args:
            tweet: object exposing ``user.screen_name``, ``user.id``, ``id``
                and ``lang``.
            query: value stored on the record's ``query`` attribute.

        Raises:
            Exception: whatever ``save()`` raised when its first argument is
                not 1366, or whatever the retried ``save()`` raises.
        """
        # Build the record outside the ``try`` so that ``sr`` is always bound
        # when the handler below runs.
        sr = SearchResult()
        sr.screenName = tweet.user.screen_name
        sr.userId = tweet.user.id
        sr.tweetId = tweet.id
        #sr.text = tweet.text
        sr.query = query
        #sr.tweetedTime = tweet.created_at
        sr.lang = tweet.lang

        try:
            sr.save()
        except Exception as e:
            # Exceptions are not subscriptable on Python 3 (``e[0]`` raises
            # TypeError); the error code lives in ``e.args``.
            # NOTE(review): 1366 looks like MySQL's "incorrect string value"
            # error code -- confirm against the DB driver in use.
            if e.args and e.args[0] == 1366:
                log.info("Error no 1366, removing text")
                sr.text = None
                sr.save()
            else:
                # Bare ``raise`` keeps the original traceback intact.
                raise
Exemple #2
0
    def saveResult(self, tweet, query):
        """Store one tweet as a ``SearchResult`` record tagged with *query*.

        Copies the author's screen name and id, the tweet id and the tweet
        language onto a new ``SearchResult`` and calls ``save()``.  If the
        save fails with an exception whose first argument is ``1366``, the
        record's ``text`` is set to ``None`` and the save is retried once.
        Any other exception propagates unchanged.

        Args:
            tweet: object exposing ``user.screen_name``, ``user.id``, ``id``
                and ``lang``.
            query: value stored on the record's ``query`` attribute.

        Raises:
            Exception: whatever ``save()`` raised when its first argument is
                not 1366, or whatever the retried ``save()`` raises.
        """
        # Build the record outside the ``try`` so that ``sr`` is always bound
        # when the handler below runs.
        sr = SearchResult()
        sr.screenName = tweet.user.screen_name
        sr.userId = tweet.user.id
        sr.tweetId = tweet.id
        #sr.text = tweet.text
        sr.query = query
        #sr.tweetedTime = tweet.created_at
        sr.lang = tweet.lang

        try:
            sr.save()
        except Exception as e:
            # Exceptions are not subscriptable on Python 3 (``e[0]`` raises
            # TypeError); the error code lives in ``e.args``.
            # NOTE(review): 1366 looks like MySQL's "incorrect string value"
            # error code -- confirm against the DB driver in use.
            if e.args and e.args[0] == 1366:
                log.info("Error no 1366, removing text")
                sr.text = None
                sr.save()
            else:
                # Bare ``raise`` keeps the original traceback intact.
                raise
        p.dump(sr)


if __name__ == '__main__':
    # Script entry point: read the queries from 'queries.txt', visit them in
    # random order with a Firefox WebDriver, collect the parsed results of the
    # first page plus every page returned by parse_navigation(), then hand the
    # collected SearchResult objects to save_search_results().
    with open('queries.txt', 'r') as f:
        queries = f.readlines()
    random.shuffle(queries)
    search_results = []
    driver = webdriver.Firefox()
    try:
        for query in queries:
            # readlines() keeps the trailing newline, so a blank line is
            # "\n" (truthy) and `not query` never skipped it.  Test the
            # stripped text; the query itself is passed on unchanged.
            if not query.strip():
                continue
            src = get_text_with_query(driver, query)
            sr = SearchResult()
            sr.results += parse(src)
            sr.query = query

            # Pause between page loads.
            time.sleep(WAIT)

            # Loop variable is `page`, not `p`: this loop runs at module
            # scope and `p` is a name used elsewhere in the file
            # (`p.dump(sr)`), so rebinding it here is best avoided.
            for page in parse_navigation(src):
                path = page.get('url')
                t = get_text(driver, BASE_URL + path)
                time.sleep(WAIT)
                sr.results += parse(t)
            search_results.append(sr)
    finally:
        # Close the browser even when a page load or parse step raises.
        driver.close()
    save_search_results(search_results)
Exemple #4
0
        p.dump(sr)


if __name__ == '__main__':
    # Script entry point: read the queries from 'queries.txt', visit them in
    # random order with a Firefox WebDriver, collect the parsed results of the
    # first page plus every page returned by parse_navigation(), then hand the
    # collected SearchResult objects to save_search_results().
    with open('queries.txt', 'r') as f:
        queries = f.readlines()
    random.shuffle(queries)
    search_results = []
    driver = webdriver.Firefox()
    try:
        for query in queries:
            # readlines() keeps the trailing newline, so a blank line is
            # "\n" (truthy) and `not query` never skipped it.  Test the
            # stripped text; the query itself is passed on unchanged.
            if not query.strip():
                continue
            src = get_text_with_query(driver, query)
            sr = SearchResult()
            sr.results += parse(src)
            sr.query = query

            # Pause between page loads.
            time.sleep(WAIT)

            # Loop variable is `page`, not `p`: this loop runs at module
            # scope and `p` is a name used elsewhere in the file
            # (`p.dump(sr)`), so rebinding it here is best avoided.
            for page in parse_navigation(src):
                path = page.get('url')
                t = get_text(driver, BASE_URL + path)
                time.sleep(WAIT)
                sr.results += parse(t)
            search_results.append(sr)
    finally:
        # Close the browser even when a page load or parse step raises.
        driver.close()
    save_search_results(search_results)