def download_tweets(search=None, profile=None, sleep=1):
    """Yield tweets scraped from Twitter's HTML search or profile pages.

    Exactly one of *search* or *profile* must be given: *search* runs a
    search query, *profile* walks a user's timeline.  Pagination follows
    the ``min_position`` cursor embedded in each response.

    :param search: search query string (mutually exclusive with profile)
    :param profile: screen name whose timeline to walk
    :param sleep: seconds to pause between pagination requests
    """
    assert search or profile

    term = urllib.quote_plus(search or profile)
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL

    response = requests.get(url.format(term=term),
                            headers={'User-agent': USER_AGENT}).text
    min_position = find_value(response, 'data-min-position')

    for tweet in parse_search_results(response.encode('utf8')):
        yield tweet

    has_more_items = True
    last_min_position = None
    while has_more_items:
        response = requests.get(
            url_more.format(term=term, max_position=min_position),
            headers={'User-agent': USER_AGENT}).text
        response_dict = json.loads(response)
        min_position = response_dict['min_position']
        # Profiles report an explicit flag; for searches, stop once the
        # pagination cursor stops advancing — guards against an infinite
        # loop when the server keeps returning the same page.
        if profile:
            has_more_items = response_dict['has_more_items']
        else:
            has_more_items = last_min_position != min_position

        for tweet in parse_search_results(response_dict['items_html'].encode('utf8')):
            yield tweet

            if search:
                has_more_items = True

        last_min_position = min_position
        time.sleep(sleep)
def download_tweets(search=None, profile=None, sleep=DEFAULT_SLEEP):
    """Yield tweets scraped from Twitter's HTML search or profile pages.

    Exactly one of *search* or *profile* must be given.  On a malformed
    JSON pagination response the raw payload is dumped to a timestamped
    debug file before the error is re-raised.

    :param search: search query string (mutually exclusive with profile)
    :param profile: screen name whose timeline to walk
    :param sleep: seconds to pause between pagination requests
    """
    assert search or profile

    term = (search or profile)
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL

    # URL-escape the term at request time so spaces/special characters in
    # the query do not corrupt the request URL.
    response = requests.get(url.format(term=urllib.quote_plus(term)),
                            headers={'User-agent': USER_AGENT}).text
    min_position = find_value(response, 'data-min-position')

    for tweet in parse_search_results(response.encode('utf8')):
        yield tweet

    has_more_items = True
    while has_more_items:
        response = requests.get(
            url_more.format(term=urllib.quote_plus(term),
                            max_position=min_position),
            headers={'User-agent': USER_AGENT}).text
        try:
            response_dict = json.loads(response)
        except ValueError:
            # json.loads raises ValueError on malformed JSON (Python 2).
            # Dump the raw payload for post-mortem, then re-raise —
            # a bare except here would also swallow KeyboardInterrupt.
            import datetime
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d.%H%M')
            with open('__debug.response_%s.txt' % timestamp, 'wb') as fh:
                fh.write(repr(response) + '\n')
            raise

        min_position = response_dict['min_position']
        has_more_items = response_dict['has_more_items'] if profile else False

        for tweet in parse_search_results(response_dict['items_html'].encode('utf8')):
            yield tweet

            if search:
                has_more_items = True

        time.sleep(sleep)
# Beispiel #3 ("example 3" — separator left over from the code-example scrape)
def download_tweets(search=None, profile=None, sleep=DEFAULT_SLEEP):
    """Generator over tweets from a Twitter search or a user timeline.

    Pass exactly one of *search*/*profile*.  Pages are fetched until the
    pagination cursor stops moving (search) or the server reports nothing
    left (profile), sleeping *sleep* seconds between requests.  If a
    pagination response is not valid JSON, the raw payload is written to
    a timestamped debug file and the error propagates.
    """
    assert search or profile

    query = search or profile
    if search:
        first_url, more_url = TWITTER_SEARCH_URL, TWITTER_SEARCH_MORE_URL
    else:
        first_url, more_url = TWITTER_PROFILE_URL, TWITTER_PROFILE_MORE_URL

    headers = {'User-agent': USER_AGENT}
    quoted = urllib.quote_plus(query)

    page = requests.get(first_url.format(term=quoted), headers=headers).text
    cursor = find_value(page, 'data-min-position')

    for tweet in parse_search_results(page):
        yield tweet

    previous_cursor = None
    keep_going = True
    while keep_going:
        page = requests.get(
            more_url.format(term=quoted, max_position=cursor),
            headers=headers).text
        try:
            payload = json.loads(page)
        except Exception:
            # Dump the unparseable payload for later inspection, then let
            # the original error propagate.
            import datetime
            stamp = datetime.datetime.now().strftime('%Y-%m-%d.%H%M')
            with open('__debug.response_%s.txt' % stamp, 'wb') as fh:
                fh.write(repr(page) + '\n')
            raise

        cursor = payload['min_position']
        keep_going = (payload['has_more_items'] if profile
                      else previous_cursor != cursor)

        for tweet in parse_search_results(payload['items_html']):
            yield tweet

            if search:
                keep_going = True

        previous_cursor = cursor
        time.sleep(sleep)
# Beispiel #4 ("example 4" — separator left over from the code-example scrape)
def download_tweets(search=None, profile=None, sleep=DEFAULT_SLEEP):
    """Yield tweets scraped from Twitter's HTML search or profile pages.

    Exactly one of *search* or *profile* must be given.  Pagination stops
    for profiles when the server says there are no more items, and for
    searches when the pagination cursor stops advancing.

    :param search: search query string (mutually exclusive with profile)
    :param profile: screen name whose timeline to walk
    :param sleep: seconds to pause between pagination requests
    """
    assert search or profile

    term = (search or profile)
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL

    response = requests.get(url.format(term=urllib.quote_plus(term)),
                            headers={'User-agent': USER_AGENT}).text
    min_position = find_value(response, 'data-min-position')

    for tweet in parse_search_results(response.encode('utf8')):
        yield tweet

    has_more_items = True
    last_min_position = None
    while has_more_items:
        response = requests.get(
            url_more.format(term=urllib.quote_plus(term),
                            max_position=min_position),
            headers={'User-agent': USER_AGENT}).text
        try:
            response_dict = json.loads(response)
        except ValueError:
            # json.loads raises ValueError on malformed JSON (Python 2).
            # Dump the raw payload for post-mortem, then re-raise —
            # a bare except here would also swallow KeyboardInterrupt.
            import datetime
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d.%H%M')
            with open('__debug.response_%s.txt' % timestamp, 'wb') as fh:
                fh.write(repr(response) + '\n')
            raise

        min_position = response_dict['min_position']
        # Cursor-advance check prevents an infinite loop when the server
        # keeps returning the same search page.
        has_more_items = response_dict['has_more_items'] if profile else last_min_position != min_position

        for tweet in parse_search_results(response_dict['items_html'].encode('utf8')):
            yield tweet

            if search:
                has_more_items = True

        last_min_position = min_position
        time.sleep(sleep)
def download_tweets(search=None, profile=None, sleep=1):
    """Yield tweets scraped from Twitter's HTML search or profile pages.

    Exactly one of *search* or *profile* must be given: *search* runs a
    search query, *profile* walks a user's timeline.  Pagination follows
    the ``min_position`` cursor embedded in each response.

    :param search: search query string (mutually exclusive with profile)
    :param profile: screen name whose timeline to walk
    :param sleep: seconds to pause between pagination requests
    """
    assert search or profile

    term = urllib.quote_plus(search or profile)
    url = TWITTER_SEARCH_URL if search else TWITTER_PROFILE_URL
    url_more = TWITTER_SEARCH_MORE_URL if search else TWITTER_PROFILE_MORE_URL

    response = requests.get(url.format(term=term),
                            headers={
                                'User-agent': USER_AGENT
                            }).text
    min_position = find_value(response, 'data-min-position')

    for tweet in parse_search_results(response.encode('utf8')):
        yield tweet

    has_more_items = True
    last_min_position = None
    while has_more_items:
        response = requests.get(url_more.format(term=term,
                                                max_position=min_position),
                                headers={
                                    'User-agent': USER_AGENT
                                }).text
        response_dict = json.loads(response)
        min_position = response_dict['min_position']
        # Profiles report an explicit flag; for searches, stop once the
        # pagination cursor stops advancing — guards against an infinite
        # loop when the server keeps returning the same page.
        if profile:
            has_more_items = response_dict['has_more_items']
        else:
            has_more_items = last_min_position != min_position

        for tweet in parse_search_results(
                response_dict['items_html'].encode('utf8')):
            yield tweet

            if search:
                has_more_items = True

        last_min_position = min_position
        time.sleep(sleep)