Ejemplo n.º 1
0
def update_tweets(tweet_batch):
    """Refresh a batch of stored tweets through the ``statuses_lookup`` call.

    Each response whose ``id`` matches a tweet of the batch is queued on
    ``global_task_queue`` for ``update_tweet_from_response``. Tweets that get
    no matching response are stamped with ``deleted_at = today()`` and saved.

    Args:
        tweet_batch: Collection exposing ``count()`` and iteration, whose
            items carry an ``_ident`` attribute (presumably an ORM query
            set -- confirm against the caller).

    Raises:
        tweepy.error.TweepError: Re-raised, after logging the requested ids,
            when the lookup call fails.
    """
    if not tweet_batch.count():
        return

    tweet_batch = list(tweet_batch)
    tweet_ids = [tweet._ident for tweet in tweet_batch]
    client = get_client('statuses_lookup')
    try:
        responses = client.call('statuses_lookup',
                                id_=tweet_ids,
                                trim_user=True)
    except tweepy.error.TweepError:
        log('got tweepy.error.TweepError!')
        log('tweet ids = %s' % tweet_ids)
        raise
    finally:
        # Hand the client back on every exit path, including exceptions
        # other than TweepError, so it is never lost from the pool.
        return_client(client)

    for response in responses:
        monitor_stop_flag()
        tweet = next(
            (tweet
             for tweet in tweet_batch if tweet._ident == response._json['id']),
            None)
        if tweet:
            global_task_queue.add(update_tweet_from_response, [response])
            # Whatever is still in the batch after this loop got no response.
            tweet_batch.remove(tweet)

    # Remaining tweets were not returned by the API: mark them as deleted.
    for tweet in tweet_batch:
        tweet.deleted_at = today()
        tweet.save()
    if tweet_batch:
        log("{} tweets have been deleted".format(len(tweet_batch)))
def _fetch_tweets_from_html(term, since, until):
    """Scrape the Twitter search page for ``term`` and return tweet ids.

    The request is retried (after a one second ``safe_sleep``) for as long
    as it fails with ``socket.timeout``. Retries run in a loop rather than
    by recursion, so a long outage cannot exhaust the interpreter's
    recursion limit.

    Args:
        term: Search term inserted verbatim into the query string.
        since: Lower bound of the search window; must provide ``strftime``.
        until: Upper bound of the search window; must provide ``strftime``.

    Returns:
        list[int]: ``data-item-id`` values of every ``<li>`` element whose
        ``data-item-type`` is ``"tweet"``.
    """
    # NOTE(review): the query contains literal spaces and ``term`` is not
    # percent-encoded -- confirm the HTTP client in use accepts this URL.
    url = 'https://twitter.com/search?q={} since%3A{} until%3A{}'.format(
        term, since.strftime("%Y-%m-%d"), until.strftime("%Y-%m-%d"))

    while True:
        monitor_stop_flag()
        log(url)
        # The request is rebuilt on every attempt so that each retry picks
        # a fresh random user agent.
        request = Request(url, headers={
            'User-Agent': random.choice(BROWSER_USER_AGENTS),
            'Host': 'twitter.com',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,fr-CA;q=0.8,en;q=0.5,fr;q=0.3',
            # NOTE(review): compressed encodings are advertised but the
            # response is never decompressed here -- confirm this is safe.
            'Accept-Encoding': 'gzip, deflate, br',
            'Referer': 'https://twitter.com/',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Cache-Control': 'max-age=0, no-cache',
            'TE': 'Trailers',
            'Pragma': 'no-cache',
        })

        try:
            # NOTE(review): certificate verification is disabled by the
            # unverified context -- confirm this is intentional.
            data = urlopen(request, timeout=5,
                           context=ssl._create_unverified_context())
            try:
                page = bs(data, "html.parser")
            finally:
                # Always release the connection, even if parsing fails.
                data.close()
        except socket.timeout:
            log('Socket timeout while fetching tweets from hashtag: #{}'.format(term))
            safe_sleep(1)
            continue
        break

    tweets = page.find_all('li', {"data-item-type": "tweet"})
    return [int(tweet['data-item-id'])
            for tweet in tweets if tweet.has_attr('data-item-id')]
def update_twitter_users(twitter_user_batch):
    """Refresh a batch of stored Twitter users through ``lookup_users``.

    Each response whose ``id`` matches a user of the batch is queued on
    ``global_task_queue`` for ``update_twitter_user_from_response``. Users
    that get no matching response are logged, stamped with
    ``_last_updated = today()``, given ``_update_frequency = 5`` and saved.

    Args:
        twitter_user_batch: Collection exposing ``count()`` and iteration,
            whose items carry an ``_ident`` attribute (presumably an ORM
            query set -- confirm against the caller).

    Raises:
        tweepy.error.TweepError: Re-raised, after logging the requested ids,
            when the lookup call fails.
    """
    if not twitter_user_batch.count():
        return

    twitter_user_batch = list(twitter_user_batch)
    user_ids = [user._ident for user in twitter_user_batch]
    client = get_client('lookup_users')
    try:
        responses = client.call('lookup_users', user_ids=user_ids)
    except tweepy.error.TweepError:
        log('got tweepy.error.TweepError!')
        log('user_ids = %s' % user_ids)
        raise
    finally:
        # Hand the client back on every exit path, including exceptions
        # other than TweepError, so it is never lost from the pool.
        return_client(client)

    for response in responses:
        monitor_stop_flag()
        tw_user = next((user for user in twitter_user_batch
                        if user._ident == response._json['id']), None)
        if tw_user:
            global_task_queue.add(update_twitter_user_from_response,
                                  args=[tw_user, response._json])
            # Whatever is still in the batch after this loop got no response.
            twitter_user_batch.remove(tw_user)

    # Remaining users were not returned by the API.
    for tw_user in twitter_user_batch:
        log('Twitter user (%s) has returned no result.' % tw_user)
        # twUser._error_on_update = True
        tw_user._last_updated = today()
        tw_user._update_frequency = 5
        tw_user.save()
    def execute(self):
        """Take one task from ``global_task_queue`` and run it.

        Polls the queue, calling ``monitor_stop_flag()`` on every pass while
        it is empty. The dequeued ``(task, args, kwargs)`` triple is stored
        on ``current_task`` / ``current_args`` / ``current_kwargs`` while the
        task runs, and all three are reset to ``None`` once it returns.
        A falsy task ends the method at once, leaving the stored values
        untouched.
        """
        # Poll until something is available, checking the stop flag each time.
        while True:
            if not global_task_queue.empty():
                break
            monitor_stop_flag()

        task, args, kwargs = global_task_queue.get()
        self.current_task = task
        self.current_args = args
        self.current_kwargs = kwargs
        if not task:
            return

        task(*args, **kwargs)

        # The task finished normally: clear the bookkeeping attributes.
        self.current_task = None
        self.current_args = None
        self.current_kwargs = None
Ejemplo n.º 5
0
 def next(self):
     """Return the next buffered result, loading another set when needed.

     ``monitor_stop_flag()`` is called on every invocation. An ``index`` of
     ``-1`` yields ``None``. When ``index`` has reached ``nbItems``,
     ``_get_next_set()`` is called and the lookup is retried.
     """
     monitor_stop_flag()
     position = self.index
     if position == -1:
         return None
     if not position < self.nbItems:
         # Current set is used up: load the following one and try again.
         self._get_next_set()
         return self.next()
     current = self.results[position]
     self.index = position + 1
     return current
def harvest_twitter_hashtag(twitter_hashtag_harvester):
    """Repeatedly fetch and pretty-print tweet ids for a harvester's hashtag.

    The hashtag is logged once. Each loop pass then calls
    ``monitor_stop_flag()``, looks up tweet ids for the hashtag's ``term``
    within the harvester's ``harvest_since`` / ``harvest_until`` window via
    ``_fetch_tweets_from_html`` and passes the result to ``pretty``.

    NOTE(review): no exit condition is visible in this block; the loop
    presumably ends only through ``monitor_stop_flag()`` -- confirm.
    """
    hashtag = twitter_hashtag_harvester.twitter_hashtag
    log(hashtag)
    while True:
        monitor_stop_flag()

        # Attributes are re-read on every pass, as in the original.
        term = hashtag.term
        window_start = twitter_hashtag_harvester.harvest_since
        window_end = twitter_hashtag_harvester.harvest_until
        found_ids = _fetch_tweets_from_html(term, window_start, window_end)
        pretty(found_ids)
Ejemplo n.º 7
0
def get_client(call_name):
    """Return a client from ``clients_queue`` that can still make ``call_name``.

    Polls until a client whose ``get_remaining_calls(call_name)`` is positive
    is obtained. A client without remaining calls is put back on the queue
    before another one is taken. ``monitor_stop_flag()`` is called on every
    polling pass.
    """
    def _take_if_available():
        # Take a client only when the queue reports it is not empty.
        if clients_queue.empty():
            return None
        return clients_queue.get()

    candidate = _take_if_available()
    while not candidate or candidate.get_remaining_calls(call_name) <= 0:
        monitor_stop_flag()
        if candidate:
            # Not usable for this call right now: give it back to the pool.
            clients_queue.put(candidate)
        candidate = _take_if_available()
    return candidate