Ejemplo n.º 1
0
def main():
    """Stream location-filtered tweets until interrupted, reconnecting on errors.

    Writes the authenticated user's name and id to stderr, parses the command
    line, then repeatedly opens a stream whose ``locations`` filter is the
    value stored under ``city[args.city]``. Every connection attempt builds a
    fresh ``MyListener`` from the API handle, ``args.lang`` and a
    ``DataBaseManager(args.database)``.

    The loop ends only on ``KeyboardInterrupt``. Any other exception is
    reported on stderr and followed by a 10 second pause before reconnecting.

    Raises:
        Whatever ``city[args.city]`` raises for an unknown key (``KeyError``
        if ``city`` is a dict). The lookup happens once, before the retry
        loop, so a bad key fails immediately instead of being retried forever.
    """
    api = get_twitter_api()
    user = api.me()
    sys.stderr.write("-- Name: %s -- ID: %s \n" % (user.name, user.id))
    arg_parser = get_parser()
    args = arg_parser.parse_args()

    # Resolved outside the loop on purpose: inside it, a bad key would be
    # swallowed by the generic handler below and retried endlessly.
    locations = city[args.city]

    auth = get_twitter_auth()
    sys.stderr.write("Start scraping tweets:...\n")
    while True:
        try:
            # start using Streaming API to collect data:
            listener = MyListener(api, args.lang, DataBaseManager(args.database))
            twitter_stream = Stream(auth, listener)
            twitter_stream.filter(locations=locations)
        except KeyboardInterrupt as ex:
            # The user asked to stop: leave the reconnect loop.
            sys.stderr.write('You cancel the program! {}\n'.format(ex))
            break
        except IncompleteRead as e:
            sys.stderr.write('[Error!] on IncompleteRead {}\n'.format(e))
        except Exception as ept:
            sys.stderr.write('[Error!] on Exception {}\n'.format(ept))
        # Back off before reconnecting, including when filter() simply
        # returned, so the loop never hammers the endpoint.
        time.sleep(10)
class CustomListener(StreamListener):
    """Stream listener that appends every received message to a file.

    The output file is ``stream_<name>.jsonl`` where ``<name>`` is the
    constructor argument passed through ``format_filename``.
    """

    def __init__(self, fname):
        """Derive the output file name from ``fname``.

        Args:
            fname: Label for the output file; unsafe characters are replaced
                by ``format_filename``.
        """
        safe_fname = format_filename(fname)
        self.outfile = "stream_%s.jsonl" % safe_fname

    def on_data(self, data):
        """Append ``data`` to the output file.

        A failed write is reported on stderr and followed by a 5 second
        pause. ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not
        caught, so the program can still be stopped.

        Returns:
            True in every case (presumably "keep streaming" for the caller;
            confirm against the tweepy listener contract).
        """
        try:
            with open(self.outfile, 'a') as f:  # append, creating if needed
                f.write(data)
        except Exception as e:
            sys.stderr.write("Error on_data: {}\n".format(e))
            time.sleep(5)
        return True

    def on_error(self, status):
        """Report a stream error on stderr.

        Returns:
            False when ``status`` is 420, True for any other status.
        """
        if status == 420:
            sys.stderr.write("Rate limit exceeded\n")
            return False
        sys.stderr.write("Error {}\n".format(status))
        return True


def format_filename(fname):
    """Convert fname into a safe string for a file name.

    Each character of ``fname`` is passed through ``convert_valid`` and the
    results are concatenated in order.

    Return: string
    """
    return ''.join(convert_valid(one_char) for one_char in fname)


def convert_valid(one_char):
    """Convert a character into '_' if "invalid".

    A character is kept when it is '-', '_', '.', an ASCII letter or an
    ASCII digit.

    Return: string
    """
    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
    if one_char in valid_chars:
        return one_char
    return '_'


if __name__ == '__main__':
    query = sys.argv[1:]  # list of CLI arguments
    query_fname = ' '.join(query)  # string
    auth = get_twitter_auth()
    twitter_stream = Stream(auth, CustomListener(query_fname))
    # `is_async`, not `async`: the latter is a reserved word since Python 3.7.
    twitter_stream.filter(track=query, is_async=True)


def observe_tweets(observable):
    """Stream tweets and forward each raw payload to ``observable``.

    Every message is parsed as JSON and handed to ``insert_tweet``, then the
    unparsed payload is passed to ``observable.on_next``. The stream is
    filtered by the module-level ``topics`` and ``locations``.

    Args:
        observable: Object exposing ``on_next(data)`` and ``on_error(status)``.
    """
    class TweetListener(StreamListener):
        def on_data(self, data):
            insert_tweet(json.loads(data))
            observable.on_next(data)
            return True

        def on_error(self, status):
            if status == 420:
                # Only the rate-limit status is propagated to the observer.
                observable.on_error(status)
                sys.stderr.write("Rate limit exceeded\n")
                return False
            sys.stderr.write("Error {}\n".format(status))
            return True

    tweet_listener = TweetListener()
    auth = get_twitter_auth()

    stream = Stream(auth, tweet_listener)
    stream.filter(track=topics, locations=locations)
Ejemplo n.º 4
0
            sys.stderr.write("Rate limit exceeded\n".format(status))
            return False
        else:
            sys.stderr.write("Error {}\n".format(status))
            return True


def format_filename(fname):
    """Return ``fname`` rewritten so it is safe to use as a file name.

    Every character is mapped through ``convert_valid`` and the results are
    joined back together in the original order.
    Return: string
    """
    return ''.join(map(convert_valid, fname))


def convert_valid(one_char):
    """Return ``one_char`` if it is allowed in a file name, else '_'.

    Allowed characters are '-', '_', '.', ASCII letters and ASCII digits.
    Return: string
    """
    allowed = "-_." + string.ascii_letters + string.digits
    return one_char if one_char in allowed else '_'


if __name__ == '__main__':
    # Every argument after the script name is a keyword to track.
    query = sys.argv[1:]
    # Space-joined keywords; CustomListener receives this as its label.
    query_fname = str.join(' ', query)
    auth = get_twitter_auth()
    twitter_stream = Stream(
        auth,
        CustomListener(query_fname),
    )
    twitter_stream.filter(is_async=True, track=query)
Ejemplo n.º 5
0
def get_wordcloud():
    """Build a word cloud from the 30 most common entries of ``counter``.

    The cloud uses a white background together with the module-level
    ``font_path``, ``image_mask`` and ``stopwords + punctuation`` settings.

    Returns:
        The object returned by ``WordCloud.generate_from_frequencies``.
    """
    top_terms = dict(counter.most_common(30))
    cloud_builder = WordCloud(
        background_color='white',
        font_path=font_path,
        mask=image_mask,
        stopwords=stopwords + punctuation,
    )
    return cloud_builder.generate_from_frequencies(top_terms)


def display_wordcloud():
    """Draw the current word cloud and keep it on screen for 10 seconds.

    The cloud from ``get_wordcloud`` is recoloured with ``image_colors`` and
    shown through ``plt`` (presumably matplotlib.pyplot) with the axes hidden.

    Returns:
        True, always.
    """
    recolored = get_wordcloud().recolor(color_func=image_colors)
    plt.imshow(recolored, interpolation='bilinear')
    plt.axis('off')
    plt.pause(10.0)
    return True


if __name__ == '__main__':
    import time

    parser = get_parser()
    args = parser.parse_args()
    query = [args.query]
    auth = twitter_client.get_twitter_auth()
    stream = Stream(auth, MyListener(query))
    # `is_async`, not `async`: the latter is a reserved word since Python 3.7.
    # The stream runs in the background while this thread draws the cloud.
    stream.filter(track=query, is_async=True)
    while True:
        if counter:
            # display_wordcloud() itself pauses for 10 s per refresh.
            display_wordcloud()
        else:
            # Nothing counted yet: wait briefly instead of busy-spinning.
            time.sleep(1)

def format_filename(fname):
    """Build a file-name-safe version of ``fname``.

    Each character is replaced by whatever ``convert_valid`` returns for it;
    the replacements are concatenated in the original order.

    Return: string
    """
    sanitized = [convert_valid(ch) for ch in fname]
    return ''.join(sanitized)


def convert_valid(one_char):
    """Map a character to itself when allowed in a file name, else to '_'.

    Allowed characters are '-', '_', '.', ASCII letters and ASCII digits.

    Return: string
    """
    valid_chars = ''.join(("-_.", string.ascii_letters, string.digits))
    if one_char not in valid_chars:
        return '_'
    return one_char

if __name__ == '__main__':
    # Every argument after the script name is a keyword to track.
    query = sys.argv[1:]  # list of CLI arguments
    # Space-joined keywords; CustomListener receives this as its label.
    query_fname = ' '.join(query)  # string
    auth = get_twitter_auth()
    twitter_stream = Stream(auth, CustomListener(query_fname))
    # `is_async`, not `async`: the latter is a reserved word since Python 3.7.
    twitter_stream.filter(track=query, is_async=True)