Ejemplo n.º 1
0
def main(*args, **kwargs):
    """Build a ``TweetCriteria`` from keyword options and run the scraper.

    Args:
        *args: The first positional argument is forwarded unchanged as the
            second argument of ``Scraper.get_tweets``; any further
            positional arguments are ignored.
        **kwargs: Search options. Recognised keys are ``username``,
            ``since``, ``until``, ``query``, ``max_tweets`` and
            ``language``; each value is copied onto the criteria attribute
            of the same name. Unrecognised keys are ignored.

    Returns:
        The ``(r, status, result)`` tuple produced by
        ``Scraper.get_tweets``, or ``None`` when no keyword options were
        supplied (a usage hint is printed instead).

    Raises:
        IndexError: If keyword options are given without a positional
            argument.
    """
    if not kwargs:
        # The trailing space after "how to" matters: the two literals are
        # concatenated and previously rendered as "how touse".
        print('No arguments/parameters passed. For more information on how to '
              'use OldTweetsScraper, you may pass "-help" as a parameter.')
        return None

    r = args[0]

    # Every supported option maps onto a criteria attribute with the same
    # name, so a membership test replaces the former if/elif chain.
    supported = ('username', 'since', 'until', 'query', 'max_tweets',
                 'language')
    tweet_criteria = models.TweetCriteria()
    for opt, value in kwargs.items():
        if opt in supported:
            setattr(tweet_criteria, opt, value)

    miner = controllers_q.Scraper()
    r, status, result = miner.get_tweets(tweet_criteria, r)
    print('Finished scraping data')
    return r, status, result
Ejemplo n.º 2
0
def getTwitterData(start_date,
                   end_date,
                   max_per_day,
                   hashtag,
                   main_file='tweets_gathered.csv'):
    """Scrape tweets for ``hashtag`` one day at a time and merge the results.

    For every day from ``start_date`` through ``end_date`` (inclusive) a
    ``TweetCriteria`` is built with ``since`` set to that day and ``until``
    set to the following day, and the scraper is run with
    ``exporter.output_to_file`` as its buffer callback. Afterwards
    ``tweets_gathered.csv`` is read back; if it holds more than one line its
    content is merged into ``main_file``.

    Args:
        start_date: First day to scrape, formatted ``YYYY-MM-DD``.
        end_date: Last day to scrape (inclusive), formatted ``YYYY-MM-DD``.
        max_per_day: Per-day tweet limit; converted with ``int()`` and used
            both as ``max_tweets`` and as ``buffer_length``.
        hashtag: Value assigned to the criteria's ``query``.
        main_file: Path of the merged output. The first non-empty day is
            written in full (overwriting the file); later days are appended
            without their first line.

    Returns:
        None.

    Raises:
        ValueError: If a date does not match ``%Y-%m-%d`` or ``max_per_day``
            cannot be converted to ``int``.
    """
    # File read back after each day's scrape. Presumably this is where
    # Exporter writes its output -- confirm against controllers.Exporter.
    scrape_file = 'tweets_gathered.csv'

    day = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    last_day = datetime.datetime.strptime(end_date, '%Y-%m-%d')
    one_day = datetime.timedelta(days=1)
    daily_limit = int(max_per_day)
    first_write = True

    while day <= last_day:
        since = str(day.date())
        print("Processing: %s" % (since))

        tweet_criteria = models.TweetCriteria()
        tweet_criteria.since = since
        tweet_criteria.until = str((day + one_day).date())
        tweet_criteria.query = hashtag
        tweet_criteria.max_tweets = daily_limit

        exporter = controllers.Exporter()
        try:
            miner = controllers.Scraper()
            miner.get_tweets(tweet_criteria,
                             buffer=exporter.output_to_file,
                             buffer_length=daily_limit)
        finally:
            # Release the exporter even when scraping fails.
            exporter.close()

        # Read the day's output completely BEFORE opening main_file: with the
        # default arguments both names refer to the same file, and opening it
        # in "w" mode first would truncate the data that is about to be read.
        with open(scrape_file) as scraped:
            lines = scraped.readlines()

        # A file with a single line is treated as having no tweets
        # (presumably that line is the CSV header -- confirm).
        if len(lines) > 1:
            if first_write:
                mode, rows = "w", lines
            else:
                # Drop the first line so it is not repeated in the merge.
                mode, rows = "a", lines[1:]
            with open(main_file, mode) as merged:
                merged.writelines(rows)
            first_write = False
            # NOTE(review): when main_file is the scrape file itself, what
            # later days accumulate depends on whether Exporter truncates
            # the file on each run -- pass a distinct main_file to be safe.

        day += one_day
Ejemplo n.º 3
0
def main(argv):
    """Command-line entry point: parse options, scrape tweets, export them.

    Args:
        argv: Command-line arguments without the program name. An empty
            list prints a usage hint; the single argument ``-help`` prints
            instructions and examples. Otherwise the long options
            ``--username``, ``--since``, ``--until``, ``--query`` and
            ``--max-tweets`` (converted with ``int()``) are copied onto a
            ``TweetCriteria`` that is handed to the scraper.

    Returns:
        None. Progress and error messages are printed to standard output.
    """
    if not argv:
        # The trailing space after "how to" matters: the two literals are
        # concatenated and previously rendered as "how touse".
        print('No arguments/parameters passed. For more information on how to '
              'use OldTweetsScraper, you may pass "-help" as a parameter.')
        return

    if len(argv) == 1 and argv[0] == '-help':
        instructions = (
            'You need to include parameters when running this file. '
            'At least one of the following arguments must be used:'
            '\n query: query text to search for'
            '\n username: twitter username'
            '\n\nThe following arguments are optional and may be '
            'passed simultaneously:'
            '\n since: lower bound for the date using format '
            'YYYY-MM-DD'
            '\n until: upper bound for the date using format '
            'YYYY-MM-DD'
            '\n max-tweets: maximum number of tweets to retrieve '
            '(default: 100)\n'
        )
        examples = '''
        #Example 1 - Get tweets by username [barackobama] and set max tweets to 1
            python main.py --username "barackobama" --max-tweets 1\n

        #Example 2 - Get tweets by query [#marcosNotAHero]
            python main.py --query "#marcosNotAHero" --max-tweets 1\n

        #example 3 - Get tweets by query and bound dates [#BenhamRise, '2016-01-01', '2017-04-01']
            python main.py --query "#BenhamRise" --since 2016-01-01 --until 2017-04-01
        '''

        print(instructions)
        print(examples)
        return

    # Command-line flag -> (criteria attribute, converter for the raw value).
    option_map = {
        '--username': ('username', str),
        '--since': ('since', str),
        '--until': ('until', str),
        '--query': ('query', str),
        '--max-tweets': ('max_tweets', int),
    }

    try:
        opts, _ = getopt.getopt(
            argv, '',
            ('username=', 'since=', 'until=', 'query=', 'max-tweets='))

        tweet_criteria = models.TweetCriteria()
        for opt, arg in opts:
            if opt in option_map:
                attribute, convert = option_map[opt]
                setattr(tweet_criteria, attribute, convert(arg))

        exporter = controllers.Exporter()
        try:
            miner = controllers.Scraper()
            miner.get_tweets(tweet_criteria, buffer=exporter.output_to_file)
        finally:
            # Release the exporter even when scraping fails.
            exporter.close()

        print('Finished scraping data. Output file generated'
              ' "tweets_gathered.csv"')
    except Exception as err:
        # Top-level boundary: keep the best-effort behaviour (report, do not
        # crash) but let KeyboardInterrupt/SystemExit propagate and show the
        # cause instead of hiding it.
        print('Unexpected error. Please try again. For more information on'
              ' how to use this script, use the -help argument.')
        print('Reason: %s: %s' % (type(err).__name__, err))