Exemple #1
0
def RunMLCollector(month, day):
    """Run the scheduled collection calls for one day's sets.

    Loads the sets file for ``month``/``day`` and walks every runtime in
    ascending order. For each runtime that is still in the future it sleeps
    until that time, performs the set's next API call, appends the call time
    to the set and writes all sets back to the same file. Runtimes that are
    already in the past are skipped.

    Args:
        month: Month passed to ``config.GetSetsFileName``.
        day: Day passed to ``config.GetSetsFileName``.

    Note:
        ``collection_type`` is not defined in this function; it must be
        available at module level.
    """
    filename = config.GetSetsFileName(month, day, collection_type)
    sets = config.ReadJSON(filename)

    # Runtimes are '%Y-%m-%d %H:%M:%S' strings, so sorting the strings also
    # sorts them chronologically.
    sorted_times = sorted(
        runtime for set_n in sets for runtime in set_n['runtimes'])

    for sorted_time in sorted_times:
        timex = datetime.datetime.strptime(sorted_time, '%Y-%m-%d %H:%M:%S')
        for set_n in sets:
            if sorted_time not in set_n['runtimes']:
                continue

            # Read the clock once so the "is it due?" check and the sleep
            # length are based on the same instant.
            now = datetime.datetime.now()
            if timex <= now:
                print('skipping old runtime.')
                continue

            delta = int((timex - now).total_seconds())
            print('Waiting', delta, 'seconds. Next Up', set_n['name'],
                  set_n['call'], timex)
            sleeper.sleep(delta)

            # Set API calls. Both collectors return the call number to use
            # next; storing it is what moves a set from the initial
            # streaming call on to the status-update calls.
            if set_n['call'] == 1:
                set_n['call'] = collection.FilterStatusByLocation(set_n)
            else:
                set_n['call'] = collection.GetUpdatedStatuses(set_n)

            calltime = datetime.datetime.now()
            set_n['call_times'].append(calltime)
            config.WriteJSON(sets, filename)
def GetUpdatedStatuses(params):
    """Re-fetch the statuses saved by the previous call and save the results.

    Reads the tweet ids from the file written for call ``params['call'] - 1``,
    requests each status again through the REST client, and writes the
    statuses that could be fetched to the file for call ``params['call']``.
    Ids whose request raises are reported and left out.

    Args:
        params: Mapping providing the 'month', 'day', 'set' and 'call' keys.

    Returns:
        The call number incremented by one.
    """
    print('Updating tweets')

    month_num, day_num = params['month'], params['day']
    set_name, call_num = params['set'], params['call']

    # The ids to refresh come from the file of the preceding call.
    previous_file = config.GetTweetFileName(month_num, day_num, set_name,
                                            call_num - 1)
    tweet_ids = config.GetTweetIds(previous_file)

    twit_api = GetTwitterRest()
    statuses = []
    for tweet_id in tweet_ids:
        try:
            fetched = twit_api.statuses.show(id=tweet_id)
        except Exception as e:
            # Best effort: report the failure and carry on with the next id.
            print(e, file=sys.stderr)
            print('Skipped tweet id:', tweet_id)
        else:
            statuses.append(fetched)

    # Write the refreshed statuses under the current call number.
    output_file = config.GetTweetFileName(month_num, day_num, set_name,
                                          call_num)
    config.WriteJSON(statuses, output_file)
    print('Saved', len(statuses), 'statuses to file.')

    return call_num + 1
def GetFollowers(userid):
    """Download up to five pages of a user's followers and store them.

    Loads the saved user record for ``userid``, pages through the
    followers/list endpoint (200 per page, at most 5 pages), adds an
    ``'influence_score'`` to every follower, stores the pages under the
    record's ``'followers'`` key and writes the record back to its file.
    When the rate limit for the endpoint is exhausted it sleeps 15 minutes
    before checking again.

    Args:
        userid: Id of the user whose followers are fetched.
    """
    print('Getting followers of', userid)
    twit_api = GetTwitterRest()

    filename = config.GetUserFileName(userid)
    user = config.ReadJSON(filename)

    weights = config.GetWeights()
    max_pages = 5
    page_count = 0
    followers = []
    next_cursor = -1
    while next_cursor != 0 and page_count < max_pages:
        limits = twit_api.application.rate_limit_status()
        remaining = limits['resources']['followers']['/followers/list'][
            'remaining']
        if remaining <= 0:
            print("Sleeping")
            sleeper.sleep(15 * 60)
            continue

        page = twit_api.followers.list(user_id=userid,
                                       count=200,
                                       cursor=next_cursor)

        # Score the follower objects themselves (iterating the response
        # would only yield its keys). The loop variable is deliberately not
        # called `user`, so the record loaded above is what gets saved.
        for follower in page['users']:
            follower['influence_score'] = (
                follower['followers_count'] * weights['followers_count'] +
                follower['listed_count'] * weights['listed_count'])

        # NOTE(review): each page is appended as its own list, so
        # 'followers' is a list of pages; confirm readers expect that
        # rather than one flat list.
        followers.append(page['users'])
        next_cursor = page['next_cursor']
        page_count += 1

    user['followers'] = followers
    config.WriteJSON(user, filename)
def GetUser(user_id):
    """Fetch a user through the REST client and save it to the user's file.

    Args:
        user_id: Id of the user to look up; also used to build the file name.
    """
    print('Getting users')
    twit_api = GetTwitterRest()
    user = twit_api.users.show(user_id=user_id)

    # The file name is derived from the same id that was looked up.
    filename = config.GetUserFileName(user_id)
    config.WriteJSON(user, filename)
def ReadTweets(list_ofQueries, search_location):
    """Run each search query near a location and save the raw results.

    For every query, requests up to 100 tweets restricted to
    ``search_location`` and writes the response to ``<query>.json`` in the
    tweets folder.

    Args:
        list_ofQueries: Iterable of query strings; each one is also used as
            the output file's base name.
        search_location: Value passed as the search's ``geocode`` argument.
    """
    twit_api = GetTwitterRest()

    for query in list_ofQueries:
        search_results = twit_api.search.tweets(q=query,
                                                geocode=search_location,
                                                count=100)

        # NOTE(review): the folder helper is called through `config`, like
        # the other file-name helpers used by this code; the folder name is
        # concatenated as-is, so it presumably ends with a path separator.
        # Confirm both.
        filename = config.GetTweetsFolder() + query + '.json'
        config.WriteJSON(search_results, filename)
def FilterStatusByLocation(params):
    """Collect up to 100 streamed statuses inside a bounding box and save them.

    Only acts when ``params['call']`` is 1; any other call number is
    reported as an error and nothing is collected.

    Args:
        params: Mapping providing the 'month', 'day', 'set', 'call' and
            'boundaries' keys.

    Returns:
        The call number, incremented by one when the statuses were written.
        If streaming or writing raised, the unchanged call number. ``None``
        when the call number was not 1.
    """
    print('Getting new tweets by location')

    month_num, day_num = params['month'], params['day']
    set_name, call_num = params['set'], params['call']
    search_box = params['boundaries']

    if call_num != 1:
        print('Error. Check call number.')
        return None

    max_tweets = 100
    twit_stream = GetTwitterStream()
    try:
        stream = twit_stream.statuses.filter(locations=search_box)

        # Pull statuses off the stream until the cap is reached.
        statuses = []
        for count, status in enumerate(stream, start=1):
            statuses.append(status)
            if count == max_tweets:
                break

        # Persist what was collected under the current call number.
        filename = config.GetTweetFileName(month_num, day_num, set_name,
                                           call_num)
        config.WriteJSON(statuses, filename)
        call_num += 1
        print('Saved', len(statuses), 'statuses to file.')
    except Exception as e:
        print(e, file=sys.stderr)
        print('Could not get statuses.')

    return call_num
Exemple #7
0
def MakeSets(year, month, day, collection_interval, interval_identifier,
             user_schedule, collection_type, boundaries):
    """Create and save the collection sets for each day of an interval.

    For ``collection_interval`` consecutive days starting at
    ``year``/``month``/``day``, builds one set per column of
    ``user_schedule`` (holding that column's run times as datetimes), makes
    sure the month and day folders exist, and writes the day's sets with
    ``config.WriteJSON``.

    Args:
        year: Year of the first collection day.
        month: Month of the first collection day.
        day: Day of month of the first collection day.
        collection_interval: Number of consecutive days to create sets for.
        interval_identifier: Stored in each set as ``'interval_name'``.
        user_schedule: Table with one column per set and one row per call.
            Presumably a pandas DataFrame whose cells are ``datetime.time``
            values (they are compared with and combined as times); verify
            against the caller.
        collection_type: Stored in each set and used in folder/file names.
        boundaries: Stored in each set as ``'boundaries'``.

    Raises:
        ValueError: If ``year``/``month``/``day`` is not a valid date.
    """
    # Run times before this hour are executed on the following calendar day
    # but are still recorded under the set's start day.
    day_start = datetime.time(hour=6)
    one_day = datetime.timedelta(days=1)

    num_sets = len(user_schedule.columns)
    num_calls = len(user_schedule.index)
    cols = list(user_schedule.columns)

    # Using date arithmetic lets the day roll over month and year ends, and
    # keeps "the next day" valid on the last day of a month.
    current = datetime.date(year, month, day)

    for interval in range(1, collection_interval + 1):
        next_day = current + one_day

        # Get start times from schedule, column by column.
        all_times = []
        for col in cols:
            for set_runtime in user_schedule[col]:
                run_date = next_day if set_runtime < day_start else current
                all_times.append(
                    datetime.datetime.combine(run_date, set_runtime))

        # Create sets obj: set i owns the i-th run of num_calls run times.
        sets = []
        for i in range(1, num_sets + 1):
            first = (i - 1) * num_calls
            sets.append({
                'interval_name': interval_identifier,
                'total_intervals': collection_interval,
                'collection_type': collection_type,
                'interval': interval,
                'year': current.year,
                'month': current.month,
                'day': current.day,
                'name': 'set_' + str(i),
                'set': i,
                'call': 1,
                'runtimes': all_times[first:first + num_calls],
                'call_times': [],
                'boundaries': boundaries
            })

        # Add month and day folders if needed.
        APP_ROOT = os.path.dirname(os.path.abspath(__file__))
        tweets_folder = config.GetTweetsFolder().replace('../', '')
        MONTH_ROOT = APP_ROOT.replace(
            'scripts',
            tweets_folder + '/' + collection_type + '/M' + str(current.month))
        DAY_ROOT = MONTH_ROOT + "/D" + str(current.day)
        os.makedirs(DAY_ROOT, exist_ok=True)

        config.WriteJSON(
            sets,
            config.GetSetsFileName(current.month, current.day,
                                   collection_type))

        current = next_day
Exemple #8
0
def RunNWCollector(month, day, localPlaces):
    """Run the scheduled streaming calls for one day and harvest local users.

    Loads the sets file for ``month``/``day`` and walks every runtime in
    ascending order. For each runtime still in the future it sleeps until
    that time, collects tweets by location, records the call time and
    writes the sets back. It then reads the set's tweet file, scores the
    authors of tweets placed in ``localPlaces``, and for every such author
    without a saved user file writes the user and fetches their friends and
    followers. Runtimes already in the past are skipped.

    Args:
        month: Month passed to ``config.GetSetsFileName``.
        day: Day passed to ``config.GetSetsFileName``.
        localPlaces: Container of place full names (e.g. 'Erie, PA') that
            count as local.

    Note:
        ``collection_type`` is not defined in this function; it must be
        available at module level.
    """
    import config

    filename = config.GetSetsFileName(month, day, collection_type)
    sets = config.ReadJSON(filename)

    # Runtimes are '%Y-%m-%d %H:%M:%S' strings, so sorting the strings also
    # sorts them chronologically.
    sorted_times = sorted(
        runtime for set_n in sets for runtime in set_n['runtimes'])

    for sorted_time in sorted_times:
        timex = datetime.datetime.strptime(sorted_time, '%Y-%m-%d %H:%M:%S')
        for set_n in sets:
            if sorted_time not in set_n['runtimes']:
                continue

            # Read the clock once so the "is it due?" check and the sleep
            # length are based on the same instant.
            now = datetime.datetime.now()
            if timex <= now:
                print('skipping old runtime.')
                continue

            delta = int((timex - now).total_seconds())
            print('Waiting', delta, 'seconds. Next Up', set_n['name'],
                  set_n['call'], timex)
            sleeper.sleep(delta)

            # Get tweets
            collection.FilterStatusByLocation(set_n)

            # Update sets
            set_n['call_times'].append(datetime.datetime.now())
            config.WriteJSON(sets, filename)

            # Score the author of every tweet placed in one of the local
            # places and keep them for the user files below.
            tweets = config.ReadJSON(
                config.GetTweetFileName(set_n['month'], set_n['day'],
                                        set_n['set'], set_n['call']))
            weights = config.GetWeights()
            newUsers = []
            for tweet in tweets:
                place = tweet['place']
                if not (place and place['full_name'] in localPlaces):
                    continue
                author = tweet['user']
                author['influence_score'] = (
                    author['followers_count'] * weights['followers_count'] +
                    author['listed_count'] * weights['listed_count'])
                newUsers.append(author)

            # Write each user to file, get friends, followers. The user
            # path gets its own names so that neither the `config` module
            # nor the sets `filename` used above is overwritten.
            for user in newUsers:
                user_id = user['id_str']
                user_filename = config.GetUserFileName(user_id)
                if Path(user_filename).is_file():
                    # Update this - need the most current version but not
                    # if user is in this set
                    print('User', user_id,
                          'already exists. Skipping for now.')
                else:
                    print('Writing user', user_id)
                    config.WriteJSON(user, user_filename)
                    collection.GetFriends(user_id)
                    collection.GetFollowers(user_id)