def PreviewSchedule(month, day, collection_type):
    """Print a preview of the collection schedule stored in the sets file.

    The sets file is located with ``config.GetSetsFileName(month, day,
    collection_type)`` and loaded with ``config.ReadJSON``.  Interval details
    and the call counters are read from the first set only; afterwards the
    runtimes of every set are listed in ascending (string) order together
    with a running call number.

    Nothing is returned; all output goes to stdout.
    """
    schedule = config.ReadJSON(
        config.GetSetsFileName(month, day, collection_type))

    print("\n")
    print("Preview Schedule")
    print("____________________________________________")
    first_set = schedule[0]
    print("Interval Name:", first_set['interval_name'])
    print("Interval Type:", first_set['interval_type'])
    print("Total Intervals:", first_set['total_intervals'])
    print("Interval Number:", first_set['interval'])

    # Flatten the runtimes of all sets into a single list.
    all_runtimes = [stamp for entry in schedule for stamp in entry['runtimes']]

    print('\n')
    print("Number Sets:", len(schedule))
    print("Number Calls:", len(first_set['runtimes']))
    print("Current Call:", first_set['call'])
    print('\n')

    call_number = 1
    for stamp in sorted(all_runtimes):
        for entry in schedule:
            if stamp not in entry['runtimes']:
                continue
            # The parsed value is not used; parsing still raises ValueError
            # when a runtime does not match the expected format.
            datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
            print('Set:', entry['set'], ' Call:', call_number,
                  'Runtime:', stamp)
            call_number += 1
def RunMLCollector(month, day):
    """Run the scheduled collection calls for the given month and day.

    Loads the sets file, walks every runtime of every set in ascending
    (string) order and, for each runtime that is still in the future, sleeps
    until it is due and then performs the API call for the owning set.  After
    each call the current time is appended to the set's ``call_times`` and
    the whole sets list is written back to the sets file.  Runtimes that are
    already in the past only produce a "skipping" message.

    NOTE(review): ``collection_type`` is not a parameter; it is resolved from
    the enclosing scope -- confirm it is defined at module level.
    """
    sets_path = config.GetSetsFileName(month, day, collection_type)
    schedule = config.ReadJSON(sets_path)

    ordered_runtimes = sorted(
        stamp for entry in schedule for stamp in entry['runtimes'])

    for stamp in ordered_runtimes:
        for entry in schedule:
            if stamp not in entry['runtimes']:
                continue

            due = datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
            if due <= datetime.datetime.now():
                print('skipping old runtime.')
                continue

            wait_seconds = int(
                (due - datetime.datetime.now()).total_seconds())
            print('Waiting', wait_seconds, 'seconds. Next Up',
                  entry['name'], entry['call'], due)
            sleeper.sleep(wait_seconds)

            # Set API calls: the first call filters by location, later calls
            # fetch updated statuses and store the returned value as 'call'.
            if entry['call'] == 1:
                collection.FilterStatusByLocation(entry)
            else:
                entry['call'] = collection.GetUpdatedStatuses(entry)

            entry['call_times'].append(datetime.datetime.now())
            config.WriteJSON(schedule, sets_path)
def GetFollowers(userid):
    """Fetch followers of ``userid`` and store them on the user's JSON record.

    Requests at most five pages (``count=200`` each) from the
    ``followers/list`` endpoint, following ``next_cursor`` until it is 0.
    Every follower in a page gets an ``influence_score`` computed from its
    ``followers_count`` and ``listed_count`` using the weights returned by
    ``config.GetWeights()``.  When the rate-limit status reports no remaining
    calls, the function sleeps for 15 minutes and retries.

    The pages are stored under ``user['followers']`` as a list of pages
    (each page being the ``'users'`` list of one response) and the user
    record is written back to its file.

    Args:
        userid: Identifier passed as ``user_id`` to the API and used to
            locate the user's file via ``config.GetUserFileName``.
    """
    print('Getting followers of', userid)
    twit_api = GetTwitterRest()
    filename = config.GetUserFileName(userid)
    user = config.ReadJSON(filename)
    weights = config.GetWeights()

    pageCount = 0
    followers = []
    next_cursor = -1
    while next_cursor != 0 and pageCount < 5:
        limits = twit_api.application.rate_limit_status()
        remaining = (
            limits['resources']['followers']['/followers/list']['remaining'])
        if remaining > 0:
            page = twit_api.followers.list(
                user_id=userid, count=200, cursor=next_cursor)
            # Score the follower records of this page.  A separate loop
            # variable is used so the loaded ``user`` record is not
            # overwritten before it is saved below.
            for follower in page['users']:
                follower['influence_score'] = (
                    follower['followers_count'] * weights['followers_count']
                    + follower['listed_count'] * weights['listed_count'])
            followers.append(page['users'])
            next_cursor = page['next_cursor']
            pageCount += 1
        else:
            print("Sleeping")
            sleeper.sleep(15 * 60)

    user['followers'] = followers
    config.WriteJSON(user, filename)
def ExtractTweets(month, days, sets, calls, num_tweets, collection_type,
                  clean=True, useWeights=True, wrapEntities=True):
    """Load collected tweets into a DataFrame, one row per tweet.

    For every (day, set, call) combination the matching tweet file is read,
    truncated to the first ``num_tweets`` tweets, and each tweet is annotated
    with ``day``, ``set``, ``call``, ``calltime`` (taken from the sets file)
    and ``influence_score``.

    Args:
        month: Month passed to the ``config`` file-name helpers.
        days: Iterable of day numbers.
        sets: Iterable of set numbers; used 1-based to index the sets file.
        calls: Iterable of call numbers; used 1-based to index a set's
            ``call_times``.
        num_tweets: Maximum tweets taken per file; values <= 0 fall back
            to 10.  The limit applies to each file independently.
        collection_type: Passed through to the ``config`` file-name helpers.
        clean: When true, the frame is passed through ``CleanTweets``.
        useWeights: When true, the influence score is the weighted sum of
            favorite, retweet, follower and listed counts using
            ``config.GetWeights()``; otherwise it is favorites + retweets.
        wrapEntities: When true, the frame is passed through
            ``WrapEntities``.

    Returns:
        The resulting pandas DataFrame (rows in collection order).
    """
    # Change days to interval days to pull from multiple months.

    # Validate the limit once, without shrinking it to the size of any
    # particular file; slicing already copes with shorter files.
    if num_tweets <= 0:
        num_tweets = 10

    weights = config.GetWeights() if useWeights else None

    # Determine file combos
    file_combos = [(day_num, set_name, call_num)
                   for day_num in days
                   for set_name in sets
                   for call_num in calls]

    # Get statuses for all combos
    collected = []
    for day_num, set_name, call_num in file_combos:
        filename = config.GetTweetFileName(month, day_num, set_name,
                                           call_num, collection_type)
        if filename == '':
            # Add in sample data
            continue

        status = config.ReadJSON(filename)[:num_tweets]
        set_records = config.ReadJSON(
            config.GetSetsFileName(month, day_num, collection_type))

        # Add each set attribute
        for tweet in status:
            tweet['day'] = day_num
            tweet['set'] = set_name
            tweet['call'] = call_num
            tweet['calltime'] = (
                set_records[set_name - 1]['call_times'][call_num - 1])

            if weights is not None:
                author = tweet['user']
                influence_score = (
                    tweet['favorite_count'] * weights['favorite_count']
                    + tweet['retweet_count'] * weights['retweet_count']
                    + author['followers_count'] * weights['followers_count']
                    + author['listed_count'] * weights['listed_count'])
            else:
                influence_score = (
                    tweet['favorite_count'] + tweet['retweet_count'])
            tweet['influence_score'] = influence_score

        collected.extend(status)

    # Load each into dict keyed by a running row number
    status_dict = dict(enumerate(collected))

    # Build the frame with one row per tweet.  Rows stay in collection
    # order; sorting by influence_score is intentionally not applied here.
    tweets = pd.DataFrame(status_dict).T

    if clean:
        tweets = CleanTweets(tweets)
    if wrapEntities:
        tweets = WrapEntities(tweets)

    return tweets
def RunNWCollector(month, day, localPlaces):
    """Run the scheduled collection and harvest users from local tweets.

    Loads the sets file, walks every runtime of every set in ascending
    (string) order and, for each runtime still in the future, sleeps until
    it is due, collects tweets with ``collection.FilterStatusByLocation``,
    appends the call time to the set's ``call_times`` and writes the sets
    back to the sets file.  The collected tweets are then read back; every
    tweet whose place ``full_name`` is in ``localPlaces`` contributes its
    user (annotated with an ``influence_score``).  Users without an existing
    user file are written to disk and their friends and followers fetched.
    Runtimes already in the past only produce a "skipping" message.

    Args:
        month: Month passed to ``config.GetSetsFileName``.
        day: Day passed to ``config.GetSetsFileName``.
        localPlaces: Collection of place full names treated as local, e.g.
            ['Erie, PA', 'Wesleyville, PA', 'Harborcreek, PA',
            'Lawrence Park, PA'].

    NOTE(review): ``collection_type`` is not a parameter; it is resolved from
    the enclosing scope -- confirm it is defined at module level.
    """
    import config

    sets_filename = config.GetSetsFileName(month, day, collection_type)
    sets = config.ReadJSON(sets_filename)

    ordered_runtimes = sorted(
        stamp for set_n in sets for stamp in set_n['runtimes'])

    for stamp in ordered_runtimes:
        for set_n in sets:
            if stamp not in set_n['runtimes']:
                continue

            due = datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
            if due <= datetime.datetime.now():
                print('skipping old runtime.')
                continue

            delta = int((due - datetime.datetime.now()).total_seconds())
            print('Waiting', delta, 'seconds. Next Up',
                  set_n['name'], set_n['call'], due)
            sleeper.sleep(delta)

            # Get tweets
            collection.FilterStatusByLocation(set_n)

            # Update sets.  The sets file name is kept in its own variable
            # so the per-user file names below can never redirect this write.
            set_n['call_times'].append(datetime.datetime.now())
            config.WriteJSON(sets, sets_filename)

            # Check each user; if local, write to users and get
            # friends/followers.
            # NOTE(review): GetTweetFileName is called with four arguments
            # here -- confirm whether a collection type must also be passed.
            tweets = config.ReadJSON(
                config.GetTweetFileName(set_n['month'], set_n['day'],
                                        set_n['set'], set_n['call']))
            weights = config.GetWeights()
            newUsers = []
            for tweet in tweets:
                place = tweet['place']
                if place and place['full_name'] in localPlaces:
                    author = tweet['user']
                    author['influence_score'] = (
                        author['followers_count'] * weights['followers_count']
                        + author['listed_count'] * weights['listed_count'])
                    newUsers.append(author)

            # Write each user to file, get friends, followers
            for user in newUsers:
                user_filename = config.GetUserFileName(user['id_str'])
                # A distinct name for the Path keeps the ``config`` module
                # usable for the WriteJSON call and for later iterations.
                if Path(user_filename).is_file():
                    # Update this - need the most current version but not
                    # if user is in this set
                    print('User', user['id_str'],
                          'already exists. Skipping for now.')
                else:
                    print('Writing user', user['id_str'])
                    config.WriteJSON(user, user_filename)
                    collection.GetFriends(user['id_str'])
                    collection.GetFollowers(user['id_str'])