예제 #1
0
def PreviewSchedule(month, day, collection_type):
    """Print a preview of the collection schedule stored in a sets file.

    Reads the sets file located via ``config.GetSetsFileName`` and prints the
    interval header taken from the first set, followed by one line per
    scheduled runtime across all sets, in ascending runtime order.

    Args:
        month: Month forwarded to ``config.GetSetsFileName``.
        day: Day forwarded to ``config.GetSetsFileName``.
        collection_type: Collection type forwarded to
            ``config.GetSetsFileName``.

    Raises:
        ValueError: If a runtime string does not match
            ``'%Y-%m-%d %H:%M:%S'``.
    """
    sets = config.ReadJSON(config.GetSetsFileName(month, day, collection_type))

    print("\n")
    print("Preview Schedule")
    print("____________________________________________")

    if not sets:
        # The header below indexes sets[0]; bail out instead of raising
        # IndexError when the file holds no sets.
        print("No sets found.")
        return

    first_set = sets[0]
    print("Interval Name:", first_set['interval_name'])
    print("Interval Type:", first_set['interval_type'])
    print("Total Intervals:", first_set['total_intervals'])
    print("Interval Number:", first_set['interval'])

    print('\n')
    print("Number Sets:", len(sets))
    print("Number Calls:", len(first_set['runtimes']))
    print("Current Call:", first_set['call'])
    print('\n')

    # Pair every runtime with the index of the set that owns it. Sorting the
    # pairs orders by runtime string first and by set position on ties, and
    # visits each (runtime, set) entry exactly once -- a runtime shared by
    # several sets is no longer printed once per duplicate per set.
    schedule = sorted(
        (runtime, index)
        for index, set_n in enumerate(sets)
        for runtime in set_n['runtimes'])

    for call_number, (runtime, index) in enumerate(schedule, start=1):
        # Parsed only to validate the format; the value itself is not used.
        datetime.datetime.strptime(runtime, '%Y-%m-%d %H:%M:%S')
        print('Set:', sets[index]['set'], ' Call:', call_number, 'Runtime:',
              runtime)
예제 #2
0
def RunMLCollector(month, day):
    """Run the scheduled collection calls stored in a sets file.

    Walks every runtime of every set in ascending order. For each runtime
    still in the future it sleeps until that time, performs the collection
    call for the owning set, records the call time on the set and writes the
    sets back to the same file. Runtimes already in the past are skipped.

    Args:
        month: Month forwarded to ``config.GetSetsFileName``.
        day: Day forwarded to ``config.GetSetsFileName``.

    Raises:
        ValueError: If a runtime string does not match
            ``'%Y-%m-%d %H:%M:%S'``.
    """
    # NOTE(review): ``collection_type`` is not a parameter of this function;
    # it has to exist as a module-level name -- confirm.
    filename = config.GetSetsFileName(month, day, collection_type)
    sets = config.ReadJSON(filename)

    # Pair each runtime with the index of its set and sort the pairs, so each
    # (runtime, set) entry is processed exactly once even when several sets
    # share the same runtime string.
    schedule = sorted(
        (runtime, index)
        for index, set_n in enumerate(sets)
        for runtime in set_n['runtimes'])

    for runtime, index in schedule:
        set_n = sets[index]
        timex = datetime.datetime.strptime(runtime, '%Y-%m-%d %H:%M:%S')

        # Read the clock once so the comparison and the computed delay agree
        # and the delay can never come out negative.
        now = datetime.datetime.now()
        if timex <= now:
            print('skipping old runtime.')
            continue

        delta = int((timex - now).total_seconds())
        print('Waiting', delta, 'seconds. Next Up', set_n['name'],
              set_n['call'], timex)
        sleeper.sleep(delta)

        # Set API calls: the first call filters by location, later calls
        # fetch updated statuses and store the returned value as the new
        # call number.
        if set_n['call'] == 1:
            collection.FilterStatusByLocation(set_n)
        else:
            set_n['call'] = collection.GetUpdatedStatuses(set_n)

        # NOTE(review): a datetime object is appended here; this assumes
        # config.WriteJSON knows how to serialize it -- confirm.
        calltime = datetime.datetime.now()
        set_n['call_times'].append(calltime)
        config.WriteJSON(sets, filename)
예제 #3
0
def GetFollowers(userid):
    """Fetch followers of ``userid`` and store them on the user's JSON file.

    Requests up to five pages of 200 followers each, following the cursor
    returned by the API. Every follower gets an ``influence_score`` computed
    from its ``followers_count`` and ``listed_count`` weighted by
    ``config.GetWeights()``. When the rate-limit status reports no remaining
    ``/followers/list`` calls, the function sleeps 15 minutes and retries.

    The collected pages are stored under ``user['followers']`` as a list of
    pages (one list of follower objects per request), and the user record is
    written back to the file it was read from.

    Args:
        userid: Id of the user whose followers are fetched; also used to
            locate the user file via ``config.GetUserFileName``.
    """
    print('Getting followers of', userid)
    twit_api = GetTwitterRest()

    filename = config.GetUserFileName(userid)
    user = config.ReadJSON(filename)
    weights = config.GetWeights()

    page_count = 0
    followers = []
    next_cursor = -1
    while next_cursor != 0 and page_count < 5:
        remaining = twit_api.application.rate_limit_status(
        )['resources']['followers']['/followers/list']['remaining']
        if remaining <= 0:
            print("Sleeping")
            sleeper.sleep(15 * 60)
            continue

        page = twit_api.followers.list(user_id=userid,
                                       count=200,
                                       cursor=next_cursor)

        # Score the follower objects themselves. The response is indexed by
        # 'users' and 'next_cursor', so iterating the response directly would
        # yield its keys rather than the followers. A distinct loop name also
        # keeps the ``user`` record loaded above intact for the final write.
        for follower in page['users']:
            follower['influence_score'] = (
                follower['followers_count'] * weights['followers_count']) + (
                    follower['listed_count'] * weights['listed_count'])

        # Kept as one entry per page to preserve the stored layout.
        followers.append(page['users'])
        next_cursor = page['next_cursor']
        page_count += 1

    user['followers'] = followers
    config.WriteJSON(user, filename)
예제 #4
0
def ExtractTweets(month,
                  days,
                  sets,
                  calls,
                  num_tweets,
                  collection_type,
                  clean=True,
                  useWeights=True,
                  wrapEntities=True):
    """Load stored tweets for every day/set/call combination into a DataFrame.

    For each combination of ``days`` x ``sets`` x ``calls`` the tweet file
    named by ``config.GetTweetFileName`` is read (combinations for which that
    function returns an empty string are skipped), truncated to at most
    ``num_tweets`` entries, and each tweet is annotated with ``day``, ``set``,
    ``call``, ``calltime`` and ``influence_score``.

    Args:
        month: Month forwarded to the ``config`` file-name helpers.
        days: Iterable of day values to load.
        sets: Iterable of set numbers; each is also used as a 1-based index
            into the day's sets file.
        calls: Iterable of call numbers; each is also used as a 1-based index
            into the set's ``call_times``.
        num_tweets: Maximum number of tweets taken from each file. Values
            ``<= 0`` fall back to 10.
        collection_type: Collection type forwarded to the ``config``
            file-name helpers.
        clean: When true, the result is passed through ``CleanTweets``.
        useWeights: When true, the influence score is the weighted sum of
            favorite, retweet, follower and listed counts using
            ``config.GetWeights()``; otherwise it is favorites + retweets.
        wrapEntities: When true, the result is passed through
            ``WrapEntities``.

    Returns:
        A pandas DataFrame with one row per tweet, after the optional
        ``CleanTweets`` / ``WrapEntities`` steps.
    """
    # Change days to interval days to pull from multiple months.

    # Fixed per-file cap. Slicing below clamps to each file's own length, so
    # one short file no longer shrinks the limit for every later file.
    limit = num_tweets if num_tweets > 0 else 10

    weights = config.GetWeights() if useWeights else None

    # The sets file depends only on the day; read it once per day, and only
    # when a tweet file for that day actually exists.
    sets_by_day = {}

    all_tweets = []
    for day_num in days:
        for set_name in sets:
            for call_num in calls:
                filename = config.GetTweetFileName(month, day_num, set_name,
                                                   call_num, collection_type)
                if filename == '':
                    continue

                status = config.ReadJSON(filename)[:limit]

                if day_num not in sets_by_day:
                    sets_by_day[day_num] = config.ReadJSON(
                        config.GetSetsFileName(month, day_num,
                                               collection_type))
                day_sets = sets_by_day[day_num]

                # Add each set attribute
                for tweet in status:
                    tweet['day'] = day_num
                    tweet['set'] = set_name
                    tweet['call'] = call_num
                    # Set and call numbers are 1-based in the sets file.
                    tweet['calltime'] = day_sets[set_name - 1]['call_times'][
                        call_num - 1]

                    if useWeights:
                        influence_score = (
                            tweet['favorite_count'] *
                            weights['favorite_count']) + (
                                tweet['retweet_count'] *
                                weights['retweet_count']) + (
                                    tweet['user']['followers_count'] *
                                    weights['followers_count']) + (
                                        tweet['user']['listed_count'] *
                                        weights['listed_count'])
                    else:
                        influence_score = tweet['favorite_count'] + tweet[
                            'retweet_count']

                    tweet['influence_score'] = influence_score

                all_tweets.extend(status)

    # Key each tweet by its running index and transpose, so that every tweet
    # becomes one row (same construction as before).
    status_dict = dict(enumerate(all_tweets))
    tweets = pd.DataFrame(status_dict).T

    if clean:
        tweets = CleanTweets(tweets)

    if wrapEntities:
        tweets = WrapEntities(tweets)

    return tweets
예제 #5
0
def RunNWCollector(month, day, localPlaces):
    """Run the scheduled location collection and harvest local users.

    Walks every runtime of every set in ascending order. For each runtime
    still in the future it sleeps until that time, runs
    ``collection.FilterStatusByLocation`` for the owning set, records the call
    time and writes the sets back to the sets file. It then reads the tweets
    for that set/call, and for every tweet whose place ``full_name`` is in
    ``localPlaces`` it scores the author and, if no user file exists yet,
    writes the user and fetches their friends and followers. Runtimes already
    in the past are skipped.

    Args:
        month: Month forwarded to ``config.GetSetsFileName``.
        day: Day forwarded to ``config.GetSetsFileName``.
        localPlaces: Collection of place ``full_name`` strings considered
            local, e.g. ``['Erie, PA', 'Wesleyville, PA']``.

    Raises:
        ValueError: If a runtime string does not match
            ``'%Y-%m-%d %H:%M:%S'``.
    """
    import config

    # NOTE(review): ``collection_type`` is not a parameter of this function;
    # it has to exist as a module-level name -- confirm.
    filename = config.GetSetsFileName(month, day, collection_type)
    sets = config.ReadJSON(filename)

    # Pair each runtime with the index of its set and sort the pairs, so each
    # (runtime, set) entry is processed exactly once even when several sets
    # share the same runtime string.
    schedule = sorted(
        (runtime, index)
        for index, set_n in enumerate(sets)
        for runtime in set_n['runtimes'])

    for runtime, index in schedule:
        set_n = sets[index]
        timex = datetime.datetime.strptime(runtime, '%Y-%m-%d %H:%M:%S')

        # Read the clock once so the comparison and the computed delay agree
        # and the delay can never come out negative.
        now = datetime.datetime.now()
        if timex <= now:
            print('skipping old runtime.')
            continue

        delta = int((timex - now).total_seconds())
        print('Waiting', delta, 'seconds. Next Up', set_n['name'],
              set_n['call'], timex)
        sleeper.sleep(delta)

        # Get tweets
        collection.FilterStatusByLocation(set_n)

        # Update sets
        # NOTE(review): a datetime object is appended here; this assumes
        # config.WriteJSON knows how to serialize it -- confirm.
        calltime = datetime.datetime.now()
        set_n['call_times'].append(calltime)
        config.WriteJSON(sets, filename)

        # Check each user, if local write to users and get friends/followers
        # NOTE(review): GetTweetFileName is called with four arguments here
        # but with a fifth (collection_type) in ExtractTweets -- confirm the
        # fifth parameter is optional.
        tweets = config.ReadJSON(
            config.GetTweetFileName(set_n['month'], set_n['day'],
                                    set_n['set'], set_n['call']))
        weights = config.GetWeights()
        new_users = []
        for tweet in tweets:
            place = tweet['place']
            if place and place['full_name'] in localPlaces:
                author = tweet['user']
                author['influence_score'] = (
                    author['followers_count'] * weights['followers_count']
                ) + (author['listed_count'] * weights['listed_count'])
                new_users.append(author)

        # Write each user to file, get friends, followers.
        # Separate names are used for the user file so that neither the
        # ``config`` module nor the sets ``filename`` is overwritten; both
        # are still needed for later users and later runtimes.
        for user in new_users:
            user_filename = config.GetUserFileName(user['id_str'])
            if Path(user_filename).is_file():
                # Update this - need the most current version but not if
                # user is in this set
                print('User', user['id_str'],
                      'already exists. Skipping for now.')
            else:
                print('Writing user', user['id_str'])
                config.WriteJSON(user, user_filename)
                collection.GetFriends(user['id_str'])
                collection.GetFollowers(user['id_str'])