Example #1
def thinking_about_this_stuff():
    import networkx as nx
    import community
    #from make_graph import make_graph

    api_path = '../api_keys/'
    screen_name = 'graphlabteam'
    apis = oauth_login(api_path)
    target, target_tweets, followers, following, user_lists = \
        get_user_data(apis[0], screen_name)

    edges = make_graph(target['id'], followers, apis)

    g = nx.Graph(data=edges)

    p0 = community.best_partition(g)
    p1 = community.best_partition(g, partition=p0)

    while p0 != p1:
        p0 = community.best_partition(g, partition=p1)
        p1 = community.best_partition(g, partition=p0)

    partitions = [[k for k in p1.keys() if p1[k] == v]
                  for v in set(p1.values())]
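The loop above keeps re-running Louvain community detection, seeding each pass with the previous partition, until the node-to-community assignment stops changing, and then groups node ids by community. A minimal self-contained sketch of the same pattern, using networkx's built-in karate-club graph as a stand-in for the Twitter follower graph (the toy graph and the final print are the only additions here):

import networkx as nx
import community  # python-louvain

# Stand-in graph; the example above builds this from Twitter follower edges.
g = nx.karate_club_graph()

# Seed each Louvain run with the previous result until the partition stabilizes.
p0 = community.best_partition(g)
p1 = community.best_partition(g, partition=p0)
while p0 != p1:
    p0 = community.best_partition(g, partition=p1)
    p1 = community.best_partition(g, partition=p0)

# Group node ids by the community they ended up in.
partitions = [[k for k in p1 if p1[k] == v] for v in set(p1.values())]
print 'found %d communities' % len(partitions)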
Example #2
def thinking_about_this_stuff():
    import networkx as nx
    import community
    #from make_graph import make_graph

    api_path = '../api_keys/'
    screen_name = 'graphlabteam'
    apis = oauth_login(api_path)
    target, target_tweets, followers, following, user_lists = \
        get_user_data(apis[0], screen_name)

    edges = make_graph(target['id'], followers, apis)

    g = nx.Graph(data=edges)

    p0 = community.best_partition(g)
    p1 = community.best_partition(g, partition=p0)

    while p0 != p1:
        p0 = community.best_partition(g, partition=p1)
        p1 = community.best_partition(g, partition=p0)

    partitions = [[k for k in p1.keys() if p1[k] == v]
                  for v in set(p1.values())]
Example #3
def load(screen_name=None, user_id=None, force_db_update = False,
                  force_twitter_update=False, debug=False):
    '''
    Main entry point into the gravitty module. Should be used by importing
    gravitty and calling gravitty.load('<your_screen_name>').

    Please see the README at github.com/ericjeske/gravitty for mandatory setup
    instructions and API requirements.

    The load function makes every attempt to load data from cached sources
    (MongoDB) before using Twitter's API. It is, however, suggested that
    multiple Twitter API keys be used with this app to avoid rate-limiting
    restrictions.

    By default, running this function will return a json object that can
    be parsed by d3.js to create a community graph. Additional information,
    including the raw twitter data, parsed twitter data, user similarity,
    community clustering dendrogram, community analytics data, community
    networkx graph, and community json object, can be returned by passing in
    debug=True.

    Also, by default, this app will create two pickled objects,
    one containing the debug data described above, the other containing the
    community json file. Subsequent calls for the same user will use this
    data to save time (and api calls).

    To override the use of pickled data, use force_db_update = True. Data
    for each follower will be pulled from mongoDB if possible, otherwise it
    will be pulled from twitter.

    To do a clean-slate download, downloading everything from twitter,
    use force_twitter_update = True.

    '''

    if screen_name is None and user_id is None:
        raise Exception('Please enter an id or name')

    # Assume that if screen_name was not provided (only user id) then a
    # pickle has not been created.
    if screen_name is not None:
        ABS_PKL_PATH = os.path.join(os.path.dirname(__file__), PKL_PATH)
        sn_file = ABS_PKL_PATH + str(screen_name) + '.' + PKL_FILE_EXT
        sn_file_debug = ABS_PKL_PATH + str(screen_name) + '.' + DBG_FILE_EXIT

        # Check to see if there are pickles for the user. Note that this will
        # be overridden if force_db_update is set to true.
        if os.path.isfile(sn_file_debug) and debug \
                and not force_twitter_update and not force_db_update:
            return pickle.load(open(sn_file_debug, 'rb'))

        if os.path.isfile(sn_file) \
                and not force_twitter_update and not force_db_update:
            return pickle.load(open(sn_file, 'rb'))

    # Use api credentials from files located in the API_PATH.
    ABS_API_PATH = os.path.join(os.path.dirname(__file__), API_PATH)
    apis = oauth_login(ABS_API_PATH)

    # Try to start up a mongo database connection to cache data in
    try:
        conn = pymongo.MongoClient("localhost", 27017)

    except pymongo.errors.ConnectionFailure:
        print 'Please run mongod and re-run program'
        raise Exception('DBError')

    db = conn[DB_NAME]

    # Get the target user's data from either the screen_name or user_id
    user_data = get_user_data(db, apis[0],
                              name = screen_name, uid = user_id,
                              force = force_twitter_update)

    # If the user is protected (or has more than the maximum
    # followers/friends), then return an error
    if user_data is None:
        print 'Was unable to access data for %s / %s' % (screen_name, user_id)
        raise Exception('TargetError')

    user_info, user_tweets, followers, following, user_lists = user_data

    # Using the target user's list of followers (user ids), get the same
    # information we just got for the target user for each of its followers
    raw_df = get_follower_data(db, apis, followers,
                               force = force_twitter_update)

    # Filter inactive users out of the dataframe, then parse the raw dataframe
    # to extract the relevant features from the raw data.
    df = parse_dataframe( filter_dataframe(raw_df) )

    # With the features in hand, calculate the latent similarity between each
    # set of users. See similarity.py for more detail on the calculations of
    # this similarity metric.

    # The resulting dataframe will be a square matrix indexed/columned by
    # user_id and contain the undirected edge weights between each pair of
    # users.
    df_similarity = make_similarity_dataframe(df)
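    # For illustration only (hypothetical values): for two users such as
    # 1244850380 and 562363, df_similarity.loc[1244850380, 562363] and
    # df_similarity.loc[562363, 1244850380] would hold the same edge weight,
    # e.g. 0.37, since the similarity is undirected.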

    # Make an undirected graph representing the relationship between each
    # pair of users, if any. Each node ID is a user ID; each edge weight is
    # equal to the similarity score between those two users.
    graph = make_graph(df, df_similarity)

    # Using the louvain method, find communities within the weighted graph.
    # The returned dendrogram is a list of dictionaries where the values of
    # each dictionary are the keys of the next dictionary. The length of the
    # dendrogram indicates the number of levels of community clusters
    # detected.
    dendrogram = generate_dendrogram(graph)

    # Add a final mapping to the dendrogram that maps everyone into the
    # same community. They are, after all, followers of the same user.
    dendrogram.append({k:0 for k in dendrogram[-1].values()})
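    # For illustration (hypothetical ids): a two-level dendrogram such as
    #     [{111: 0, 222: 0, 333: 1}, {0: 0, 1: 0}]
    # maps user ids to level-0 communities, and then maps those level-0
    # communities to the single root community appended above.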

    # Modify the dataframe to contain columns titled 'cid + <level>'. Each
    # column contains the community ids at that level for each user.
    # Also, this is a convenient time to calculate graph modularity at each
    # level, so produce that here as well.
    df, modularity = get_community_assignment(df, graph, dendrogram)

    num_levels = len(dendrogram)

    # For each community at each level of the dendrogram, find the topics,
    # sentiment, biggest influencers, etc. for each.
    data = get_community_analytics(df, graph, num_levels,
                                   community_modularity = modularity)

    # Both the mentioned and most connected users fields from the community
    # analytics function are user ids. Turn them into screen names.
    data = get_screen_names(data, 'mentioned', df, db, apis[0])
    data = get_screen_names(data, 'most_connected', df, db, apis[0])

    # Close the database connection. It is no longer needed.
    conn.close()

    # Create a networkx graph where each node represents a community. Edges
    # represent membership in the larger community at the next level up the
    # dendrogram and have no edge weights. The data obtained in the previous
    # steps from community_analytics is loaded into the attributes of each
    # node.
    community_graph = create_community_graph(data, dendrogram)

    # Parse this graph into a json representation for consumption by d3.js.
    community_json = create_community_json(community_graph, user_info)

    # Just in case we don't have the screen name, grab it.
    if screen_name is None:
        screen_name = user_info['screen_name']

    # Pickle the objects for reuse.
    ABS_PKL_PATH = os.path.join(os.path.dirname(__file__), PKL_PATH)
    sn_file = ABS_PKL_PATH + str(screen_name) + '.' + PKL_FILE_EXT
    sn_file_debug = ABS_PKL_PATH + str(screen_name) + '.' + DBG_FILE_EXIT

    pickle.dump((raw_df, df, df_similarity, dendrogram, data,
                 community_graph, community_json), open(sn_file_debug, 'wb'))

    pickle.dump(community_json, open(sn_file, 'wb'))

    # If debug is true, return all of the precursor objects along with the json.
    if debug:
        return (raw_df, df, df_similarity, dendrogram, data,
                community_graph, community_json)

    # Otherwise return the json object
    return community_json
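Going by the docstring above, a minimal usage sketch of the module looks like the following; the screen name is simply one that appears elsewhere in these examples, and the MongoDB and API-key setup described in the README is assumed to be in place:

import gravitty

# Default call: returns the community json consumed by d3.js, reusing pickled
# or MongoDB-cached data when it is available.
community_json = gravitty.load(screen_name='ZipfianAcademy')

# Debug call: also returns the intermediate objects (raw and parsed dataframes,
# similarity matrix, dendrogram, analytics data, community graph, community
# json), refreshing follower data from MongoDB / Twitter where needed.
debug_data = gravitty.load(screen_name='ZipfianAcademy',
                           force_db_update=True, debug=True)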
Example #4
from datetime import datetime, timedelta

from constants import CONSUMER_KEY, CONSUMER_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET
from twitter import TwitterStream
from utils import oauth_login

# Note: `track` and `locations` (used in the stream filter below) are defined
# elsewhere in the original script; the imports mirror the companion collector
# in Example #8.

NUMBER_OF_COLLECTIONS = 5
COLLECTION_TIME = 2.5  # in minutes
WAIT_TIME = 10  # in minutes

date_format = '%m/%d/%Y %H:%M:%S'

broncos, panthers, counts = [], [], []
for counter in range(1, NUMBER_OF_COLLECTIONS + 1):
    print '------------------------------------------'
    print 'COLLECTION NUMBER', counter
    broncos_counter, panthers_counter = 0, 0
    count_dict = {'start_time': datetime.now().strftime(format=date_format)}

    # Create a stream instance
    auth = oauth_login(consumer_key=CONSUMER_KEY,
                       consumer_secret=CONSUMER_SECRET,
                       token=OAUTH_TOKEN,
                       token_secret=OAUTH_TOKEN_SECRET)
    twitter_stream = TwitterStream(auth=auth)
    stream = twitter_stream.statuses.filter(track=track, locations=locations)

    endTime = datetime.now() + timedelta(minutes=COLLECTION_TIME)
    while datetime.now() <= endTime:  # collect tweets while current time is less than endTime
        for tweet in stream:
            if 'text' in tweet.keys():
                if datetime.now() > endTime:
                    break
                elif 'Broncos' in tweet['text'] and 'Panthers' in tweet['text']:
                    broncos.append(tweet)
                    panthers.append(tweet)
                    broncos_counter += 1
                    panthers_counter += 1
                    print 'Panthers: %s, Broncos: %s' % (panthers_counter, broncos_counter)
                elif 'Broncos' in tweet['text']:
Example #5
# -*- coding: utf-8 -*-
import csv

from clint import resources

from utils import oauth_login, base_url, bank_id, account_id

resources.init('openbankproject', 'evmakesgeo')
key_file = resources.user.read('config.ini')

openbank = oauth_login(base_url, key_file)

# Parse coordinates from the csv
reader = csv.reader(open('static/ireland_points.csv'))
firstline = True
csv_data = []
for row in reader:
    if firstline:  # skip the header line
        firstline = False
        continue
    csv_data.append((row[0], row[1]))

# Get transaction ids
response = openbank.get(
    "{}obp/v1.2.1/banks/{}/accounts/{}/owner/transactions".format(
        base_url, bank_id, account_id))
transactions = [each['id']
                for each in response.json()['transactions']][0:len(csv_data)]

# Create geo metadata for each transaction id in the list
for index, transaction in enumerate(transactions):
Example #6
# TODO: add secret key to url (or POST?)
# also look at invalid signature error message (oauth) or if that is part of missing secret key
# (scala obp importer has it working)

#import json
from clint import resources
from utils import oauth_login, base_url

resources.init('openbankproject', 'evmakesgeo')
key_file = resources.user.read('config.ini')

# key file in a place like: /Users/simonredfern/Library/Application\ Support/evmakesgeo/config.ini

print "key file is: %s" % key_file

openbank = oauth_login(base_url, key_file)

# Load a json file for sandbox creation.
with open('/Users/simonredfern/Documents/OpenBankProject/DATA/BNPP/OBP-sandbox-bnpp-fr_compact.json') as data_file:
    #data = json.load(data_file)
    data = data_file.read().replace('\n', '')

url = "{}obp/vsandbox/v1.0/data-import".format(base_url)
print 'url is: %s' % url
#print 'data is: %s' % data

headers = {
    'content-type': 'application/json',
    'Accept': 'application/json',
}
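The snippet stops after building the request headers. Judging from the GET call in the previous example, the import would presumably finish by POSTing `data` to `url`; the call below is an assumption about that missing step, not code from the original:

# Assumed continuation: send the sandbox json to the data-import endpoint.
response = openbank.post(url, data=data, headers=headers)
print 'status: %s' % response.status_code
print 'body: %s' % response.text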
Example #7
# -*- coding: utf-8 -*-
from utils import oauth_login
from data import get_user_data, get_follower_data
#from make_graph import make_graph

api_path = '../api_keys/'

screen_name = 'ZipfianAcademy'
user_id = 1244850380
#screen_name = 'graphlabteam'

apis = oauth_login(api_path)[3:]

target, target_tweets, followers, following, user_lists = \
    get_user_data(apis[2], screen_name=screen_name, user_id=user_id)

print target['id']
ind = followers.index(562363) + 1

data = get_follower_data(apis, followers[ind:])
#g = make_graph(target['id'], followers, apis)


def thinking_about_this_stuff():
    import networkx as nx
    import community
    #from make_graph import make_graph

    api_path = '../api_keys/'
    screen_name = 'graphlabteam'
    apis = oauth_login(api_path)
Example #8
from constants import CONSUMER_KEY, CONSUMER_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET
from twitter import TwitterStream
from utils import oauth_login, save_json

track = "Patriots"  # Tweets for Patriots

TOTAL_TWEETS = 2500

patriots = []
patriots_counter = 0

while patriots_counter < TOTAL_TWEETS:  # collect tweets until we have TOTAL_TWEETS of them
    # Create a stream instance
    auth = oauth_login(consumer_key=CONSUMER_KEY,
                       consumer_secret=CONSUMER_SECRET,
                       token=OAUTH_TOKEN,
                       token_secret=OAUTH_TOKEN_SECRET)
    twitter_stream = TwitterStream(auth=auth)
    stream = twitter_stream.statuses.filter(track=track)
    counter = 0
    for tweet in stream:
        if patriots_counter == TOTAL_TWEETS:
            print 'break'
            break
        elif counter % 500 == 0 and counter != 0:
            print 'get new stream'
            break
        else:
            patriots.append(tweet)
            patriots_counter += 1
            counter += 1
Example #9
# -*- coding: utf-8 -*-
from utils import oauth_login
from data import get_user_data, get_follower_data
#from make_graph import make_graph

api_path = '../api_keys/'

screen_name = 'ZipfianAcademy'
user_id = 1244850380
#screen_name = 'graphlabteam'

apis = oauth_login(api_path)[3:]

target, target_tweets, followers, following, user_lists = \
    get_user_data(apis[2], screen_name=screen_name, user_id=user_id)

print target['id']
ind = followers.index(562363) + 1

data = get_follower_data(apis, followers[ind:])
#g = make_graph(target['id'], followers, apis)


def thinking_about_this_stuff():
    import networkx as nx
    import community
    #from make_graph import make_graph

    api_path = '../api_keys/'
    screen_name = 'graphlabteam'
    apis = oauth_login(api_path)