def calculate():
    """Summarize the follower counts of SCREEN_NAME's followers.

    Reads the follower ids stored for ``SCREEN_NAME`` in Redis, loads each
    follower's cached ``info.json`` document, and groups the followers by
    their ``followers_count``.  The grouping is written back to Redis as a
    JSON snapshot under the ``follower_freqs`` key.

    Prints a table of every follower that has one of the ten highest
    distinct follower counts (so the table may hold more than ten rows when
    counts tie), followed by the average follower count of the sample.

    Followers whose profile is not cached in Redis are skipped.  Returns
    nothing; all results go to stdout and Redis.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(
        r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'follower_ids')))

    # The Redis MGET command requires at least one key, so skip the round
    # trip entirely when there are no follower ids.
    info_keys = [
        getRedisIdByUserId(follower_id, 'info.json')
        for follower_id in follower_ids
    ]
    raw_followers = r.mget(info_keys) if info_keys else []
    followers = [json.loads(f) for f in raw_followers if f is not None]

    # Map followers_count -> list of the followers having that count
    freqs = {}
    for f in followers:
        freqs.setdefault(f['followers_count'], []).append({
            'screen_name': f['screen_name'],
            'user_id': f['id'],
        })

    # It could take a few minutes to calculate freqs, so store a snapshot for
    # later use (json.dumps turns the integer count keys into strings)

    r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'),
          json.dumps(freqs))

    keys = sorted(freqs)

    print('The top 10 followers from the sample:')

    # The first column holds screen names (it was previously labelled 'Date')
    field_names = ['Screen Name', 'Count']
    pt = PrettyTable(field_names=field_names)
    pt.align = 'l'

    top_followers = [(user['screen_name'], k) for k in keys[-10:]
                     for user in freqs[k]]
    for (user, freq) in reversed(top_followers):
        pt.add_row([user, pp(freq)])

    print(pt)

    # One entry per follower, so the mean is weighted by how many followers
    # share each count
    all_freqs = [k for k in keys for _ in freqs[k]]

    # Floor division keeps the integer average that `/` gave on Python 2
    # ints; an empty sample reports 0 instead of raising.
    avg = sum(all_freqs) // len(all_freqs) if all_freqs else 0

    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
def calculate():
    """Summarize the follower counts of SCREEN_NAME's followers.

    Reads the follower ids stored for ``SCREEN_NAME`` in Redis, loads each
    follower's cached ``info.json`` document, and groups the followers by
    their ``followers_count``.  The grouping is written back to Redis as a
    JSON snapshot under the ``follower_freqs`` key.

    Prints a table of every follower that has one of the ten highest
    distinct follower counts (so the table may hold more than ten rows when
    counts tie), followed by the average follower count of the sample.

    Followers whose profile is not cached in Redis are skipped.  Returns
    nothing; all results go to stdout and Redis.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME,
                        'follower_ids')))

    # The Redis MGET command requires at least one key, so skip the round
    # trip entirely when there are no follower ids.
    info_keys = [getRedisIdByUserId(follower_id, 'info.json')
                 for follower_id in follower_ids]
    raw_followers = r.mget(info_keys) if info_keys else []
    followers = [json.loads(f) for f in raw_followers if f is not None]

    # Map followers_count -> list of the followers having that count
    freqs = {}
    for f in followers:
        freqs.setdefault(f['followers_count'], []).append(
            {'screen_name': f['screen_name'], 'user_id': f['id']})

    # It could take a few minutes to calculate freqs, so store a snapshot for
    # later use (json.dumps turns the integer count keys into strings)

    r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'),
          json.dumps(freqs))

    keys = sorted(freqs)

    print('The top 10 followers from the sample:')

    # The first column holds screen names (it was previously labelled 'Date')
    fields = ['Screen Name', 'Count']
    pt = PrettyTable(fields=fields)
    for field in fields:
        pt.set_field_align(field, 'l')

    top_followers = [(user['screen_name'], k) for k in keys[-10:]
                     for user in freqs[k]]
    for (user, freq) in reversed(top_followers):
        pt.add_row([user, pp(freq)])

    pt.printt()

    # One entry per follower, so the mean is weighted by how many followers
    # share each count
    all_freqs = [k for k in keys for _ in freqs[k]]

    # Floor division keeps the integer average that `/` gave on Python 2
    # ints; an empty sample reports 0 instead of raising.
    avg = sum(all_freqs) // len(all_freqs) if all_freqs else 0

    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
def calculate():
    """Summarize the follower counts of SCREEN_NAME's followers.

    Reads the follower ids stored for ``SCREEN_NAME`` in Redis, loads each
    follower's cached ``info.json`` document, and groups the followers by
    their ``followers_count``.  The grouping is written back to Redis as a
    JSON snapshot under the ``follower_freqs`` key.

    Prints a table of every follower that has one of the ten highest
    distinct follower counts (so the table may hold more than ten rows when
    counts tie), followed by the average follower count of the sample.

    Followers whose profile is not cached in Redis are skipped.  Returns
    nothing; all results go to stdout and Redis.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, "follower_ids")))

    # The Redis MGET command requires at least one key, so skip the round
    # trip entirely when there are no follower ids.
    info_keys = [getRedisIdByUserId(follower_id, "info.json") for follower_id in follower_ids]
    raw_followers = r.mget(info_keys) if info_keys else []
    followers = [json.loads(f) for f in raw_followers if f is not None]

    # Map followers_count -> list of the followers having that count
    freqs = {}
    for f in followers:
        freqs.setdefault(f["followers_count"], []).append(
            {"screen_name": f["screen_name"], "user_id": f["id"]}
        )

    # It could take a few minutes to calculate freqs, so store a snapshot for
    # later use (json.dumps turns the integer count keys into strings)

    r.set(getRedisIdByScreenName(SCREEN_NAME, "follower_freqs"), json.dumps(freqs))

    keys = sorted(freqs)

    print("The top 10 followers from the sample:")

    # The first column holds screen names (it was previously labelled "Date")
    field_names = ["Screen Name", "Count"]
    pt = PrettyTable(field_names=field_names)
    pt.align = "l"

    top_followers = [(user["screen_name"], k) for k in keys[-10:] for user in freqs[k]]
    for (user, freq) in reversed(top_followers):
        pt.add_row([user, pp(freq)])

    print(pt)

    # One entry per follower, so the mean is weighted by how many followers
    # share each count
    all_freqs = [k for k in keys for _ in freqs[k]]

    # Floor division keeps the integer average that `/` gave on Python 2
    # ints; an empty sample reports 0 instead of raising.
    avg = sum(all_freqs) // len(all_freqs) if all_freqs else 0

    print("\nThe average number of followers for %s's followers: %s" % (SCREEN_NAME, pp(avg)))
コード例 #4
0
SCREEN_NAME = 'timoreilly'  # XXX: iPython Notebook cannot prompt for input
THRESHOLD = 15  # XXX: iPython Notebook cannot prompt for input

# Redis on localhost, default connection settings

r = redis.Redis()

# Resolve every friend id of SCREEN_NAME to a lower-cased screen name

friend_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'))
friend_screen_names = []
for friend_id in friend_ids:
    try:
        cached_profile = r.get(getRedisIdByUserId(friend_id, 'info.json'))
        profile = json.loads(cached_profile)
        friend_screen_names.append(profile['screen_name'].lower())
    except TypeError:
        # Profile not locally available in Redis - look it up or skip it
        continue

# Fetch the (entity, frequency) tuples from CouchDB, then order them by
# ascending frequency

server = couchdb.Server('http://localhost:5984')
db = server['tweets-user-timeline-' + SCREEN_NAME]

entities_freqs = [(row.key, row.value)
                  for row in db.view('index/entity_count_by_doc', group=True)]
entities_freqs.sort(key=lambda pair: pair[1])

# Keep only user entities with insufficient frequencies
コード例 #5
0
# Script fragment (truncated in this view): collects SCREEN_NAME's own id plus
# the ids of its friends from Redis, then walks each of those users and their
# friends. The graph `g` is created here; the code that fills it is not
# visible in this excerpt.
g = nx.Graph()
r = redis.Redis()

# Compute all ids for nodes appearing in the graph

friend_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids')))
id_for_screen_name = json.loads(r.get(getRedisIdByScreenName(SCREEN_NAME,
                                'info.json')))['id']
# The user's own id comes first, followed by every friend id
ids = [id_for_screen_name] + friend_ids

for current_id in ids:
    print >> sys.stderr, 'Processing user with id', current_id

    try:
        current_info = json.loads(r.get(getRedisIdByUserId(current_id, 'info.json'
                                  )))
        current_screen_name = current_info['screen_name']
        # Rebinds friend_ids; `ids` was built above, so the outer loop's
        # iteration is unaffected
        friend_ids = list(r.smembers(getRedisIdByScreenName(current_screen_name,
                          'friend_ids')))

        # filter out ids for this person if they aren't also SCREEN_NAME's friends too,
        # which is the basis of the query
        # (`fid in ids` scans the `ids` list for each candidate)

        friend_ids = [fid for fid in friend_ids if fid in ids]
    except Exception, e:
        # NOTE(review): the handler only logs; execution then falls through to
        # the loop below with whatever friend_ids held before this iteration
        # (the previous user's list, or SCREEN_NAME's full list on the first
        # pass). A `continue` is probably intended - confirm.
        print >> sys.stderr, 'Skipping', current_id

    for friend_id in friend_ids:
        try:
            friend_info = json.loads(r.get(getRedisIdByUserId(friend_id,
                                     'info.json')))
コード例 #6
0
# Script fragment (truncated in this view): collects SCREEN_NAME's own id plus
# the ids of its friends from Redis, then walks each of those users and their
# friends. The graph `g` is created here; the code that fills it is not
# visible in this excerpt.
g = nx.Graph()
r = redis.Redis()

# Compute all ids for nodes appearing in the graph

friend_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids')))
id_for_screen_name = json.loads(r.get(getRedisIdByScreenName(SCREEN_NAME,
                                'info.json')))['id']
# The user's own id comes first, followed by every friend id
ids = [id_for_screen_name] + friend_ids

for current_id in ids:
    print >> sys.stderr, 'Processing user with id', current_id

    try:
        current_info = json.loads(r.get(getRedisIdByUserId(current_id, 'info.json'
                                  )))
        current_screen_name = current_info['screen_name']
        # Rebinds friend_ids; `ids` was built above, so the outer loop's
        # iteration is unaffected
        friend_ids = list(r.smembers(getRedisIdByScreenName(current_screen_name,
                          'friend_ids')))

        # filter out ids for this person if they aren't also SCREEN_NAME's friends too,
        # which is the basis of the query
        # (`fid in ids` scans the `ids` list for each candidate)

        friend_ids = [fid for fid in friend_ids if fid in ids]
    except Exception, e:
        # NOTE(review): the handler only logs; execution then falls through to
        # the loop below with whatever friend_ids held before this iteration
        # (the previous user's list, or SCREEN_NAME's full list on the first
        # pass). A `continue` is probably intended - confirm.
        print >> sys.stderr, 'Skipping', current_id

    for friend_id in friend_ids:
        try:
            friend_info = json.loads(r.get(getRedisIdByUserId(friend_id,
                                     'info.json')))
コード例 #7
0
# Script fragment (truncated in this view): collects SCREEN_NAME's own id plus
# the ids of its friends from Redis, then walks each of those users and their
# friends. The excerpt ends in the middle of a json.loads( call.
r = redis.Redis()

# Compute all ids for nodes appearing in the graph

friend_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME,
                                                    'friend_ids')))
id_for_screen_name = json.loads(
    r.get(getRedisIdByScreenName(SCREEN_NAME, 'info.json')))['id']
# The user's own id comes first, followed by every friend id
ids = [id_for_screen_name] + friend_ids

for current_id in ids:
    print >> sys.stderr, 'Processing user with id', current_id

    try:
        current_info = json.loads(
            r.get(getRedisIdByUserId(current_id, 'info.json')))
        current_screen_name = current_info['screen_name']
        # Rebinds friend_ids; `ids` was built above, so the outer loop's
        # iteration is unaffected
        friend_ids = list(
            r.smembers(
                getRedisIdByScreenName(current_screen_name, 'friend_ids')))

        # filter out ids for this person if they aren't also SCREEN_NAME's friends too,
        # which is the basis of the query
        # (`fid in ids` scans the `ids` list for each candidate)

        friend_ids = [fid for fid in friend_ids if fid in ids]
    except Exception, e:
        # NOTE(review): the handler only logs; execution then falls through to
        # the loop below with whatever friend_ids held before this iteration
        # (the previous user's list, or SCREEN_NAME's full list on the first
        # pass). A `continue` is probably intended - confirm.
        print >> sys.stderr, 'Skipping', current_id

    for friend_id in friend_ids:
        try:
            friend_info = json.loads(
from twitter__util import getRedisIdByUserId

# Script fragment: takes the screen name and a frequency threshold from the
# command line, then resolves the screen names of that user's friends from
# the profiles cached in Redis.
SCREEN_NAME = sys.argv[1]
THRESHOLD = int(sys.argv[2])

# Connect using default settings for localhost

r = redis.Redis()

# Compute screen_names for friends

friend_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'))
friend_screen_names = []
for friend_id in friend_ids:
    try:
        # Screen names are lower-cased as they are collected
        friend_screen_names.append(json.loads(r.get(getRedisIdByUserId(friend_id,
                                   'info.json')))['screen_name'].lower())
    except TypeError, e:
        continue  # not locally available in Redis - look it up or skip it

# Pull the  list of (entity, frequency) tuples from CouchDB

# NOTE(review): the next statement is cut off in this excerpt (credentials
# masked, call never closed); the rest of the script is not visible here.
server = couchdb.Server('http://*****:*****@'
コード例 #9
0
ファイル: mine.py プロジェクト: sorenu/Assignment1
# Harvest script: fetches the profile, friends and followers of SCREEN_NAME,
# then the profiles of all those users, and finally the friends of each of
# them. Everything is cached in Redis via the helper functions.
t = login()
r = redis.Redis()

# get info and friends for central user
getUserInfo(t, r, [SCREEN_NAME])
getFriends = functools.partial(
    _getFriendsOrFollowersUsingFunc, t.friends.ids, 'friend_ids', t, r)
getFollowers = functools.partial(
    _getFriendsOrFollowersUsingFunc, t.followers.ids, 'follower_ids', t, r)

# get friends and followers of central user
friend_ids = getFriends(SCREEN_NAME)
follower_ids = getFollowers(SCREEN_NAME)

# do union of friends and followers
ids = list(r.sunion(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'),
                    getRedisIdByScreenName(SCREEN_NAME, 'follower_ids')))

# get user info for friends and followers
getUserInfo(t, r, user_ids=ids)

# get friends of friends and followers
for user_id in ids:
    # A profile may be missing from Redis if the lookup above did not store
    # one for this id; skip such users instead of crashing in json.loads.
    raw_info = r.get(getRedisIdByUserId(user_id, 'info.json'))
    if raw_info is None:
        continue
    screen_name = json.loads(raw_info)['screen_name']
    try:
        getFriends(screen_name)
    except Exception:
        # Best effort: one failing user must not stop the harvest, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        continue





# Script fragment: queries the Infochimps "strong links" endpoint for
# SCREEN_NAME and prints each linked user's screen name (resolved through the
# profiles cached in Redis) next to the second element of its link entry.
API_KEY = sys.argv[2]
API_ENDPOINT = \
    'http://api.infochimps.com/soc/net/tw/strong_links.json?screen_name=%s&apikey=%s'

r = redis.Redis()  # default connection settings on localhost

url = API_ENDPOINT % (SCREEN_NAME, API_KEY)
try:
    response = urllib2.urlopen(url)
except urllib2.URLError:
    print('Failed to fetch ' + url)
    raise  # bare raise keeps the original traceback

# Always release the HTTP connection, even if reading or parsing fails
try:
    strong_links = json.loads(response.read())
finally:
    response.close()

# resolve screen names and print to screen:

print("%s's Strong Links" % (SCREEN_NAME, ))
print('-' * 30)
for sl in strong_links['strong_links']:
    if sl is None:
        continue

    try:
        # sl[0] is used as the user id for the Redis lookup
        user_info = json.loads(r.get(getRedisIdByUserId(sl[0], 'info.json')))
        print('%s %s' % (user_info['screen_name'], sl[1]))
    except Exception:
        # Best effort: report the unresolved entry and move on
        sys.stderr.write("ERROR: couldn't resolve screen_name for %s\n" % (sl, ))
        sys.stderr.write(
            "Maybe you haven't harvested data for this person yet?\n")

from twitter__util import getRedisIdByUserId

# Script fragment: takes the screen name and a frequency threshold from the
# command line, then resolves the screen names of that user's friends from
# the profiles cached in Redis.
SCREEN_NAME = sys.argv[1]
THRESHOLD = int(sys.argv[2])

# Connect using default settings for localhost

r = redis.Redis()

# Compute screen_names for friends

friend_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, "friend_ids"))
friend_screen_names = []
for friend_id in friend_ids:
    try:
        # Screen names are stored with their original casing here
        friend_screen_names.append(json.loads(r.get(getRedisIdByUserId(friend_id, "info.json")))["screen_name"])
    except TypeError, e:
        continue  # not locally available in Redis - look it up or skip it

# Pull the  list of (entity, frequency) tuples from CouchDB

# NOTE(review): the line below is mangled in this excerpt - the masked
# credentials swallowed the text between the server URL and the tail of a
# later expression that compares ef[1] with THRESHOLD. It is not valid Python
# as shown; restore it from the original script.
server = couchdb.Server("http://*****:*****@" and ef[1] >= THRESHOLD]
コード例 #12
0
import sys
import redis
import json
from twitter__util import getRedisIdByScreenName
from twitter__util import getRedisIdByUserId

# Prints the distinct, lower-cased profile locations of EGO and EGO's friends,
# using the profiles cached in Redis.
EGO = sys.argv[1]

r = redis.Redis()

# EGO's own id first, then every friend id stored for EGO
friend_ids = list(r.smembers(getRedisIdByScreenName(EGO, 'friend_ids')))
ego_info = json.loads(r.get(getRedisIdByScreenName(EGO, 'info.json')))
ego_id = ego_info['id']
ids = [ego_id] + friend_ids

normalized_locations = []
for user_id in ids:
    location_json = r.get(getRedisIdByUserId(str(user_id), 'info.json'))
    if not location_json:
        continue  # no cached profile for this user

    location = json.loads(location_json)['location']
    if not location:
        continue  # profile has an empty location

    normalized_locations.append(location.lower().encode("utf-8"))

# Collapse duplicates before printing
unique_locations = set(normalized_locations)
for ul in unique_locations:
    print(ul)
コード例 #13
0
ファイル: create_graph.py プロジェクト: sorenu/Assignment1
# Builds and draws a graph linking EGO to each of its friends/followers, and
# each friend/follower to those of their own friends that are also in EGO's
# friend/follower set.
graph = nx.Graph()

EGO = sys.argv[1]
EGO_ID = json.loads(r.get(getRedisIdByScreenName(EGO, 'info.json')))['id']
TEMP_UNION_KEY = 'temp$union'

try:
    # union EGO's friends and followers
    r.sunionstore(TEMP_UNION_KEY, [getRedisIdByScreenName(EGO, 'friend_ids'),
                                   getRedisIdByScreenName(EGO, 'follower_ids')])
    friend_follower_ids = list(r.smembers(TEMP_UNION_KEY))

    # for each friend/follower :
    for friend_follower_id in friend_follower_ids:
        # intersect friend/follower's friends with ego's union
        # NB: the getFriends function used for mining only uses screen_name keys
        try:
            screen_name = json.loads(
                r.get(getRedisIdByUserId(friend_follower_id, 'info.json'))
            )['screen_name']
        except Exception:
            # No usable cached profile for this id: skip it, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
        intersecting_ids = list(
            r.sinter(TEMP_UNION_KEY,
                     getRedisIdByScreenName(screen_name, 'friend_ids')))
        # add edge between EGO and the current friend/follower
        graph.add_edge(EGO_ID, friend_follower_id)
        # add edges between each id in the intersection and the id of the
        # current friend/follower
        for intersecting_id in intersecting_ids:
            graph.add_edge(friend_follower_id, intersecting_id)
finally:
    # cleanup: remove the temporary union key even when a step above fails
    r.delete(TEMP_UNION_KEY)

# plot the graph
plt.figure(1)
nx.draw_spring(graph, node_size=65, node_color='#7FA8FF', node_shape='o',
               edge_color='.1', with_labels=False, width=1.3)
コード例 #14
0
# Script setup: create an empty graph and a Redis connection, then load the
# seed ids from the Redis set named by LIST_NAME.
g = nx.Graph()
r = redis.Redis()


# Name of the Redis set that holds the ids to start from
LIST_NAME = "oslo-ids"

starting_ids = list(r.smembers(LIST_NAME))
# Echo the seed ids to stdout
print starting_ids




# Compute all ids for nodes appearing in the graph
for _id in starting_ids:
	friend_ids = list(r.smembers(getRedisIdByUserId(_id, 'friend_ids')))
	id_for_screen_name = _id
	ids = [id_for_screen_name] + friend_ids

	for current_id in ids:
		print >> sys.stderr, 'Processing user with id', current_id

		try:
			raw_current_info = r.get(getRedisIdByUserId(current_id, 'info.json'
									  ))
			if not raw_current_info:
				# try to get it one more time
				print "Making req to Twitter API"
				os.system('python friends_followers__get_user_info_by_id.py ' + current_id)

			current_info = json.loads(r.get(getRedisIdByUserId(current_id, 'info.json'