def getSourceList(users,typ,sampleSize,filterN): tw={} twc={} twDetails={} print users #we can look up a max of 100 users... #TO DO / HACK just sample 100 for now, if required...? if len(users)>100: users=random.sample(users, 100) print 'HACK FUDGE, only using 100 users:',users twd=api.lookup_users(screen_names=users) for u in twd: if type(u) is newt.tweepy.models.User: twc[u.screen_name]=filterN twDetails[u.screen_name]=u if sampleSize==0: sampleSize='all' for user in users: print "Getting ",typ," of ",user if typ=="followers": tmp=newt.getTwitterFollowersDetailsByIDs(api,user,sampleSize) else: tmp=newt.getTwitterFriendsDetailsByIDs(api,user,sampleSize) print "Grabbed ",str(len(tmp)),typ,' for ',user #tw.extend(tmp) for t in tmp: if t in twc: twc[t]=twc[t]+1 else: twc[t]=1 twDetails[t]=tmp[t] #deDupeList=list(set(origList)) #deDupeList=filter(lambda e: e not in origList,origList) for t in twc: if twc[t]>=filterN: tw[t]=twDetails[t] return tw
# Optional third command-line argument; -1 when it is not supplied.
try:
    extra = sys.argv[3]
except IndexError:
    extra = -1

# Optional fourth command-line argument: the sample size. Falls back to 'all'
# when it is missing or is not an integer.
try:
    sampleSize = int(sys.argv[4])
except (IndexError, ValueError):
    sampleSize = 'all'

#----------------------------------------------------------------

# Fetch the followers (or, for any other typ, the friends) of `user`.
if typ == "followers":
    tw = newt.getTwitterFollowersDetailsByIDs(api, user, sampleSize)
else:
    tw = newt.getTwitterFriendsDetailsByIDs(api, user, sampleSize)

# Dump one key per line; the file is closed even if a write fails.
f = newt.openTimestampedFile(user + '/' + typ,
                             'sample' + str(sampleSize) + 'tweeps.txt')
try:
    for tweep in tw:
        f.write(str(tweep) + '\n')
finally:
    f.close()

report("List members:")
for i in tw:
    report(tw[i].screen_name)
newt.gephiOutputFile(api, 'hashtag-' + tag, tw, 'outerfollowers') if followerView: typ = 'followers' sampleSize = 195 filterN = 3 twc = {} twDetails = {} for tweep in tw: user = tw[tweep].screen_name print "Getting followers of ", user if typ == "followers": tmp = newt.getTwitterFollowersDetailsByIDs(api, user, sampleSize) else: tmp = newt.getTwitterFriendsDetailsByIDs(api, user, sampleSize) print "Grabbed ", str(len(tmp)), typ, ' for ', user #tw.extend(tmp) for t in tmp: if t in twc: twc[t] = twc[t] + 1 else: twc[t] = 1 twDetails[t] = tmp[t] tw = {} for t in twc: if twc[t] >= filterN: tw[t] = twDetails[t] print len(tw), tw
def getSourceList(users, typ, sampleSize, filterN): tw = {} twc = {} twDetails = {} print users #we can look up a max of 100 users... #TO DO / HACK just sample 100 for now, if required...? #if len(users)>100: # users=random.sample(users, 500) # print 'HACK FUDGE, only using 500 users:',users if len(users) > 100: twd = [] #print 'users',users for l in chunks(users, 100): #print 'partial',l tmp = api.lookup_users(screen_names=l) twd.append(tmp) else: twd = api.lookup_users(screen_names=users) for u in twd: if type(u) is newt.tweepy.models.User: twc[u.screen_name] = filterN twDetails[u.screen_name] = u fn = projname + '/userdetails.csv' writer = csv.writer(open(fn, 'wb+'), quoting=csv.QUOTE_ALL) k = [ 'source', 'screen_name', 'name', 'description', 'location', 'time_zone', 'created_at', 'contributors_enabled', 'url', 'listed_count', 'friends_count', 'followers_count', 'statuses_count', 'favourites_count', 'id_str', 'id', 'verified', 'utc_offset', 'profile_image_url', 'protected' ] writer.writerow(k) for uu in twDetails: u = twDetails[uu] ux = [''] for x in [ u.screen_name, u.name, u.description, u.location, u.time_zone ]: if x != None: ux.append( unicodedata.normalize('NFKD', unicode(x)).encode( 'ascii', 'ignore')) else: ux.append('') for x in [ u.created_at, u.contributors_enabled, u.url, u.listed_count, u.friends_count, u.followers_count, u.statuses_count, u.favourites_count, u.id_str, u.id, u.verified, u.utc_offset, u.profile_image_url, u.protected ]: ux.append(x) try: writer.writerow(ux) except: pass if sampleSize == 0: sampleSize = 'all' for user in users: print "Getting ", typ, " of ", user if typ == "followers": tmp = newt.getTwitterFollowersDetailsByIDs(api, user, sampleSize) else: tmp = newt.getTwitterFriendsDetailsByIDs(api, user, sampleSize) print "Grabbed ", str(len(tmp)), typ, ' for ', user #tw.extend(tmp) for t in tmp: if t in twc: twc[t] = twc[t] + 1 else: twc[t] = 1 twDetails[t] = tmp[t] #deDupeList=list(set(origList)) #deDupeList=filter(lambda e: e 
not in origList,origList) for t in twc: if twc[t] >= filterN: tw[t] = twDetails[t] return tw