def load_user_data(user_id=None, screen_name=None):
    """Retrieve the user's data, or return it from the store if already there.

    Exactly one of user_id or screen_name must be given.
    """
    assert bool(user_id) != bool(screen_name)
    if user_id:
        user_data = s.get_user_data(user_id)
        if user_data:
            return user_id, user_data
        user = t.get_user(user_id=user_id)
    else:  # screen_name
        user = t.get_user(screen_name=screen_name)
        user_id = user.id
    user_data = select_user_data(user)
    s.set_user_data(user_id, user_data)
    return user_id, user_data
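# Usage sketch (hedged: `s` is the data store and `t` the Twitter client,
# both wired up elsewhere in this module; the screen name is illustrative):
#
#   user_id, user_data = load_user_data(screen_name="jack")
#   user_id, user_data = load_user_data(user_id=user_id)  # now served from the store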
def get_user_id(data, id_key, name_key):
    """Return the user id found in `data`, resolving the screen name via the
    API when the id is missing; return '' when neither lookup succeeds."""
    user_id = data.get(id_key) or ''
    if user_id != '':
        return user_id
    screen_name = data.get(name_key) or ''
    if screen_name == '':
        return ''
    user = twitter.get_user(screen_name=screen_name)
    if user is None:
        return ''
    return user['id_str']
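# A minimal call sketch, assuming `twitter.get_user(screen_name=...)` returns
# a dict carrying 'id_str' as above; the payload and its keys are illustrative:
#
#   payload = {'id_str': '', 'screen_name': 'jack'}
#   uid = get_user_id(payload, 'id_str', 'screen_name')  # '' on failure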
def digest_ids(self, tasty_ids):
    statuses_digested = 0
    for tasty_id in tasty_ids:
        guru = twitter.get_user(id_str=tasty_id)
        statuses_digested += self.digest_user(guru)
        # Stop early rather than run past the deadline set for this run.
        if self.deadline is not None and datetime.datetime.now() >= self.deadline:
            break
    return statuses_digested
from datetime import datetime

from graphs import render_graph
from twitter import (download_followers, count_followers, download_tweets,
                     get_user, count_tweets, count_likes)

if __name__ == '__main__':
    user = get_user('yedpodtrzitko')
    since = datetime(2017, 6, 30)
    til = datetime(2017, 7, 6)

    # download_followers(user)
    # download_tweets(user)

    graph_data = {}

    graph_followers = count_followers(user, since, til)
    graph_data.update(graph_followers)

    graph_tweets = count_tweets(user, since, til)
    graph_data.update(graph_tweets)

    graph_likes = count_likes(user, since, til)
    graph_data.update(graph_likes)

    render_graph(graph_data)
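# What the merge above does, in isolation: each count_* helper presumably
# returns a mapping of series data (an assumption; only the call sites appear
# here), and dict.update() folds the three results into the one mapping that
# render_graph consumes:
#
#   graph_data = {}
#   graph_data.update({'followers': {'2017-06-30': 10, '2017-07-01': 12}})
#   graph_data.update({'tweets': {'2017-06-30': 3, '2017-07-01': 1}})
#   # graph_data now holds both series under their own keys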
def run(creds, force_tweet=False, debug=False):
    if not debug:
        try:
            debug = config.DEBUG_MODE
        except AttributeError:
            pass
    if debug:
        force_tweet = True

    logging.debug("brains.run(), force_tweet is %s, debug is %s" % (force_tweet, debug))

    then = datetime.datetime.now()
    bot_settings = settings.get_settings(creds)
    bot_state = state.get_state(creds)
    bot_state.last_run = then
    bot_state.put()
    deadline = then + TIME_LIMIT
    learning_style = bot_settings.learning_style
    api = twitter.get_api(creds)
    statuses_digested = 0

    namespace_manager.set_namespace(creds.screen_name)

    logging.debug("brains.run(): learning_style is: %s" % learning_style)

    worker = verbivorejr.VerbivoreWorker(api, bot_settings)
    worker.deadline = deadline

    if learning_style == constants.learning_style_oneuser:
        # learn from one user
        guru_name = bot_settings.learning_guru
        guru = twitter.get_user(screen_name=guru_name)
        statuses_digested = worker.digest_user(guru)
    elif learning_style == constants.learning_style_following:
        guru_ids = api.friends_ids(stringify_ids=True)
        statuses_digested = worker.digest_ids(guru_ids)
    elif learning_style == constants.learning_style_followers:
        guru_ids = api.followers_ids(stringify_ids=True)
        statuses_digested = worker.digest_ids(guru_ids)

    worker.put()

    logging.debug("brains.run(): digested %d new statuses" % statuses_digested)

    # only continue if the tweet chance is met
    if bot_settings.tweet_chance < random.random() and force_tweet is False:
        logging.debug("brains.run(): didn't meet tweet_chance of %2.1f" % bot_settings.tweet_chance)
        return

    do_tweet = False
    if force_tweet:
        logging.debug("brains.run(): force_tweet is set")
        do_tweet = True
    elif bot_settings.locquacity_onschedule:
        logging.debug("brains.run(): will tweet on schedule")
        do_tweet = True
    elif bot_settings.locquacity_speakonnew and statuses_digested > 0:
        logging.debug("brains.run(): locquacity_speakonnew, statuses_digested: %s" % statuses_digested)
        do_tweet = True

    # check the deadline, defer tweeting to a fresh task if necessary
    if datetime.datetime.now() >= deadline:
        logging.debug("brains.run(): aborted after put()'ing worker, deadline is looming.")
        taskqueue.add(url="/%s/run" % api.me().screen_name)
        return

    queen = verbivorejr.VerbivoreQueen()
    queen.deadline = deadline

    if do_tweet:
        tweet = None
        safety = 10
        while tweet is None and safety > 0:
            tweet = queen.secrete(130)
            safety -= 1
        if tweet is not None:
            tweet = verbivorejr.uc_first(tweet)
            post_tweet(api, tweet, debug=debug)

    replied_userids = []

    if bot_settings.locquacity_reply:
        last_replied_id = bot_state.last_replied_id
        logging.debug("brains.run(): last_replied_id is %s" % last_replied_id)
        mentions = api.mentions(since_id=last_replied_id)
        logging.debug("-> %d mentions" % len(mentions))
        my_name = "@%s" % creds.screen_name
        last_timestamp = None
        for mention in mentions:
            if datetime.datetime.now() >= deadline:
                break
            # only reply when we've been directly addressed
            #if mention.text[:len(my_name)] != my_name:
            #    break
            logging.debug("-> reply to %s" % mention.author.screen_name)
            reply = "@%s" % mention.author.screen_name
            tweet = None
            safety = 5
            while tweet is None and safety > 0:
                logging.debug("--> generate reply, safety=%d" % safety)
                if datetime.datetime.now() >= deadline:
                    break
                tweet = queen.secrete_reply(mention.text, 130 - len(reply))
                safety -= 1
            if tweet is not None:
                reply = "%s %s" % (reply, tweet)
                post_tweet(api, reply, in_reply_to_status_id=mention.id, debug=debug)
                replied_userids.append(mention.author.id)
            this_timestamp = mention.created_at
            if last_timestamp is None or this_timestamp > last_timestamp:
                last_replied_id = mention.id_str
                last_timestamp = this_timestamp
        bot_state.last_replied_id = last_replied_id
        bot_state.put()

    if bot_settings.locquacity_greetnew:
        if datetime.datetime.now() >= deadline:
            logging.debug("brains.run(): aborted before greeting new followers, deadline is looming.")
            return
        new_follower_ids = None
        stored_follower_ids = creds.follower_ids
        api_follower_ids = api.followers_ids()
        if stored_follower_ids is None:
            new_follower_ids = api_follower_ids
        else:
            new_follower_ids = []
            for api_follower_id in api_follower_ids:
                if api_follower_id not in stored_follower_ids:
                    new_follower_ids.append(api_follower_id)
        if new_follower_ids is not None and len(new_follower_ids) > 0:
            logging.debug("brains.run(): new_follower_ids: %s" % new_follower_ids)
            for new_follower_id in new_follower_ids:
                if new_follower_id not in replied_userids:
                    tw_user = api.get_user(user_id=new_follower_id)
                    screen_name = tw_user.screen_name
                    safety = 5
                    greeting = None
                    while greeting is None and safety > 0:
                        greeting = queen.secrete_greeting(screen_name, 130)
                        safety -= 1  # bound the retries; otherwise this loop could spin forever
                    if greeting is not None:
                        post_tweet(api, greeting, debug=debug)
        else:
            logging.debug("brains.run(): no new followers")
        creds.follower_ids = api_follower_ids
        creds.put()

    now = datetime.datetime.now()
    elapsed = now - then
    logging.debug("brains.run(): completed in %d seconds" % elapsed.seconds)
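# The deadline pattern used throughout run(), in isolation: fix a wall-clock
# budget up front, check it before each expensive phase, and hand the rest of
# the work to a fresh task when time runs out. A minimal sketch; this helper
# is hypothetical, not part of the bot:
import datetime

def run_with_deadline(steps, time_limit, defer):
    """Run callables in order; call `defer` once the time budget expires."""
    deadline = datetime.datetime.now() + time_limit
    for step in steps:
        if datetime.datetime.now() >= deadline:
            defer()  # e.g. taskqueue.add(url="/%s/run" % screen_name)
            return
        step()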
def main():
    global expanded
    # TO REFACTOR: make this a file on disk that is read on start.
    staff = {'al3x': True, 'rsarver': True, 'kevinweil': True,
             'jointheflock': True, 'squarecog': True, 'pothos': True,
             'syou6162': True}
    crawl_deque = deque()
    idx = {}
    # Seed the crawler with an initial user; should be set on the command line. DEGREE 0
    seed = twitter.get_user("datajunkie")
    crawl_deque.append(seed)
    crawl_deque.append("\n")  # sentinel marking the end of a degree
    degree = 2
    current_degree = 1  # 1 = current user + friends/followers, etc.
    start = time.time()
    deque_size = 0
    expanded = {}

    LOG = open("process.log", "a")
    print >> LOG, "%s Twitter Process Started." % twitter.thetime()
    LOG.close()

    if len(sys.argv) > 1 and sys.argv[1] == "-r":
        LOG = open("process.log", "a")
        print >> LOG, "%s Resuming from crash." % twitter.thetime()
        LOG.close()
        crawl_deque = deque()
        populate_queue("queue.dat", crawl_deque)
        populate_dict("expanded.dat", expanded)

    while True:
        # Save state (queue and expanded set) for this iteration.
        save_state(crawl_deque, expanded)
        user = crawl_deque.popleft()
        if user == "\n":
            # Hit the degree sentinel: log progress and move on to the next degree.
            LOG = open("process.log", "a")
            print >> LOG, "%s Queue now has size %d after degree %d." % (twitter.thetime(), deque_size, current_degree)
            crawl_deque.append("\n")
            end = time.time()
            print >> LOG, "%s Time required for degree %d was %s s." % (twitter.thetime(), current_degree, str(end - start))
            LOG.close()
            current_degree += 1
            if current_degree > degree:
                break
            start = time.time()
            continue
        # If this user has already been expanded (got friends and followers),
        # don't do it again.
        if already_expanded(user['screen_name']):
            continue
        # If this user's information has already been scraped, don't do it again.
        #if not already_scraped(idx, user['screen_name']):  # in_index
        dump(user)
        #idx[user['screen_name']] = user['id']  # mark the user as scraped
        # TO DO: print user info to file.
        LOG = open("process.log", "a")
        print >> LOG, "%s Getting friends and followers for %s." % (twitter.thetime(), user['screen_name'])
        LOG.close()
        # Check that the user is not a crawler bomb.
        if 'friends_count' in user and 'followers_count' in user and \
                user['friends_count'] + user['followers_count'] > 15000:
            twitter.log(twitter.thetime(), "WARN", "NA", user['screen_name'], "SKIP", "NA")
            continue
        friends, followers = get_ff(user['screen_name'])
        if friends == -1 or followers == -1:
            B = open("blacklist", "a")
            print >> B, user['screen_name']
            B.close()
            if friends == -1:
                print "Getting friends for", user['screen_name'], "failed."
            if followers == -1:
                print "Getting followers for", user['screen_name'], "failed."
            continue
        for friend in friends:
            # If user information has already been scraped, don't do it again.
            #if not already_scraped(idx, friend['screen_name']):
            #    user_data = twitter.get_user(friend['screen_name'])
            dump(friend)
            #idx[friend['screen_name']] = friend['id']  # mark the user as scraped
            if friend['screen_name'] not in staff:
                crawl_deque.append(friend)
                deque_size += 1
            GRAPH = open("graph.log", "a")
            print >> GRAPH, ','.join([user['screen_name'], friend['screen_name']])
            GRAPH.close()
        for follower in followers:
            # If the node exists, get it. Otherwise, create it.
            #if not already_scraped(idx, follower['screen_name']):
            #    user_data = twitter.get_user(follower['screen_name'])
            if len(follower) == 0:
                continue
            dump(follower)
            #idx[follower['screen_name']] = follower['id']  # mark user as scraped
            if follower['screen_name'] not in staff:
                crawl_deque.append(follower)
                deque_size += 1
            GRAPH = open("graph.log", "a")
            print >> GRAPH, ','.join([follower['screen_name'], user['screen_name']])
            GRAPH.close()
        expanded[user['screen_name']] = True

    # NOTE: idx stays empty while the already_scraped bookkeeping above is
    # commented out, so this dumps an empty index.
    OUT = open("index.pickle", "w")
    cPickle.dump(idx, OUT)
    OUT.close()
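# The crash-recovery helpers called above (save_state, populate_queue,
# populate_dict) are not shown in this snippet; only their call sites are.
# A minimal sketch of what they might look like; the pickle-based on-disk
# format is an assumption:
import pickle

def save_state(crawl_deque, expanded,
               queue_path="queue.dat", expanded_path="expanded.dat"):
    # Persist the frontier and the expanded set so "-r" can resume after a crash.
    with open(queue_path, "wb") as f:
        pickle.dump(list(crawl_deque), f)
    with open(expanded_path, "wb") as f:
        pickle.dump(expanded, f)

def populate_queue(queue_path, crawl_deque):
    # Reload the crawl frontier saved by save_state().
    with open(queue_path, "rb") as f:
        crawl_deque.extend(pickle.load(f))

def populate_dict(expanded_path, expanded):
    # Reload the set of already-expanded users saved by save_state().
    with open(expanded_path, "rb") as f:
        expanded.update(pickle.load(f))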