def load_user_data(user_id=None, screen_name=None):
    """
    Retrieve and set user's data.
    Or get it from the store if already there.
    """
    assert bool(user_id) != bool(screen_name)
    if user_id:
        user_data = s.get_user_data(user_id)
        if user_data:
            return user_id, user_data
        user = t.get_user(user_id=user_id)
    else: # screen_name
        user = t.get_user(screen_name=screen_name)
    user_id = user.id
    user_data = select_user_data(user)
    s.set_user_data(user_id, user_data)
    return user_id, user_data

def get_user_id(data, id_key, name_key):
    """Return the user's id from data[id_key], falling back to an API lookup by screen name."""
    user_id = data.get(id_key) or ''
    if user_id != '':
        return user_id
    screen_name = data.get(name_key) or ''
    if screen_name == '':
        return ''
    user = twitter.get_user(screen_name=screen_name)
    if user is None:
        return ''
    return user['id_str']
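A minimal usage sketch for the two helpers above, assuming the `s` (store), `t`, and `twitter` client objects they reference are available as in their original project; the screen name and dictionary keys below are purely illustrative.

# Illustrative only: the handle and the payload keys are made up for this sketch.
user_id, user_data = load_user_data(screen_name="some_handle")   # cache miss: fetched via the API
user_id, user_data = load_user_data(user_id=user_id)             # cache hit: served from the store

status = {"in_reply_to_user_id_str": "", "in_reply_to_screen_name": "some_handle"}
author_id = get_user_id(status, "in_reply_to_user_id_str", "in_reply_to_screen_name")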
Example #4
    def digest_ids(self, tasty_ids):
        statuses_digested = 0
        for tasty_id in tasty_ids:
            guru = twitter.get_user(id_str=tasty_id)
            statuses_digested += self.digest_user(guru)
            # Stop early if the optional deadline has passed.
            if self.deadline is not None and datetime.datetime.now() >= self.deadline:
                break
        return statuses_digested
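digest_ids() leans on the wall-clock deadline pattern used throughout Example #6 below: the caller sets a cutoff time and long-running loops check it between iterations. A minimal, generic sketch of that pattern (the names here are not from the original project):

import datetime

deadline = datetime.datetime.now() + datetime.timedelta(minutes=5)
for user_id in ["783214", "12"]:   # stand-in for the list of ids to digest
    # ... per-item work would go here ...
    if deadline is not None and datetime.datetime.now() >= deadline:
        break                      # stop early, exactly as digest_ids() does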
Example #5
from datetime import datetime

from graphs import render_graph
from twitter import download_followers, count_followers, download_tweets, get_user, count_tweets, count_likes

if __name__ == '__main__':
    user = get_user('yedpodtrzitko')
    since = datetime(2017, 6, 30)
    til = datetime(2017, 7, 6)
    # download_followers(user)
    # download_tweets(user)

    graph_data = {}
    graph_followers = count_followers(user, since, til)
    graph_data.update(graph_followers)

    graph_tweets = count_tweets(user, since, til)
    graph_data.update(graph_tweets)

    graph_likes = count_likes(user, since, til)
    graph_data.update(graph_likes)

    render_graph(graph_data)
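The count_* helpers are not shown in this example; since their results are merged with dict.update() and passed straight to render_graph(), a reasonable assumption is that each returns a dict of series. A hedged sketch of that assumed shape (not taken from the library itself):

graph_data = {}
graph_data.update({'followers': {'2017-07-01': 120, '2017-07-02': 124}})   # assumed shape
graph_data.update({'tweets': {'2017-07-01': 3, '2017-07-02': 5}})          # assumed shape
# render_graph(graph_data) would then receive every series in a single dict.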
Example #6
def run( creds, force_tweet=False, debug=False ):	

	if not debug:
		try:
			debug = config.DEBUG_MODE
		except AttributeError:
			pass

	if debug:
		force_tweet = True

	logging.debug( "brains.run(), force_tweet is %s, debug is %s" % (force_tweet, debug) )

	then = datetime.datetime.now()
	bot_settings = settings.get_settings( creds )

	bot_state = state.get_state( creds )
	bot_state.last_run = then
	bot_state.put()

	deadline = then + TIME_LIMIT
	learning_style = bot_settings.learning_style
	api = twitter.get_api( creds )
	statuses_digested = 0

	namespace_manager.set_namespace( creds.screen_name )

	logging.debug( "brains.run(): learning_style is: %s" % learning_style )
	worker = verbivorejr.VerbivoreWorker( api, bot_settings )
	worker.deadline = deadline
	if learning_style == constants.learning_style_oneuser:
		# learn from one user
		guru_name = bot_settings.learning_guru
		guru = twitter.get_user( screen_name=guru_name )
		statuses_digested = worker.digest_user( guru )
	elif learning_style == constants.learning_style_following:
		guru_ids = api.friends_ids( stringify_ids=True )
		statuses_digested = worker.digest_ids( guru_ids )
	elif learning_style == constants.learning_style_followers:
		guru_ids = api.followers_ids( stringify_ids=True )
		statuses_digested = worker.digest_ids( guru_ids )
	
	worker.put()

	logging.debug( "brains.run(): digested %d new statuses" % statuses_digested )

	# only continue if chance is met
	if bot_settings.tweet_chance < random.random() and force_tweet is False:
		logging.debug( "brains.run(): didn't meet tweet_chance of %2.1f" % bot_settings.tweet_chance )
		return

	do_tweet = False

	if force_tweet:
		logging.debug( "brains.run(): force_tweet is set" )
		do_tweet = True
	elif bot_settings.locquacity_onschedule:
		logging.debug( "brains.run(): will tweet on schedule" )
		do_tweet = True
	elif bot_settings.locquacity_speakonnew and statuses_digested > 0 :
		logging.debug( "brains.run(): locquacity_speakonnew, statuses_digested: %s" % statuses_digested )
		do_tweet = True

	# check deadline, defer tweeting if necessary
	if datetime.datetime.now() >= deadline:
		logging.debug( "brains.run(): aborted after put()'ing worker, deadline is looming." )
		taskqueue.add( url="/%s/run" % api.me().screen_name )
		return
	
	queen = verbivorejr.VerbivoreQueen()
	queen.deadline = deadline

	if do_tweet:
		tweet = None
		safety = 10
		while tweet is None and safety > 0:
			tweet = queen.secrete( 130 )
			safety = safety - 1
		if tweet is not None:
			tweet = verbivorejr.uc_first( tweet )
			post_tweet( api, tweet, debug=debug )

	replied_userids = []
	if bot_settings.locquacity_reply:
		
		last_replied_id = bot_state.last_replied_id	
		logging.debug( "brains.run(): last_replied_id is %s" % last_replied_id )
		mentions = api.mentions( since_id=last_replied_id )
		logging.debug( "-> %d mentions" % len(mentions) )

		my_name = "@%s" % creds.screen_name
		last_timestamp = None
		for mention in mentions:
			
			if datetime.datetime.now() >= deadline:
				break

			# only reply when we've been directly addressed
			#if mention.text[:len(my_name)] != my_name:
			#	break
			logging.debug( "-> reply to %s" % mention.author.screen_name )
			reply = "@%s" % mention.author.screen_name
			tweet = None
			safety = 5
			while tweet is None and safety > 0:
				logging.debug( "--> generate reply, safety=%d" % safety )
				if datetime.datetime.now() >= deadline:
					break
				tweet = queen.secrete_reply( mention.text, 130 - len(reply) )
				safety = safety -1

			if tweet is not None:
				reply = "%s %s" % (reply, tweet)
				post_tweet( api, reply, in_reply_to_status_id=mention.id, debug=debug )
				replied_userids.append( mention.author.id )

			this_timestamp = mention.created_at
			if last_timestamp is None or this_timestamp > last_timestamp:
				last_replied_id = mention.id_str
				last_timestamp = this_timestamp

		bot_state.last_replied_id = last_replied_id
		bot_state.put()


	if bot_settings.locquacity_greetnew:

		if datetime.datetime.now() >= deadline:
			logging.debug( "brains.run(): aborted before greeting new followers, deadline is looming." )
			return

		new_follower_ids = None
		stored_follower_ids = creds.follower_ids
		api_follower_ids = api.followers_ids()
		if stored_follower_ids is None:
			new_follower_ids = api_follower_ids
		else:
			new_follower_ids = []
			for api_follower_id in api_follower_ids:
				if api_follower_id not in stored_follower_ids:
					new_follower_ids.append( api_follower_id )

		if new_follower_ids is not None and len(new_follower_ids) > 0:
			logging.debug( "brains.run(): new_follower_ids: %s" % new_follower_ids )
			for new_follower_id in new_follower_ids:
				if new_follower_id not in replied_userids:
					tw_user = api.get_user( user_id=new_follower_id )
					screen_name = tw_user.screen_name
					safety = 5
					greeting = None
					while greeting is None and safety > 0:
						greeting = queen.secrete_greeting( screen_name, 130 )
						safety = safety - 1
					if greeting is not None:
						post_tweet( api, greeting, debug=debug )
		else:
			logging.debug( "brains.run(): no new followers" )

		creds.follower_ids = api_follower_ids
		creds.put()

	now = datetime.datetime.now()
	elapsed = now - then
	logging.debug( "brains.run(): completed in %d seconds" % elapsed.seconds )
def main():
    global expanded
    #To REFACTOR: Make this a file on disk that is read on start.
    staff = {'al3x': True, 'rsarver': True, 'kevinweil': True, 'jointheflock': True, 'squarecog': True, 'pothos': True, 'syou6162': True}
    crawl_deque = deque()
    idx = {}
    #Seed the crawler with an initial user, should be set on command line. DEGREE 0
    seed = twitter.get_user("datajunkie")
    crawl_deque.append(seed)
    crawl_deque.append("\n")
    degree = 2
    current_degree = 1  #degree 1 = the seed user plus its friends/followers
    start = time.time()
    deque_size = 0
    expanded = {}
    LOG = open("process.log","a")
    print >> LOG, "%s Twitter Process Started." % twitter.thetime()
    LOG.close()
    if len(sys.argv) > 1 and sys.argv[1] == "-r":
        LOG = open("process.log", "a")
        print >> LOG, "%s Resuming from crash." % twitter.thetime()
        LOG.close()
        crawl_deque = deque()
        populate_queue("queue.dat", crawl_deque)
        populate_dict("expanded.dat", expanded)
    while True:
        #Save state for iteration.
        #Save queue.
        save_state(crawl_deque, expanded)
        user = crawl_deque.popleft()
        if user == "\n":
            LOG = open("process.log","a")
            print >> LOG, "%s Queue now has size %d after degree %d." % (twitter.thetime(), deque_size, current_degree)
            crawl_deque.append("\n")
            end = time.time()
            print >> LOG, "%s Time required for degree %d was %s s." % (twitter.thetime(), current_degree, str(end-start))
            LOG.close()
            current_degree += 1
            if current_degree > degree:
                break
            start = time.time()
            continue
        #If this user has already been expanded (got friends and followers), don't do it again
        if already_expanded(user['screen_name']):
            continue
        #If this user's information has already been scraped, don't do it again.
        #if not already_scraped(idx, user['screen_name']):      #in_index
        dump(user)
            #idx[user['screen_name']] = user['id']     #mark the user as scraped.
            #TO DO: Print user info to file.
        LOG = open("process.log", "a")
        print >> LOG, "%s Getting friends and followers for %s." % (twitter.thetime(), user['screen_name'])
        LOG.close()
        #Check that the user is not a crawler bomb.
        if 'friends_count' in user and 'followers_count' in user and \
            user['friends_count'] + user['followers_count'] > 15000:
            twitter.log(twitter.thetime(), "WARN", "NA", user['screen_name'], "SKIP", "NA") 
            continue
        friends, followers = get_ff(user['screen_name'])
        if friends == -1 or followers == -1:
            B = open("blacklist", "a")
            print >> B, user['screen_name']
            B.close()
            if friends == -1:
                print "Getting friends for", user['screen_name'], "failed."
            if followers == -1:
                print "Getting followers for", user['screen_name'], "failed."
            continue
        for friend in friends:
            #if user information has already been scraped, don't do it again.
            #if not already_scraped(idx, friend['screen_name']):
                #user_data = twitter.get_user(friend['screen_name'])
            dump(friend)
                #idx[friend['screen_name']] = friend['id']    #Mark the user as scraped.
            if friend['screen_name'] not in staff:
                crawl_deque.append(friend)
                deque_size += 1
            GRAPH = open("graph.log","a")
            print >> GRAPH, ','.join([user['screen_name'], friend['screen_name']])
            GRAPH.close()
        for follower in followers:
            #if the node exists, get it. Otherwise, create it.
            #if not already_scraped(idx, follower['screen_name']):
                #user_data = twitter.get_user(follower['screen_name'])
            if len(follower) == 0:
                continue
            dump(follower)
            #idx[follower['screen_name']] = follower['id']  #mark user as scraped
            if follower['screen_name'] not in staff:
                crawl_deque.append(follower)
                deque_size += 1
            GRAPH = open("graph.log","a")
            print >> GRAPH, ','.join([follower['screen_name'], user['screen_name']])
            GRAPH.close()
        expanded[user['screen_name']] = True
        OUT = open("index.pickle", "w")
        cPickle.dump(idx, OUT)
        OUT.close()
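The crawler above depends on save_state(), populate_queue() and populate_dict() for crash recovery, none of which are shown in this example. A hedged sketch of how they might round-trip the queue and the expanded set through queue.dat and expanded.dat, written in the same Python 2 / cPickle style as the snippet; the real on-disk format is a guess:

def save_state(crawl_deque, expanded):
    #Hypothetical: snapshot the work queue and the expanded set to disk.
    OUT = open("queue.dat", "w")
    cPickle.dump(list(crawl_deque), OUT)
    OUT.close()
    OUT = open("expanded.dat", "w")
    cPickle.dump(expanded, OUT)
    OUT.close()

def populate_queue(path, crawl_deque):
    #Hypothetical: reload the queued users saved by save_state().
    IN = open(path)
    for item in cPickle.load(IN):
        crawl_deque.append(item)
    IN.close()

def populate_dict(path, d):
    #Hypothetical: reload the already-expanded screen names.
    IN = open(path)
    d.update(cPickle.load(IN))
    IN.close()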