def _initial_populate(self):
    '''Seed the community with the root user and start gathering members.

    Fetches the root user's data through TwitterUser, records it as the
    first community member, stores it together with its score in the node
    pool under the root user id, saves, and then hands over to
    _resume_populate().
    '''
    seed_id = self._root_user_id
    seed_data = TwitterUser(seed_id).get_all_data()
    self._community_members.append(seed_data)
    # user scores determine how interconnected a user is
    seed_entry = dict(user=seed_data,
                      score=self._filled_user_score(seed_data))
    self._node_pool[seed_id] = seed_entry
    logging.debug('Adding root to node_pool')
    self.save()
    self._resume_populate()
def _initial_populate(self):
    '''Seed the community with the root user and start gathering members.

    Fetches the root user's data through TwitterUser, records it as the
    first community member, stores it together with its score in the node
    pool under the root user id, saves, and then hands over to
    _resume_populate().
    '''
    seed_id = self._root_user_id
    seed_data = TwitterUser(seed_id).get_all_data()
    self._community_members.append(seed_data)
    # user scores determine how interconnected a user is
    seed_entry = dict(user=seed_data,
                      score=self._filled_user_score(seed_data))
    self._node_pool[seed_id] = seed_entry
    logging.debug('Adding root to node_pool')
    self.save()
    self._resume_populate()
def _resume_populate(self): '''Gather a group of TwitterUsers. Tries to choose a highly interconnected group. Picks up where a previous _populate() call stopped''' while self._node_pool and len(self._community_members) < \ self._max_population: ''' choose person on list with highest interconnection first 0 at end of line is to take first user, second 0 get's user's id from tuple result ''' self._rescore_node_pool() highest_scoring_id = sorted(self._node_pool.items(), key=lambda item: item[1]['score'], reverse=True)[0][0] curr_node = self._node_pool[highest_scoring_id]['user'] #choose friends by rank to add to community, seems to work #better then followers friend_ids = self._sort_by_empty_score(curr_node['friend_ids']) added_count = 0 community_ids = [] for member in self._community_members: community_ids.append(member['uid']) #flag to delete the id we are using from the node pool when finished with it delete_highest_scoring_id = True for friend_id in friend_ids: if added_count <= self._max_friends_per_user: try: if friend_id not in community_ids: tu = TwitterUser(friend_id) sleep(1) new_user = tu.get_all_data() #TODO add some sort of conditions for addition to the community self._community_members.append(new_user) logging.debug('TwitterUser accepted to community') new_user_score = self._filled_user_score(new_user) self._node_pool[friend_id] = {'user':new_user, 'score':new_user_score} if self._safe: self.save() print len(self._community_members), "members" added_count += 1 logging.debug('TwitterUser accepted to node_pool') delete_highest_scoring_id = True except TwitterHTTPError as error: logging.debug('Twitter error: %s' % error) self.save() print "Number of members: ", len(self._community_members) #rate limiting error if '400' in str(error) or '420' in str(error): delete_highest_scoring_id = False logging.debug('Hit rate limit, quitting') return #unauthorized for user error elif '401' in str(error) or '404' in str(error): delete_highest_scoring_id = True if 
self._safe: self.save() #otherwise it's probably just a twitter server issue else: delete_highest_scoring_id = False logging.debug('Server error, sleeping for 5 secs') sleep(5) except BadUser as error: delete_highest_scoring_id = True logging.debug('TwitterUser rejected: %s' % error) except URLError: delete_highest_scoring_id = False logging.debug('URLError, sleeping for 5 secs') sleep(5) #once a user is chosen for evaluation pop him off the node list del self._node_pool[highest_scoring_id] if self._safe: self.save() logging.debug('Deleting node from node pool') self.save() print "Maximum community size reached." print "Number of members: ", len(self._community_members)
def _resume_populate(self): '''Gather a group of TwitterUsers. Tries to choose a highly interconnected group. Picks up where a previous _populate() call stopped''' while self._node_pool and len(self._community_members) < \ self._max_population: ''' choose person on list with highest interconnection first 0 at end of line is to take first user, second 0 get's user's id from tuple result ''' self._rescore_node_pool() highest_scoring_id = sorted(self._node_pool.items(), key=lambda item: item[1]['score'], reverse=True)[0][0] curr_node = self._node_pool[highest_scoring_id]['user'] #choose friends by rank to add to community, seems to work #better then followers friend_ids = self._sort_by_empty_score(curr_node['friend_ids']) added_count = 0 community_ids = [] for member in self._community_members: community_ids.append(member['uid']) #flag to delete the id we are using from the node pool when finished with it delete_highest_scoring_id = True for friend_id in friend_ids: if added_count <= self._max_friends_per_user: try: if friend_id not in community_ids: tu = TwitterUser(friend_id) sleep(1) new_user = tu.get_all_data() #TODO add some sort of conditions for addition to the community self._community_members.append(new_user) logging.debug('TwitterUser accepted to community') new_user_score = self._filled_user_score(new_user) self._node_pool[friend_id] = { 'user': new_user, 'score': new_user_score } if self._safe: self.save() print len(self._community_members), "members" added_count += 1 logging.debug('TwitterUser accepted to node_pool') delete_highest_scoring_id = True except TwitterHTTPError as error: logging.debug('Twitter error: %s' % error) self.save() print "Number of members: ", len( self._community_members) #rate limiting error if '400' in str(error) or '420' in str(error): delete_highest_scoring_id = False logging.debug('Hit rate limit, quitting') return #unauthorized for user error elif '401' in str(error) or '404' in str(error): delete_highest_scoring_id = True if 
self._safe: self.save() #otherwise it's probably just a twitter server issue else: delete_highest_scoring_id = False logging.debug('Server error, sleeping for 5 secs') sleep(5) except BadUser as error: delete_highest_scoring_id = True logging.debug('TwitterUser rejected: %s' % error) except URLError: delete_highest_scoring_id = False logging.debug('URLError, sleeping for 5 secs') sleep(5) #once a user is chosen for evaluation pop him off the node list del self._node_pool[highest_scoring_id] if self._safe: self.save() logging.debug('Deleting node from node pool') self.save() print "Maximum community size reached." print "Number of members: ", len(self._community_members)