class CooccurrenceGraphDAO(GenericDAO, metaclass=Singleton):
    """DAO for storing and querying co-occurrence graphs, one document per topic."""

    def __init__(self):
        super(CooccurrenceGraphDAO, self).__init__(Mongo().get().db.cooccurrence_graphs)
        self.logger = Logger(self.__class__.__name__)

    def store(self, graphs, start_date, end_date):
        """ Store main graph and all topic graphs into collection.

        `graphs` maps topic_id -> graph; one document is inserted per entry,
        stamped with the given date range. """
        documents = [{
            'topic_id': key,
            'graph': graph,
            'start_date': start_date,
            'end_date': end_date
        } for key, graph in graphs.items()]
        self.collection.insert_many(documents)

    def get_all_sorted_topics(self):
        """Return the distinct topic ids in the collection, sorted, as strings."""
        graphs = self.get_all({}, {'topic_id': 1})
        # Deduplicate with a set comprehension; sorted() accepts any iterable,
        # so the intermediate list of the original is unnecessary.
        topic_ids = {graph['topic_id'] for graph in graphs}
        return [str(topic) for topic in sorted(topic_ids)]

    def create_indexes(self):
        """Create a descending index on topic_id for faster per-topic lookups."""
        self.logger.info(
            'Creating topic_id index for collection cooccurrence_graphs.')
        Mongo().get().db.cooccurrence_graphs.create_index([
            ('topic_id', pymongo.DESCENDING)
        ])
# NOTE(review): this class duplicates the CooccurrenceGraphDAO defined earlier in
# this SOURCE but omits store() and get_all_sorted_topics(). If both definitions
# end up in the same module, this later one silently replaces the richer one —
# confirm which definition is intended and remove the other.
class CooccurrenceGraphDAO(GenericDAO, metaclass=Singleton):

    def __init__(self):
        # Bind the DAO to the 'cooccurrence_graphs' Mongo collection.
        super(CooccurrenceGraphDAO, self).__init__(Mongo().get().db.cooccurrence_graphs)
        self.logger = Logger(self.__class__.__name__)

    def create_indexes(self):
        # Descending index on topic_id for per-topic lookups.
        self.logger.info('Creating topic_id index for collection cooccurrence_graphs.')
        Mongo().get().db.cooccurrence_graphs.create_index([('topic_id', pymongo.DESCENDING)])
class RawFollowerDAO(GenericDAO, metaclass=Singleton):
    """DAO bound to the raw_followers Mongo collection."""

    def __init__(self):
        super(RawFollowerDAO, self).__init__(Mongo().get().db.raw_followers)
        self.logger = Logger(self.__class__.__name__)

    def create_indexes(self):
        """Create the has_tweets index used to filter followers by tweet availability."""
        self.logger.info(
            'Creating has_tweets index for collection raw_followers.')
        index_spec = [('has_tweets', pymongo.DESCENDING)]
        Mongo().get().db.raw_followers.create_index(index_spec)
class CredentialService(metaclass=Singleton):
    """Hands out Twitter API credentials to services, tracking which pairs are in use.

    A credential/service pair is tracked via a '<credential_id>-<service_id>'
    key stored in the in_use set.
    """

    CREDENTIALS_PATH = f"{abspath(join(dirname(__file__), '../../..'))}/twitter_credentials.json"

    def __init__(self):
        self.logger = Logger(self.__class__.__name__)
        # Keys of the form '<credential_id>-<service_id>' currently assigned.
        self.in_use = set()
        self.credentials = []
        # Load credentials file and create objects to access their elements
        try:
            with open(CredentialService.CREDENTIALS_PATH, 'r') as file:
                loaded = json.load(file)
                for value in loaded:
                    self.credentials.append(Credential(**value))
        except IOError:
            # Message grammar fixed ('do not found' -> 'not found').
            self.logger.error('Credentials file not found')

    @staticmethod
    def _key(credential_id, service_id):
        """Build the in_use set key for a credential/service pair."""
        return f'{credential_id}-{service_id}'

    def _lock_and_return(self, credential, service_id):
        """Mark the credential as in use for the service and return it."""
        self.logger.info(
            f'Returning credential {credential.id} for service {service_id}.'
        )
        self.in_use.add(self._key(credential.id, service_id))
        return credential

    def get_all_credentials_for_service(self, service_id):
        """ Return all credentials for a given service.

        Raises CredentialsAlreadyInUseError if any credential is already
        assigned to this service. """
        self.logger.info(
            f'Returning all credentials for service {service_id}.')
        # Check if some credential has already been assigned
        for credential in self.credentials:
            if self._key(credential.id, service_id) in self.in_use:
                raise CredentialsAlreadyInUseError(service_id)
        self.logger.info('Checked credentials')
        # Store in the in use set. We iterate twice because the number of credentials is small and it is easier than
        # doing rollbacks with the already stored credentials if we need to raise an exception
        for credential in self.credentials:
            self.in_use.add(self._key(credential.id, service_id))
        return self.credentials

    def get_credential_for_service(self, service_id):
        """ Get credential if current service is not using all of the available credentials.

        Raises NoAvailableCredentialsError when every credential is in use. """
        for credential in self.credentials:
            if self._key(credential.id, service_id) not in self.in_use:
                return self._lock_and_return(credential, service_id)
        raise NoAvailableCredentialsError(service_id)

    def get_credential_with_id_for_service(self, credential_id, service_id):
        """ Get the credential with the given id if this service is not already using it.

        Raises NoAvailableCredentialsError when the credential is unknown or in use. """
        for credential in self.credentials:
            if credential_id == credential.id and self._key(credential.id, service_id) not in self.in_use:
                return self._lock_and_return(credential, service_id)
        raise NoAvailableCredentialsError(service_id)

    def unlock_credential(self, credential_id, service_id):
        """ Unlock credential for a given service.

        Raises CredentialCurrentlyAvailableError if the pair was not in use. """
        key = self._key(credential_id, service_id)
        if key not in self.in_use:
            raise CredentialCurrentlyAvailableError(key)
        self.logger.info(
            f'Unlocking credential {credential_id} for service {service_id}.')
        self.in_use.remove(key)
class CandidateDAO(GenericDAO, metaclass=Singleton):
    """Persistence for candidate entities, backed by Mongo and a JSON resource file."""

    FILE_PATH = f"{abspath(join(dirname(__file__), '../../'))}/resources/candidates.json"

    def __init__(self):
        super(CandidateDAO, self).__init__(Mongo().get().db.candidates)
        self.logger = Logger(self.__class__.__name__)

    def find(self, screen_name):
        """ Get user with given screen name. """
        document = self.get_first({'_id': screen_name})
        if document is None:
            raise NonExistentCandidateError(screen_name)
        # DB documents keep the screen name under _id; expose it for the DTO.
        document['screen_name'] = document['_id']
        return Candidate(**document)

    def overwrite(self, candidate):
        """ Update candidate's fields (except for screen name). """
        updated_fields = {
            'nickname': candidate.nickname,
            'last_updated_followers': candidate.last_updated_followers
        }
        self.update_first({'_id': candidate.screen_name}, updated_fields)

    def save(self, candidate):
        """ Store candidate. """
        # DTO -> DB format: the screen name becomes the document id.
        document = {
            '_id': candidate.screen_name,
            'nickname': candidate.nickname,
            'last_updated_followers': candidate.last_updated_followers
        }
        return self.insert(document)

    def all(self):
        """ Get all currently stored candidates. """
        results = []
        for document in self.get_all():
            # DB format -> DTO format
            document['screen_name'] = document['_id']
            results.append(Candidate(**document))
        return results

    def create_indexes(self):
        # There are no indexes to create for this collection
        pass

    def create_base_entries(self):
        """Seed the collection from the JSON resource, but only when it is empty."""
        if self.get_all().count() > 0:
            return
        self.logger.info('Loading candidates from file into database.')
        with open(CandidateDAO.FILE_PATH, 'r') as file:
            candidates = json.load(file)
        for candidate in candidates:
            # Transform for database format
            self.insert({
                '_id': candidate['screen_name'],
                'nickname': candidate['nickname']
            })

    def update_json_resource(self, candidate):
        """ Add candidate to json file. """
        self.logger.info(
            f'Storing candidate {candidate.screen_name} into file.')
        with open(CandidateDAO.FILE_PATH, 'r') as file:
            candidates = json.load(file)
        # Append new candidate, then persist the whole list back to disk.
        candidates.append({
            'screen_name': candidate.screen_name,
            'nickname': candidate.nickname
        })
        with open(CandidateDAO.FILE_PATH, 'w') as file:
            json.dump(candidates, file)

    def get_required_candidates(self):
        """ Retrieve dictionary like: {candidate: index}, plus {index: group}. """
        candidate_index, candidate_group = {}, {}
        for document in self.get_all({'index': {'$exists': True}}):
            candidate_index[document['_id']] = document['index']
            candidate_group[document['index']] = document['group']
        return candidate_index, candidate_group
class CandidateService(metaclass=Singleton):
    """Coordinates candidate access and serializes follower-update polling."""

    def __init__(self):
        self.logger = Logger(self.__class__.__name__)
        # Candidates whose follower lists are currently being updated.
        self.updating_followers = set()
        # Load candidates from db and create objects to access their elements
        self.candidates = CandidateDAO().all()
        ConcurrencyUtils().create_lock('candidate_for_update')

    def get_all(self):
        """ Returns all candidates currently in the list. """
        return self.candidates

    def get_for_follower_updating(self):
        """ Polls a candidate for updating its follower list.

        Raises FollowerUpdatingNotNecessaryError when no candidate needs it. """
        # Lock to avoid concurrency issues when retrieving candidates across threads.
        # try/finally guarantees release even if an unexpected exception occurs
        # while scanning (the original could leak the lock on error).
        ConcurrencyUtils().acquire_lock('candidate_for_update')
        try:
            for candidate in self.candidates:
                # Only return a candidate that is not being updated and was not updated today
                if candidate not in self.updating_followers and \
                        not DateUtils.is_today(candidate.last_updated_followers):
                    self.logger.info(
                        f'Returning candidate {candidate.screen_name} for follower retrieval.'
                    )
                    self.updating_followers.add(candidate)
                    return candidate
            raise FollowerUpdatingNotNecessaryError()
        finally:
            ConcurrencyUtils().release_lock('candidate_for_update')

    def finish_follower_updating(self, candidate):
        """ Unlock user for follower updating and update last updating time.

        Raises CandidateCurrentlyAvailableForUpdateError if not being updated. """
        if candidate not in self.updating_followers:
            raise CandidateCurrentlyAvailableForUpdateError(
                candidate.screen_name)
        self.logger.info(
            f'Removing candidate {candidate.screen_name} from currently updating set.'
        )
        # Update last updated followers date
        candidate.last_updated_followers = datetime.now()
        CandidateDAO().overwrite(candidate)
        # Remove from set to not be polled again
        self.updating_followers.remove(candidate)

    def add_candidate(self, screen_name, nickname=None):
        """ Add a candidate with given screen name and nickname to the database and
        to the json file.

        Raises CandidateAlreadyExistsError if the candidate is already stored. """
        try:
            CandidateDAO().find(screen_name)
        except NonExistentCandidateError:
            # EAFP: absence is signalled by the DAO's exception.
            self.logger.info(f'Adding candidate {screen_name} to database.')
            candidate = Candidate(screen_name=screen_name, nickname=nickname)
            # Store in database
            CandidateDAO().save(candidate)
            # Update json resource
            CandidateDAO().update_json_resource(candidate)
            # Update current structure
            self.candidates.append(candidate)
            return
        raise CandidateAlreadyExistsError(screen_name)
class FollowersQueueService(metaclass=Singleton):
    """In-memory queue of followers whose tweets need to be (re)downloaded."""

    def __init__(self):
        self.logger = Logger(self.__class__.__name__)
        # follower_id -> last_tweet_date for followers waiting to be processed
        self.updating_followers = {}
        # Recently downloaded followers, served before the regular queue
        self.priority_updating_followers = {}
        self.processing_followers = set()
        ConcurrencyUtils().create_lock('followers_for_update_tweets')

    def get_followers_to_update(self, followers_to_delete):
        """Pop a batch of followers to update, preferring recently downloaded ones."""
        # Acquire lock for get the followers. try/finally guarantees the lock is
        # released even when NoMoreFollowersToUpdateTweetsError is raised below
        # (the original leaked the lock in that case).
        ConcurrencyUtils().acquire_lock('followers_for_update_tweets')
        try:
            self.logger.info(f'Getting followers to update their tweets. Queue\'s size: {len(self.updating_followers)} ')
            followers_to_update = self.try_to_get_priority_followers()
            if len(followers_to_update) == 0:
                followers_to_update = self.get_followers_with_tweets_to_update()
            self.processing_followers.update(set(followers_to_update.keys()))
            self.processing_followers = self.processing_followers.difference(followers_to_delete)
            return followers_to_update
        finally:
            ConcurrencyUtils().release_lock('followers_for_update_tweets')

    def try_to_get_priority_followers(self):
        """Return and clear the priority queue of recently downloaded followers."""
        users_to_update = {}
        if len(self.priority_updating_followers) != 0:
            self.logger.warning(f'Getting {len(self.priority_updating_followers)} recent downloaded followers.')
            users_to_update = self.priority_updating_followers.copy()
            self.priority_updating_followers = {}
        return users_to_update

    def get_followers_with_tweets_to_update(self):
        """ Get followers with tweets to update. """
        max_users_per_window = ConfigurationManager().get_int('max_users_per_window')
        self.check_if_have_followers(max_users_per_window)
        # Get the min follower's quantity between length and max_users
        min_length = min(max_users_per_window, len(self.updating_followers))
        # random.sample requires a sequence; dict views are rejected on modern
        # Python (TypeError since 3.11), so materialize the keys first.
        random_followers_keys = random.sample(list(self.updating_followers), min_length)
        # Remove selected followers from the queue as we hand them out
        followers_to_update = {}
        for follower in random_followers_keys:
            followers_to_update[follower] = self.updating_followers.pop(follower)
        return followers_to_update

    def check_if_have_followers(self, max_users_per_window):
        """Refill the queue from the DB when low; raise if it is still empty."""
        if len(self.updating_followers) <= 2 * max_users_per_window:
            # Retrieve more candidates from db
            self.add_followers_to_be_updated()
        if len(self.updating_followers) == 0:
            SlackHelper().post_message_to_channel(
                "No se obtuvieron seguidores de la base de datos.")
            self.logger.error('There are not followers to update their tweets.')
            raise NoMoreFollowersToUpdateTweetsError()

    def add_followers_to_be_updated(self, timedelta=180):
        """Pull a random follower sample from the DB into the queue.

        Raises NoMoreFollowersToUpdateTweetsError if nothing new was found. """
        self.logger.info(
            f'Adding new followers to update their tweets. Actual size: {str(len(self.updating_followers))}')
        followers = RawFollowerDAO().get_random_followers_sample(list(self.processing_followers), timedelta)
        new_followers = self.add_followers(followers)
        if len(new_followers) == 0:
            # If there are no new results
            self.logger.error('Can\'t retrieve followers to update their tweets. ')
            raise NoMoreFollowersToUpdateTweetsError()
        self.updating_followers.update(new_followers)

    def add_not_updated_followers_2(self):
        self.logger.info(f'Adding not updated followers.')
        self.add_followers_to_be_updated(85)

    def add_not_updated_followers_1(self):
        self.logger.info(f'Adding not updated followers.')
        self.add_followers_to_be_updated(95)

    def add_last_downloaded_followers(self):
        """Queue followers that were downloaded but never had tweets fetched."""
        self.logger.info('Adding last downloaded followers')
        users_to_be_updated = RawFollowerDAO().get_all({
            '$and': [
                {'has_tweets': {'$exists': False}},
                {'is_private': {'$ne': True}}
            ]})
        followers = self.add_followers(users_to_be_updated)
        self.priority_updating_followers.update(followers)
        self.logger.info('Finishing insertion of last downloaded followers')

    def add_followers(self, downloaded):
        """Map follower documents to {follower_id: last_tweet_date}.

        Followers without a last_tweet_date default to 2019-01-01. """
        followers = {}
        for follower in downloaded:
            date = datetime(2019, 1, 1)
            if 'last_tweet_date' in follower and follower['last_tweet_date'] is not None:
                date = follower['last_tweet_date']
            # NOTE: the original also warned when date was None here, but that
            # branch was unreachable (date is only replaced with non-None values).
            followers[follower['_id']] = date
        self.logger.info(f"Added {len(followers)} to queue.")
        return followers