Exemple #1
0
class CooccurrenceGraphDAO(GenericDAO, metaclass=Singleton):
    """ Data access object for the `cooccurrence_graphs` collection. """

    def __init__(self):
        super().__init__(Mongo().get().db.cooccurrence_graphs)
        self.logger = Logger(self.__class__.__name__)

    def store(self, graphs, start_date, end_date):
        """ Store main graph and all topic graphs into collection.

        :param graphs: mapping whose keys are saved as `topic_id` and whose values are saved as `graph`.
        :param start_date: value saved as `start_date` on every document.
        :param end_date: value saved as `end_date` on every document.
        """
        documents = [{
            'topic_id': key,
            'graph': graph,
            'start_date': start_date,
            'end_date': end_date
        } for key, graph in graphs.items()]
        # insert_many rejects an empty document list, so there is nothing to do without graphs
        if not documents:
            return
        self.collection.insert_many(documents)

    def get_all_sorted_topics(self):
        """ Return the distinct stored topic ids, sorted and converted to strings. """
        graphs = self.get_all({}, {'topic_id': 1})
        # Many documents may share a topic id, so deduplicate before sorting
        topic_ids = {graph['topic_id'] for graph in graphs}
        # Sorting happens on the raw ids; the string conversion is applied afterwards
        return [str(topic) for topic in sorted(topic_ids)]

    def create_indexes(self):
        """ Create a descending index on `topic_id`. """
        self.logger.info(
            'Creating topic_id index for collection cooccurrence_graphs.')
        Mongo().get().db.cooccurrence_graphs.create_index([
            ('topic_id', pymongo.DESCENDING)
        ])
Exemple #2
0
class CooccurrenceGraphDAO(GenericDAO, metaclass=Singleton):
    """ Data access object for the `cooccurrence_graphs` collection. """

    def __init__(self):
        graphs_collection = Mongo().get().db.cooccurrence_graphs
        super().__init__(graphs_collection)
        self.logger = Logger(type(self).__name__)

    def create_indexes(self):
        """ Create a descending index on `topic_id`. """
        self.logger.info('Creating topic_id index for collection cooccurrence_graphs.')
        graphs_collection = Mongo().get().db.cooccurrence_graphs
        index_keys = [('topic_id', pymongo.DESCENDING)]
        graphs_collection.create_index(index_keys)
Exemple #3
0
class RawFollowerDAO(GenericDAO, metaclass=Singleton):
    """ Data access object for the `raw_followers` collection. """

    def __init__(self):
        followers_collection = Mongo().get().db.raw_followers
        super().__init__(followers_collection)
        self.logger = Logger(type(self).__name__)

    def create_indexes(self):
        """ Create a descending index on `has_tweets`. """
        self.logger.info(
            'Creating has_tweets index for collection raw_followers.')
        followers_collection = Mongo().get().db.raw_followers
        index_keys = [('has_tweets', pymongo.DESCENDING)]
        followers_collection.create_index(index_keys)
Exemple #4
0
class CredentialService(metaclass=Singleton):
    """ Hands out the credentials loaded from the credentials file and tracks which
    (credential, service) pairs are currently in use. """

    CREDENTIALS_PATH = f"{abspath(join(dirname(__file__), '../../..'))}/twitter_credentials.json"

    def __init__(self):
        self.logger = Logger(type(self).__name__)
        # Keys of the form "<credential id>-<service id>" for every pair handed out
        self.in_use = set()
        self.credentials = []
        # Parse the credentials file, wrapping each entry in a Credential object
        try:
            with open(CredentialService.CREDENTIALS_PATH, 'r') as file:
                for entry in json.load(file):
                    self.credentials.append(Credential(**entry))
        except IOError:
            # The service is still created, just without any credentials
            self.logger.error('Credentials file do not found')

    def get_all_credentials_for_service(self, service_id):
        """ Reserve and return every credential for a given service.

        Raises CredentialsAlreadyInUseError if the service already holds any of them.
        """
        self.logger.info(
            f'Returning all credentials for service {service_id}.')
        keys = [f"{credential.id}-{service_id}" for credential in self.credentials]
        # Validate everything before reserving anything, so a failure never needs a rollback
        if any(key in self.in_use for key in keys):
            raise CredentialsAlreadyInUseError(service_id)
        self.logger.info('Checked credentials')
        self.in_use.update(keys)
        return self.credentials

    def get_credential_for_service(self, service_id):
        """ Reserve and return the first credential the service is not using yet.

        Raises NoAvailableCredentialsError when the service already uses all of them.
        """
        for credential in self.credentials:
            key = f"{credential.id}-{service_id}"
            if key in self.in_use:
                continue
            self.logger.info(
                f'Returning credential {credential.id} for service {service_id}.'
            )
            self.in_use.add(key)
            return credential
        raise NoAvailableCredentialsError(service_id)

    def get_credential_with_id_for_service(self, credential_id, service_id):
        """ Reserve and return the credential with the given id for a service.

        Raises NoAvailableCredentialsError when no credential has that id or the
        service is already using it.
        """
        for credential in self.credentials:
            if not credential_id == credential.id:
                continue
            key = f"{credential.id}-{service_id}"
            if key in self.in_use:
                continue
            self.logger.info(
                f'Returning credential {credential.id} for service {service_id}.'
            )
            self.in_use.add(key)
            return credential
        raise NoAvailableCredentialsError(service_id)

    def unlock_credential(self, credential_id, service_id):
        """ Release a credential previously reserved by a service.

        Raises CredentialCurrentlyAvailableError if the pair was not reserved.
        """
        key = f'{credential_id}-{service_id}'
        if key in self.in_use:
            self.logger.info(
                f'Unlocking credential {credential_id} for service {service_id}.')
            self.in_use.remove(key)
            return
        raise CredentialCurrentlyAvailableError(key)
Exemple #5
0
class CandidateDAO(GenericDAO, metaclass=Singleton):
    """ Data access object for the `candidates` collection and its json resource file. """

    FILE_PATH = f"{abspath(join(dirname(__file__), '../../'))}/resources/candidates.json"

    def __init__(self):
        super().__init__(Mongo().get().db.candidates)
        self.logger = Logger(self.__class__.__name__)

    def find(self, screen_name):
        """ Get user with given screen name.

        Raises NonExistentCandidateError if no document has that screen name as `_id`.
        """
        as_dict = self.get_first({'_id': screen_name})
        if as_dict is None:
            raise NonExistentCandidateError(screen_name)
        # Transform from DB format to DTO format
        as_dict['screen_name'] = as_dict['_id']
        return Candidate(**as_dict)

    def overwrite(self, candidate):
        """ Update candidate's fields (except for screen name). """
        self.update_first(
            {'_id': candidate.screen_name}, {
                'nickname': candidate.nickname,
                'last_updated_followers': candidate.last_updated_followers
            })

    def save(self, candidate):
        """ Store candidate and return whatever the underlying insert returns. """
        # Transform from DTO format to DB format
        to_insert = {
            '_id': candidate.screen_name,
            'nickname': candidate.nickname,
            'last_updated_followers': candidate.last_updated_followers
        }
        return self.insert(to_insert)

    def all(self):
        """ Get all currently stored candidates as a list of Candidate objects. """
        candidates = []
        for as_dict in self.get_all():
            # Transform from DB format to DTO format
            as_dict['screen_name'] = as_dict['_id']
            candidates.append(Candidate(**as_dict))
        return candidates

    def create_indexes(self):
        # There are no indexes to create for this collection
        pass

    def create_base_entries(self):
        """ Load the candidates from the json resource file if the collection is empty. """
        # Cursor.count() no longer exists in PyMongo 4, so probe for a single document instead
        if self.get_first({}) is not None:
            return
        # Load candidates
        self.logger.info('Loading candidates from file into database.')
        with open(CandidateDAO.FILE_PATH, 'r') as file:
            candidates = json.load(file)
        # Store entries
        for candidate in candidates:
            # Transform for database format
            to_insert = {
                '_id': candidate['screen_name'],
                'nickname': candidate['nickname']
            }
            self.insert(to_insert)

    def update_json_resource(self, candidate):
        """ Add candidate to json file. """
        self.logger.info(
            f'Storing candidate {candidate.screen_name} into file.')
        with open(CandidateDAO.FILE_PATH, 'r') as file:
            candidates = json.load(file)
        # Append new candidate
        candidates.append({
            'screen_name': candidate.screen_name,
            'nickname': candidate.nickname
        })
        # Write the whole list back, replacing the previous file content
        with open(CandidateDAO.FILE_PATH, 'w') as file:
            json.dump(candidates, file)

    def get_required_candidates(self):
        """ Retrieve two dictionaries built from the candidates that have an `index` field.

        Returns a tuple `(candidate_index, candidate_group)` shaped like
        `{candidate: index}` and `{index: group}`.
        """
        candidates = self.get_all({'index': {'$exists': True}})
        candidate_index = {}
        candidate_group = {}
        for candidate in candidates:
            candidate_index[candidate['_id']] = candidate['index']
            candidate_group[candidate['index']] = candidate['group']
        return candidate_index, candidate_group
Exemple #6
0
class CandidateService(metaclass=Singleton):
    """ Keeps the list of candidates in memory and coordinates their follower updates. """

    def __init__(self):
        self.logger = Logger(self.__class__.__name__)
        # Candidates whose followers are being updated right now
        self.updating_followers = set()
        # Load candidates from db and create objects to access their elements
        self.candidates = CandidateDAO().all()
        ConcurrencyUtils().create_lock('candidate_for_update')

    def get_all(self):
        """ Returns all candidates currently in the list. """
        return self.candidates

    def get_for_follower_updating(self):
        """ Polls a candidate for updating its follower list.

        Raises FollowerUpdatingNotNecessaryError when every candidate was already
        updated today or is currently being updated.
        """
        # Lock to avoid concurrency issues when retrieving candidates across threads
        ConcurrencyUtils().acquire_lock('candidate_for_update')
        try:
            for candidate in self.candidates:
                # We will only return a candidate if it was not updated today and is not being currently updated
                if candidate in self.updating_followers:
                    continue
                if DateUtils.is_today(candidate.last_updated_followers):
                    continue
                self.logger.info(
                    f'Returning candidate {candidate.screen_name} for follower retrieval.'
                )
                self.updating_followers.add(candidate)
                return candidate
            raise FollowerUpdatingNotNecessaryError()
        finally:
            # Always unlock, even on an unexpected error, so other threads are never blocked forever
            ConcurrencyUtils().release_lock('candidate_for_update')

    def finish_follower_updating(self, candidate):
        """ Unlock user for follower updating and update last updating time.

        Raises CandidateCurrentlyAvailableForUpdateError if the candidate was not being updated.
        """
        if candidate not in self.updating_followers:
            raise CandidateCurrentlyAvailableForUpdateError(
                candidate.screen_name)
        # Update last updated followers date
        self.logger.info(
            f'Removing candidate {candidate.screen_name} from currently updating set.'
        )
        candidate.last_updated_followers = datetime.now()
        CandidateDAO().overwrite(candidate)
        # Remove from set to not be polled again
        self.updating_followers.remove(candidate)

    def add_candidate(self, screen_name, nickname=None):
        """ Add a candidate with given screen name and nickname to the database and to the json file.

        Raises CandidateAlreadyExistsError if a candidate with that screen name is already stored.
        """
        try:
            CandidateDAO().find(screen_name)
        except NonExistentCandidateError:
            # Expected path: the screen name is free, so the candidate can be created below
            pass
        else:
            raise CandidateAlreadyExistsError(screen_name)
        self.logger.info(f'Adding candidate {screen_name} to database.')
        candidate = Candidate(**{
            'screen_name': screen_name,
            'nickname': nickname
        })
        # Store in database
        CandidateDAO().save(candidate)
        # Update json resource
        CandidateDAO().update_json_resource(candidate)
        # Update current structure
        self.candidates.append(candidate)
Exemple #7
0
class FollowersQueueService(metaclass=Singleton):
    """ In-memory queue of followers whose tweets have to be updated. """

    def __init__(self):
        self.logger = Logger(self.__class__.__name__)
        # Regular queue: follower id -> date taken from `last_tweet_date` (or a default)
        self.updating_followers = {}
        # Queue served before the regular one, filled by add_last_downloaded_followers
        self.priority_updating_followers = {}
        # Ids already handed out; passed to the DAO when sampling new followers
        self.processing_followers = set()
        ConcurrencyUtils().create_lock('followers_for_update_tweets')

    def get_followers_to_update(self, followers_to_delete):
        """ Hand out the next batch of followers to update.

        :param followers_to_delete: ids to drop from the set of followers being processed.
        :return: dictionary mapping follower id to its stored date.
        Raises NoMoreFollowersToUpdateTweetsError when no followers can be obtained.
        """
        # Acquire lock for get the followers
        ConcurrencyUtils().acquire_lock('followers_for_update_tweets')
        try:
            self.logger.info(f'Getting followers to update their tweets. Queue\'s size: {len(self.updating_followers)} ')

            followers_to_update = self.try_to_get_priority_followers()
            if len(followers_to_update) == 0:
                followers_to_update = self.get_followers_with_tweets_to_update()

            self.processing_followers.update(set(followers_to_update.keys()))
            self.processing_followers = self.processing_followers.difference(followers_to_delete)
        finally:
            # Release even when no followers are available, otherwise the next caller blocks forever
            ConcurrencyUtils().release_lock('followers_for_update_tweets')

        return followers_to_update

    def try_to_get_priority_followers(self):
        """ Return and clear the priority queue; returns an empty dictionary if it has no entries. """
        # If we have recent downloaded followers
        users_to_update = {}
        if len(self.priority_updating_followers) != 0:
            self.logger.warning(f'Getting {len(self.priority_updating_followers)} recent downloaded followers.')
            users_to_update = self.priority_updating_followers.copy()
            self.priority_updating_followers = {}
        return users_to_update

    def get_followers_with_tweets_to_update(self):
        """ Get followers with tweets to update.

        Removes a random selection of at most `max_users_per_window` followers
        from the regular queue and returns it.
        """
        max_users_per_window = ConfigurationManager().get_int('max_users_per_window')

        self.check_if_have_followers(max_users_per_window)

        # random.sample needs a sequence; a dict keys view is rejected on Python 3.11+
        queued_followers = list(self.updating_followers)
        # Get the min follower's quantity between length and max_users
        sample_size = min(max_users_per_window, len(queued_followers))
        random_followers_keys = random.sample(queued_followers, sample_size)

        # Remove selected followers
        return {
            follower: self.updating_followers.pop(follower)
            for follower in random_followers_keys
        }

    def check_if_have_followers(self, max_users_per_window):
        """ Refill the regular queue when it is running low and fail if it stays empty.

        Raises NoMoreFollowersToUpdateTweetsError when there are no followers to serve.
        """
        if len(self.updating_followers) <= 2 * max_users_per_window:
            # Retrieve more candidates from db
            self.add_followers_to_be_updated()

        # NOTE(review): add_followers_to_be_updated raises when it adds nothing, so for a
        # non-negative max_users_per_window this branch looks unreachable - confirm the intent
        if len(self.updating_followers) == 0:
            SlackHelper().post_message_to_channel(
                "No se obtuvieron seguidores de la base de datos.")
            self.logger.error('There are not followers to update their tweets.')
            raise NoMoreFollowersToUpdateTweetsError()

    def add_followers_to_be_updated(self, timedelta=180):
        """ Add a random sample of followers from the database to the regular queue.

        :param timedelta: forwarded unchanged to RawFollowerDAO.get_random_followers_sample.
        Raises NoMoreFollowersToUpdateTweetsError when the sample yields no followers.
        """
        self.logger.info(
            f'Adding new followers to update their tweets. Actual size: {str(len(self.updating_followers))}')
        followers = RawFollowerDAO().get_random_followers_sample(list(self.processing_followers), timedelta)
        new_followers = self.add_followers(followers)
        if len(new_followers) == 0:
            # If there are no new results
            self.logger.error('Can\'t retrieve followers to update their tweets. ')
            raise NoMoreFollowersToUpdateTweetsError()
        self.updating_followers.update(new_followers)

    def add_not_updated_followers_2(self):
        """ Refill the regular queue using a timedelta of 85. """
        self.logger.info('Adding not updated followers.')
        self.add_followers_to_be_updated(85)

    def add_not_updated_followers_1(self):
        """ Refill the regular queue using a timedelta of 95. """
        self.logger.info('Adding not updated followers.')
        self.add_followers_to_be_updated(95)

    def add_last_downloaded_followers(self):
        """ Put followers without a `has_tweets` field, and not marked private, into the priority queue. """
        self.logger.info('Adding last downloaded followers')
        users_to_be_updated = RawFollowerDAO().get_all({
            '$and': [
                {'has_tweets': {'$exists': False}},
                {'is_private': {'$ne': True}}
            ]})
        followers = self.add_followers(users_to_be_updated)
        self.priority_updating_followers.update(followers)
        self.logger.info('Finishing insertion of last downloaded followers')

    def add_followers(self, downloaded):
        """ Build a dictionary mapping each follower `_id` to its `last_tweet_date`.

        Followers whose `last_tweet_date` is missing or None get 2019-01-01 instead.
        Despite its name this method does not modify any queue; callers merge the result.
        """
        default_date = datetime(2019, 1, 1)
        followers = {}
        for follower in downloaded:
            date = default_date
            if 'last_tweet_date' in follower and follower['last_tweet_date'] is not None:
                date = follower['last_tweet_date']
            followers[follower['_id']] = date
        self.logger.info(f"Added {len(followers)} to queue.")
        return followers