# Example #1 (score: 0)
class Generic_Twitter_Crawler(object):
    """Crawls Twitter via Twitter_Rest_Api, converts tweets into DB posts,
    buffers posts/users for a later commit, and records term->post /
    term->author connections."""

    def __init__(self, db):
        # AbstractController.__init__(self, db)
        self._db = db
        self._twitter_rest_api = Twitter_Rest_Api(db)
        self._config_parser = getConfig()
        self._domain = unicode(self._config_parser.get("DEFAULT", "domain"))
        # Buffers flushed by commit_db().
        self._users_to_add = []
        self._post_to_add = []

    def retrieve_and_save_data_from_twitter_by_terms(self, keywords, terms,
                                                     topics):
        """Fetch posts for *keywords*; persist the posts and their authors,
        then the supplied terms/topics records and the generated connections."""
        posts, total_twitter_users, connections = self.get_posts_and_authors_by_terms(
            keywords)
        self._db.addPosts(posts)
        self._add_users_to_db(total_twitter_users)
        # terms, topics and connections all go through the same bulk-add API.
        for records in (terms, topics, connections):
            self._db.addPosts(records)

    def commit_db(self):
        """Flush the buffered posts and users to the DB and reset the buffers."""
        self._db.addPosts(self._post_to_add)
        self._add_users_to_db(self._users_to_add)
        self._db.commit()
        self._post_to_add = []
        self._users_to_add = []

    def retrieve_and_save_data_from_twitter_by_post_id(self, post_id, label):
        """Fetch one tweet by id, tag it with *label*, and buffer it together
        with its author. On a rate-limit error (code 88) sleeps and retries
        recursively."""
        post, user = self.get_post_and_author_by_post_id(post_id)
        try:
            converted_post = self._db.create_post_from_tweet_data(
                post, self._domain)
            converted_post.post_type = label
            self._users_to_add.append(user)
            self._post_to_add.append(converted_post)

        except TwitterError as e:
            error_info = e[0][0]
            logging.info("e.massage =" + error_info["message"])
            error_code = error_info["code"]
            logging.info("e.code =" + str(error_info["code"]))

            if error_code == 88:
                wait_seconds = self._twitter_rest_api.get_sleep_time_for_twitter_status_id(
                )
                logging.info("Seconds to wait from catched crush is: " +
                             str(wait_seconds))
                if wait_seconds != 0:
                    commons.count_down_time(wait_seconds)
                    self._num_of_twitter_status_id_requests = 0
                return self.retrieve_and_save_data_from_twitter_by_post_id(
                    post_id, label)

    def get_posts_and_authors_by_terms(self, keywords):
        """Return (converted posts, tweet authors, connection records) built
        from the tweets fetched per term."""
        term_tweets_dict = self.get_posts_by_terms(keywords)
        total_posts = []
        total_twitter_users = []
        connections = []
        for term, tweets in term_tweets_dict.iteritems():
            for tweet in tweets:
                converted = self._db.create_post_from_tweet_data(
                    tweet, self._domain)
                post_conn, author_conn = self._create_connections(
                    term, converted)
                connections.append(post_conn)
                connections.append(author_conn)
                total_posts.append(converted)
            total_twitter_users.extend(tweet.user for tweet in tweets)
        return total_posts, total_twitter_users, connections

    def get_post_and_author_by_post_id(self, post_id):
        """Return the tweet with the given id together with its author."""
        fetched_post = self._twitter_rest_api.get_post_by_post_id(post_id)
        return fetched_post, fetched_post.user

    def _add_users_to_db(self, total_twitter_users):
        """Save the given Twitter users and their connections (no explicit
        author/insertion type)."""
        self._twitter_rest_api.save_authors_and_connections(
            total_twitter_users, None, None)

    def get_posts_by_terms(self, terms):
        """Delegate the term search to the REST API wrapper."""
        return self._twitter_rest_api.get_posts_by_terms(terms)

    def _create_connections(self, term, post):
        """Build the term->post and term->author AuthorConnection records."""
        def _make(destination_guid, connection_type):
            conn = AuthorConnection()
            conn.source_author_guid = term
            conn.destination_author_guid = destination_guid
            conn.connection_type = connection_type
            return conn

        return (_make(post.post_id, u"term-post"),
                _make(post.author_guid, u"term-author"))
class MissingDataComplementor(Method_Executor):
    """Complements missing data in the DB by re-querying Twitter.

    Covers: author guids for posts, follower/friend connections, full author
    records for missing screen names, suspended-account detection,
    tweet-retweet citations, and author timelines.
    """

    def __init__(self, db):
        Method_Executor.__init__(self, db)
        self._actions = self._config_parser.eval(self.__class__.__name__,
                                                 "actions")

        # Limits/thresholds read from this class' config section.
        self._minimal_num_of_posts = self._config_parser.eval(
            self.__class__.__name__, "minimal_num_of_posts")
        self._limit_friend_follower_number = self._config_parser.eval(
            self.__class__.__name__, "limit_friend_follower_number")
        self._maximal_tweets_count_in_timeline = self._config_parser.eval(
            self.__class__.__name__, "maximal_tweets_count_in_timeline")

        self._found_twitter_users = []
        self._social_network_crawler = Twitter_Rest_Api(db)
        self._suspended_authors = []
        self._max_users_without_saving = self._config_parser.eval(
            self.__class__.__name__, "max_users_without_saving")
        # Buffers flushed by update_tables_with_tweet_retweet_data().
        self._posts = []
        self._authors = []
        self._post_citatsions = []

    def setUp(self):
        # No setup needed; present for the Method_Executor lifecycle.
        pass

    def fill_author_guid_to_posts(self):
        """Compute and store author_guid for every post, then upsert the
        authors derived from those posts."""
        posts = self._db.get_posts()
        num_of_posts = len(posts)
        for i, post in enumerate(posts):
            msg = "\rPosts to fill: [{0}/{1}]".format(i, num_of_posts)
            print(msg, end="")
            post.author_guid = compute_author_guid_by_author_name(post.author)
        self._db.addPosts(posts)
        self._db.insert_or_update_authors_from_posts(self._domain, {}, {})

    def fill_data_for_followers(self):
        """Crawl follower connections for candidate authors."""
        self._fill_data_for_author_connection_type(
            Author_Connection_Type.FOLLOWER)
        logging.info("---Finished crawl_followers_by_author_ids")

    def fill_data_for_friends(self):
        """Crawl friend connections for candidate authors."""
        self._fill_data_for_author_connection_type(
            Author_Connection_Type.FRIEND)
        logging.info("---Finished crawl_friends_by_author_ids")

    def _fill_data_for_author_connection_type(self, connection_type):
        """Crawl users and connections of the given type for the candidate
        author ids selected from the DB."""
        # TEST
        self._db.get_authors_by_domain("Microblog")
        # TEST

        cursor = self._db.get_followers_or_friends_candidats(
            connection_type, self._domain, self._limit_friend_follower_number)
        followers_or_friends_candidats = self._db.result_iter(cursor)
        # Each row is a 1-tuple holding the author id.
        followers_or_friends_candidats = [
            author_id[0] for author_id in followers_or_friends_candidats
        ]
        print("---crawl_followers_by_author_ids---")
        author_type = None
        are_user_ids = True
        insertion_type = DB_Insertion_Type.MISSING_DATA_COMPLEMENTOR
        crawl_users_by_author_ids_func_name = "crawl_users_by_author_ids"
        getattr(self._social_network_crawler,
                crawl_users_by_author_ids_func_name)(
                    followers_or_friends_candidats, connection_type,
                    author_type, are_user_ids, insertion_type)
        self._db.convert_temp_author_connections_to_author_connections(
            self._domain)

    def crawl_followers_by_author_ids(self, author_ids):
        """Crawl followers of the given Twitter author ids."""
        print("---crawl_followers_by_author_ids---")
        author_type = None
        are_user_ids = True
        inseration_type = DB_Insertion_Type.MISSING_DATA_COMPLEMENTOR
        self._social_network_crawler.crawl_followers_by_twitter_author_ids(
            author_ids, author_type, are_user_ids, inseration_type)

    def crawl_friends_by_author_ids(self, author_ids):
        """Crawl friends of the given Twitter author ids."""
        print("---crawl_friends_by_author_ids---")
        author_type = None
        are_user_ids = True
        inseration_type = DB_Insertion_Type.MISSING_DATA_COMPLEMENTOR
        self._social_network_crawler.crawl_friends_by_twitter_author_ids(
            author_ids, author_type, are_user_ids, inseration_type)

    def create_author_screen_names(self):
        """Return the screen names of Twitter authors referenced by posts."""
        screen_names = self._db.get_screen_names_for_twitter_authors_by_posts()
        return screen_names

    def fill_data_for_sources(self):
        """Fetch full user records for screen names with missing data and
        save them together with their connections.

        :return: the fetched Twitter user objects
        """
        print(
            "---complete_missing_information_for_authors_by_screen_names ---")
        logging.info(
            "---complete_missing_information_for_authors_by_screen_names ---")
        twitter_author_screen_names = self._db.get_missing_data_twitter_screen_names(
        )

        author_type = None
        are_user_ids = False
        inseration_type = DB_Insertion_Type.MISSING_DATA_COMPLEMENTOR
        total_twitter_users = self._social_network_crawler.handle_get_users_request(
            twitter_author_screen_names, are_user_ids, author_type,
            inseration_type)

        self._social_network_crawler.save_authors_and_connections(
            total_twitter_users, author_type, inseration_type)

        print(
            "---complete_missing_information_for_authors_by_screen_names was completed!!!!---"
        )
        logging.info(
            "---complete_missing_information_for_authors_by_screen_names was completed!!!!---"
        )
        return total_twitter_users

    def complete_missing_information_for_authors_by_ids(self):
        """Fetch full user records for screen names with missing data.

        Unlike fill_data_for_sources(), the fetched users are not explicitly
        saved here (handle_get_users_request receives the insertion type).

        :return: the fetched Twitter user objects
        """
        print("---complete_missing_information_for_authors_by_ids ---")
        logging.info("---complete_missing_information_for_authors_by_ids ---")
        twitter_author_screen_names = self._db.get_missing_data_twitter_screen_names(
        )

        author_type = None
        are_user_ids = False
        inseration_type = DB_Insertion_Type.MISSING_DATA_COMPLEMENTOR
        total_twitter_users = self._social_network_crawler.handle_get_users_request(
            twitter_author_screen_names, are_user_ids, author_type,
            inseration_type)
        print(
            "---complete_missing_information_for_authors was completed!!!!---")
        logging.info(
            "---complete_missing_information_for_authors was completed!!!!---")
        return total_twitter_users

    def mark_suspended_or_not_existed_authors(self):
        """Mark DB-selected authors as suspended/not-existing and save them."""
        suspended_authors = self._db.get_authors_for_mark_as_suspended_or_not_existed(
        )
        for suspended_author in suspended_authors:
            # NOTE(review): _window_start is presumably set by the
            # Method_Executor base class -- confirm.
            suspended_author.is_suspended_or_not_exists = self._window_start
            self._db.set_inseration_date(
                suspended_author, DB_Insertion_Type.MISSING_DATA_COMPLEMENTOR)
        self._social_network_crawler.save_authors(suspended_authors)

    def mark_suspended_from_twitter(self):
        """Ask Twitter which not-yet-suspended authors still exist; authors
        missing from the responses are marked suspended/not-existing."""
        self._suspended_authors = []
        suspected_authors = self._db.get_not_suspended_authors(self._domain)
        suspected_authors_names = [author.name for author in suspected_authors]
        # Chunk the names to respect the API's per-request user limit.
        # Fix: the chunk generator was previously built twice (once for the
        # count, once for iteration); materialize it once and reuse the list.
        chunks = list(
            split_into_equal_chunks(
                suspected_authors_names, self._social_network_crawler.
                _maximal_user_ids_allowed_in_single_get_user_request))
        i = 1
        for chunk_of_names in chunks:
            msg = "\rChunck of author to Twitter: [{0}/{1}]".format(
                i, len(chunks))
            print(msg, end="")
            i += 1
            set_of_send_author_names = set(chunk_of_names)
            set_of_received_author_names = set(
                self._social_network_crawler.
                get_active_users_names_by_screen_names(chunk_of_names))
            # Names Twitter did not answer for are suspended or gone.
            author_names_of_suspendend_or_not_exists = set_of_send_author_names - set_of_received_author_names
            self._update_suspended_authors_by_screen_names(
                author_names_of_suspendend_or_not_exists)
        self._db.add_authors(self._suspended_authors)

    def _update_suspended_authors_by_screen_names(
            self, author_names_of_suspendend_or_not_exists):
        """Mark the given screen names as suspended bad actors, batching DB
        writes every _max_users_without_saving authors."""
        for author_name in author_names_of_suspendend_or_not_exists:
            user_guid = compute_author_guid_by_author_name(
                author_name).replace("-", "")
            suspended_author = self._db.get_author_by_author_guid(user_guid)

            suspended_author.is_suspended_or_not_exists = self._window_start
            suspended_author.author_type = Author_Type.BAD_ACTOR
            self._db.set_inseration_date(
                suspended_author, DB_Insertion_Type.MISSING_DATA_COMPLEMENTOR)
            self._suspended_authors.append(suspended_author)

            num_of_suspended_authors = len(self._suspended_authors)
            if num_of_suspended_authors == self._max_users_without_saving:
                self._db.add_authors(self._suspended_authors)
                self._suspended_authors = []

    def fill_tweet_retweet_connection(self):
        '''
        Fetches the original tweets being retweeted by our posts.
        Updates the following tables:
         * Post_Citations table with tweet-retweet connection
         * Posts table with missing tweets
         * Authors with the authors of the missing tweets
        '''
        retweets_with_no_tweet_citation = self._db.get_retweets_with_no_tweet_citation(
        )
        logging.info(
            "Updating tweet-retweet connection of {0} retweets".format(
                len(retweets_with_no_tweet_citation)))
        # Reset the buffers; extract_retweet_data may flush them mid-run
        # when a TwitterError occurs.
        self._posts = []
        self._authors = []
        self._post_citatsions = []
        i = 1
        for post_guid, post_url in retweets_with_no_tweet_citation.iteritems():
            msg = "\r Analyzing retweet: {0} - {1} [{2}".format(
                post_guid, post_url, i) + "/" + str(
                    len(retweets_with_no_tweet_citation)) + '] '
            print(msg, end="")
            i += 1
            tweet_data = self.extract_retweet_data(retweet_guid=post_guid,
                                                   retweet_url=post_url)
            if tweet_data is not None:

                if not self._db.isPostExist(tweet_data.tweet_url):
                    post = Post(guid=tweet_data.tweet_guid,
                                post_id=tweet_data.tweet_guid,
                                url=tweet_data.tweet_url,
                                date=str_to_date(tweet_data.tweet_date),
                                title=tweet_data.tweet_content,
                                content=tweet_data.tweet_content,
                                post_osn_id=tweet_data.tweet_twitter_id,
                                retweet_count=tweet_data.tweet_retweet_count,
                                favorite_count=tweet_data.tweet_favorite_count,
                                author=tweet_data.tweet_author_name,
                                author_guid=tweet_data.tweet_author_guid,
                                domain=self._domain,
                                original_tweet_importer_insertion_date=unicode(
                                    get_current_time_as_string()))
                    self._posts.append(post)

                if not self._db.is_author_exists(tweet_data.tweet_author_guid,
                                                 self._domain):
                    author = Author(
                        name=tweet_data.tweet_author_name,
                        domain=self._domain,
                        author_guid=tweet_data.tweet_author_guid,
                        original_tweet_importer_insertion_date=unicode(
                            get_current_time_as_string()))
                    self._authors.append(author)

                if not self._db.is_post_citation_exist(tweet_data.retweet_guid,
                                                       tweet_data.tweet_guid):
                    post_citation = Post_citation(
                        post_id_from=tweet_data.retweet_guid,
                        post_id_to=tweet_data.tweet_guid,
                        url_from=tweet_data.retweet_url,
                        url_to=tweet_data.tweet_url)
                    self._post_citatsions.append(post_citation)

        self.update_tables_with_tweet_retweet_data(self._posts, self._authors,
                                                   self._post_citatsions)

    def extract_retweet_data(self, retweet_guid, retweet_url):
        '''
        Fetches the tweet a retweet points at.

        :param retweet_guid: the guid of the retweet
        :param retweet_url: the url of the retweet
        :return: a RetweetData holding the data of the original tweet, or
            None when the URL is not parsable, the status is not a retweet,
            or fetching failed
        '''
        try:
            retweet_id = self.extract_tweet_id(retweet_url)
            if retweet_id is None:
                return None

            retweet_status = self._social_network_crawler.get_status_by_twitter_status_id(
                retweet_id)
            tweet_status_dict = retweet_status.AsDict()
            if 'retweeted_status' in tweet_status_dict:
                # Work on the embedded original tweet, not the retweet itself.
                tweet_status_dict = tweet_status_dict['retweeted_status']
                tweet_post_twitter_id = unicode(str(tweet_status_dict['id']))
                tweet_author_name = unicode(
                    tweet_status_dict['user']['screen_name'])
                tweet_url = unicode(
                    generate_tweet_url(tweet_post_twitter_id,
                                       tweet_author_name))
                tweet_creation_time = unicode(tweet_status_dict['created_at'])
                tweet_str_publication_date = unicode(
                    extract_tweet_publiction_date(tweet_creation_time))
                tweet_guid = unicode(
                    compute_post_guid(
                        post_url=tweet_url,
                        author_name=tweet_author_name,
                        str_publication_date=tweet_str_publication_date))
                # Author guid is the name-derived guid with dashes stripped.
                tweet_author_guid = unicode(
                    compute_author_guid_by_author_name(tweet_author_name))
                tweet_author_guid = unicode(tweet_author_guid.replace("-", ""))
                tweet_content = unicode(tweet_status_dict['text'])
                tweet_retweet_count = unicode(
                    tweet_status_dict['retweet_count'])
                tweet_favorite_count = unicode(
                    tweet_status_dict['favorite_count'])

                retweet_data = RetweetData(
                    retweet_guid=retweet_guid,
                    retweet_url=retweet_url,
                    tweet_guid=tweet_guid,
                    tweet_url=tweet_url,
                    tweet_author_name=tweet_author_name,
                    tweet_author_guid=tweet_author_guid,
                    tweet_date=tweet_str_publication_date,
                    tweet_content=tweet_content,
                    tweet_twitter_id=tweet_post_twitter_id,
                    tweet_retweet_count=tweet_retweet_count,
                    tweet_favorite_count=tweet_favorite_count)
                return retweet_data
            else:
                return None

        except TwitterError as e:
            exception_response = e[0][0]
            logging.info("e.massage =" + exception_response["message"])
            code = exception_response["code"]
            logging.info("e.code =" + str(exception_response["code"]))

            # Flush whatever was collected so far before waiting/retrying.
            self.update_tables_with_tweet_retweet_data(self._posts,
                                                       self._authors,
                                                       self._post_citatsions)
            self._posts = []
            self._authors = []
            self._post_citatsions = []

            if code == 88:
                sec = self._social_network_crawler.get_sleep_time_for_twitter_status_id(
                )
                logging.info("Seconds to wait from catched crush is: " +
                             str(sec))
                if sec != 0:
                    count_down_time(sec)
                    self._num_of_twitter_status_id_requests = 0
                # NOTE(review): this returns a raw status, not a RetweetData;
                # callers only check for None, so the mismatch is preserved.
                return self._social_network_crawler.get_status(retweet_id)

        except Exception as e:
            logging.error(
                "Cannot fetch data for retweet: {0}. Error message: {1}".
                format(retweet_url, e.message))
            return None

    def extract_tweet_id(self, post_url):
        """Extract the status id from a tweet URL ('/statuses/<id>' or
        '/status/<id>'); return None when the URL matches neither form."""
        post_url = str(post_url)
        pattern = re.compile("http(.*)://twitter.com/(.*)/statuses/(.*)")
        extracted_info = pattern.findall(post_url)
        if extracted_info == []:
            pattern = re.compile("http(.*)://twitter.com/(.*)/status/(.*)")
            extracted_info = pattern.findall(post_url)
            if extracted_info == []:
                # Fix: previously fell through to extracted_info[0] and
                # raised IndexError when neither pattern matched.
                return None
        if len(extracted_info[0]) < 2:
            return None
        else:
            return extracted_info[0][2]

    def update_tables_with_tweet_retweet_data(self, posts, authors,
                                              post_citatsions):
        """Bulk-add the collected posts, authors and post citations."""
        self._db.addPosts(posts)
        self._db.add_authors(authors)
        self._db.addReferences(post_citatsions)

    def fill_authors_time_line(self):
        '''
        Fetches the posts for the authors that are given under authors_twitter_ids_for_timeline_filling in the config file +
        update the db
        '''
        self._db.create_authors_index()
        self._db.create_posts_index()
        author_screen_names_number_of_posts = self._db.get_author_screen_names_and_number_of_posts(
            self._minimal_num_of_posts)
        author_screen_names_number_of_posts_dict = self._create_author_screen_name_number_of_posts_dictionary(
            author_screen_names_number_of_posts)
        index = 1
        for author_name in author_screen_names_number_of_posts_dict:
            print("Get timeline for {0} : {1}/{2}".format(
                author_name, str(index),
                str(len(author_screen_names_number_of_posts_dict))))
            index += 1
            posts = []
            logging.info("Fetching timeline for author: " + str(author_name))
            posts_counter = 0
            try:
                # Only fetch the number of posts still missing for the author.
                posts_needed_from_osn = self._minimal_num_of_posts - author_screen_names_number_of_posts_dict[
                    author_name]
                timeline = self._social_network_crawler.get_timeline_by_author_name(
                    author_name, posts_needed_from_osn)
                if timeline is not None:
                    for post in timeline:
                        tweet_post_twitter_id = str(post.id)
                        tweet_url = generate_tweet_url(tweet_post_twitter_id,
                                                       author_name)
                        tweet_creation_time = post.created_at
                        tweet_str_publication_date = extract_tweet_publiction_date(
                            tweet_creation_time)
                        # NOTE(review): tweet_guid / tweet_author_guid are
                        # computed but never used; kept for parity with the
                        # original flow.
                        tweet_guid = compute_post_guid(
                            post_url=tweet_url,
                            author_name=author_name,
                            str_publication_date=tweet_str_publication_date)
                        if self._db.contains_post(tweet_url):
                            continue
                        posts_counter = posts_counter + 1
                        tweet_author_guid = compute_author_guid_by_author_name(
                            author_name)
                        post = self._db.create_post_from_tweet_data(
                            post, self._domain)
                        posts.append(post)
            except Exception as e:
                logging.error(
                    "Cannot fetch data for author: {0}. Error message: {1}".
                    format(author_name, e.message))
            logging.info("Number of posts inserted for author {0}: {1}".format(
                author_name, posts_counter))
            self._db.addPosts(posts)

    def assign_manually_labeled_authors(self):
        """Delegate: apply manual author labels."""
        self._db.assign_manually_labeled_authors()

    def delete_acquired_authors(self):
        """Delegate: remove acquired authors and their orphaned posts."""
        self._db.delete_acquired_authors()
        self._db.delete_posts_with_missing_authors()

    def delete_manually_labeled_authors(self):
        """Delegate: remove manually labeled authors and their orphaned posts."""
        self._db.delete_manually_labeled_authors()
        self._db.delete_posts_with_missing_authors()

    def assign_acquired_and_crowd_turfer_profiles(self):
        """Delegate: tag crowdturfer and acquired profiles."""
        self._db.assign_crowdturfer_profiles()
        self._db.assign_acquired_profiles()

    def _create_author_screen_name_number_of_posts_dictionary(
            self, author_screen_names_number_of_posts):
        """Convert (screen_name, num_of_posts) records into a dict keyed by
        screen name."""
        author_screen_names_number_of_posts_dict = {}
        for record in author_screen_names_number_of_posts:
            author_screen_name = record[0]
            num_of_posts = record[1]
            author_screen_names_number_of_posts_dict[
                author_screen_name] = num_of_posts
        logging.info("Number of users to retrieve timelines: " +
                     str(len(author_screen_names_number_of_posts_dict)))
        return author_screen_names_number_of_posts_dict
# Example #3 (score: 0)
class Generic_Twitter_Crawler(object):
    """Fetches tweets via Twitter_Rest_Api, converts them into DB posts,
    and stores the posts together with their authors immediately."""

    def __init__(self, db):
        # AbstractController.__init__(self, db)
        self._db = db
        self._twitter_rest_api = Twitter_Rest_Api(db)
        self._config_parser = getConfig()
        self._domain = unicode(self._config_parser.get("DEFAULT", "domain"))

    def retrieve_and_save_data_from_twitter_by_terms(self, terms):
        """Fetch posts matching *terms* and persist the posts and authors."""
        fetched_posts, fetched_users = self.get_posts_and_authors_by_terms(
            terms)
        self._db.addPosts(fetched_posts)
        self._add_users_to_db(fetched_users)

    def retrive_and_save_data_from_twitter_by_post_id(self, post_id, label):
        """Fetch one tweet by id, tag it with *label*, persist it with its
        author and commit. On a rate-limit error (code 88) sleeps and retries
        recursively."""
        post, user = self.get_post_and_author_by_post_id(post_id)
        try:
            converted_post = self._db.create_post_from_tweet_data(
                post, self._domain)
            converted_post.post_type = label
            self._db.addPost(converted_post)
            self._add_users_to_db([user])
            self._db.commit()

        except TwitterError as e:
            error_info = e[0][0]
            logging.info("e.massage =" + error_info["message"])
            error_code = error_info["code"]
            logging.info("e.code =" + str(error_info["code"]))

            if error_code == 88:
                wait_seconds = self._twitter_rest_api.get_sleep_time_for_twitter_status_id(
                )
                logging.info("Seconds to wait from catched crush is: " +
                             str(wait_seconds))
                if wait_seconds != 0:
                    commons.count_down_time(wait_seconds)
                    self._num_of_twitter_status_id_requests = 0
                return self.retrive_and_save_data_from_twitter_by_post_id(
                    post_id, label)

    def get_posts_and_authors_by_terms(self, terms):
        """Return (converted posts, tweet authors) for every fetched term."""
        term_posts_dictionary = self.get_posts_by_terms(terms)
        posts = []
        total_twitter_users = []
        for tweets in term_posts_dictionary.values():
            for tweet in tweets:
                posts.append(
                    self._db.create_post_from_tweet_data(tweet, self._domain))
            total_twitter_users.extend(tweet.user for tweet in tweets)
        return posts, total_twitter_users

    def get_post_and_author_by_post_id(self, post_id):
        """Return the tweet with the given id together with its author."""
        fetched_post = self._twitter_rest_api.get_post_by_post_id(post_id)
        return fetched_post, fetched_post.user

    def _add_users_to_db(self, total_twitter_users):
        """Save the given Twitter users and their connections (no explicit
        author/insertion type)."""
        self._twitter_rest_api.save_authors_and_connections(
            total_twitter_users, None, None)

    def get_posts_by_terms(self, terms):
        """Delegate the term search to the REST API wrapper."""
        return self._twitter_rest_api.get_posts_by_terms(terms)
# Example #4 (score: 0)
class PostManager(Method_Executor):
    def __init__(self, db):
        """Read the posting configuration for this class' config section and
        set up the Twitter API clients."""
        Method_Executor.__init__(self, db)
        self._twitter_api = TwitterApiRequester()
        self._social_network_crawler = Twitter_Rest_Api(db)
        # attribute name -> config key; evaluated in the same order as before.
        # NOTE: "retweet_precent" [sic] matches the existing config key.
        section = self.__class__.__name__
        config_keys = {
            "_influence_strategy": "post_strategy",
            "_source_group": "source_group",
            "_target_group": "target_group",
            "_user_id": "user_id",
            "_number_of_posts": "number_of_posts",
            "_retweet_precent": "retweet_precent",
            "_related_hashtags": "related_hashtags",
            "_posts_num": "posts_num",
        }
        for attr, key in config_keys.items():
            setattr(self, attr, self._config_parser.eval(section, key))

    def _publish_post(self, post, message, media):
        """Publish *message* (with optional *media*) as a new tweet and record
        the action as a 'twitter_post' activity for the configured user."""
        # A fresh requester is created on every call, as in the original flow.
        self._twitter_api = TwitterApiRequester()
        status = self._twitter_api.api.PostUpdate(message, media)
        return self._db.create_activity(self._user_id, post.post_osn_id, status.id,
                                        'twitter_post', 'twitter', message,
                                        datetime.datetime.utcnow(), "twitter")

    def _retweet_post(self, post):
        """Retweet *post* via the API and record it as a 'twitter_retweet'
        activity for the configured user."""
        # A fresh requester is created on every call, as in the original flow.
        self._twitter_api = TwitterApiRequester()
        status = self._twitter_api.api.PostRetweet(post.post_osn_id, trim_user=False)
        return self._db.create_activity(self._user_id, post.post_osn_id, status.id,
                                        'twitter_retweet', 'twitter', post.content,
                                        datetime.datetime.utcnow(), "twitter")

    def _get_posts(self):
        """Collect every post of the source group's author(s) and split the
        result into (tweets, retweets)."""
        author_guids = self._db.get_author_guid_by_screen_name(self._source_group)
        if len(author_guids) == 1:
            source_posts = self._db.get_posts_by_author_guid(author_guids[0])
        else:
            # Multiple matching authors: flatten their per-author post lists.
            source_posts = [post
                            for guid in author_guids
                            for post in self._db.get_posts_by_author_guid(guid)]
        return self._split_into_retweet_and_tweet(source_posts)

    def _split_into_retweet_and_tweet(self, team_posts):
        """Partition *team_posts* into (plain tweets, retweets) by whether the
        content starts with the "RT" marker."""
        plain_tweets = []
        retweets = []
        for post in team_posts:
            is_retweet = str(post.content[0:2]) == "RT"
            (retweets if is_retweet else plain_tweets).append(post)
        return plain_tweets, retweets

    def influence_strategy_sort(self, team_posts_without_retweet, team_posts_with_retweet):
        """Sort both candidate lists in place according to the configured
        strategy ("last" = newest first, "popular" = most-liked first) and
        return them; unknown strategies leave the lists untouched."""
        key_by_strategy = {
            "last": lambda post: post.date,
            "popular": lambda post: post.favorite_count,
        }
        sort_key = key_by_strategy.get(self._influence_strategy)
        if sort_key is not None:
            team_posts_without_retweet.sort(key=sort_key, reverse=True)
            team_posts_with_retweet.sort(key=sort_key, reverse=True)
        return team_posts_without_retweet, team_posts_with_retweet

    def _create_tweet_content(self, post):
        """Compose the tweet for *post*: its content plus a target-group
        mention/hashtag and a UTC timestamp, then as many related hashtags as
        fit in 270 characters. Returns (message, media_path)."""
        tweet_length = 270
        media = post.media_path
        message = "{}\n@{} #{} {}".format(
            post.content, self._target_group, self._target_group,
            datetime.datetime.utcnow())
        for hashtag in self._related_hashtags:
            if len(message + " " + hashtag) > tweet_length:
                break
            message = message + " " + hashtag
        # Hard cap in case the base message alone already exceeds the limit.
        return message[:tweet_length], media

    def _post_func(self, post, team_posts_without_retweet):
        """Attempt to publish *post* as a tweet.

        On success the resulting activity is stored; on any failure the head
        of the candidate list is dropped. Returns ("True" or "False",
        updated candidate list).
        """
        try:
            message, media = self._create_tweet_content(post)
            activity = self._publish_post(post, message, media)
            print("date: " + str(datetime.datetime.utcnow()) + " post number: " + str(
                self._posts_num) + " succeed to send a tweet")
            self._db.addPosts([activity])
            return "True", team_posts_without_retweet
        except Exception as err:
            print("Failed  {}".format(err))
            if team_posts_without_retweet:
                del team_posts_without_retweet[0]
            return "False", team_posts_without_retweet

    def _retweet_func(self, post, team_posts_with_retweet):
        """Attempt to retweet *post*.

        On success the resulting activity is stored; on any failure the head
        of the candidate list is dropped. Returns ("True" or "False",
        updated candidate list).
        """
        try:
            activity = self._retweet_post(post)
            print("date: " + str(datetime.datetime.utcnow()) + " post number: " + str(
                self._posts_num) + " succeed to send a retweet")
            self._db.addPosts([activity])
            return "True", team_posts_with_retweet
        except Exception as err:
            print("Failed  {}".format(err))
            if team_posts_with_retweet:
                del team_posts_with_retweet[0]
            return "False", team_posts_with_retweet

    def _execute_post_process(self, team_posts_without_retweet, team_posts_with_retweet):
        """Send exactly one publication (tweet or retweet, chosen once at
        random against the configured retweet ratio), retrying with the next
        candidate until a send succeeds. No-op once the daily quota is met."""
        if self._number_of_posts <= self._posts_num:
            return

        team_posts_without_retweet, team_posts_with_retweet = self.influence_strategy_sort(
            team_posts_without_retweet, team_posts_with_retweet)

        self._posts_num = self._posts_num + 1
        # A single coin flip decides tweet-vs-retweet for this whole attempt.
        coin = random.uniform(0, 1)

        sent = 'False'
        while sent == 'False':
            if coin >= self._retweet_precent:
                post, team_posts_without_retweet, team_posts_with_retweet = self._selecting_post(
                    team_posts_without_retweet, team_posts_with_retweet, "post")
                sent, team_posts_without_retweet = self._post_func(post, team_posts_without_retweet)
            else:
                post, team_posts_without_retweet, team_posts_with_retweet = self._selecting_post(
                    team_posts_without_retweet, team_posts_with_retweet, "retweet")
                sent, team_posts_with_retweet = self._retweet_func(post, team_posts_with_retweet)

    def _selecting_post(self, team_posts_without_retweet, team_posts_with_retweet, type):
        """Pick the next not-yet-sent candidate of the requested kind.

        type == "post": take the head of team_posts_without_retweet, skipping
        tweets that already mention the target group; any other value: take the
        head of team_posts_with_retweet. Candidates the DB reports as already
        sent by this user are dropped and the next head is tried.

        Raises IndexError with a descriptive message when candidates run out.
        (In the original code the "End of tweets" branches were unreachable —
        their len() >= 1 guards were always true because the head had just been
        read — so exhaustion surfaced as a bare IndexError on `list[0]`; the
        exception type is preserved, the message is now explicit.)
        """
        post_exist = True
        while post_exist == True:
            if type == "post":
                if not team_posts_without_retweet:
                    print("End of tweets")
                    raise IndexError("no unsent tweets left to post")
                ans = team_posts_without_retweet[0]
                # Never publish content that already mentions the target group.
                while "@" + self._target_group in ans.content:
                    del team_posts_without_retweet[0]
                    if not team_posts_without_retweet:
                        print("End of tweets")
                        raise IndexError("no unsent tweets left to post")
                    ans = team_posts_without_retweet[0]
            else:
                if not team_posts_with_retweet:
                    print("End of tweets")
                    raise IndexError("no unsent retweets left")
                ans = team_posts_with_retweet[0]

            # Already published by this user? Drop it and try the next head.
            post_exist = self._db.check_if_post_sent(ans, self._user_id)
            if post_exist == True:
                if type == "post":
                    del team_posts_without_retweet[0]
                else:
                    del team_posts_with_retweet[0]

        return ans, team_posts_without_retweet, team_posts_with_retweet

    def time_schedule(self):
        """Run forever: refresh the source group's timelines, then spread
        self._number_of_posts publications evenly across each 24-hour day."""
        minutes_per_day = 24 * 60
        minute_window = float(minutes_per_day) / self._number_of_posts

        while True:
            self._posts_num = 0
            self._convert_timeline_tweets_to_posts_for_author_screen_names(self._source_group)
            without_retweet, with_retweet = self._get_posts()
            self._execute_post_process(without_retweet, with_retweet)

            schedule.every(minute_window).minutes.do(
                self._execute_post_process, without_retweet, with_retweet)

            # Pump the scheduler until the daily quota is reached.
            while self._number_of_posts > self._posts_num:
                schedule.run_pending()
                time.sleep(1)

    def calculate_posts_stat(self, author_guid="0927dc1a-8bcb-3488-99ed-7a962aee56e2",
                             date="2020-03-04 03:28:20"):
        """Export post statistics to "<author_guid>.csv".

        Gathers, from *date* onward, the stats of *author_guid*'s own posts and
        of the posts they point to (via the DB's source->destination mapping),
        and writes both frames concatenated to CSV.

        The author and cutoff date were previously hard-coded; they are now
        parameters whose defaults preserve the old behaviour, so existing
        no-argument callers are unaffected.
        """
        ids = self._db.source_destination()
        author_posts = self._db.posts_statics_from_date(author_guid, date)
        # Translate this author's post guids into the destination-post guids.
        author_posts_guid = [ids[guid]
                             for guid in self._db.posts_statics_guids(author_guid, date)]
        influencers_posts = self._db.posts_statics_from_date_for_specific_posts(author_posts_guid)

        columns = ['author_guid', 'post_count', 'retweet_sum', 'favorite_sum',
                   'retweet_avg', 'favorite_avg']
        result = pd.concat([pd.DataFrame(author_posts, columns=columns),
                            pd.DataFrame(influencers_posts, columns=columns)])
        result.to_csv(author_guid + ".csv")

    def _convert_timeline_tweets_to_posts_for_author_screen_names(self, author_screen_names):
        """Download up to 3200 timeline tweets per screen name, convert them to
        posts, store them, and backfill author data for the new posts.

        Best-effort per account: connection errors and "Not authorized."
        Twitter errors skip the offending account instead of aborting the run.
        """
        posts = []
        for i, account_screen_name in enumerate(author_screen_names):
            try:

                timeline_tweets = self._social_network_crawler.get_timeline(account_screen_name, 3200)
                if timeline_tweets is not None:
                    print("\rSearching timeline tweets for author_guid: {0} {1}/{2} retrieved:{3}".format(
                        account_screen_name, i,
                        len(author_screen_names), len(timeline_tweets)),
                          end='')
                    for timeline_tweet in timeline_tweets:
                        post = self._db.create_post_from_tweet_data_api(timeline_tweet, self._domain)
                        posts.append(post)
            except requests.exceptions.ConnectionError as errc:
                # Was a silent swallow (`x = 3`); log the skip so failed
                # accounts are at least traceable, then continue best-effort.
                logging.info("Connection error for {0}: {1}".format(account_screen_name, errc))

            except TwitterError as e:
                # Other TwitterError messages are deliberately swallowed too,
                # matching the original best-effort behaviour.
                if e.message == "Not authorized.":
                    logging.info("Not authorized for user id: {0}".format(account_screen_name))
                    continue

        self._db.addPosts(posts)
        self.fill_data_for_sources()

    def fill_author_guid_to_posts(self):
        """Recompute author_guid for every stored post from its author name,
        persist the posts and upsert the corresponding authors."""
        posts = self._db.get_all_posts()
        total = len(posts)
        for index, post in enumerate(posts):
            print("\rPosts to fill: [{0}/{1}]".format(index, total), end="")
            post.author_guid = compute_author_guid_by_author_name(post.author)
        self._db.addPosts(posts)
        self._db.insert_or_update_authors_from_posts(self._domain, {}, {})

    def fill_data_for_sources(self):
        """Fetch full Twitter profiles for authors referenced by posts but
        missing from the DB (in chunks of 10000 screen names), save them, then
        rewrite post author guids."""
        print("---complete_missing_information_for_authors_by_screen_names ---")

        screen_names = self._db.get_missing_data_twitter_screen_names_by_posts()
        insertion_type = DB_Insertion_Type.MISSING_DATA_COMPLEMENTOR
        chunk_index = 1
        for name_chunk in self._split_into_equal_chunks(screen_names, 10000):
            # handle_get_users_request(names, are_user_ids, author_type, insertion_type)
            twitter_users = self._social_network_crawler.handle_get_users_request(
                name_chunk, False, None, insertion_type)

            print('retrieve authors {}/{}'.format(chunk_index * 10000,
                                                  len(screen_names)))
            chunk_index += 1
            self._social_network_crawler.save_authors_and_connections(twitter_users, None, insertion_type)

        self.fill_author_guid_to_posts()

        print("---complete_missing_information_for_authors_by_screen_names was completed!!!!---")

    def _split_into_equal_chunks(self, elements, num_of_chunks):
        """Yield successive slices of *elements*.

        NOTE(review): despite its name, num_of_chunks is the chunk *size*
        (number of items per slice), not the number of chunks produced.
        """
        start = 0
        while start < len(elements):
            yield elements[start:start + num_of_chunks]
            start += num_of_chunks