def get_users_data(user_name1, user_name2):
    """
    loading both users through engine, saving them to db and
    building one markov chain model per user
    only the first 10 elements of every flushed timeline go into the model
    returning tuple: (model of user_name1, model of user_name2)
    """
    def normalised_words(tweet):
        # key for tools.flush: words of the tweet text, normalised
        return tp.get_words(tweet['text'], is_normalise=True)

    # both users are fetched before anything is written to db
    user1 = engine.get_user_info(user_name1)
    user2 = engine.get_user_info(user_name2)
    fetched_users = (user1, user2)

    for fetched in fetched_users:
        db.save_user(fetched.serialise())

    first_sample, second_sample = [
        tools.flush(fetched.timeline, by_what=normalised_words)[:10]
        for fetched in fetched_users
    ]

    # sizes of the samples the models are built from
    for sample in (first_sample, second_sample):
        print(len(sample))

    first_model = markov_chain_machine.create_model(first_sample, user_name1, boost)
    second_model = markov_chain_machine.create_model(second_sample, user_name2, boost)
    return first_model, second_model
    def _get_data(self, t_user):
        """
        forming user in our model from tweepy user object t_user
        returning m_user object, or None when:
          - the user is protected
          - tweepy raises an error that is not a rate limit error
        on 'Rate limit exceeded' the call sleeps 360 seconds and tries again
        """
        # retrying in a loop, so a long run of rate limit errors
        # does not grow the call stack
        while True:
            try:
                result = m_user(tools.imply_dog(t_user.screen_name, with_dog=True))
                if t_user.protected:
                    log.debug('user %s is protected... skip him' % t_user.screen_name)
                    return None

                result.real_name = t_user.name
                lists = t_user.lists()
                # the lists() call is counted as one more request
                self._count_requests += 1
                log.debug("get lists +1")
                result.set_lists(tools.flush(lists, lambda x: x.name), len(lists))

                result.followers_count = t_user.followers_count
                result.friends_count = t_user.friends_count

                result.favorites_count = t_user.favourites_count
                result.timeline = self._get_time_line(t_user)
                result.timeline_count = t_user.statuses_count
                result.inited_ = t_user.created_at.strftime(props.time_format)
                return result

            except tweepy.TweepError as e:
                if 'Rate limit exceeded' not in str(e):
                    # waiting will not help here: report the error instead of
                    # dropping it without a trace and give up on this user
                    log.exception(e)
                    return None

                log.info('oook wil be sleep...')
                time.sleep(360)
def create_model_main(users, model_id, is_normalise=True):
    """
    building one markov chain model (identified by model_id) from the
    timeline texts of all given users
    every text is split by get_words with the given is_normalise flag
    returning the model after it was saved
    """
    def text_of(tweet):
        return tweet['text']

    chain = markov_chain(model_id, booster)
    for current_user in users:
        for text in tools.flush(current_user.timeline, text_of):
            chain.add_message(get_words(text, is_normalise=is_normalise))
    chain.save()
    return chain
def create_model(user, is_normalise=True, mc=None):
    """
    creating model for one user
    user: object with name_ and timeline attributes
    is_normalise: flag passed to get_words for every timeline text
    mc: existing markov chain to extend; when None a new one named
        user.name_ is created
    returning the markov chain after it was saved
    """
    # 'is None' and not truthiness: a chain passed by the caller must be
    # used even if the object happens to evaluate as false
    if mc is None:
        mc = markov_chain(user.name_, booster)

    for text in tools.flush(user.timeline, lambda x: x['text']):
        # is_normalise is an option of get_words (same as in create_model_main),
        # add_message receives only the resulting words
        mc.add_message(get_words(text, is_normalise=is_normalise))

    mc.save()
    return mc
    def get_user_info(self, start_user):
        """
        input is anything self._prepare_user_t_object can turn into a tweepy user
        loading user data, statistic of the timeline texts
        (hash tags, urls, mentions) and relations of the user
        returning user obj in my model, or None when:
          - the start user can not be prepared
          - no data was formed for the user
          - an error happened that is neither a rate limit nor a token error
        on 'Rate limit exceeded' the call sleeps 360 seconds and tries again
        on 'Invalid / expired Token' the error is logged and raised again
        """
        t_user = None
        try:
            start_user_obj = self._prepare_user_t_object(start_user)
            if not start_user_obj:
                log.warn("start user is none")
                return None
            t_user = start_user_obj
            # parentheses make the intended '@name' argument explicit
            log.info('getting user info for user: %s' % ('@' + t_user.screen_name))
            #forming user data
            user = self._get_data(t_user)
            if not user:
                log.warn('when getting data user is none')
                return None

            log.debug('creating statistic of user perls and hash_tags')
            #with processing by tools flushing text from timeline
            hashtags_urls_mentions = functions.get_hash_tags_urls_mentions(
                tools.flush(user.timeline, lambda x: x['text']))
            #appending timeline and also forming mention relations
            user.set_timeline_info(hashtags_urls_mentions)

            log.debug('retrieving relations (friends,followers)')
            relation_object = self._get_user_relations(t_user)
            user.set_relations(relation_object)

            return user

        except Exception as e:
            log.exception(e)
            log.info("counts of request is: %s" % self._count_requests)

            # the dump must never fail itself, otherwise the retry and the
            # token check below are not reached:
            #  - t_user is still None when the error came before it was set
            #  - attributes are (name, value) pairs, join() accepts only strings
            if t_user is None:
                user_dump = 'user object was not loaded'
            else:
                attributes = getattr(t_user, '__dict__', {})
                user_dump = '\n'.join(
                    '%s: %r' % (name, attributes[name]) for name in sorted(attributes))
            log.warn('error in info for user...\n%s' % user_dump)

            # str(e) is what _get_data checks too
            error_text = str(e)

            if isinstance(e, tweepy.TweepError) and 'Rate limit exceeded' in error_text:
                log.info('oook wil be sleep...')
                time.sleep(360)
                return self.get_user_info(start_user)

            if 'Invalid / expired Token' in error_text:
                log.exception("!!!!!!!! CHANGE ACCESS TOKEN !!!!!!!!")
                # bare raise keeps the original traceback
                raise

            return None
def form_timeline(user_timeline):
    """
    flushing user timeline by the normalised words of every element text
    returning what tools.flush gives back
    """
    def normalised_words(tweet):
        return tp.get_words(tweet['text'], is_normalise=True)

    return tools.flush(user_timeline, by_what=normalised_words)
    # NOTE(review): the header of this function and the initialisation of
    # f, message, users and to_what are not visible here - confirm against
    # the full file (presumably this is the body of extract_messages)
    # reading the input line by line until readline() returns an empty string
    line = f.readline()
    while line:
        if _is_message_element(line):
            # starting a new message when none is being collected
            if not message:
                message = {}
            # element[0] is the one letter tag, element[1] is its value
            element = _get_element(line)
            if element[0] == 'T':
                message['time'] = element[1]
            elif element[0] == 'U':
                user = element[1]
                # keeping the part after 'twitter.com' and the one character following it
                message['user'] = user[user.index('twitter.com') + len('twitter.com') + 1:]
            elif element[0] == 'W':
                message['words'] = element[1]
        # message is complete when all three keys (time, user, words) are set
        if message and len(message) == 3:
            if message['words'] != 'No Post Title':
                # saving only when something to save into was given
                if to_what:
                    log.debug('save message > %s'%message)
                    to_what.save_message(message)
                users.add(message['user'])
                # message is reset only in this branch: a 'No Post Title'
                # message stays and its fields are overwritten by next lines
                message = None
        line = f.readline()
    return users


if __name__ == '__main__':
    # script run: extracting messages from the tweets file and
    # collecting the set of flushed 'user' values
    result = extract_messages("c:/temp/tweets2009-12.txt")
    flushed_users = tools.flush(result, by_what=lambda message: message['user'])
    user = set(flushed_users)



def create_statistic_of_tweets(timeline):
    """
    flushing timeline by the text of every element and
    returning the statistic evaluated on the flushed result
    """
    texts = tools.flush(timeline, lambda tweet: tweet['text'])
    return __get_statistic_of_tweets(texts)