def get_users_data(user_name1, user_name2):
    """
    Load two users, store them and build one markov chain model per user.

    Both users are fetched through ``engine.get_user_info`` and their
    serialised form is handed to ``db.save_user``. Each timeline is passed
    through ``tools.flush`` keyed by the normalised words of the tweet text,
    and only the first 10 resulting entries are used for the model.

    ``boost`` is not a parameter; it is read from the enclosing scope.

    Returns a tuple ``(model_for_user_name1, model_for_user_name2)``.
    """
    def _normalised_words(tweet):
        # key used by tools.flush for every timeline entry
        return tp.get_words(tweet['text'], is_normalise=True)

    first = engine.get_user_info(user_name1)
    second = engine.get_user_info(user_name2)

    # persist in the same order the users were requested
    for loaded in (first, second):
        db.save_user(loaded.serialise())

    head1 = tools.flush(first.timeline, by_what=_normalised_words)[:10]
    head2 = tools.flush(second.timeline, by_what=_normalised_words)[:10]

    # how many entries actually go into each model
    print(len(head1))
    print(len(head2))

    model1 = markov_chain_machine.create_model(head1, user_name1, boost)
    model2 = markov_chain_machine.create_model(head2, user_name2, boost)
    return model1, model2
def _get_data(self, t_user):
    """
    Form a user in our model from a tweepy user object.

    t_user: tweepy user object; its screen_name, protected flag, name,
            lists(), counters and created_at are read here.

    Returns the filled m_user object, or None when
      * the user is protected, or
      * tweepy raised an error that is not a rate limit error
        (the error is logged before None is returned).

    When tweepy reports 'Rate limit exceeded' the call sleeps for 360
    seconds and then retries with the same t_user.
    """
    try:
        result = m_user(tools.imply_dog(t_user.screen_name, with_dog=True))
        if t_user.protected:
            log.debug('user %s is protected... skip him' % t_user.screen_name)
            return None

        result.real_name = t_user.name

        # lists() is counted as one request against the API
        lists = t_user.lists()
        self._count_requests += 1
        log.debug("get lists +1")
        result.set_lists(tools.flush(lists, lambda x: x.name), len(lists))

        result.followers_count = t_user.followers_count
        result.friends_count = t_user.friends_count
        # tweepy spells it 'favourites', our model 'favorites'
        result.favorites_count = t_user.favourites_count

        result.timeline = self._get_time_line(t_user)
        result.timeline_count = t_user.statuses_count
        result.inited_ = t_user.created_at.strftime(props.time_format)
        return result
    except tweepy.TweepError as e:
        if 'Rate limit exceeded' in str(e):
            log.info('oook wil be sleep...')
            time.sleep(360)
            return self._get_data(t_user)
        # Any other tweepy error used to vanish without a trace; keep the
        # "return None" contract the caller checks for, but leave a record.
        log.exception(e)
        return None
def create_model_main(users, model_id, is_normalise=True):
    """
    Build one markov chain model from the tweet texts of several users.

    users: iterable of user objects exposing a ``timeline`` whose entries
           have a 'text' key
    model_id: identifier handed to ``markov_chain``
    is_normalise: forwarded to ``get_words`` for every text

    The model is saved once all messages are added and is then returned.
    """
    chain = markov_chain(model_id, booster)
    for member in users:
        for text in tools.flush(member.timeline, lambda tweet: tweet['text']):
            chain.add_message(get_words(text, is_normalise=is_normalise))
    chain.save()
    return chain
def create_model(user, is_normalise=True, mc=None):
    """
    Create (or extend) a markov chain model for one user.

    user: user object exposing ``name_`` and a ``timeline`` whose entries
          have a 'text' key
    is_normalise: forwarded to ``get_words`` for every tweet text
    mc: existing model to extend; when None a new ``markov_chain`` named
        after ``user.name_`` is created

    The model is saved after all messages are added and is then returned.
    """
    # Compare with None explicitly so that a caller-supplied model is never
    # replaced just because it happens to evaluate as false.
    if mc is None:
        mc = markov_chain(user.name_, booster)

    timeline_text = tools.flush(user.timeline, lambda x: x['text'])
    for tt_el in timeline_text:
        # is_normalise belongs to get_words (as in create_model_main);
        # it used to be passed to add_message by a misplaced parenthesis.
        mc.add_message(get_words(tt_el, is_normalise=is_normalise))

    mc.save()
    return mc
def get_user_info(self, start_user):
    """
    Load one user into our model together with timeline statistics and
    relations.

    start_user: whatever ``self._prepare_user_t_object`` accepts; it is
                turned into a tweepy user object first.

    Steps: build the user via ``self._get_data``, extract hash tags, urls
    and mentions from the timeline texts, then attach the relations
    returned by ``self._get_user_relations``.

    Returns the user object of our model, or None when the tweepy object
    or the user data could not be obtained, or when an error other than
    the two handled below occurred (it is logged).

    On a tweepy 'Rate limit exceeded' error the call sleeps 360 seconds and
    retries. On an 'Invalid / expired Token' error the original exception
    is re-raised.
    """
    t_user = None
    try:
        start_user_obj = self._prepare_user_t_object(start_user)
        if not start_user_obj:
            log.warn("start user is none")
            return None
        t_user = start_user_obj
        log.info('getting user info for user: %s' % ('@' + t_user.screen_name))

        # forming user data
        user = self._get_data(t_user)
        if not user:
            log.warn('when getting data user is none')
            return None

        log.debug('creating statistic of user perls and hash_tags')
        # with processing by tools flushing text from timeline
        hashtags_urls_mentions = functions.get_hash_tags_urls_mentions(
            tools.flush(user.timeline, lambda x: x['text']))
        # appending timeline and also forming mention relations
        user.set_timeline_info(hashtags_urls_mentions)

        log.debug('retrieving relations (friends,followers)')
        relation_object = self._get_user_relations(t_user)
        user.set_relations(relation_object)
        return user
    except Exception as e:
        log.exception(e)
        log.info("counts of request is: %s" % self._count_requests)

        # t_user is still None when the failure happened before it was
        # assigned, and __dict__.items() yields tuples which str.join
        # rejects - format the pairs so the handler itself cannot fail.
        if t_user is not None:
            details = '\n'.join(
                '%s: %r' % (key, value)
                for key, value in getattr(t_user, '__dict__', {}).items())
        else:
            details = 'user object was not loaded'
        log.warn('error in info for user...\n%s' % ('\n' + details))

        error_text = str(e)
        if isinstance(e, tweepy.TweepError) and 'Rate limit exceeded' in error_text:
            log.info('oook wil be sleep...')
            time.sleep(360)
            return self.get_user_info(start_user)

        if 'Invalid / expired Token' in error_text:
            log.exception("!!!!!!!! CHANGE ACCESS TOKEN !!!!!!!!")
            # NOTE(review): re-raise is assumed to belong to the token
            # branch only - confirm against the original indentation.
            # Bare raise keeps the original traceback.
            raise
        return None
def form_timeline(user_timeline):
    """
    Pass a user timeline through ``tools.flush`` keyed by the normalised
    words of each entry's 'text' and return what ``tools.flush`` gives back.
    """
    def _normalised_words(entry):
        return tp.get_words(entry['text'], is_normalise=True)

    return tools.flush(user_timeline, by_what=_normalised_words)
line = f.readline() while line: if _is_message_element(line): if not message: message = {} element = _get_element(line) if element[0] == 'T': message['time'] = element[1] elif element[0] == 'U': user = element[1] message['user'] = user[user.index('twitter.com') + len('twitter.com') + 1:] elif element[0] == 'W': message['words'] = element[1] if message and len(message) == 3: if message['words'] != 'No Post Title': if to_what: log.debug('save message > %s'%message) to_what.save_message(message) users.add(message['user']) message = None line = f.readline() return users if __name__ == '__main__': result = extract_messages("c:/temp/tweets2009-12.txt") user = set(tools.flush(result, by_what=lambda x:x['user']))
def create_statistic_of_tweets(timeline):
    """
    Take the 'text' of every timeline entry (via ``tools.flush``) and return
    the statistic computed for those texts by ``__get_statistic_of_tweets``.
    """
    texts = tools.flush(timeline, lambda tweet: tweet['text'])
    return __get_statistic_of_tweets(texts)