def test_generate_model():
    # read a sample of tweets and feed each message into a markov chain
    messages = twitter_timeline_parser.extract_messages("c:/temp/tweets2009-12.txt", limit=100)
    log.info('\n'.join([message['words'] for message in messages]))
    log.info('%s messages extracted' % len(messages))
    booster = db_mc_handler()
    mc_l = markov_chain('test', booster)
    for i, message in enumerate(messages):
        mc_l.add_message(split_to_words(message['words']))
        log.info('appending %s words: %s relations: %s' % (i, mc_l.words_count_, mc_l.relations_count_))

    log.info('................................')
def test_model_():
    booster = db_mc_handler(truncate=True)

    # two bigram chains (n_of_gram_=2) built from small hand-made messages
    mc1 = markov_chain('left_test', booster, n_of_gram_=2)
    mc2 = markov_chain('right_test', booster, n_of_gram_=2)

    mc1.add_message(['a', 'b', 'c', 'd'])
    mc1.add_message(['a', 'b', 'c', 'd'])
    mc1.add_message(['a1', 'b1', 'c1', 'd1'])
    mc1.add_message(['a', 'a1', 'b', 'b1', 'c', 'c1', 'd', 'd1'])

    mc2.add_message(['a', 'b', 'c', 'd'])
    mc2.add_message(['a2', 'b2', 'c2', 'd2'])

    mc1.save()
    mc2.save()

    mc1.print_me()
    mc2.print_me()
import visualise.vis2d_machine as vis
import loggers
import tools
from analysing_data.booster import db_mc_handler
from analysing_data.markov_chain_machine import markov_chain
from model.db import db_handler
from search_engine import twitter_engine as engines

__author__ = '4ikist'
__doc__ = """
Experiment 1.
1) Build a common model for a given class of people.
2) Find per-user weights from how well a given user's timeline fits the common model.
3) Cluster the users based on that difference.

"""
log = loggers.logger

main_db = db_handler()
engine = engines.tweepy_engine(out=main_db)

booster = db_mc_handler()
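
# A minimal sketch of the three steps from the module docstring above. It
# assumes the markov_chain API used elsewhere in this repo (add_message/save)
# and a hypothetical fit_score(chain, timeline) helper measuring how well a
# timeline fits a chain; an illustration, not the final implementation.
def experiment_sketch(class_name, timelines_by_user):
    # 1) common model: feed every user's tokenized messages into one chain
    common = markov_chain(class_name, booster)
    for timeline in timelines_by_user.values():
        for message in timeline:
            common.add_message(message)
    common.save()
    # 2) weights: how strongly each user's timeline belongs to the common model
    weights = dict((name, fit_score(common, timeline))
                   for name, timeline in timelines_by_user.items())
    # 3) a crude mean-weight split stands in for real clustering here
    mean_w = sum(weights.values()) / float(len(weights))
    inside = [name for name, w in weights.items() if w >= mean_w]
    outside = [name for name, w in weights.items() if w < mean_w]
    return weights, inside, outside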


def get_users(filename):
    """
    forming users some from db or scrapping from ttr
    """
    result = []
    users = open(filename).readlines()
    for user in users:
        name_ = tools.imply_dog(user, with_dog=True).strip()
        log.info('find user by name "%s"' % name_)
        m_user = main_db.get_user({'name_': name_})
        if m_user:
            log.info('user found %s' % m_user.name_)
            result.append(m_user)
        else:
            # assumed fallback: not in the db, so scrape the user and save it
            # (same get_user_info/save_user calls as get_users_data in this project)
            s_user = engine.get_user_info(name_)
            if s_user:
                main_db.save_user(s_user.serialise())
                result.append(s_user)
    return result
from analysing_data import markov_chain_machine
from analysing_data.markov_chain_machine import markov_chain
import text_proc.text_processing as tp
from analysing_data.booster import db_mc_handler
from model.db import db_handler
from search_engine.twitter_engine import tweepy_engine
from analysing_data.mc_difference_logic import diff_markov_chains
import tools

__author__ = '4ikist'

db = db_handler(host_='localhost', port_=27017, db_name_='ttr_exp')
boost = db_mc_handler()
engine = tweepy_engine(out=db)


def get_users_data(user_name1, user_name2):
    user1 = engine.get_user_info(user_name1)
    user2 = engine.get_user_info(user_name2)

    db.save_user(user1.serialise())
    db.save_user(user2.serialise())

    # tokenize and normalise each tweet's text; only the first 10 messages are used
    timeline1 = tools.flush(user1.timeline, by_what=lambda x: tp.get_words(x['text'], is_normalise=True))[:10]
    timeline2 = tools.flush(user2.timeline, by_what=lambda x: tp.get_words(x['text'], is_normalise=True))[:10]
    print(len(timeline1))
    print(len(timeline2))
    mc1 = markov_chain_machine.create_model(timeline1, user_name1, boost)
    mc2 = markov_chain_machine.create_model(timeline2, user_name2, boost)

    return mc1, mc2
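
# Minimal usage sketch: the user names are placeholders and the exact
# signature of diff_markov_chains is assumed here (two chains in, one
# difference measure out); adjust to the real API.
if __name__ == '__main__':
    mc_left, mc_right = get_users_data('some_user_1', 'some_user_2')
    print(diff_markov_chains(mc_left, mc_right))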
from analysing_data.booster import db_mc_handler
from analysing_data.markov_chain_machine import markov_chain
import loggers
from model.db import db_handler
from search_engine import twitter_engine
from search_engine.twitter_engine import tweepy_engine
import tools
from visualise import vis_machine


__author__ = '4ikist'

db_ = db_handler(truncate=False)

api_engine = twitter_engine.tweepy_engine(out=db_)

booster = db_mc_handler(truncate=False)
vis_processor = vis_machine

log = loggers.logger

def model_splitter(message):
    """split a raw message into whitespace-separated tokens"""
    message_ = message.split()
    return message_


def process_names(file_name, class_name):
    """
    get from file ser names, scrapping saving and forming markov chains for any user timeline
    """
    names = open(file_name).readlines()
    result = []