Esempio n. 1
0
def generate_status(tweet_list):
    """Return a Markov-generated status built from the given tweets.

    (list) -> str

    The tweets are joined with single spaces into one text, which is passed
    to MarkovGenerator together with twitter_tokenize. The generated words
    are lower-cased before being returned.

    Returns None (after printing the error) when a ValueError is raised
    while building the generator or generating the words.
    """
    tweet_text = ' '.join(tweet_list)
    try:
        # NOTE(review): 90 is an opaque positional argument here -- confirm
        # its meaning against MarkovGenerator's signature.
        mc = MarkovGenerator(tweet_text, 90, tokenize_fun=twitter_tokenize)
        return mc.generate_words().lower()
    except ValueError as e:
        # Best effort: report the problem and hand back None to the caller.
        # print(e) with a single argument prints the same text on Python 2
        # and Python 3, unlike the statement form `print e`.
        print(e)
        return None
Esempio n. 2
0
    def test_train(self):
        """Check the model stored in the database against a hand-built one.

        A MarkovGenerator is created with a TrainStrategy pointing at the
        fixture text (window size 2). The records found in ``self.db['model']``
        are then decoded from token indices back to words and compared with
        the expected transition table.
        """
        # NOTE(review): nothing below calls a train method explicitly, so the
        # model is presumably written while the generator is constructed --
        # confirm against MarkovGenerator.
        generator = MarkovGenerator(
            MarkovGenerator.TrainStrategy(
                database=self.db,
                text_path='test_texts/test.txt',
                window_size=2,
            )
        )

        # (two-word context, following token) pairs expected for the fixture
        # sentence; every transition is expected exactly once. 'end' shows up
        # both before the first word and after the final '.'.
        expected_transitions = [
            ('end бои', 'у'),
            ('бои у', 'сопоцкина'),
            ('у сопоцкина', 'и'),
            ('сопоцкина и', 'друскеник'),
            ('и друскеник', 'закончились'),
            ('друскеник закончились', 'отступлением'),
            ('закончились отступлением', 'германцев'),
            ('отступлением германцев', '.'),
            ('германцев .', 'end'),
        ]
        manual_model = {
            context: {following: 1}
            for context, following in expected_transitions
        }

        # Stored keys are space-separated token indices and stored values map
        # a token index to a count; translate both back to words.
        tok = generator.strategy.tokenizer
        model = {}
        for record in self.db['model'].find():
            context_words = [tok.idx2word(idx) for idx in record['key'].split()]
            followers = {}
            for idx, count in record['value'].items():
                followers[tok.idx2word(idx)] = count
            model[' '.join(context_words)] = followers

        self.assertEqual(manual_model, model)
Esempio n. 3
0
def generate_random_content():
    """Populate the module-level ``contents`` and ``titles`` with Markov text.

    Reads the Milton corpus from disk, feeds it to a MarkovGenerator and
    stores 200 generated bodies in ``contents`` and 100 generated titles in
    ``titles``. Progress and elapsed time are printed to stdout.

    Returns nothing; the results are published through the two globals.
    """
    global contents
    global titles

    def random_content():
        # One body is four generated sayings separated by ".\n\n".
        # `mg` is bound further down, before this helper is first called.
        return ".\n\n".join([(mg.say().strip()) for _i in range(4)])

    start = time.time()
    print("Generating random content...", end="")
    # No newline was printed, so flush to make the message show up right away.
    sys.stdout.flush()

    mg = MarkovGenerator(2)
    # open() inside a with-block replaces the Python 2-only file() builtin and
    # guarantees the handle is closed once the corpus has been read.
    with open('/rinjani/var/data/milton-paradise.txt') as corpus:
        txt = corpus.read()
    # Drop every '-' together with any run of '"' or '*' directly before it.
    txt = re.sub('["*]*-', '', txt)
    mg.learn(txt)
    contents = [random_content() for _i in range(200)]
    # Titles: a generated saying passed through truncate_words(..., 10)
    # (presumably a 10-word cap -- confirm), then stripped of every character
    # that is neither a word character nor whitespace.
    titles = [
        re.sub(r'[^\w\s]+', '', truncate_words(mg.say().strip(), 10))
        for _i in range(100)
    ]

    print(" finished in %ds." % (time.time() - start))
    sys.stdout.flush()
Esempio n. 4
0
import random
from markov import MarkovGenerator
from markov_by_char import CharacterMarkovGenerator

# Word-level generator.
# NOTE(review): n and max look like the n-gram order and an output-length
# cap -- confirm against markov.MarkovGenerator.
generator = MarkovGenerator(n=2, max=500)

# Character-level alternative; swap it in to generate by character instead:
# generator = CharacterMarkovGenerator(n=3, max=100)

# Every white-album line is fed twice, which presumably weights that corpus
# double relative to black-album.txt -- TODO confirm this is intentional.
with open('white-album.txt') as white_album:
    for line in white_album:
        line = line.strip()
        generator.feed(line)
        generator.feed(line)

with open('black-album.txt') as black_album:
    for line in black_album:
        line = line.strip()
        generator.feed(line)

# Emit three independent generations. print(x) with a single argument behaves
# the same on Python 2 and Python 3.
for i in range(3):
    print(generator.generate())
Esempio n. 5
0
import sys

if sys.version_info[0] < 3:
    # Python 2 only: site.py removes sys.setdefaultencoding at startup, so
    # sys has to be reloaded to get it back before switching the implicit
    # str/unicode codec to UTF-8. Python 3 has neither the builtin reload()
    # nor this setting, so the block is skipped there.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf8')

from markov import MarkovGenerator

# Read the training text named by the first command-line argument from the
# input/ directory. The with-block closes the handle, and the name `corpus`
# avoids shadowing the Python 2 builtin `file`.
with open("input/" + sys.argv[1]) as corpus:
    lines = corpus.readlines()

# NOTE(review): n and max look like the n-gram order and an output-length
# cap -- confirm against markov.MarkovGenerator.
generator = MarkovGenerator(n=2, max=3000)
for line in lines:
    generator.feed(line)
text = generator.generate()
# print(x) with a single argument behaves the same on Python 2 and Python 3.
print(text)