Example #1
0
class MalayalamMarkov:
    """Markov-chain text model set up with a Malayalam scanner and formatter.

    Wraps a ``MarkovText`` instance (stored as ``self.markov``) and exposes
    small helpers to feed text, generate predictions, and persist the model.
    """

    def __init__(self, input_db=None, output_db=None):
        """Build the wrapped ``MarkovText`` model.

        Args:
            input_db: Database passed to ``SqliteStorage``; takes precedence
                over ``output_db`` when both are given.
            output_db: Database passed to ``SqliteStorage`` when ``input_db``
                is falsy.

        When neither argument is given, ``storage=None`` is passed so that
        ``MarkovText`` can fall back to its own default storage instead of
        this constructor failing with ``UnboundLocalError``.
        NOTE(review): assumes ``MarkovText`` accepts ``storage=None`` -
        confirm against the markovchain documentation.
        """
        malayalam_scanner = RegExpScanner(expr=MALAYALAM_EXPR)
        malayalam_formatter = Formatter(replace=MALAYALAM_REPLACE)

        # input_db wins over output_db, matching the original if/elif order.
        db = input_db or output_db
        storage = SqliteStorage(db=db) if db else None

        self.markov = MarkovText(scanner=malayalam_scanner,
                                 formatter=malayalam_formatter,
                                 storage=storage)

    def add_text(self, text):
        """Feed ``text`` to the model; falsy input (``''``/``None``) is ignored."""
        if text:
            self.markov.data(text)

    def predict(self, start, words, count):
        """Generate ``count`` texts from the model.

        Args:
            start: Value forwarded as ``reply_to``.
            words: Value forwarded as ``max_length``.
            count: Number of texts to generate.

        Returns:
            A list with ``count`` generated results, in generation order.
        """
        return [
            self.markov(max_length=words,
                        reply_to=start,
                        reply_mode=ReplyMode.END)
            for _ in range(count)
        ]

    def save(self):
        """Persist the wrapped model by calling its ``save()``."""
        self.markov.save()

    def from_db(self, db_filename):
        """Replace the wrapped model with one loaded from ``db_filename``."""
        storage = SqliteStorage(db=db_filename)
        self.markov = MarkovText.from_storage(storage)
Example #2
0
async def markov(ctx, id):
    """Reply with Markov-generated text built from a Fimfiction story.

    The story text is downloaded and turned into a fresh ``MarkovText``
    model in a worker thread, so the blocking HTTP request and chain
    construction do not stall the event loop.

    Args:
        ctx: Command context; only its awaitable ``send`` method is used.
        id: Story id, interpolated into the download URL.

    Raises:
        requests.HTTPError: If the download answers with an error status,
            instead of feeding an error page into the chain.
    """
    import asyncio

    # format() also accepts non-string ids, unlike string concatenation.
    url = "https://www.fimfiction.net/story/download/{0}/txt".format(id)

    def build_response():
        # Runs in the executor: everything in here is blocking.
        markovgenerate = MarkovText()
        reply = requests.get(url)
        reply.raise_for_status()
        markovgenerate.data(reply.content.decode(encoding="UTF-8"))
        print('Story received.')
        return markovgenerate(max_length=5000)

    print('Getting story with id {0}...'.format(id))
    loop = asyncio.get_event_loop()
    response = await loop.run_in_executor(None, build_response)
    print('Markov chain response generated.')
    await ctx.send(response)
Example #3
0
 def __init__(self, input_db=None, output_db=None):
     """Build the wrapped ``MarkovText`` model.

     Args:
         input_db: Database passed to ``SqliteStorage``; takes precedence
             over ``output_db`` when both are given.
         output_db: Database passed to ``SqliteStorage`` when ``input_db``
             is falsy.

     When neither argument is given, ``storage=None`` is passed so that
     ``MarkovText`` can fall back to its own default storage instead of
     this constructor failing with ``UnboundLocalError``.
     NOTE(review): assumes ``MarkovText`` accepts ``storage=None`` -
     confirm against the markovchain documentation.
     """
     malayalam_scanner = RegExpScanner(expr=MALAYALAM_EXPR)
     malayalam_formatter = Formatter(replace=MALAYALAM_REPLACE)

     # input_db wins over output_db, matching the original if/elif order.
     db = input_db or output_db
     storage = SqliteStorage(db=db) if db else None

     self.markov = MarkovText(scanner=malayalam_scanner,
                              formatter=malayalam_formatter,
                              storage=storage)
Example #4
0
    async def markovgen(self, ctx):
        """Send Markov-generated text built from a randomly chosen corpus file.

        Picks one of the files ``markov/markov (N).txt`` with N drawn from
        1 to 602 inclusive, feeds it line by line into a fresh
        ``MarkovText``, generates a text, passes it through
        ``commands.clean_content`` and sends the result to ``ctx``.

        Args:
            ctx: Command context used for content cleaning and for sending.
        """
        randomized_int = random.randint(1, 602)
        text = MarkovText()
        async with aiofiles.open(f"markov/markov ({randomized_int}).txt") as f:
            async for line in f:
                text.data(line, part=True)
        # Close the partial feed, as the other part=True loops do; otherwise
        # the data from the final chunk may never be completed.
        text.data('', part=False)

        clean = await commands.clean_content(fix_channel_mentions=True
                                             ).convert(ctx, text())
        await ctx.send(clean)
Example #5
0
def test_markov_text_generate(mocker, ss, data, args, res):
    """Check text generation for one parametrized case.

    ``res`` is either an exception type that the call must raise, or the
    exact value the call must return (in which case ``MarkovText.format``
    must have been invoked exactly once).
    """
    format_spy = mocker.patch('markovchain.text.MarkovText.format', wraps=list)
    model = MarkovText(
        parser=Parser(state_sizes=[ss]),
        scanner=Scanner(lambda x: x),
        storage=JsonStorage(backward=True),
    )
    model.data(data)

    # Error case: the generation call itself has to raise.
    if isinstance(res, type):
        with pytest.raises(res):
            model(*args)
        return

    # Success case: exact output and a single format() call.
    assert model(*args) == res
    assert format_spy.call_count == 1
def markovchain_example():
    """Train a model on the definitions file and print two generated texts.

    The file is fed line by line as partial data and then finalized with an
    empty non-partial chunk. The first text is generated freely, the second
    as a reply to ``'sentence start'`` using ``ReplyMode.END``.
    """
    chain = MarkovText()

    with open('word_generation/definitions.txt') as source:
        for chunk in source:
            chain.data(chunk, part=True)
    # Finalize the partial feed.
    chain.data('', part=False)

    free_text = chain(max_length=16)
    print(free_text + '\n')

    reply_text = chain(max_length=16,
                       reply_to='sentence start',
                       reply_mode=ReplyMode.END)
    print(reply_text + '\n')
Example #7
0
def main():
    """Configure logging, load the Markov model, and run the bot.

    Reads the configuration via ``get_config()``, sets up the ``chat`` and
    ``media`` loggers, loads a ``MarkovText`` model from the path in
    ``conf['markov']``, and runs ``MarkovBot`` (plus the optional shell
    task) on the asyncio event loop until it finishes or is interrupted.

    Returns:
        0 when stopped by ``KeyboardInterrupt``; 1 in every other case that
        returns normally (including after a caught ``CytubeError`` or
        ``SocketIOError``, which is printed to stderr).
    """
    conf, kwargs = get_config()
    chat_logger = logging.getLogger('chat')
    media_logger = logging.getLogger('media')
    loop = asyncio.get_event_loop()
    # Chat log lines carry only the message; media lines get a timestamp
    # whose formatted value is cut to 19 characters by the ``.19s`` spec.
    configure_logger(chat_logger,
                     log_file=conf.get('chat_log_file', None),
                     log_format='%(message)s',
                     log_level=logging.INFO)
    configure_logger(media_logger,
                     log_file=conf.get('media_log_file', None),
                     log_format='[%(asctime).19s] %(message)s',
                     log_level=logging.INFO)
    markov = MarkovText.from_file(conf['markov'], storage=SqliteStorage)
    bot = MarkovBot(markov,
                    chat_logger,
                    media_logger,
                    order=conf.get('order', None),
                    learn=conf.get('learn', False),
                    trigger=conf.get('trigger', None),
                    loop=loop,
                    **kwargs)
    shell = Shell(conf.get('shell', None), bot, loop=loop)
    try:
        task = loop.create_task(bot.run())
        # Run the shell alongside the bot only when the shell has a task.
        if shell.task is not None:
            task_ = asyncio.gather(task, shell.task)
        else:
            task_ = task
        loop.run_until_complete(task_)
    except (CytubeError, SocketIOError) as ex:
        print(repr(ex), file=sys.stderr)
    except KeyboardInterrupt:
        return 0
    finally:
        # Shutdown sequence: cancel the tasks, close the shell, let the
        # tasks finish, then save the model and close the loop.
        # NOTE(review): if create_task() raised, task/task_ would be
        # unbound here - confirm that cannot happen.
        task_.cancel()
        task.cancel()
        shell.close()
        # NOTE(review): waiting on a cancelled task may raise, which would
        # skip markov.save() below - verify bot.run() handles cancellation.
        loop.run_until_complete(task)
        if shell.task is not None:
            loop.run_until_complete(shell.task)
        markov.save()
        loop.close()

    return 1
Example #8
0
 def from_db(self, db_filename):
     """Replace ``self.markov`` with a model loaded from ``db_filename``.

     The file name is handed to ``SqliteStorage`` and the resulting storage
     to ``MarkovText.from_storage``.
     """
     self.markov = MarkovText.from_storage(SqliteStorage(db=db_filename))
Example #9
0
import finmeter, random, lwvlib, json, re
from uralicNLP import semfi
from uralicNLP import uralicApi
from nltk.tokenize import RegexpTokenizer
from collections import Counter
import numpy as np
from numpy.random import choice
from markovchain import JsonStorage
from markovchain.text import MarkovText, ReplyMode

# Word-vector model loaded through lwvlib from the lemma word2vec binary.
wv = lwvlib.load("fin-word2vec-lemma.bin", 10000, 500000)

# Vowel characters, including the umlauted 'ä' and 'ö'.
vowels = list('aeiouyäö')

# Primary text model and the fallback model, both loaded from JSON files.
markov = MarkovText.from_file('kalevala_and_others_markov.json')
fallback_markov = MarkovText.from_file('fallback_markov.json')


def count_syllables(verse):
    """Count the syllables in a verse.

    The verse is split into word tokens (runs of word characters); each
    token is hyphenated with ``finmeter.hyphenate`` and contributes the
    number of ``-``-separated parts of the result.

    Args:
        verse: Text of the verse.

    Returns:
        Total syllable count over all tokens that could be hyphenated.
    """
    tokenizer = RegexpTokenizer(r'\w+')
    n_syllables = 0
    for word in tokenizer.tokenize(verse):
        try:
            n_syllables += len(finmeter.hyphenate(word).split("-"))
        except Exception:
            # Deliberate best effort: a token the hyphenator cannot handle
            # is skipped rather than failing the whole verse.
            continue
    return n_syllables
Example #10
0
# Not my code; this was just to test the markovchain Python library. Credit for the setup and code goes to: http://dead-beef.tk/markovchain/
from markovchain import JsonStorage
from markovchain.text import MarkovText, ReplyMode

# Start from an empty model with the library defaults.
markov = MarkovText()

# Alternative corpus used earlier: 'data_extracted_6.txt'
# Pass 1: feed the whole corpus in one call.
with open('NoPrefaceEmilyDickinsonBooks12242.txt') as corpus:
    markov.data(corpus.read())

# Pass 2: feed the same corpus again line by line as partial data, then
# finalize the partial feed with an empty non-partial chunk.
with open('NoPrefaceEmilyDickinsonBooks12242.txt') as corpus_lines:
    for chunk in corpus_lines:
        markov.data(chunk, part=True)
markov.data('', part=False)

# Generate one free text and one reply-style text.
sample = markov()
print(sample)
reply = markov(max_length=40,
               reply_to='sentence start',
               reply_mode=ReplyMode.END)
print(reply)

# Round-trip the model through a JSON file.
markov.save('markov.json')

markov = MarkovText.from_file('markov.json')


# NOTE: Fix the whole 'sentence start' shenanigan in the output - why is it even printing that out?
Example #11
0
def generate_markov(input_path):
    """Return a ``MarkovText`` model fed with the full contents of a file.

    Args:
        input_path: Path of the text file to read.
    """
    model = MarkovText()
    with open(input_path) as source:
        corpus = source.read()
        model.data(corpus)
    return model
Example #12
0
def construct_model():
    """Create a new ``MarkovText`` whose parser is built as ``Parser([3])``."""
    # presumably the list holds the parser's state sizes - verify against Parser
    parser = Parser([3])
    return MarkovText(parser=parser)
Example #13
0
def test_markov_text_data(mocker):
    """``MarkovText.data`` must return what the patched ``Markov.data`` returns
    and call it exactly once with the same arguments."""
    base_data = mocker.patch('markovchain.Markov.data', return_value=1)
    model = MarkovText()

    outcome = model.data([1, 2], True)

    assert outcome == 1
    base_data.assert_called_once_with([1, 2], True)
Example #14
0
def test_markov_text_format(test, join_with):
    """``MarkovText.format`` must join the input with the storage's state
    separator, pass it to the formatter, and return the formatter's result."""
    formatter = Mock(return_value=2)
    model = MarkovText(formatter=formatter)
    model.storage.state_separator = join_with

    formatted = model.format(test)

    assert formatted == 2
    formatter.assert_called_with(join_with.join(test))