def train(input_file: str):
    df = load_data(input_file)
    cur_text = ""
    link_pattern = re.compile(r"http\S+")
    for idx in range(len(df["text"])):
        if df["isRetweet"][idx] == "t":
            continue
        sample = df["text"][idx]
        if 'http' in sample:
            sample = link_pattern.sub("", sample)

        # Flatten newlines and keep a separating space between samples.
        cur_text += sample.replace("\n", " ") + " "
    data_model = markovify.Text(cur_text, state_size=3)
    model_json = data_model.to_json()

    with open("model.json", 'w') as f:
        f.write(model_json)
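Not part of the original snippet: a minimal sketch of how the model.json written by train() above could be reloaded and sampled, assuming markovify's standard Text.from_json API; the generate_tweets helper name is hypothetical.

import markovify

# Hypothetical helper: reload the model saved by train() and sample from it.
def generate_tweets(model_path="model.json", count=5):
    with open(model_path) as f:
        model = markovify.Text.from_json(f.read())
    # make_short_sentence caps each result's length; 280 matches a tweet.
    # Entries may be None if generation fails for a given attempt.
    return [model.make_short_sentence(280) for _ in range(count)]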
Example #2
    def execute(self, text):
        # Build the model.
        if Configuration.config["activate_nlp"]:
            SpacyText.load_dict()
            text_model = SpacyText(text)
        else:
            text_model = markovify.Text(text)

        body = ""
        sentences = []
        # Create randomly-generated sentences
        for i in range(Configuration.config["sentences_to_generate"]):
            sentence = text_model.make_sentence()
            if sentence is not None and sentence not in sentences:
                body += sentence + " "
                sentences.append(sentence)

        return body
Example #3
def get_model(corpus):
    file_id = re.findall("/(.+?)_", corpus)[0]
    model_path = "models/" + file_id + "_markov_model.json"

    if not path.exists(model_path):  # if model doesn't exist, make it
        # Get raw text as string
        with open(corpus) as f:
            text = f.read()

        # Build the model
        text_model = markovify.Text(text)
        model_json = text_model.to_json()
        with open(model_path, "wb") as o:  # store
            json.dump(model_json, o)
    else:
        with open(model_path, "rb") as f:  # retrieve
            text_model = markovify.Text.from_json(json.load(f))
    return text_model
Example #4
def random_sample_text(num_sentences=3):
    """Attempts to generate a random piece of text content."""

    text = getattr(g, 'sample_text_cached', None)
    is_decoding_needed = False

    if not text:
        url = random.choice(
            app.config['EDITABLE_SAMPLE_TEXT_SCRAPE_URLS'])

        r = requests.get(url)

        # Thanks to:
        # https://github.com/kennethreitz/requests/issues/1604 ...
        # #issuecomment-24476927
        r.encoding = 'utf-8'

        is_decoding_needed = (type(r.text).__name__ == 'unicode')
        text = (
            is_decoding_needed
            and r.text.encode('utf-8')
            or r.text)
        g.sample_text_cached = text

    # Use a Markov chain generator for random sentences based on
    # sample input (e.g. text of a book).
    # https://github.com/jsvine/markovify
    # See also:
    # http://agiliq.com/blog/2009/06/ ...
    # generating-pseudo-random-text-with-markov-chains-u/
    text_model = markovify.Text(text)

    sentences = []
    for i in range(num_sentences):
        s = text_model.make_sentence()

        if s:
            if is_decoding_needed:
                s = s.decode('utf-8')

            s = unidecode(s.strip())
            sentences.append(s)

    return '<p>{0}</p>'.format(' '.join(sentences))
Example #5
def getContent(synopsCountMax):
    # Get raw text as string.
    synops = ""
    short = ""

    with open("OUTPUT/synopsis.txt") as f:
        text = f.read()

    text_model = markovify.Text(text, state_size=3)

    # Keep generating until a non-empty sentence comes back.
    while not synops:
        synops = text_model.make_sentence()
        print("produced synops")
    synopsCount = len(synops)

    print(synops)

    # Regenerate while the sentence is longer than the allowed maximum.
    while synopsCount > synopsCountMax:
        print("too long, shortening")
        synops = text_model.make_sentence()
        synopsCount = len(synops) if synops else synopsCountMax + 1

    remaining = synopsCountMax - synopsCount

    # Pad with short sentences until the synopsis reaches the target length.
    while synopsCount < synopsCountMax:
        short = ""
        print("making short")
        while not short:
            short = text_model.make_short_sentence(remaining)
        print(short)
        synops += " " + short
        synopsCount = len(synops)
        remaining = synopsCountMax - synopsCount
        if synopsCount > 180:
            break

    print("--------------")
    return synops
Example #6
def trainModel():
    global model
    print("Loading data ...")
    df = pd.read_csv('messages.csv', engine='python', encoding='utf8')
    df.iloc[0] = ['Temps', 'Expediteur', 'Message']
    df.columns = df.iloc[0]
    df = df.drop(df.index[0])
    df['Message'] = df['Message'].str.lower()  # converts to lowercase

    dfPerso = df[df.Expediteur == EXPEDITEUR_NAME]
    dfPerso = dfPerso.dropna()

    speeches = list(dfPerso['Message'].str.split('\n', expand=True).stack())

    print("Training model ...")

    model = markovify.Text(speeches, state_size=2)

    print("Model create")
Example #7
def main():
    logging.basicConfig(level=logging.INFO)
    tag = "polityka"
    # download(tag)
    state_size = 3


    model = markovify.Text(load(tag), state_size=state_size)
    #
    # op = model.make_sentence(tries=500, max_overlap_ratio=0.5)
    # start = random.randint(0, len(op.split()) - state_size)
    # start_state = " ".join(op.split()[start:start + state_size])
    # print(start_state)
    # comment = model.make_sentence(init_state=start_state, tries=500, max_overlap_ratio=0.5)

    print(model.make_short_sentence(max_chars=1000, min_chars=100))
    a = generate_message(model)
    print(a)
    print(len(a))
Example #8
def text_to_model(tup):
    '''given an abstract, train a markov model

    the 1 will be used for weights, later'''
    _, text = tup
    try:
        # retain_original set to False to save lots of RAM
        text_model = markovify.Text(text, state_size=STATE_SIZE,
                                    retain_original=False)

        # the model object itself is not serializable, so extract the JSON first
        # (to_json() already returns a plain str)
        model_json = text_model.to_json()
        # TODO: change key for category
        return _, model_json
    except Exception:
        # TODO FIXME: many articles being lost due to illegal characters. see issue tracker.
        print("model skipped in text_to_model:", text[:50])
        pass
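Not from the original source: a sketch of how the JSON strings returned by text_to_model could later be rebuilt into models and merged, assuming markovify.Text.from_json and markovify.combine; the combine_models helper and the equal weights are illustrative.

import markovify

# Hypothetical follow-up to text_to_model(): rebuild Text objects from the
# stored JSON strings and combine them with equal weights (the "1" the
# docstring above refers to).
def combine_models(json_models):
    models = [markovify.Text.from_json(j) for j in json_models]
    weights = [1] * len(models)
    return markovify.combine(models, weights)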
Example #9
def main():
    with open("prideandprejudice.txt") as f:
        text = f.read()
    text_model = markovify.Text(text)

    novel = ''
    words = 0

    while words < 50000:
        line = text_model.make_sentence_with_start("Mr. Darcy")
        if line is None:  # generation can fail; try again
            continue
        line = line.split()
        words += len(line)

        novel += ' '.join(line) + ' \n'

    with open("mrdarcyandmrdarcy.txt", "w") as f:
        f.write(novel)
Example #10
    def parse_sentence(self, sentence):
        emotion_analysis = te.get_emotion(str(sentence))
        if emotion_analysis[self.opposite] > emotion_analysis[self.feeling]:
            # Rewrite it
            sentence = self.rewrite(sentence)
        elif emotion_analysis[self.feeling] > emotion_analysis[self.opposite]:
            # Take its words
            #self.words_markov = markovify.combine([self.words_markov,markovify.Text(str(sentence))],[1,1])
            self.sentences.append(str(sentence))
            self.markov_blob = markovify.Text(self.sentences)
            for word in sentence.words:
                #self.words.append(Word(word,te.get_emotion(word),self.feeling,self.opposite))
                self.words.add_word(word)

        #for word in word_bag:
        #if word == self.SCREAM:
        # power surge?

        return sentence
Example #11
def get_markov_model():
  corpora_files = load_corpora()
  corpora_file_names = corpora_files.keys()
  markov_models = {key: markovify.Text(corpora_files[key], well_formed=False) for key in corpora_file_names}
  # Random weights from 0 to 1.9 in steps of 0.1
  random_weights = random.sample([x * 0.1 for x in range(0, 20)], len(corpora_file_names))

  # Print out the weights of each text for the user 
  print("RANDOM WEIGHTS:")
  for idx, key in enumerate(corpora_file_names):
    print(key + ": " + str(random_weights[idx]))
  print("")

  # Combine the Markov chains, weighting each corpus by its random weight
  model_combo = markovify.combine([markov_models[key] for key in corpora_file_names], random_weights)
  model_combo = model_combo.compile()

  print(model_combo.make_sentence())
  return model_combo
Example #12
def markov(filename):
    corpus = ""

    # Get raw text as string.
    with open(str(filename)) as f:  # the file contains the original (non-generated) lyrics
        text = f.read()
        for line in text.split("\n"):
            if line != "":
                if line[-1] not in "!?.;)":
                    corpus += line + ". "
                else:
                    corpus += line + " "

    # Build the model.
    text_model = markovify.Text(corpus)

    neural_lyrics = ""
    for i in range(len(text.split("\n"))):
        neural_lyrics += ((str(text_model.make_sentence())[:-1]))
        neural_lyrics += ("\n")
    return neural_lyrics
Example #13
def respond(text):
    text_model = markovify.Text(text.lower())
    print(text.lower())
    msg = None
    try:
        msg = text_model.make_sentence_with_start("i")
    except Exception:
        pass

    print(msg)

    if msg is None:
        msg = text_model.make_sentence()
        print(msg)

    if msg is not None:
        return "Tbh, " + msg
    else:
        return "Tbh, I have nothing interesting to say"
Example #14
def generate_markov_model(songs, model_state_size):
    """
    Generates a harmonic markov model based off of all the hooktheory songs
    given

    Args:
        songs (list of HKTObject): A list of HKTObjects representing the corpus
        model_state_size (int): The state size that should be used for the model

    Returns:
        A markov model that represents the entire harmonic corpus given
    """

    markovs = []

    for song in songs:
        #For each song the harmonic progression is put into a string
        #Each chord is separated by a space
        text = ""
        for segment in song.segments:
            #print(segment)
            for chord in segment.chordsNoRest:
                #print(chord)
                text += chord.roman_basic + " "
                #print(chord.roman_basic)

        #Weird conversion here
        text = str(text)

        #We create a separate model for every song and put them into a list
        if text != '':
            #print("Text: "+text)
            model = markovify.Text(text, state_size=model_state_size)
            markovs.append(model)

    #Then we combine all the models in the list
    #print(markovs)

    #print(markovs)
    combo = markovify.combine(markovs)

    return combo
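Not part of the original example: a hedged usage sketch for generate_markov_model, assuming songs is a list of HKTObject instances as described in the docstring; the variable names are illustrative.

# Hypothetical usage: build the combined harmonic model and sample a progression.
harmonic_model = generate_markov_model(songs, model_state_size=2)
progression = harmonic_model.make_sentence(tries=100)
if progression:  # make_sentence can return None if generation fails
    print(progression)  # a space-separated string of roman-numeral chords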
Example #15
def respond(interview_question):
    global index
    global transcript
    answer = ''

    # get question words
    wordlist = interview_question.split()

    # check if actually a question
    if not wordlist[-1].endswith('?') and wordlist[-1] != '?':
        return 'Please, let\'s stick to questions only.'

    # check if already asked
    if (len(transcript) >= 1):
        for i in transcript:
            if (transcript[i]['question'].lower() ==
                    interview_question.lower()):
                return interviewer + ', please. You already asked me that.'

    # check if common question
    common = is_common_question(interview_question)
    if (type(common) is str):
        return common

    # Build the model.
    text_model = markovify.Text(text)

    # generate response
    for i in range(random.randint(1, 5)):
        sentence = text_model.make_sentence()
        if sentence:  # make_sentence can return None
            answer += sentence + ' '

    # save question + response
    # transcript[str(index)]['answer'] = nswer
    transcript.update(
        {index: {
            'question': interview_question,
            'response': answer
        }})

    # increment index + return response
    index += 1
    return answer
Example #16
async def user_markov_response(message):
    user = message.mentions[0]
    logger.info("found user {} for bottalk command".format(user))
    sentences = u""
    random_dt = random_date(message.channel)
    async for log in message.channel.history(limit=2000, after=random_dt):
        if log.author == user:
            sentences += log.clean_content + "\n"
    if len(sentences) == 0:
        await message.channel.send("I got nothing 🤷")
        return
    try:
        text_model = markovify.Text(sentences, well_formed=False)
        s = text_model.make_short_sentence(300, tries=50)
        if not s or len(s) < 1:
            s = "My apologies, I cannot quite grasp the essence of that user."
        await message.channel.send(s)
    except Exception as e:
        logger.error("Shat self: {}".format(e))
        await message.channel.send("Sorry, I've just gone and shat myself.")
Example #17
    def generate_anek(self):
        corpus = ""
        try:
            aneks = session.query(Anek)
            for anek in aneks:
                corpus += f"{anek.text}\n"
            model = markovify.Text(corpus)
            anek = ""
            for i in range(randint(2, 6)):
                sentence = model.make_short_sentence(500)
                if sentence:  # make_short_sentence can return None
                    anek += sentence + " "
            anek = anek.replace("–", "\n–")
            anek = anek.replace("- ", "\n- ")
            anek = anek.replace("—", "\n—")
            return anek
        except Exception as e:
            with open("log", "a") as log:
                log.write(
                    f'[{time.ctime()}] [DeepAneks] Generation has failed. Exception {e.__class__} caught: retrying in 30 seconds...\n'
                )
            time.sleep(30)
Example #18
def gen_markov(f=None, u=None):
    if f:
        with open(f) as f:
            print(Fore.GREEN + '[+] Parsing text file.')
            text = f.read()

    elif u:
        print(Fore.GREEN + '[+] Parsing specified URL.')
        r = requests.get(u)
        text = get_text(r.text)

    try:
        print(Fore.GREEN + '[+] Generating a model.')
        model = markovify.Text(text)
        print(Fore.GREEN + '[+] Attempting to generate a Markov chain.')
        markov_text = model.make_short_sentence(280)
        return markov_text

    except UnboundLocalError:
        print(Fore.RED + '[!] Fatal error. Aborting process.')
Example #19
def createText(nrSentences=10, nrWords=150):
    text_model2 = None

    for filename in glob.glob(os.path.join('DataSet', '*.txt')):
        with open(filename, encoding='utf8') as f:
            print(filename)
            try:
                text = f.read()
            except UnicodeDecodeError as uniDecErr:
                continue
        text_model = markovify.Text(text)
        if text_model2:
            text_model2 = markovify.combine([text_model, text_model2])
        else:
            text_model2 = text_model

    for i in range(nrSentences):
        # nrWords is used here as the maximum character count per sentence
        result = text_model2.make_short_sentence(nrWords)
        if result:
            print(result)
            outp.insert(INSERT, result)
Example #20
def main():
    parser = argparse.ArgumentParser(
        description='generate a markov chain model from a corpus of text')
    parser.add_argument('--corpus',
                        '-c',
                        help='corpus file path',
                        required=True)
    parser.add_argument('--output',
                        '-o',
                        help='output model path',
                        required=True)
    args = parser.parse_args()

    with open(args.corpus, 'r') as fp:
        corpus = fp.read()

    model = markovify.Text(corpus)

    with open(args.output, 'w') as fp:
        fp.write(model.to_json())
Example #21
def build_poems(scope_map: Dict[PoemType, Scope], poem_type: PoemType):
    scope = scope_map[poem_type]

    logger.info(f'Fetching text blob for "{poem_type.value}" scope.')

    response = requests.get(rabbit_text_endpoint,
                            params={
                                'from': format_date(scope.fr),
                                'until': format_date(scope.un)
                            },
                            timeout=15)

    try:
        response.raise_for_status()
    except HTTPError as e:
        logger.warning(e)
        return

    if not response.text:
        logger.warning(f'Text blob empty for "{poem_type.value}", skipping.')
        return

    logger.info(f'Generating markov chain with "{poem_type.value}" scope.')

    text_model = markovify.Text(response.text)

    for i in range(10):
        poem = Poem([text_model.make_sentence() for _ in range(5)],
                    datetime.utcnow().replace(tzinfo=timezone.utc))
        logger.debug(json.dumps(poem.json(), indent=2))

        response = requests.post(rabbit_poem_endpoint,
                                 json=poem.json(),
                                 headers={'X-Api-Key': rabbit_api_key},
                                 params={'scope': poem_type.value},
                                 timeout=15)

        try:
            response.raise_for_status()
        except HTTPError as e:
            logger.warning(e)
Example #22
def mk_trainer(bot_data="training", bot_model="bot_1"):
    mk_model_delete(bot_model)

    # This determines the line count of all the training files in the training folder.
    file_count_total = 0
    training_file_names = []
    for filename in os.listdir(f'{dataset_dir}/{bot_data}'):
        if filename.endswith(".txt"):
            training_file_names.append(filename)
            file_count_total += 1
        else:
            continue

    if not training_file_names:
        raise Exception(
            f"No files in the {dataset_dir}/{bot_data} folder, please add .txt files with line-by-line conversations"
        )

    with tqdm(total=file_count_total,
              postfix=text_color("Training MK Model", BIPur),
              leave=True,
              ascii=load_bar_mode,
              colour=load_bar_colour,
              dynamic_ncols=True) as progress_bar:
        combined_model = None
        for filename in training_file_names:
            with open(f'{dataset_dir}/{bot_data}/{filename}',
                      encoding="ISO-8859-1") as f:
                model = markovify.Text(f, retain_original=False)
                if combined_model:
                    combined_model = markovify.combine(
                        models=[combined_model, model])
                else:
                    combined_model = model

            progress_bar.update(1)

    model_json = combined_model.to_json()

    with open(f'{models_dir}/{bot_model}/model.json', 'w') as outfile:
        json.dump(model_json, outfile)
Example #23
def load_markov_model(fp, col, kind='list'):
    df = pd.read_excel(fp)
    # filter these
    df = df[df['related'] != 'Error']
    corpus = df[col].tolist()
    if kind == 'list':
        line_level = '\n'.join(
            ['\n'.join(eval(article)) for article in corpus])
        article_level = '\n'.join(
            [' '.join(eval(article)) for article in corpus])
        line_level_model = mk.NewlineText(line_level, 4)
        recipe = [
            str(i) + ': ' + line_level_model.make_sentence()
            for i in range(1, 6)
        ]
        print('\n'.join(recipe))
    else:
        corpus = df[col].tolist()
        line_level = '\n'.join(corpus)
        article_level = '\n'.join(corpus)
        line_level_model = mk.Text(line_level, 4)
    with open('corpus\\' + col + ' - line level.txt', 'w') as fo:
        fo.write(line_level)
    with open('corpus\\' + col + ' - article level.txt', 'w') as fo:
        fo.write(article_level)

    article_level_model = mk.NewlineText(article_level, 4)

    print()
    print(line_level_model.make_sentence())
    print(line_level_model.make_sentence())
    print(line_level_model.make_sentence())
    print(line_level_model.make_sentence())
    print('_' * 20)
    print(article_level_model.make_sentence())
    print(article_level_model.make_sentence())
    print(article_level_model.make_sentence())
    print(article_level_model.make_sentence())
    print(article_level_model.make_sentence())

    return line_level_model, article_level_model
Example #24
async def babelli(ctx, arg, arg2):
    msg = ""
    key = arg
    url = "https://oaflopean.pythonanywhere.com/?key=" + key
    data = requests.post(url, auth=('oaflopean', 'babellibot'))
    text_model = markovify.Text(data.content.decode("utf-8"))
    for i in range(int(arg2)):
        try:
            msg = msg + " " + text_model.make_sentence() + " "
        except TypeError:
            continue
    print(data)
    if len(msg) == 0:
        await ctx.send("Sorry! Try more options.")
    else:
        # Discord messages are capped at 2000 characters, so send in chunks.
        chunk_size = 1995
        chunks = [
            msg[i:i + chunk_size]
            for i in range(0, len(msg), chunk_size)
        ]
        for msg_pt in chunks:
            await ctx.send(msg_pt)
        book = Books()
        book.title = arg + " " + str(arg2)
        book.author = ctx.message.author.name
        book.description = msg
        s = "abcdefghijklmnopqrstuvwxyz"
        passlen = 12
        book.uri = "".join(random.sample(s, passlen))
        book.reddit_url = "http://oaflopean.pythonanywhere.com/?key=" + book.uri
        post = RedditPost(uri=book.uri,
                          reddit_url=book.reddit_url,
                          title=book.title,
                          body=book.description,
                          username=book.username)
        db.session.add(post)
        db.session.commit()
        db.session.add(book)
        db.session.commit()

        await ctx.send(book.reddit_url)
Example #25
def createPoem(file):
    """
        Use the markovify module to rearrange the poem into a new poem

            :param file: path to a txt file containing the source poem
            :type file: str

            :return: the newly generated poem text
    """

    with open(file) as f:
        text = f.read()

    text_model = markovify.Text(text, state_size=1)
    newtext = text_model.make_sentence()
    if newtext is None:  # make_sentence can fail; fall back to an empty line
        newtext = ""

    with open('/Users/hakeem/Desktop/LoremIpsumGen/app/results/results.txt',
              'a') as f:
        f.write(newtext + '\n')

    return newtext
Example #26
    def make_padding(self):
        if self.dynamic:
            with open(self.corpus, 'r') as f:
                text = markovify.Text(f)
            self.logger.info('generating dynamic padding from corpus')
            pad = '<p style="font-size: 0px">'
            for i in range(1, 50):
                temp = text.make_sentence()
                if temp is not None:
                    pad += ' ' + temp
                    if i % 5 == 0:
                        pad += ' </br>'
                else:
                    pad += ' </br>'
            pad += ' </p>'
            self.logger.info('dynamic padding generated successfully')
        else:
            self.logger.warning('message created using static padding')
            pad = STATIC_PADDING
        return pad
Example #27
def test_main():
    """
    Basic functional test
    """
    assert markov_novel
    path = 'tmp'
    os.makedirs(path)
    os.chdir(path)
    # Get raw text as string.
    from os.path import dirname, abspath
    filename = os.path.join(
        dirname(dirname(abspath(__file__))), 'tests/futuristmanifest.txt')
    with open(filename) as f:
        text = f.read()
    # Build the model.
    text_model = markovify.Text(text)
    novel = markov_novel.Novel(text_model, chapter_count=1)
    novel.write(novel_title='my-novel', filetype='md')
    assert os.path.exists(os.path.join(os.getcwd(), 'my-novel.md'))
    os.chdir(os.pardir)
    shutil.rmtree('tmp', ignore_errors=True)
Example #28
def tootmarkov(bot, trigger):
    keys = open(os.getcwd() + "/SECRET_SAUCE/masto.txt", "r")
    client_id = keys.readline().rstrip()
    client_secret = keys.readline().rstrip()
    access_token = keys.readline().rstrip()
    api_base_url = keys.readline().rstrip()
    mastodon = Mastodon(client_id, client_secret, access_token, api_base_url)
    keys.close()

    # Get raw text as string.
    with open(os.getcwd() + "/all_of_bgtopics.txt", "r") as f:
        text = f.read()

    # Build the model.
    text_model = markovify.Text(text)

    mytoot = text_model.make_short_sentence(140)
    output = mytoot + "\n\n[Generated by TootMarkov]"

    mastodon.toot(output)
    bot.say("I tooted: " + mytoot)
Example #29
def markov_chain():
    # Get raw text as string.
    with open("./data/corpora/text_1.txt", encoding='utf8') as f:
        text = f.read()

    # Build the model.
    text_model = markovify.Text(text)

    # Print five randomly-generated sentences
    print('\n\n---Print five randomly-generated sentences---')
    for i in range(5):
        print()
        print(text_model.make_sentence())

    # Print three randomly-generated sentences of no more than 280 characters
    print(
        '\n\n---Print three randomly-generated sentences of no more than 280 characters---'
    )
    for i in range(3):
        print()
        print(text_model.make_short_sentence(280))
Example #30
def combine_all_files(ctx):
    import markovify

    state_size = ctx.config.get('state_size', 2)
    files = os.listdir(path="texts")

    with open("output_corpus.txt", 'w') as f:
        f.write('\n')

    files = files[1:-1]

    for file in files:
        with open(f"texts/{file}") as input_file:
            text = input_file.read()
            try:
                markovify.Text(text, state_size=state_size)
                with open("output_corpus.txt", "a") as output_corpus:
                    output_corpus.write('\n' + text)
                logging.info('Added...')
            except Exception as e:
                logging.error(e)