Example 1
def main():
    """
    Runs a MarkovChain for text generation.

    The function then waits for user input. If the input is shorter than the
    chain's `window` parameter, random text is generated; otherwise the last
    3 words of the input are used as the start of the generated text.
    To stop the loop, enter `48598ee283437e810f2f0eb1cf66e217`.
    """
    chain = MarkovChain()
    # The path is relative to the working directory the script is run from.
    chain.chain = extensions.file.json.read("./src/markov-chain/generated-chains/ru/my-favorites-3-window.json")

    while True:
        start_text = input()

        if start_text == "48598ee283437e810f2f0eb1cf66e217":
            break

        # 3 is the window size of the chain.
        start_text = handle_input_text(start_text, 3)

        if start_text:
            print(chain.generate(start=start_text))
        else:
            print(chain.generate())
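The example calls a handle_input_text helper that is not shown. Below is a minimal sketch of what it might look like, based only on the docstring above; whether generate(start=...) expects a string or a word tuple depends on this particular MarkovChain, so the return type here is an assumption.

def handle_input_text(text, window):
    # Hypothetical helper inferred from the docstring: if the input has fewer
    # words than the window size, return None so the chain starts randomly;
    # otherwise return the last `window` words as the seed text.
    words = text.split()
    if len(words) < window:
        return None
    return " ".join(words[-window:])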
Example 2
def main(argv=None):
    args = parse_args()

    # Build an empty chain of the requested order; samples are added below.
    markov = MarkovChain([], args.order)

    samples, postprocessor = prepare_samples_and_postprocessor(args)
    markov.add_samples(samples)

    # Generate `count` sequences of the requested length and print each one.
    for i in range(args.count):
        sequence = markov.generate(args.length)
        print(postprocessor(sequence))
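parse_args and prepare_samples_and_postprocessor are not shown. A minimal argparse sketch of parse_args follows, assuming flag names that mirror the attributes used above (order, count, length); the option names and defaults are assumptions, not the original CLI.

import argparse

def parse_args():
    # Hypothetical parser; flag names mirror the attributes read in main().
    parser = argparse.ArgumentParser(description="Generate text with a Markov chain")
    parser.add_argument("--order", type=int, default=2,
                        help="order (window size) of the chain")
    parser.add_argument("--count", type=int, default=1,
                        help="number of sequences to generate")
    parser.add_argument("--length", type=int, default=20,
                        help="length of each generated sequence")
    return parser.parse_args()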
Example 3
class ModMarkovChain(Mod):
    def __init__(
            self,
            markov_chain_db: str,
            head_db: str,
            pmi_db: str,
            logger=None
    ):
        Mod.__init__(self, logger)
        self.markov_chain_db = markov_chain_db
        self.head_db = head_db
        self.gen = MarkovChain(self.markov_chain_db)
        self.hs = HeadSelector(self.head_db, pmi_db)

    def gen_from_sentence(self, sent, num=5):
        # Select `num` candidate head words for the reply, then generate from each.
        heads = self.hs.select(sent, num=num)
        print(heads)
        replies = []
        for head, score in heads:
            query = (params.START_SYMBOL, head, )
            query_cands = []

            # Search: generate 10 candidates per head and keep the shortest one.
            min_len = float("inf")
            min_sent = ""
            for i in range(10):
                cand = self.gen.generate(query)
                if len(cand) < min_len:
                    min_sent = cand
                    min_len = len(min_sent)
                query_cands.append(cand)
            # Log every candidate, dropping the leading start symbol.
            for _cands in query_cands:
                self.logger.info("".join(_cands[1:]))
            if min_sent:
                replies.append(min_sent)

        # Join each reply's tokens, dropping the leading start symbol.
        return ["".join(reply[1:]) for reply in replies]

    def can_utter(self, message, master):
        return True

    def utter(self, message, master):
        return [
            (random.uniform(0.7, 1.0),
             text, "markov_chain", dict())
            for text in self.gen_from_sentence(
                    message["text"],
                    num=3
            )
        ]
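A short usage sketch for this mod. The database paths, logger name, and the message dict are placeholders; only the "text" key is taken from utter() above, and master is passed as None because the methods shown never use it.

import logging

# Hypothetical wiring; the *.db paths are placeholders.
mod = ModMarkovChain(
    markov_chain_db="markov_chain.db",
    head_db="head.db",
    pmi_db="pmi.db",
    logger=logging.getLogger("markov_chain"),
)

message = {"text": "some user utterance"}
if mod.can_utter(message, master=None):
    # Each candidate is a (score, text, source, metadata) tuple.
    for score, text, source, meta in mod.utter(message, master=None):
        print(score, text)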
Example 4
class Generator:
    questions = []

    def __init__(self, goal):
        self.engine = GrammarEngine('./dialogueSystem/grammar/generator.txt')
        self.p_engine = GrammarEngine(
            './dialogueSystem/grammar/polarity_response.txt')

        # Train the identity chain on the first 80% of Dracula.
        with open('./dialogueSystem/dracula.txt', encoding="utf8") as f_dracula:
            whole_dracula = f_dracula.read()
        train_dracula = whole_dracula[:int(len(whole_dracula) * 0.8)]
        # Close (and flush) the training file before the MarkovChain reads it.
        with open('./dialogueSystem/train_dracula.txt', "w+",
                  encoding="utf8") as train_dracula_file:
            train_dracula_file.write(train_dracula)

        self.identity_chain = MarkovChain('./dialogueSystem/train_dracula.txt',
                                          "word", 3)

        if goal == "Friend":
            f_questions = open('./dialogueSystem/questionsFriendGoal.txt',
                               encoding="utf8")
        else:  # User
            f_questions = open('./dialogueSystem/questionsUserGoal.txt',
                               encoding="utf8")
        self.questions = f_questions.read().splitlines()
        # print(self.questions)
        self.asked_questions = []
        self.prev_count = -1

        self.model = DialogTag('distilbert-base-uncased')  # dialogue tags

    # Resolve obligation: TODO(Starr) <- generate according to question type
    def resolveObligation(self, gameState, userResponse):
        # if gameState.questionActType == "a type of question", do something
        # engine = GrammarEngine('./dialogueSystem/grammar/response_question.txt')
        # return engine.generate('response')

        # return yes/ no type of response
        # return gameState.questionActType
        return self.engine.generate('obligation')

    # Strong sentiment
    def addressPositiveSentiment(self, gameState, userResponse):
        return self.p_engine.generate('positive')

    def addressNegativeSentiment(self, gameState, userResponse):
        return self.p_engine.generate('negative')

    # Subjectivity: Rie
    def addressSubj(self, gameState, userResponse):
        response = TextBlob(userResponse)
        subjectivity = response.sentiment.subjectivity
        if subjectivity < 0.4:
            return "That sounds like a fact. I believe you for that."
        elif subjectivity < 0.8:
            return "Actually, are you sure about that? That sounds a bit subjective."
        else:
            return "Oh, but that's just your opinion, right? I don't know if I should trust that as a fact, my friend."

    # Keyphrase: Allison
    '''
  The keyphrases that trigger this:
  - askedWhoTheCallerIs: Markov Chain (Dracula)
  - greetedCaller
  - saidGoodbye
  '''

    def keyphraseTrigger(self, gameState, userResponse, keyphrase):
        if keyphrase == "askedWhoTheCallerIs":
            # MarkovChain
            response = "Well, I'm glad you asked. I'm a travelling storyteller, and I would love to hear your story. But wait! I'll tell you my story first. I hope you like it.\n\n"
            return response + self.identity_chain.generate(55)
        elif keyphrase == "greetedCaller":
            return "Hello, my friend. Good to hear your voice. I hope I'm not interrupting anything. I just wanted to ask you a few questions if that's okay."
        elif keyphrase == "otherLanguage":
            return self.engine.generate('confused')
        else:  # saidGoodbye
            return "I guess this is the end of our call. Maybe we'll talk again some other day...maybe we won't. Goodbye now."

    # Profanity: Rie
    def addressProf(self, gameState, userResponse):
        # if profanity.contains_profanity(userResponse):
        dirtyWords = userResponse.split(" ")
        cleanWords = profanity.censor(userResponse).split(" ")
        for i in range(len(dirtyWords)):
            dirtyWord = ""
            cleanWord = ""
            if dirtyWords[i] != cleanWords[i]:
                if dirtyWords[i][-1] in [',', '.', '!', '?']:
                    dirtyWord = dirtyWords[i][:-1]
                    dirtyWord = '"' + dirtyWord + '"'
                else:
                    dirtyWord = '"' + dirtyWords[i] + '"'
                if cleanWords[i][-1] in [',', '.', '!', '?']:
                    cleanWord = cleanWords[i][:-1]
                else:
                    cleanWord = cleanWords[i]
                self.engine.set_variable("dirty", dirtyWord)
                self.engine.set_variable("clean", cleanWord)
                return self.engine.generate('profanity')
        return "Something has gone wrong"

    # Nothing detected: Rie
    def fallback(self, gameState, userResponse):
        return self.engine.generate('fallback')
        # print("What do you mean? Give me more detail, or I will come to you tonight.")

    # Default/ user info getting questions: Allison
    # should be determined by which goal we have for this game gameState.getGoal()
    def askQuestion(self, gameState, userResponse):
        # print("INFO COUNT: ", gameState.informationCount)
        # response = self.questions[gameState.informationCount]

        if self.prev_count == gameState.informationCount:
            response = self.asked_questions[-1]
        elif gameState.informationCount == 0:
            response = self.questions[0]
            self.asked_questions.append(response)
            self.prev_count = gameState.informationCount
        else:
            response = random.choice(self.questions)
            while response in self.asked_questions:
                response = random.choice(self.questions)
            self.asked_questions.append(response)
            self.prev_count = gameState.informationCount

        # Fill %key% placeholders in the question with facts from gameState.information.
        if '%' in response:
            words = response.split()
            temp_words = words.copy()

            for i in range(0, len(words)):
                some_word = words[i]
                first_mem = ""
                last_mem = ""

                # print("SOME WORD: ", some_word)
                if some_word[-1] == "s" and some_word[-2] == "'":
                    last_mem += some_word[-2:]
                    some_word = some_word[:-2]
                    # print("NO 's WORD: ", some_word)
                if some_word[-1] in '!#$&()*+, -./:;<=>?@[\]^_`{|}~':
                    last_mem += some_word[-1]
                    some_word = some_word[:-1]
                if some_word[0] in '!#$&()*+, -./:;<=>?@[\]^_`{|}~':
                    first_mem += some_word[0]
                    some_word = some_word[1:]

                # print("PROCESSED WORD: ", some_word)
                if some_word and some_word[0] == '%' and some_word[-1] == '%':
                    key = some_word[1:-1]
                    if key not in gameState.information:
                        # self.askQuestion(gameState, userResponse)
                        terms = key.split('_')
                        if terms[0] == "grow":
                            replace = "your town"
                        else:
                            replace = "a " + terms[0]
                    else:
                        replace = gameState.information[some_word[1:-1]]
                    temp_words[i] = first_mem + replace + last_mem

            # print(temp_words)
            response = " ".join(temp_words)
            if response[-1] != '?':
                response += "?"

        if gameState.informationCount > 0 and self.prev_count != gameState.informationCount:
            transitions = [
                "I see. ", "Interesting... ", "Oh really? ", "Ah, ok. ",
                "Hm... ", "Yeah? ", "Alright. ", "Aha. "
            ]
            response = random.choice(transitions) + response
        return response

    # Allison
    # If the user gave information, let's respond to it!
    # Use the gameState to see what info we asked for, and then say something about it
    def addressAvoidance(self, gameState, userResponse):
        options = [
            "I'm not sure what you mean.", "You're avoiding my questions.",
            "I won't be able to write a good story if you don't answer my questions.",
            "I don't think I get it.", "Could you say that again?"
        ]
        return random.choice(options)

    def elizaTransformation(self, gameState, userResponse):
        transformed_response = eliza_transform.transform(userResponse)
        return transformed_response

    def addressQuestion(self, gameState, userResponse):
        options = [
            "Let's focus on your experiences for now.",
            "Let's not talk about me.",
            "Are you sure you should be asking me questions?",
            "We don't have time to both ask questions!",
            "You can ask me questions later"
        ]
        return random.choice(options)
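A minimal usage sketch for this Generator, assuming the grammar, question, and dracula.txt files exist at the paths used in __init__. The stand-in game state below is hypothetical and only carries the attributes askQuestion() actually reads.

from types import SimpleNamespace

# Hypothetical stand-in for the real game state object.
game_state = SimpleNamespace(informationCount=0, information={})

gen = Generator(goal="Friend")
print(gen.keyphraseTrigger(game_state, "Who is this?", "askedWhoTheCallerIs"))
print(gen.askQuestion(game_state, "I'm doing fine, thanks."))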
Example 5
class MasterpieceWriter(object):
    def __init__(self, sentence_tokenizer, word_tokenizer):
        self.sentence_tokenizer = sentence_tokenizer
        self.word_tokenizer = word_tokenizer

        self.markov_chain = MarkovChain()
        self.word_contexts = defaultdict(list)

        self.word_counts = Counter()
        self.word_pair_counts = Counter()

    def _paragraphs_from_file(self, file_name):
        with open(file_name) as f:
            for line in f:
                line = line.strip()
                if line != "":
                    yield line

    def _get_words_and_contexts(self, input_files):
        for file_name in input_files:
            for paragr in self._paragraphs_from_file(file_name):
                sentences = self.sentence_tokenizer.tokenize(paragr)
                if len(sentences) == 0:
                    continue

                yield PARA_BEGIN, None
                for sentence in sentences:
                    words, contexts = self.word_tokenizer.tokenize(sentence)
                    if len(words) == 0:
                        continue

                    yield SENT_BEGIN, None
                    for word in words:
                        yield (word, None)
                    yield SENT_END, None

                    if contexts is not None:
                        yield None, contexts

                yield PARA_END, None

    def train(self, training_files):
        prev_prev_word, prev_word = None, None
        for word, contexts in self._get_words_and_contexts(training_files):
            if contexts is not None:
                for ctx_key in contexts:
                    self.word_contexts[ctx_key].extend(contexts[ctx_key])

            if word is not None:
                # Train markov chain (need at least 3 tokens)
                if prev_prev_word is not None:
                    self.markov_chain.add((prev_prev_word, prev_word),
                                          (prev_word, word))
                # Collect stats
                if word not in ALL_SPECIAL:
                    self.word_counts[word] += 1
                    if prev_word not in ALL_SPECIAL:
                        self.word_pair_counts[(prev_word, word)] += 1

                # Update prev_prev_word and prev_word
                prev_prev_word, prev_word = prev_word, word

    def stats(self, top=10):
        return dict(most_common_words=self.word_counts.most_common(top),
                    most_common_word_pairs=self.word_pair_counts.most_common(top))

    def generate_masterpiece(self, prng=None):
        yield PARA_BEGIN
        yield SENT_BEGIN
        # The chain's states are word bigrams; yield only the second word of
        # each state so the caller receives a plain stream of words.
        for state in self.markov_chain.generate((PARA_BEGIN, SENT_BEGIN), prng):
            w1, w2 = state
            yield w2
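A short usage sketch, assuming MarkovChain and the PARA_BEGIN / SENT_BEGIN / SENT_END / PARA_END / ALL_SPECIAL symbols come from the same module as the class. The two whitespace-based tokenizers and the corpus file name are hypothetical stand-ins that only match the interfaces used above (tokenize(paragraph) -> sentences, tokenize(sentence) -> (words, contexts)).

import itertools

class SimpleSentenceTokenizer:
    # Hypothetical stand-in: split a paragraph into sentences on '.'.
    def tokenize(self, paragraph):
        return [s.strip() for s in paragraph.split(".") if s.strip()]

class SimpleWordTokenizer:
    # Hypothetical stand-in: split a sentence into words; no extra contexts.
    def tokenize(self, sentence):
        return sentence.split(), None

writer = MasterpieceWriter(SimpleSentenceTokenizer(), SimpleWordTokenizer())
writer.train(["corpus.txt"])  # placeholder path: one paragraph per non-empty line
print(writer.stats(top=5))

# The generator can run for a long time, so take a bounded slice of it.
words = list(itertools.islice(writer.generate_masterpiece(), 50))
print(" ".join(words))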