예제 #1
0
    def generate(self,
                 message: str,
                 doc: Doc = None,
                 ignore_topics: Optional[List[str]] = None) -> Optional[str]:
        """Generate a reply to *message* from the Markov model.

        Args:
            message: Raw input text; only parsed when *doc* is None.
            doc: Optional pre-parsed spaCy Doc to reuse.
            ignore_topics: Token texts to exclude from subject selection.

        Returns:
            The generated reply, or a canned fallback line when no known
            subjects are found or sentence generation fails.
        """
        # Fix: the original default was a mutable list ([]), which is shared
        # across all calls to the method; use None and initialize locally.
        if ignore_topics is None:
            ignore_topics = []

        if doc is None:
            filtered_message = MarkovFilters.filter_input(message)
            doc = self._nlp(filtered_message)

        # Collect Markov words for every input token the model knows about.
        subjects = []
        for token in doc:
            if token.text in ignore_topics:
                continue
            markov_word = self._markov_model.select(token.text)
            if markov_word is not None:
                subjects.append(markov_word)
        if not subjects:
            # Nothing recognizable in the input — answer with a canned line.
            unheard_list = [
                "Didn’t catch that", "Try again", "Are you even trying",
                "That might be too much for me right now",
                "I’ll learn how eventually",
                "I don't know how to respond to that yet"
            ]
            return random.choice(unheard_list)

        def structure_generator():
            # Sample a sentence count from observed input statistics when
            # available; otherwise fall back to a uniform draw of 1-4.
            sentence_stats_manager = InputTextStatManager()
            while True:
                choices, p_values = sentence_stats_manager.probabilities()
                if len(choices) > 0:
                    num_sentences = np.random.choice(choices, p=p_values)
                else:
                    num_sentences = np.random.randint(1, 5)
                yield self._structure_scheduler.predict(
                    num_sentences=num_sentences)

        generator = MarkovGenerator(structure_generator=structure_generator(),
                                    subjects=subjects)

        reply_words = []
        sentences = generator.generate(db=self._markov_model)
        if sentences is None:
            # Generation failed outright — answer with a canned line.
            misunderstood_list = [
                'Huh.', 'Huh', 'Huh!', 'Huh?', 'Huh!?', 'HUH?'
            ]
            return random.choice(misunderstood_list)
        for sentence in sentences:
            for word in sentence:
                # Compound words keep their own capitalization; all others
                # are transformed according to the predicted mode.
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    text = word.text
                reply_words.append(text)

        reply = " ".join(reply_words)
        filtered_reply = MarkovFilters.smooth_output(reply)

        return filtered_reply
예제 #2
0
 def from_token(token: Token) -> 'MarkovWord':
     """Build a MarkovWord from a spaCy token.

     The word is flagged as compound when its capitalization mode matches
     the project's compound rules; neighbors start empty.
     """
     compound = CapitalizationMode.from_token(
         token, CAPITALIZATION_COMPOUND_RULES) == CapitalizationMode.COMPOUND
     return MarkovWord(token.text,
                       Pos.from_token(token),
                       compound=compound,
                       neighbors={})
예제 #3
0
 def from_token(token: Token) -> 'MarkovNeighbor':
     """Create a MarkovNeighbor with zeroed statistics from a spaCy token."""
     mode = CapitalizationMode.from_token(token, CAPITALIZATION_COMPOUND_RULES)
     compound = mode == CapitalizationMode.COMPOUND
     # Fresh counters: [value pair] and a distance histogram covering the
     # full window on both sides of the word plus the center slot.
     zero_values = [0, 0]
     zero_dist = [0] * (MARKOV_WINDOW_SIZE * 2 + 1)
     return MarkovNeighbor(token.text.lower(), token.text,
                           Pos.from_token(token), compound,
                           zero_values, zero_dist)
예제 #4
0
 def from_token(token: Token) -> 'MarkovNeighbor':
     """Construct a default (all-zero stats) MarkovNeighbor for *token*."""
     lowered_key = token.text.lower()
     original_text = token.text
     is_compound = (CapitalizationMode.from_token(
         token, CAPITALIZATION_COMPOUND_RULES) == CapitalizationMode.COMPOUND)
     part_of_speech = Pos.from_token(token)
     # Zeroed value pair and distance histogram spanning the markov window
     # on each side of the center position.
     return MarkovNeighbor(lowered_key, original_text, part_of_speech,
                           is_compound, [0, 0],
                           [0] * (MARKOV_WINDOW_SIZE * 2 + 1))
예제 #5
0
    def preprocess(self, doc: Doc) -> bool:
        """Convert *doc* into training samples for the structure model.

        For each token, the input sample is the sequence of *previous*
        embeddings (capped at SEQUENCE_LENGTH) and the label is the current
        token's embedding — i.e. the model learns to predict the next item.
        An EOS marker sample is appended after every sentence.

        Appends to self.data and self.labels in place.

        Returns:
            False when the training set has reached
            STRUCTURE_MODEL_TRAINING_MAX_SIZE (no samples added this call,
            or truncated mid-document), True otherwise.
        """
        if len(self.data) >= STRUCTURE_MODEL_TRAINING_MAX_SIZE:
            return False

        sequence = []
        previous_item = None
        for sentence_idx, sentence in enumerate(doc.sents):
            # Re-check capacity per sentence so a long document can stop early.
            if len(self.data) >= STRUCTURE_MODEL_TRAINING_MAX_SIZE:
                return False

            for token_idx, token in enumerate(sentence):
                # Embed PoS + capitalization mode for this token.
                item = StructureFeatureAnalyzer.analyze(
                    token,
                    CapitalizationMode.from_token(
                        token, CAPITALIZATION_COMPOUND_RULES))
                label = item

                if len(sequence) == 0:
                    # Offset data by one, making label point to the next data item
                    sequence.append(
                        PoSCapitalizationMode(
                            Pos.NONE, CapitalizationMode.NONE).to_embedding())
                else:
                    sequence.append(previous_item)

                # We only want the latest SEQUENCE_LENGTH items
                sequence = sequence[-StructureModel.SEQUENCE_LENGTH:]

                # copy() so later trimming/appending doesn't mutate stored rows.
                self.data.append(sequence.copy())
                self.labels.append(label)

                previous_item = item

            # Handle EOS after each sentence
            item = PoSCapitalizationMode(
                Pos.EOS, CapitalizationMode.NONE).to_embedding()
            label = item

            sequence.append(previous_item)

            # We only want the latest SEQUENCE_LENGTH items
            sequence = sequence[-StructureModel.SEQUENCE_LENGTH:]

            self.data.append(sequence.copy())
            self.labels.append(label)

            previous_item = item
        return True
예제 #6
0
def main():
    """Load the trained models and print 1000 generated sample messages."""
    np.random.seed(int(time.time()))

    markov_db = MarkovTrieDb(MARKOV_DB_PATH)

    structure_model = StructureModelScheduler(use_gpu=USE_GPU)
    structure_model.start()
    structure_model.load(STRUCTURE_MODEL_PATH)

    # Seed subjects for generation; words missing from the DB are skipped.
    subjects = []
    for word in ['Some', 'Words', 'Here']:
        select_word = markov_db.select(word)
        if select_word is not None:
            subjects.append(select_word)
        else:
            print("Couldn't select %s" % word)

    for i in range(0, 1000):

        def structure_generator():
            while True:
                yield structure_model.predict(num_sentences=1)

        markov_generator = MarkovGenerator(structure_generator(), subjects)

        words = []
        sentences = markov_generator.generate(markov_db)
        if sentences is None:
            # Generation failed for this iteration; move on to the next.
            continue

        # Fix: removed an unused per-sentence pos_list and a redundant
        # sentences[sentence_idx][word_idx] lookup that just re-read `word`.
        for sentence in sentences:
            for word in sentence:
                # Compound words keep their own capitalization; others are
                # transformed according to the predicted mode.
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    text = word.text
                words.append(text)

        message = " ".join(words)
        message = MarkovFilters.smooth_output(message)

        print(message)
    def generate(self, message: str, doc: Doc = None, ignore_topics: Optional[List[str]] = None) -> Optional[str]:
        """Generate a reply to *message* from the Markov model.

        Args:
            message: Raw input text; only parsed when *doc* is None.
            doc: Optional pre-parsed spaCy Doc to reuse.
            ignore_topics: Token texts to exclude from subject selection.

        Returns:
            The generated reply, or a short fallback string when no known
            subjects are found or sentence generation fails.
        """
        # Fix: the original default was a mutable list ([]), shared across
        # calls; use None and initialize a fresh list locally.
        if ignore_topics is None:
            ignore_topics = []

        if doc is None:
            filtered_message = MarkovFilters.filter_input(message)
            doc = self._nlp(filtered_message)

        # Collect Markov words for every input token the model knows about.
        subjects = []
        for token in doc:
            if token.text in ignore_topics:
                continue
            markov_word = self._markov_model.select(token.text)
            if markov_word is not None:
                subjects.append(markov_word)
        if not subjects:
            return "I wasn't trained on that!"

        def structure_generator():
            # Sample a sentence count from observed input statistics when
            # available; otherwise fall back to a uniform draw of 1-4.
            sentence_stats_manager = InputTextStatManager()
            while True:
                choices, p_values = sentence_stats_manager.probabilities()
                if len(choices) > 0:
                    num_sentences = np.random.choice(choices, p=p_values)
                else:
                    num_sentences = np.random.randint(1, 5)
                yield self._structure_scheduler.predict(num_sentences=num_sentences)

        generator = MarkovGenerator(structure_generator=structure_generator(), subjects=subjects)

        reply_words = []
        sentences = generator.generate(db=self._markov_model)
        if sentences is None:
            return "Huh?"
        for sentence in sentences:
            for word in sentence:
                # Compound words keep their own capitalization; others are
                # transformed according to the predicted mode.
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    text = word.text
                reply_words.append(text)

        reply = " ".join(reply_words)
        filtered_reply = MarkovFilters.smooth_output(reply)

        return filtered_reply
예제 #8
0
def main():
    """Load the trained models and print 1000 generated sample messages."""
    np.random.seed(int(time.time()))

    markov_db = MarkovTrieDb(MARKOV_DB_PATH)

    structure_model = StructureModelScheduler(use_gpu=USE_GPU)
    structure_model.start()
    structure_model.load(STRUCTURE_MODEL_PATH)

    # Seed subjects for generation; words missing from the DB are skipped.
    subjects = []
    for word in ['Some', 'Words', 'Here']:
        select_word = markov_db.select(word)
        if select_word is not None:
            subjects.append(select_word)
        else:
            print("Couldn't select %s" % word)

    for i in range(0, 1000):

        def structure_generator():
            while True:
                yield structure_model.predict(num_sentences=1)

        markov_generator = MarkovGenerator(structure_generator(), subjects)

        words = []
        sentences = markov_generator.generate(markov_db)
        if sentences is None:
            # Generation failed for this iteration; move on to the next.
            continue

        # Fix: removed an unused per-sentence pos_list and a redundant
        # sentences[sentence_idx][word_idx] lookup that just re-read `word`.
        for sentence in sentences:
            for word in sentence:
                # Compound words keep their own capitalization; others are
                # transformed according to the predicted mode.
                if not word.compound:
                    text = CapitalizationMode.transform(word.mode, word.text)
                else:
                    text = word.text
                words.append(text)

        message = " ".join(words)
        message = MarkovFilters.smooth_output(message)

        print(message)
예제 #9
0
파일: structure.py 프로젝트: csvance/FTBot
    def preprocess(self, doc: Doc) -> bool:
        """Convert *doc* into training samples for the structure model.

        For each token, the input sample is the sequence of *previous*
        embeddings (capped at SEQUENCE_LENGTH) and the label is the current
        token's embedding — i.e. the model learns to predict the next item.
        An EOS marker sample is appended after every sentence.

        Appends to self.data and self.labels in place.

        Returns:
            False when the training set has reached
            STRUCTURE_MODEL_TRAINING_MAX_SIZE (no samples added this call,
            or truncated mid-document), True otherwise.
        """
        if len(self.data) >= STRUCTURE_MODEL_TRAINING_MAX_SIZE:
            return False

        sequence = []
        previous_item = None
        for sentence_idx, sentence in enumerate(doc.sents):
            # Re-check capacity per sentence so a long document can stop early.
            if len(self.data) >= STRUCTURE_MODEL_TRAINING_MAX_SIZE:
                return False

            for token_idx, token in enumerate(sentence):
                # Embed PoS + capitalization mode for this token.
                item = StructureFeatureAnalyzer.analyze(
                    token, CapitalizationMode.from_token(token, CAPITALIZATION_COMPOUND_RULES))
                label = item

                if len(sequence) == 0:
                    # Offset data by one, making label point to the next data item
                    sequence.append(PoSCapitalizationMode(Pos.NONE, CapitalizationMode.NONE).to_embedding())
                else:
                    sequence.append(previous_item)

                # We only want the latest SEQUENCE_LENGTH items
                sequence = sequence[-StructureModel.SEQUENCE_LENGTH:]

                # copy() so later trimming/appending doesn't mutate stored rows.
                self.data.append(sequence.copy())
                self.labels.append(label)

                previous_item = item

            # Handle EOS after each sentence
            item = PoSCapitalizationMode(Pos.EOS, CapitalizationMode.NONE).to_embedding()
            label = item

            sequence.append(previous_item)

            # We only want the latest SEQUENCE_LENGTH items
            sequence = sequence[-StructureModel.SEQUENCE_LENGTH:]

            self.data.append(sequence.copy())
            self.labels.append(label)

            previous_item = item
        return True
예제 #10
0
 def from_token(token: Token) -> 'MarkovWord':
     """Create a MarkovWord (no neighbors yet) from a spaCy token."""
     mode = CapitalizationMode.from_token(token, CAPITALIZATION_COMPOUND_RULES)
     is_compound = mode == CapitalizationMode.COMPOUND
     return MarkovWord(token.text,
                       Pos.from_token(token),
                       compound=is_compound,
                       neighbors={})
예제 #11
0
 def analyze(token: Token, mode: CapitalizationMode):
     """Embed the token's part of speech together with *mode*."""
     # Avoid reusing the `mode` parameter name for the combined object.
     return PoSCapitalizationMode(Pos.from_token(token), mode).to_embedding()
예제 #12
0
 def from_embedding(embedding: int):
     """Decode a combined embedding back into a PoSCapitalizationMode.

     The embedding packs pos_index * len(CapitalizationMode) + mode_index,
     so integer division and modulo recover the two parts.

     Fix: int(embedding / n) goes through float division, which can lose
     precision for large integers; floor division (//) is exact.
     """
     pos_part = embedding // len(CapitalizationMode)
     mode_part = embedding % len(CapitalizationMode)
     return PoSCapitalizationMode(Pos(pos_part), CapitalizationMode(mode_part))
예제 #13
0
파일: structure.py 프로젝트: csvance/FTBot
 def analyze(token: Token, mode: CapitalizationMode):
     """Return the embedding combining the token's PoS with *mode*."""
     pos_tag = Pos.from_token(token)
     combined = PoSCapitalizationMode(pos_tag, mode)
     return combined.to_embedding()