def soundex(self, string):
    """Rebuild ``string`` from phonetically matching word-bank entries.

    The input is split with ``self.split_syllables``. Each resulting piece
    is handled as follows:

    * punctuation/space symbols are passed through unchanged;
    * otherwise the piece is replaced by the first entry of
      ``word/word_bank.txt`` whose Soundex code and ``self.getVocal``
      value both equal those of the piece;
    * if no entry matches, the piece is kept as-is and reported through
      ``self.show_unknown``.

    :param string: text to normalise.
    :return: the resulting pieces joined with single spaces.
    """
    pieces = self.split_syllables(string)

    # Blank lines in the bank cannot stand for a word, so they are skipped.
    with open("word/word_bank.txt", "r") as f:
        bank = [line.strip() for line in f if line.strip()]

    # Encode the bank once instead of once per input piece.
    encoded_bank = [
        (soundex.encode_word(entry), self.getVocal(entry), entry)
        for entry in bank
    ]

    symbols = (",", " ", ".", "!", "?", "-")

    sentence_to_say = []
    undefined_word = []

    for word in pieces:
        # Symbols never need a bank lookup (and must survive an empty bank).
        if word in symbols:
            sentence_to_say.append(word)
            continue

        code = soundex.encode_word(word)
        vocal = self.getVocal(word)

        # Compare by value: ``is`` only tests object identity, which is
        # not guaranteed for two equal strings produced at runtime.
        replacement = next(
            (
                entry
                for entry_code, entry_vocal, entry in encoded_bank
                if entry_code == code and entry_vocal == vocal
            ),
            None,
        )

        if replacement is None:
            undefined_word.append(word)
            sentence_to_say.append(word)
        else:
            sentence_to_say.append(replacement)

    self.show_unknown(undefined_word)

    return " ".join(sentence_to_say)
예제 #2
0
 def recommend(self, word: str) -> Dict[str, float]:
     """Return the words indexed under the same Soundex code as ``word``.

     Every candidate found in ``self.phonetic_index`` gets a weight of
     ``1.0``; the resulting mapping is passed through
     ``self.recommendations_post_processing`` before being returned.
     A ``None`` or empty ``word`` yields an empty dict.
     """
     if word in (None, ""):
         return {}

     phonetic_code = soundex.encode_word(word)
     if phonetic_code in self.phonetic_index:
         candidates = self.phonetic_index[phonetic_code]
     else:
         candidates = []

     weighted = dict.fromkeys(candidates, 1.0)
     return self.recommendations_post_processing(weighted)
예제 #3
0
def search_model(input_value, **kwargs):
    """Return the model instance whose ``field`` best matches ``input_value``.

    Each candidate instance is scored against ``input_value``:

    * ``0`` when both values share the same Soundex code;
    * ``1`` when the alphanumeric-only, lower-cased input is contained in
      the alphanumeric-only, lower-cased field value;
    * otherwise the value returned by ``distance`` for the raw strings
      (candidates scoring 10 or more are discarded).

    The lowest-scoring candidate wins, provided its score is below 5. On
    ties the candidate encountered first is kept.

    :param input_value: text to look up.
    :keyword model: model class exposing an ``objects`` manager.
    :keyword field: name of the attribute to compare against.
    :keyword filter: optional dict of keyword arguments forwarded to
        ``model.objects.filter``; when falsy, ``model.objects.all()`` is used.
    :return: the best matching instance, or ``None`` when ``input_value`` is
        not a string, ``model``/``field`` are missing, or nothing is close
        enough.
    """
    model = kwargs.get("model", False)
    field = kwargs.get("field", False)
    filter_args = kwargs.get("filter", False)

    # ``unicode`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = (str, unicode)  # noqa: F821
    except NameError:
        string_types = (str,)

    if not (isinstance(input_value, string_types) and model and field):
        return None

    text_soundex = soundex.encode_word(input_value)
    # Removing every non-alphanumeric character already removes whitespace,
    # so no further strip/replace is needed.
    simple_text = re.sub("[^A-Za-z0-9]", "", input_value).lower()

    if filter_args:
        model_items = model.objects.filter(**filter_args)
    else:
        model_items = model.objects.all()

    matches = []

    for item in model_items:
        field_value = getattr(item, field, "")

        if text_soundex == soundex.encode_word(field_value):
            score = 0
        elif simple_text in re.sub("[^A-Za-z0-9]", "", field_value).lower():
            score = 1
        else:
            score = distance(input_value, field_value)
            if score >= 10:
                continue

        matches.append({"distance": score, "item": item})

    if not matches:
        return None

    # ``min`` returns the first minimal element, which is the same element a
    # stable sort would place first; one pass replaces a sort per iteration.
    closest_match = min(matches, key=itemgetter("distance"))
    if closest_match["distance"] < 5:
        return closest_match["item"]
    return None
예제 #4
0
 def build_index(vocabulary: set) -> Dict[str, Set[str]]:
     """Group the words of ``vocabulary`` by their Soundex code.

     Returns a dict mapping each code produced by ``soundex.encode_word``
     to the set of vocabulary words that encode to it.
     """
     index: Dict[str, Set[str]] = {}
     for term in vocabulary:
         # setdefault creates the bucket on first sight of a code.
         index.setdefault(soundex.encode_word(term), set()).add(term)
     return index
 def test_encode(self):
     """Check that "Example" and "Ekzampul" encode to the same Soundex code."""
     reference_code = soundex.encode_word("Example")
     variant_code = soundex.encode_word("Ekzampul")
     assert reference_code == variant_code
예제 #6
0
def parse_text(request):
    """
    Match the request's text against the samples of the enabled language model.

    Every sample of every enabled intent is scored against the text: 0 when
    the Soundex codes are equal, 1 when the space-stripped, lower-cased text
    is contained in the equally normalised sample, otherwise the value
    returned by ``distance`` (kept only when below 10). The intent of the
    lowest-scoring sample is reported when its score is below 5.

    :param request: incoming request; its ``text`` entry (as extracted by
        ``request_to_dict``) is the text to interpret.
    :return: a ``Response`` carrying ``_text`` and, when an enabled language
        model exists, ``intent`` (the matched intent name or ``None``).
    """

    payload = request_to_dict(request)
    text = payload.get("text")

    try:
        interaction_model = LanguageModel.objects.get(enabled=True)
    except LanguageModel.DoesNotExist:
        # No enabled model: echo the text back without an intent.
        return Response(
            {"_text": text}, status=status.HTTP_200_OK, headers=NO_CACHE_HEADERS
        )

    query_code = soundex.encode_word(text)
    query_flat = text.strip().replace(" ", "").lower()

    candidates = []

    for intent in interaction_model.intents.filter(enabled=True):
        for sample in intent.samples:
            sample_code = soundex.encode_word(sample)
            edit_distance = distance(text, sample)
            sample_flat = sample.strip().replace(" ", "").lower()

            if query_code == sample_code:
                score = 0
            elif query_flat in sample_flat:
                score = 1
            elif edit_distance < 10:
                score = edit_distance
            else:
                continue

            candidates.append(
                {"distance": score, "intent": intent.name, "sample": sample}
            )

    # The first minimal candidate is what a stable sort would put in front.
    best = min(candidates, key=itemgetter("distance")) if candidates else None
    if best is not None and not best.get("distance") < 5:
        best = None

    resp = {
        "_text": text,
        "intent": best.get("intent") if best is not None else None,
    }

    return Response(resp, status=status.HTTP_200_OK, headers=NO_CACHE_HEADERS)