Exemple #1
0
def corr_craft_glass(text, search_result):  # TODO: Combine into single crafting-related function
    """Rebuild a glass-crafting job phrase so that its words agree grammatically.

    Four groups are read from ``search_result``: the verb (group 1), a run of
    space-separated modifier words (group 2), the material (group 3) and the
    product (group 4). ``text`` is not used in the visible part of the body.

    * No product: the verb is replaced by "Варить" and every modifier is
      inflected to the accusative case in the gender of the material.
    * With a product: the modifiers that precede the first word found in a
      hard-coded set of glass descriptors are treated as adjectives of the
      product; the product noun is put into the accusative case and the
      adjectives are inflected to agree with it.

    NOTE(review): the doctests below call the function with one argument, so
    a wrapper presumably supplies ``search_result`` — confirm against the caller.

    >>> corr_craft_glass("Делать грубый зелёное стекло")
    'Варить грубое зелёное стекло'
    >>> corr_craft_glass("Делать гигантский хрусталь лезвие топора")
    'Делать гигантское лезвие топора из хрусталя'
    """
    material = search_result.group(3)
    material_gender = get_gender(material)
    words = search_result.group(2).split()
    product = search_result.group(4).split()
    verb = search_result.group(1)
    if not product:
        # Only a material was named: the job is to produce the glass itself.
        verb = "Варить"
        adjectives = (inflect_adjective(adj, material_gender, "accs", animated=False) for adj in words)
        result = "{} {} {}".format(verb, " ".join(adjectives), material)
    else:
        # Position of the first glass descriptor; everything before it describes the product.
        # Falls back to len(words) (i.e. all words) when no descriptor is present.
        index = next(
            (i for i, item in enumerate(words) if item in {"грубое", "зелёное", "прозрачное", "грубый"}), len(words)
        )
        product_adjectives = words[:index]
        if any_in_tag({"NOUN", "nomn"}, custom_parse(product[0])):
            # The first product word is the head noun; later words are kept as they are.
            product_gender = get_gender(product[0])
            product[0] = inflect_noun(product[0], case="accs")
        else:
            # Otherwise the last product word is taken as the head noun and the
            # preceding product words are handled as additional adjectives.
            product_gender = get_gender(product[-1])
            product_adjectives += product[:-1]
            product = [inflect_noun(product[-1], case="accs")]

        # Make every product adjective agree with the head noun (accusative, inanimate).
        product_adjectives = [
            inflect_adjective(adj, product_gender, case="accs", animated=False) for adj in product_adjectives
        ]
        # NOTE(review): the listing stops here — in the visible code `result` is only
        # assigned in the no-product branch and nothing is returned yet.
Exemple #2
0
def corr_adjective_relief(text, search_result):
    """Make the leading adjective(s) of a phrase agree in gender with its noun.

    ``search_result`` supplies the leading adjective (group 1) and the rest of
    the phrase (group 2).

    * If group 2 contains several words and its first word is an adjective,
      both that word and the leading adjective are inflected to the nominative
      case in the gender of the last word and substituted into ``text``.
      If the first word is not an adjective, ``text`` is left as it is.
    * If group 2 is a single word, ``text`` is rebuilt as
      "<inflected adjective> <word>".

    A word whose inflection fails (``inflect_adjective`` returns a falsy value)
    is left unchanged in both branches instead of being passed to
    ``str.replace``, which would raise ``TypeError``.

    Returns the resulting text with only its first character capitalized.

    >>> corr_adjective_relief("Заснеженный Густой овсяница")
    'Заснеженная густая овсяница'
    >>> corr_adjective_relief("Густой мюленбергия")
    'Густая мюленбергия'
    >>> corr_adjective_relief("Густой морошка")
    'Густая морошка'
    >>> corr_adjective_relief("Заснеженный Густой куропаточья трава")
    'Заснеженная густая куропаточья трава'
    """
    adjective = search_result.group(1)
    obj = search_result.group(2)

    if " " in obj:
        words = obj.split(" ")
        if is_adjective(words[0]):
            # The last word is taken as the noun that governs the gender.
            gender = get_gender(words[-1])
            # Same order as before: the inner adjective first, then the leading one.
            for word in (words[0], adjective):
                new_word = inflect_adjective(word, gender, "nomn")
                if new_word:  # Skip words that could not be inflected
                    text = text.replace(word, new_word)
    else:
        gender = get_gender(obj)
        new_word = inflect_adjective(adjective, gender, "nomn")
        if new_word:
            text = "{} {}".format(new_word, obj)

    # capitalize() also lower-cases the rest, e.g. "Густой" in the middle -> "густая".
    return text.capitalize()
Exemple #3
0
def corr_jewelers_shop(_, search_result):
    """Rephrase a jeweler's workshop job using the proper grammatical case.

    Group 1 of ``search_result`` is the leading part (the action), group 2 the
    words that follow it. For the action "Огранить" the phrase is put into the
    accusative case; for anything else every word except "из" is put into the
    instrumental case. A trailing " с" on the action is dropped and the result
    is returned with only its first character capitalized.

    >>> corr_jewelers_shop("Огранить из необработанного адамантина")
    'Огранить необработанный адамантин'
    >>> corr_jewelers_shop("Инкрустировать Предметы обстановки с из необработанного адамантина")
    'Инкрустировать предметы обстановки необработанным адамантином'
    >>> corr_jewelers_shop("Огранить из фарфора")
    'Огранить фарфор'
    """
    action = search_result.group(1)
    tokens = search_result.group(2).split()

    if action == "Огранить":
        # Accusative: "cut <what>"
        known = None
        if tokens[0] == "из":
            # After "из" the words are known to be in genitive
            known = {"gent"}
            tokens = tokens[1:]
        noun = tokens[-1]
        gender = get_gender(noun, known_tags=known)

        phrase = []
        for modifier in tokens[:-1]:
            phrase.append(inflect_adjective(modifier, gender, "accs", animated=False))

        candidates = [variant for variant in custom_parse(noun) if {gender, "inan"} in variant.tag]
        if noun == "адамантина":
            phrase.append("адамантин")
        else:
            phrase.append(candidates[0].inflect({"accs"}).word)
    else:
        # Instrumental: "encrust with <what>"
        phrase = []
        for token in tokens:
            if token == "из":
                continue
            phrase.append(custom_parse(token)[0].inflect({"ablt"}).word)

    if action.endswith(" с"):
        action = action[:-2]
    return "{} {}".format(action, " ".join(phrase)).capitalize()
Exemple #4
0
def corr_gem_cutting(text, search_result):
    """Make the words describing a cut gem agree with the gem noun.

    The last word of group 1 is taken as the noun. If it is a known body part
    the text is handed over to ``corr_item_body_parts``. Otherwise every
    preceding word that has an entry in ``make_adjective`` or is itself an
    adjective is inflected to the noun's gender; other words are kept as they
    are. The matched span (group 0) in ``text`` is replaced by the new phrase.

    >>> corr_gem_cutting("(бриолетовый восковые опалы)")
    '(бриолетовые восковые опалы)'
    >>> corr_gem_cutting("большой шерлы")
    'большие шерлы'
    """
    tokens = search_result.group(1).split()
    head = tokens[-1]
    if head in body_parts:
        return corr_item_body_parts(text)

    gender = get_gender(head, {"NOUN", "nomn"})

    def agree(token):
        # A mapped word is replaced by its adjective form before inflection
        if token in make_adjective:
            return inflect_adjective(make_adjective[token], gender)
        if is_adjective(token):
            return inflect_adjective(token, gender)
        return token

    phrase = [agree(token) for token in tokens[:-1]]
    phrase.append(head)

    return text.replace(search_result.group(0), " ".join(phrase))
Exemple #5
0
def corr_clothiers_shop(_, search_result):
    """Rephrase a clothier's shop job as "<verb> <product> <material part>".

    Groups of ``search_result``: verb (1), material (2), product (3, stripped).
    Returns ``None`` when there is no product, which leaves the original text
    untouched. Otherwise returns the rebuilt phrase.

    >>> corr_clothiers_shop("Делать ткань роба")
    'Шить робу из ткани'
    >>> corr_clothiers_shop("Делать шёлк роба")
    'Шить шёлковую робу'
    >>> corr_clothiers_shop("Изготовить ткань мешок")
    'Шить мешок из ткани'
    >>> corr_clothiers_shop("Вышивать кожа изображение")
    'Вышивать изображение на коже'
    >>> corr_clothiers_shop("Делать пряжа рубаха")
    'Вязать рубаху из пряжи'
    >>> corr_clothiers_shop("Делать ткань верёвка")
    'Вить верёвку из ткани'
    """
    verb, material = search_result.group(1), search_result.group(2)
    product = search_result.group(3).strip()

    if not product:
        # Nothing to rearrange, eg. 'Ткать шёлк'
        return None

    if verb == "Вышивать":
        if material == "пряжа":
            # вязать <product> из пряжи
            return "{} {} {} {}".format("Вязать", product, "из", "пряжи")
        # вышивать <product> на <material>
        locative = inflect_noun(material, case="loct", orig_form={"nomn"})
        return "{} {} {} {}".format(verb, product, "на", locative)

    substitute_verb, of_material = cloth_subst[material]
    if product not in {"щит", "баклер"}:
        # For shields and bucklers the original 'Делать'/'Изготовить' is kept
        verb = substitute_verb
    if product == "верёвка":
        verb = "Вить"

    accusative = inflect_noun(product, case="accs", orig_form={"nomn"})

    if material not in make_adjective:
        return "{} {} {}".format(verb, accusative, of_material)  # {Шить} {робу} {из ткани}

    # "шёлк" -> "шёлковый", agreed with the product
    gender = get_gender(product, {"nomn"})
    adjective = inflect_adjective(make_adjective[material], gender, "accs", animated=False)
    return "{} {} {}".format(verb, adjective, accusative)  # {Шить} {шёлковую} {робу}
Exemple #6
0
def corr_of_material_item(text, _):
    """Rewrite an "из <material> <item>" phrase found in ``text``.

    The phrase is located with ``re_of_material_item``; group 1 of the match is
    split into words and a replacement is chosen by the shape of the phrase.
    The second parameter is ignored.

    NOTE(review): the doctests contain "р" where the expected output has "≡";
    this looks like a mis-decoded marker character that the pattern is expected
    to handle — confirm against ``re_of_material_item``.

    >>> corr_of_material_item("риз алевролита мемориал")
    '≡алевролитовый мемориал'
    >>> corr_of_material_item("из алевролита доспешная стойка")
    'алевролитовая доспешная стойка'
    >>> corr_of_material_item("(из висмутовой бронзы короткие мечи [3])")
    '(короткие мечи из висмутовой бронзы [3])'
    >>> corr_of_material_item("риз берёзы гробр")
    '≡берёзовый гроб≡'
    """
    search_result = re_of_material_item.search(text)
    initial_string = search_result.group(1)
    words = initial_string.split()

    if len(words) == 2:
        # Just "из <noun>": replace the phrase with the noun's normal (dictionary) form.
        parse = list(filter(lambda x: {"NOUN", "gent"} in x.tag, custom_parse(words[1])))
        # Exactly one genitive-noun reading is expected here.
        assert len(parse) == 1
        replacement_string = parse[0].normal_form
    elif words[1] == "древесины":
        # Ultra simple case: "из древесины ..." is only moved behind the item name.
        # cut_index marks where the material part ends.
        if "дерева" in words:  # 'из древесины миндального дерева'
            cut_index = words.index("дерева") + 1
        elif "пекан" in words:  # 'из древесины ореха пекан'
            cut_index = words.index("пекан") + 1
        elif any_in_tag({"NOUN", "gent"}, custom_parse(words[2])):  # 'из древесины яблони'
            cut_index = 3
        else:
            # Fallback: only the last word is moved to the front.
            cut_index = -1
        # Rotate: words from cut_index on go first, followed by the words before it.
        replacement_string = " ".join(words[cut_index:] + words[:cut_index])
    elif all(any_in_tag({"ADJF", "gent"}, custom_parse(adj)) for adj in words[1:-1]) and any_in_tag(
        {"NOUN", "gent"}, custom_parse(words[-1])
    ):
        # All words after 'из' except the last word are adjectives in genitive
        # The last is a noun in genitive
        material = words[-1]
        gender = get_gender(material, known_tags={"gent"})
        parse = list(filter(lambda x: {"NOUN", "gent"} in x.tag, custom_parse(material)))
        # The material is named by its normal form; its adjectives are put
        # into the nominative case in the material's gender.
        material = parse[0].normal_form
        adjs = words[1:-1]
        adjs = [inflect_adjective(adj, gender, case="nomn") for adj in adjs]
Exemple #7
0
def corr_container(text, _):
    """Reorder a "<contents> <container> (<material>)" phrase found in ``text``.

    The phrase is located with ``re_container``. Group 1 is the contents,
    group 2 the container and group 3 the optional material part. The contents
    are put into the genitive case, placed after the container, and the
    material is rendered in parentheses — as an adjective agreeing with the
    container when one is available, otherwise as an "из ..." phrase. The
    matched span is replaced in ``text`` by the capitalized result. The second
    parameter is ignored.

    >>> corr_container('(дварфийское пиво бочка (из ольхи))')
    '(Бочка дварфийского пива (ольховая))'
    >>> corr_container("(дварфийское вино бочка (из клёна) <#8>)")
    '(Бочка дварфийского вина (кленовая) <#8>)'
    >>> corr_container("(Семя бочка (из лумбанга) <#10>)")
    '(Бочка семян (лумбанговая) <#10>)'
    """
    search_result = re_container.search(text)
    initial_string = search_result.group(0)
    containment = search_result.group(1)
    # Some contents have a fixed substitute.
    if containment in replace_containment:
        containment = replace_containment[containment]
    # --- Put the contents into the genitive case ---
    if containment.endswith("кровь"):
        words = containment.split()
        if words[0] in possessive_adjectives:
            # Swap in the possessive adjective and inflect the words in place.
            words[0] = possessive_adjectives[words[0]]
            words = to_genitive_case_list(words)
        else:
            # The last word moves to the front, followed by the remaining words, all in genitive.
            words = [to_genitive_case_single_noun(words[-1])] + list(to_genitive_case_list(words[:-1]))
        containment = " ".join(words)
    elif containment.startswith("из "):
        containment = containment[3:]  # Words after 'из' are already in genitive case
    elif containment in {"слитков/блоков", "специй"}:
        pass  # Already in genitive case
    elif containment.startswith("семена"):
        # Only the first word is inflected; the rest is kept as it is.
        words = containment.split()
        words[0] = to_genitive_case(words[0])
        containment = " ".join(words)
    else:
        containment = to_genitive_case(containment)
    container = search_result.group(2)
    of_material = search_result.group(3)
    # --- Build the replacement phrase ---
    if not of_material:
        replacement_string = container + " " + containment
    elif (
        " " not in of_material
        and is_adjective(of_material)
        or of_material in make_adjective
        or of_material[3:] in make_adjective
    ):
        # The material can be expressed as an adjective: it either is one already
        # or has an entry in make_adjective (with or without its first 3 characters).
        if " " not in of_material and is_adjective(of_material):
            adjective = of_material
        elif of_material in make_adjective:
            adjective = make_adjective[of_material]
        elif of_material[3:] in make_adjective:
            adjective = make_adjective[of_material[3:]]
        else:
            # Not reachable given the enclosing condition; kept as a safeguard.
            adjective = None
        gender = get_gender(container, {"nomn"})
        adjective = inflect_adjective(adjective, gender)
        replacement_string = "{} {} ({})".format(container, containment, adjective)
    else:
        words = of_material.split()
        material = None
        if of_material.startswith("из ") or len(of_material) <= 2:
            # Already an 'из ...' phrase, or too short to process: keep as it is.
            material = of_material
        elif (
            len(words) >= 2
            and words[-2] == "из"
            and (words[-1] in materials or any(mat.startswith(words[-1]) for mat in materials))
        ):
            # Try to fix truncated material names, eg. '(ямный краситель мешок (гигантский пещерный паук из шёл'
            if words[-1] not in materials:  # Fix partial material name eg. 'шерст', 'шёлк'
                candidates = [mat for mat in materials if mat.startswith(words[-1])]
                if len(candidates) == 1:
                    words[-1] = candidates[0]
                else:
                    material = of_material  # Partial name is not recognized (too short)

            if not material:
                # 'из <material>' goes first, followed by the preceding words in genitive.
                material = " ".join(words[-2:] + list(to_genitive_case_list(words[:-2])))
        else:
            gen_case = list(to_genitive_case_list(of_material.split()))
            if None not in gen_case:
                material = "из " + " ".join(gen_case)
            else:
                # At least one word could not be inflected: keep the original text.
                material = of_material
        replacement_string = "{} {} ({}".format(container, containment, material)
        # Close the parenthesis only if the matched text ended with one.
        if initial_string[-1] == ")":
            replacement_string += ")"
    text = text.replace(initial_string, replacement_string.capitalize())
    return text
Exemple #8
0
def corr_forge(_, search_result):
    """Rephrase a forge job as "<verb> <material adjective> <object>".

    Group 1 of ``search_result`` is the verb; group 2 must start with "из"
    followed by the material (a genitive noun, optionally preceded by a
    genitive adjective) and then the object words. The object is put into the
    accusative case up to and including its first noun; words after that noun
    are left untouched. The verb "Кузница" is replaced by "Ковать". If the
    "из ..." material phrase has an entry in ``make_adjective`` it is rendered
    as an adjective agreeing with the object, otherwise the phrase is appended
    after the object. The result is returned with only its first character
    capitalized.

    Raises ``AssertionError`` when group 2 has fewer than three words, does not
    start with "из", or does not have the expected material structure.

    >>> corr_forge("Ковать из меди болты")
    'Ковать медные болты'
    >>> corr_forge("Кузница из железа Наконечники стрел баллисты")
    'Ковать железные наконечники стрел баллисты'
    >>> corr_forge("Делать из адамантина Колчан")
    'Делать адамантиновый колчан'
    """
    verb = search_result.group(1)
    words = search_result.group(2).split()
    assert len(words) >= 3

    assert words[0] == "из"
    # Second word is adjective in gent case
    second_is_adjf_in_gent = any_in_tag({"ADJF", "gent"}, custom_parse(words[1]))
    # Third word is noun in gent case
    third_is_noun_in_gent = any_in_tag({"NOUN", "gent"}, custom_parse(words[2]))
    if second_is_adjf_in_gent and third_is_noun_in_gent:
        of_material = words[:3]
        obj = words[3:]
    else:
        # Second word is noun in gent case
        assert any_in_tag({"NOUN", "gent"}, custom_parse(words[1]))
        of_material = words[:2]
        obj = words[2:]

    of_material = " ".join(of_material)
    noun_index = None
    parse = None
    gender = None

    if len(obj) == 1:
        noun_index = 0
        parse = custom_parse(obj[noun_index])
        noun = filter_noun(parse)
        gender = get_gender(obj[noun_index], known_tags={"nomn"})
        if not any_in_tag({"accs"}, noun):
            obj[0] = noun[0].inflect({"accs"}).word
    else:
        for i, word in enumerate(obj):
            parse = custom_parse(word)
            noun = filter_noun(parse)
            if noun:
                noun_index = i
                gender = get_gender(obj[noun_index])
                obj[i] = noun[0].inflect({"accs"}).word
                break  # Words after the 'item' must be left in genitive case
            elif not any_in_tag("accs", parse):
                obj[i] = parse[0].inflect({"accs"}).word

    assert parse is not None
    # When no noun was found, every word has already been handled in the loop
    # and noun_index is still None; indexing obj with it would raise TypeError.
    if noun_index is not None and not any_in_tag("accs", parse):
        obj[noun_index] = parse[0].inflect({"accs"}).word

    if verb == "Кузница":
        verb = "Ковать"

    if of_material in make_adjective:
        # The adjective must agree with the object, so its gender is required here
        assert gender is not None
        material = inflect_adjective(make_adjective[of_material], gender, "accs", animated=False)
        text = verb + " " + material + " " + " ".join(obj)
    else:
        text = verb + " " + " ".join(obj) + " " + of_material

    return text.capitalize()
Exemple #9
0
        adjs = [inflect_adjective(adj, gender, case="nomn") for adj in adjs]
        replacement_string = " ".join(adjs) + " " + material
    # elif (words[2] not in corr_item_general_except and len(words) > 3 and
    elif (
        len(words) > 3
        and any_in_tag({"gent"}, custom_parse(words[1]))
        and any_in_tag({"NOUN", "gent"}, custom_parse(words[2]))  # The second word is in genitive
    ):  # The third word is a noun in genitive
        # Complex case, eg. "из висмутовой бронзы"
        of_material = " ".join(words[:3])
        words = words[3:]
        if len(words) == 1:
            first_part = words[0]
        else:
            obj = words[-1]
            gender = get_gender(obj, "NOUN")
            adjs = (inflect_adjective(adj, gender) or adj for adj in words[:-1])
            first_part = "{} {}".format(" ".join(adjs), obj)
        replacement_string = first_part + " " + of_material
    elif any_in_tag({"NOUN", "gent"}, custom_parse(words[1])) and words[1] != "древесины":
        # Simple case, eg. "из бронзы"
        of_material = " ".join(words[:2])
        words = words[2:]
        item = words[-1]

        for word in words:
            if any_in_tag({"NOUN", "nomn"}, custom_parse(word)):
                item = word
                break

        if of_material in make_adjective: