Ejemplos de custom_parse en Python, ejemplos de changetext.utils.custom_parse en Python

Ejemplo n.º 1

0

Mostrar archivo

def corr_jewelers_shop(_, search_result):
    """
    Rebuild a jeweler's-workshop job title with correctly inflected words.

    Group 1 of ``search_result`` is the action and group 2 the words that
    follow it. For the action "Огранить" the words are put into the
    accusative case; for any other action they are put into the ablative
    (instrumental) case. The first parameter is ignored.

    >>> corr_jewelers_shop("Огранить из необработанного адамантина")
    'Огранить необработанный адамантин'
    >>> corr_jewelers_shop("Инкрустировать Предметы обстановки с из необработанного адамантина")
    'Инкрустировать предметы обстановки необработанным адамантином'
    >>> corr_jewelers_shop("Огранить из фарфора")
    'Огранить фарфор'
    """
    action = search_result.group(1)
    tokens = search_result.group(2).split()

    if action != "Огранить":
        # "Incrust with something": every word except the preposition goes to ablative.
        phrase = [custom_parse(token)[0].inflect({"ablt"}).word for token in tokens if token != "из"]
    else:
        # "Cut something": the object goes to accusative.
        known = None
        if tokens[0] == "из":
            # After the preposition the words are known to be in genitive.
            tokens = tokens[1:]
            known = {"gent"}
        gem = tokens[-1]
        modifiers = tokens[:-1]
        gender = get_gender(gem, known_tags=known)
        phrase = [inflect_adjective(modifier, gender, "accs", animated=False) for modifier in modifiers]
        candidates = [p for p in custom_parse(gem) if {gender, "inan"} in p.tag]
        if gem == "адамантина":
            # Hard-coded form for this one word instead of going through the parser.
            phrase.append("адамантин")
        else:
            phrase.append(candidates[0].inflect({"accs"}).word)

    if action.endswith(" с"):
        # Drop the dangling preposition left at the end of the action part.
        action = action[:-2]
    return (action + " " + " ".join(phrase)).capitalize()

Ejemplo n.º 2

0

Mostrar archivo

def corr_craft_glass(text, search_result):  # TODO: Combine into single crafting-related function
    """
    Rework a glass-crafting job title so that its words agree grammatically.

    Reads the verb (group 1), the leading words (group 2), the material
    (group 3) and the product words (group 4) from ``search_result``.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; the final assembly of the product branch and the return
    statement are not shown, so they are not documented here.

    >>> corr_craft_glass("Делать грубый зелёное стекло")
    'Варить грубое зелёное стекло'
    >>> corr_craft_glass("Делать гигантский хрусталь лезвие топора")
    'Делать гигантское лезвие топора из хрусталя'
    """
    material = search_result.group(3)
    material_gender = get_gender(material)
    words = search_result.group(2).split()
    product = search_result.group(4).split()
    verb = search_result.group(1)
    if not product:
        # No product words: the verb is replaced with "Варить" and the leading
        # words are inflected as accusative adjectives agreeing with the material.
        verb = "Варить"
        adjectives = (inflect_adjective(adj, material_gender, "accs", animated=False) for adj in words)
        result = "{} {} {}".format(verb, " ".join(adjectives), material)
    else:
        # Position of the first leading word found in the listed set of glass
        # descriptors; falls back to len(words) when none of them is present.
        index = next(
            (i for i, item in enumerate(words) if item in {"грубое", "зелёное", "прозрачное", "грубый"}), len(words)
        )
        # Everything before that position is treated as an adjective of the product.
        product_adjectives = words[:index]
        if any_in_tag({"NOUN", "nomn"}, custom_parse(product[0])):
            # The first product word has a nominative-noun parse: it supplies the gender
            # and is the only product word put into accusative.
            product_gender = get_gender(product[0])
            product[0] = inflect_noun(product[0], case="accs")
        else:
            # Otherwise the last product word supplies the gender; the words before it
            # are added to the adjectives and only the last word is kept as the product.
            product_gender = get_gender(product[-1])
            product_adjectives += product[:-1]
            product = [inflect_noun(product[-1], case="accs")]

        # Make every product adjective agree with the product in gender, accusative case.
        product_adjectives = [
            inflect_adjective(adj, product_gender, case="accs", animated=False) for adj in product_adjectives
        ]

Ejemplo n.º 3

0

Mostrar archivo

def corr_of_material_item(text, _):
    """
    Rewrite an "из <material> <item>" phrase found in ``text``.

    The phrase is located with ``re_of_material_item``; group 1 of the match
    is split into words and the branches below build a replacement string
    depending on the shape of that word list. The second parameter is ignored.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; the remaining branches and the return statement are not shown,
    so they are not documented here.

    >>> corr_of_material_item("риз алевролита мемориал")
    '≡алевролитовый мемориал'
    >>> corr_of_material_item("из алевролита доспешная стойка")
    'алевролитовая доспешная стойка'
    >>> corr_of_material_item("(из висмутовой бронзы короткие мечи [3])")
    '(короткие мечи из висмутовой бронзы [3])'
    >>> corr_of_material_item("риз берёзы гробр")
    '≡берёзовый гроб≡'
    """
    search_result = re_of_material_item.search(text)
    initial_string = search_result.group(1)
    words = initial_string.split()

    if len(words) == 2:
        # Only two words: the second one must have exactly one genitive-noun parse,
        # and its normal form becomes the replacement.
        parse = list(filter(lambda x: {"NOUN", "gent"} in x.tag, custom_parse(words[1])))
        assert len(parse) == 1
        replacement_string = parse[0].normal_form
    elif words[1] == "древесины":
        # Ultra simple case
        # cut_index marks where the "of ... wood" part ends; the words after it
        # are moved in front of it below.
        if "дерева" in words:  # 'из древесины миндального дерева' ("of almond tree wood")
            cut_index = words.index("дерева") + 1
        elif "пекан" in words:  # 'из древесины ореха пекан' ("of pecan nut wood")
            cut_index = words.index("пекан") + 1
        elif any_in_tag({"NOUN", "gent"}, custom_parse(words[2])):  # 'из древесины яблони' ("of apple tree wood")
            cut_index = 3
        else:
            # Fallback: only the last word is moved to the front.
            cut_index = -1
        replacement_string = " ".join(words[cut_index:] + words[:cut_index])
    elif all(any_in_tag({"ADJF", "gent"}, custom_parse(adj)) for adj in words[1:-1]) and any_in_tag(
        {"NOUN", "gent"}, custom_parse(words[-1])
    ):
        # All words after 'из' except the last word are adjectives in genitive
        # The last is a noun in genitive
        material = words[-1]
        gender = get_gender(material, known_tags={"gent"})
        parse = list(filter(lambda x: {"NOUN", "gent"} in x.tag, custom_parse(material)))
        # Replace the material with its normal form and bring the adjectives to
        # nominative, agreeing with the material's gender.
        material = parse[0].normal_form
        adjs = words[1:-1]
        adjs = [inflect_adjective(adj, gender, case="nomn") for adj in adjs]

Ejemplo n.º 4

0

Mostrar archivo

def corr_craft_general(text, search_result):
    """
    Fix the word forms of a generic crafting job title.

    Group 1 of ``search_result`` is the verb; group 2 holds the material or
    adjective words followed by the product. The product is put into the
    accusative case. A single leading word that is neither a key of
    ``make_adjective`` nor an adjective is treated as a material and appended
    as "из <material in genitive>"; otherwise the leading words become
    accusative adjectives agreeing with the product.

    Returns ``text`` with the matched span replaced, capitalized.

    >>> corr_craft_general("Изготовить камень дверь")
    'Изготовить каменную дверь'
    >>> corr_craft_general("Делать деревянный ловушка для животных")
    'Делать деревянную ловушку для животных'
    >>> corr_craft_general("Украшать кость")
    'Украшать кость'
    >>> corr_craft_general("Делать деревянный изделия")
    'Делать деревянные изделия'
    """

    def _as_adjective(word):
        # Map a leading word to its adjective form, or None when it is not one.
        if word in make_adjective:
            return make_adjective[word]
        return word if is_adjective(word) else None

    verb = search_result.group(1)
    tokens = search_result.group(2).split()

    if len(tokens) <= 1:
        # A single word is the product itself, with nothing in front of it.
        product, modifiers = tokens[0], []
    else:
        # The product starts at the first nominative noun after the first word
        # that is not a key of make_adjective.
        product, modifiers = None, tokens
        for pos, candidate in enumerate(tokens[1:], start=1):
            if any_in_tag({"NOUN", "nomn"}, custom_parse(candidate)) and candidate not in make_adjective:
                product = " ".join(tokens[pos:])
                modifiers = tokens[:pos]
                break

    product_gender = get_main_word_gender(product)

    if " " in product:
        product = inflect_collocation(product, {"accs"})
    else:
        number = "plur" if product_gender == "plur" else "sing"
        product = inflect_noun(product, "accs", orig_form={number, "inan"})
        assert product is not None

    if not modifiers:
        result = "{} {}".format(verb, product)
    elif len(modifiers) == 1 and modifiers[0] not in make_adjective and not is_adjective(modifiers[0]):
        material = inflect_noun(modifiers[0], "gent", orig_form={"nomn", "inan"})  # рог -> (из) рога
        assert material is not None
        result = "{} {} из {}".format(verb, product, material)
    else:
        adjectives = [_as_adjective(word) for word in modifiers]
        assert all(adj is not None for adj in adjectives)
        inflected = [inflect_adjective(adj, product_gender, "accs", animated=False) for adj in adjectives]
        result = "{} {} {}".format(verb, " ".join(inflected), product)

    return text.replace(search_result.group(0), result).capitalize()

Ejemplo n.º 5

0

Mostrar archivo

Archivo: preliminary_changes.py Proyecto: dfint/changetextpy_script

def corr_ending_s(text):
    """
    Replace a word or phrase carrying a leftover "s" plural ending.

    The span is located with ``re_ending_s``: group 1 is an optional number
    and group 2 the word or phrase. Returns ``text`` with the matched span
    replaced, or ``None`` when no replacement form could be produced.

    >>> corr_ending_s("трупs [2]")
    'трупы [2]'
    """
    match = re_ending_s.search(text)
    count = match.group(1)
    phrase = match.group(2)

    if count and " " not in phrase:
        # Number plus a single word: make the noun agree with the number.
        count = int(count)
        nouns = [p for p in custom_parse(phrase) if {"NOUN", "nomn", "sing"} in p.tag]
        assert len(nouns) == 1
        agreed = nouns[0].make_agree_with_number(count).word
        replacement = "{:d} {}".format(count, agreed)
    elif phrase in dict_ending_s:
        # The whole phrase has a ready-made replacement.
        replacement = dict_ending_s[phrase]
    elif " " not in phrase:
        replacement = corr_ending_s_internal(phrase)
        if not replacement:
            return None
    else:
        # Several words: only the last one is replaced.
        pieces = phrase.split()
        tail = pieces[-1]
        if tail in dict_ending_s:
            pieces[-1] = dict_ending_s[tail]
        else:
            fixed = corr_ending_s_internal(tail)
            if not fixed:
                return None
            pieces[-1] = fixed
        replacement = " ".join(pieces)

    return text.replace(match.group(0), replacement)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: preliminary_changes.py Proyecto: dfint/changetextpy_script

def corr_ending_s_internal(text):
    """
    Build the replacement form for a single word that carried an "s" ending.

    Only parses that are a nominative singular noun or a second-person verb
    are considered, and the first such parse decides the result: a noun is
    put into plural, a verb into third person singular.

    Returns the new word, or ``None`` when no suitable parse exists or the
    requested form cannot be built.
    """
    candidates = [
        x for x in custom_parse(text)
        if {"NOUN", "nomn", "sing"} in x.tag or {"VERB", "2per"} in x.tag
    ]

    if not candidates:
        # Cannot determine part of speech
        return None

    best = candidates[0]
    target_form = {"plur"} if best.tag.POS == "NOUN" else {"3per", "sing"}

    # NOTE(review): assumes the parse objects follow the pymorphy2 convention of
    # returning None from inflect() when the form cannot be built; treating that
    # as "no replacement" avoids an AttributeError on `.word`.
    inflected = best.inflect(target_form)
    if inflected is None:
        return None

    return inflected.word

Ejemplo n.º 7

0

Mostrar archivo

Archivo: preliminary_changes.py Proyecto: dfint/changetextpy_script

def corr_has_verb(text, search_result):
    """
    Drop an auxiliary word in front of a verb and keep the verb in past tense.

    Group 2 of ``search_result`` is the verb. If none of its verb/infinitive
    parses is already in past tense, the first one is inflected to past
    singular. The whole matched span in ``text`` is then replaced by that
    single word.

    Returns ``None`` when there is no match or the word has no verb parse.

    >>> corr_has_verb(" имеет создал ")
    ' создал '
    >>> corr_has_verb(" был создал ")
    ' создал '
    >>> corr_has_verb(" был создать ")
    ' создал '
    >>> corr_has_verb(" имеет пришёл ")
    ' пришёл '
    >>> corr_has_verb(" имеет упал ")
    ' упал '
    >>> corr_has_verb(" имеет стрямкал ")
    ' стрямкал '
    """
    if not search_result:
        return None

    verb = search_result.group(2)
    verb_parses = [p for p in custom_parse(verb) if p.tag.POS in ("VERB", "INFN")]
    if not verb_parses:
        return None

    already_past = any({"past"} in p.tag for p in verb_parses)
    if not already_past:
        verb = verb_parses[0].inflect({"VERB", "past", "sing"}).word
    return text.replace(search_result.group(0), verb)

Ejemplo n.º 8

0

Mostrar archivo

def corr_item_body_parts(text, search_result):
    """
    Reorder "<creature words> <body part>" so the body part comes first.

    Group 1 of ``search_result`` is the span to replace, group 2 the leading
    words and group 3 the body-part word. The leading words are passed
    through ``to_genitive_case_list``; a trailing "частичный" or
    "искалеченный" is moved to the very front instead. The replacement is
    capitalized.

    Returns ``None`` when the first parse of any leading word carries the
    GRND tag.

    >>> corr_item_body_parts("{крыса останки}")
    '{Останки крысы}'
    >>> corr_item_body_parts("мотылёк останки")
    'Останки мотылька'
    >>> corr_item_body_parts("кеа труп")
    'Труп кеа'
    >>> corr_item_body_parts("{сипуха голень}")
    '{Голень сипухи}'
    """
    matched = search_result.group(1)
    owner_words = search_result.group(2).split()
    qualifier = owner_words[-1]

    if qualifier in {"частичный", "искалеченный"}:
        phrase = "{} {} {}".format(
            qualifier,
            search_result.group(3),
            " ".join(to_genitive_case_list(owner_words[:-1])),
        )
        return text.replace(matched, phrase.capitalize())

    # Leave the text alone when a word parses as GRND (adverbial participle).
    for word in owner_words:
        if "GRND" in custom_parse(word)[0].tag:
            return None

    phrase = search_result.group(3) + " " + " ".join(to_genitive_case_list(owner_words))
    return text.replace(matched, phrase.capitalize())

Ejemplo n.º 9

0

Mostrar archivo

Archivo: tag_correction.py Proyecto: dfint/changetextpy_script

def corr_tags(text, state=None):
    """
    Resolve inline ``<...>`` tags in ``text`` and return the joined result.

    ``text`` is split with ``parse_tags`` into plain pieces and tag pieces.
    A tag has the shape ``<tag1,tag2:word>`` or ``<tag1,tag2>``:

    * with a word after the colon, that word is inflected with the tags and
      inserted in place of the tag;
    * without a word, the tags are remembered and applied to the next piece
      of plain text;
    * ``capitalize`` marks the part at that position for capitalization;
    * ``get-form`` marks the part whose form is read after the loop, and
      ``set-form`` marks parts that are re-inflected into that form.

    Tags that could not be applied (pending inflection tags, or a
    ``capitalize`` pointing past the last part) are appended as tag text to
    ``state.prev_tail``.

    :param text: the text containing tags.
    :param state: object with a ``prev_tail`` attribute; when falsy,
        ``get_state()`` is used instead.
    :return: the parts joined with ``smart_join``, or ``None`` when an empty
        tag (``<>``) is encountered.
    :raises ValueError: when more than one ``get-form`` tag carries a word.
    """
    state = state or get_state()
    text_parts = []
    # Index in text_parts of the part tagged with get-form (at most one).
    get_index = None
    # Indices in text_parts of the parts tagged with set-form.
    set_indices = set()
    # Indices in text_parts of the parts whose first word must be capitalized.
    capitalize_indices = set()
    # Tags of a word-less tag, waiting to be applied to the next plain piece.
    inflect_next = set()
    for i, item in enumerate(parse_tags(text)):
        if not item.strip():
            # Whitespace-only pieces are appended unchanged below.
            pass
        elif item[0] == "<":
            item = item.strip("<>")
            if not item:
                return None
            # Split "tags:word"; `item` is empty when the tag carries no word.
            tags, _, item = item.partition(":")
            tags = set(tags.split(","))

            if "capitalize" in tags:
                tags.remove("capitalize")
                # len(text_parts) is the index the next appended part will get.
                capitalize_indices.add(len(text_parts))

            if item:
                # Inflect the word inside the tag after the colon
                word = item.strip()

                if "get-form" in tags:
                    if get_index is not None:
                        raise ValueError(
                            "Duplicate <get-form> tag in {!r}".format(text))
                    get_index = len(text_parts)
                    tags.remove("get-form")
                elif "set-form" in tags:
                    set_indices.add(len(text_parts))
                    tags.remove("set-form")

                if tags:
                    item = inflect_text(word, tags)
                else:
                    item = word
            else:
                # Inflect a part of text after the tag till the ending point of the sentence.
                # The tag itself produces no part, hence the `continue`.
                inflect_next = tags
                continue
        elif inflect_next:
            # Plain text following a word-less tag: only the head returned by
            # split_sentence is inflected, the tail is re-attached afterwards.
            item, tail = split_sentence(item)
            item = item.lstrip(" ")
            if not any_cyr(item.split(" ")[0]):
                # The first word fails the any_cyr check, so it is not inflected as a word.
                if item.strip()[0].isdigit():
                    # A leading number: append the word "год" in the requested form after it.
                    if "loct" in inflect_next:
                        inflect_next.remove("loct")
                        inflect_next.add(
                            "loc2")  # inflect into 'году' instead of 'годе'
                    item, tail1 = cut_number(item)
                    item += " " + custom_parse("год")[0].inflect(
                        inflect_next).word + tail1.lstrip(",")
                elif (not text_parts
                      or not any_cyr(text_parts[-1].rstrip().split(" ")[-1])
                      ) and inflect_next == {"gent"}:
                    # A bare genitive tag with no preceding part passing any_cyr:
                    # emit "of " before the uninflected item.
                    text_parts.append("of ")
                pass
            else:
                # Gender/number tags are removed for enumerations and collocations;
                # a single word is inflected with the full tag set.
                tags = inflect_next - {"masc", "femn", "neut", "plur"}
                if "," in item:
                    item = inflect_enumeration(item, tags)
                elif " " in item:
                    item = inflect_collocation(item, tags)
                else:
                    p = custom_parse(item)[0]
                    item = p.inflect(inflect_next).word
            item += tail
            inflect_next = set()
        else:
            pass
        text_parts.append(item)

    # Tag text that could not be applied within this string.
    delayed = ""
    if inflect_next:
        delayed += "<{}>".format(",".join(inflect_next))

    if get_index is not None:
        # Copy the form of the get-form part onto every set-form part.
        form = get_form(text_parts[get_index])
        form -= {
            "anim", "inan"
        }  # discard these two because they don't matter for the nominal case

        for i in set_indices:
            word = text_parts[i]
            text_parts[i] = inflect_text(word, form)

    if capitalize_indices:
        for i in capitalize_indices:
            if i >= len(text_parts):
                # The tag was the last piece, so there is no part to capitalize here.
                delayed += "<capitalize>"
            else:
                # Capitalize only the first whitespace-separated word of the part.
                for part in text_parts[i].split():
                    if part:
                        text_parts[i] = text_parts[i].replace(
                            part, part.capitalize(), 1)
                        break

    if delayed:
        state.prev_tail += delayed

    return smart_join(text_parts)

Ejemplo n.º 10

0

Mostrar archivo

def corr_forge(_, search_result):
    """
    Rebuild a forge job title of the shape "<verb> из <material> <object>".

    Group 1 of ``search_result`` is the verb and group 2 the rest, which must
    start with "из" followed by the material in genitive (a noun, or an
    adjective plus a noun). The object words are put into the accusative
    case. When the "из ..." phrase is a key of ``make_adjective``, the
    material becomes an adjective placed before the object; otherwise the
    phrase is appended after the object. The verb "Кузница" is replaced with
    "Ковать". The first parameter is ignored.

    >>> corr_forge("Ковать из меди болты")
    'Ковать медные болты'
    >>> corr_forge("Кузница из железа Наконечники стрел баллисты")
    'Ковать железные наконечники стрел баллисты'
    >>> corr_forge("Делать из адамантина Колчан")
    'Делать адамантиновый колчан'
    """
    verb = search_result.group(1)
    tokens = search_result.group(2).split()
    assert len(tokens) >= 3
    assert tokens[0] == "из"

    # Decide whether the material takes one word (noun) or two (adjective + noun).
    adjective_then_noun = (
        any_in_tag({"ADJF", "gent"}, custom_parse(tokens[1])),
        any_in_tag({"NOUN", "gent"}, custom_parse(tokens[2])),
    )
    if all(adjective_then_noun):
        split_at = 3
    else:
        # Otherwise the second word itself must be a noun in genitive.
        assert any_in_tag({"NOUN", "gent"}, custom_parse(tokens[1]))
        split_at = 2

    of_material = " ".join(tokens[:split_at])
    obj = tokens[split_at:]

    noun_index = None
    parse = None
    gender = None

    if len(obj) == 1:
        noun_index = 0
        only = obj[0]
        parse = custom_parse(only)
        nouns = filter_noun(parse)
        gender = get_gender(only, known_tags={"nomn"})
        if not any_in_tag({"accs"}, nouns):
            obj[0] = nouns[0].inflect({"accs"}).word
    else:
        for pos, token in enumerate(obj):
            parse = custom_parse(token)
            nouns = filter_noun(parse)
            if nouns:
                noun_index = pos
                gender = get_gender(token)
                obj[pos] = nouns[0].inflect({"accs"}).word
                # Words after the head noun must be left in genitive case.
                break
            if not any_in_tag("accs", parse):
                obj[pos] = parse[0].inflect({"accs"}).word

    # `parse` is the parse list of the last word examined above.
    assert parse is not None
    if not any_in_tag("accs", parse):
        obj[noun_index] = parse[0].inflect({"accs"}).word

    if verb == "Кузница":
        verb = "Ковать"

    joined_obj = " ".join(obj)
    if of_material in make_adjective:
        assert gender is not None
        material = inflect_adjective(make_adjective[of_material], gender, "accs", animated=False)
        text = " ".join((verb, material, joined_obj))
    else:
        text = " ".join((verb, joined_obj, of_material))

    return text.capitalize()

Ejemplo n.º 11

0

Mostrar archivo

 elif all(any_in_tag({"ADJF", "gent"}, custom_parse(adj)) for adj in words[1:-1]) and any_in_tag(
     {"NOUN", "gent"}, custom_parse(words[-1])
 ):
     # All words after 'из' except the last word are adjectives in genitive
     # The last is a noun in genitive
     material = words[-1]
     gender = get_gender(material, known_tags={"gent"})
     parse = list(filter(lambda x: {"NOUN", "gent"} in x.tag, custom_parse(material)))
     material = parse[0].normal_form
     adjs = words[1:-1]
     adjs = [inflect_adjective(adj, gender, case="nomn") for adj in adjs]
     replacement_string = " ".join(adjs) + " " + material
 # elif (words[2] not in corr_item_general_except and len(words) > 3 and
 elif (
     len(words) > 3
     and any_in_tag({"gent"}, custom_parse(words[1]))
     and any_in_tag({"NOUN", "gent"}, custom_parse(words[2]))  # The second word is in genitive
 ):  # The third word is a noun in genitive
     # Complex case, eg. "из висмутовой бронзы"
     of_material = " ".join(words[:3])
     words = words[3:]
     if len(words) == 1:
         first_part = words[0]
     else:
         obj = words[-1]
         gender = get_gender(obj, "NOUN")
         adjs = (inflect_adjective(adj, gender) or adj for adj in words[:-1])
         first_part = "{} {}".format(" ".join(adjs), obj)
     replacement_string = first_part + " " + of_material
 elif any_in_tag({"NOUN", "gent"}, custom_parse(words[1])) and words[1] != "древесины":
     # Simple case, eg. "из бронзы"