def corr_craft_glass(text, search_result): # TODO: Combine into single crafting-related function """ >>> corr_craft_glass("Делать грубый зелёное стекло") 'Варить грубое зелёное стекло' >>> corr_craft_glass("Делать гигантский хрусталь лезвие топора") 'Делать гигантское лезвие топора из хрусталя' """ material = search_result.group(3) material_gender = get_gender(material) words = search_result.group(2).split() product = search_result.group(4).split() verb = search_result.group(1) if not product: verb = "Варить" adjectives = (inflect_adjective(adj, material_gender, "accs", animated=False) for adj in words) result = "{} {} {}".format(verb, " ".join(adjectives), material) else: index = next( (i for i, item in enumerate(words) if item in {"грубое", "зелёное", "прозрачное", "грубый"}), len(words) ) product_adjectives = words[:index] if any_in_tag({"NOUN", "nomn"}, custom_parse(product[0])): product_gender = get_gender(product[0]) product[0] = inflect_noun(product[0], case="accs") else: product_gender = get_gender(product[-1]) product_adjectives += product[:-1] product = [inflect_noun(product[-1], case="accs")] product_adjectives = [ inflect_adjective(adj, product_gender, case="accs", animated=False) for adj in product_adjectives ]
def corr_adjective_relief(text, search_result):
    """Inflect the adjective(s) of a phrase to the gender of the noun they describe.

    Args:
        text: The phrase being corrected.
        search_result: Regex match object. ``group(1)`` is the adjective to
            re-inflect, ``group(2)`` is the object it describes (one or more
            space-separated words).

    Returns:
        The corrected phrase passed through ``str.capitalize``.

    NOTE(review): the examples below pass only ``text``; presumably a wrapper
    outside this view runs the regex and supplies ``search_result`` - confirm.

    >>> corr_adjective_relief("Заснеженный Густой овсяница")
    'Заснеженная густая овсяница'
    >>> corr_adjective_relief("Густой мюленбергия")
    'Густая мюленбергия'
    >>> corr_adjective_relief("Густой морошка")
    'Густая морошка'
    >>> corr_adjective_relief("Заснеженный Густой куропаточья трава")
    'Заснеженная густая куропаточья трава'
    """
    adjective = search_result.group(1)
    obj = search_result.group(2)

    if " " in obj:
        words = obj.split(" ")
        if is_adjective(words[0]):
            # The last word of the object is treated as the head noun.
            gender = get_gender(words[-1])
            # Same order as before: the object's own adjective first,
            # then the leading adjective captured by the regex.
            for word in (words[0], adjective):
                new_word = inflect_adjective(word, gender, "nomn")
                # The single-word branch below already treats a falsy result as
                # "could not inflect"; do the same here instead of handing a
                # non-string to str.replace (which raises TypeError).
                if new_word:
                    text = text.replace(word, new_word)
    else:
        gender = get_gender(obj)
        new_word = inflect_adjective(adjective, gender, "nomn")
        if new_word:
            text = "{} {}".format(new_word, obj)

    return text.capitalize()
def corr_jewelers_shop(_, search_result):
    """Put the material of a jeweler's job into the case required by the verb.

    ``search_result.group(1)`` is the leading part of the job name (the verb,
    optionally followed by more words) and ``group(2)`` holds the material
    words. For the verb "Огранить" the material goes to the accusative case;
    for anything else every material word is put into the ablative case and a
    trailing " с" is cut from the leading part.

    The first positional argument is ignored. Returns the rebuilt phrase passed
    through ``str.capitalize``.

    NOTE(review): the examples below pass only the text; presumably a wrapper
    outside this view supplies ``search_result`` - confirm.

    >>> corr_jewelers_shop("Огранить из необработанного адамантина")
    'Огранить необработанный адамантин'
    >>> corr_jewelers_shop("Инкрустировать Предметы обстановки с из необработанного адамантина")
    'Инкрустировать предметы обстановки необработанным адамантином'
    >>> corr_jewelers_shop("Огранить из фарфора")
    'Огранить фарфор'
    """
    action = search_result.group(1)
    tokens = search_result.group(2).split()

    if action != "Огранить":
        # instrumental/ablative case ('incrust with smth')
        tokens = [custom_parse(token)[0].inflect({"ablt"}).word for token in tokens if token != "из"]
        if action.endswith(" с"):
            action = action[:-2]
        return (action + " " + " ".join(tokens)).capitalize()

    # accusative case
    known = None
    if tokens[0] == "из":
        # After the preposition the material is known to be in genitive.
        known = {"gent"}
        tokens = tokens[1:]

    noun = tokens[-1]
    gender = get_gender(noun, known_tags=known)
    rebuilt = [inflect_adjective(token, gender, "accs", animated=False) for token in tokens[:-1]]
    candidates = [variant for variant in custom_parse(noun) if {gender, "inan"} in variant.tag]

    if noun == "адамантина":
        # Hard-coded special case: this form is not run through the analyzer.
        rebuilt.append("адамантин")
    else:
        rebuilt.append(candidates[0].inflect({"accs"}).word)

    return (action + " " + " ".join(rebuilt)).capitalize()
def corr_gem_cutting(text, search_result):
    """Make the words preceding a noun agree with it and splice the result into ``text``.

    Args:
        text: The phrase being corrected.
        search_result: Regex match object. ``group(1)`` holds the words to
            process (the last one is treated as the noun); ``group(0)`` is the
            span of ``text`` that gets replaced.

    Returns:
        ``text`` with the matched span replaced by the re-inflected words, or
        the result of ``corr_item_body_parts(text)`` when the last word is
        listed in ``body_parts``.

    NOTE(review): the examples below pass only ``text``; presumably a wrapper
    outside this view supplies ``search_result`` - confirm.

    >>> corr_gem_cutting("(бриолетовый восковые опалы)")
    '(бриолетовые восковые опалы)'
    >>> corr_gem_cutting("большой шерлы")
    'большие шерлы'
    """
    words = search_result.group(1).split()
    if words[-1] in body_parts:
        return corr_item_body_parts(text)

    gender = get_gender(words[-1], {"NOUN", "nomn"})
    new_list = []
    for word in words[:-1]:
        # Other correctors in this file treat a falsy inflect_adjective result
        # as "could not inflect". Fall back to the uninflected adjective so the
        # final join never receives a non-string item (TypeError).
        if word in make_adjective:
            adj = make_adjective[word]
            word = inflect_adjective(adj, gender) or adj
        elif is_adjective(word):
            word = inflect_adjective(word, gender) or word
        new_list.append(word)

    new_list.append(words[-1])
    return text.replace(search_result.group(0), " ".join(new_list))
def corr_clothiers_shop(_, search_result):
    """Rebuild a clothier's job name as "<verb> <product> <material part>".

    ``search_result`` groups: (1) verb, (2) material, (3) product.

    * Empty product: returns ``None`` so the phrase is left as is.
    * Verb "Вышивать": returns "<verb> <product> <preposition> <material>",
      with a hard-coded rewrite for the material "пряжа".
    * Otherwise the verb and the "of material" phrase come from
      ``cloth_subst[material]``; the original verb is kept for "щит"/"баклер",
      and "верёвка" always gets the verb "Вить". If the material has an entry
      in ``make_adjective`` the adjective form is used instead of the
      "of material" phrase.

    The first positional argument is ignored.

    NOTE(review): the examples below pass only the text; presumably a wrapper
    outside this view supplies ``search_result`` - confirm.

    >>> corr_clothiers_shop("Делать ткань роба")
    'Шить робу из ткани'
    >>> corr_clothiers_shop("Делать шёлк роба")
    'Шить шёлковую робу'
    >>> corr_clothiers_shop("Изготовить ткань мешок")
    'Шить мешок из ткани'
    >>> corr_clothiers_shop("Вышивать кожа изображение")
    'Вышивать изображение на коже'
    >>> corr_clothiers_shop("Делать пряжа рубаха")
    'Вязать рубаху из пряжи'
    >>> corr_clothiers_shop("Делать ткань верёвка")
    'Вить верёвку из ткани'
    """
    action = search_result.group(1)
    fabric = search_result.group(2)
    item = search_result.group(3).strip()

    if not item:
        # Leave as is eg. 'Ткать шёлк'
        return None

    if action == "Вышивать":
        if fabric == "пряжа":
            # вязать <product> из пряжи
            return "{} {} {} {}".format("Вязать", item, "из", "пряжи")
        # вышивать <product> на <material>
        fabric_loct = inflect_noun(fabric, case="loct", orig_form={"nomn"})
        return "{} {} {} {}".format(action, item, "на", fabric_loct)

    table_verb, of_material = cloth_subst[fabric]
    if item not in {"щит", "баклер"}:
        # For shields the verb is left untouched ('Делать'/'Изготовить').
        action = table_verb
    if item == "верёвка":
        action = "Вить"

    item_accs = inflect_noun(item, case="accs", orig_form={"nomn"})

    if fabric in make_adjective:
        # "шёлк" -> "шёлковый": {Шить} {шёлковую} {робу}
        gender = get_gender(item, {"nomn"})
        fabric_adj = inflect_adjective(make_adjective[fabric], gender, "accs", animated=False)
        return "{} {} {}".format(action, fabric_adj, item_accs)

    # {Шить} {робу} {из ткани}
    return "{} {} {}".format(action, item_accs, of_material)
def corr_of_material_item(text, _): """ >>> corr_of_material_item("риз алевролита мемориал") '≡алевролитовый мемориал' >>> corr_of_material_item("из алевролита доспешная стойка") 'алевролитовая доспешная стойка' >>> corr_of_material_item("(из висмутовой бронзы короткие мечи [3])") '(короткие мечи из висмутовой бронзы [3])' >>> corr_of_material_item("риз берёзы гробр") '≡берёзовый гроб≡' """ search_result = re_of_material_item.search(text) initial_string = search_result.group(1) words = initial_string.split() if len(words) == 2: parse = list(filter(lambda x: {"NOUN", "gent"} in x.tag, custom_parse(words[1]))) assert len(parse) == 1 replacement_string = parse[0].normal_form elif words[1] == "древесины": # Ultra simple case if "дерева" in words: # 'из древесины миндального дерева' cut_index = words.index("дерева") + 1 elif "пекан" in words: # 'из древесины ореха пекан' cut_index = words.index("пекан") + 1 elif any_in_tag({"NOUN", "gent"}, custom_parse(words[2])): # 'из древесины яблони' cut_index = 3 else: cut_index = -1 replacement_string = " ".join(words[cut_index:] + words[:cut_index]) elif all(any_in_tag({"ADJF", "gent"}, custom_parse(adj)) for adj in words[1:-1]) and any_in_tag( {"NOUN", "gent"}, custom_parse(words[-1]) ): # All words after 'из' except the last word are adjectives in genitive # The last is a noun in genitive material = words[-1] gender = get_gender(material, known_tags={"gent"}) parse = list(filter(lambda x: {"NOUN", "gent"} in x.tag, custom_parse(material))) material = parse[0].normal_form adjs = words[1:-1] adjs = [inflect_adjective(adj, gender, case="nomn") for adj in adjs]
def corr_container(text, _):
    """Reorder a "<contents> <container> (<material>)" phrase and fix its grammar.

    The phrase is located with ``re_container``; its groups are used as
    (1) the contents, (2) the container and (3) the optional material.
    The contents are put into the genitive case, the phrase is rebuilt as
    "<container> <contents> (<material>)", capitalized with
    ``str.capitalize`` and substituted for the matched span in ``text``.

    Args:
        text: The string to correct. It is searched here, so the second
            positional argument is ignored.

    Returns:
        ``text`` with the matched span replaced.

    NOTE(review): ``re_container.search`` is assumed to match; a ``None``
    result would fail on ``.group`` - presumably the caller guarantees a match.

    >>> corr_container('(дварфийское пиво бочка (из ольхи))')
    '(Бочка дварфийского пива (ольховая))'
    >>> corr_container("(дварфийское вино бочка (из клёна) <#8>)")
    '(Бочка дварфийского вина (кленовая) <#8>)'
    >>> corr_container("(Семя бочка (из лумбанга) <#10>)")
    '(Бочка семян (лумбанговая) <#10>)'
    """
    search_result = re_container.search(text)
    initial_string = search_result.group(0)

    # --- Contents: bring into the genitive case ---
    containment = search_result.group(1)
    if containment in replace_containment:
        # Whole-phrase substitution takes place before any inflection.
        containment = replace_containment[containment]

    if containment.endswith("кровь"):
        words = containment.split()
        if words[0] in possessive_adjectives:
            # Swap the first word for its possessive adjective, then inflect all words in order.
            words[0] = possessive_adjectives[words[0]]
            words = to_genitive_case_list(words)
        else:
            # Move the last word to the front, then the remaining words, all in genitive.
            words = [to_genitive_case_single_noun(words[-1])] + list(to_genitive_case_list(words[:-1]))
        containment = " ".join(words)
    elif containment.startswith("из "):
        containment = containment[3:]  # Words after 'из' are already in genitive case
    elif containment in {"слитков/блоков", "специй"}:
        pass  # Already in genitive case
    elif containment.startswith("семена"):
        # Only the first word is inflected; the rest is kept verbatim.
        words = containment.split()
        words[0] = to_genitive_case(words[0])
        containment = " ".join(words)
    else:
        containment = to_genitive_case(containment)

    # --- Container and material ---
    container = search_result.group(2)
    of_material = search_result.group(3)
    if not of_material:
        replacement_string = container + " " + containment
    elif (
        " " not in of_material
        and is_adjective(of_material)
        or of_material in make_adjective
        or of_material[3:] in make_adjective
    ):
        # The material can be expressed as a single adjective.
        if " " not in of_material and is_adjective(of_material):
            adjective = of_material
        elif of_material in make_adjective:
            adjective = make_adjective[of_material]
        elif of_material[3:] in make_adjective:
            # Same lookup with the first three characters dropped
            # (presumably the "из " prefix - the doctests use "из ольхи").
            adjective = make_adjective[of_material[3:]]
        else:
            # Not reachable given the enclosing condition; kept as a fallback.
            adjective = None
        gender = get_gender(container, {"nomn"})
        adjective = inflect_adjective(adjective, gender)
        replacement_string = "{} {} ({})".format(container, containment, adjective)
    else:
        words = of_material.split()
        material = None
        if of_material.startswith("из ") or len(of_material) <= 2:
            # Already an "из ..." phrase, or too short to process: keep verbatim.
            material = of_material
        elif (
            len(words) >= 2
            and words[-2] == "из"
            and (words[-1] in materials or any(mat.startswith(words[-1]) for mat in materials))
        ):
            # Try to fix truncated material names, eg. '(ямный краситель мешок (гигантский пещерный паук из шёл'
            if words[-1] not in materials:
                # Fix partial material name eg. 'шерст', 'шёлк'
                candidates = [mat for mat in materials if mat.startswith(words[-1])]
                if len(candidates) == 1:
                    words[-1] = candidates[0]
                else:
                    material = of_material  # Partial name is not recognized (too short)
            if not material:
                # "из <material>" goes first, followed by the preceding words in genitive.
                material = " ".join(words[-2:] + list(to_genitive_case_list(words[:-2])))
        else:
            gen_case = list(to_genitive_case_list(of_material.split()))
            if None not in gen_case:
                material = "из " + " ".join(gen_case)
            else:
                # At least one word could not be inflected: keep the original text.
                material = of_material
        # The closing parenthesis is added only if the matched span had one
        # (the span may be cut off, see the truncated example above).
        replacement_string = "{} {} ({}".format(container, containment, material)
        if initial_string[-1] == ")":
            replacement_string += ")"

    text = text.replace(initial_string, replacement_string.capitalize())
    return text
def corr_forge(_, search_result):
    """Rebuild a forge job name with the product in the accusative case.

    ``search_result.group(1)`` is the verb and ``group(2)`` is
    "из <material> <product words>". The material takes two words
    ("из" + noun) or three ("из" + adjective + noun), all in genitive;
    the remaining words are the product. The verb "Кузница" is replaced by
    "Ковать". If the "из ..." phrase has an entry in ``make_adjective``, the
    result is "<verb> <material adjective> <product>", otherwise
    "<verb> <product> <из material>". The result is passed through
    ``str.capitalize``.

    The first positional argument is ignored.

    Raises:
        AssertionError: if the input does not have the expected shape
            (fewer than three words, first word is not "из", material noun
            not recognized as genitive, no product words, or no gender found
            when a material adjective is needed).

    NOTE(review): the examples below pass only the text; presumably a wrapper
    outside this view supplies ``search_result`` - confirm.

    >>> corr_forge("Ковать из меди болты")
    'Ковать медные болты'
    >>> corr_forge("Кузница из железа Наконечники стрел баллисты")
    'Ковать железные наконечники стрел баллисты'
    >>> corr_forge("Делать из адамантина Колчан")
    'Делать адамантиновый колчан'
    """
    verb = search_result.group(1)
    words = search_result.group(2).split()
    assert len(words) >= 3
    assert words[0] == "из"

    # Second word is adjective in gent case
    second_is_adjf_in_gent = any_in_tag({"ADJF", "gent"}, custom_parse(words[1]))
    # Third word is noun in gent case
    third_is_noun_in_gent = any_in_tag({"NOUN", "gent"}, custom_parse(words[2]))
    if second_is_adjf_in_gent and third_is_noun_in_gent:
        of_material = words[:3]
        obj = words[3:]
    else:
        # Second word is noun in gent case
        assert any_in_tag({"NOUN", "gent"}, custom_parse(words[1]))
        of_material = words[:2]
        obj = words[2:]

    of_material = " ".join(of_material)

    # Filled in below; the asserts further down rely on these starting as None.
    noun_index = None
    parse = None
    gender = None
    if len(obj) == 1:
        # Single-word product: it is the noun itself.
        noun_index = 0
        parse = custom_parse(obj[noun_index])
        noun = filter_noun(parse)
        gender = get_gender(obj[noun_index], known_tags={"nomn"})
        if not any_in_tag({"accs"}, noun):
            obj[0] = noun[0].inflect({"accs"}).word
    else:
        # Multi-word product: inflect words up to and including the first noun.
        for i, word in enumerate(obj):
            parse = custom_parse(word)
            noun = filter_noun(parse)
            if noun:
                noun_index = i
                gender = get_gender(obj[noun_index])
                obj[i] = noun[0].inflect({"accs"}).word
                break  # Words after the 'item' must be left in genitive case
            elif not any_in_tag("accs", parse):
                # Word before the noun that has no accusative reading yet.
                obj[i] = parse[0].inflect({"accs"}).word

    # NOTE(review): indentation was lost in the source this was restored from;
    # this check is assumed to sit at function level (after both branches) - confirm.
    # ``parse`` is still None only when there were no product words at all.
    assert parse is not None
    # NOTE(review): "accs" is passed as a plain string here, unlike the {"accs"}
    # set used above - verify any_in_tag treats both the same way.
    if not any_in_tag("accs", parse):
        obj[noun_index] = parse[0].inflect({"accs"}).word

    if verb == "Кузница":
        verb = "Ковать"

    if of_material in make_adjective:
        # The gender is needed to make the material adjective agree with the product.
        assert gender is not None
        material = inflect_adjective(make_adjective[of_material], gender, "accs", animated=False)
        text = verb + " " + material + " " + " ".join(obj)
    else:
        text = verb + " " + " ".join(obj) + " " + of_material

    return text.capitalize()
adjs = [inflect_adjective(adj, gender, case="nomn") for adj in adjs] replacement_string = " ".join(adjs) + " " + material # elif (words[2] not in corr_item_general_except and len(words) > 3 and elif ( len(words) > 3 and any_in_tag({"gent"}, custom_parse(words[1])) and any_in_tag({"NOUN", "gent"}, custom_parse(words[2])) # The second word is in genitive ): # The third word is a noun in genitive # Complex case, eg. "из висмутовой бронзы" of_material = " ".join(words[:3]) words = words[3:] if len(words) == 1: first_part = words[0] else: obj = words[-1] gender = get_gender(obj, "NOUN") adjs = (inflect_adjective(adj, gender) or adj for adj in words[:-1]) first_part = "{} {}".format(" ".join(adjs), obj) replacement_string = first_part + " " + of_material elif any_in_tag({"NOUN", "gent"}, custom_parse(words[1])) and words[1] != "древесины": # Simple case, eg. "из бронзы" of_material = " ".join(words[:2]) words = words[2:] item = words[-1] for word in words: if any_in_tag({"NOUN", "nomn"}, custom_parse(word)): item = word break if of_material in make_adjective: