def nom_and_gen_xml(content0, content1, content2, content3, content4):
    """Build the nodes for a lemma followed by its genitive form.

    Two input layouts are handled, chosen by whether ``content0`` (ignoring
    trailing whitespace) ends with a comma:

    * it does: the comma is dropped from the lemma text and the genitive
      is read from ``content2``, or from ``content3`` when ``content2`` is
      a lone ``-``;
    * it does not: ``content0`` is used verbatim as the lemma text and the
      genitive is read from ``content3``, or from ``content4`` when
      ``content3`` is a lone ``-``.

    A sense number (``1.`` or ``1. a)``) found at the end of the genitive
    text is cut off and handed back separately as bold nodes.

    Args:
        content0: Lemma text, optionally ending with a comma.
        content1: Unused; kept so existing callers keep working.
        content2: Genitive text or a lone ``-`` (comma layout only).
        content3: Genitive text, its continuation after ``-``, or a lone
            ``-``, depending on the layout.
        content4: Genitive continuation after ``-`` (no-comma layout only).

    Returns:
        A 3-tuple ``(nodes, tag_span, sense_number_nodes)``:

        * ``nodes``: lemma node, ``', '`` punctuation node, the items
          returned by ``nf.create_usg_node('gen.')`` (unpacked, so that
          call is expected to return an iterable) and the inflected node;
        * ``tag_span``: 3 or 4 for the comma layout, 4 or 5 otherwise, the
          larger value applying when a lone ``-`` was present (presumably
          the number of input pieces consumed; confirm with the caller);
        * ``sense_number_nodes``: bold nodes for the removed sense number,
          or an empty list.
    """

    def split_off_sense_numbers(text):
        """Return ``text`` without a trailing sense number, plus its nodes."""
        stripped = text.strip()
        if stripped.endswith('1. a)'):
            marker, labels = '1. a)', ('1.', ' a)')
        elif stripped.endswith('1.'):
            marker, labels = '1.', ('1.',)
        else:
            return text, []
        bold_nodes = [nf.create_bold_hi_node(label) for label in labels]
        # rindex cuts at the trailing marker that was just detected; a
        # forward search would truncate at any earlier "1." in the text.
        return text[:text.rindex(marker)], bold_nodes

    lemma_text = content0.rstrip()
    # endswith() instead of indexing [-1]: an empty or whitespace-only
    # lemma falls through to the no-comma layout rather than raising
    # IndexError.
    if lemma_text.endswith(','):
        form_lemma = nf.create_form_lemma_node(lemma_text[:-1])
        first_piece, piece_after_dash, base_span = content2, content3, 3
    else:
        form_lemma = nf.create_form_lemma_node(content0)
        first_piece, piece_after_dash, base_span = content3, content4, 4

    usg_gen = nf.create_usg_node('gen.')

    if first_piece.strip() == '-':
        # The hyphen sits in its own piece; the ending follows in the next.
        ending, sense_numbers_in_end = split_off_sense_numbers(piece_after_dash)
        form_inflected = nf.create_form_inflected_node(' -' + ending)
        tag_span = base_span + 1
    else:
        ending, sense_numbers_in_end = split_off_sense_numbers(first_piece)
        form_inflected = nf.create_form_inflected_node(' ' + ending.lstrip())
        tag_span = base_span

    pc = nf.create_pc_node(', ')
    result = [form_lemma, pc, *usg_gen, form_inflected]
    return result, tag_span, sense_numbers_in_end
def deal_with_completely_unknown_entry(entry):
    """Move everything in ``entry.contents`` into ``encoded_parts['senses']``.

    When the first content node has non-empty text that contains neither
    ``', '`` nor a space (a single token), it is replaced by a form-lemma
    node built from that text and removed from ``entry.contents``.
    All remaining content nodes are then appended to the senses unchanged.

    Args:
        entry: Object exposing a mutable ``contents`` sequence (items with
            a ``text`` attribute) and an ``encoded_parts`` mapping whose
            ``'senses'`` value supports ``append``.

    Returns:
        None. ``entry`` is modified in place.
    """
    senses = entry.encoded_parts['senses']

    if entry.contents and entry.contents[0].text:
        first_node = entry.contents[0]
        # Exactly one piece after splitting means a lone token.
        if len(re.split(', | ', first_node.text)) == 1:
            senses.append(nf.create_form_lemma_node(first_node.text))
            entry.contents.pop(0)

    # Plain loop: the appends are side effects, so no throwaway list is built.
    for node in entry.contents:
        senses.append(node)
def adj_multiple_forms_xml(content0):
    """Build nodes for a ``', '``-separated list of adjective forms.

    The first item becomes a form-lemma node; each later item becomes a
    form-inflected node preceded by a ``', '`` punctuation node.

    Args:
        content0: The forms joined by ``', '``.

    Returns:
        List of nodes in output order.
    """
    lemma_text, *other_forms = content0.split(', ')
    nodes = [nf.create_form_lemma_node(lemma_text)]
    for form_text in other_forms:
        inflected = nf.create_form_inflected_node(form_text)
        # The separator is created after the form node but emitted before it.
        nodes.extend((nf.create_pc_node(', '), inflected))
    return nodes
def noun_xml(content0, content1):
    """Build nodes for a noun: lemma, optional inflected form, grammar group.

    ``content0`` is split at its last ``', '``. The part before it becomes
    the form-lemma node; a non-empty part after it becomes a form-inflected
    node preceded by a ``', '`` punctuation node. A grammar-group node
    built from ``content1`` is always appended last.

    Args:
        content0: Lemma text, optionally followed by ``', '`` and an
            inflected form.
        content1: Text passed to ``nf.create_gram_grp``.

    Returns:
        List of nodes in output order.
    """
    parts = content0.rsplit(', ', 1)
    nodes = [nf.create_form_lemma_node(parts[0])]

    # rsplit yields a single element when there is no ', '; check the
    # length first so a bare lemma does not raise IndexError.
    if len(parts) > 1 and parts[1]:
        nodes.append(nf.create_pc_node(', '))
        nodes.append(nf.create_form_inflected_node(parts[1]))

    nodes.append(nf.create_gram_grp(content1))
    return nodes
def verb_xml(entry_type, content0, content1):
    """Build nodes for a verb: lemma, further forms, optional grammar group.

    The first ``', '``-separated item of ``content0`` becomes the
    form-lemma node; every later item becomes a form-inflected node
    preceded by a ``', '`` punctuation node. Unless ``entry_type`` is
    ``'special_verb'``, a grammar-group node built from ``content1`` with
    the ``"iType"`` argument is appended.

    Args:
        entry_type: Entry kind; ``'special_verb'`` suppresses the grammar
            group.
        content0: The verb forms joined by ``', '``.
        content1: Text passed to ``nf.create_gram_grp``.

    Returns:
        List of nodes in output order.
    """
    lemma_text, *other_forms = content0.split(', ')
    nodes = [nf.create_form_lemma_node(lemma_text)]

    for form_text in other_forms:
        separator = nf.create_pc_node(', ')
        nodes += [separator, nf.create_form_inflected_node(form_text)]

    if entry_type == 'special_verb':
        return nodes

    nodes.append(nf.create_gram_grp(content1, "iType"))
    return nodes
def unknown_initial_xml(content0):
    """Build nodes for the ``', '``-separated forms of an unclassified entry.

    Pieces for which ``is_empty_string`` is true are dropped. The first
    remaining piece becomes a form-lemma node, the others form-inflected
    nodes, with a ``', '`` punctuation node between consecutive pieces and
    none after the last.

    Args:
        content0: The forms joined by ``', '``.

    Returns:
        List of nodes in output order; empty when no piece remains.
    """
    pieces = [
        piece for piece in content0.split(', ')
        if not is_empty_string(piece)
    ]
    last_index = len(pieces) - 1
    nodes = []
    for index, piece in enumerate(pieces):
        if index == 0:
            nodes.append(nf.create_form_lemma_node(piece))
        else:
            nodes.append(nf.create_form_inflected_node(piece))
        # TODO: Extra comma / missing comma
        if index != last_index:
            nodes.append(nf.create_pc_node(', '))
    return nodes
def adj_1_2_decl_xml(content0, content1):
    """Return the lemma node and grammar-group node for this adjective entry.

    Args:
        content0: Text for the form-lemma node.
        content1: Text for the grammar group, created with the
            placeholder type ``"????"``.

    Returns:
        Two-element list: ``[form_lemma_node, gram_grp_node]``.
    """
    return [
        nf.create_form_lemma_node(content0),
        nf.create_gram_grp(content1, "????"),
    ]
def adv_conjunct_xml(content0, content1):
    """Return the lemma node and grammar-group node for this entry.

    Args:
        content0: Text for the form-lemma node.
        content1: Text for the grammar group, created with the
            placeholder type ``"????"``.

    Returns:
        Two-element list: ``[form_lemma_node, gram_grp_node]``.
    """
    return [
        nf.create_form_lemma_node(content0),
        nf.create_gram_grp(content1, "????"),
    ]