예제 #1
0
def nom_and_gen_xml(content0, content1, content2, content3, content4):
    """Build the nodes for a headword that is followed by its genitive form.

    Two layouts are handled, told apart by whether ``content0`` (ignoring
    trailing whitespace) ends with a comma:

    * comma present: the comma is dropped from the lemma and the genitive
      form is read from ``content2`` (or from ``content3`` when ``content2``
      is just a dash);
    * comma absent: the lemma is ``content0`` unchanged and the genitive
      form is read from ``content3`` (or from ``content4`` when ``content3``
      is just a dash).

    A trailing sense marker (``1.`` or ``1. a)``) on the genitive form is cut
    off and returned separately as bold nodes.

    Args:
        content0: Text holding the lemma, optionally ending with a comma.
        content1: Not read by this function; kept for interface
            compatibility with the callers.
        content2: Genitive form or a lone dash (comma layout).
        content3: Genitive ending (comma layout), or genitive form / lone
            dash (no-comma layout).
        content4: Genitive ending (no-comma layout).

    Returns:
        A ``(nodes, tag_span, sense_numbers_in_end)`` tuple where ``nodes``
        is ``[form_lemma, pc, *usg_gen, form_inflected]``, ``tag_span`` is an
        int between 3 and 5 (presumably the number of content pieces
        consumed -- confirm against the caller), and
        ``sense_numbers_in_end`` is the list of bold nodes for a stripped
        sense marker (empty when there was none).
    """

    def create_nodes_for_sense_numbers(sense_number):
        """Return the bold nodes that represent a recognised sense marker."""
        if sense_number == '1.':
            return [nf.create_bold_hi_node('1.')]
        if sense_number == '1. a)':
            return [
                nf.create_bold_hi_node('1.'),
                nf.create_bold_hi_node(' a)'),
            ]
        return []

    def remove_sense_numbers(text):
        """Split a trailing sense marker off ``text``.

        Returns ``(text_without_marker, marker_nodes)``; ``text`` comes back
        untouched and the node list is empty when no marker is present.
        """
        trimmed = text.rstrip()
        for marker in ('1.', '1. a)'):
            if trimmed.endswith(marker):
                # Cut exactly the trailing marker. Searching for the first
                # '1.' instead would truncate too much whenever the same
                # characters also occur earlier in the text.
                return (
                    trimmed[:-len(marker)],
                    create_nodes_for_sense_numbers(marker),
                )
        return text, []

    lemma_source = content0.rstrip()
    # endswith() is safe on an empty string, unlike indexing with [-1].
    if lemma_source.endswith(','):
        lemma_text = lemma_source[:-1]
        dash_or_form, form_after_dash, base_span = content2, content3, 3
    else:
        lemma_text = content0
        dash_or_form, form_after_dash, base_span = content3, content4, 4

    form_lemma = nf.create_form_lemma_node(lemma_text)
    usg_gen = nf.create_usg_node('gen.')

    if dash_or_form.strip() == '-':
        # The dash is its own piece, so the ending sits one piece further on.
        ending, sense_numbers_in_end = remove_sense_numbers(form_after_dash)
        form_inflected = nf.create_form_inflected_node(' -' + ending)
        tag_span = base_span + 1
    else:
        form, sense_numbers_in_end = remove_sense_numbers(dash_or_form)
        form_inflected = nf.create_form_inflected_node(' ' + form.lstrip())
        tag_span = base_span

    pc = nf.create_pc_node(', ')

    # usg_gen is unpacked, so nf.create_usg_node must return an iterable.
    result = [form_lemma, pc, *usg_gen, form_inflected]
    return result, tag_span, sense_numbers_in_end
예제 #2
0
def deal_with_completely_unknown_entry(entry):
    """Move the contents of an unrecognised entry into its ``senses`` list.

    When the first content node has non-empty text that is a single token
    (no ``', '`` and no space in it), that node is replaced by a form-lemma
    node built from its text and is removed from ``entry.contents``. All
    remaining content nodes are then appended to
    ``entry.encoded_parts['senses']`` in order.

    Args:
        entry: Object exposing a ``contents`` sequence of nodes with a
            ``.text`` attribute and an ``encoded_parts`` mapping that has a
            ``'senses'`` list. Both are mutated in place.
    """
    if len(entry.contents) and entry.contents[0].text:
        first_node = entry.contents[0]

        # A lone token is taken to be the headword itself.
        if len(re.split(', | ', first_node.text)) == 1:
            entry.encoded_parts['senses'].append(
                nf.create_form_lemma_node(first_node.text)
            )
            entry.contents.pop(0)

    # extend() instead of a side-effect-only list comprehension, which would
    # build and discard a list of None values.
    entry.encoded_parts['senses'].extend(entry.contents)
예제 #3
0
def adj_multiple_forms_xml(content0):
    """Turn a ``', '``-separated list of adjective forms into nodes.

    The first item becomes the form-lemma node; every further item becomes a
    form-inflected node preceded by a ``', '`` punctuation node.

    Args:
        content0: The forms joined by ``', '``.

    Returns:
        The list of created nodes in document order.
    """
    lemma_text, *other_forms = content0.split(', ')
    nodes = [nf.create_form_lemma_node(lemma_text)]

    for form in other_forms:
        inflected_node = nf.create_form_inflected_node(form)
        separator_node = nf.create_pc_node(', ')
        nodes += [separator_node, inflected_node]

    return nodes
예제 #4
0
def noun_xml(content0, content1):
    """Build the nodes for a noun entry.

    ``content0`` is split at its last ``', '``: the part before it becomes
    the form-lemma node and the part after it (if non-empty) becomes a
    form-inflected node preceded by a ``', '`` punctuation node. When
    ``content0`` has no ``', '`` at all, the whole text is the lemma and no
    inflected form is emitted. A grammar group built from ``content1`` is
    always appended last.

    Args:
        content0: Lemma text, optionally followed by ``', '`` and an
            inflected form.
        content1: Value passed to ``nf.create_gram_grp``.

    Returns:
        The list of created nodes in document order.
    """
    lemma_text, separator, inflected_text = content0.rpartition(', ')
    if not separator:
        # No ', ' present: rpartition puts everything in the last slot.
        # Indexing the rsplit() result at [1] would raise IndexError here.
        lemma_text, inflected_text = content0, ''

    result = [nf.create_form_lemma_node(lemma_text)]

    if inflected_text:
        pc = nf.create_pc_node(', ')
        form_inflected = nf.create_form_inflected_node(inflected_text)
        result.append(pc)
        result.append(form_inflected)

    result.append(nf.create_gram_grp(content1))

    return result
예제 #5
0
def verb_xml(entry_type, content0, content1):
    """Build the nodes for a verb entry.

    The first ``', '``-separated item of ``content0`` becomes the form-lemma
    node; each further item becomes a form-inflected node preceded by a
    ``', '`` punctuation node. Unless ``entry_type`` is ``'special_verb'``,
    a grammar group built from ``content1`` with the ``"iType"`` argument is
    appended at the end.

    Args:
        entry_type: Entry classification; only ``'special_verb'`` is
            treated specially.
        content0: The verb forms joined by ``', '``.
        content1: Value passed to ``nf.create_gram_grp``.

    Returns:
        The list of created nodes in document order.
    """
    lemma_text, *other_forms = content0.split(', ')
    nodes = [nf.create_form_lemma_node(lemma_text)]

    for form in other_forms:
        separator_node = nf.create_pc_node(', ')
        inflected_node = nf.create_form_inflected_node(form)
        nodes.extend((separator_node, inflected_node))

    if entry_type == 'special_verb':
        return nodes

    nodes.append(nf.create_gram_grp(content1, "iType"))
    return nodes
def unknown_initial_xml(content0):
    """Build lemma and inflected-form nodes from a ``', '``-separated string.

    Items for which ``is_empty_string`` is true are dropped. The first
    remaining item becomes the form-lemma node and the others become
    form-inflected nodes; a ``', '`` punctuation node is placed between
    every pair of neighbouring items. An input with no usable items yields
    an empty list.

    Args:
        content0: The forms joined by ``', '``.

    Returns:
        The list of created nodes in document order.
    """
    parts = [
        piece for piece in content0.split(', ') if not is_empty_string(piece)
    ]
    last_position = len(parts) - 1
    nodes = []

    for position, piece in enumerate(parts):
        if position == 0:
            nodes.append(nf.create_form_lemma_node(piece))
            if last_position > 0:
                nodes.append(nf.create_pc_node(', '))
            # TODO: Extra comma / missing comma
            continue

        nodes.append(nf.create_form_inflected_node(piece))
        # Every inflected form except the final one is followed by ', '.
        if position != last_position:
            nodes.append(nf.create_pc_node(', '))

    return nodes
예제 #7
0
def adj_1_2_decl_xml(content0, content1):
    """Return the lemma node and grammar group for this adjective entry.

    Args:
        content0: Text for the form-lemma node.
        content1: Value passed to ``nf.create_gram_grp`` together with the
            placeholder type ``"????"``.

    Returns:
        A two-item list: ``[form_lemma_node, gram_grp_node]``.
    """
    return [
        nf.create_form_lemma_node(content0),
        nf.create_gram_grp(content1, "????"),
    ]
예제 #8
0
def adv_conjunct_xml(content0, content1):
    """Return the lemma node and grammar group for this entry.

    Args:
        content0: Text for the form-lemma node.
        content1: Value passed to ``nf.create_gram_grp`` together with the
            placeholder type ``"????"``.

    Returns:
        A two-item list: ``[form_lemma_node, gram_grp_node]``.
    """
    return [
        nf.create_form_lemma_node(content0),
        nf.create_gram_grp(content1, "????"),
    ]