Esempio n. 1
0
    px_handle.close()
    msg("Read {} paragraph annotations".format(len(data)))
    if not_found:
        msg("Could not find {} label/line entries in index: {}".format(len(not_found), sorted({lab for lab in not_found})))
    else:
        msg("All label/line entries found in index")
    return data


# # Integrating the px data
# 
# 

# In[ ]:

# Deliver the px data as the 'para' annotation package.
px = ExtraData(API)
px.deliver_annots(
    'para',
    {'title': 'Paragraph numbers', 'date': '2015'},
    [
        (
            'px/px_data.{}'.format(source+version),
            'px',
            read_px,
            # All three triples share the ('etcbc4', 'px') prefix and differ
            # only in the last element (presumably the feature name -- confirm
            # against the ExtraData API).
            tuple(
                ('etcbc4', 'px', feature)
                for feature in ('instruction', 'number_in_ch', 'pargr')
            ),
        ),
    ],
)


# ## Checking: loading the new features
Esempio n. 2
0
            vlab = F.label.v(vnode)
            for (windex, ketiv) in missed[vnode]:
                e += 1
                if e > error_limit:
                    break
                print("NOT IN DATA: {:<10} {:<20} #{}".format(vlab, ketiv, windex))
    else:
        msg("All ketivs found in the data")
    return [(x[0], x[2], x[3]) for x in data]


# # Compose the annotation package

# In[21]:

lex = ExtraData(API)

# Base names for the "ph" and "kq" entries handed to deliver_annots below;
# both follow the same "<dir>/<name>.<source><version>" pattern.
ph_base, kq_base = (
    "{}/{}.{}{}".format(directory, name, source, version)
    for (directory, name) in (("ph", "phono"), ("kq", "kq"))
)

msg("Writing annotation package ...")
lex.deliver_annots(
    "lexicon",
    {"title": "Lexicon lookups, phonetic transcription, ketiv-qere, statistics", "date": "2015"},
    [
        (ph_base, "ph", get_phono, (("etcbc4", "ph", "phono"), ("etcbc4", "ph", "phono_sep"))),
        (kq_base, "kq", get_kq, (("etcbc4", "kq", "g_qere_utf8"), ("etcbc4", "kq", "qtrailer_utf8"))),
        (
            "lexicon/lex_data",
            "lex",
            get_lex,