px_handle.close() msg("Read {} paragraph annotations".format(len(data))) if not_found: msg("Could not find {} label/line entries in index: {}".format(len(not_found), sorted({lab for lab in not_found}))) else: msg("All label/line entries found in index") return data # # Integrating the px data # # # In[ ]: px = ExtraData(API) px.deliver_annots( 'para', {'title': 'Paragraph numbers', 'date': '2015'}, [ ('px/px_data.{}'.format(source+version), 'px', read_px, ( ('etcbc4', 'px', 'instruction'), ('etcbc4', 'px', 'number_in_ch'), ('etcbc4', 'px', 'pargr'), )), ], ) # ## Checking: loading the new features
# NOTE(review): garbled extraction — the statements below were flattened onto
# one physical line and are left byte-identical because they cannot be safely
# re-indented from this view alone:
#   * the chunk begins deep inside an unseen function (it prints ketiv/qere
#     words missing from the data, capped at error_limit, and ends with
#     `return [(x[0], x[2], x[3]) for x in data]`); the nesting depth of the
#     opening statements and which `if`/`for` the bare `else:` pairs with are
#     not recoverable here — TODO confirm against the full file;
#   * the chunk is TRUNCATED at the end, mid-way through the arguments of
#     `lex.deliver_annots(...)` (after `get_lex,`); the remaining tuple
#     entries and the call's closing brackets lie outside this view.
# Visible top-level intent: build an ExtraData annotation package combining
# phonetic transcription ('ph'), ketiv-qere ('kq') and lexicon ('lex') data
# under the ('etcbc4', ...) feature namespace.
vlab = F.label.v(vnode) for (windex, ketiv) in missed[vnode]: e += 1 if e > error_limit: break print("NOT IN DATA: {:<10} {:<20} #{}".format(vlab, ketiv, windex)) else: msg("All ketivs found in the data") return [(x[0], x[2], x[3]) for x in data] # # Compose the annotation package # In[21]: lex = ExtraData(API) ph_base = "{}/{}.{}{}".format("ph", "phono", source, version) kq_base = "{}/{}.{}{}".format("kq", "kq", source, version) msg("Writing annotation package ...") lex.deliver_annots( "lexicon", {"title": "Lexicon lookups, phonetic transcription, ketiv-qere, statistics", "date": "2015"}, [ (ph_base, "ph", get_phono, (("etcbc4", "ph", "phono"), ("etcbc4", "ph", "phono_sep"))), (kq_base, "kq", get_kq, (("etcbc4", "kq", "g_qere_utf8"), ("etcbc4", "kq", "qtrailer_utf8"))), ( "lexicon/lex_data", "lex", get_lex,