def add_nominal_stems(ctx):
    """Add regular noun stems to the database.

    Reads the CSV file named by ``ctx.config['NOMINAL_STEMS']`` and inserts
    one row into the :class:`NominalStem` table per record, sending the rows
    to the database in batches of 500.

    :param ctx: context object; this function uses ``ctx.engine`` (to open a
                connection) and ``ctx.config`` (to locate the CSV file).
    """
    # Since there are so many nominal stems, the SQLAlchemy calls here
    # are a little more low-level.
    batch_size = 500
    ins = NominalStem.__table__.insert()
    gender_group = ENUM['gender_group']
    tick = util.tick_every(5000)

    conn = ctx.engine.connect()
    # NOTE(review): no explicit commit is issued here; this presumably relies
    # on the engine autocommitting executed inserts -- confirm for the
    # SQLAlchemy version in use.
    try:
        buf = []
        for row in util.read_csv(ctx.config['NOMINAL_STEMS']):
            genders_id = gender_group[row['stem_genders']]
            buf.append({
                'name': row['stem'],
                'pos_id': Tag.NOMINAL,
                'genders_id': genders_id,
            })
            tick(row['stem'])

            # Flush a full batch with a single multi-row execute.
            if len(buf) == batch_size:
                conn.execute(ins, buf)
                buf = []

        # Add any remainder.
        if buf:
            conn.execute(ins, buf)
    finally:
        # Always release the connection, even if a row fails to insert.
        conn.close()
def add_indeclinables(ctx):
    """Add indeclinables to the database.

    Creates one :class:`Indeclinable` per row of the CSV file named by
    ``ctx.config['INDECLINABLES']``, using the row's ``name`` column. The
    session is committed once after all rows are added, then closed.

    :param ctx: context object; this function uses ``ctx.session`` and
                ``ctx.config``.
    """
    db = ctx.session
    report = util.tick_every(300)

    records = util.read_csv(ctx.config['INDECLINABLES'])
    for record in records:
        word = record['name']
        db.add(Indeclinable(name=word))
        report(word)

    db.commit()
    db.close()
def add_roots(ctx, prefix_map):
    """Populates :class:`Root` and its subclasses.

    Works in three passes:

    1. Read ``ctx.config['UNPREFIXED_ROOTS']`` and create one :class:`Root`
       per distinct ``(root, hom)`` pair.
    2. Read the same file again and create one :class:`Paradigm` per row.
    3. Read ``ctx.config['PREFIXED_ROOTS']`` and create one
       :class:`PrefixedRoot` per row, linked to its unprefixed basis.

    :param ctx: context object; this function uses ``ctx.session`` and
                ``ctx.config``.
    :param prefix_map: currently unused by this function.
    :returns: dict mapping ``(name, hom)`` to the id of the created root
              (unprefixed and prefixed alike).
    """
    # TODO: modified roots
    db = ctx.session
    vclass_ids = ENUM['vclass']
    voice_ids = ENUM['voice']

    ids_by_key = {}  # (name, hom) -> id
    report = util.tick_every(100)

    # Pass 1: one Root per distinct (name, hom).
    for record in util.read_csv(ctx.config['UNPREFIXED_ROOTS']):
        key = (record['root'], record['hom'])

        # A root can have multiple paradigms (= multiple appearances),
        # so only the first appearance creates a row.
        if key not in ids_by_key:
            entry = Root(name=key[0])
            db.add(entry)
            # Flush so the new row's id is available for the map.
            db.flush()
            ids_by_key[key] = entry.id
            report(key[0])

    # Pass 2: one Paradigm per row, pointing at the root created above.
    for record in util.read_csv(ctx.config['UNPREFIXED_ROOTS']):
        key = (record['root'], record['hom'])
        vclass = record['class']
        voice = record['voice']

        assert key in ids_by_key
        db.add(Paradigm(
            root_id=ids_by_key[key],
            vclass_id=vclass_ids[vclass],
            voice_id=voice_ids[voice],
        ))

    db.commit()

    # Pass 3: prefixed roots.
    for record in util.read_csv(ctx.config['PREFIXED_ROOTS']):
        name = record['prefixed_root']
        basis_key = (record['unprefixed_root'], record['hom'])
        prefixes = record['prefixes'].split('-')

        assert basis_key in ids_by_key
        basis_id = ids_by_key[basis_key]

        for _prefix in prefixes:
            # TODO
            pass

        derived = PrefixedRoot(name=name, basis_id=basis_id)
        db.add(derived)
        db.flush()
        # The prefixed root is registered under its own name, with the same hom.
        ids_by_key[(name, basis_key[1])] = derived.id
        report(name)

    db.commit()
    db.close()
    return ids_by_key