Ejemplo n.º 1
0
def add_nominal_stems(ctx):
    """Add regular noun stems to the database.

    Reads the CSV file named by ``ctx.config['NOMINAL_STEMS']`` and inserts
    one :class:`NominalStem` row per record. Rows are buffered and written
    in batches through a connection obtained from ``ctx.engine``.

    :param ctx: application context; this function uses its ``engine`` and
                ``config`` attributes.
    """
    # Since there are so many nominal stems, the SQLAlchemy calls here
    # are a little more low-level: each `execute` call receives a list of
    # parameter dictionaries instead of going through ORM objects.
    batch_size = 500
    ins = NominalStem.__table__.insert()
    gender_group = ENUM['gender_group']
    tick = util.tick_every(5000)

    # NOTE(review): no explicit commit is issued here, so persistence
    # relies on the connection's autocommit behaviour -- confirm against
    # the SQLAlchemy version in use.
    conn = ctx.engine.connect()
    try:
        buf = []
        for row in util.read_csv(ctx.config['NOMINAL_STEMS']):
            genders_id = gender_group[row['stem_genders']]

            buf.append({
                'name': row['stem'],
                'pos_id': Tag.NOMINAL,
                'genders_id': genders_id,
            })
            tick(row['stem'])

            # Flush the buffer whenever a full batch has accumulated.
            if len(buf) >= batch_size:
                conn.execute(ins, buf)
                buf = []

        # Add any remainder.
        if buf:
            conn.execute(ins, buf)
    finally:
        # Always release the connection, even if the CSV is malformed or
        # an insert fails part-way through.
        conn.close()
Ejemplo n.º 2
0
def add_nominal_stems(ctx):
    """Add regular noun stems to the database.

    Reads the CSV file named by ``ctx.config['NOMINAL_STEMS']`` and inserts
    one :class:`NominalStem` row per record. Rows are buffered and written
    in batches through a connection obtained from ``ctx.engine``.

    :param ctx: application context; this function uses its ``engine`` and
                ``config`` attributes.
    """
    # Since there are so many nominal stems, the SQLAlchemy calls here
    # are a little more low-level: each `execute` call receives a list of
    # parameter dictionaries instead of going through ORM objects.
    batch_size = 500
    ins = NominalStem.__table__.insert()
    gender_group = ENUM['gender_group']
    tick = util.tick_every(5000)

    # NOTE(review): no explicit commit is issued here, so persistence
    # relies on the connection's autocommit behaviour -- confirm against
    # the SQLAlchemy version in use.
    conn = ctx.engine.connect()
    try:
        buf = []
        for row in util.read_csv(ctx.config['NOMINAL_STEMS']):
            genders_id = gender_group[row['stem_genders']]

            buf.append({
                'name': row['stem'],
                'pos_id': Tag.NOMINAL,
                'genders_id': genders_id,
            })
            tick(row['stem'])

            # Flush the buffer whenever a full batch has accumulated.
            if len(buf) >= batch_size:
                conn.execute(ins, buf)
                buf = []

        # Add any remainder.
        if buf:
            conn.execute(ins, buf)
    finally:
        # Always release the connection, even if the CSV is malformed or
        # an insert fails part-way through.
        conn.close()
Ejemplo n.º 3
0
def add_indeclinables(ctx):
    """Add indeclinables to the database.

    Creates one :class:`Indeclinable` per row of the CSV file named by
    ``ctx.config['INDECLINABLES']``, using the row's ``name`` column, and
    commits once after all rows have been added.

    :param ctx: application context; this function uses its ``session``
                and ``config`` attributes.
    """
    session = ctx.session
    tick = util.tick_every(300)

    try:
        for row in util.read_csv(ctx.config['INDECLINABLES']):
            name = row['name']
            session.add(Indeclinable(name=name))
            tick(name)

        session.commit()
    finally:
        # Close the session even when reading the CSV or committing
        # fails, so it is not left holding an open transaction.
        session.close()
Ejemplo n.º 4
0
def add_indeclinables(ctx):
    """Add indeclinables to the database.

    Creates one :class:`Indeclinable` per row of the CSV file named by
    ``ctx.config['INDECLINABLES']``, using the row's ``name`` column, and
    commits once after all rows have been added.

    :param ctx: application context; this function uses its ``session``
                and ``config`` attributes.
    """
    session = ctx.session
    tick = util.tick_every(300)

    try:
        for row in util.read_csv(ctx.config['INDECLINABLES']):
            name = row['name']
            session.add(Indeclinable(name=name))
            tick(name)

        session.commit()
    finally:
        # Close the session even when reading the CSV or committing
        # fails, so it is not left holding an open transaction.
        session.close()
Ejemplo n.º 5
0
def add_roots(ctx, prefix_map):
    """Populate :class:`Root` and its subclasses.

    Works in three steps:

    1. Create one :class:`Root` per distinct ``(root, hom)`` pair found in
       the CSV file named by ``ctx.config['UNPREFIXED_ROOTS']``.
    2. Re-read the same file and create one :class:`Paradigm` per row,
       linked to the root created in step 1.
    3. Create one :class:`PrefixedRoot` per row of the CSV file named by
       ``ctx.config['PREFIXED_ROOTS']``, linked to its unprefixed basis.

    :param ctx: application context; this function uses its ``session``
                and ``config`` attributes.
    :param prefix_map: currently unused; prefix handling is still a TODO
                       (see the prefixed-roots loop below).
    :returns: dict mapping ``(name, hom)`` to the id of the corresponding
              :class:`Root` or :class:`PrefixedRoot`.
    """

    # TODO: modified roots
    session = ctx.session
    e_vclass = ENUM['vclass']
    e_voice = ENUM['voice']

    root_map = {}  # (name, hom) -> id

    try:
        # First pass: Root
        tick = util.tick_every(100)
        for row in util.read_csv(ctx.config['UNPREFIXED_ROOTS']):
            name, hom = row['root'], row['hom']

            # A root can have multiple paradigms (= multiple appearances)
            if (name, hom) in root_map:
                continue

            root = Root(name=name)
            session.add(root)
            # Flush right away so the id is available for `root_map`.
            session.flush()
            root_map[(name, hom)] = root.id

            tick(name)

        # Second pass: Paradigm
        for row in util.read_csv(ctx.config['UNPREFIXED_ROOTS']):
            name, hom = row['root'], row['hom']
            vclass, voice = row['class'], row['voice']

            assert (name, hom) in root_map, \
                'no root recorded for %r' % ((name, hom),)
            root_id = root_map[(name, hom)]
            paradigm = Paradigm(root_id=root_id, vclass_id=e_vclass[vclass],
                                voice_id=e_voice[voice])
            session.add(paradigm)

        session.commit()

        # Prefixed roots
        for row in util.read_csv(ctx.config['PREFIXED_ROOTS']):
            name = row['prefixed_root']
            basis = row['unprefixed_root']
            hom = row['hom']
            prefixes = row['prefixes'].split('-')

            assert (basis, hom) in root_map, \
                'prefixed root %r has unknown basis %r' % (name, (basis, hom))
            basis_id = root_map[(basis, hom)]
            for prefix in prefixes:
                # TODO: prefixes are parsed but not yet stored anywhere.
                pass

            prefixed_root = PrefixedRoot(name=name, basis_id=basis_id)
            session.add(prefixed_root)
            # Flush right away so the id is available for `root_map`.
            session.flush()
            root_map[(name, hom)] = prefixed_root.id

            tick(name)

        session.commit()
    finally:
        # Close the session even when a CSV row is malformed or a
        # flush/commit fails, so it is not left mid-transaction.
        session.close()

    return root_map
Ejemplo n.º 6
0
def add_roots(ctx, prefix_map):
    """Populate :class:`Root` and its subclasses.

    Works in three steps:

    1. Create one :class:`Root` per distinct ``(root, hom)`` pair found in
       the CSV file named by ``ctx.config['UNPREFIXED_ROOTS']``.
    2. Re-read the same file and create one :class:`Paradigm` per row,
       linked to the root created in step 1.
    3. Create one :class:`PrefixedRoot` per row of the CSV file named by
       ``ctx.config['PREFIXED_ROOTS']``, linked to its unprefixed basis.

    :param ctx: application context; this function uses its ``session``
                and ``config`` attributes.
    :param prefix_map: currently unused; prefix handling is still a TODO
                       (see the prefixed-roots loop below).
    :returns: dict mapping ``(name, hom)`` to the id of the corresponding
              :class:`Root` or :class:`PrefixedRoot`.
    """

    # TODO: modified roots
    session = ctx.session
    e_vclass = ENUM['vclass']
    e_voice = ENUM['voice']

    root_map = {}  # (name, hom) -> id

    try:
        # First pass: Root
        tick = util.tick_every(100)
        for row in util.read_csv(ctx.config['UNPREFIXED_ROOTS']):
            name, hom = row['root'], row['hom']

            # A root can have multiple paradigms (= multiple appearances)
            if (name, hom) in root_map:
                continue

            root = Root(name=name)
            session.add(root)
            # Flush right away so the id is available for `root_map`.
            session.flush()
            root_map[(name, hom)] = root.id

            tick(name)

        # Second pass: Paradigm
        for row in util.read_csv(ctx.config['UNPREFIXED_ROOTS']):
            name, hom = row['root'], row['hom']
            vclass, voice = row['class'], row['voice']

            assert (name, hom) in root_map, \
                'no root recorded for %r' % ((name, hom),)
            root_id = root_map[(name, hom)]
            paradigm = Paradigm(root_id=root_id,
                                vclass_id=e_vclass[vclass],
                                voice_id=e_voice[voice])
            session.add(paradigm)

        session.commit()

        # Prefixed roots
        for row in util.read_csv(ctx.config['PREFIXED_ROOTS']):
            name = row['prefixed_root']
            basis = row['unprefixed_root']
            hom = row['hom']
            prefixes = row['prefixes'].split('-')

            assert (basis, hom) in root_map, \
                'prefixed root %r has unknown basis %r' % (name, (basis, hom))
            basis_id = root_map[(basis, hom)]
            for prefix in prefixes:
                # TODO: prefixes are parsed but not yet stored anywhere.
                pass

            prefixed_root = PrefixedRoot(name=name, basis_id=basis_id)
            session.add(prefixed_root)
            # Flush right away so the id is available for `root_map`.
            session.flush()
            root_map[(name, hom)] = prefixed_root.id

            tick(name)

        session.commit()
    finally:
        # Close the session even when a CSV row is malformed or a
        # flush/commit fails, so it is not left mid-transaction.
        session.close()

    return root_map