Example #1
def write_mw_prefixed_roots(prefixed_roots, unprefixed_roots, prefix_groups,
                            sandhi_rules, out_path):
    """Parse the prefixes in a prefix root and write the parsed roots."""

    with util.read_csv(prefix_groups) as reader:
        prefix_groups = {x['group']: x['prefixes'] for x in reader}
    with util.read_csv(unprefixed_roots) as reader:
        root_set = {(x['root'], x['hom']) for x in reader}

    # Homonym labels to try in order: no label, then '1' through '9'.
    candidate_homs = [None] + [str(i) for i in range(1, 10)]
    sandhi = make_sandhi_object(sandhi_rules)

    rows = []
    for row in util.read_csv_rows(prefixed_roots):
        for group in sandhi.split_off(row['prefixed_root'],
                                      row['unprefixed_root']):
            if group in prefix_groups:
                basis, hom = row['unprefixed_root'], row['hom']
                if (basis, hom) not in root_set:
                    # Fall back to the first homonym label under which the
                    # bare root is attested in the unprefixed-root list.
                    for x in candidate_homs:
                        if (basis, x) in root_set:
                            hom = x
                            break
                    # Skip the row entirely if no homonym matches.
                    if (basis, hom) not in root_set:
                        continue

                rows.append((row['prefixed_root'], prefix_groups[group],
                             row['unprefixed_root'], hom))
                break

    labels = ['prefixed_root', 'prefixes', 'unprefixed_root', 'hom']
    with util.write_csv(out_path, labels) as write_row:
        for row in rows:
            write_row(dict(zip(labels, row)))
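
This and the later examples lean on a small util CSV layer (util.read_csv, util.read_csv_rows, util.write_csv) and a make_sandhi_object factory that are defined elsewhere in the project and not shown here. Purely as a reading aid, inferred from the call sites rather than taken from the actual library, those CSV helpers could be approximated like this:

import contextlib
import csv


@contextlib.contextmanager
def read_csv(path):
    # Yield a DictReader so callers can iterate over dict rows.
    with open(path, newline='', encoding='utf-8') as f:
        yield csv.DictReader(f)


def read_csv_rows(path):
    # Eagerly read the whole file as a list of dict rows.
    with read_csv(path) as reader:
        return list(reader)


@contextlib.contextmanager
def write_csv(path, labels):
    # Yield a callable that writes one dict row at a time.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=labels)
        writer.writeheader()
        yield writer.writerow

The real implementations may differ in encoding, delimiter, or error handling, so treat this only as a sketch of the contract the examples rely on.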
Example #2
def get_patients():
    """Group the rows of data.csv into Patient objects keyed by MRN."""
    rows = read_csv_rows('data.csv')
    fields = rows.pop(0)           # header row
    mrn = fields.index('MRN')      # column index of the medical record number
    patients = {}
    for row in rows:
        # One Patient per MRN; every row also records one appointment.
        if row[mrn] not in patients:
            patients[row[mrn]] = Patient(fields, row)
        patients[row[mrn]].add_appt(fields, row)
    return patients
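
get_patients assumes a Patient class and a bare read_csv_rows helper that returns raw lists with the header row included (unlike the dict-based util.read_csv_rows in the other examples). A minimal, hypothetical sketch of the interface the function relies on:

import csv


def read_csv_rows(path):
    # Return every row, header included, as a list of lists,
    # matching how get_patients pops the header off the front.
    with open(path, newline='', encoding='utf-8') as f:
        return list(csv.reader(f))


class Patient:
    """Hypothetical stand-in for the Patient class used above."""

    def __init__(self, fields, row):
        # Keep the columns from the patient's first row as demographics.
        self.data = dict(zip(fields, row))
        self.appts = []

    def add_appt(self, fields, row):
        # Every CSV row also describes one appointment for that MRN.
        self.appts.append(dict(zip(fields, row)))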
Example #3
def write_prefixed_shs_verbal_indeclinables(final_path, sandhi_rules,
                                            prefixed_roots, root_converter,
                                            out_path):
    """Write prefixed SHS verbal indeclinables."""
    sandhi = make_sandhi_object(sandhi_rules)

    root_to_prefixed = {}
    with util.read_csv(prefixed_roots) as reader:
        for row in reader:
            root_to_prefixed.setdefault(row['unprefixed_root'], []).append(row)

    labels = None
    clean_rows = []
    with util.read_csv(final_path) as reader:
        for row in reader:
            root_pair = root_converter.get(row['root'])
            if root_pair is None:
                continue
            root, hom = root_pair

            row['root'] = root
            # Emit one output row per known prefixed variant of this root,
            # rejoining the prefix chain with the form or stem via sandhi.
            for result in root_to_prefixed.get(root, []):
                new_row = row.copy()
                for field in ['form', 'stem']:
                    if field in row:
                        new_row[field] = sandhi.join(
                            result['prefixes'].split('-') + [new_row[field]])
                new_row['root'] = result['prefixed_root']
                new_row['hom'] = result['hom']
                clean_rows.append(new_row)

        labels = reader.fieldnames

    labels += ['hom']
    old_rows = list(util.read_csv_rows(out_path))
    clean_rows.sort(key=lambda x: util.key_fn(x['root']))
    with util.write_csv(out_path, labels) as write_row:
        for row in old_rows:
            write_row(row)
        for row in clean_rows:
            write_row(row)
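
The inner loop builds the argument to sandhi.join by splitting the prefix chain on '-' and appending the inflected form or stem. As a toy illustration of that list construction only (the phonetic joining itself is done by the object returned from make_sandhi_object, and the row values below are made up):

# Hypothetical row from the prefixed-roots CSV.
result = {'prefixed_root': 'anugam', 'prefixes': 'anu', 'unprefixed_root': 'gam'}
form = 'gatvA'

parts = result['prefixes'].split('-') + [form]
print(parts)   # ['anu', 'gatvA']; a chain like 'sam-A' would contribute two prefix items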
Example #4
def write_prefixed_shs_verbal_data(data_path, prefixed_roots, root_converter,
                                   sandhi_rules, out_path):
    """Write Sanskrit Heritage Site data after converting its roots.

    :param data_path: path to the actual verb data
    :param out_path: path to which the converted rows are written
    """
    sandhi = make_sandhi_object(sandhi_rules)

    root_to_prefixed = {}
    with util.read_csv(prefixed_roots) as reader:
        for row in reader:
            root_to_prefixed.setdefault(row['unprefixed_root'], []).append(row)

    labels = None
    clean_rows = []
    with util.read_csv(data_path) as reader:
        for row in reader:
            root_pair = root_converter.get(row['root'])
            if root_pair is None:
                continue
            root, hom = root_pair

            for result in root_to_prefixed.get(root, []):
                new_row = row.copy()
                for field in ['form', 'stem']:
                    if field in row:
                        new_row[field] = sandhi.join(
                            result['prefixes'].split('-') + [new_row[field]])
                new_row['root'] = result['prefixed_root']
                new_row['hom'] = hom
                clean_rows.append(new_row)
        labels = reader.fieldnames + ['hom']

    old_rows = list(util.read_csv_rows(out_path))
    clean_rows.sort(key=lambda x: util.key_fn(x['root']))
    with util.write_csv(out_path, labels) as write_row:
        for row in old_rows:
            write_row(row)
        for row in clean_rows:
            write_row(row)
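
Under the same assumptions about the util helpers, a call to write_prefixed_shs_verbal_data might look like the following; every path and the inline root_converter mapping are placeholders, not values from the project:

# Hypothetical invocation; all paths and mappings are placeholders.
root_converter = {
    'gam': ('gam', None),   # SHS root name -> (project root, homonym label or None)
}
write_prefixed_shs_verbal_data(
    data_path='shs/verb_data.csv',
    prefixed_roots='mw/prefixed_roots.csv',
    root_converter=root_converter,
    sandhi_rules='sandhi_rules.csv',
    out_path='all/verb_data.csv')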