def write_mw_prefixed_roots(prefixed_roots, unprefixed_roots, prefix_groups,
                            sandhi_rules, out_path):
    """Parse the prefixes in a prefix root and write the parsed roots.

    For each prefixed root, split off its prefix group via sandhi rules,
    resolve the homonym number of its unprefixed basis against the known
    root set, and write one CSV row per successfully parsed root.
    """
    # Map each prefix group to its hyphen-joined prefix string.
    with util.read_csv(prefix_groups) as reader:
        prefix_groups = {x['group']: x['prefixes'] for x in reader}
    # All known (root, homonym-number) pairs.
    with util.read_csv(unprefixed_roots) as reader:
        root_set = {(x['root'], x['hom']) for x in reader}
    # Fallback homonym numbers to try when the stated one is unknown.
    candidate_homs = [None] + [str(i) for i in range(1, 10)]

    sandhi = make_sandhi_object(sandhi_rules)

    parsed = []
    for entry in util.read_csv_rows(prefixed_roots):
        splits = sandhi.split_off(entry['prefixed_root'],
                                  entry['unprefixed_root'])
        for split_group in splits:
            if split_group not in prefix_groups:
                continue
            basis = entry['unprefixed_root']
            hom = entry['hom']
            if (basis, hom) not in root_set:
                # Stated hom is unknown; fall back to the first candidate
                # that yields a known (root, hom) pair, if any.
                hom = next((c for c in candidate_homs
                            if (basis, c) in root_set), hom)
                if (basis, hom) not in root_set:
                    continue
            parsed.append((entry['prefixed_root'],
                           prefix_groups[split_group],
                           entry['unprefixed_root'],
                           hom))
            break

    labels = ['prefixed_root', 'prefixes', 'unprefixed_root', 'hom']
    with util.write_csv(out_path, labels) as write_row:
        for record in parsed:
            write_row(dict(zip(labels, record)))
def get_patients():
    """Build a dict of patients keyed by MRN from 'data.csv'.

    Each patient is created from its first row; every row (including the
    first) is registered as an appointment on that patient.
    """
    table = read_csv_rows('data.csv')
    header = table.pop(0)
    mrn_col = header.index('MRN')

    patients = {}
    for record in table:
        key = record[mrn_col]
        if key not in patients:
            patients[key] = Patient(header, record)
        patients[key].add_appt(header, record)
    return patients
def write_prefixed_shs_verbal_indeclinables(final_path, sandhi_rules,
                                            prefixed_roots, root_converter,
                                            out_path):
    """Write prefixed SHS verbal indeclinables.

    :param final_path: path to the unprefixed indeclinable data
    :param sandhi_rules: path to the sandhi rules data
    :param prefixed_roots: path to the prefixed-root data
    :param root_converter: maps an SHS root to a (root, hom) pair, or None
    :param out_path: path to append the prefixed rows to
    """
    sandhi = make_sandhi_object(sandhi_rules)

    # Map each unprefixed root to all of its prefixed-root rows.
    root_to_prefixed = {}
    with util.read_csv(prefixed_roots) as reader:
        for row in reader:
            root_to_prefixed.setdefault(row['unprefixed_root'], []).append(row)

    labels = None
    clean_rows = []
    with util.read_csv(final_path) as reader:
        for row in reader:
            root_pair = root_converter.get(row['root'])
            if root_pair is None:
                continue
            root, hom = root_pair
            row['root'] = root
            for result in root_to_prefixed.get(root, []):
                new_row = row.copy()
                # Join the prefixes onto the form/stem with sandhi applied.
                for field in ['form', 'stem']:
                    if field in row:
                        new_row[field] = sandhi.join(
                            result['prefixes'].split('-') + [new_row[field]])
                new_row['root'] = result['prefixed_root']
                new_row['hom'] = result['hom']
                clean_rows.append(new_row)
        # BUG FIX: build a new list instead of `labels = reader.fieldnames`
        # followed by `labels += ['hom']`, which mutated the reader's own
        # fieldnames list in place. Also hoisted out of the row loop,
        # matching write_prefixed_shs_verbal_data.
        labels = reader.fieldnames + ['hom']

    # Preserve any rows already in the output file, then append the new
    # rows sorted by root.
    old_rows = list(util.read_csv_rows(out_path))
    clean_rows.sort(key=lambda x: util.key_fn(x['root']))
    with util.write_csv(out_path, labels) as write_row:
        for row in old_rows:
            write_row(row)
        for row in clean_rows:
            write_row(row)
def write_prefixed_shs_verbal_data(data_path, prefixed_roots, root_converter,
                                   sandhi_rules, out_path):
    """Write Sanskrit Heritage Site data after converting its roots.

    :param data_path: path to the actual verb data
    :param out_path:
    """
    sandhi = make_sandhi_object(sandhi_rules)

    # Group the prefixed-root rows by their unprefixed root.
    prefixed_by_root = {}
    with util.read_csv(prefixed_roots) as reader:
        for entry in reader:
            key = entry['unprefixed_root']
            if key not in prefixed_by_root:
                prefixed_by_root[key] = []
            prefixed_by_root[key].append(entry)

    labels = None
    converted = []
    with util.read_csv(data_path) as reader:
        for row in reader:
            pair = root_converter.get(row['root'])
            if pair is None:
                continue
            root, hom = pair
            labels = reader.fieldnames + ['hom']
            for prefixed in prefixed_by_root.get(root, []):
                out_row = dict(row)
                prefixes = prefixed['prefixes'].split('-')
                # Join the prefixes onto the form/stem with sandhi applied.
                for field in ('form', 'stem'):
                    if field in out_row:
                        out_row[field] = sandhi.join(prefixes
                                                     + [out_row[field]])
                out_row['root'] = prefixed['prefixed_root']
                out_row['hom'] = hom
                converted.append(out_row)

    # Keep whatever is already in the output file, then append the new
    # rows sorted by root.
    existing = list(util.read_csv_rows(out_path))
    converted.sort(key=lambda r: util.key_fn(r['root']))
    with util.write_csv(out_path, labels) as write_row:
        for r in existing:
            write_row(r)
        for r in converted:
            write_row(r)