def transform_rows(schema_in, transforms_in, rows_in, rows_out, id_field=None):
    """Apply the transforms loaded from `transforms_in` to `rows_in`.

    When the loaded transforms are empty, `rows_in` is handed to `cp_ns`
    together with `rows_out` and nothing else happens. Otherwise the
    transforms are wrapped in a `TransformSequence` and one task per input
    file is dispatched to `_transform_rows` through `parallel_map`.

    Args:
        schema_in: Passed to `json_load`; the sorted keys of the loaded
            mapping form the transformed header.
        transforms_in: Passed to `pickle_load` to obtain the transforms.
        rows_in: Either a single rows file or a directory. For a directory,
            every entry returned by `os.listdir` becomes its own task.
        rows_out: Output location. When `rows_in` is a directory this is
            created with `loom.util.mkdir_p` and each output keeps the name
            of its input entry.
        id_field: Optional field name prepended to the transformed header.

    Raises:
        AssertionError: If `id_field` already appears among the schema keys.
    """
    transforms = pickle_load(transforms_in)
    if not transforms:
        # Nothing to apply: hand the rows straight to cp_ns.
        cp_ns(rows_in, rows_out)
        return

    transform = TransformSequence(transforms)

    # Iterating a mapping yields its keys on both Python 2 and Python 3,
    # whereas dict.iterkeys() only exists on Python 2.
    transformed_header = sorted(json_load(schema_in))
    if id_field is not None:
        assert id_field not in transformed_header, (
            'id_field {!r} collides with a schema field'.format(id_field))
        transformed_header = [id_field] + transformed_header

    if os.path.isdir(rows_in):
        # Create the output directory before listing inputs, as the
        # original code did.
        loom.util.mkdir_p(rows_out)
        tasks = [
            (
                transform,
                transformed_header,
                os.path.join(rows_in, name),
                os.path.join(rows_out, name),
            )
            for name in os.listdir(rows_in)
        ]
    else:
        tasks = [(transform, transformed_header, rows_in, rows_out)]

    parallel_map(_transform_rows, tasks)
def load_transforms(filename):
    """Return a `TransformSequence` built from the transforms in `filename`.

    The transforms are read with `pickle_load` when `filename` exists;
    otherwise the sequence is built from an empty list.
    """
    if os.path.exists(filename):
        loaded = pickle_load(filename)
    else:
        loaded = []
    return TransformSequence(loaded)