Example #1
0
def transform_rows(schema_in, transforms_in, rows_in, rows_out, id_field=None):
    """Run the pickled transforms over ``rows_in``, producing ``rows_out``.

    Args:
        schema_in: Path passed to ``json_load``; the keys of the loaded
            mapping, sorted, form the transformed header.
        transforms_in: Path passed to ``pickle_load`` to obtain the
            transforms.
        rows_in: Input rows; either a single path or a directory whose
            entries are each processed as a separate task.
        rows_out: Output location mirroring ``rows_in`` (a directory is
            created when ``rows_in`` is a directory).
        id_field: Optional column name placed at the front of the header.
            It must not already be one of the schema keys.

    Raises:
        AssertionError: If ``id_field`` is already present in the schema
            keys.
    """
    transforms = pickle_load(transforms_in)
    if not transforms:
        # No transforms to apply: hand input and output straight to cp_ns
        # (presumably a plain copy -- confirm against its definition).
        cp_ns(rows_in, rows_out)
        return

    transform = TransformSequence(transforms)
    # Iterating a mapping yields its keys on both Python 2 and Python 3,
    # whereas dict.iterkeys() exists only on Python 2.
    transformed_header = sorted(json_load(schema_in))
    if id_field is not None:
        assert id_field not in transformed_header, (
            'id_field %r collides with a schema field' % (id_field,))
        transformed_header = [id_field] + transformed_header

    if os.path.isdir(rows_in):
        # Directory input: one task per entry, written under the same name
        # inside rows_out.
        loom.util.mkdir_p(rows_out)
        tasks = [
            (
                transform,
                transformed_header,
                os.path.join(rows_in, f),
                os.path.join(rows_out, f),
            )
            for f in os.listdir(rows_in)
        ]
    else:
        tasks = [(transform, transformed_header, rows_in, rows_out)]
    parallel_map(_transform_rows, tasks)
Example #2
0
def load_transforms(filename):
    """Build a ``TransformSequence`` from the pickle at ``filename``.

    When ``filename`` does not exist, the sequence is built from an empty
    list instead of loading anything.
    """
    if os.path.exists(filename):
        loaded = pickle_load(filename)
    else:
        loaded = []
    return TransformSequence(loaded)