Example #1
0
def reconciled_output(
        args, unreconciled, reconciled, explanations, column_types):
    """
    Shape the reconciled data frame, write it to CSV, and return it.

    The frame is prepared in this order:

    1) Columns are ordered with util.sort_columns, the first three entries
       of that ordering are dropped, and missing cells are filled with ''.

    2) Every column type plug-in that defines adjust_reconciled_columns is
       given the frame to rework (presumably where mmr columns get split
       into individual Mean, Mode, and Range columns -- confirm in the
       plug-ins).

    3) If the --explanations option is selected then explanation columns
       are added via add_explanations.

    4) If the --transcribers option is selected then per-user columns are
       added via add_transcribers.

    The result is written to the path in args.reconciled.
    """
    ordered = util.sort_columns(args, reconciled.columns, column_types)
    # Discard the three leading entries of the ordering, one at a time.
    # NOTE(review): presumably these are key/index columns -- confirm
    # against util.sort_columns.
    for _ in range(3):
        del ordered[0]
    reconciled = reconciled.reindex(ordered, axis='columns').fillna('')

    for plugin in util.get_plugins('column_types').values():
        if not hasattr(plugin, 'adjust_reconciled_columns'):
            continue
        reconciled = plugin.adjust_reconciled_columns(reconciled, column_types)

    if args.explanations:
        reconciled = add_explanations(reconciled, explanations, column_types)

    if args.transcribers:
        reconciled = add_transcribers(reconciled, unreconciled, column_types)

    reconciled.to_csv(args.reconciled)
    return reconciled
Example #2
0
def main():
    """Reconcile the data.

    Parse the command line, read the unreconciled data with the selected
    format plug-in, validate the columns, and then write whichever outputs
    were requested: the unreconciled CSV, the reconciled CSV, the summary
    report, the merged CSV, and finally the zip archive.

    Exits through sys.exit with a message when the input has no rows.
    """
    args = parse_command_line()

    reader = util.get_plugins('formats')[args.format]
    unreconciled, column_types = reader.read(args)

    row_count = unreconciled.shape[0]
    if row_count == 0:
        sys.exit('Workflow {} has no data.'.format(args.workflow_id))

    plugins = util.get_plugins('column_types')
    column_types = get_column_types(args, column_types)
    validate_columns(args, column_types, unreconciled, plugins=plugins)

    if args.unreconciled:
        unreconciled.to_csv(args.unreconciled, index=False)

    # Reconciliation is only needed when some output derived from it
    # was requested.
    needs_reconciling = args.reconciled or args.summary or args.merged
    if needs_reconciling:
        reconciled, explanations = reconciler.build(
            args, unreconciled, column_types, plugins=plugins)

        if args.reconciled:
            ordered = util.sort_columns(
                args, reconciled.columns, column_types)
            # Discard the three leading entries of the ordering.
            for _ in range(3):
                del ordered[0]
            # The trimmed frame replaces `reconciled`, so the summary and
            # merged steps below receive it whenever this branch runs.
            reconciled = reconciled.reindex(ordered, axis=1).fillna('')
            reconciled.to_csv(args.reconciled)

        if args.summary:
            summary.report(
                args, unreconciled, reconciled, explanations, column_types)

        if args.merged:
            combined = merged.merge(
                args, unreconciled, reconciled, explanations, column_types)
            combined.to_csv(args.merged, index=False)

    if args.zip:
        zip_files(args)
def validate_columns(args, column_types, unreconciled):
    """Check the requested columns against the unreconciled data frame.

    Two checks are made: missing_headers (given the data frame, the column
    types, and the names of the available column type plug-ins) and
    missing_key_columns (given the arguments and the data frame). If either
    reports a problem, error_exit is called.
    """
    plugin_types = list(util.get_plugins('column_types'))

    # Both checks always run; their results are combined with `|`, which
    # does not short-circuit.
    bad_headers = missing_headers(unreconciled, column_types, plugin_types)
    bad_keys = missing_key_columns(args, unreconciled)

    if bad_headers | bad_keys:
        error_exit(unreconciled, plugin_types)
def main():
    """Reconcile the data.

    Parse the command line, read the unreconciled data with the selected
    format plug-in, validate the columns, and then produce whichever
    outputs were requested: the unreconciled CSV, the reconciled outputs
    (delegated to reconcile_data), and the zip archive.

    Exits through sys.exit with a message when the input has no rows.
    """
    args = parse_command_line()

    reader = util.get_plugins('formats')[args.format]
    unreconciled, column_types = reader.read(args)

    row_count = unreconciled.shape[0]
    if row_count == 0:
        sys.exit('Workflow {} has no data.'.format(args.workflow_id))

    column_types = get_column_types(args, column_types)
    validate_columns(args, column_types, unreconciled)

    if args.unreconciled:
        unreconciled.to_csv(args.unreconciled, index=False)

    # Reconciliation is only needed when some output derived from it
    # was requested.
    needs_reconciling = args.reconciled or args.summary or args.merged
    if needs_reconciling:
        reconcile_data(args, unreconciled, column_types)

    if args.zip:
        zip_files(args)
def build(args, unreconciled, column_types):
    """Build the reconciled and explanations data-frames.

    Each column named in column_types is paired with the plug-in for its
    'type'. The unreconciled rows are grouped by args.group_by and every
    such column present in the data is aggregated with its plug-in's
    reconcile function. Each aggregated cell is then indexed: element 0
    goes to the explanations frame (skipped for types listed in
    NO_EXPLANATIONS) and element 1 becomes the reconciled value.

    Returns the tuple (reconciled, explanations).
    """
    plugins = util.get_plugins('column_types')
    reconcilers = {
        name: plugins[spec['type']] for name, spec in column_types.items()}

    # One aggregator per configured column that is actually in the data.
    aggregators = {
        name: partial(plugin.reconcile, args=args)
        for name, plugin in reconcilers.items()
        if name in unreconciled.columns}

    # Move the user column into the index so that it stays attached to the
    # values handed to each reconciler.
    with_users = unreconciled.set_index(args.user_column, append=True)
    reconciled = with_users.groupby(args.group_by).agg(aggregators, args)

    explanations = pd.DataFrame()
    for column in reconciled.columns:
        if not reconcilers.get(column):
            continue
        # NOTE(review): presumably each cell is an (explanation, value)
        # pair returned by the plug-in's reconcile -- confirm.
        if column_types[column]['type'] not in NO_EXPLANATIONS:
            explanations[column] = reconciled[column].apply(
                lambda pair: pair[0])
        reconciled[column] = reconciled[column].apply(lambda pair: pair[1])
    return reconciled, explanations