def reconciled_output(
        args, unreconciled, reconciled, explanations, column_types):
    """Write the reconciled data-frame to CSV, shaped by the given arguments.

    Steps, in order:

    1) Sort the reconciled columns with util.sort_columns, drop the first
       three of them, and reindex the data-frame to what is left, filling
       missing values with empty strings.
    2) Let every column-type plug-in that defines adjust_reconciled_columns
       rewrite the data-frame (e.g. splitting mmr columns into individual
       Mean, Mode, and Range columns).
    3) If the --explanations option is selected then add an explanations
       column for every output column just after the reconciled output.
    4) If the --transcribers option is selected then add two columns for
       every user. One for the user name and one for the value entered.

    The result is written to args.reconciled and also returned.
    """
    ordered = util.sort_columns(args, reconciled.columns, column_types)
    # Discard the three leading sorted columns.
    # NOTE(review): presumably these are bookkeeping columns -- confirm
    # against util.sort_columns.
    for _ in range(3):
        del ordered[0]
    reconciled = reconciled.reindex(ordered, axis='columns').fillna('')

    # Only plug-ins that opt in via adjust_reconciled_columns are applied.
    for plugin in util.get_plugins('column_types').values():
        if not hasattr(plugin, 'adjust_reconciled_columns'):
            continue
        reconciled = plugin.adjust_reconciled_columns(
            reconciled, column_types)

    if args.explanations:
        reconciled = add_explanations(
            reconciled, explanations, column_types)

    if args.transcribers:
        reconciled = add_transcribers(
            reconciled, unreconciled, column_types)

    reconciled.to_csv(args.reconciled)
    return reconciled
def main():
    """Reconcile the data.

    Parse the command line, read the unreconciled data with the selected
    format plug-in, validate its columns, and then write each output that
    was requested: the unreconciled CSV, the reconciled CSV, the summary
    report, the merged CSV, and finally the zip archive.
    """
    args = parse_command_line()

    reader = util.get_plugins('formats')[args.format]
    unreconciled, column_types = reader.read(args)

    # Nothing to reconcile: stop with a message instead of writing outputs.
    row_count = unreconciled.shape[0]
    if row_count == 0:
        sys.exit('Workflow {} has no data.'.format(args.workflow_id))

    # The column-type plug-ins are loaded once and handed to the callees.
    plugins = util.get_plugins('column_types')
    column_types = get_column_types(args, column_types)
    validate_columns(args, column_types, unreconciled, plugins=plugins)

    if args.unreconciled:
        unreconciled.to_csv(args.unreconciled, index=False)

    # The reconciled frame is shared by all three of these outputs.
    wants_reconciliation = args.reconciled or args.summary or args.merged
    if wants_reconciliation:
        reconciled, explanations = reconciler.build(
            args, unreconciled, column_types, plugins=plugins)

        if args.reconciled:
            ordered = util.sort_columns(
                args, reconciled.columns, column_types)
            # Discard the three leading sorted columns before writing.
            for _ in range(3):
                del ordered[0]
            reconciled = reconciled.reindex(ordered, axis=1).fillna('')
            reconciled.to_csv(args.reconciled)

        if args.summary:
            summary.report(
                args, unreconciled, reconciled, explanations, column_types)

        if args.merged:
            merged_frame = merged.merge(
                args, unreconciled, reconciled, explanations, column_types)
            merged_frame.to_csv(args.merged, index=False)

    if args.zip:
        zip_files(args)
def validate_columns(args, column_types, unreconciled, plugins=None):
    """Validate that the columns are in the unreconciled data frame.

    Also verify that the column types are an existing plug-in.

    Args:
        args: Parsed command-line arguments, forwarded to
            missing_key_columns.
        column_types: The column type definitions to check.
        unreconciled: The unreconciled data-frame.
        plugins: Optional mapping of already loaded column-type plug-ins.
            When omitted (or None) the plug-ins are loaded with
            util.get_plugins('column_types'), so callers that already hold
            the mapping can pass it in instead of reloading it.

    If either check flags a problem, error_exit is called with the
    data-frame and the list of known plug-in types.
    """
    if plugins is None:
        plugins = util.get_plugins('column_types')
    plugin_types = list(plugins)

    # "|=" (not "or") so that both checks always run, even when the first
    # one has already found a problem.
    error = missing_headers(unreconciled, column_types, plugin_types)
    error |= missing_key_columns(args, unreconciled)

    if error:
        error_exit(unreconciled, plugin_types)
def main():
    """Reconcile the data.

    Parse the command line, read the unreconciled data with the selected
    format plug-in, validate its columns, and then produce whichever outputs
    were requested on the command line.
    """
    args = parse_command_line()

    reader = util.get_plugins('formats')[args.format]
    unreconciled, column_types = reader.read(args)

    # Nothing to reconcile: stop with a message instead of writing outputs.
    row_count = unreconciled.shape[0]
    if row_count == 0:
        sys.exit('Workflow {} has no data.'.format(args.workflow_id))

    column_types = get_column_types(args, column_types)
    validate_columns(args, column_types, unreconciled)

    if args.unreconciled:
        unreconciled.to_csv(args.unreconciled, index=False)

    # Any one of these three outputs requires the reconciliation step.
    wants_reconciliation = args.reconciled or args.summary or args.merged
    if wants_reconciliation:
        reconcile_data(args, unreconciled, column_types)

    if args.zip:
        zip_files(args)
def build(args, unreconciled, column_types, plugins=None):
    """Build the reconciled and explanations data-frames.

    Args:
        args: Parsed command-line arguments. Reads args.user_column and
            args.group_by, and is forwarded to every reconciler.
        unreconciled: The unreconciled data-frame.
        column_types: Mapping of column name to a dict whose 'type' entry
            names the column-type plug-in used to reconcile that column.
        plugins: Optional mapping of already loaded column-type plug-ins.
            When omitted (or None) the plug-ins are loaded with
            util.get_plugins('column_types'), so callers that already hold
            the mapping can pass it in instead of reloading it.

    Returns:
        A (reconciled, explanations) pair of data-frames.
    """
    if plugins is None:
        plugins = util.get_plugins('column_types')
    reconcilers = {k: plugins[v['type']] for k, v in column_types.items()}

    # Get group and then reconcile the data. Only columns that are actually
    # present in the unreconciled data get an aggregator.
    aggregators = {
        name: partial(plugin.reconcile, args=args)
        for name, plugin in reconcilers.items()
        if name in unreconciled.columns}

    # keep the userID associated with the data handed to the reconciler.
    indexed = unreconciled.set_index(args.user_column, append=True)
    reconciled = indexed.groupby(args.group_by).agg(aggregators, args)

    # Every aggregated cell is indexed as a pair: item 0 is used as the
    # explanation and item 1 as the reconciled value.
    # NOTE(review): the value-unpacking line is placed at the
    # "if reconciler" level, i.e. it also runs for NO_EXPLANATIONS types --
    # confirm that those reconcilers return pairs as well.
    explanations = pd.DataFrame()
    for column in reconciled.columns:
        reconciler = reconcilers.get(column)
        if not reconciler:
            continue
        if column_types[column]['type'] not in NO_EXPLANATIONS:
            explanations[column] = reconciled[column].apply(lambda x: x[0])
        reconciled[column] = reconciled[column].apply(lambda x: x[1])

    return reconciled, explanations