# NOTE(review): the line below is a whitespace-collapsed fragment. As laid out
# here it begins with '#', so Python reads the entire line as one comment and
# none of the statements in it execute.
# Read as code, it:
#   - takes RevertRate.header() as metric_header;
#   - maps the field prefixes 'reverted_', 'reverts_' and 'revisions_' to
#     1, 2 and 3 (presumably column indices -- TODO confirm);
#   - builds revert_stats_agg with build_numpy_op_agg over [sum, mean];
#   - reads METRIC_AGG_METHOD_KWARGS from revert_stats_agg and writes the same
#     object straight back, unchanged;
#   - wraps boolean_rate with decorator_builder(RevertRate.header()) as
#     revert_prop_agg and sets its METRIC_AGG_METHOD_FLAG (True) and
#     METRIC_AGG_METHOD_NAME ('revert_prop_agg') attributes.
# The final setattr (METRIC_AGG_METHOD_HEAD) is cut off mid-list in this view.
# ========================== # DEFINE METRIC AGGREGATORS # ========================== metric_header = RevertRate.header() field_prefixes = \ { 'reverted_': 1, 'reverts_': 2, 'revisions_': 3, } # Build "stats" aggregator op_list = [sum, mean] revert_stats_agg = build_numpy_op_agg(build_agg_meta(op_list, field_prefixes), metric_header, 'revert_stats_agg') agg_kwargs = getattr(revert_stats_agg, METRIC_AGG_METHOD_KWARGS) setattr(revert_stats_agg, METRIC_AGG_METHOD_KWARGS, agg_kwargs) # Build proportion aggregator revert_prop_agg = boolean_rate revert_prop_agg = decorator_builder(RevertRate.header())( revert_prop_agg) setattr(revert_prop_agg, METRIC_AGG_METHOD_FLAG, True) setattr(revert_prop_agg, METRIC_AGG_METHOD_NAME, 'revert_prop_agg') setattr(revert_prop_agg, METRIC_AGG_METHOD_HEAD, ['total_users', 'total_reverted',
# NOTE(review): the line below is a whitespace-collapsed fragment. As laid out
# here it begins with '#', so Python reads the entire line as one comment and
# none of the statements in it execute.
# Read as code, it:
#   - takes BytesAdded.header() as metric_header;
#   - maps the field prefixes 'net_', 'abs_', 'pos_', 'neg_' and 'count_' to
#     1 through 5 (presumably column indices -- TODO confirm);
#   - builds one aggregator per operation with build_numpy_op_agg, each over a
#     single-element op list: ba_mean_agg ([mean]), ba_std_agg ([std]),
#     ba_sum_agg ([sum]), ba_median_agg ([median]) and ba_min_agg ([min]).
# The final assignment (ba_max_agg, over [max]) is cut off mid-call in this
# view.
# DEFINE METRIC AGGREGATORS # ========================== metric_header = BytesAdded.header() field_prefixes = { 'net_': 1, 'abs_': 2, 'pos_': 3, 'neg_': 4, 'count_': 5, } # Build "mean" decorator ba_mean_agg = build_numpy_op_agg(build_agg_meta([mean], field_prefixes), metric_header, 'ba_mean_agg') # Build "standard deviation" decorator ba_std_agg = build_numpy_op_agg(build_agg_meta([std], field_prefixes), metric_header, 'ba_std_agg') # Build "sum" decorator ba_sum_agg = build_numpy_op_agg(build_agg_meta([sum], field_prefixes), metric_header, 'ba_sum_agg') # Build "median" decorator ba_median_agg = build_numpy_op_agg(build_agg_meta([median], field_prefixes), metric_header, 'ba_median_agg') # Build "min" decorator ba_min_agg = build_numpy_op_agg(build_agg_meta([min], field_prefixes), metric_header, 'ba_min_agg') # Build "max" decorator ba_max_agg = build_numpy_op_agg(build_agg_meta([max], field_prefixes),
# NOTE(review): the line below is a whitespace-collapsed fragment made of two
# parts.
#   1. The tail of a function whose header is not visible here: when
#      metric_params.log_ is set it logs the current PID (os.getpid()) and
#      dropped_users via logging.info, then returns results. Collapsed onto
#      one line like this, the `if ...: call(...) return results` sequence is
#      not valid Python.
#   2. Everything from the first '#' onward, which Python reads as a comment
#      in this layout. Read as code, it imports build_numpy_op_agg,
#      build_agg_meta and METRIC_AGG_METHOD_KWARGS, takes
#      PagesCreated.header() as metric_header, maps the 'count_' prefix to 1,
#      builds pages_created_stats_agg over
#      [sum, mean, std, median, min, max], and finally reads
#      METRIC_AGG_METHOD_KWARGS from it and writes the same object straight
#      back, unchanged.
if metric_params.log_: logging.info(__name__ + '::Processed PID = %s. ' 'Dropped users = %s.' % ( os.getpid(), str(dropped_users))) return results # ========================== # DEFINE METRIC AGGREGATORS # ========================== from user_metrics.etl.aggregator import build_numpy_op_agg, build_agg_meta from user_metrics.metrics.user_metric import METRIC_AGG_METHOD_KWARGS metric_header = PagesCreated.header() field_prefixes =\ { 'count_': 1, } # Build "dist" decorator op_list = [sum, mean, std, median, min, max] pages_created_stats_agg = build_numpy_op_agg( build_agg_meta(op_list, field_prefixes), metric_header, 'pages_created_stats_agg') agg_kwargs = getattr(pages_created_stats_agg, METRIC_AGG_METHOD_KWARGS) setattr(pages_created_stats_agg, METRIC_AGG_METHOD_KWARGS, agg_kwargs)
# ==========================
# DEFINE METRIC AGGREGATORS
# ==========================

# Build "average" aggregator: wrap weighted_rate with
# decorator_builder(TimeToThreshold.header()) and attach the flag, name,
# header and kwargs attributes to the wrapped callable.
ttt_avg_agg = weighted_rate
ttt_avg_agg = decorator_builder(TimeToThreshold.header())(ttt_avg_agg)

setattr(ttt_avg_agg, um.METRIC_AGG_METHOD_FLAG, True)
setattr(ttt_avg_agg, um.METRIC_AGG_METHOD_NAME, "ttt_avg_agg")
setattr(ttt_avg_agg, um.METRIC_AGG_METHOD_HEAD,
        ["total_users", "total_weight", "average"])
setattr(ttt_avg_agg, um.METRIC_AGG_METHOD_KWARGS, {"val_idx": 1})

metric_header = TimeToThreshold.header()

# Field prefix -> 1 (presumably the column index of the time difference
# value in a result row -- TODO confirm against TimeToThreshold.header()).
field_prefixes = {"time_diff_": 1}

# Build "dist" decorator: median / min / max over the prefixed field.
op_list = [median, min, max]
ttt_stats_agg = build_numpy_op_agg(build_agg_meta(op_list, field_prefixes),
                                   metric_header, "ttt_stats_agg")

if __name__ == "__main__":
    # Ad-hoc smoke run: process a single hard-coded user id and print each
    # result row.
    for i in TimeToThreshold(threshold_type_class="edit_count_threshold",
                             first_edit=0,
                             threshold_edit=1).process([13234584]):
        # print(i) with a single argument behaves the same on Python 2 and 3;
        # the bare `print i` statement is a SyntaxError on Python 3.
        print(i)