def build_key_str(dimensions):
    """Build a composite key string for *dimensions*.

    The key alternates dimension name and parsed transaction value, in
    sorted dimension order, and is joined by ``construct_key``.

    NOTE(review): relies on module-level ``mapping``, ``transaction``,
    ``DIMENSION_PARSERS_MAP`` and ``construct_key`` being in scope — this
    mirrors the helper nested inside ``actual_worker``.
    """
    parts = []
    for dim in sorted(dimensions):
        spec = mapping[dim]
        parse = DIMENSION_PARSERS_MAP[spec["type"]]
        parts.extend([dim, parse(transaction[spec["field"]])])
    return construct_key(parts)
def actual_worker(analytics_name, sub, app):
    """Consume transaction messages from *sub* and update analytics state.

    Loads the analytics definition named *analytics_name* from the config
    db, then loops over pubsub messages forever, maintaining per-dimension
    reference counts and per-measure values in the data db.

    :param analytics_name: name of the analytics definition to serve.
    :param sub: pubsub-style object whose ``listen()`` yields dicts with
        ``type`` / ``channel`` / ``data`` keys (redis-py pubsub shape —
        TODO confirm against caller).
    :param app: application handle passed through to the db factories.
    """
    log = get_worker_log(analytics_name)
    try:
        conf_db = get_conf_db(app, exclusive=True)
        defn = conf_db.get("Analytics:ByName:%s" % analytics_name)
        analytics = Analytics(defn)
        # Data may live in a dedicated db; otherwise use the app default.
        if analytics["data_db"]:
            data_db = get_data_db(analytics["data_db"], app=app)
        else:
            data_db = get_data_db(app=app)
        measures = set(analytics["measures"])
        query_dimensions = set(analytics["query_dimensions"])
        slice_dimensions = set(analytics["slice_dimensions"])
        mapping = analytics["mapping"]
        for content in sub.listen():
            if content["type"] != "message":
                continue
            try:
                data = json.loads(content["data"])
                transaction = data["payload"]
                tr_type = data["tr_type"]
                snoq_dimensions = slice_dimensions - query_dimensions
                qnos_dimensions = query_dimensions - slice_dimensions

                def build_key_str(dimensions):
                    # Key alternates dimension name / parsed value, in
                    # sorted dimension order.
                    key = []
                    for dimension in sorted(dimensions):
                        d_type = mapping[dimension]["type"]
                        function = DIMENSION_PARSERS_MAP[d_type]
                        field = mapping[dimension]["field"]
                        key.append(dimension)
                        key.append(function(transaction[field]))
                    return construct_key(key)

                query_key_str = build_key_str(query_dimensions)
                slice_key_str = build_key_str(slice_dimensions)
                snoq_key_str = build_key_str(snoq_dimensions)

                # Update reference counts for query-not-in-slice dimensions.
                for dimension in sorted(qnos_dimensions):
                    field = mapping[dimension]["field"]
                    ref_count_key = construct_key(
                        'RefCount', slice_key_str, dimension)
                    if tr_type == "insert":
                        data_db.hincrby(ref_count_key, transaction[field], 1)
                    elif tr_type == "delete":
                        value = data_db.hincrby(
                            ref_count_key, transaction[field], -1)
                        # Drop the hash field once nothing references it.
                        if value == 0:
                            data_db.hdel(ref_count_key, transaction[field])
                    # BUGFIX: the zero-check used to run outside the
                    # insert/delete branches, so an unknown tr_type raised
                    # NameError on `value`, and an insert that brought a
                    # corrupt negative count to 0 deleted the field.

                # Each measure gets added one at a time.
                for m in measures:
                    if mapping[m]["resource"] != content["channel"]:
                        continue
                    key_str = construct_key(m, query_key_str, snoq_key_str)
                    function = MEASURING_FUNCTIONS_MAP[mapping[m]["type"]]
                    field = mapping[m].get("field", None)
                    conditions = mapping[m].get("conditions", [])
                    kwargs = {"key_str": key_str}
                    for condition in conditions:
                        condition_field = condition["field"]
                        equals = condition.get("equals", None)
                        not_equals = condition.get("not_equals", None)
                        if equals is not None:
                            if transaction[condition_field] != equals:
                                break  # Failed equals condition
                        elif not_equals is not None:
                            if transaction[condition_field] == not_equals:
                                break  # Failed not-equals condition
                    else:
                        # for/else: reached only when no condition broke,
                        # i.e. all conditions passed.
                        if field is not None:
                            kwargs["field_val"] = transaction[field]
                        function(data_db, tr_type, **kwargs)
            except Exception:
                log.error("Error while consuming transaction.\n%s"
                          % traceback.format_exc())
                log.debug("Resource was: %s" % content["channel"])
                log.debug("Data was: %s" % json.dumps(data))
    except Exception as e:
        # `as e` for consistency with the rest of the file (works on
        # Python 2.6+ and 3.x, unlike the old `except Exception, e`).
        log.critical("Worker crashed.\nError was: %s" % str(e))
        log.debug("Traceback: %s" % traceback.format_exc())
        signal.pause()
def browse_analytics(a_name, slice_args):
    """Query aggregated measure values for the analytics named *a_name*.

    Loads the analytics definition, expands the requested slice dimensions
    into value sets, enumerates every combination of query-dimension
    values, and reads (or sums across slice-only keys) the stored measure
    values from the data db.

    :param a_name: analytics name; must be in the "Analytics:Active" set.
    :param slice_args: mapping of slice-dimension name -> raw argument,
        expanded through DIMENSION_EXPANSION_MAP.
    :returns: ``{"status": "OK", "data": [row, ...]}`` where each row maps
        dimension names to values and measure names to numbers.
    :raises ServiceUnavailable: when the stored definition fails to parse.
    Aborts 404 for unknown/inactive analytics, 400 for bad slice args.
    """
    conf_db = get_conf_db()
    if not conf_db.sismember("Analytics:Active", a_name):
        abort(404)
    analytics_definition = conf_db.get("Analytics:ByName:%s" % a_name)
    if analytics_definition is None:
        abort(404)
    try:
        analytics = Analytics(analytics_definition)
    except (ValueError, AssertionError) as e:
        raise ServiceUnavailable(e.args)
    data_db = get_data_db(analytics["data_db"])
    mapping = analytics["mapping"]
    measures = analytics["measures"]
    query_dimensions = set(analytics["query_dimensions"])
    slice_dimensions = set(analytics["slice_dimensions"])
    # Expand each slice dimension's raw argument into its value set.
    d_range_dict = {}
    for d in slice_dimensions:
        expand = DIMENSION_EXPANSION_MAP[mapping[d]["type"]]
        try:
            d_range_dict[d] = expand(slice_args[d])
        except ValueError as e:
            abort(400, e.args)
        except KeyError as e:
            abort(400, ("Missing slice parameter", str(e.args[0])))

    def get_range(dimensions):
        # Sorted (dimension, sorted values) pairs — the shape consumed by
        # combinatorial_keys.
        return [(d, sorted(d_range_dict[d])) for d in sorted(dimensions)]

    qnos_dimensions = query_dimensions - slice_dimensions
    snoq_dimensions = slice_dimensions - query_dimensions
    s_range = get_range(slice_dimensions)
    snoq_range = get_range(snoq_dimensions)
    # Query dimensions that were not sliced on get their value range from
    # the RefCount hashes maintained by the worker.
    for qnos in qnos_dimensions:
        d_range_dict[qnos] = set()
        for s_key in combinatorial_keys(s_range):
            refcount_key_str = construct_key('RefCount', s_key, qnos)
            d_range_dict[qnos] |= set(data_db.hkeys(refcount_key_str))
    q_range = get_range(query_dimensions)
    # PERF: snoq_keys is loop-invariant — previously recomputed once per
    # (row, measure) pair; compute it once up front.
    snoq_keys = list(combinatorial_keys(snoq_range))
    output = []
    for q_key in combinatorial_keys(q_range):
        # q_key alternates name/value, e.g. (Date, 20110808, Practice, 1);
        # fold the pairs into the row dict.
        row = {}
        key = None
        key_is_set = False
        for q in q_key:
            if key_is_set:
                row[key] = q
                key_is_set = False
            else:
                key = q
                key_is_set = True
        for measure in measures:
            m_type = mapping[measure]["type"]
            is_float = m_type.endswith("float")
            row[measure] = 0
            if len(snoq_keys) < 2:
                # Zero or one slice-only key: read the value directly.
                snoq_key = snoq_keys[0] if snoq_keys else None
                val_key = construct_key(measure, q_key, snoq_key)
                if m_type == "unique":
                    val = data_db.scard(val_key)
                else:
                    val = data_db.get(val_key)
                if val:
                    row[measure] = float(val) if is_float else int(val)
            else:
                # Several slice-only keys: sum across them. Set-cardinality
                # ("unique") measures cannot be summed; the original
                # aborted on the loop's first iteration anyway, so the
                # check is hoisted out of the loop.
                if m_type == "unique":
                    abort(400, ("Measure type 'unique' cannot be aggregated"))
                for snoq_key in snoq_keys:
                    val_key = construct_key(measure, q_key, snoq_key)
                    val = data_db.get(val_key)
                    if val:
                        row[measure] += float(val) if is_float else int(val)
        output.append(row)
    return {"status": "OK", "data": output}
def browse_analytics(a_name, slice_args):
    # NOTE(review): this is a near byte-for-byte duplicate of the earlier
    # browse_analytics definition in this file; being defined later, this
    # copy is the one bound at import time. One copy should be removed.
    """Query aggregated measure values for the analytics named *a_name*.

    :param a_name: analytics name; must be in the "Analytics:Active" set.
    :param slice_args: mapping of slice-dimension name -> raw argument,
        expanded through DIMENSION_EXPANSION_MAP.
    :returns: dict with "status" and "data" (list of row dicts mapping
        dimension names to values and measure names to numbers).
    :raises ServiceUnavailable: when the stored definition fails to parse.
    Aborts 404 for unknown/inactive analytics, 400 for bad slice args.
    """
    conf_db = get_conf_db()
    if not conf_db.sismember("Analytics:Active", a_name):
        abort(404)
    analytics_definition = conf_db.get("Analytics:ByName:%s" % a_name)
    if analytics_definition is None:
        abort(404)
    try:
        analytics = Analytics(analytics_definition)
    except (ValueError, AssertionError) as e:
        raise ServiceUnavailable(e.args)
    data_db = get_data_db(analytics["data_db"])
    mapping = analytics["mapping"]
    measures = analytics["measures"]
    query_dimensions = set(analytics["query_dimensions"])
    slice_dimensions = set(analytics["slice_dimensions"])
    d_range = []
    # Expand each slice dimension's raw argument into its value set.
    for d in slice_dimensions:
        expand = DIMENSION_EXPANSION_MAP[mapping[d]["type"]]
        try:
            value_set = expand(slice_args[d])
            d_range.append((d, value_set))
        except ValueError as e:
            abort(400, e.args)
        except KeyError as e:
            abort(400, ("Missing slice parameter", str(e.args[0])))
    d_range_dict = dict(d_range)

    def get_range(dimensions):
        # Sorted (dimension, sorted values) pairs — the shape consumed by
        # combinatorial_keys.
        d_range = map(lambda d: (d, sorted(list(d_range_dict[d]))),
                      sorted(list(dimensions)))
        return d_range
    qnos_dimensions = query_dimensions - slice_dimensions
    snoq_dimensions = slice_dimensions - query_dimensions
    s_range = get_range(slice_dimensions)
    snoq_range = get_range(snoq_dimensions)
    # Query dimensions that were not sliced on get their value range from
    # the RefCount hashes maintained by the worker.
    for qnos in qnos_dimensions:
        d_range_dict[qnos] = set()
        for s_key in combinatorial_keys(s_range):
            refcount_key_str = construct_key('RefCount', s_key, qnos)
            d_range_dict[qnos] |= set(data_db.hkeys(refcount_key_str))
    q_range = get_range(query_dimensions)
    output = []
    for q_key in combinatorial_keys(q_range):
        row = {}
        key_is_set = False
        key = None
        # Fold the alternating name/value pairs of q_key into the row.
        for q in q_key:  # q_key=(Date,20110808,Practice,1)
            if not key_is_set:
                key = q
                key_is_set = True
            else:
                row[key] = q
                key_is_set = False
        for measure in measures:
            if mapping[measure]["type"][-5:] == "float":
                is_float = True
            else:
                is_float = False
            row[measure] = 0
            snoq_keys = list(combinatorial_keys(snoq_range))
            if len(snoq_keys) < 2:
                # Zero or one slice-only key: read the value directly.
                if len(snoq_keys) == 1:
                    snoq_key = snoq_keys[0]
                else:
                    snoq_key = None
                val_key = construct_key(measure, q_key, snoq_key)
                if mapping[measure]["type"] == "unique":
                    val = data_db.scard(val_key)
                else:
                    val = data_db.get(val_key)
                if val:
                    if is_float:
                        row[measure] = float(val)
                    else:
                        row[measure] = int(val)
            else:
                # Several slice-only keys: sum across them; set-cardinality
                # ("unique") measures cannot be summed.
                for snoq_key in snoq_keys:
                    val_key = construct_key(measure, q_key, snoq_key)
                    if mapping[measure]["type"] == "unique":
                        abort(400, ("Measure type 'unique' cannot be aggregated"))
                    else:
                        val = data_db.get(val_key)
                        if val:
                            if is_float:
                                row[measure] += float(val)
                            else:
                                row[measure] += int(val)
        output.append(row)
    output_response = {"status": "OK", "data": output}
    return output_response