def execute(self, context):
    """Compute and store the QOS of the nodes of this label chunk.

    The nodes are read with a paginated query (``self.skip`` /
    ``self.length``), filtered with ``is_active_node``, and the rule named
    ``self.rule_name`` is executed on each remaining node. Every known QOS
    is written as a metric and saved in Redis; nodes without QOS are
    recorded in a dedicated Redis set.

    :param context: mapping providing the ``ds`` entry from which the
        start and end timestamps are derived
    :return: ``False`` when no rule is associated to the label, ``None``
        otherwise
    """
    from depc.controllers import NotFoundError
    from depc.controllers.rules import RuleController
    from depc.extensions import redis_scheduler as redis
    from depc.utils import get_start_end_ts

    ds = context["ds"]
    start, end = get_start_end_ts(ds)

    with self.app.app_context():
        # Get the nodes for this team and this label
        cypher = (
            "MATCH(n:{label}) RETURN n AS Node "
            "ORDER BY Node.name "
            "SKIP {skip} LIMIT {limit}"
        ).format(
            label=self.full_label,
            skip=self.skip,
            limit=int(self.length),
        )
        fetched = [
            dict(record.get("Node").items()) for record in get_records(cypher)
        ]

        # Remove old nodes
        nodes = [
            candidate
            for candidate in fetched
            if is_active_node(start, end, candidate)
        ]

        # Get the rule associated to the label for this team
        rule_filters = {
            "Rule": {
                "name": self.rule_name,
                "team_id": self.team_id
            }
        }
        try:
            rule = RuleController.get(filters=rule_filters)
        except NotFoundError:
            self.log.warning(
                "[{0}] The label {1} has no associated rule in DEPC".format(
                    self.team_name, self.label))
            return False

        rule_id = rule["id"]
        auto_fill = check_enable_auto_fill(rule_id, self.team_id)

        # The Redis keys only depend on the day, the team and the label,
        # so they are built once for the whole chunk.
        key = "{ds}.{team}.{label}".format(
            ds=ds, team=self.team_name, label=self.label)
        sorted_key = "{}.sorted".format(key)
        noqos_key = "{ds}.{team}.{label}.noqos".format(
            ds=ds, team=self.team_name, label=self.label)

        has_qos = False
        for node in nodes:
            node_name = node["name"]
            result = RuleController.execute(
                rule_id=rule_id,
                auto_fill=auto_fill,
                name=node_name,
                start=start,
                end=end,
            )
            qos = result["qos"]["qos"]

            if qos == "unknown":
                self.log.warning("[{0}/{1}] No QOS for {2}".format(
                    self.team_name, self.label, node_name))

                # Add it in redis to compute some stats in AfterSubdagOperator
                redis.sadd(noqos_key, node_name)
                continue

            has_qos = True
            self.log.info("[{0}/{1}] The QOS of {2} is {3}%".format(
                self.team_name, self.label, node_name, qos))

            # Saving to Beamium
            self.write_metric(
                metric="depc.qos.node",
                ts=start,
                value=qos,
                tags={
                    "label": self.label,
                    "name": node_name,
                    "team": self.team_id,
                },
            )

            # Used for average computing
            if not self.excluded_from_label_average(
                    self.team_name, self.label, node_name):
                redis.zadd(sorted_key, node_name, qos)

            # Save information to reuse it later (`bools_dps` is used in
            # OperationOperator and `qos` is used in AggregationOperator)
            payload = {
                "bools_dps": result["qos"]["bools_dps"],
                "qos": qos,
            }
            redis.set("{}.{}.node".format(key, node_name), json.dumps(payload))

        if not has_qos:
            self.log.warning("[{0}/{1}] No QOS found for any items".format(
                self.team_name, self.label))
def execute(self, context):
    """Compute the QOS of each node from the QOS of its dependencies.

    The nodes and their dependencies are fetched with the query returned by
    ``self.build_query()`` and processed by ``self.filter_records()``. The
    QOS of every dependency is read from Redis; for each node having at
    least one dependency with a QOS, ``self.compute_node_qos()`` is applied
    and the result is saved in Redis and queued as a metric. Nodes without
    any usable dependency are recorded in a dedicated Redis set.

    :param context: mapping providing the ``ds`` entry from which the
        start and end timestamps are derived
    :return: ``None``
    """
    from depc.extensions import redis_scheduler as redis
    from depc.utils import get_start_end_ts

    ds = context["ds"]
    start, end = get_start_end_ts(ds)
    name, dependencies, query = self.build_query()

    self.log.info(
        "[{team}/{label}] Fetching nodes and its dependencies using the following query : {query}"
        .format(team=self.team_name, label=self.label, query=query))

    with self.app.app_context():
        # Retrieve the node and its dependencies. The records are
        # materialized here so that the fetch and the processing are
        # timed separately.
        start_time = time.time()
        records = list(get_records(query))
        self.log.info(
            "[{team}/{label}] Nodes fetched in {t}s, processing it...".format(
                team=self.team_name,
                label=self.label,
                t=round(time.time() - start_time, 3),
            ))

        # Process the nodes and remove the archived ones
        start_time = time.time()
        nodes = self.filter_records(
            start=start,
            end=end,
            records=records,
            name=name,
            dependencies=dependencies,
        )
        processing_time = round(time.time() - start_time, 3)

    # No node has dependency
    if not nodes:
        self.log.warning("[{team}/{label}] No node has dependency.".format(
            team=self.team_name, label=self.label))
        return

    node_names = list(nodes.keys())
    msg = "[{team}/{label}] Processing done in {t}s, {count} nodes returned (from {begin} to {end})"
    self.log.info(
        msg.format(
            team=self.team_name,
            label=self.label,
            t=processing_time,
            count=len(nodes),
            begin=node_names[0],
            end=node_names[-1],
        ))

    self.log.info(
        "[{team}/{label}] Computing the QOS for {count} nodes...".format(
            team=self.team_name, label=self.label, count=len(nodes)))

    # The Redis keys only depend on the day, the team and the label, so
    # they are built once for the whole chunk.
    key = "{ds}.{team}.{label}".format(
        ds=ds, team=self.team_name, label=self.label)
    sorted_key = "{}.sorted".format(key)
    noqos_key = "{ds}.{team}.{label}.noqos".format(
        ds=ds, team=self.team_name, label=self.label)

    start_time = time.time()
    QOS = {}  # QOS already read from Redis, indexed by "<label>.<name>"
    metrics = []
    nodes_without_qos = []

    for idx, (node, deps) in enumerate(nodes.items(), start=1):
        self.log.info(
            "[{team}/{label}] Fetching the QOS of {count} dependencies for {node}..."
            .format(team=self.team_name,
                    label=self.label,
                    count=len(deps),
                    node=node))

        node_deps = []
        for d in deps:
            dep_name = d["name"]
            dep_label = d["label"]

            # The label contains the topic but not the redis key
            # NOTE(review): only the segment between the first and second
            # "_" is kept; confirm labels never contain "_" themselves.
            dep = "{0}.{1}".format(dep_label.split("_")[1], dep_name)

            # It's the first time we see this dependency
            if dep not in QOS:
                # We retrieve its QOS in Redis
                qos = redis.get("{ds}.{team}.{dep}.node".format(
                    ds=ds, team=self.team_name, dep=dep))
                if qos:
                    QOS[dep] = json.loads(qos.decode("utf-8"),
                                          cls=BoolsDpsDecoder)

            # Add the result of the dependencies for this node
            if dep in QOS:
                node_deps.append(QOS[dep])
            else:
                msg = ("The QOS of {dep} is not available "
                       "(no data in any metric ?)".format(dep=dep_name))
                # Same logger as every other message of this method
                self.log.warning(msg)

        if node_deps:
            msg = (
                "[{team}/{label}] Computing the QOS of {node} using a {type} "
                "between {count} dependencies with valid QOS...")
            self.log.info(
                msg.format(
                    team=self.team_name,
                    label=self.label,
                    node=node,
                    type=self.type,
                    count=len(node_deps),
                ))

            node_qos = self.compute_node_qos(data=node_deps,
                                             start=start,
                                             end=end)

            self.log.info("[{0}/{1}] The QOS of {2} is {3}%".format(
                self.team_name, self.label, node, node_qos["qos"]))

            metrics.append(
                self.format_metric(
                    metric="depc.qos.node",
                    ts=start,
                    value=node_qos["qos"],
                    tags={
                        "label": self.label,
                        "name": node,
                        "team": self.team_id
                    },
                ))

            if not self.excluded_from_label_average(
                    self.team_name, self.label, node):
                redis.zadd(sorted_key, node, node_qos["qos"])

            # Save information to reuse it later (`bools_dps` is used in
            # OperationOperator and `qos` is used in AggregationOperator)
            redis.set("{}.{}.node".format(key, node), json.dumps(node_qos))
        else:
            self.log.warning(
                "[{team}/{label}] {node} has no dependency with QOS".format(
                    team=self.team_name, label=self.label, node=node))
            nodes_without_qos.append(node)

            # Add it in redis to compute some stats in AfterSubdagOperator
            redis.sadd(noqos_key, node)

        # Progress report every 1000 nodes
        if idx % 1000 == 0:
            self.log.info(
                "[{team}/{label}] {count} nodes processed in {time}s".format(
                    team=self.team_name,
                    label=self.label,
                    count=idx,
                    time=round(time.time() - start_time, 3),
                ))

    self.log.info(
        "[{team}/{label}] The QOS of {count} nodes has been computed in {time}s"
        .format(
            team=self.team_name,
            label=self.label,
            count=len(metrics),
            time=round(time.time() - start_time, 3),
        ))

    if nodes_without_qos:
        msg = "[{team}/{label}] The QOS could not be found for {count} nodes ({excerpt}, ...)"
        self.log.warning(
            msg.format(
                team=self.team_name,
                label=self.label,
                count=len(nodes_without_qos),
                excerpt=", ".join(nodes_without_qos[:5]),
            ))

    # Write metrics for Beamium
    if not metrics:
        self.log.warning(
            "[{team}/{label}] No QOS to save, chunk is finished.".format(
                team=self.team_name, label=self.label))
    else:
        self.write_metrics(metrics)