コード例 #1
0
def get_teams_schema(ds, **kwargs):
    """
    Build a schema summary for every team and store it in Airflow.

    Configs are listed sorted on ``updated_at`` in reverse order and the
    first config met for a given Kafka topic is the one kept for that
    team. For every label of that config, a Neo4j query counts the nodes
    carrying the ``<topic>_<label>`` label. The resulting list of team
    summaries is saved in the ``config`` Airflow variable as JSON.

    The Flask application is read from ``kwargs["params"]["app"]``.
    """
    with kwargs["params"]["app"].app_context():
        from depc.controllers.configs import ConfigController

        # Configs sorted on updated_at, reversed
        all_configs = ConfigController._list(order_by="updated_at", reverse=True)

        # One summary per Kafka topic
        summary_by_topic = {}
        for config in all_configs:
            team = config.team
            topic = team.kafka_topic

            # A config was already kept for this team: ignore the others
            if topic in summary_by_topic:
                continue

            logger.info("[{0}] Configuration : {1}".format(team.name, config.data))

            label_names = config.data.keys()
            logger.info(
                "[{0}] Counting nodes for {1} labels...".format(
                    team.name, len(label_names)
                )
            )

            # Number of nodes for each label declared in the config
            node_counts = {}
            for label in label_names:
                neo_label = "{}_{}".format(topic, label)
                count_query = "MATCH (n:{label}) RETURN count(n) AS Count".format(
                    label=neo_label
                )
                first_record = list(get_records(count_query))[0]
                count = first_record.get("Count")

                logger.info(
                    "[{0}] {1} nodes for label {2}...".format(team.name, count, label)
                )
                node_counts[label] = count

            summary_by_topic[topic] = {
                "id": str(team.id),
                "name": team.name,
                "topic": topic,
                "schema": config.data,
                "labels": node_counts,
            }

        # Persist the summaries into an Airflow variable
        Variable.set("config", list(summary_by_topic.values()), serialize_json=True)
コード例 #2
0
    def get_impacted_nodes_all(cls, team_id, label, node, impacted_label, ts,
                               with_inactive_nodes):
        """Return all the impacted nodes as one JSON array string.

        The impacted nodes are fetched by batches of 50000 and every batch
        is serialized separately (``indent=4``); the serialized batches are
        then merged into a single JSON array.

        Args:
            team_id: identifier used to look up the team (and its topic).
            label: label forwarded to the query builder.
            node: node forwarded to the query builder.
            impacted_label: label of the impacted nodes, must not be empty.
            ts: timestamp, must be convertible to a non-negative integer.
            with_inactive_nodes: forwarded as-is to
                ``_compute_impacted_nodes_from_data``.

        Returns:
            str: a JSON array; ``"[\\n]"`` when there is nothing to return.

        Raises:
            IntegrityError: if ``ts`` is not a non-negative integer or if
                ``impacted_label`` is empty.
        """
        try:
            ts = int(ts)
        except (TypeError, ValueError):
            raise IntegrityError("'ts' parameter must be a positive number")

        if ts < 0:
            raise IntegrityError("'ts' parameter must be a positive number")

        if not impacted_label:
            raise IntegrityError("'impactedLabel' parameter must not be empty")

        team = TeamController._get({"Team": {"id": team_id}})

        nodes_batch = 50000
        skip = 0
        total_count = cls.get_impacted_nodes_count(team_id, label, node,
                                                   impacted_label)["count"]

        # Serialized content of every non-empty batch, without the
        # surrounding "[" and "\n]" of its own JSON array
        serialized_batches = []

        while skip < total_count:
            query = cls._build_impacted_nodes_queries(
                topic=team.kafka_topic,
                label=label,
                node=node,
                impacted_label=impacted_label,
                skip=skip,
                limit=nodes_batch,
                count=False,
            )

            # NOTE(review): assumes the helper returns a list — confirm
            batch = cls._compute_impacted_nodes_from_data(
                get_records(query).data(),
                ts,
                with_inactive_nodes=with_inactive_nodes,
            )

            skip += nodes_batch

            # An empty batch contributes nothing; skipping it also avoids
            # leaving a dangling "," before the closing bracket when the
            # last batch(es) are empty, which would be invalid JSON.
            if not batch:
                continue

            serialized = json.dumps(batch, indent=4)
            serialized_batches.append(serialized[1:-2])

        # Joining with "," only puts separators between actual elements
        return "[" + ",".join(serialized_batches) + "\n]"
コード例 #3
0
def skip_by_requirement(app, request):
    """Skip the current test when one of its requirements is missing.

    The requirement is read from the first argument of the closest
    ``skip_requirement`` marker. Only "neo4j" is supported: a trivial
    query is sent and the test is skipped if the server is unavailable
    or rejects the authentication.

    Usage:

        >>> @pytest.mark.skip_requirement('neo4j')
        >>> def test_get_team_dependencies(client):
        >>>    pass
    """
    marker = request.node.get_closest_marker("skip_requirement")
    if not marker:
        return

    # Other requirements are not handled
    if marker.args[0] != "neo4j":
        return

    # Skip the test if neo4j is not running (or not well configured)
    with app.app_context():
        try:
            get_records("RETURN 1")
        except ServiceUnavailable as error:
            pytest.skip("Neo4j server error : {}".format(error))
        except AuthError as error:
            pytest.skip("Neo4j authentication error : {}".format(error))
コード例 #4
0
    def get_impacted_nodes_count(cls, team_id, label, node, impacted_label):
        """Count the impacted nodes of a given label.

        The team is looked up from ``team_id`` and its Kafka topic is
        used to build the counting query.

        Returns:
            dict: ``{"count": <first value returned by the query>}``.

        Raises:
            IntegrityError: if ``impacted_label`` is empty.
        """
        if not impacted_label:
            raise IntegrityError("'impactedLabel' parameter must not be empty")

        topic = TeamController._get({"Team": {"id": team_id}}).kafka_topic

        count_query = cls._build_impacted_nodes_queries(
            topic=topic,
            label=label,
            node=node,
            impacted_label=impacted_label,
            count=True,
        )

        # The count is the first value of the result
        total = get_records(count_query).value()[0]
        return {"count": total}
コード例 #5
0
    def get_impacted_nodes(cls, team_id, label, node, impacted_label, skip,
                           limit, ts):
        """Return one page of impacted nodes.

        ``skip``, ``limit`` and ``ts`` are converted to integers and must
        not be negative. The page is fetched with a query built from the
        Kafka topic of the team, then handed over to
        ``_compute_impacted_nodes_from_data`` with
        ``with_inactive_nodes=True``.

        Raises:
            IntegrityError: if ``skip``, ``limit`` or ``ts`` is not a
                non-negative integer, or if ``impacted_label`` is empty.
        """
        numbers_error = (
            "'skip', 'limit' and 'ts' parameters must be positive numbers")

        try:
            skip, limit, ts = int(skip), int(limit), int(ts)
        except (TypeError, ValueError):
            raise IntegrityError(numbers_error)

        if min(skip, limit, ts) < 0:
            raise IntegrityError(numbers_error)

        if not impacted_label:
            raise IntegrityError("'impactedLabel' parameter must not be empty")

        topic = TeamController._get({"Team": {"id": team_id}}).kafka_topic

        page_query = cls._build_impacted_nodes_queries(
            topic=topic,
            label=label,
            node=node,
            impacted_label=impacted_label,
            skip=skip,
            limit=limit,
            count=False,
        )
        page_data = get_records(page_query).data()

        # Active and inactive nodes are both returned, with metadata
        # telling whether each of them is active or not
        return cls._compute_impacted_nodes_from_data(
            page_data, ts, with_inactive_nodes=True)
コード例 #6
0
    def get_node_dependencies(
        cls,
        team_id,
        label,
        node,
        day=None,
        filter_on_config=False,
        include_inactive=False,
    ):
        """Return the dependencies of a node, grouped by label, and a graph.

        The result has two parts: ``dependencies`` maps a label (without
        the topic prefix) to a list of node properties, and ``graph``
        holds the ``nodes`` and ``relationships`` found by the query.

        ``day`` is parsed with the ``YYYY-MM-DD`` format for every record
        holding a relationship. A dependency which is not active that day
        (inactive node or no active relationship) is dropped, unless
        ``include_inactive`` is set, in which case it is kept and flagged
        with ``"inactive": True``.
        """
        team = TeamController._get({"Team": {"id": team_id}})
        topic = team.kafka_topic
        query = cls._build_dependencies_query(team_id, topic, label, node,
                                              filter_on_config)

        # Node labels look like 'acme_Mylabel': this many leading
        # characters are removed to only keep 'Mylabel'
        prefix_len = len(topic) + 1

        by_label = {}
        graph_nodes = []
        graph_relationships = []
        dependencies = {
            "dependencies": by_label,
            "graph": {
                "nodes": graph_nodes,
                "relationships": graph_relationships
            }
        }

        # Every record is a relationship of the requested node
        for position, record in enumerate(get_records(query)):

            # The main node is taken from the first record only
            if position == 0:
                main_node = record.get("n")
                main_title = list(main_node.labels)[0][prefix_len:]
                main_props = dict(main_node.items())

                by_label.setdefault(main_title, []).append(main_props)
                graph_nodes.append({
                    "id": main_node.id,
                    "label": main_props["name"],
                    "title": main_title
                })

            rel = record.get("r")
            if not rel:
                continue

            start_node = rel.start_node
            end_node = rel.end_node
            day_start = arrow.get(day, "YYYY-MM-DD").floor("day").timestamp
            day_end = arrow.get(day, "YYYY-MM-DD").ceil("day").timestamp

            # Both the target node and the relationship must be active
            active = is_active_node(
                day_start, day_end, end_node) and has_active_relationship(
                    day_start, day_end, rel.get("periods"))
            if not active:
                if not include_inactive:
                    continue
                setattr(end_node, "inactive", True)

            title = list(end_node.labels)[0][prefix_len:]

            by_label.setdefault(title, []).append({
                **dict(end_node.items()),
                "periods": list(rel.get("periods")),
                "inactive": getattr(end_node, "inactive", False),
            })

            graph_nodes.append({
                "id": end_node.id,
                "label": dict(end_node.items())["name"],
                "title": title,
            })

            graph_relationships.append({
                "id": rel.id,
                "from": start_node.id,
                "to": end_node.id,
                "arrows": "to",
                "periods": list(rel.get("periods")),
            })

        return dependencies
コード例 #7
0
ファイル: rule_operator.py プロジェクト: dingcycle/depc
    def execute(self, context):
        """Compute the QOS of a chunk of nodes using the rule of their label.

        The nodes of ``self.full_label`` are read from Neo4j (ordered by
        name, paginated with ``self.skip`` / ``self.length``) and the
        ones which are not active between the start and the end of the
        execution date are ignored. The rule named ``self.rule_name`` is
        then executed for every remaining node.

        For a node with a QOS, a metric is written and the result is
        stored in Redis; a node without QOS is added to the ``noqos``
        Redis set.

        Returns:
            ``False`` when the label has no associated rule, ``None``
            otherwise.
        """
        from depc.controllers import NotFoundError
        from depc.controllers.rules import RuleController
        from depc.extensions import redis_scheduler as redis
        from depc.utils import get_start_end_ts

        ds = context["ds"]
        start, end = get_start_end_ts(ds)

        with self.app.app_context():

            # Fetch the nodes of this team for this label
            query = ("MATCH(n:{label}) RETURN n AS Node "
                     "ORDER BY Node.name "
                     "SKIP {skip} LIMIT {limit}").format(
                         label=self.full_label,
                         skip=self.skip,
                         limit=int(self.length))

            all_nodes = [
                dict(record.get("Node").items())
                for record in get_records(query)
            ]

            # Old nodes are left out
            nodes = [
                props for props in all_nodes
                if is_active_node(start, end, props)
            ]

            # Look for the rule associated to the label for this team
            rule_filters = {
                "Rule": {
                    "name": self.rule_name,
                    "team_id": self.team_id
                }
            }
            try:
                rule = RuleController.get(filters=rule_filters)
            except NotFoundError:
                self.log.warning(
                    "[{0}] The label {1} has no associated rule in DEPC".
                    format(self.team_name, self.label))
                return False

            rule_id = rule["id"]
            auto_fill = check_enable_auto_fill(rule_id, self.team_id)

            # Redis keys shared by every node of this label
            key = "{ds}.{team}.{label}".format(ds=ds,
                                               team=self.team_name,
                                               label=self.label)
            noqos_key = "{ds}.{team}.{label}.noqos".format(
                ds=ds, team=self.team_name, label=self.label)

            has_qos = False
            for node in nodes:
                node_name = node["name"]
                result = RuleController.execute(
                    rule_id=rule_id,
                    auto_fill=auto_fill,
                    name=node_name,
                    start=start,
                    end=end,
                )
                qos = result["qos"]["qos"]

                if qos == "unknown":
                    self.log.warning("[{0}/{1}] No QOS for {2}".format(
                        self.team_name, self.label, node_name))

                    # Kept in redis to compute some stats in
                    # AfterSubdagOperator
                    redis.sadd(noqos_key, node_name)
                    continue

                has_qos = True
                self.log.info("[{0}/{1}] The QOS of {2} is {3}%".format(
                    self.team_name, self.label, node_name, qos))

                # Saving to Beamium
                self.write_metric(
                    metric="depc.qos.node",
                    ts=start,
                    value=qos,
                    tags={
                        "label": self.label,
                        "name": node_name,
                        "team": self.team_id,
                    },
                )

                # Used for average computing
                if not self.excluded_from_label_average(
                        self.team_name, self.label, node_name):
                    redis.zadd("{}.sorted".format(key), node_name, qos)

                # Save information to reuse it later (`bools_dps` is used in
                # OperationOperator and `qos` is used in AggregationOperator)
                redis.set(
                    "{}.{}.node".format(key, node_name),
                    json.dumps({
                        "bools_dps": result["qos"]["bools_dps"],
                        "qos": qos,
                    }),
                )

            if not has_qos:
                self.log.warning("[{0}/{1}] No QOS found for any items".format(
                    self.team_name, self.label))
コード例 #8
0
    def execute(self, context):
        """Compute the QOS of a chunk of nodes from the QOS of their dependencies.

        Steps:

        1. Run the query returned by ``self.build_query()`` and let
           ``self.filter_records()`` turn the records into a mapping of
           node name to dependencies.
        2. For every dependency, read its QOS from Redis (each dependency
           with a QOS is decoded only once per call).
        3. For every node having at least one dependency with a QOS, call
           ``self.compute_node_qos()``, store the result in Redis and
           queue a ``depc.qos.node`` metric. The other nodes are added to
           the ``noqos`` Redis set.
        4. Write the queued metrics with ``self.write_metrics()``.

        Args:
            context: task context; only ``context["ds"]`` is read.

        Returns:
            None.
        """
        from depc.extensions import redis_scheduler as redis
        from depc.utils import get_start_end_ts

        ds = context["ds"]
        start, end = get_start_end_ts(ds)
        name, dependencies, query = self.build_query()

        self.log.info(
            "[{team}/{label}] Fetching nodes and its dependencies using the following query : {query}"
            .format(team=self.team_name, label=self.label, query=query))

        # Retrieve the node and its dependencies. The records are read
        # before stopping the timer so the fetch duration does not include
        # the processing done by filter_records().
        start_time = time.time()
        with self.app.app_context():
            records = get_records(query)
        records = list(records)

        self.log.info(
            "[{team}/{label}] Nodes fetched in {t}s, processing it...".format(
                team=self.team_name,
                label=self.label,
                t=round(time.time() - start_time, 3),
            ))

        # Process the nodes and remove the archived ones. The timer is
        # started BEFORE the processing, so the duration logged below is
        # the real one.
        start_time = time.time()
        nodes = self.filter_records(
            start=start,
            end=end,
            records=records,
            name=name,
            dependencies=dependencies,
        )
        processing_time = round(time.time() - start_time, 3)

        # No node has dependency
        if not nodes:
            self.log.warning("[{team}/{label}] No node has dependency.".format(
                team=self.team_name, label=self.label))
            return

        node_names = list(nodes.keys())
        msg = "[{team}/{label}] Processing done in {t}s, {count} nodes returned (from {begin} to {end})"
        self.log.info(
            msg.format(
                team=self.team_name,
                label=self.label,
                t=processing_time,
                count=len(nodes),
                begin=node_names[0],
                end=node_names[-1],
            ))

        self.log.info(
            "[{team}/{label}] Computing the QOS for {count} nodes...".format(
                team=self.team_name, label=self.label, count=len(nodes)))

        start_time = time.time()
        QOS = {}  # QOS of the dependencies already read from Redis
        metrics = []
        nodes_without_qos = []

        # Redis keys shared by every node of this label
        key = "{ds}.{team}.{label}".format(ds=ds,
                                           team=self.team_name,
                                           label=self.label)
        noqos_key = "{ds}.{team}.{label}.noqos".format(ds=ds,
                                                       team=self.team_name,
                                                       label=self.label)

        for idx, (node, deps) in enumerate(nodes.items(), start=1):
            self.log.info(
                "[{team}/{label}] Fetching the QOS of {count} dependencies for {node}..."
                .format(team=self.team_name,
                        label=self.label,
                        count=len(deps),
                        node=node))

            node_deps = []
            for d in deps:
                dep_name = d["name"]
                dep_label = d["label"]

                # The label contains the topic but not the redis key
                # NOTE(review): only the second "_"-separated part is kept,
                # which presumes neither the topic nor the label contains
                # an underscore — confirm
                dep = "{0}.{1}".format(dep_label.split("_")[1], dep_name)

                # It's the first time we see this dependency
                if dep not in QOS:

                    # We retrieve its QOS in Redis
                    qos = redis.get("{ds}.{team}.{dep}.node".format(
                        ds=ds, team=self.team_name, dep=dep))
                    if qos:
                        QOS[dep] = json.loads(qos.decode("utf-8"),
                                              cls=BoolsDpsDecoder)

                # Add the result of the dependencies for this node
                try:
                    node_deps.append(QOS[dep])
                except KeyError:
                    msg = ("The QOS of {dep} is not available "
                           "(no data in any metric ?)".format(dep=dep_name))
                    # Same logger as the rest of the method
                    self.log.warning(msg)

            if node_deps:
                msg = (
                    "[{team}/{label}] Computing the QOS of {node} using a {type} "
                    "between {count} dependencies with valid QOS...")
                self.log.info(
                    msg.format(
                        team=self.team_name,
                        label=self.label,
                        node=node,
                        type=self.type,
                        count=len(node_deps),
                    ))

                node_qos = self.compute_node_qos(data=node_deps,
                                                 start=start,
                                                 end=end)

                self.log.info("[{0}/{1}] The QOS of {2} is {3}%".format(
                    self.team_name, self.label, node, node_qos["qos"]))

                metrics.append(
                    self.format_metric(
                        metric="depc.qos.node",
                        ts=start,
                        value=node_qos["qos"],
                        tags={
                            "label": self.label,
                            "name": node,
                            "team": self.team_id
                        },
                    ))

                if not self.excluded_from_label_average(
                        self.team_name, self.label, node):
                    redis.zadd("{}.sorted".format(key), node, node_qos["qos"])

                # Save information to reuse it later (`bools_dps` is used in
                # OperationOperator and `qos` is used in AggregationOperator)
                redis.set("{}.{}.node".format(key, node), json.dumps(node_qos))
            else:
                self.log.warning(
                    "[{team}/{label}] {node} has no dependency with QOS".
                    format(team=self.team_name, label=self.label, node=node))
                nodes_without_qos.append(node)

                # Add it in redis to compute some stats in AfterSubdagOperator
                redis.sadd(noqos_key, node)

            # Progress report every 1000 nodes
            if idx % 1000 == 0:
                self.log.info(
                    "[{team}/{label}] {count} nodes processed in {time}s".
                    format(
                        team=self.team_name,
                        label=self.label,
                        count=idx,
                        time=round(time.time() - start_time, 3),
                    ))

        self.log.info(
            "[{team}/{label}] The QOS of {count} nodes has been computed in {time}s"
            .format(
                team=self.team_name,
                label=self.label,
                count=len(metrics),
                time=round(time.time() - start_time, 3),
            ))

        if nodes_without_qos:
            msg = "[{team}/{label}] The QOS could not be found for {count} nodes ({excerpt}, ...)"
            self.log.warning(
                msg.format(
                    team=self.team_name,
                    label=self.label,
                    count=len(nodes_without_qos),
                    excerpt=", ".join(nodes_without_qos[:5]),
                ))

        # Write metrics for Beamium
        if not metrics:
            self.log.warning(
                "[{team}/{label}] No QOS to save, chunk is finished.".format(
                    team=self.team_name, label=self.label))
        else:
            self.write_metrics(metrics)