예제 #1
0
def get_custom_distribution_metadata(product_name) -> List[CustomDistributionMeta]:
    """Get metadata for reconstructing custom distribution buckets in Glean metrics.

    Fetches all probes for the given product and keeps only those of type
    ``custom_distribution``, packing each one's bucketing parameters into a
    CustomDistributionMeta.
    """
    # GleanPing.get_repos -> List[Tuple[name: str, app_id: str]]
    all_probes = GleanPing(product_name).get_probes()

    return [
        CustomDistributionMeta(
            probe.get_name(),
            probe.get("range_min"),
            probe.get("range_max"),
            probe.get("bucket_count"),
            probe.get("histogram_type"),
        )
        for probe in all_probes
        if probe.get_type() == "custom_distribution"
    ]
예제 #2
0
    def handle(self, *args, **kwargs):
        """Import probe metadata for every known Glean product.

        For each repo reported by ``GleanPing.get_repos()`` (tuples whose
        first element is the product name — see the list comprehension),
        fetch its probes and upsert one row per probe via
        ``self.upsert_data``.
        """
        glean_products = [repo[0] for repo in GleanPing.get_repos()]
        for product in glean_products:
            probes = GleanPing(product).get_probes()

            data = []
            for probe in probes:
                name = snake_case(probe.name)
                info = probe.definition
                # Guard against a missing/None "description": calling
                # .strip() directly on info.get("description") raises
                # AttributeError when the key is absent.
                description = (info.get("description") or "").strip()

                data.append({
                    "product": product,
                    "name": name,
                    "type": probe.type,
                    "description": description,
                    # Serialize the full definition; SetEncoder presumably
                    # handles set members in the definition dict.
                    "info": json.dumps(info, cls=SetEncoder),
                })

            self.upsert_data(data)
예제 #3
0
    def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
        """Return the Glean probes sent in this ping, skipping duplicate ids.

        Returns an empty list (with an error logged) when no v1 name is
        available to look the repo up by.
        """
        if v1_name is None:
            logging.error(
                f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
            )
            return []

        # NOTE: next() without a default raises StopIteration when no repo
        # matches v1_name.
        matching_repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name))
        app = GleanPing(matching_repo)

        seen_ids = set()
        selected = []
        for candidate in app.get_probes():
            if self.name not in candidate.definition["send_in_pings"]:
                continue
            # Some ids are duplicated, ignore them
            if candidate.id in seen_ids:
                continue

            seen_ids.add(candidate.id)
            selected.append(candidate)

        return selected
예제 #4
0
    def generate_per_app(self, project_id, app_info, output_dir=None):
        """
        Generate per-app ping views.

        For the release channel of a glean app *only*, generate a
        pointer view to the app-id specific view for that channel/app
        combination

        :param project_id: not referenced in this method — presumably kept
            for a uniform generator interface; TODO confirm.
        :param app_info: sequence of per-channel app dicts; the first entry
            is taken to be the release channel.
        :param output_dir: directory to write the view SQL, metadata, and
            schema symlink into; when falsy, nothing is written to disk.
        """
        # Release channel is assumed to be the first entry of app_info.
        release_app = app_info[0]
        target_dataset = release_app["app_name"]
        # NOTE(review): next() without a default raises StopIteration if no
        # repo matches the release app's v1_name — confirm that's intended.
        repo = next((r for r in GleanPing.get_repos()
                     if r["name"] == release_app["v1_name"]))

        # app name is the same as the bq_dataset_family for the release channel: do nothing
        if (repo["app_id"] == release_app["app_name"] or
                release_app["bq_dataset_family"] == release_app["app_name"]):
            return

        p = GleanPing(repo)
        for ping_name in p.get_pings():
            # BigQuery identifiers can't contain dashes.
            view_name = ping_name.replace("-", "_")
            # The app-id specific view this pointer view will select from.
            underlying_view_id = ".".join([
                "moz-fx-data-shared-prod", repo["app_id"].replace("-", "_"),
                view_name
            ])
            full_view_id = f"moz-fx-data-shared-prod.{target_dataset}.{view_name}"
            if output_dir:
                # Emit the pointer view's SQL and its metadata file.
                write_sql(
                    output_dir,
                    full_view_id,
                    "view.sql",
                    VIEW_QUERY_TEMPLATE.format(
                        full_view_id=full_view_id,
                        target=underlying_view_id,
                    ),
                )
                write_sql(
                    output_dir,
                    full_view_id,
                    "metadata.yaml",
                    VIEW_METADATA_TEMPLATE.format(
                        ping_name=ping_name,
                        app_name=release_app["canonical_app_name"],
                        underlying_view_id=underlying_view_id,
                    ),
                )

                # we create a schema to the original view created for the
                # stable tables here (this assumes that they have been or
                # will be generated, which should be the case for a full
                # run of the sql generation logic)
                schema_dir = get_table_dir(output_dir, full_view_id)
                # Link relatively so the tree stays valid if moved wholesale.
                original_schema_file = os.path.relpath(
                    os.path.abspath(
                        get_table_dir(output_dir, underlying_view_id) /
                        "schema.yaml"),
                    start=schema_dir,
                )
                schema_link = schema_dir / "schema.yaml"
                # Remove any stale link first; os.symlink fails if the
                # destination already exists.
                try:
                    os.unlink(schema_link)
                except FileNotFoundError:
                    pass
                os.symlink(original_schema_file, schema_link)
예제 #5
0
            description,
            "definition":
            definition,
            "index":
            fn.to_tsvector("english", " ".join([probe.name, description])),
        })

    with db.atomic():
        for batch in chunked(data, 100):
            (Probes.insert_many(batch).on_conflict(
                conflict_target=[Probes.product, Probes.name],
                update={
                    Probes.description: EXCLUDED.description,
                    Probes.definition: EXCLUDED.definition,
                    Probes.index: EXCLUDED.index,
                },
            ).execute())
    log("Imported {n:,} probes for {product}".format(n=len(probes),
                                                     product=product))


if __name__ == "__main__":
    log("Starting imports...")

    # Import telemetry pings
    import_probes("desktop", MainPing().get_probes())

    # Import Glean pings — the first element of each repo entry is the
    # product name.
    for repo in GleanPing.get_repos():
        product_name = repo[0]
        import_probes(product_name, GleanPing(product_name).get_probes())
예제 #6
0
#!/usr/bin/env python3

import json
import os
import re
import sys

from mozilla_schema_generator.glean_ping import GleanPing

# Map BigQuery dataset names (repo ids with '-' normalized to '_') back to
# the human-readable Glean repo name. Uses a dict comprehension and avoids
# shadowing the `id` builtin in the unpacking.
glean_mapping = {
    repo_id.replace('-', '_'): name
    for (name, repo_id) in GleanPing.get_repos()
}

# Work relative to the schemas directory of the checkout passed as argv[1].
os.chdir(os.path.join(sys.argv[1], 'schemas'))
tables = []
for root, dirs, files in os.walk('.'):
    if 'metadata' in root:
        # metadata is some weird other thing
        # FIXME: figure out what it is
        continue
    try:
        bq_file = next(f for f in files if f.endswith('.bq'))
    except StopIteration:
        # if there is no bq file, not in BigQuery?
        # FIXME: interrogate this assumption
        continue
    (dataset_name, table_name) = root[2:].replace('-', '_').split('/')
    path = "/".join([root[2:], bq_file])
    version = re.sub(r'.*([0-9]+).bq', r"\1", bq_file)
    bq_definition = 'https://github.com/mozilla-services/mozilla-pipeline-schemas/blob/generated-schemas/schemas/' + path
    bq_definition_raw_json = 'https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas/generated-schemas/schemas/' + path
    table = {