def get_custom_distribution_metadata(product_name) -> List[CustomDistributionMeta]:
    """Get metadata for reconstructing custom distribution buckets in Glean metrics."""
    # GleanPing.get_repos -> List[Tuple[name: str, app_id: str]]
    probes = GleanPing(product_name).get_probes()
    return [
        CustomDistributionMeta(
            probe.get_name(),
            probe.get("range_min"),
            probe.get("range_max"),
            probe.get("bucket_count"),
            probe.get("histogram_type"),
        )
        for probe in probes
        if probe.get_type() == "custom_distribution"
    ]
def handle(self, *args, **kwargs):
    """Import probe metadata for every known Glean product and upsert it."""
    for product in (repo[0] for repo in GleanPing.get_repos()):
        rows = []
        for probe in GleanPing(product).get_probes():
            definition = probe.definition
            rows.append({
                "product": product,
                "name": snake_case(probe.name),
                "type": probe.type,
                "description": definition.get("description").strip(),
                "info": json.dumps(definition, cls=SetEncoder),
            })
        self.upsert_data(rows)
def _get_glean_metrics(self, v1_name: Optional[str]) -> List[GleanProbe]:
    """Return the Glean probes that are sent in this ping, de-duplicated by id.

    Logs an error and returns an empty list when ``v1_name`` is missing or
    does not match any known Glean repository, rather than raising.
    """
    if v1_name is None:
        logging.error(
            f"Error: Missing v1 name for ping {self.name} in namespace {self.namespace}"
        )
        return []

    # Use a default so an unknown repo name is reported clearly instead of
    # leaking a bare StopIteration out of next().
    repo = next((r for r in GleanPing.get_repos() if r["name"] == v1_name), None)
    if repo is None:
        logging.error(
            f"Error: No Glean repository found for v1 name {v1_name} "
            f"(ping {self.name} in namespace {self.namespace})"
        )
        return []

    glean_app = GleanPing(repo)

    ping_probes = []
    probe_ids = set()
    for probe in glean_app.get_probes():
        if self.name not in probe.definition["send_in_pings"]:
            continue
        if probe.id in probe_ids:
            # Some ids are duplicated, ignore them
            continue
        ping_probes.append(probe)
        probe_ids.add(probe.id)

    return ping_probes
def generate_per_app(self, project_id, app_info, output_dir=None):
    """
    Generate per-app ping views.

    For the release channel of a glean app *only*, generate a pointer view
    to the app-id specific view for that channel/app combination.

    :param project_id: GCP project id (not read here; kept for interface parity)
    :param app_info: per-channel app metadata dicts; element 0 is treated as
        the release channel
    :param output_dir: directory to write view SQL/metadata/schema links into;
        when falsy, nothing is written

    :raises ValueError: if no Glean repository matches the app's v1 name
    """
    release_app = app_info[0]
    target_dataset = release_app["app_name"]

    # Use a default so a missing repo surfaces as a clear error instead of an
    # opaque StopIteration from next().
    repo = next(
        (r for r in GleanPing.get_repos() if r["name"] == release_app["v1_name"]),
        None,
    )
    if repo is None:
        raise ValueError(
            f"No Glean repository found for v1 name {release_app['v1_name']}"
        )

    # app name is the same as the bq_dataset_family for the release channel:
    # the pointer view would just point at itself, so do nothing
    if (repo["app_id"] == release_app["app_name"]
            or release_app["bq_dataset_family"] == release_app["app_name"]):
        return

    p = GleanPing(repo)
    for ping_name in p.get_pings():
        view_name = ping_name.replace("-", "_")
        underlying_view_id = ".".join([
            "moz-fx-data-shared-prod",
            repo["app_id"].replace("-", "_"),
            view_name,
        ])
        full_view_id = f"moz-fx-data-shared-prod.{target_dataset}.{view_name}"
        if output_dir:
            write_sql(
                output_dir,
                full_view_id,
                "view.sql",
                VIEW_QUERY_TEMPLATE.format(
                    full_view_id=full_view_id,
                    target=underlying_view_id,
                ),
            )
            write_sql(
                output_dir,
                full_view_id,
                "metadata.yaml",
                VIEW_METADATA_TEMPLATE.format(
                    ping_name=ping_name,
                    app_name=release_app["canonical_app_name"],
                    underlying_view_id=underlying_view_id,
                ),
            )
            # we create a schema to the original view created for the
            # stable tables here (this assumes that they have been or
            # will be generated, which should be the case for a full
            # run of the sql generation logic)
            schema_dir = get_table_dir(output_dir, full_view_id)
            original_schema_file = os.path.relpath(
                os.path.abspath(
                    get_table_dir(output_dir, underlying_view_id) / "schema.yaml"),
                start=schema_dir,
            )
            schema_link = schema_dir / "schema.yaml"
            # remove any stale link before re-creating it; a missing link is fine
            try:
                os.unlink(schema_link)
            except FileNotFoundError:
                pass
            os.symlink(original_schema_file, schema_link)
            # NOTE(review): this span is the tail of a function whose `def`
            # is above the visible region (it builds `data` rows and bulk
            # upserts them into the Probes table) — confirm against full file.
            description,
            "definition": definition,
            # full-text search vector over the probe name and description
            "index": fn.to_tsvector("english", " ".join([probe.name, description])),
        })

    with db.atomic():
        # insert in batches of 100; on (product, name) conflict, refresh the
        # mutable columns so re-imports update existing rows in place
        for batch in chunked(data, 100):
            (Probes.insert_many(batch).on_conflict(
                conflict_target=[Probes.product, Probes.name],
                update={
                    Probes.description: EXCLUDED.description,
                    Probes.definition: EXCLUDED.definition,
                    Probes.index: EXCLUDED.index,
                },
            ).execute())

    log("Imported {n:,} probes for {product}".format(n=len(probes), product=product))


if __name__ == "__main__":
    log("Starting imports...")
    # Import telemetry pings
    import_probes("desktop", MainPing().get_probes())
    # Import Glean pings
    # presumably get_repos() yields (name, ...) tuples; repo[0] is the product name
    glean_products = [repo[0] for repo in GleanPing.get_repos()]
    for product in glean_products:
        import_probes(product, GleanPing(product).get_probes())
#!/usr/bin/env python3 import json import os import re import sys from mozilla_schema_generator.glean_ping import GleanPing glean_mapping = dict( (id.replace('-', '_'), name) for (name, id) in GleanPing.get_repos()) os.chdir(os.path.join(sys.argv[1], 'schemas')) tables = [] for root, dirs, files in os.walk('.'): if 'metadata' in root: # metadata is some weird other thing # FIXME: figure out what it is continue try: bq_file = next(f for f in files if f.endswith('.bq')) except StopIteration: # if there is no bq file, not in BigQuery? # FIXME: interrogate this assumption continue (dataset_name, table_name) = root[2:].replace('-', '_').split('/') path = "/".join([root[2:], bq_file]) version = re.sub(r'.*([0-9]+).bq', r"\1", bq_file) bq_definition = 'https://github.com/mozilla-services/mozilla-pipeline-schemas/blob/generated-schemas/schemas/' + path bq_definition_raw_json = 'https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas/generated-schemas/schemas/' + path table = {