Ejemplo n.º 1
0
    def generate_per_app_id(self, project_id, baseline_table, output_dir=None):
        """Render, and optionally write, the per-app_id query and view files.

        Returns immediately when ``self.per_app_id_enabled`` is falsy.
        Otherwise the query, view and view-metadata templates derived from
        ``self.target_table_id`` are rendered, plus an init query unless
        ``self.no_init`` is set. When ``referenced_table_exists`` is false
        for the rendered view SQL, a message is logged and nothing is
        written. Files are only written when ``output_dir`` is truthy.
        """
        if not self.per_app_id_enabled:
            return

        names = table_names_from_baseline(
            baseline_table, include_project_id=False
        )

        # View templates are named after the target table id with its last
        # three characters removed (presumably a version suffix such as
        # "_v1" -- confirm against the template directory).
        view_stem = self.target_table_id[:-3]
        query_template = f"{self.target_table_id}.query.sql"

        table = names[f"{self.prefix}_table"]
        view = names[f"{self.prefix}_view"]

        # Later updates win: custom kwargs override the defaults, and the
        # derived table names override both.
        render_kwargs = {
            "header": "-- Generated via bigquery_etl.glean_usage\n",
            "project_id": project_id,
        }
        render_kwargs.update(self.custom_render_kwargs)
        render_kwargs.update(names)

        query_sql = render(query_template, **render_kwargs)
        view_sql = render(f"{view_stem}.view.sql", **render_kwargs)
        view_metadata = render(
            f"{view_stem}.metadata.yaml", format=False, **render_kwargs
        )

        wants_init = not self.no_init
        if wants_init:
            try:
                init_sql = render(
                    f"{self.target_table_id}.init.sql", **render_kwargs
                )
            except TemplateNotFound:
                # No dedicated init template: reuse the query template,
                # rendered with init=True.
                init_sql = render(query_template, init=True, **render_kwargs)

        if not referenced_table_exists(view_sql):
            logging.info(
                f"Skipping view for table which doesn't exist: {table}"
            )
            return

        if not output_dir:
            return

        write_sql(output_dir, view, "metadata.yaml", view_metadata)
        write_sql(output_dir, view, "view.sql", view_sql)
        write_sql(output_dir, table, "query.sql", query_sql)
        if wants_init:
            write_sql(output_dir, table, "init.sql", init_sql)
        write_dataset_metadata(output_dir, view)
Ejemplo n.º 2
0
def _write_sql(project, dataset, slug, kwargs, basename, init):
    """Render the ``basename`` template and pass the result to ``write_sql``.

    The template is looked up in the "operational_monitoring" template
    folder and rendered with ``kwargs`` plus ``init``. The rendered text is
    written under ``OUTPUT_DIR`` for the dotted identifier
    ``project.dataset.slug``, using ``basename`` as the output file name.
    """
    destination = f"{project}.{dataset}.{slug}"
    rendered = render(
        basename,
        template_folder="operational_monitoring",
        **kwargs,
        init=init,
    )
    write_sql(OUTPUT_DIR, destination, basename, rendered)
Ejemplo n.º 3
0
def _write_sql(
    project,
    dataset,
    slug,
    kwargs,
    template_filename,
    output_filename=None,
    init=False,
):
    """Render ``template_filename`` and pass the result to ``write_sql``.

    The template is looked up in the "operational_monitoring" template
    folder and rendered with ``kwargs`` plus ``init``. The rendered text is
    written under ``OUTPUT_DIR`` for the dotted identifier
    ``project.dataset.slug``. The output file is named ``output_filename``,
    or ``template_filename`` when ``output_filename`` is falsy.
    """
    target_name = output_filename if output_filename else template_filename
    destination = f"{project}.{dataset}.{slug}"
    rendered = render(
        template_filename,
        template_folder="operational_monitoring",
        **kwargs,
        init=init,
    )
    write_sql(OUTPUT_DIR, destination, target_name, rendered)
Ejemplo n.º 4
0
    def generate_per_app(self, project_id, app_info, output_dir=None):
        """Render, and optionally write, the cross-channel view per app_name.

        Returns immediately when ``self.per_app_enabled`` is falsy. The view
        is named after ``self.target_table_id`` with its final
        "_"-separated segment removed, and lives in the dataset named by the
        first entry's ``app_name``. Dataset metadata is written (when
        ``output_dir`` is truthy) before the existence check; the view SQL
        itself is only written when ``referenced_table_exists`` is true.
        """
        if not self.per_app_enabled:
            return

        # Everything before the last underscore; empty when there is none.
        target_view_name = self.target_table_id.rpartition("_")[0]
        app_name = app_info[0]["app_name"]

        # One (dataset family, channel) pair per app; the channel falls back
        # to "release" when the entry has no "app_channel" key.
        datasets = []
        for app in app_info:
            channel = app.get("app_channel", "release")
            datasets.append((app["bq_dataset_family"], channel))

        # A single-channel app whose app_name equals its bq_dataset_family
        # already has a per-app_id dataset that doubles as the per-app
        # dataset, so no union view is needed when per-app_id generation
        # is enabled.
        single_matching_dataset = (
            len(datasets) == 1 and app_name == datasets[0][0]
        )
        if single_matching_dataset and self.per_app_id_enabled:
            return

        render_kwargs = {
            "header": "-- Generated via bigquery_etl.glean_usage\n",
            "project_id": project_id,
            "target_view": f"{app_name}.{target_view_name}",
            "datasets": datasets,
            "table": target_view_name,
            "app_name": app_name,
        }

        sql = render(self.cross_channel_template, **render_kwargs)
        view = f"{project_id}.{app_name}.{target_view_name}"

        # Dataset metadata is emitted even if the view ends up being skipped.
        if output_dir:
            write_dataset_metadata(output_dir, view)

        if not referenced_table_exists(sql):
            logging.info(
                f"Skipping view for table which doesn't exist: {view}"
            )
            return

        if output_dir:
            write_sql(output_dir, view, "view.sql", sql)