Exemple #1
0
def main(unused_argv):
    """Build the job attributes from the command-line flags and dispatch them.

    The attributes are wrapped in a PubSub-style event
    (``{'attributes': ...}``) and passed to ``report_runner`` when the product
    is ``Type.SA360_RPT``; every other product goes to ``report_fetch``.

    Arguments:
        unused_argv:  positional command-line arguments; not used.
    """
    sa360_url = FLAGS.sa360_url
    attributes = dict(
        list_reports=FLAGS.list,
        dv360=FLAGS.dv360,
        cm=FLAGS.cm,
        force=FLAGS.force,
        dv360_id=FLAGS.dv360_id,
        cm_id=FLAGS.cm_id,
        report_id=FLAGS.report_id,
        profile=FLAGS.profile,
        account_id=FLAGS.account,
        email=FLAGS.email,
        in_cloud=True,
        append=FLAGS.append,
        project=FLAGS.project,
        # The url arrives quoted on the command line.
        sa360_url=unquote(sa360_url) if sa360_url else None,
        sa360=bool(sa360_url),
        dest_project=FLAGS.dest_project,
        dest_dataset=FLAGS.dest_dataset,
        infer_schema=FLAGS.infer_schema,
        product=Type(FLAGS.product),
        type=FLAGS.product,
        notify_topic=FLAGS.notify_topic,
        notify_message=FLAGS.notify_message,
    )

    handler = (report_runner
               if attributes['product'] == Type.SA360_RPT else report_fetch)
    handler({'attributes': attributes}, None)
Exemple #2
0
    def process(self, data: Dict[str, Any], context):
        """Check every running report and pass it to the checker for its type.

        Each document returned by ``self.firestore.get_all_running()`` is
        converted to a run configuration, its schedule is fetched, and the
        pair is handed to the DV360, CM or SA360 checker. Documents whose type
        has no checker are logged and skipped. Any other exception is logged
        and ends the pass.

        Arguments:
            data {Dict[str, Any]} -- Data passed in from the calling function,
                                     containing the attributes from the calling
                                     PubSub message; not read by this method
            context {} -- unused
        """
        self.project = os.environ['GCP_PROJECT']
        report_checker = {
            Type.DV360: self._check_dv360_report,
            Type.CM: self._check_cm_report,
            Type.SA360: self._check_sa360_report,
            Type.SA360_RPT: self._check_sa360_report
        }

        try:
            documents = self.firestore.get_all_running()
            for document in documents:
                with suppress(ValueError):
                    run_config = document.get().to_dict()
                    T = Type(run_config.get('type'))

                    # A type without a checker must not raise KeyError here:
                    # that would be caught by the outer handler and stop the
                    # remaining documents from being checked.
                    checker = report_checker.get(T)
                    if checker is None:
                        logging.error('Invalid report: %s', run_config)
                        continue

                    job_config = self._fetch_schedule(type=T,
                                                      run_config=run_config)
                    checker(run_config=run_config, job_config=job_config)

        except Exception as e:
            logging.error(e)
Exemple #3
0
 def test_str(self):
     """Each Type member exposes its string form through ``.value``."""
     members = (Type.DV360, Type.CM, Type._ADMIN, Type(None))
     expected = ['dv360', 'cm', 'administration', 'unknown']
     self.assertEqual(expected, [member.value for member in members])
Exemple #4
0
def index() -> str:
    """The index method for the appengine.

    Renders ``index.html`` with the jobs returned by the scheduler's 'list'
    action for the current user. If that raises ``CredentialsError``,
    ``authenticate.html`` is rendered instead, with the project's client id.

    Returns:
        str: The rendered html (the output of ``template.render``).
    """
    project = os.environ['GOOGLE_CLOUD_PROJECT']
    user_email, _ = user()

    creds = Credentials(project=project, email=user_email)
    try:
        template = JINJA_ENVIRONMENT.get_template('index.html')
        running_jobs = Scheduler().process(**{
            'action': 'list',
            'project': project,
            'email': user_email
        })
        jobs = []
        for job in running_jobs:
            # Jobs missing an expected field, or with an unusable type, are
            # left out of the list rather than failing the whole page.
            with suppress(ValueError, KeyError, TypeError):
                _attrs = job.get('pubsubTarget', {}).get('attributes', {})
                _def = Type(_attrs['type'])
                j = {
                    'id': job['name'].split('/')[-1],
                    'description': job['description']
                    if 'description' in job else '-- No description given --',
                    'type': _def,
                    'schedule': job['schedule'],
                    'timezone': job['timeZone'],
                }

                j['attributes'] = switch(_def, _attrs)
                jobs.append(j)

        data = {'jobs': jobs, 'user_email': user_email}

    except CredentialsError:
        template = JINJA_ENVIRONMENT.get_template('authenticate.html')
        data = {
            'email': user_email,
            'client_id': creds.project_credentials.client_id,
        }

    return template.render(data)
Exemple #5
0
def index():
  """Render the landing page for the current user.

  If a ``<email>_user_token.json`` blob exists in the project's token bucket,
  ``index.html`` is rendered with the jobs returned by the scheduler's 'list'
  action. Otherwise ``authenticate.html`` is rendered with the client id read
  from ``client_secrets.json`` in the same bucket.

  Returns:
      The rendered html (the output of ``template.render``).
  """
  project = os.environ['GOOGLE_CLOUD_PROJECT']
  bucket = f'{project}-report2bq-tokens'
  # json.loads() must be called without ``encoding``: the keyword was removed
  # in Python 3.9 and passing it raises TypeError.
  project_credentials = json.loads(OAuth.fetch_file(
    bucket,
    'client_secrets.json'
  ))

  user_email, _ = user()

  client = storage.Client(credentials=None)
  has_auth = client.get_bucket(bucket).get_blob(f'{user_email}_user_token.json')

  if has_auth:
    template = JINJA_ENVIRONMENT.get_template('index.html')
    running_jobs = Scheduler().process(args={'action': 'list', 'project': project, 'email': user_email})
    jobs = []
    for job in running_jobs:
      # Jobs missing an expected field, or with an unusable type, are left
      # out of the list rather than failing the whole page.
      with suppress(ValueError, KeyError):
        _attrs = job['pubsubTarget']['attributes']
        _def = Type(_attrs['type'])
        j = {
          'id': job['name'].split('/')[-1],
          'description': job['description'] if 'description' in job else '-- No description given --',
          'type': _def,
          'schedule': job['schedule'],
          'timezone': job['timeZone'],
        }

        j['attributes'] = switch(_def, _attrs)
        jobs.append(j)

    data = {'jobs': jobs, 'user_email': user_email}

  else:
    template = JINJA_ENVIRONMENT.get_template('authenticate.html')
    data = {
      'email': user_email,
      'client_id': project_credentials['web']['client_id'],
    }

  return template.render(data)
Exemple #6
0
def main(unused_argv):
  """Assemble the job attributes from the command-line flags and run the job.

  The report type comes from --product when given. Otherwise it is inferred,
  in order, from: an SA360 url, a CM profile, an ADH customer, falling back to
  DV360. SA360_RPT, GA360_RPT and ADH jobs always go to ``report_runner``; the
  other types go to ``report_fetch`` unless --runner is set.

  Arguments:
      unused_argv:  positional command-line arguments; not used.
  """
  attributes = dict(
    force=FLAGS.force,
    dv360_id=FLAGS.dv360_id,
    cm_id=FLAGS.cm_id,
    report_id=FLAGS.report_id,
    profile=FLAGS.profile,
    email=FLAGS.email,
    in_cloud=FLAGS.in_cloud,
    append=FLAGS.append,
    project=FLAGS.project or os.environ.get('GCP_PROJECT'),
    # The url arrives quoted on the command line.
    sa360_url=unquote(FLAGS.sa360_url) if FLAGS.sa360_url else None,
    sa360=bool(FLAGS.sa360_url),
    dest_project=FLAGS.dest_project,
    dest_dataset=FLAGS.dest_dataset,
    dest_table=FLAGS.dest_table,
    infer_schema=FLAGS.infer_schema,
    notify_topic=FLAGS.notify_topic,
    notify_message=FLAGS.notify_message,
    partition=FLAGS.partition or None,
    development=FLAGS.development,
    adh_customer=FLAGS.adh_customer,
    adh_query=FLAGS.adh_query,
    api_key=FLAGS.api_key,
    days=FLAGS.days,
  )

  # An explicit product wins; otherwise infer the type from the other flags.
  if FLAGS.product:
    report_type = Type(FLAGS.product)
  elif attributes.get('sa360_url'):
    report_type = Type.SA360
  elif attributes.get('profile'):
    report_type = Type.CM
  elif attributes['adh_customer']:
    report_type = Type.ADH
  else:
    report_type = Type.DV360
  attributes['type'] = report_type

  runner_only = (Type.SA360_RPT, Type.GA360_RPT, Type.ADH)
  use_runner = report_type in runner_only or FLAGS.runner
  handler = report_runner if use_runner else report_fetch

  handler({'attributes': attributes}, None)
Exemple #7
0
    def process(self, data: Dict[str, Any], context) -> None:
        """Execute the run_monitor.

        Every document returned for ``Type._RUNNING`` is converted to a run
        configuration and its schedule is fetched. When the fetch reports
        success the report is passed to the checker for its type
        (``self._invalid_type`` when the type has none); otherwise the runner
        document is removed. A failure while handling one document is logged
        and emailed, and processing continues with the next document.

        Arguments:
            data (Dict[str, Any]):  Data passed in from the calling function,
                                     containing the attributes from the
                                     calling PubSub message; not read here
            context ():  unused
        """
        self.project = os.environ['GCP_PROJECT']
        report_checker = {
            Type.DV360: self._check_dv360_report,
            Type.CM: self._check_cm_report,
            Type.SA360: self._check_sa360_report,
            Type.SA360_RPT: self._check_sa360_report
        }

        documents = list(self.firestore_client.get_all_documents(
            Type._RUNNING))
        logging.info('To process: %s', ','.join([d.id for d in documents]))
        for document in documents:
            run_config = document.get().to_dict()
            T = Type(run_config.get('type'))
            (success, job_config) = \
                self._fetch_schedule(type=T, run_config=run_config)

            try:
                if success:
                    logging.info('Processing job %s', run_config['report_id'])
                    report_checker.get(T, self._invalid_type)(
                        run_config=run_config, job_config=job_config)
                else:
                    # Invalid job; remove this runner
                    self.remove_report_runner(document.id)

            except Exception as e:
                logging.error(gmail.error_to_trace(e))
                # Use .get(): the failure being reported may itself be a
                # missing 'report_id', and indexing here would raise again
                # from inside the handler and abort the remaining documents.
                report_id = run_config.get('report_id')
                self._email_error(message=(
                    f'Error in run monitor for job {report_id}. Report'
                    f' config is: {job_config}'),
                                  error=e)
Exemple #8
0
def main(unused_argv):
    """List, back up or restore report definitions from the command line.

    A fetcher is created for --product. For CM:
      --list     prints every report with its profile, type and schedule.
      --backup   writes the report definition to
                 ``config_files/<report_id>.json``.
      --restore  reads that file back, keeps only the keys needed for a new
                 report, applies --new_name if given and pretty-prints the
                 result (nothing is sent to the API).
    For any other product (DV360):
      --list     prints every query with its type and schedule.
      --backup   writes the report definition to ``<report_id>.json``.
      --restore  creates a report from ``<report_id>.json`` and writes the
                 response to ``<report_id>_new.json``.

    Arguments:
        unused_argv:  positional command-line arguments; not used.
    """
    fetcher = fetcher_factory.create_fetcher(Type(FLAGS.product),
                                             email=FLAGS.email,
                                             project=FLAGS.project,
                                             profile=FLAGS.profile)

    if fetcher.report_type == Type.CM:
        if FLAGS.list:
            reports = fetcher.get_reports()
            if reports:
                print('Report list')
                print('')
                for report in reports:
                    print((
                        f'ID [{report["id"]}] on profile [{report["ownerProfileId"]}],'
                        f' "{report["name"]}". Type [{report["type"]}], '
                        f'running {report["schedule"]["repeats"] if report["schedule"]["active"] else "MANUAL"}'
                    ))

        if FLAGS.backup:
            report = fetcher.get_report_definition(
                profile_id=FLAGS.profile,
                report_id=FLAGS.report_id,
            )
            with open(f'config_files/{FLAGS.report_id}.json',
                      'w') as report_file:
                report_file.write(json.dumps(report, indent=2))

        if FLAGS.restore:
            keys_wanted = [
                'format', 'name', 'criteria', 'schedule', 'delivery'
            ]

            # Open for reading: mode 'w' would truncate the backup and make
            # json.load() fail on a write-only handle.
            with open(f'config_files/{FLAGS.report_id}.json',
                      'r') as report_file:
                report = json.load(report_file)

            new_report = {
                key: report[key] for key in report if key in keys_wanted
            }
            if FLAGS.new_name:
                new_report['name'] = FLAGS.new_name

            pprint(new_report)

    else:
        # DV360
        if FLAGS.list:
            reports = fetcher.get_reports()
            if reports:
                print('Report list')
                print('')
                # Tolerate a response without a 'queries' entry.
                for report in reports.get('queries') or []:
                    print('ID [{id}], "{name}". Type [{type}], running {run}'.
                          format(id=report['queryId'],
                                 name=report['metadata']['title'],
                                 type=report['params']['type'],
                                 run=report['schedule']['frequency']
                                 if 'schedule' in report else 'MANUAL'))

        if FLAGS.backup:
            report = fetcher.get_report_definition(report_id=FLAGS.report_id)
            with open(f'{FLAGS.report_id}.json', 'w') as report_file:
                report_file.write(json.dumps(report, indent=2))

        if FLAGS.restore:
            with open(f'{FLAGS.report_id}.json', 'r') as report_file:
                report = json.load(report_file)
                new_report = fetcher.create_report(report=report)

            with open(f'{FLAGS.report_id}_new.json', 'w') as report_file:
                # create_report may hand back a plain string; write that
                # as-is and serialise anything else as JSON.
                if isinstance(new_report, str):
                    report_file.write(new_report)

                else:
                    report_file.write(json.dumps(new_report, indent=2))
Exemple #9
0
def report_fetch(event: Dict[str, Any], context=None) -> None:
    """Report fetch request processor

    This is the processor that determines which type of report is to be
    fetched and in turn invokes the Report2BQ process. It scans through the
    parameters sent from the Cloud Scheduler task as part of the PubSub
    message. These are stored in the 'event' object.

    The product is taken from the 'type' attribute when present; otherwise it
    is SA360 if an 'sa360_url' is given, CM if a 'profile' is given, and DV360
    in every other case. If the product's API is not enabled for the project,
    ``api_not_enabled`` is called instead of running the fetch.

    Any exception is logged; when the attributes carry an 'email', an error
    email is sent to it first. Nothing happens when the event has no
    attributes.

    Arguments:
        event (Dict[str, Any]):  data sent from the PubSub message
        context (Dict[str, Any]):  context data. unused.
    """
    if attributes := event.get('attributes'):
        logging.info(attributes)
        # `or` rather than a .get() default: a 'project' key that is present
        # but None must still fall back to the environment.
        project = attributes.get('project') or os.environ.get('GCP_PROJECT')

        try:
            kwargs = {
                'email':
                attributes.get('email'),
                'project':
                project,
                'report_id':
                attributes.get('report_id') or attributes.get('dv360_id')
                or attributes.get('cm_id'),
                'profile':
                attributes.get('profile'),
                'sa360_url':
                attributes.get('sa360_url'),
                'force':
                attributes.get('force', False),
                'append':
                attributes.get('append', False),
                'infer_schema':
                attributes.get('infer_schema', False),
                'dest_project':
                attributes.get('dest_project'),
                'dest_dataset':
                attributes.get('dest_dataset') or 'report2bq',
                'notify_message':
                attributes.get('notify_message'),
                'partition':
                attributes.get('partition')
            }
            # Pass every attribute through, but never let an explicit None
            # overwrite a value computed above (e.g. 'report_id': None would
            # otherwise discard the dv360_id/cm_id fallback).
            kwargs.update({
                key: value
                for key, value in attributes.items()
                if value is not None or key not in kwargs
            })

            if 'type' in attributes:
                kwargs['product'] = Type(attributes['type'])
            elif kwargs.get('sa360_url'):
                kwargs['product'] = Type.SA360
            elif kwargs.get('profile'):
                kwargs['product'] = Type.CM
            else:
                kwargs['product'] = Type.DV360

            if not APIService(project=project).check_api(
                    kwargs['product'].api_name):
                api_not_enabled(kwargs['product'].fetcher(kwargs['report_id']))
            else:
                Report2BQ(**kwargs).run()

        except Exception as e:
            if email := attributes.get('email'):
                message = gmail.create_error_email(email=email,
                                                   product='Report Fetcher',
                                                   event=event,
                                                   error=e)
                gmail.send_message(message,
                                   credentials=Credentials(
                                       project=os.environ['GCP_PROJECT'],
                                       email=email))

            logging.fatal(f'Error: {gmail.error_to_trace(error=e)}')
Exemple #10
0
def report_runner(event: Dict[str, Any], context=None) -> None:
    """Run a DV360, CM, SA360, ADH or GA360 report on demand

    This allows a user to issue the API-based run report directive to start
    unscheduled, unschedulable (ie today-based) or simply control the run time
    of DV360/CM and ADH reports. A job kicked off using this process will be
    monitored by the "run-monitor", or can simply be left if a "fetcher" is
    enabled.

    The runner is chosen from the 'type' attribute. When the API for that type
    is not enabled on the project, ``api_not_enabled`` is used in place of the
    runner class and is called with the same arguments. Nothing happens when
    the event has no attributes or the type has no runner.

    Arguments:
        event (Dict[str, Any]):  data sent from the PubSub message
        context (Dict[str, Any]):  context data. unused
    """
    if attributes := event.get('attributes'):
        project = attributes.get('project', os.environ.get('GCP_PROJECT'))
        T = Type(attributes.get('type'))
        _base_args = {
            'email': attributes.get('email'),
            'project': project,
        }

        # The API to check depends only on T, so check it once instead of
        # once per entry of the dispatch table below.
        _api_enabled = APIService(project=project).check_api(T.api_name)

        def _runner_for(runner_class):
            """Return runner_class, or api_not_enabled if the API is off."""
            return runner_class if _api_enabled else api_not_enabled

        if _command := {
                Type.DV360: {
                    'runner': _runner_for(DBMReportRunner),
                    'args': {
                        'dbm_id':
                        attributes.get('dv360_id')
                        or attributes.get('report_id'),
                        **_base_args,
                    },
                },
                Type.CM: {
                    'runner': _runner_for(DCMReportRunner),
                    'args': {
                        'cm_id':
                        attributes.get('cm_id') or attributes.get('report_id'),
                        'profile':
                        attributes.get('profile', None),
                        **_base_args,
                    }
                },
                Type.SA360_RPT: {
                    'runner': _runner_for(SA360ReportRunner),
                    'args': {
                        'report_id': attributes.get('report_id'),
                        'timezone': attributes.get("timezone", None),
                        **_base_args,
                    }
                },
                Type.ADH: {
                    'runner': _runner_for(ADH),
                    'args': {
                        'adh_customer': attributes.get('adh_customer'),
                        'adh_query': attributes.get('adh_query'),
                        'api_key': attributes.get('api_key'),
                        'days': attributes.get('days', 60),
                        'dest_project': attributes.get('dest_project', None),
                        'dest_dataset': attributes.get('dest_dataset', None),
                        **_base_args,
                    }
                },
                Type.GA360_RPT: {
                    'runner': _runner_for(GA360ReportRunner),
                    'args': {
                        'report_id': attributes.get('report_id'),
                        **_base_args,
                    }
                },
        }.get(T):
            _command['runner'](**_command['args']).run()
Exemple #11
0
 def test_valid_internals(self):
     """The internal type names resolve to their private Type members."""
     names = ('administration', 'jobs', 'running')
     expected = [Type._ADMIN, Type._JOBS, Type._RUNNING]
     self.assertEqual(expected, list(map(Type, names)))
Exemple #12
0
 def test_unknown(self):
     """An unrecognised name and None both resolve to Type._UNKNOWN."""
     resolved = list(map(Type, ('foo', None)))
     self.assertEqual([Type._UNKNOWN] * 2, resolved)
Exemple #13
0
 def test_valid_enum_rewrite(self):
     """The names 'dbm', 'dcm' and 'ga360' are rewritten to current members."""
     legacy_names = ('dbm', 'dcm', 'ga360')
     expected = [Type.DV360, Type.CM, Type.GA360_RPT]
     self.assertEqual(expected, list(map(Type, legacy_names)))
Exemple #14
0
 def test_valid_current_enum(self):
     """The name 'dv360' resolves to Type.DV360."""
     resolved = Type('dv360')
     self.assertEqual(Type.DV360, resolved)
Exemple #15
0
def main(unused_argv):
    """List, back up or restore report definitions from the command line.

    A fetcher is created for --product. For CM, --list prints each report,
    --backup writes the definition to ``config_files/<report_id>.json`` and
    --restore reads that file back, keeps only the keys needed for a new
    report, applies --new_name if given and pretty-prints the result. For any
    other product (DV360) only --list is handled.

    Arguments:
        unused_argv:  positional command-line arguments; not used.
    """
    fetcher = FetcherFactory.create_fetcher(Type(FLAGS.product),
                                            email=FLAGS.email,
                                            project=FLAGS.project,
                                            profile=FLAGS.profile)

    if fetcher.report_type != Type.CM:
        # DV360
        if FLAGS.list:
            reports = fetcher.get_reports()
            if reports:
                print('Report list')
                print('')
                for report in reports.get('queries'):
                    fields = {
                        'id': report['queryId'],
                        'name': report['metadata']['title'],
                        'type': report['params']['type'],
                        'run': report['schedule']['frequency']
                        if 'schedule' in report else 'MANUAL',
                    }
                    print('ID [{id}], "{name}". Type [{type}], running {run}'.
                          format(**fields))
        return

    if FLAGS.list:
        reports = fetcher.get_reports()
        if reports:
            print('Report list')
            print('')
            for report in reports:
                fields = {
                    'id': report['id'],
                    'name': report['name'],
                    'type': report['type'],
                    'run': report['schedule']['repeats']
                    if report['schedule']['active'] else 'MANUAL',
                    'profile': report['ownerProfileId'],
                }
                print(
                    'ID [{id}] on profile [{profile}], "{name}". Type [{type}], running {run}'
                    .format(**fields))

    if FLAGS.backup:
        report = fetcher.get_report_definition(
            profile_id=FLAGS.profile,
            report_id=FLAGS.report_id,
        )
        backup_path = 'config_files/{report}.json'.format(
            report=FLAGS.report_id)
        with open(backup_path, 'w') as report_file:
            report_file.write(json.dumps(report, indent=2))

    if FLAGS.restore:
        keys_wanted = ['format', 'name', 'criteria', 'schedule', 'delivery']

        restore_path = 'config_files/{report}.json'.format(
            report=FLAGS.report_id)
        with open(restore_path) as report_file:
            report = json.load(report_file)

        # Keep only the parts of the saved definition listed above.
        new_report = {
            key: report[key] for key in report if key in keys_wanted
        }
        if FLAGS.new_name:
            new_report['name'] = FLAGS.new_name

        pprint(new_report)
Exemple #16
0
                    'ADH customer id')
# ADH options.
flags.DEFINE_string('adh_query', None, 'ADH query id')
flags.DEFINE_string('api_key', None, 'ADH Developer Key')
flags.DEFINE_integer('days', 60, 'Number of days lookback, default is 60')

# Product selection. The flag is optional; when given it must not resolve to
# the 'unknown' type. The error message lists every Type member except the
# internal ones.
flags.DEFINE_string('product', None, 'Product type')
flags.register_validator(
    'product',
    lambda p: not p or (Type(p).value != 'unknown'),
    message='Invalid product type given. Must be one of {}'.format([
        T for T in list(Type) if T not in [
            Type._ADMIN,
            Type._COMPLETED,
            Type._JOBS,
            Type._RUNNING,
            Type._UNKNOWN,
        ]
    ]))

# Load behaviour.
flags.DEFINE_boolean('force', False,
                     'Force update, regardless of last update time.')
flags.DEFINE_boolean('rebuild_schema', False, 'Rescan the file for schema.')
flags.DEFINE_boolean(
    'append', False,
    'Append the data to the existing table instead of replacing.')
flags.DEFINE_string('partition', None, 'Create a partitioned table in BQ.')
flags.DEFINE_boolean('in_cloud', True, 'All storage is in GCS.')
Exemple #17
0
def report_runner(event: Dict[str, Any], context=None):
    """Run a DV360, CM, SA360 or ADH report on demand

    This allows a user to issue the API-based run report directive to start
    unscheduled, unschedulable (ie today-based) or simply control the run time
    of DV360/CM and ADH reports. A job kicked off using this process will be
    monitored by the "run-monitor", or can simply be left if a "fetcher" is
    enabled.

    A missing or invalid report type is logged and nothing is run. Any
    exception is logged together with the event; when the attributes carry an
    'email', an error email is sent to it first.

    Arguments:
        event {Dict[str, Any]} -- data sent from the PubSub message
        context {Dict[str, Any]} -- context data. unused
    """
    email = None

    if 'attributes' in event:
        attributes = event['attributes']
        try:
            logging.info(attributes)
            if 'type' not in attributes:
                logging.error('No report type specified.')
                return

            # Remember the address before reading any other required
            # attribute, so that a missing one can still be reported by email.
            email = attributes.get('email')
            # .get(): an absent 'project' falls back to the environment
            # instead of raising KeyError.
            project = (attributes.get('project')
                       or os.environ.get('GCP_PROJECT'))
            report_type = Type(attributes['type'])

            if report_type == Type.DV360:
                runner = DBMReportRunner(
                    dbm_id=attributes.get('dv360_id')
                    or attributes.get('report_id'),
                    email=attributes['email'],
                    project=project)

            elif report_type == Type.CM:
                runner = DCMReportRunner(
                    cm_id=attributes.get('cm_id')
                    or attributes.get('report_id'),
                    profile=attributes.get('profile', None),
                    email=attributes['email'],
                    project=project)

            elif report_type == Type.SA360_RPT:
                runner = SA360ReportRunner(
                    report_id=attributes['report_id'],
                    email=attributes['email'],
                    project=project,
                    timezone=attributes.get("timezone", None))

            elif report_type == Type.ADH:
                # Always run this as async: forcing to be certain
                runner = ADH(email=attributes['email'],
                             project=project,
                             adh_customer=attributes['adh_customer'],
                             adh_query=attributes['adh_query'],
                             api_key=attributes['api_key'],
                             days=attributes.get('days', 60),
                             dest_project=attributes.get('dest_project'),
                             dest_dataset=attributes.get('dest_dataset'))

            else:
                logging.error(
                    'Invalid report type specified: {type}'.format(
                        type=attributes['type']))
                return

            runner.run(unattended=True)

        except Exception as e:
            if email:
                email_error(email=email,
                            product="report_runner",
                            event=event,
                            error=e)

            # '\n' replaces the original invalid escape sequence '\E'.
            logging.fatal(f'Error: {e}\nEvent Data supplied: {event}')
            return