Example No. 1
  def credentials(self) -> Credentials:
    """Fetch the credentials on demand.

    Returns:
        Credentials: the credentials for this email/project pair.
    """
    return Credentials(email=self.email, project=self.project)
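As later examples on this page show, the property's result feeds straight into service construction. A minimal usage sketch, assuming a hypothetical owner class that sets email and project:

fetcher = ReportFetcher(email='user@example.com', project='my-project')  # hypothetical class
service = discovery.get_service(service=Service.SA360,
                                credentials=fetcher.credentials)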
Example No. 2
    def credentials(self) -> Credentials:
        """Generates the Credentials.

        Returns:
            Credentials: the scheduler credentials.
        """
        return Credentials(email=self.email, project=self.project)
Example No. 3
    def install(self, config: ManagerConfiguration, **unused) -> None:
        if not self.scheduler:
            logging.warning(
                'No scheduler is available: jobs will be stored but not scheduled.'
            )

        results = []
        random.seed(uuid.uuid4())

        runners = self._read_json(config)
        sa360_report_definitions = \
            self.firestore.get_document(self.report_type, '_reports')

        credentials = {}
        services = {}
        for runner in runners:
            id = f"{runner['report']}_{runner['AgencyId']}_{runner['AdvertiserId']}"
            if not runner['dest_dataset']:
                runner['dest_dataset'] = \
                    f'sa360_hourly_depleted_{runner["country_code"].lower()}'

            if not (description := runner.get('description')):
                description = (
                    f'[{runner["country_code"]}] '
                    f'{runner["title"] if "title" in runner else runner["report"]}: '
                    f'{runner["agencyName"]}/{runner["advertiserName"]}')
                runner['description'] = description

            if not (creds := credentials.get(runner['email'])):
                creds = Credentials(project=config.project,
                                    email=runner['email'])
                credentials[runner['email']] = creds
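The per-email cache at the end of this example is worth isolating. A sketch of the idiom (not project code) against the same Credentials API:

credentials = {}

def creds_for(email: str, project: str) -> Credentials:
    # One Credentials object per distinct owner email, so runners that
    # share an owner do not repeat the token lookup.
    if not (creds := credentials.get(email)):
        creds = Credentials(project=project, email=email)
        credentials[email] = creds
    return creds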
Example No. 4
    def validate(self, config: ManagerConfiguration, **unused) -> None:
        sa360_report_definitions = \
            self.firestore.get_document(self.report_type, '_reports')
        validation_results = []

        sa360_objects = self._read_json(config)

        for sa360_object in sa360_objects:
            if sa360_object == '_reports':
                continue
            creds = Credentials(project=config.project,
                                email=sa360_object['email'])
            sa360_service = \
                discovery.get_service(service=Service.SA360, credentials=creds)

            (valid, validation) = \
                self._report_validation(sa360_report_definitions,
                                        sa360_object, sa360_service)
            validation_results.append(validation)

        if validation_results:
            if config.type == ManagerType.BIG_QUERY:
                results = [json.loads(r.to_json()) for r in validation_results]
                # write to BQ
                client = bigquery.Client(project=config.project)
                table = client.dataset(
                    config.dataset).table('sa360_validation')
                job_config = bigquery.LoadJobConfig(
                    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
                    source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON)

                client.load_table_from_json(results,
                                            table,
                                            job_config=job_config)

            else:
                csv_output = f'{config.email}-<now>-validation.csv'
                if config.gcs_stored:
                    csv_bytes = io.StringIO()
                    writer = csv.DictWriter(csv_bytes,
                                            fieldnames=Validation.keys(),
                                            quoting=csv.QUOTE_ALL)
                    writer.writeheader()
                    writer.writerows([r.to_dict() for r in validation_results])
                    Cloud_Storage(project=config.project,
                                  email=config.email).write_file(
                                      bucket=self.bucket,
                                      file=csv_output,
                                      data=csv_bytes.getvalue())

                else:
                    with open(csv_output, 'w', newline='') as csv_file:
                        writer = csv.DictWriter(csv_file,
                                                fieldnames=Validation.keys(),
                                                quoting=csv.QUOTE_ALL)
                        writer.writeheader()
                        writer.writerows(
                            [r.to_dict() for r in validation_results])
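Both output branches rely on the same Validation surface. A short sketch of the contract this code assumes:

header = Validation.keys()  # supplies the CSV field names
csv_rows = [v.to_dict() for v in validation_results]  # CSV branches
bq_rows = [json.loads(v.to_json()) for v in validation_results]  # BigQuery branch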
Example No. 5
class ReportRunner(object):
    report_type = None
    project = None
    email = None

    @lazy_property
    def firestore(self) -> Firestore:
        return Firestore(project=self.project, email=self.email)

    def run(self, unattended: bool):
        """Run the report.

        Args:
            unattended (bool): log the report for later or wait for the result
        """
        pass

    def _email_error(self,
                     message: str,
                     email: str = None,
                     error: Exception = None) -> None:
        """Email the error to the administrator

    Send an email (with errors) to the administrator and/or job owner.

    Args:
        message (str): the message.
        email (str, optional): job owner's email. Defaults to None.
        error (Exception, optional): any error found. Defaults to None.
    """
        _to = [email] if email else []
        _administrator = \
            os.environ.get('ADMINISTRATOR_EMAIL') or self.firestore.get_document(
                Type._ADMIN, 'admin').get('email')
        _cc = [_administrator] if _administrator else []

        _trace = None
        if error:
            _trace = 'Error\n\n' + ''.join(
                traceback.TracebackException.from_exception(error).format())

        if _to or _cc:
            message = GMailMessage(to=_to,
                                   cc=_cc,
                                   subject='Error in report_loader',
                                   body=f'{message}{_trace if _trace else ""}',
                                   project=os.environ.get('GCP_PROJECT'))

            GMail().send_message(message=message,
                                 credentials=Credentials(
                                     email=email,
                                     project=os.environ.get('GCP_PROJECT')))
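A minimal sketch of how a concrete runner might hook the error mailer; the subclass name and body are illustrative, not project code:

class DemoRunner(ReportRunner):  # hypothetical subclass
    def run(self, unattended: bool):
        try:
            ...  # fetch and load the report here
        except Exception as e:
            # Mails the job owner plus the configured administrator.
            self._email_error(message='Report run failed.',
                              email=self.email, error=e)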
Example No. 6
  def oauth_complete(self, request: Request):
    if request.get_data():
      app.logger.info(f'data:\n{request.get_data()}')
      auth_code = str(request.get_data(), encoding='utf-8')

    else:
      app.logger.error('No code sent!')
      return 'AUTH FAIL: No authentication code received.'

    project_credentials = Credentials(project=self.project,
                                      email=None).project_credentials

    credentials = client.credentials_from_code(
        client_id=project_credentials.client_id,
        client_secret=project_credentials.client_secret,
        scope=self.SCOPES,
        code=auth_code
    )

    email = credentials.id_token['email']
    cm = Credentials(project=self.project, email=email)
    cm.store_credentials(creds=credentials)

    return 'Authenticated!'
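The round trip this implies: oauth_complete persists tokens keyed by the authenticated email, and later callers (see Example No. 7) rebuild them from the same project/email pair. A sketch, assuming the constructor reads back what store_credentials saved:

creds = Credentials(project=project, email='user@example.com')
service = discovery.get_service(service=Service.SA360, credentials=creds)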
Example No. 7
def index() -> jinja2.Template:
    """The index method for the appengine.

  Returns:
      Template: The completed html template
  """
    project = os.environ['GOOGLE_CLOUD_PROJECT']
    bucket = f'{project}-report2bq-tokens'

    user_email, user_id = user()

    data = {}

    creds = Credentials(project=project, email=user_email)
    try:
        template = JINJA_ENVIRONMENT.get_template('index.html')
        running_jobs = Scheduler().process(**{
            'action': 'list',
            'project': project,
            'email': user_email
        })
        jobs = []
        for job in running_jobs:
            with suppress(ValueError, KeyError, TypeError):
                _attrs = job.get('pubsubTarget', {}).get('attributes', {})
                _def = Type(_attrs['type'])
                j = {
                    'id': job['name'].split('/')[-1],
                    'description': job.get('description',
                                           '-- No description given --'),
                    'type': _def,
                    'schedule': job['schedule'],
                    'timezone': job['timeZone'],
                }

                j['attributes'] = switch(_def, _attrs)
                jobs.append(j)

        data = {'jobs': jobs, 'user_email': user_email}

    except CredentialsError:
        template = JINJA_ENVIRONMENT.get_template('authenticate.html')
        data = {
            'email': user_email,
            'client_id': creds.project_credentials.client_id,
        }

    return template.render(data)
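The recovery pattern in isolation (a sketch of the logic above): a CredentialsError while listing jobs drops the user back into the OAuth flow.

try:
    running_jobs = Scheduler().process(action='list', project=project,
                                       email=user_email)
except CredentialsError:
    # No valid stored token: send the user back through authentication.
    template = JINJA_ENVIRONMENT.get_template('authenticate.html')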
Example No. 8
    def __init__(self,
                 email: str,
                 project: str,
                 append: bool = False,
                 infer_schema: bool = False):
        self.email = email
        self.project = project
        self.creds = Credentials(email=email, project=project)
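        # Borrows the storage client's own credentials (typically
        # Application Default Credentials) via a private attribute.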
        self.credentials = storage.Client()._credentials
        self.transport = AuthorizedSession(credentials=self.credentials)
        self.append = append
        self.infer_schema = infer_schema

        self.firestore = Firestore(email=email, project=project)

        self.chunk_multiplier = int(os.environ.get('CHUNK_MULTIPLIER', 64))
        self.bucket = f'{self.project}-report2bq-upload'
Example No. 9
def authenticate() -> jinja2.Template:
    """Runs the OAuth2 authentication flow.

  This calls the OAuth flow for an unknown (but valid) user, or one who is
  reauthenticating.

  Returns:
      Template: the JINJA template
  """
    project = os.environ['GOOGLE_CLOUD_PROJECT']
    bucket = f'{project}-report2bq-tokens'

    user_email, user_id = user()
    template = JINJA_ENVIRONMENT.get_template('authenticate.html')
    creds = Credentials(project=project, email=None)
    data = {
        'email': user_email,
        'client_id': creds.project_credentials.client_id,
    }
    return template.render(data)
Example No. 10
    def __init__(self,
                 email: str,
                 project: str,
                 append: bool = False,
                 infer_schema: bool = False) -> None:
        self.email = email
        self.project = project
        self.creds = Credentials(email=email, project=project)
        self.credentials = storage.Client()._credentials
        self.transport = AuthorizedSession(credentials=self.credentials)
        self.append = append
        self.infer_schema = infer_schema

        self.firestore = Firestore(email=email, project=project)

        # chunk_multiplier is set in the environment, but defaults to 64 - this
        # leads to a 64M chunk size we can throw around. Given the memory
        # constraints of a cloud function this seems like a good, safe number.
        self.chunk_multiplier = int(os.environ.get('CHUNK_MULTIPLIER', 64))
        self.bucket = f'{self.project}-report2bq-upload'
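For concreteness, the default works out as follows (a worked line matching the comment above):

chunk_multiplier = int(os.environ.get('CHUNK_MULTIPLIER', 64))
chunk_size = chunk_multiplier * 1024 * 1024  # 64 -> 67,108,864 bytes (64 MiB)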
Example No. 11
    def install(self, config: report_manager.ManagerConfiguration,
                **unused) -> None:
        if not self.scheduler:
            logging.warning(
                'No scheduler is available: jobs will be stored but not scheduled.'
            )

        results = []
        random.seed(uuid.uuid4())
        runners = self._read_json(config)

        for runner in runners:
            id = f'{runner["report"]}_{runner["view_id"]}'
            creds = Credentials(project=config.project,
                                email=runner.get('email'))
            service = discovery.get_service(service=Service.GA360,
                                            credentials=creds)

            self.firestore.update_document(type=self.report_type,
                                           id=id,
                                           new_data=runner)

            # Now schedule.
            if self.scheduler:
                if not (description := runner.get('description')):
                    if title := runner.get('title'):
                        description = title
                    else:
                        description = (
                            f'Runner: report {runner.get("report")}, '
                            f'view_id {runner.get("view_id")}.')
                    runner['description'] = description

                runner['hour'] = runner.get('hour') or '1'
                results.append(
                    self._schedule_job(project=config.project,
                                       runner=runner,
                                       id=id))
Example No. 12
def report_fetch(event: Dict[str, Any], context=None) -> None:
    """Report fetch request processor

  This is the processor that determines which type of report is to be fetched
  and in turn invokes the Report2BQ process. It scans through the parameters
  sent from the Cloud Scheduler task as part of the PubSub message. These are
  stored in the 'event' object.

  Arguments:
      event (Dict[str, Any]):  data sent from the PubSub message
      context (Dict[str, Any]):  context data. unused.
  """
    if attributes := event.get('attributes'):
        logging.info(attributes)
        project = attributes.get('project', os.environ.get('GCP_PROJECT'))

        try:
            kwargs = {
                'email': attributes.get('email'),
                'project': project,
                'report_id': (attributes.get('report_id')
                              or attributes.get('dv360_id')
                              or attributes.get('cm_id')),
                'profile': attributes.get('profile'),
                'sa360_url': attributes.get('sa360_url'),
                'force': attributes.get('force', False),
                'append': attributes.get('append', False),
                'infer_schema': attributes.get('infer_schema', False),
                'dest_project': attributes.get('dest_project'),
                'dest_dataset': attributes.get('dest_dataset', 'report2bq'),
                'notify_message': attributes.get('notify_message'),
                'partition': attributes.get('partition'),
            }
            kwargs.update(attributes)

            if 'type' in attributes:
                kwargs['product'] = Type(attributes['type'])
            elif kwargs.get('sa360_url'):
                kwargs['product'] = Type.SA360
            elif kwargs.get('profile'):
                kwargs['product'] = Type.CM
            else:
                kwargs['product'] = Type.DV360

            if not APIService(project=project).check_api(
                    kwargs['product'].api_name):
                api_not_enabled(kwargs['product'].fetcher(kwargs['report_id']))
            else:
                Report2BQ(**kwargs).run()

        except Exception as e:
            if email := attributes.get('email'):
                message = gmail.create_error_email(email=email,
                                                   product='Report Fetcher',
                                                   event=event,
                                                   error=e)
                gmail.send_message(message,
                                   credentials=Credentials(
                                       project=os.environ['GCP_PROJECT'],
                                       email=email))

            logging.fatal(f'Error: {gmail.error_to_trace(error=e)}')
            return
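An illustrative PubSub payload (values invented) that the resolution chain above maps to Type.DV360, since it carries no 'type', 'sa360_url', or 'profile' attribute:

event = {'attributes': {'email': 'user@example.com',
                        'project': 'my-project',
                        'dv360_id': '123456789'}}
report_fetch(event)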
Example No. 13
    def credentials(self) -> Credentials:
        return Credentials(project=self._project, email=self._email)
Example No. 14
                                                  column_types=None)
                runner['schema'] = schema
                self.firestore.update_document(self.report_type,
                                               self._report_id, runner)

                # Stream CSV to GCS. Should be able to use the un-threaded
                # streamer. We look for a 'CHUNK_MULTIPLIER' setting in the
                # environment, like everywhere else, but default to 128, making
                # the standard chunk size we process 128Mb. Well within the 4Gb
                # we're allowed for a cloud function. If they turn out to be
                # bigger than this (which I don't believe GA360 reports will
                # be), we should move to the ThreadedGCSObjectStreamUpload
                # version.
                chunk_size = int(os.environ.get('CHUNK_MULTIPLIER',
                                                128)) * 1024 * 1024
                streamer = GCSObjectStreamUpload(
                    creds=Credentials(email=self._email,
                                      project=self._project).credentials,
                    bucket_name=f'{self._project}-report2bq-upload',
                    blob_name=f'{self._report_id}.csv',
                    chunk_size=chunk_size)
                streamer.begin()

                output_buffer.seek(0)
                with output_buffer as source:
                    chunk = source.read(chunk_size).encode('utf-8')
                    streamer.write(chunk)

                streamer.stop()
                # Profit!!!

        except Exception as e:
            self._email_error(
Example No. 15
    def client(self):
        return firestore.Client() if self._in_cloud else \
            firestore.Client(credentials=Credentials(
                email=self._email, project=self._project).get_credentials())