Example No. 1
    def firestore(self) -> Firestore:
        """The Firestore client wrapper

    Returns:
        Firestore: the wrapper
    """
        return Firestore()
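This accessor pattern recurs throughout the collection (see Examples No. 3, 16, 20, 21 and 23): a tiny method that builds the Firestore wrapper on every call instead of caching it. Below is a minimal sketch of how such an accessor is typically exposed and used; the ReportFetcher class, the @property decorator and the constructor arguments are illustrative assumptions, not part of the snippet above.

# Minimal usage sketch of the lazy-accessor pattern (hypothetical class).
from classes.firestore import Firestore

class ReportFetcher:
    def __init__(self, email: str, project: str) -> None:
        self.email = email
        self.project = project

    @property
    def firestore(self) -> Firestore:
        # A fresh wrapper is created on every access; nothing is cached.
        return Firestore(email=self.email, project=self.project)

# fetcher = ReportFetcher(email='user@example.com', project='my-project')
# fetcher.firestore.update_document(...)  # new wrapper each time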
Example No. 2
  def __init__(self, 
    email: str, project: str, adh_customer: str,
    adh_query: str, api_key: str, days: int,
    dest_project: str=None, dest_dataset: str=None):
    """Constructor

    Sets up the ADH helper.
    
    Arguments:
        email {str} -- authenticated user email (for the token)
        project {str} -- GCP project
        adh_customer {str} -- ADH customer id, 9-digit number, NO DASHES
        adh_query {str} -- ADH query id
        api_key {str} -- API Key (has to be set up in APIs and Libraries in GCP)
        days {int} -- Lookback window (default: 60)
        dest_project {str} -- target GCP project for results
        dest_dataset {str} -- target BQ dataset for results
    """
    self.email = email
    self.project = project
    self.adh_customer = adh_customer
    self.adh_query = adh_query
    self.api_key = api_key
    self.days = days
    self.dest_project = dest_project
    self.dest_dataset = dest_dataset

    self.credentials = Credentials(email=email, project=project)
    self.storage = Cloud_Storage(email=email, project=project)
    self.firestore = Firestore(email=email, project=project)
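A hedged sketch of constructing the helper documented above; the ADH class name and every argument value are illustrative assumptions, but the parameters mirror the docstring.

# Hypothetical construction of the ADH helper described by the docstring above.
adh = ADH(
    email='user@example.com',
    project='my-project',
    adh_customer='123456789',    # 9-digit ADH customer id, no dashes
    adh_query='query-id',
    api_key='AIza-example-key',  # API key set up in APIs & Services in GCP
    days=60,                     # lookback window
    dest_project='my-project',
    dest_dataset='adh_results',
)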
Example No. 3
  def firestore(self) -> Firestore:
    """Fetch the Firestore client on demand.

    Returns:
        Firestore: firestore client
    """
    return Firestore()
Example No. 4
    def manage(self, **kwargs):
        firestore = Firestore(project=kwargs['project'], email=kwargs['email'])

        args = {
            'report': kwargs.get(
                'name',
                kwargs.get('file').split('/')[-1].split('.')[0]
                if kwargs.get('file') else None),
            'file': kwargs.get('file'),
            'firestore': firestore,
            'project': kwargs['project'],
            'email': kwargs['email'],
            **kwargs,
        }

        action = {
            'list': self.list_all,
            'show': self.show,
            'add': self.add,
            'delete': self.delete,
        }.get(kwargs['action'])

        if action:
            return action(**args)

        else:
            raise NotImplementedError()
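A hedged sketch of driving the dispatcher above from a caller; the SA360Manager class name and the argument values are illustrative assumptions.

# Hypothetical invocation of the manage() dispatcher above.
manager = SA360Manager()
manager.manage(
    action='add',                      # selects self.add from the action map
    project='my-project',
    email='user@example.com',
    file='reports/weekly_spend.json',  # 'report' defaults to 'weekly_spend'
)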
Example No. 5
  def __init__(self, cm_id: str = None, profile: str = None,
               email: str = None, project: str = None):
    self.email = email
    self.cm_id = cm_id
    self.cm_profile = profile
    self.project = project
    self.firestore = Firestore(email=email, project=project)
Example No. 6
  def __init__(self, report_id: str, email: str, project: str=None, timezone: str=None):
    self.email = email
    self.report_id = report_id
    self.project = project
    self.timezone = timezone

    self.firestore = Firestore()
Example No. 7
  def __init__(self,
               dbm_id: str = None,
               email: str = None,
               project: str = None):
    self.email = email
    self.dbm_id = dbm_id
    self.project = project
    self.firestore = Firestore(email=email, project=project)
Example No. 8
def main(unusedargv):
    scheduler = Scheduler()
    with open(FLAGS.file) as reports:
        runners = json.load(reports)

        for runner in runners:
            doc_id = f"{runner['report']}_{runner['AgencyId']}_{runner['AdvertiserId']}"
            Firestore().update_document(Type.SA360_RPT, doc_id, runner)
Example No. 9
    def __init__(self,
                 email: str,
                 project: str,
                 append: bool = False,
                 infer_schema: bool = False):
        self.email = email
        self.project = project
        self.creds = Credentials(email=email, project=project)
        self.credentials = storage.Client()._credentials
        self.transport = AuthorizedSession(credentials=self.credentials)
        self.append = append
        self.infer_schema = infer_schema

        self.firestore = Firestore(email=email, project=project)

        self.chunk_multiplier = int(os.environ.get('CHUNK_MULTIPLIER', 64))
        self.bucket = f'{self.project}-report2bq-upload'
Example No. 10
    def oauth_init(self, request: Request, project: str, email: str):
        project_credentials = json.loads(Files.fetch_file(
            f'{project}-report2bq-tokens', 'client_secrets.json'))

        _flow = flow.Flow.from_client_config(client_config=project_credentials,
                                             scopes=self.SCOPES)

        _flow.redirect_uri = f"https://{os.environ.get('FUNCTION_REGION')}-{os.environ.get('GCP_PROJECT')}.cloudfunctions.net/OAuthComplete"

        authorization_url, state = _flow.authorization_url(
            access_type='offline', include_granted_scopes='true')

        firestore = Firestore()
        firestore.store_oauth_state(state=state, email=email, project=project)

        return redirect(authorization_url)
Example No. 11
    def __init__(self,
                 email: str,
                 project: str,
                 append: bool = False,
                 infer_schema: bool = False) -> None:
        self.email = email
        self.project = project
        self.creds = Credentials(email=email, project=project)
        self.credentials = storage.Client()._credentials
        self.transport = AuthorizedSession(credentials=self.credentials)
        self.append = append
        self.infer_schema = infer_schema

        self.firestore = Firestore(email=email, project=project)

        # chunk_multiplier is set in the environment, but defaults to 64 - this
        # leads to a 64M chunk size we can throw around. Given the memory
        # constraints of a cloud function this seems like a good, safe number.
        self.chunk_multiplier = int(os.environ.get('CHUNK_MULTIPLIER', 64))
        self.bucket = f'{self.project}-report2bq-upload'
Example No. 12
    def oauth_complete(self, request: Request):
        logging.info(request.args)

        state = request.args.get('state', type=str)
        firestore = Firestore()
        email, project = firestore.get_oauth_state(state)

        project_credentials = json.loads(Files.fetch_file(
            f'{project}-report2bq-tokens', 'client_secrets.json'))

        _flow = flow.Flow.from_client_config(client_config=project_credentials,
                                             scopes=self.SCOPES)
        _flow.redirect_uri = f"https://{os.environ.get('FUNCTION_REGION')}-{os.environ.get('GCP_PROJECT')}.cloudfunctions.net/OAuthComplete"

        r = urlparse(request.url)
        auth_response = urlunparse(
            ['https', r.netloc, r.path, r.params, r.query, r.fragment])
        _flow.fetch_token(authorization_response=auth_response)

        logging.info(_flow.credentials)

        token_details = {
            'access_token': _flow.credentials.token,
            'refresh_token': _flow.credentials.refresh_token
        }

        Cloud_Storage.write_file(
            '{project}-report2bq-tokens'.format(project=project),
            '{email}_user_token.json'.format(email=email),
            json.dumps(token_details).encode('utf-8'))

        firestore.delete_oauth_state(state=state)

        return 'Ok'
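Examples No. 10 and 12 are the two halves of a web OAuth flow: oauth_init stores a state token in Firestore and redirects the user to the consent screen, and oauth_complete exchanges the returned authorization code for tokens, writes them to Cloud Storage, and deletes the state. Here is a hedged sketch of how the pair might be exposed as HTTP Cloud Functions; the OAuth class name and the entry-point names are assumptions.

# Hypothetical HTTP entry points wrapping the two handlers above.
def oauth_request(request):
    # Kick off the flow: store state in Firestore, redirect to the consent screen.
    return OAuth().oauth_init(request,
                              project=os.environ.get('GCP_PROJECT'),
                              email=request.args.get('email'))

def oauth_response(request):
    # Google redirects back here; the stored state recovers the email/project pair.
    return OAuth().oauth_complete(request)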
Example No. 13
    def __init__(self,
                 product: Type,
                 email=None,
                 project=None,
                 report_id=None,
                 profile=None,
                 sa360_url=None,
                 force: bool = False,
                 append: bool = False,
                 infer_schema: bool = False,
                 dest_project: str = None,
                 dest_dataset: str = 'report2bq',
                 notify_topic: str = None,
                 notify_message: str = None):
        self.product = product

        self.force = force
        self.email = email
        self.append = append
        self.infer_schema = infer_schema

        self.report_id = report_id

        self.sa360_url = unquote(sa360_url) if sa360_url else None

        self.cm_profile = profile

        self.project = project

        self.dest_project = dest_project
        self.dest_dataset = dest_dataset

        self.notify_topic = notify_topic
        self.notify_message = notify_message

        self.firestore = Firestore(email=email, project=project)
Example No. 14
    def check_running_report(self, config: Dict[str, Any]):
        """Check a running CM report for completion.

        Arguments:
            config {Dict[str, Any]} -- The report data structure from Firestore
        """
        append = config.get('append', False) if config else False
        response = self.report_state(report_id=config['id'],
                                     file_id=config['report_file']['id'])
        status = response.get('status', 'UNKNOWN') if response else 'UNKNOWN'

        logging.info('Report {report} status: {status}'.format(
            report=config['id'], status=status))
        firestore = Firestore(email=config['email'], project=self.project)
        if status == 'REPORT_AVAILABLE':
            # Remove job from running
            firestore.remove_report_runner(config['id'])

            # Send pubsub to trigger report2bq now
            topic = 'projects/{project}/topics/report2bq-trigger'.format(
                project=self.project)
            publisher = pubsub.PublisherClient()
            publisher.publish(topic=topic,
                              data=b'RUN',
                              cm_id=config['id'],
                              profile=config['profile_id'],
                              email=config['email'],
                              append=str(append),
                              project=self.project)

        elif status == 'FAILED' or status == 'CANCELLED':
            # Remove job from running
            logging.error(
                f'Report {config["id"]}: {inflection.humanize(status)}.')
            firestore.remove_report_runner(config['id'])
Example No. 15
class ReportLoader(object):
  """Run the report loading process
  
  This performs the CSV import into BQ. It is triggered by a finalize/create on a
  monitored GCS bucket, and will ONLY process CSVs. All other files written to that
  bucket will result in an error in the logs. The file must be named the same as the report
  id that is stored in Firestore - this is how the process knows which table/schema to use.

  Once started, the BQ Import Job (of type google.cloud.bigquery.LoadJob) is stored in 
  Firestore, under the 'jobs' key. This is then monitored for completion by JobMonitor.
  """
  CS = storage.Client()     # uses default service account credentials
  FIRESTORE = Firestore()   # uses default service account credentials


  def process(self, data: Dict[str, Any], context):
    """Process an added file

    This is the entry point for the Cloud Function to create the BQ import job.
    
    Arguments:
        data {Dict[str, Any]} -- data sent from the PubSub message
        context {Dict[str, Any]} -- context data. unused
    """
    logging.info(data)
    bucket_name = data['bucket']
    file_name = data['name']

    if file_name.upper().endswith('CSV'):
      logging.info('Processing CSV file %s' % file_name)

      try:
        self._handle_csv(bucket_name, file_name)

      except Exception as e:
        logging.error('Error processing file %s\n%s' % (file_name, e))

    else:
      # Ignore it, it's probably the schema
      logging.warning('File added that will not be processed: %s' % file_name)


  def _get_report_config(self, id: str) -> (Type, Dict[str, Any]):
    """Fetch the report configuration

    Load the stored report configuration from Firestore and return the report type
    and config as a tuple
    
    Arguments:
        id {str} -- Report Id, aka CSV file name
    
    Returns:
        (Type, Dict[str, Any]) -- Tuple containing the report type as an Enum, and the
        report configuration.
    """
    config = None
    for config_type in [Type.DV360, Type.CM, Type.SA360, Type.SA360_RPT]:
      config = self.FIRESTORE.get_report_config(config_type, id)
      if config: return config_type, config

    return None, None


  def _handle_csv(self, bucket_name: str, file_name: str):
    """Handle the CSV file

    Work out which type of job it is and send it to the appropriate uploader
    
    Arguments:
        bucket_name {str} -- name of the source bucket
        file_name {str} -- name of the CSV file
    """
    # Load config file. Must be present to continue
    # This could be either DBM/DV360 or (D)CM
    report_id = file_name.split('/')[-1].split('.')[0]
    config_type, config = self._get_report_config(report_id)

    if not config_type:
      self._email_error(f'No config found for report {report_id}')
      raise Exception(f'No config found for report {report_id}')

    logging.info(config)

    # Insert with schema and table name from config
    if config_type == Type.DV360:
      job = self._import_dbm_report(bucket_name, file_name, config)

    elif config_type == Type.CM:
      job = self._import_dcm_report(bucket_name, file_name, config)

    elif config_type == Type.SA360:
      job = self._import_sa360_report(bucket_name, file_name, config)

    elif config_type == Type.SA360_RPT:
      job = self._import_sa360_report(bucket_name, file_name, config)

    # Store the completed job in Firestore
    if job:
      self.FIRESTORE.store_import_job_details(report_id, job)


  def _import_dbm_report(self, bucket_name, file_name, config) -> bigquery.LoadJob:
    """Begin DV360 import

    These functions are currently identical (they used not to be), but are kept separate
    because each product's CSVs could become subtly different at some point, or one
    product or another may switch from CSV to (say) JSON.
    
    Arguments:
        bucket_name {str} -- GCS bucket name
        file_name {str} -- CSV file name
        config {Dict[str, Any]} -- report config
    
    Returns:
        bigquery.LoadJob
    """
    return self._import_report(bucket_name, file_name, config)


  def _import_dcm_report(self, bucket_name, file_name, config):
    """Begin CM import

    These functions are currently identical (they used not to be), but are kept separate
    because each product's CSVs could become subtly different at some point, or one
    product or another may switch from CSV to (say) JSON.
    
    Arguments:
        bucket_name {str} -- GCS bucket name
        file_name {str} -- CSV file name
        config {Dict[str, Any]} -- report config
    
    Returns:
        bigquery.LoadJob
    """
    return self._import_report(bucket_name, file_name, config)


  def _import_sa360_report(self, bucket_name, file_name, config):
    """Begin SA360 import

    These functions are currently identical (they used not to be), but are kept separate
    because each product's CSVs could become subtly different at some point, or one
    product or another may switch from CSV to (say) JSON.
    
    Arguments:
        bucket_name {str} -- GCS bucket name
        file_name {str} -- CSV file name
        config {Dict[str, Any]} -- report config
    
    Returns:
        bigquery.LoadJob
    """
    return self._import_report(bucket_name, file_name, config)


  def _import_report(self, bucket_name: str, file_name: str, config: dict) -> bigquery.LoadJob:
    """Begin CSV import

    Create and start the Big Query import job.

    Arguments:
        bucket_name {str} -- GCS bucket name
        file_name {str} -- CSV file name
        config {Dict[str, Any]} -- report config
    
    Returns:
        bigquery.LoadJob
    """
    if config.get('dest_project'):
      # authenticate against supplied project with supplied key
      project = config.get('dest_project') or os.environ.get('GCP_PROJECT')
      client_key = json.loads(Cloud_Storage.fetch_file(
        bucket=f"{os.environ.get('GCP_PROJECT')}-report2bq-tokens",
        file=f"{config['email']}_user_token.json"
      ))
      server_key = json.loads(Cloud_Storage.fetch_file(
        bucket=f"{os.environ.get('GCP_PROJECT')}-report2bq-tokens",
        file='client_secrets.json'
      ))
      client_key['client_id'] = (server_key.get('web') or server_key.get('installed')).get('client_id')
      client_key['client_secret'] = (server_key.get('web') or server_key.get('installed')).get('client_secret')
      logging.info(client_key)
      creds = Credentials.from_authorized_user_info(client_key)
      bq = bigquery.Client(project=project, credentials=creds)

    else:
      project = os.environ.get('GCP_PROJECT')
      bq = bigquery.Client()

    dataset = config.get('dest_dataset') or os.environ.get('BQ_DATASET') or 'report2bq'

    table_name = config.get('table_name', CSVHelpers.sanitize_string(file_name))
    logging.info(f'bucket {bucket_name}, table {table_name}, file_name {file_name}')

    json_schema = config['schema']
    schema = []
    _json_schema = []
    # Build the json format schema that the BQ LoadJob requires from the text-based ones in the config
    for field in json_schema:
      f = bigquery.schema.SchemaField(name=field['name'],
                                      field_type=field['type'],
                                      mode=field['mode'])
      schema.append(f)
      _json_schema.append(f'{field["name"]}: {field["type"]}')

    table_ref = bq.dataset(dataset).table(table_name)

    # Default action is to completely replace the table each time. If requested, however,
    # we can append instead - useful for (say) huge jobs where you would load the table
    # with 60 days once and then append 'yesterday' each day.
    if config.get('append', False):
      if self._table_exists(bq, table_ref) and not self._validate_schema(bq, table_ref, schema):
        config_schema = '\n'.join([ f'{field.name}, {field.field_type}' for field in schema])
        target_schema = '\n'.join([ f'{field.name}, {field.field_type}' for field in bq.get_table(table_ref).schema])
        self._email_error(
          email=config['email'], 
          message=f'''
Mismatched schema for {project}.{dataset}.{table_name}, trying anyway

Report has schema:
{config_schema}

Table has schema:
{target_schema}
'''
        )
        logging.error(f"Mismatched schema for {project}.{dataset}.{table_name}, trying anyway")

      import_type = bigquery.WriteDisposition.WRITE_APPEND
      
    else:
      import_type = bigquery.WriteDisposition.WRITE_TRUNCATE

    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = import_type
    # Assume a CSV header is the first line unless otherwise specified in the report's own config
    job_config.skip_leading_rows = config.get('csv_header_length', 1)
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.schema = schema
    # Allow a few errors, just in case
    job_config.max_bad_records = 10
    # Allow DV360/CM to pass jagged rows, which they do (SA360 won't)
    job_config.allow_jagged_rows = True
    
    uri = f'gs://{bucket_name}/{file_name}'
    load_job = bq.load_table_from_uri(
        uri, table_ref, job_config=job_config
    )  # API request
    logging.info(f'Starting CSV import job {load_job.job_id}')

    return load_job


  def _table_exists(self, bq: bigquery.Client, table_ref: bigquery.TableReference) -> bool:
    try:
      bq.get_table(table_ref)
      return True

    except NotFound:
      return False


  def _validate_schema(self, bq: bigquery.Client, table_ref: bigquery.TableReference, schema: List[bigquery.schema.SchemaField]) -> bool:
    _table = bq.get_table(table_ref)
    _schema = _table.schema

    return _schema == schema


  def _email_error(self, message: str, email: str=None, error: Exception=None) -> None:
    _to = [email] if email else []
    _administrator = os.environ.get('ADMINISTRATOR_EMAIL') or self.FIRESTORE.get_document(Type._ADMIN, 'admin').get('email')
    _cc = [_administrator] if _administrator else []

    if _to or _cc:
      message = GMailMessage(
        to=_to, 
        cc=_cc,
        subject=f'Error in report_loader',
        body=f'''
{message}

Error: {error if error else 'No exception.'}
''', 
        project=os.environ.get('GCP_PROJECT'))

      GMail().send_message(
        message=message,
        credentials=Report2BQCredentials(email=email, project=os.environ.get('GCP_PROJECT'))
      )
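To make the entry point concrete, here is a hedged sketch of invoking process() with the shape of payload a GCS finalize trigger delivers; the bucket and object names are made up, and the file name must match a report id already configured in Firestore.

# Hypothetical local invocation with a GCS 'finalize'-style payload.
if __name__ == '__main__':
    event = {
        'bucket': 'my-project-report2bq-upload',  # the monitored upload bucket
        'name': '123456789.csv',                  # file name == report id in Firestore
    }
    ReportLoader().process(event, context=None)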
Example No. 16
 def firestore(self) -> Firestore:
     return Firestore(project=self.project, email=self.email)
Example No. 17
    if args.get('encode_key'):
        key = encode_key(_key)

    else:
        key = _key

    src_data['email'] = _key

    if args.get('local_store'):
        from classes.local_datastore import LocalDatastore
        f = LocalDatastore()

    if args.get('firestore'):
        from classes.firestore import Firestore
        f = Firestore()

    if args.get('secret_manager'):
        from classes.secret_manager import SecretManager
        f = SecretManager(project=_project, email=args.get('email'))

    f.update_document(type=Type._ADMIN, id=key, new_data=src_data)


def main(unused_argv):
    event = {
        'key': FLAGS.key,
        'file': FLAGS.file,
        'encode_key': FLAGS.encode_key,
        'local_store': FLAGS.local,
        'firestore': FLAGS.firestore,
Example No. 18
    def process(self, data: Dict[str, Any], context):
        """Check all the running jobs
    
    Arguments:
      event {Dict[str, Any]} -- data sent from the PubSub message
      context {Dict[str, Any]} -- context data. unused
    """
        firestore = Firestore(in_cloud=True, email=None, project=None)
        documents = firestore.get_all_jobs()

        for document in documents:
            for T in [t for t in Type if not t.name.startswith('_')]:
                config = firestore.get_report_config(T, document.id)

                if config:
                    if config.get('dest_project'):
                        # authenticate against supplied project with supplied key
                        project = config.get('dest_project') or os.environ.get('GCP_PROJECT')
                        client_key = json.loads(Cloud_Storage.fetch_file(
                            bucket=f"{os.environ.get('GCP_PROJECT') or 'galvanic-card-234919'}-report2bq-tokens",
                            file=f"{config['email']}_user_token.json"))
                        server_key = json.loads(Cloud_Storage.fetch_file(
                            bucket=f"{os.environ.get('GCP_PROJECT') or 'galvanic-card-234919'}-report2bq-tokens",
                            file='client_secrets.json'))
                        client_key['client_id'] = (server_key.get('web') or
                                                   server_key.get('installed')).get('client_id')
                        client_key['client_secret'] = (server_key.get('web') or
                                                       server_key.get('installed')).get('client_secret')
                        logging.info(client_key)
                        creds = Credentials.from_authorized_user_info(client_key)
                        bq = bigquery.Client(project=project, credentials=creds)

                    else:
                        bq = bigquery.Client()

                    api_repr = document.get().to_dict()
                    if api_repr:
                        try:
                            job = LoadJob.from_api_repr(api_repr, bq)
                            job.reload()

                            if job.state == 'DONE':
                                if job.error_result:
                                    logging.error(job.errors)

                                self._handle_finished(job=job,
                                                      id=document.id,
                                                      config=config,
                                                      report_type=T)
                                firestore.mark_import_job_complete(
                                    document.id, job)

                        except Exception as e:
                            logging.error(
                                f'Error loading job {document.id} for monitoring: {e}')

                    break
Example No. 19
class RunMonitor(object):
    """Run the process watching running DV360/CM jobs

  This process is triggered by a Cloud Scheduler job every 5 minutes to watch the Firestore-held
  list of jobs for running DV360/CM processes. If one is discovered to have completed, the Report2BQ
  process is invoked in the normal manner (via a PubSub message to the trigger queue).

  This process is not 100% necessary; if a report is defined with a "fetcher", then the fetcher will 
  run as usual every hour and will pick up the change anyway. On the other hand, it allows for a user
  to schedule a quick report to run (say) every 30 minutes, and not create a "fetcher" since this process
  takes the "fetcher"'s place.
  """

    firestore = Firestore()
    PS = pubsub.PublisherClient()

    schedules = None

    def process(self, data: Dict[str, Any], context):
        """[summary]
    
    Arguments:
        data {Dict[str, Any]} -- Data passed in from the calling function, containing the attributes from the
                                 calling PubSub message
        context {} -- unused
    """
        self.project = os.environ['GCP_PROJECT']
        report_checker = {
            Type.DV360: self._check_dv360_report,
            Type.CM: self._check_cm_report,
            Type.SA360: self._check_sa360_report,
            Type.SA360_RPT: self._check_sa360_report
        }

        try:
            documents = self.firestore.get_all_running()
            for document in documents:
                with suppress(ValueError):
                    run_config = document.get().to_dict()
                    T = Type(run_config['type'])
                    # config = self.firestore.get_report_config(T, document.id)
                    job_config = self._fetch_schedule(type=T,
                                                      run_config=run_config)
                    report_checker[T](run_config=run_config,
                                      job_config=job_config)
                    # break
                    # else:
                    #   logging.error(f'Invalid report: {document.get().to_dict()}')

        except Exception as e:
            logging.error(e)

    def _fetch_schedule(self, type: Type,
                        run_config: Dict[str, Any]) -> Dict[str, Any]:
        scheduler = Scheduler()
        (success, job_config) = scheduler.process({
            'action': 'get',
            'project': os.environ['GCP_PROJECT'],
            'email': run_config['email'],
            'html': False,
            'job_id': type.runner(run_config['report_id'])
        })

        return job_config

    def _check_dv360_report(self, job_config: Dict[str, Any],
                            run_config: Dict[str, Any]):
        """Check a running DV360 report for completion
    
    Arguments:
        report {Dict[str, Any]} -- The report data structure from Firestore
    """
        job_attributes = job_config['pubsubTarget']['attributes']
        dbm = DBM(email=job_attributes['email'], project=self.project)
        status = dbm.report_state(job_attributes['report_id'])
        append = job_attributes.get('append', False) if job_attributes else False

        logging.info('Report {report} status: {status}'.format(
            report=job_attributes['report_id'], status=status))

        if status == 'DONE':
            # Remove job from running
            self.firestore.remove_report_runner(job_attributes['report_id'])

            # Send pubsub to trigger report2bq now
            topic = job_config['pubsubTarget']['topicName']
            self.PS.publish(topic=topic, data=b'RUN', **job_attributes)

        elif status == 'FAILED':
            # Remove job from running
            logging.error(f'Report {run_config["report_id"]} failed!')
            self.firestore.remove_report_runner(run_config['report_id'])

    def _check_cm_report(self, job_config: Dict[str, Any],
                         run_config: Dict[str, Any]):
        """Check a running CM report for completion
    
    Arguments:
        report {Dict[str, Any]} -- The report data structure from Firestore
    """
        job_attributes = job_config['pubsubTarget']['attributes']
        dcm = DCM(email=job_attributes['email'],
                  project=self.project,
                  profile=job_attributes['profile_id'])
        append = job_attributes.get('append', False) if job_attributes else False
        # TODO: Add report_file.id to run_config
        response = dcm.report_state(report_id=job_attributes['report_id'],
                                    file_id=run_config['report_file']['id'])
        status = response.get('status', 'UNKNOWN') if response else 'UNKNOWN'

        logging.info('Report {report} status: {status}'.format(
            report=job_attributes['report_id'], status=status))
        if status == 'REPORT_AVAILABLE':
            # Remove job from running
            self.firestore.remove_report_runner(job_attributes['report_id'])

            # Send pubsub to trigger report2bq now
            topic = 'projects/{project}/topics/report2bq-trigger'.format(
                project=self.project)
            self.PS.publish(topic=topic, data=b'RUN', **job_attributes)

        elif status == 'FAILED' or status == 'CANCELLED':
            # Remove job from running
            logging.error('Report {report} failed!'.format(
                report=job_attributes['report_id']))
            self.firestore.remove_report_runner(job_attributes['report_id'])

    def _check_sa360_report(self, job_config: Dict[str, Any],
                            run_config: Dict[str, Any]):
        sa360 = SA360(email=run_config['email'], project=self.project)

        # Merge configs
        job_attributes = (job_config['pubsubTarget']['attributes']
                          if 'pubsubTarget' in job_config else {})
        config = {**run_config, **job_attributes}

        if sa360.handle_offline_report(run_config=config):
            self.firestore.remove_report_runner(run_config['report_id'])
            logging.info(f'Report {run_config["report_id"]} done.')

        else:
            # SA360 ones can't fail - they won't start if there are errors, so it's just
            # not ready yet. So just leave it here and try again later.
            logging.error(f'Report {run_config["report_id"]} not ready.')
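A hedged sketch of the Cloud Function wrapper that Cloud Scheduler (via Pub/Sub) would invoke every few minutes to drive the class above; the entry-point name is an assumption.

# Hypothetical Pub/Sub-triggered entry point.
def run_monitor(event, context):
    RunMonitor().process(event, context)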
Example No. 20
 def datastore(self) -> AbstractDatastore:
     """The datastore property."""
     from classes.firestore import Firestore
     return Firestore()
Example No. 21
 def firestore(self) -> AbstractDatastore:
     return Firestore()
Example No. 22
 def add(self, firestore: Firestore, report: str, file: str, **unused):
     with open(file) as definition:
         cfg = json.load(definition)
         Firestore().update_document(Type.SA360_RPT, '_reports',
                                     {report: cfg})
Example No. 23
 def firestore(self) -> Firestore:
     return Firestore()