Esempio n. 1
0
    def to_topic(batch):
        """Publish a batch of exported trips to the configured Pub/Sub topic.

        Every item is copied without its ``doc_id`` and ``doc_reference`` keys.
        The copies are sent, together with a Gobits metadata record, as a
        single JSON message.

        :param batch: iterable of dict-like trip items.
        :return: ``True`` when the message was handed to the publisher,
                 ``False`` when preparing or submitting it raised.
        """
        excluded_keys = ('doc_id', 'doc_reference')
        batch_to_publish = [
            {key: item[key] for key in item if key not in excluded_keys}
            for item in batch
        ]

        def log_publish_result(done_future):
            # Done-callbacks also run for failed publishes, so inspect the
            # outcome instead of unconditionally reporting success.
            error = done_future.exception()
            if error is not None:
                logging.error(f"Unable to publish exported trips to topic because of {str(error)}")
            else:
                logging.debug(f"Published {len(batch_to_publish)} exported trips")

        try:
            gobits = Gobits()  # Get gobits
            publisher = pubsub_v1.PublisherClient()  # Publish to topic

            topic_path = f"projects/{config.PROJECT_ID_TOPIC}/topics/{config.TOPIC_NAME}"
            msg = {
                "gobits": [gobits.to_json()],
                "trips": batch_to_publish
            }

            # str.encode() already returns bytes; no extra bytes() wrapper needed
            future = publisher.publish(topic_path, json.dumps(msg).encode('utf-8'))
            future.add_done_callback(log_publish_result)
        except Exception as e:
            logging.exception(f"Unable to publish exported trips to topic because of {str(e)}")
            return False
        else:
            return True
def handler(data, context):
    """Process an uploaded bouwportaal orders file and trigger the analysis.

    Files whose name does not contain ``uploads/bouwportaal_orders/`` are
    skipped. For matching files the data is loaded and processed; when the
    file number equals the number of files, a trigger message is sent to the
    configured topic.

    :param data: trigger payload holding the ``bucket`` and ``name`` keys.
    :param context: function context used to build the Gobits metadata.
    :return: ``('OK', 200)`` on success or skip, ``('Error', 500)`` on failure.
    """
    logging.info('Run started')
    bucket, filename = data['bucket'], data['name']
    try:
        if 'uploads/bouwportaal_orders/' not in filename:
            logging.info(
                f'Skipping {filename} because it does not need processing')
            logging.info('Run finished')
            return 'OK', 200

        df, nr_of_files, nr_of_file = data_from_store(bucket, filename)
        process_bouwportaal_orders(df)
        logging.info('Run finished')

        # Trigger the bouwportaal analysis once the file number matches
        # the number of files
        if nr_of_file == nr_of_files:
            trigger_arguments = {
                'gobits': Gobits.from_context(context=context),
                'topic_project_id': config.TOPIC_SETTINGS.get(
                    'topic_project_id'),
                'topic_name': config.TOPIC_SETTINGS.get('topic_name'),
            }
            send_trigger_to_topic(**trigger_arguments)

        return 'OK', 200
    except Exception as e:
        for failure_line in (f'Processing {filename} stopped',
                             f'Processing failure: {e}'):
            logging.error(failure_line)
        traceback.print_exc()
        return 'Error', 500
Esempio n. 3
0
def handler(data, context):
    """
    Handler method that reads a file, optionally reduces it to the records
    that differ from the stored state, and publishes the records.

    :param: data    Dictionary like object that holds trigger information.
    :param: context Google Cloud Function context.
    :return: ("OK", 204) when finished or when nothing had to be done,
             ("Bad Request", 400) when any step raised.
    """

    try:
        bucket_name, file_name = data["bucket"], data["name"]

        # Files outside the configured prefix are ignored
        if not file_name.startswith(config.prefix_filter):
            logging.info("Do not process file, exiting...")
            return "OK", 204

        file = GoogleCloudStorage().read(file_name, bucket_name)
        file.top_level_attribute = config.top_level_attribute
        file.csv_dialect_parameters = config.csv_dialect_parameters

        records = file.to_json(Formatter(config.template))

        # Without a full load, keep only what differs from the stored state
        if not config.full_load:
            if config.state.type != "datastore":
                raise NotImplementedError("Unkown state type!")
            records = GoogleCloudDatastore().difference(
                records, config.state.kind, config.state.property)

        # Nothing left to publish
        if len(records) == 0:
            logging.info("No new records found, exiting...")
            return "OK", 204

        metadata = Gobits.from_context(context=context)
        Publisher(config.topic.batch_settings).publish(
            config.topic.project_id,
            config.topic.id,
            records,
            metadata.to_json(),
            config.topic.batch_size,
            config.topic.subject,
        )

        # Remember the published records as the new state
        if not config.full_load and config.state.type == "datastore":
            logging.info("Adding new items to state")
            GoogleCloudDatastore().put_multi(records, config.state.kind,
                                             config.state.property)

    except Exception as e:
        logging.exception(e)
        return "Bad Request", 400

    return "OK", 204
Esempio n. 4
0
class TestGobits(unittest.TestCase):
    """Checks a Gobits built while the function environment variables are set."""

    def setUp(self):
        """Set the function environment variables and create the Gobits."""
        os.environ.update({
            'X_GOOGLE_GCP_PROJECT': X_GOOGLE_GCP_PROJECT,
            'X_GOOGLE_FUNCTION_NAME': X_GOOGLE_FUNCTION_NAME,
            'FUNCTION_TRIGGER_TYPE': FUNCTION_TRIGGER_TYPE,
            'X_GOOGLE_FUNCTION_VERSION': X_GOOGLE_FUNCTION_VERSION,
        })
        self.gobits = Gobits()

    def test_processed(self):
        processed = self.gobits.processed
        self.assertEqual(len(processed), 24)

    def test_gcp_project(self):
        self.assertEqual(self.gobits.gcp_project, X_GOOGLE_GCP_PROJECT)

    def test_execution_type(self):
        self.assertEqual(self.gobits.execution_type, EXECUTION_TYPE)

    def test_execution_trigger_type(self):
        trigger_type = self.gobits.execution_trigger_type
        self.assertEqual(trigger_type, FUNCTION_TRIGGER_TYPE)

    def test_function_name(self):
        self.assertEqual(self.gobits.function_name, X_GOOGLE_FUNCTION_NAME)

    def test_function_version(self):
        version = self.gobits.function_version
        self.assertEqual(version, X_GOOGLE_FUNCTION_VERSION)

    def test_to_json(self):
        """Every environment-derived field appears in the JSON output."""
        payload = self.gobits.to_json()
        expectations = (
            ('gcp_project', X_GOOGLE_GCP_PROJECT),
            ('execution_type', EXECUTION_TYPE),
            ('execution_trigger_type', FUNCTION_TRIGGER_TYPE),
            ('function_name', X_GOOGLE_FUNCTION_NAME),
            ('function_version', X_GOOGLE_FUNCTION_VERSION),
        )
        for field, expected in expectations:
            self.assertEqual(payload[field], expected)

    def test_json_length(self):
        payload = self.gobits.to_json()
        self.assertEqual(len(payload), 6)

    def tearDown(self):
        """Blank out the environment variables set in setUp."""
        for variable in ('X_GOOGLE_GCP_PROJECT', 'X_GOOGLE_FUNCTION_NAME',
                         'FUNCTION_TRIGGER_TYPE', 'X_GOOGLE_FUNCTION_VERSION'):
            os.environ[variable] = ''
Esempio n. 5
0
class TestCloudBuildGobits(unittest.TestCase):
    """Checks a Gobits built while ``BUILDER_OUTPUT`` is set."""

    def setUp(self):
        """Set ``BUILDER_OUTPUT`` and create the Gobits."""
        os.environ.update({'BUILDER_OUTPUT': BUILDER_OUTPUT})
        self.gobits = Gobits()

    def test_processed(self):
        processed = self.gobits.processed
        self.assertEqual(len(processed), 24)

    def test_to_json(self):
        payload = self.gobits.to_json()
        self.assertEqual(payload['execution_type'], 'cloud_build')

    def test_json_length(self):
        payload = self.gobits.to_json()
        self.assertEqual(len(payload), 2)

    def tearDown(self):
        """Blank out ``BUILDER_OUTPUT`` again."""
        os.environ['BUILDER_OUTPUT'] = ''
Esempio n. 6
0
 def setUp(self, mock_request):
     """Create a Gobits from a mocked request carrying a subscription/message envelope."""
     envelope = {
         'subscription': SUBSCRIPTION,
         'message': {
             'messageId': MESSAGE_ID,
             'publishTime': MESSAGE_PUBLISH_TIME,
         },
     }
     mock_request.data = json.dumps(envelope).encode('utf-8')
     execution_headers = {'Function-Execution-Id': HTTP_FUNCTION_EXECUTION_ID}
     mock_request.headers = execution_headers
     self.gobits = Gobits.from_request(request=mock_request)
Esempio n. 7
0
def publish(name, publisher):
    """Publish a JSON message with Gobits metadata and *name* as ``chain_name``.

    :param name: value sent under the ``chain_name`` key.
    :param publisher: client providing ``topic_path`` and ``publish``.
    """
    payload = {"gobits": [Gobits().to_json()], "chain_name": name}
    body = json.dumps(payload, indent=2).encode("utf-8")

    # The destination is built from config.PROJECT and config.TURN_TO_CALCULATE
    destination = publisher.topic_path(config.PROJECT, config.TURN_TO_CALCULATE)
    publisher.publish(destination, body)
Esempio n. 8
0
 def _publish_message(self, message_name, message):
     """Publish *message* under the key *message_name*, with Gobits metadata.

     The metadata is best-effort: when it cannot be serialised the message
     is still published, with an empty ``gobits`` list.

     :param message_name: key under which the message is placed.
     :param message: JSON-serialisable message content.
     """
     # Imported locally so this block does not depend on a module-level import
     import logging

     metadata = Gobits.from_request(request=self._request)
     try:
         my_gobits = [metadata.to_json()]
     except Exception:
         # Catch Exception rather than everything, so KeyboardInterrupt and
         # SystemExit still propagate, and leave a trace of the failure.
         logging.warning("Could not serialise gobits metadata", exc_info=True)
         my_gobits = []
     message_to_publish = {"gobits": my_gobits, message_name: message}
     # str.encode() already returns bytes; no extra bytes() wrapper needed
     self._publisher.publish(
         self._topic_name,
         json.dumps(message_to_publish).encode("utf-8"))
Esempio n. 9
0
 def process_mail(self, mail):
     """Convert an e-mail into a message and publish it to the topic.

     The mail is rejected when its sender is not in ``self.senders``, when
     its body lacks the literal ``<table>``, ``<tr>`` or ``<td>`` tags, or
     when any later parsing, validation or publishing step fails.

     :param mail: mapping with ``sender`` and ``body`` and optionally
                  ``received_on``.
     :return: ``True`` when the message was published, otherwise ``False``.
     """
     mail_sender = mail["sender"]
     date = mail["received_on"] if "received_on" in mail else ""
     if mail_sender not in self.senders:
         if date:
             notice = (
                 "Mail received on {} was not send by the right e-mail address"
                 .format(date))
         else:
             notice = "Mail received was not send by the right e-mail address"
         logging.info(notice)
         return False

     html_content = mail["body"]
     # All three table tags must be present in the body
     if any(tag not in html_content for tag in ("<table>", "<tr>", "<td>")):
         logging.info("Required tags cannot be found in HTML body")
         return False

     # Everything before the first table holds the non-table fields
     text_before_table = html_content.partition("<table>")[0]
     fields_before_table = self.get_part_above_table_list(text_before_table)
     if not fields_before_table:
         return False

     # Start the message with the fields found above the table
     new_message = self.add_fields_not_table(fields_before_table)
     # Then add the fields found in the table itself
     soup = BeautifulSoup(html_content, "html.parser")
     new_message = self.add_fields_table(soup, new_message)

     # Every required field has to be present
     new_message = self.required_fields_check(new_message)
     if not new_message:
         return False
     # The ID has to be correct
     if self.id_check(new_message) is False:
         return False
     # Add an ID
     new_message = self.add_id(mail, new_message)
     if not new_message:
         return False

     metadata = Gobits()
     return bool(self.publish_to_topic(new_message, metadata))
Esempio n. 10
0
def handler(request):
    """
    When triggered, this function executes the cost query from ``query.sql``
    in BigQuery. Results from this query are published on a Pub/Sub topic.

    :param: request Request used to build the Gobits metadata.
    """

    dataset = os.getenv("DATASET_ID")

    with open("query.sql") as sql_file:
        sql = sql_file.read()

    result = query(sql, dataset, TOPIC_NAME)

    for row in result:
        logging.info(row)

    # Nothing to publish
    if not result:
        return

    metadata = Gobits.from_request(request=request).to_json()
    publish(result, metadata, TOPIC_NAME)
Esempio n. 11
0
 def process(self, payload):
     """Turn the payload's message into a topic message and publish it.

     The message is read from ``payload[self.data_selector]`` and must have
     exactly one root field. The value of
     ``self.recipient_mapping_message_field`` inside that root is passed to
     ``make_topic_msg`` together with the HTML body and subject.

     :param payload: mapping that holds the message under ``self.data_selector``.
     :return: ``True`` when the message was published, otherwise ``False``.
     """
     # Get message
     message = payload[self.data_selector]
     # Message to HTML body
     html_body, subject = self.message_to_html(message)
     if not html_body or not subject:
         logging.error("Message was not processed")
         return False
     # The message must have exactly one root field
     roots = list(message)
     if not roots:
         # Without this guard an empty message left the root name unbound
         # and crashed with UnboundLocalError further down.
         logging.error("Message has no root")
         return False
     if len(roots) > 1:
         logging.error("Message has multiple roots")
         return False
     message_root = roots[0]
     recipient_mapping_field_dict = message.get(message_root)
     recipient_mapping_field_message = recipient_mapping_field_dict.get(
         self.recipient_mapping_message_field
     )
     if not recipient_mapping_field_message:
         logging.error(
             f"The field {self.recipient_mapping_message_field} could not be found in the message"
         )
         return False
     # Make topic message
     topic_message = self.make_topic_msg(
         recipient_mapping_field_message, html_body, subject
     )
     if not topic_message:
         logging.error("Topic message was not made")
         return False
     # Make gobits
     gobits = Gobits()
     # Send message to topic
     return_bool = self.publish_to_topic(subject, topic_message, gobits)
     if return_bool is False:
         logging.error("Message was not processed")
         return False
     logging.info("Message was processed")
     return True
Esempio n. 12
0
    def process(self, request):
        """Collect resources that could not be found and publish them.

        Every CKAN group is treated as a GCP project ID. For each one the
        project's services and resources are requested from the GCP service,
        and every package of the group is handed to ``Package(...).process()``
        whose results are added to the findings. The findings are published
        to the configured topic together with Gobits metadata.

        :param request: request used to build the Gobits metadata.
        :return: ``False`` when CKAN is not reachable, otherwise the result of
                 publishing the findings.
        """
        if not self.ckan_service.is_ckan_reachable():
            return False

        findings = []
        gcp = self.gcp_service
        # CKAN groups are based on GCP project IDs
        for project_id in self.ckan_service.get_group_list():
            tracker = NotFoundResource(project_id)
            services = gcp.get_project_services(project_id)
            # No services means the project itself does not exist
            if not services:
                logging.info(
                    f"Project ID {project_id} could not be found on GCP while getting services"
                )
                dashboard_url = f"https://console.cloud.google.com/home/dashboard?project={project_id}"
                missing_project = tracker.make_not_found(
                    "Project not found",
                    "google-cloud-project",
                    project_id,
                    "GCP Project",
                    dashboard_url,
                )
                findings.append(missing_project)

            group = self.ckan_service.get_project_group(project_id)

            # Resources belonging to the project; each call hands back the
            # findings list together with what it retrieved
            findings, topics = gcp.get_topics(
                tracker, services, findings, project_id)
            findings, subscriptions = gcp.get_subscriptions(
                tracker, services, findings, project_id)
            findings, buckets = gcp.get_buckets(
                tracker, services, findings, project_id)
            findings, sql_instances = gcp.get_sql_instances(
                tracker, services, findings, project_id)
            findings, sql_databases = gcp.get_sql_databases(
                tracker, services, sql_instances, findings, project_id)
            findings, bigquery_datasets = gcp.get_bigquery_datasets(
                tracker, services, findings, project_id)

            # Process every package of the group
            for package in group.get("packages", []):
                full_package = self.ckan_service.get_full_package(
                    package["id"])
                package_checker = Package(
                    package=full_package,
                    topics=topics,
                    subscriptions=subscriptions,
                    buckets=buckets,
                    sql_instances=sql_instances,
                    sql_databases=sql_databases,
                    bigquery_datasets=bigquery_datasets,
                    gcp_services=services,
                    group_project_id=project_id,
                )
                findings.extend(package_checker.process())
        self.gcp_service.get_subscriber_client().close()

        # Metadata describing this run
        metadata = Gobits.from_request(request=request)

        # Send the findings to a topic
        return self.gcp_helper.publish_to_topic(
            config.TOPIC_PROJECT_ID,
            config.TOPIC_NAME,
            findings,
            [metadata.to_json()],
        )
Esempio n. 13
0
    # Open data catalog
    try:
        with open(args.data_catalog, "r") as f:
            catalog = json.load(f)
    except Exception as e:
        logging.exception("Unable to open catalog " +
                          "because of {}".format(e))
        sys.exit(1)
    # Get schemas list
    schemas_list = args.schemas
    # A message should be sent to the schemas topic
    # for every topic that has this schema
    schemas, schema_names = get_schemas(catalog, schemas_list)
    # Project id of the topic the schema needs to be published to
    topic_project_id = args.topic_project_id
    # Topic the schema needs to be published to
    topic_name = args.topic_name
    # Print which schemas are published
    print("Publishing schemas {} to topic".format(schema_names))
    # Publish every schema message to the topic
    # The gobits of the message
    gobits = Gobits()
    msg = {"gobits": [gobits.to_json()], "schemas": schemas}
    # print(json.dumps(msg, indent=2, sort_keys=False))
    # with open('data.json', 'w') as outfile:
    #     json.dump(msg, outfile, indent=2, sort_keys=False)
    return_bool_publish_topic = publish_to_topic(msg, schema_names,
                                                 topic_project_id, topic_name)
    if not return_bool_publish_topic:
        sys.exit(1)
Esempio n. 14
0
    def process(self, payload, in_request):
        """Translate the payload to XML and post it, with the merged PDF, to the ISP.

        The XML and the PDF are posted in separate requests; the PDF is only
        posted after the XML post succeeded. Afterwards the temporary merged
        PDF is removed and the metadata is updated.

        Errors are logged, not raised: a ``TranslateError`` with exception id
        4030 is logged as a warning at info level, any other ``TranslateError``
        and any other exception at error level.

        :param payload: message to translate; its ``gobits`` entry is passed to
                        ``update_metadata``.
        :param in_request: request used to build the Gobits metadata.
        """
        try:
            xml = self.translate_to_xml(payload)

            # Get files from einvoices bucket
            bucket_einvoices = self.client.get_bucket(
                self.bucket_name_einvoices)
            blobs = self.client.list_blobs(bucket_einvoices,
                                           prefix=self.base_path)

            bits = Gobits(request=in_request)
            gobits = bits.to_json()

            # A failing metadata check raises and skips everything below
            if self.file_name != "e2e_test":
                self.check_metadata(gobits)
            self.create_merged_pdf(blobs)

            logging.info("Prepare XML and PDF for sending")
            headerspdf = {
                'Content-Type': "application/pdf",
                'Accept': "application/pdf",
                'Filename': self.file_name
            }

            headersxml = {
                'Content-Type': "application/xml",
                'Accept': "application/xml",
                'Filename': self.file_name
            }

            # Keep the PDF open only while sending, so the handle is closed
            # again on success as well as on failure.
            with open(self.merged_pdf.name, 'rb') as pdf_file:
                pdf = {'pdf': (self.file_name, pdf_file)}

                cert = self.get_certificates()

                # Posting XML data and PDF file to server in separate requests
                logging.info("Send XML and PDF to ISP")
                rxml = requests.post(self.url,
                                     headers=headersxml,
                                     data=xml,
                                     cert=cert,
                                     verify=True)
                if not rxml.ok:
                    raise TranslateError(
                        4001,
                        function_name="process",
                        fields=[rxml],
                        description=
                        f"Invoice {self.invoice_number}: XML post request to ISP failed"
                    )
                logging.info("[{}] XML invoice sent".format(
                    self.invoice_number))

                rpdf = requests.post(self.url,
                                     headers=headerspdf,
                                     files=pdf,
                                     cert=cert,
                                     verify=True)
                if not rpdf.ok:
                    raise TranslateError(
                        4001,
                        function_name="process",
                        fields=[rpdf],
                        description=
                        f"Invoice {self.invoice_number}: PDF post request to ISP failed"
                    )
                logging.info("[{}] PDF invoice sent".format(
                    self.invoice_number))

            # Remove (content) temp file
            self.merged_pdf.close()
            os.unlink(self.merged_pdf.name)

            # Update metadata
            self.update_metadata(gobits, payload['gobits'])

        except TranslateError as e:
            if e.properties['error']['exception_id'] == 4030:
                logging.info(json.dumps({'warning': e.properties['error']}))
            else:
                logging.error(json.dumps(e.properties))
        except Exception as e:
            logging.exception(e)
Esempio n. 15
0
    def process(self, request):
        """Report GCP projects and resources that are not registered in CKAN.

        Projects known to GCP but without a CKAN group are handed to
        ``process_not_found_projects``. For every other project, each GCP
        resource is looked up by name and type among the CKAN resources of the
        project's group; resources that are missing, and that
        ``is_default_resource`` does not accept, are added to the findings.
        The findings are published to the configured topic.

        :param request: request used to build the Gobits metadata.
        :return: ``False`` when CKAN is not reachable, otherwise the result of
                 publishing the findings.
        """
        if not self.ckan_service.is_ckan_reachable():
            return False

        # get GCP projects
        project_list = self.gcp_service.get_projects()
        # get all groups of CKAN, they are based on GCP project IDs
        group_list = self.ckan_service.get_group_list()
        # get mismatching projects
        mismatching_projects = list(set(project_list) - set(group_list))
        not_found_resources = self.process_not_found_projects(mismatching_projects)
        # get matching projects
        matching_projects = list(set(project_list) - set(mismatching_projects))
        for project_id in matching_projects:
            not_found_resource = NotFoundResource(project_id)
            # Get project's services
            gcp_services = self.gcp_service.get_project_services(project_id)
            # Get matching ckan project
            group = self.ckan_service.get_project_group(project_id)
            # Collect the CKAN resources as exact (name, format) pairs. A set
            # avoids the false positives of searching one concatenated string.
            ckan_resource_keys = set()
            for package in group.get("packages", []):
                full_package = self.ckan_service.get_full_package(package["id"])
                for resource in full_package.get("resources", []):
                    if "format" in resource and "name" in resource:
                        ckan_resource_keys.add((resource["name"], resource["format"]))

            # Get topics belonging to project ID
            not_found_resources, topics = self.gcp_service.get_topics(
                not_found_resource, gcp_services, not_found_resources, project_id
            )

            # Get subscriptions belonging to project ID
            not_found_resources, subscriptions = self.gcp_service.get_subscriptions(
                not_found_resource, gcp_services, not_found_resources, project_id
            )
            # Get buckets belonging to project ID
            not_found_resources, buckets = self.gcp_service.get_buckets(
                not_found_resource, gcp_services, not_found_resources, project_id
            )

            # Get SQL instances belonging to project ID
            not_found_resources, sql_instances = self.gcp_service.get_sql_instances(
                not_found_resource, gcp_services, not_found_resources, project_id
            )
            # Get SQL databases belonging to project ID
            not_found_resources, sql_databases = self.gcp_service.get_sql_databases(
                not_found_resource,
                gcp_services,
                sql_instances,
                not_found_resources,
                project_id,
            )
            # Get bigquery datasets belonging to project ID
            not_found_resources, bigquery_datasets = self.gcp_service.get_bigquery_datasets(
                not_found_resource, gcp_services, not_found_resources, project_id
            )

            # Resource names keyed by the format they are compared against
            resources = {
                'topic': topics,
                'subscription': subscriptions,
                'blob-storage': buckets,
                'cloudsql-instance': sql_instances,
                'cloudsql-db': sql_databases,
                'bigquery-dataset': bigquery_datasets
            }
            for key, value in resources.items():
                for resource_name in value:
                    if (resource_name, key) in ckan_resource_keys:
                        continue
                    if self.is_default_resource(resource_name):
                        continue
                    logging.info(
                        f"Resource {resource_name} could not be found on CKAN while it still exists in GCP"
                    )
                    not_found_resources.append(
                        not_found_resource.make_not_found(
                            "Resource not found",
                            '',  # package name is a ckan-specific attribute
                            resource_name,
                            key,
                            self.gcp_service.generate_resource_url(key, resource_name, project_id),
                        )
                    )
        self.gcp_service.get_subscriber_client().close()

        # Create gobits object
        metadata = Gobits.from_request(request=request)
        # Send issues to a topic
        return self.gcp_helper.publish_to_topic(
            config.TOPIC_PROJECT_ID, config.TOPIC_NAME,
            not_found_resources, [metadata.to_json()]
        )
Esempio n. 16
0
 def setUp(self, mock_context):
     """Create a Gobits from a mocked context that carries an event id."""
     setattr(mock_context, 'event_id', EVENT_ID)
     gobits_from_context = Gobits.from_context(context=mock_context)
     self.gobits = gobits_from_context
Esempio n. 17
0
                                                    topic_name)
        msg = {"gobits": [gobits.to_json()], "data_catalog": catalog}

        future = publisher.publish(topic_path,
                                   bytes(json.dumps(msg).encode("utf-8")))

        future.add_done_callback(lambda x: logging.info(
            "Published data catalog of project {} to topic {}".format(
                args.project_id, topic_name)))

        return True
    except Exception as e:
        logging.exception("Unable to publish data catalog " +
                          "to topic because of {}".format(e))
        print(
            "Unable to publish data catalog to topic because of {}".format(e))
    return False


if __name__ == "__main__":
    # Command-line entry point: parse the arguments, publish the catalog and
    # exit with status 1 when publishing did not succeed.
    parser = argparse.ArgumentParser()
    for flags, is_required in (
        (("-d", "--data-catalog"), True),
        (("-p", "--project-id"), True),
        (("-t", "--publish-topic-name"), False),
        (("-n", "--publish-project-name"), False),
    ):
        parser.add_argument(*flags, required=is_required)
    args = parser.parse_args()
    gobits = Gobits()
    if not publish_to_topic(args, gobits):
        sys.exit(1)
Esempio n. 18
0
 def setUp(self):
     """Set ``BUILDER_OUTPUT`` and create the Gobits under test."""
     os.environ.update({'BUILDER_OUTPUT': BUILDER_OUTPUT})
     self.gobits = Gobits()
Esempio n. 19
0
 def setUp(self):
     """Set the function environment variables and create the Gobits under test."""
     os.environ.update({
         'X_GOOGLE_GCP_PROJECT': X_GOOGLE_GCP_PROJECT,
         'X_GOOGLE_FUNCTION_NAME': X_GOOGLE_FUNCTION_NAME,
         'FUNCTION_TRIGGER_TYPE': FUNCTION_TRIGGER_TYPE,
         'X_GOOGLE_FUNCTION_VERSION': X_GOOGLE_FUNCTION_VERSION,
     })
     self.gobits = Gobits()
Esempio n. 20
0
 def setUp(self, mock_request):
     """Create a Gobits from a mocked request that has an empty body."""
     mock_request.data = bytes()
     execution_headers = {'Function-Execution-Id': HTTP_FUNCTION_EXECUTION_ID}
     mock_request.headers = execution_headers
     self.gobits = Gobits.from_request(request=mock_request)