def to_topic(batch):
    # Strip document identifiers from every item before publishing
    batch_to_publish = []
    for item in batch:
        batch_item = {}
        for key in item:
            if key not in ['doc_id', 'doc_reference']:
                batch_item[key] = item[key]
        batch_to_publish.append(batch_item)
    try:
        # Get gobits
        gobits = Gobits()
        publisher = pubsub_v1.PublisherClient()
        # Publish to topic
        topic_path = f"projects/{config.PROJECT_ID_TOPIC}/topics/{config.TOPIC_NAME}"
        msg = {
            "gobits": [gobits.to_json()],
            "trips": batch_to_publish
        }
        # encode() already returns bytes, so no extra bytes() wrapper is needed
        future = publisher.publish(topic_path, json.dumps(msg).encode('utf-8'))
        future.add_done_callback(lambda x: logging.debug(
            f"Published {len(batch_to_publish)} exported trips"))
    except Exception as e:
        logging.exception(f"Unable to publish exported trips to topic because of {str(e)}")
        return False
    else:
        return True
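# A minimal usage sketch for to_topic above, not taken from the source: the
# sample batch and its field names (trip_id, distance_km) are hypothetical.
# It illustrates that 'doc_id' and 'doc_reference' are stripped from each
# item before the batch is published.
def example_to_topic_usage():
    batch = [
        {"doc_id": "abc-123", "doc_reference": "ref-1", "trip_id": 42, "distance_km": 17.5},
        {"doc_id": "def-456", "doc_reference": "ref-2", "trip_id": 43, "distance_km": 3.2},
    ]
    if to_topic(batch):
        logging.info("Batch published without document identifiers")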
def handler(data, context):
    logging.info('Run started')
    bucket = data['bucket']
    filename = data['name']
    try:
        if 'uploads/bouwportaal_orders/' in filename:
            df, nr_of_files, nr_of_file = data_from_store(bucket, filename)
            process_bouwportaal_orders(df)
            logging.info('Run finished')
            if nr_of_file == nr_of_files:
                # Trigger the analysis for bouwportaal
                gobits = Gobits.from_context(context=context)
                topic_project_id = config.TOPIC_SETTINGS.get('topic_project_id')
                topic_name = config.TOPIC_SETTINGS.get('topic_name')
                send_trigger_to_topic(gobits=gobits,
                                      topic_project_id=topic_project_id,
                                      topic_name=topic_name)
            return 'OK', 200
        else:
            logging.info(f'Skipping {filename} because it does not need processing')
            logging.info('Run finished')
            return 'OK', 200
    except Exception as e:
        logging.error(f'Processing {filename} stopped')
        logging.error(f'Processing failure: {e}')
        traceback.print_exc()
        return 'Error', 500
def handler(data, context): """ Handler method that calculates the difference of a dataset and sends messages to Google Cloud Pub/Sub. :param: data Dictionary like object that holds trigger information. :param: context Google Cloud Function context. """ try: bucket_name = data["bucket"] file_name = data["name"] # Exit when file does not need to be processed if not file_name.startswith(config.prefix_filter): logging.info("Do not process file, exiting...") return "OK", 204 file = GoogleCloudStorage().read(file_name, bucket_name) file.top_level_attribute = config.top_level_attribute file.csv_dialect_parameters = config.csv_dialect_parameters records = file.to_json(Formatter(config.template)) if not config.full_load: if config.state.type == "datastore": records = GoogleCloudDatastore().difference( records, config.state.kind, config.state.property) else: raise NotImplementedError("Unkown state type!") # Exit when no new records exist if not len(records): logging.info("No new records found, exiting...") return "OK", 204 metadata = Gobits.from_context(context=context) publisher = Publisher(config.topic.batch_settings) publisher.publish( config.topic.project_id, config.topic.id, records, metadata.to_json(), config.topic.batch_size, config.topic.subject, ) # Store the new state records if not config.full_load: if config.state.type == "datastore": logging.info("Adding new items to state") GoogleCloudDatastore().put_multi(records, config.state.kind, config.state.property) except Exception as e: logging.exception(e) return "Bad Request", 400 return "OK", 204
class TestGobits(unittest.TestCase):

    def setUp(self):
        os.environ['X_GOOGLE_GCP_PROJECT'] = X_GOOGLE_GCP_PROJECT
        os.environ['X_GOOGLE_FUNCTION_NAME'] = X_GOOGLE_FUNCTION_NAME
        os.environ['FUNCTION_TRIGGER_TYPE'] = FUNCTION_TRIGGER_TYPE
        os.environ['X_GOOGLE_FUNCTION_VERSION'] = X_GOOGLE_FUNCTION_VERSION
        self.gobits = Gobits()

    def test_processed(self):
        self.assertEqual(len(self.gobits.processed), 24)

    def test_gcp_project(self):
        self.assertEqual(self.gobits.gcp_project, X_GOOGLE_GCP_PROJECT)

    def test_execution_type(self):
        self.assertEqual(self.gobits.execution_type, EXECUTION_TYPE)

    def test_execution_trigger_type(self):
        self.assertEqual(self.gobits.execution_trigger_type, FUNCTION_TRIGGER_TYPE)

    def test_function_name(self):
        self.assertEqual(self.gobits.function_name, X_GOOGLE_FUNCTION_NAME)

    def test_function_version(self):
        self.assertEqual(self.gobits.function_version, X_GOOGLE_FUNCTION_VERSION)

    def test_to_json(self):
        gobits = self.gobits.to_json()
        self.assertEqual(gobits['gcp_project'], X_GOOGLE_GCP_PROJECT)
        self.assertEqual(gobits['execution_type'], EXECUTION_TYPE)
        self.assertEqual(gobits['execution_trigger_type'], FUNCTION_TRIGGER_TYPE)
        self.assertEqual(gobits['function_name'], X_GOOGLE_FUNCTION_NAME)
        self.assertEqual(gobits['function_version'], X_GOOGLE_FUNCTION_VERSION)

    def test_json_length(self):
        gobits = self.gobits.to_json()
        self.assertEqual(len(gobits), 6)

    def tearDown(self):
        os.environ['X_GOOGLE_GCP_PROJECT'] = ''
        os.environ['X_GOOGLE_FUNCTION_NAME'] = ''
        os.environ['FUNCTION_TRIGGER_TYPE'] = ''
        os.environ['X_GOOGLE_FUNCTION_VERSION'] = ''
class TestCloudBuildGobits(unittest.TestCase):

    def setUp(self):
        os.environ['BUILDER_OUTPUT'] = BUILDER_OUTPUT
        self.gobits = Gobits()

    def test_processed(self):
        self.assertEqual(len(self.gobits.processed), 24)

    def test_to_json(self):
        gobits = self.gobits.to_json()
        self.assertEqual(gobits['execution_type'], 'cloud_build')

    def test_json_length(self):
        gobits = self.gobits.to_json()
        self.assertEqual(len(gobits), 2)

    def tearDown(self):
        os.environ['BUILDER_OUTPUT'] = ''
def setUp(self, mock_request):
    mock_envelope = dict(
        subscription=SUBSCRIPTION,
        message=dict(messageId=MESSAGE_ID, publishTime=MESSAGE_PUBLISH_TIME))
    mock_request.data = json.dumps(mock_envelope).encode('utf-8')
    mock_request.headers = {
        'Function-Execution-Id': HTTP_FUNCTION_EXECUTION_ID
    }
    self.gobits = Gobits.from_request(request=mock_request)
def publish(name, publisher):
    formatted = json.dumps(
        {"gobits": [Gobits().to_json()], "chain_name": name},
        indent=2,
    ).encode("utf-8")
    # Publish to ops-issues here
    topic_path = publisher.topic_path(config.PROJECT, config.TURN_TO_CALCULATE)
    publisher.publish(topic_path, formatted)
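# A minimal sketch of how publish above could be called, assuming the
# google-cloud-pubsub client library; "my-chain" is a hypothetical chain name,
# and config.PROJECT / config.TURN_TO_CALCULATE are expected to hold the
# project ID and topic name.
from google.cloud import pubsub_v1

def example_publish_usage():
    publisher = pubsub_v1.PublisherClient()
    publish("my-chain", publisher)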
def _publish_message(self, message_name, message):
    metadata = Gobits.from_request(request=self._request)
    try:
        my_gobits = [metadata.to_json()]
    except Exception:
        my_gobits = []
    message_to_publish = {"gobits": my_gobits, message_name: message}
    self._publisher.publish(
        self._topic_name,
        json.dumps(message_to_publish).encode("utf-8"))
def process_mail(self, mail):
    mail_sender = mail["sender"]
    date = ""
    if "received_on" in mail:
        date = mail["received_on"]
    if mail_sender not in self.senders:
        if date:
            logging.info(
                "Mail received on {} was not sent by the right e-mail address".format(date))
        else:
            logging.info("Mail received was not sent by the right e-mail address")
        return False
    html_content = mail["body"]
    # Check if the body contains "<table>", "<tr>" and "<td>" tags
    if ("<table>" not in html_content
            or "<tr>" not in html_content
            or "<td>" not in html_content):
        logging.info("Required tags cannot be found in HTML body")
        return False
    # Get the list from above the table
    html_above_table = html_content.split("<table>")[0]
    html_above_table_list = self.get_part_above_table_list(html_above_table)
    if not html_above_table_list:
        return False
    # Add the fields from above the table of the HTML to the message
    new_message = self.add_fields_not_table(html_above_table_list)
    # HTML to parse-able content
    parsed_html = BeautifulSoup(html_content, "html.parser")
    # Add fields from the table in the HTML to the message
    new_message = self.add_fields_table(parsed_html, new_message)
    # Make sure that every required field is added
    new_message = self.required_fields_check(new_message)
    if not new_message:
        return False
    # Check if the ID is correct
    if self.id_check(new_message) is False:
        return False
    # Add an ID
    new_message = self.add_id(mail, new_message)
    if not new_message:
        return False
    metadata = Gobits()
    return_bool_publish_topic = self.publish_to_topic(new_message, metadata)
    if not return_bool_publish_topic:
        return False
    return True
def handler(request):
    """
    When triggered, this function executes a cost query in BigQuery.
    Results from this query are published on a Pub/Sub topic.
    """
    dataset_id = os.getenv("DATASET_ID")
    with open("query.sql") as f:
        q = f.read()
    result = query(q, dataset_id, TOPIC_NAME)
    for item in result:
        logging.info(item)
    if result:
        metadata = Gobits.from_request(request=request).to_json()
        publish(result, metadata, TOPIC_NAME)
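# The query and publish helpers used by the handler above are not shown in the
# source. Below is a minimal sketch of publish under the convention the other
# snippets follow (a "gobits" list plus a payload key); PROJECT_ID and the
# "results" key are assumptions, not part of the original code.
from google.cloud import pubsub_v1

def publish(result, metadata, topic_name):
    publisher = pubsub_v1.PublisherClient()
    topic_path = publisher.topic_path(PROJECT_ID, topic_name)
    msg = {"gobits": [metadata], "results": result}
    # Pub/Sub payloads must be bytes
    publisher.publish(topic_path, json.dumps(msg).encode("utf-8"))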
def process(self, payload):
    # Get message
    message = payload[self.data_selector]
    # Message to HTML body
    html_body, subject = self.message_to_html(message)
    if not html_body or not subject:
        logging.error("Message was not processed")
        return False
    # Make topic message; the message must have exactly one root field
    if len(message) > 1:
        logging.error("Message has multiple roots")
        return False
    message_root = next(iter(message))
    recipient_mapping_field_dict = message.get(message_root)
    recipient_mapping_field_message = recipient_mapping_field_dict.get(
        self.recipient_mapping_message_field)
    if not recipient_mapping_field_message:
        logging.error(
            f"The field {self.recipient_mapping_message_field} could not be found in the message")
        return False
    topic_message = self.make_topic_msg(
        recipient_mapping_field_message, html_body, subject)
    if not topic_message:
        logging.error("Topic message was not made")
        return False
    # Make gobits
    gobits = Gobits()
    # Send message to topic
    return_bool = self.publish_to_topic(subject, topic_message, gobits)
    if return_bool is False:
        logging.error("Message was not processed")
        return False
    logging.info("Message was processed")
    return True
def process(self, request):
    not_found_resources = []
    if not self.ckan_service.is_ckan_reachable():
        return False
    # Get all groups of CKAN; they are based on GCP project IDs
    group_list = self.ckan_service.get_group_list()
    # For every group
    for group_project_id in group_list:
        not_found_resource = NotFoundResource(group_project_id)
        # Get the project's services
        gcp_services = self.gcp_service.get_project_services(group_project_id)
        # If no gcp_services were found, the project does not exist
        if not gcp_services:
            logging.info(
                f"Project ID {group_project_id} could not be found on GCP while getting services")
            resource_url = f"https://console.cloud.google.com/home/dashboard?project={group_project_id}"
            not_found_resources.append(
                not_found_resource.make_not_found(
                    "Project not found",
                    "google-cloud-project",
                    group_project_id,
                    "GCP Project",
                    resource_url,
                ))
        group = self.ckan_service.get_project_group(group_project_id)
        # Get topics belonging to the project ID
        not_found_resources, topics = self.gcp_service.get_topics(
            not_found_resource, gcp_services, not_found_resources, group_project_id)
        # Get subscriptions belonging to the project ID
        not_found_resources, subscriptions = self.gcp_service.get_subscriptions(
            not_found_resource, gcp_services, not_found_resources, group_project_id)
        # Get buckets belonging to the project ID
        not_found_resources, buckets = self.gcp_service.get_buckets(
            not_found_resource, gcp_services, not_found_resources, group_project_id)
        # Get SQL instances belonging to the project ID
        not_found_resources, sql_instances = self.gcp_service.get_sql_instances(
            not_found_resource, gcp_services, not_found_resources, group_project_id)
        # Get SQL databases belonging to the project ID
        not_found_resources, sql_databases = self.gcp_service.get_sql_databases(
            not_found_resource,
            gcp_services,
            sql_instances,
            not_found_resources,
            group_project_id,
        )
        # Get BigQuery datasets belonging to the project ID
        not_found_resources, bigquery_datasets = self.gcp_service.get_bigquery_datasets(
            not_found_resource, gcp_services, not_found_resources, group_project_id)
        # For every package in the group
        for package in group.get("packages", []):
            full_package = self.ckan_service.get_full_package(package["id"])
            not_found_resources.extend(
                Package(
                    package=full_package,
                    topics=topics,
                    subscriptions=subscriptions,
                    buckets=buckets,
                    sql_instances=sql_instances,
                    sql_databases=sql_databases,
                    bigquery_datasets=bigquery_datasets,
                    gcp_services=gcp_services,
                    group_project_id=group_project_id,
                ).process())
    self.gcp_service.get_subscriber_client().close()
    # Create gobits object
    metadata = Gobits.from_request(request=request)
    # Send issues to a topic
    return self.gcp_helper.publish_to_topic(config.TOPIC_PROJECT_ID,
                                            config.TOPIC_NAME,
                                            not_found_resources,
                                            [metadata.to_json()])
# Open the data catalog
try:
    with open(args.data_catalog, "r") as f:
        catalog = json.load(f)
except Exception as e:
    logging.exception("Unable to open catalog because of {}".format(e))
    sys.exit(1)
# Get the schemas list
schemas_list = args.schemas
# A message should be sent to the schemas topic
# for every topic that has this schema
schemas, schema_names = get_schemas(catalog, schemas_list)
# Project ID of the topic the schemas need to be published to
topic_project_id = args.topic_project_id
# Topic the schemas need to be published to
topic_name = args.topic_name
# Print which schemas are published
print("Publishing schemas {} to topic".format(schema_names))
# Publish every schema message, with its gobits, to the topic
gobits = Gobits()
msg = {"gobits": [gobits.to_json()], "schemas": schemas}
return_bool_publish_topic = publish_to_topic(msg, schema_names,
                                             topic_project_id, topic_name)
if not return_bool_publish_topic:
    sys.exit(1)
def process(self, payload, in_request):
    try:
        xml = self.translate_to_xml(payload)
        # Get files from einvoices bucket
        bucket_einvoices = self.client.get_bucket(self.bucket_name_einvoices)
        blobs = self.client.list_blobs(bucket_einvoices, prefix=self.base_path)
        bits = Gobits(request=in_request)
        gobits = bits.to_json()
        try:
            if self.file_name != "e2e_test":
                self.check_metadata(gobits)
        except TranslateError:
            raise
        else:
            self.create_merged_pdf(blobs)
            logging.info("Prepare XML and PDF for sending")
            pdf = {'pdf': (self.file_name, open(self.merged_pdf.name, 'rb'))}
            headerspdf = {
                'Content-Type': "application/pdf",
                'Accept': "application/pdf",
                'Filename': self.file_name
            }
            headersxml = {
                'Content-Type': "application/xml",
                'Accept': "application/xml",
                'Filename': self.file_name
            }
            cert = self.get_certificates()
            # Post the XML data and the PDF file to the server in separate requests
            logging.info("Send XML and PDF to ISP")
            rxml = requests.post(self.url, headers=headersxml, data=xml,
                                 cert=cert, verify=True)
            if not rxml.ok:
                raise TranslateError(
                    4001,
                    function_name="process",
                    fields=[rxml],
                    description=f"Invoice {self.invoice_number}: XML post request to ISP failed")
            logging.info("[{}] XML invoice sent".format(self.invoice_number))
            rpdf = requests.post(self.url, headers=headerspdf, files=pdf,
                                 cert=cert, verify=True)
            if not rpdf.ok:
                raise TranslateError(
                    4001,
                    function_name="process",
                    fields=[rpdf],
                    description=f"Invoice {self.invoice_number}: PDF post request to ISP failed")
            logging.info("[{}] PDF invoice sent".format(self.invoice_number))
            # Remove the temporary merged PDF file
            self.merged_pdf.close()
            os.unlink(self.merged_pdf.name)
            # Update metadata
            self.update_metadata(gobits, payload['gobits'])
    except TranslateError as e:
        if e.properties['error']['exception_id'] == 4030:
            logging.info(json.dumps({'warning': e.properties['error']}))
        else:
            logging.error(json.dumps(e.properties))
    except Exception as e:
        logging.exception(e)
def process(self, request):
    if not self.ckan_service.is_ckan_reachable():
        return False
    # Get GCP projects
    project_list = self.gcp_service.get_projects()
    # Get all groups of CKAN; they are based on GCP project IDs
    group_list = self.ckan_service.get_group_list()
    # Get mismatching projects
    mismatching_projects = list(set(project_list) - set(group_list))
    not_found_resources = self.process_not_found_projects(mismatching_projects)
    # Get matching projects
    matching_projects = list(set(project_list) - set(mismatching_projects))
    for project_id in matching_projects:
        not_found_resource = NotFoundResource(project_id)
        # Get the project's services
        gcp_services = self.gcp_service.get_project_services(project_id)
        # Get the matching CKAN project
        group = self.ckan_service.get_project_group(project_id)
        # Get CKAN resources
        ckan_resources = []
        ckan_resources_search = ''
        for package in group.get("packages", []):
            ckan_resources = self.ckan_service.get_full_package(
                package["id"]).get("resources", [])
            for resource in ckan_resources:
                if "format" in resource and "name" in resource:
                    ckan_resources_search += ':' + resource["name"] + ':' + resource["format"]
        # Get topics belonging to the project ID
        not_found_resources, topics = self.gcp_service.get_topics(
            not_found_resource, gcp_services, not_found_resources, project_id)
        # Get subscriptions belonging to the project ID
        not_found_resources, subscriptions = self.gcp_service.get_subscriptions(
            not_found_resource, gcp_services, not_found_resources, project_id)
        # Get buckets belonging to the project ID
        not_found_resources, buckets = self.gcp_service.get_buckets(
            not_found_resource, gcp_services, not_found_resources, project_id)
        # Get SQL instances belonging to the project ID
        not_found_resources, sql_instances = self.gcp_service.get_sql_instances(
            not_found_resource, gcp_services, not_found_resources, project_id)
        # Get SQL databases belonging to the project ID
        not_found_resources, sql_databases = self.gcp_service.get_sql_databases(
            not_found_resource,
            gcp_services,
            sql_instances,
            not_found_resources,
            project_id,
        )
        # Get BigQuery datasets belonging to the project ID
        not_found_resources, bigquery_datasets = self.gcp_service.get_bigquery_datasets(
            not_found_resource, gcp_services, not_found_resources, project_id)
        resources = {
            'topic': topics,
            'subscription': subscriptions,
            'blob-storage': buckets,
            'cloudsql-instance': sql_instances,
            'cloudsql-db': sql_databases,
            'bigquery-dataset': bigquery_datasets
        }
        for key, value in resources.items():
            for resource_name in value:
                search = ':' + resource_name + ':' + key
                if search not in ckan_resources_search and not self.is_default_resource(resource_name):
                    logging.info(
                        f"Resource {resource_name} could not be found on CKAN while it still exists in GCP")
                    not_found_resources.append(
                        not_found_resource.make_not_found(
                            "Resource not found",
                            '',  # package name is a CKAN-specific attribute
                            resource_name,
                            key,
                            self.gcp_service.generate_resource_url(key, resource_name, project_id),
                        ))
    self.gcp_service.get_subscriber_client().close()
    # Create gobits object
    metadata = Gobits.from_request(request=request)
    # Send issues to a topic
    return self.gcp_helper.publish_to_topic(
        config.TOPIC_PROJECT_ID,
        config.TOPIC_NAME,
        not_found_resources,
        [metadata.to_json()])
def setUp(self, mock_context):
    mock_context.event_id = EVENT_ID
    self.gobits = Gobits.from_context(context=mock_context)
                                           topic_name)
        msg = {"gobits": [gobits.to_json()], "data_catalog": catalog}
        future = publisher.publish(topic_path, json.dumps(msg).encode("utf-8"))
        future.add_done_callback(lambda x: logging.info(
            "Published data catalog of project {} to topic {}".format(
                args.project_id, topic_name)))
        return True
    except Exception as e:
        logging.exception("Unable to publish data catalog "
                          "to topic because of {}".format(e))
        print("Unable to publish data catalog to topic because of {}".format(e))
        return False


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data-catalog", required=True)
    parser.add_argument("-p", "--project-id", required=True)
    parser.add_argument("-t", "--publish-topic-name", required=False)
    parser.add_argument("-n", "--publish-project-name", required=False)
    args = parser.parse_args()
    gobits = Gobits()
    return_bool = publish_to_topic(args, gobits)
    if not return_bool:
        sys.exit(1)
def setUp(self):
    os.environ['BUILDER_OUTPUT'] = BUILDER_OUTPUT
    self.gobits = Gobits()
def setUp(self):
    os.environ['X_GOOGLE_GCP_PROJECT'] = X_GOOGLE_GCP_PROJECT
    os.environ['X_GOOGLE_FUNCTION_NAME'] = X_GOOGLE_FUNCTION_NAME
    os.environ['FUNCTION_TRIGGER_TYPE'] = FUNCTION_TRIGGER_TYPE
    os.environ['X_GOOGLE_FUNCTION_VERSION'] = X_GOOGLE_FUNCTION_VERSION
    self.gobits = Gobits()
def setUp(self, mock_request):
    mock_request.data = b''
    mock_request.headers = {
        'Function-Execution-Id': HTTP_FUNCTION_EXECUTION_ID
    }
    self.gobits = Gobits.from_request(request=mock_request)