def fame_manage(request):
    """HTTP Google cloud function. Adds/Removes/Lists repos."""
    try:
        data = request.get_json()
        error_if_false(data, 'No payload')
        print('i %s' % str(data))
        command = data.get('command', None)
        error_if_false(Manage.is_valid(command), 'Invalid command %s' % command)
        user = data.get('user', None)
        error_if_false(user, 'User is required')
        configure_storage()
        topic = get_fame_pubsub_topic()  # Let it fail early.

        if command in [Manage.ADD, Manage.REMOVE]:
            owner = data.get('owner', None)
            error_if_false(owner, 'Repo owner is required')
            repo = data.get('repo', None)
            error_if_false(repo, 'Repo is required')
            tracker = RepoTracker()

        result = {'status': 'ok'}
        if command == Manage.ADD:
            tracker.configure(user, owner, repo)
            tracker.add()
            client = pubsub.PublisherClient()
            client.publish(topic, b'', command=Refresh.REFRESH, user=user,
                           owner=owner, repo=repo)
        elif command == Manage.REMOVE:
            tracker.configure(user, owner, repo)
            tracker.remove()
        elif command == Manage.LIST:
            directory = []
            for item in RepoTracker.list(user):
                modified = item.last_modified.strftime('%Y-%m-%dT%H:%M:%SZ')
                directory.append({
                    'user': item.user,
                    'owner': item.owner,
                    'repo': item.repo,
                    'status': item.status,
                    'last_modified': modified,
                    'message': item.error_message
                })
            result['data'] = directory
        return flask.jsonify(result)
    except Exception as e:
        print('e %s' % str(e))
        return make_error_response(400, str(e))
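# A minimal usage sketch of calling the fame_manage HTTP function above.
# The function URL is a placeholder, and the lowercase command strings are
# assumed values for Manage.ADD / Manage.REMOVE / Manage.LIST.
import requests

payload = {
    'command': 'add',          # assumed string behind Manage.ADD
    'user': 'example-user',
    'owner': 'example-owner',  # owner/repo are required only for add/remove
    'repo': 'example-repo',
}
resp = requests.post(
    'https://REGION-PROJECT.cloudfunctions.net/fame_manage',  # placeholder URL
    json=payload)
print(resp.json())  # expected: {'status': 'ok'} on success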
def list_subscriptions_in_topic(project, topic_name):
    """Lists all subscriptions for a given topic."""
    # Listing a topic's subscriptions is a publisher-side operation.
    publisher = pubsub.PublisherClient()
    topic_path = publisher.topic_path(project, topic_name)
    for subscription in publisher.list_topic_subscriptions(topic_path):
        print(subscription)
def __init__(self, queue_name, sub_name=None, verbose=10):
    assert 'GOOGLE_APPLICATION_CREDENTIALS' in os.environ.keys()
    with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as f:
        credentials = json.loads(f.read())

    project_name = credentials['project_id']
    self.logger = logging.getLogger(self.__class__.__name__)
    if verbose is not None:
        self.logger.setLevel(parse_verbosity(verbose))

    self.pubclient = pubsub.PublisherClient()
    self.subclient = pubsub.SubscriberClient()

    self.project = project_name
    self.topic_name = self.pubclient.topic_path(project_name, queue_name)
    self.logger.info("Topic name = {}".format(self.topic_name))
    try:
        self.pubtopic = self.pubclient.get_topic(self.topic_name)
    except BaseException:
        self.pubtopic = self.pubclient.create_topic(self.topic_name)
        self.logger.info('topic {} created'.format(self.topic_name))

    sub_name = sub_name if sub_name else queue_name + "_sub"
    self.logger.info("Topic name = {}".format(queue_name))
    self.logger.info("Subscription name = {}".format(sub_name))

    self.sub_name = self.subclient.subscription_path(project_name, sub_name)
    try:
        self.subclient.get_subscription(self.sub_name)
    except BaseException as e:
        self.logger.warning(e)
        self.subclient.create_subscription(self.sub_name, self.topic_name)
        self.logger.info('subscription {} created'.format(sub_name))
def main(args: argparse.Namespace):
    """main entry point for backfill CLI."""
    gcs_client: storage.Client = storage.Client(client_info=CLIENT_INFO)
    pubsub_client = None
    suffix = args.success_filename
    if args.destination_regex:
        os.environ["DESTINATION_REGEX"] = args.destination_regex
    if args.mode == "NOTIFICATIONS":
        if not args.pubsub_topic:
            raise ValueError("when passing mode=NOTIFICATIONS "
                             "you must also pass pubsub_topic.")
        # import is here because this utility can be used without
        # google-cloud-pubsub dependency in LOCAL mode.
        # pylint: disable=import-outside-toplevel
        from google.cloud import pubsub
        pubsub_client = pubsub.PublisherClient()

    # These are all I/O bound tasks so use Thread Pool concurrency for speed.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_gsurl = {}
        for blob in find_blobs_with_suffix(gcs_client, args.gcs_path, suffix):
            if pubsub_client:
                # kwargs are message attributes
                # https://googleapis.dev/python/pubsub/latest/publisher/index.html#publish-a-message
                logging.info("sending pubsub message for: %s",
                             f"gs://{blob.bucket.name}/{blob.name}")
                future_to_gsurl[executor.submit(
                    pubsub_client.publish,
                    args.pubsub_topic,
                    b'',  # cloud function ignores message body
                    bucketId=blob.bucket.name,
                    objectId=blob.name,
                    _metaInfo="this message was submitted with "
                    "gcs_ocn_bq_ingest backfill.py utility"
                )] = f"gs://{blob.bucket.name}/{blob.name}"
            else:
                logging.info("running cloud function locally for: %s",
                             f"gs://{blob.bucket.name}/{blob.name}")
                future_to_gsurl[executor.submit(
                    gcs_ocn_bq_ingest.main.main,
                    {
                        "attributes": {
                            "bucketId": blob.bucket.name,
                            "objectId": blob.name
                        }
                    },
                    None,
                )] = f"gs://{blob.bucket.name}/{blob.name}"
        exceptions: Dict[str, Exception] = dict()
        for future in concurrent.futures.as_completed(future_to_gsurl):
            gsurl = future_to_gsurl[future]
            try:
                future.result()
            except Exception as err:  # pylint: disable=broad-except
                logging.error("Error processing %s: %s", gsurl, err)
                exceptions[gsurl] = err
        if exceptions:
            raise RuntimeError("The following errors were encountered:\n" +
                               pprint.pformat(exceptions))
def main():
    sc_client = Socrata(SOCRATA_DOMAIN, SOCRATA_APP_TOKEN)
    publisher = pubsub.PublisherClient()

    data_list = sc_client.get(SOCRATA_DATASET_IDENTIFIER, limit=LIMIT)
    for data in data_list:
        # Pub/Sub message data must be a bytestring.
        publisher.publish(TOPIC, json.dumps(data).encode('utf-8'))

    print('finished publishing data as of {0:%Y%m%d_%H%M%S}'.format(
        datetime.now()))
def _infinite_publish_job(topic_path):
    publisher_client = pubsub.PublisherClient()
    while True:
        future = publisher_client.publish(
            topic_path,
            b'{"url": "https://beam.apache.org/", "review": "positive"}')
        future.result()
        time.sleep(1)
def main():
    with open('youtube-ids') as f:
        ids = [s.strip() for s in f.readlines()]

    publisher = pubsub.PublisherClient()
    topic = 'projects/{}/topics/{}'.format(PROJECT, TOPIC)
    for video_id in tqdm(ids):
        # Pub/Sub message data must be a bytestring.
        publisher.publish(topic, video_id.encode('utf-8'))
def send_message(message, project_id, topic):
    data = json.dumps(message)
    data = data.encode('utf-8')
    publisher = pubsub.PublisherClient()
    topic_path = publisher.topic_path(project_id, topic)
    publisher.publish(topic_path, data=data)
def send_to_pubsub(topic_name, data, project_id):
    from google.cloud import pubsub
    publisher = pubsub.PublisherClient()
    topic_path = publisher.topic_path(project_id, topic_name)
    data = data.encode('utf-8')
    future = publisher.publish(topic_path, data=data)
    print('Published {} with message ID {}.'.format(data, future.result()))
def execute(self, context):
    date = self.execution_date.format(**context)
    clean_and_filtered_episodes = self.clean_and_filter_dataset(date)
    if len(clean_and_filtered_episodes) > 0:
        publisher = pubsub.PublisherClient()
        self.simulate_streaming(clean_and_filtered_episodes, publisher)
def test_topic():
    topic = manager.create_iot_topic(project_id, topic_id)

    yield topic

    pubsub_client = pubsub.PublisherClient()
    topic_path = pubsub_client.topic_path(project_id, topic_id)
    pubsub_client.delete_topic(topic_path)
def late_dpi_to_topic(row):
    # Write row to pubsub output topic
    publisher = pubsub.PublisherClient()
    topic_url = 'projects/{project_id}/topics/{topic}'.format(
        project_id='big-data-env',
        topic='late-dpi-topic',
    )
    publisher.publish(topic_url, ','.join([
        str(row.window.start),
        str(row.window.end),
        row.number[-4:],
        str(row['count'])
    ]).encode('utf-8'))
def publish_message(project_name, topic_name, data):
    try:
        publisher = pubsub.PublisherClient()
        topic = 'projects/' + project_name + '/topics/' + topic_name
        publisher.publish(topic, data, placeholder='')
        print(data)
    except Exception:
        print("There was an error publishing weather data.")
def main():
    publisher = pubsub.PublisherClient()
    topic_s = 'projects/{}/topics/{}'.format(PROJECT, TOPIC)
    tkrs_l = [
        'FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG', 'IBM', 'SPY', 'WMT', 'MSFT',
        'XOM', 'ORCL', 'NVDA'
    ]
    for tkr in tqdm(tkrs_l):
        # Pub/Sub message data must be a bytestring.
        publisher.publish(topic_s, ('You should predict: ' + tkr).encode('utf-8'))
def publishWeather(sensor, weatherData):
    latitude = sensor['Location'].latitude
    longitude = sensor['Location'].longitude
    publisher = pubsub.PublisherClient()
    topic = 'projects/{project_id}/topics/{topic}'.format(
        project_id='ml-demo-212200',
        topic='iot-topic',  # Set this to something appropriate.
    )
    publisher.publish(topic,
                      json.dumps(weatherData).encode('utf-8'),
                      projectId='ml-demo-212200',
                      deviceRegistryId='iot-registry',
                      deviceNumId='000000000000000',
                      deviceId='esp32_virtual',
                      deviceRegistryLocation='us-central1')
def __init__(self, jobs_denylist=None, jobs_allowlist=None):
    """Initialization for PSQ Worker.

    Args:
        jobs_denylist (Optional[list[str]]): Jobs we will exclude from running
        jobs_allowlist (Optional[list[str]]): The only Jobs we will include to run
    """
    setup()
    psq_publisher = pubsub.PublisherClient()
    psq_subscriber = pubsub.SubscriberClient()
    datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
    try:
        self.psq = psq.Queue(
            psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
            name=config.PSQ_TOPIC,
            storage=psq.DatastoreStorage(datastore_client))
    except exceptions.GoogleCloudError as e:
        msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
        log.error(msg)
        raise TurbiniaException(msg)

    # Deregister jobs from denylist/allowlist.
    job_manager.JobsManager.DeregisterJobs(jobs_denylist, jobs_allowlist)
    disabled_jobs = list(config.DISABLED_JOBS) if config.DISABLED_JOBS else []
    disabled_jobs = [j.lower() for j in disabled_jobs]
    # Only actually disable jobs that have not been allowlisted.
    if jobs_allowlist:
        disabled_jobs = list(set(disabled_jobs) - set(jobs_allowlist))
    if disabled_jobs:
        log.info(
            'Disabling non-allowlisted jobs configured to be disabled in the '
            'config file: {0:s}'.format(', '.join(disabled_jobs)))
        job_manager.JobsManager.DeregisterJobs(jobs_denylist=disabled_jobs)

    # Check for valid dependencies/directories.
    dependencies = config.ParseDependencies()
    if config.DOCKER_ENABLED:
        try:
            check_docker_dependencies(dependencies)
        except TurbiniaException as e:
            log.warning(
                'DOCKER_ENABLED=True is set in the config, but there is an '
                'error checking for the docker daemon: {0:s}'.format(str(e)))
    check_system_dependencies(dependencies)
    check_directory(config.MOUNT_DIR_PREFIX)
    check_directory(config.OUTPUT_DIR)
    check_directory(config.TMP_DIR)
    register_job_timeouts(dependencies)

    jobs = job_manager.JobsManager.GetJobNames()
    log.info('Dependency check complete. The following jobs are enabled '
             'for this worker: {0:s}'.format(','.join(jobs)))
    log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
    self.worker = psq.Worker(queue=self.psq)
def waitOnFunctionsStart(splitFiles, recv_topic, project_id, bucket, topicId,
                         work_dir, alignsDir, uploadDir, startTimeout,
                         finishTimeout, checkInterval=30):
    reinvokeFutures = {}
    waitStartTime = timer()
    # for reinvoke
    publisher = pubsub.PublisherClient()
    topic_path = createTopic(publisher, project_id, topicId)
    fullUploadDir = os.path.join(work_dir, uploadDir)
    # create client to read/write to pubsub queue
    client = pubsub.SubscriberClient()
    subscription_path = client.subscription_path(project_id, recv_topic)
    recv_path = client.topic_path(project_id, recv_topic)
    create_subscription(client, subscription_path, recv_path)
    storageClient = storage.Client()
    waitIntervalTime = timer()
    unstartedSplitFiles = splitFiles
    # unstartedSplitFiles = listFunctionsNotStarted(splitFiles, bucket, work_dir, uploadDir)
    # wait on start
    while (not checkAllFunctionsStarted(unstartedSplitFiles, storageClient,
                                        bucket, work_dir, uploadDir)
           and timer() - waitStartTime < startTimeout):
        streaming_pull_future = client.subscribe(
            subscription_path,
            callback=lambda message: recv_callback(
                message, publisher, topic_path, bucket, work_dir, recv_topic,
                fullUploadDir, project_id, reinvokeFutures))
        try:
            if streaming_pull_future:
                streaming_pull_future.result(timeout=10)
        except:  # noqa
            streaming_pull_future.cancel()
        if timer() - waitIntervalTime > checkInterval:
            sys.stderr.write(
                "Checking start functions at time (queue empty) {}\n".format(
                    timer() - waitStartTime))
            waitIntervalTime = timer()
            unstartedSplitFiles = listFunctionsNotStarted(
                unstartedSplitFiles, bucket, work_dir, uploadDir)

    unstartedSplitFiles = listFunctionsNotStarted(unstartedSplitFiles, bucket,
                                                  work_dir, uploadDir)
    if unstartedSplitFiles:
        for unstartedSplitFile in unstartedSplitFiles:
            sys.stderr.write('{} not started\n'.format(unstartedSplitFile))
    else:
        sys.stderr.write(
            'Time after last message for functions to start is {}\n'.format(
                timer() - waitStartTime))
    return unstartedSplitFiles
def pubsub_client():
    """Google Cloud PubSub client"""
    try:
        from google.cloud import pubsub
        return pubsub.PublisherClient()
    except Exception:
        LOGGER.exception("unable to initialise PubSub client")
        return None
def create_clients() -> None:
    global __subs_client
    global __pubs_client
    if __subs_client is None:
        __subs_client = pubsub.SubscriberClient()
    if __pubs_client is None:
        __pubs_client = pubsub.PublisherClient()
def process_item(self, item, spider):
    """We need to establish an authorized connection to Google Cloud in order
    to upload to Google Pub/Sub. In order to host the spiders on GitHub, the
    service account credentials are housed on the Scrapy platform and
    dynamically created in the script."""
    # Pull all of the credential info from the Scrapy platform into a dictionary.
    cred_dict = {
        "auth_provider_x509_cert_url": spider.settings.get('auth_provider_x509_cert_url'),
        "auth_uri": spider.settings.get('auth_uri'),
        "client_email": spider.settings.get('client_email'),
        "client_id": spider.settings.get('client_id'),
        "client_x509_cert_url": spider.settings.get('client_x509_cert_url'),
        "private_key": spider.settings.get('private_key'),
        "private_key_id": spider.settings.get('private_key_id'),
        "project_id": spider.settings.get('project_id'),
        "token_uri": spider.settings.get('token_uri'),
        "type": spider.settings.get('account_type')
    }
    logging.info('Credentials downloaded from Scrapy server.')
    cred_dict['private_key'] = cred_dict['private_key'].replace('\\n', '\n')

    # Build a Credentials object from the above dictionary. This will properly
    # allow access as part of a Google Cloud client.
    credentials = service_account.Credentials.from_service_account_info(cred_dict)
    logging.info('Credentials object created.')

    # Create Publisher client.
    publisher = pubsub.PublisherClient(credentials=credentials)
    logging.info('Publisher Client created.')

    # Set location of proper publisher topic.
    project_id = 'politics-data-tracker-1'
    topic_name = 'senate_pols'
    topic_path = publisher.topic_path(project_id, topic_name)

    data = u'This is a representative in the Senate.'  # Consider how to better use this.
    data = data.encode('utf-8')
    publisher.publish(topic_path, data=data,
                      first_name=item['first_name'],
                      last_name=item['last_name'],
                      party=item['party'],
                      state=item['state'])
    logging.info('Published item: {0}'.format(item))
    return item
class TweetStreamListener(tweepy.StreamListener):
    """A listener handles tweets that are received from the stream.
    This listener dumps the tweets into a Pub/Sub topic.
    """

    client = pubsub.PublisherClient()
    pubsub_topic = client.topic_path(GCP_PROJECT_NAME, PUBSUB_TOPIC_NAME)
    count = 0
    tweets = []
    batch_size = 1  # process-tweets.py file set up to handle one tweet at a time!
    total_tweets = TOTAL_TWEETS

    def write_to_pubsub(self, tweets):
        publish(self.client, self.pubsub_topic, tweets)

    def on_status(self, status):
        created_at = status.created_at.isoformat()
        id_str = status.id_str
        text = status.text
        source = status.source
        user_name = status.user.name
        user_screen_name = status.user.screen_name
        loc = status.user.location
        coords = status.coordinates
        lang = status.user.lang
        bio = status.user.description

        tw = dict(text=text, bio=bio, created_at=created_at, tweet_id=id_str,
                  location=loc, user_name=user_name,
                  user_screen_name=user_screen_name, source=source,
                  coords=coords, lang=lang)
        self.tweets.append(tw)

        if len(self.tweets) >= self.batch_size:
            self.write_to_pubsub(self.tweets)
            # print(self.tweets)  # <- For debugging
            self.tweets = []

        self.count += 1
        if self.count >= self.total_tweets:
            return False
        if (self.count % 100) == 0:
            # Status message every 100 tweets.
            print("count is: {} at {}".format(self.count,
                                              datetime.datetime.now()))
        return True

    def on_error(self, status_code):
        print('ERROR: {}'.format(status_code))
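# A minimal sketch of wiring the listener above into a tweepy 3.x stream
# (tweepy.StreamListener exists only in tweepy < 4). The credential variables
# and track keywords below are placeholders, not values from the source.
import tweepy

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)  # placeholder creds
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)   # placeholder creds

listener = TweetStreamListener()
stream = tweepy.Stream(auth=auth, listener=listener)
stream.filter(track=['gcp', 'pubsub'], languages=['en'])  # example keywords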
def get_publisher():
    client = pubsub.PublisherClient()
    try:
        client.create_topic(TOPIC)
    except Exception:
        # Topic already created.
        pass
    return client
def main():
    # Publish each asset from the configured asset list.
    publisher = pubsub.PublisherClient()
    topic = 'projects/{}/topics/{}'.format(PROJECT, TOPIC)
    for asset in data_settings.ASSET_LIST:
        asset = asset.encode('utf-8')
        message_future = publisher.publish(topic, data=asset)
        message_future.add_done_callback(pub_callback)
def build_client(service_account_data=None, scopes=None, project=None,
                 client_type="bigquery"):
    """Builds bigquery or pubsub client

    Parameters
    ----------
    service_account_data : str or dict
        Your service account data as filename or dict
        (default is value of global SERVICE_ACCOUNT_DICT)
    scopes : list, optional
        GCP execution scopes (default is value of SERVICE_ACCOUNT_SCOPES)
    project : str, optional
        The GCP project
    client_type : str, optional
        Either `bigquery` or `pubsub` (default is `bigquery`)

    Returns
    -------
    google.cloud client
        Either a `bigquery` or `pubsub` client
    """
    project = project or GCP_PROJECT
    service_account_data = service_account_data or SERVICE_ACCOUNT_DICT
    scopes = scopes or SERVICE_ACCOUNT_SCOPES

    if isinstance(service_account_data, dict):
        credentials = service_account.Credentials.from_service_account_info(
            service_account_data, scopes=scopes)
    elif isinstance(service_account_data, str):
        credentials = service_account.Credentials.from_service_account_file(
            service_account_data, scopes=scopes)
    else:
        raise AttributeError("Only file location (str) or json (dict) are "
                             "valid for --service_account_data.")

    if client_type == "bigquery":
        client = bigquery.Client(credentials=credentials, project=project)
    elif client_type == "pubsub":
        client = pubsub.PublisherClient(credentials=credentials)
    else:
        raise AttributeError("Only `bigquery` and `pubsub` are supported "
                             "values for --client_type")

    _LOG.info("{} client built successfully.".format(client_type))
    return client
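# A minimal usage sketch: requesting a Pub/Sub publisher from build_client
# above and publishing one message. GCP_PROJECT comes from the surrounding
# module's globals; the topic name here is a placeholder.
publisher = build_client(client_type="pubsub")
topic_path = publisher.topic_path(GCP_PROJECT, "example-topic")  # placeholder topic
future = publisher.publish(topic_path, b"hello")  # message data must be bytes
print(future.result())  # server-assigned message ID once the publish succeeds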
def publish(self, message):
    """Publish method to send a message to downstream queues."""
    if len(self.get_channels()) == 0:
        return
    data = self.encode(message)
    publisher = pubsub.PublisherClient()
    for channel in self.get_channels():
        path = publisher.topic_path(self.PROJECT_ID, channel)
        publish_future = publisher.publish(path, data=data)
        publish_future.result()
def publish_tweets(keyword, messages):
    client = pubsub.PublisherClient()
    topic_path = client.topic_path(GOOGLE_CLOUD_PROJECT, PUB_SUB_TOPIC)
    for message in messages:
        data = message['tweet'].encode('utf-8')
        client.publish(topic_path, data=data, keyword=keyword,
                       timestamp=message['timestamp'],
                       location=message['location'])
def send(_type, id, action):
    publisher = pubsub.PublisherClient()
    # Note: publish() expects a fully-qualified topic path of the form
    # "projects/<project>/topics/events-v1".
    topic_name = "events-v1"
    attributes = {"type": _type, "id": id, "action": action}
    data = b"{}"
    return publisher.publish(topic_name, data, **attributes)
def topic_path():
    publisher_client = pubsub.PublisherClient()
    topic_path = publisher_client.topic_path(PROJECT, TOPIC)

    try:
        publisher_client.delete_topic(topic_path)
    except Exception:
        pass

    topic = publisher_client.create_topic(topic_path)

    yield topic.name

    publisher_client.delete_topic(topic_path)
def get_publisher(): """Get Google Pub/Sub publisher client.""" client = pubsub.PublisherClient() try: client.create_topic(TOPIC) except Exception as e: # already created pass return client
def tracking_article_view(article_id):
    # instantiate pubsub client
    credentials, gcp_project_id = google.auth.default()
    pubsub_client = pubsub.PublisherClient(credentials=credentials)

    # tracks the article clicked prior to redirecting the user
    user_id = check_or_set_user_id()
    redirect_url = track_click_and_get_url(gcp_project_id, pubsub_client,
                                           article_id, articles, user_id)
    return redirect(redirect_url)