Example #1
def fame_manage(request):
    """HTTP Google cloud function. Adds/Removes/Lists repos."""
    try:
        data = request.get_json()
        error_if_false(data, 'No payload')

        print('i %s' % str(data))

        command = data.get('command', None)
        error_if_false(Manage.is_valid(command),
                       'Invalid command %s' % command)

        user = data.get('user', None)
        error_if_false(user, 'User is required')

        configure_storage()
        topic = get_fame_pubsub_topic()  # Let it fail early.

        if command in [Manage.ADD, Manage.REMOVE]:
            owner = data.get('owner', None)
            error_if_false(owner, 'Repo owner is required')

            repo = data.get('repo', None)
            error_if_false(repo, 'Repo is required')

        tracker = RepoTracker()
        result = {'status': 'ok'}

        if command == Manage.ADD:
            tracker.configure(user, owner, repo)
            tracker.add()
            client = pubsub.PublisherClient()
            client.publish(topic,
                           b'',
                           command=Refresh.REFRESH,
                           user=user,
                           owner=owner,
                           repo=repo)
        elif command == Manage.REMOVE:
            tracker.configure(user, owner, repo)
            tracker.remove()
        elif command == Manage.LIST:
            directory = []
            for item in RepoTracker.list(user):
                modified = item.last_modified.strftime('%Y-%m-%dT%H:%M:%SZ')
                directory.append({
                    'user': item.user,
                    'owner': item.owner,
                    'repo': item.repo,
                    'status': item.status,
                    'last_modified': modified,
                    'message': item.error_message
                })
            result['data'] = directory

        return flask.jsonify(result)

    except Exception as e:
        print('e %s' % str(e))
        return make_error_response(400, str(e))
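For context, a minimal sketch of how this function might be called once deployed; the trigger URL and the 'add' command string are assumptions for illustration, not taken from the example:

import requests

# Hypothetical trigger URL; replace with your deployment's URL.
FUNCTION_URL = 'https://REGION-PROJECT.cloudfunctions.net/fame_manage'

payload = {
    'command': 'add',        # assumed string value of Manage.ADD
    'user': 'alice',
    'owner': 'octocat',
    'repo': 'hello-world',
}
response = requests.post(FUNCTION_URL, json=payload)
print(response.status_code, response.json())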
Example #2
def list_subscriptions_in_topic(project, topic_name):
    """Lists all subscriptions for a given topic."""
    publisher = pubsub.PublisherClient()
    topic_path = publisher.topic_path(project, topic_name)

    for subscription in publisher.list_topic_subscriptions(topic_path):
        print(subscription)
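A quick usage sketch; note that the one-argument form of list_topic_subscriptions shown above belongs to older google-cloud-pubsub releases, while 2.x releases take a request dict instead (the 2.x form below is an assumption about your installed version):

# Older API, matching the example above:
list_subscriptions_in_topic('my-project', 'my-topic')

# Roughly equivalent call on google-cloud-pubsub >= 2.0:
# publisher.list_topic_subscriptions(request={"topic": topic_path})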
Example #3
    def __init__(self, queue_name, sub_name=None, verbose=10):
        assert 'GOOGLE_APPLICATION_CREDENTIALS' in os.environ.keys()
        with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as f:
            credentials = json.loads(f.read())

        project_name = credentials['project_id']
        self.logger = logging.getLogger(self.__class__.__name__)
        if verbose is not None:
            self.logger.setLevel(parse_verbosity(verbose))

        self.pubclient = pubsub.PublisherClient()
        self.subclient = pubsub.SubscriberClient()

        self.project = project_name
        self.topic_name = self.pubclient.topic_path(project_name, queue_name)
        self.logger.info("Topic name = {}".format(self.topic_name))
        try:
            self.pubtopic = self.pubclient.get_topic(self.topic_name)
        except BaseException as e:
            self.pubtopic = self.pubclient.create_topic(self.topic_name)
            self.logger.info('topic {} created'.format(self.topic_name))

        sub_name = sub_name if sub_name else queue_name + "_sub"
        self.logger.info("Queue name = {}".format(queue_name))
        self.logger.info("Subscription name = {}".format(sub_name))

        self.sub_name = self.subclient.subscription_path(
            project_name, sub_name)
        try:
            self.subclient.get_subscription(self.sub_name)
        except BaseException as e:
            self.logger.warning(e)
            self.subclient.create_subscription(self.sub_name, self.topic_name)
            self.logger.info('subscription {} created'.format(sub_name))
Example #4
def main(args: argparse.Namespace):
    """main entry point for backfill CLI."""
    gcs_client: storage.Client = storage.Client(client_info=CLIENT_INFO)
    pubsub_client = None
    suffix = args.success_filename
    if args.destination_regex:
        os.environ["DESTINATION_REGEX"] = args.destination_regex
    if args.mode == "NOTIFICATIONS":
        if not args.pubsub_topic:
            raise ValueError("when passing mode=NOTIFICATIONS "
                             "you must also pass pubsub_topic.")
        # import is here because this utility can be used without
        # google-cloud-pubsub dependency in LOCAL mode.
        # pylint: disable=import-outside-toplevel
        from google.cloud import pubsub
        pubsub_client = pubsub.PublisherClient()

    # These are all I/O bound tasks so use Thread Pool concurrency for speed.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_gsurl = {}
        for blob in find_blobs_with_suffix(gcs_client, args.gcs_path, suffix):
            if pubsub_client:
                # kwargs are message attributes
                # https://googleapis.dev/python/pubsub/latest/publisher/index.html#publish-a-message
                logging.info("sending pubsub message for: %s",
                             f"gs://{blob.bucket.name}/{blob.name}")
                future_to_gsurl[executor.submit(
                    pubsub_client.publish,
                    args.pubsub_topic,
                    b'',  # cloud function ignores message body
                    bucketId=blob.bucket.name,
                    objectId=blob.name,
                    _metaInfo="this message was submitted with "
                    "gcs_ocn_bq_ingest backfill.py utility"
                )] = f"gs://{blob.bucket.name}/{blob.name}"
            else:
                logging.info("running cloud function locally for: %s",
                             f"gs://{blob.bucket.name}/{blob.name}")
                future_to_gsurl[executor.submit(
                    gcs_ocn_bq_ingest.main.main,
                    {
                        "attributes": {
                            "bucketId": blob.bucket.name,
                            "objectId": blob.name
                        }
                    },
                    None,
                )] = f"gs://{blob.bucket.name}/{blob.name}"
        exceptions: Dict[str, Exception] = dict()
        for future in concurrent.futures.as_completed(future_to_gsurl):
            gsurl = future_to_gsurl[future]
            try:
                future.result()
            except Exception as err:  # pylint: disable=broad-except
                logging.error("Error processing %s: %s", gsurl, err)
                exceptions[gsurl] = err
        if exceptions:
            raise RuntimeError("The following errors were encountered:\n" +
                               pprint.pformat(exceptions))
Example #5
def main():
    sc_client = Socrata(SOCRATA_DOMAIN, SOCRATA_APP_TOKEN)
    publisher = pubsub.PublisherClient()
    data_list = sc_client.get(SOCRATA_DATASET_IDENTIFIER, limit=LIMIT)
    for data in data_list:
        publisher.publish(TOPIC, json.dumps(data).encode('utf-8'))
    print('finished publishing data as of {0:%Y%m%d_%H%M%S}'.format(
        datetime.now()))
Example #6
def _infinite_publish_job(topic_path):
    publisher_client = pubsub.PublisherClient()
    while True:
        future = publisher_client.publish(
            topic_path,
            b'{"url": "https://beam.apache.org/", "review": "positive"}')
        future.result()
        time.sleep(1)
Example #7
def main():
    with open('youtube-ids') as f:
        ids = [s.strip() for s in f.readlines()]

    publisher = pubsub.PublisherClient()
    topic = 'projects/{}/topics/{}'.format(PROJECT, TOPIC)
    for video_id in tqdm(ids):
        publisher.publish(topic, video_id.encode('utf-8'))
Example #8
def send_message(message, project_id, topic):
    data = json.dumps(message)
    data = data.encode('utf-8')
    publisher = pubsub.PublisherClient()

    topic_path = publisher.topic_path(project_id, topic)

    publisher.publish(topic_path, data=data)
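A brief usage sketch with placeholder project and topic names (both are assumptions for illustration):

send_message({'event': 'user_signup', 'user_id': 42},
             project_id='my-project',
             topic='my-topic')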
Example #9
def send_to_pubsub(topic_name, data, project_id):
    from google.cloud import pubsub

    publisher = pubsub.PublisherClient()
    topic_path = publisher.topic_path(project_id, topic_name)
    data = data.encode('utf-8')
    future = publisher.publish(topic_path, data=data)
    print('Published {} of message ID {}.'.format(data, future.result()))
Example #10
    def execute(self, context):
        date = self.execution_date.format(**context)

        clean_and_filtered_episodes = self.clean_and_filter_dataset(date)

        if len(clean_and_filtered_episodes) > 0:
            publisher = pubsub.PublisherClient()
            self.simulate_streaming(clean_and_filtered_episodes, publisher)
Example #11
def test_topic():
    topic = manager.create_iot_topic(project_id, topic_id)

    yield topic

    pubsub_client = pubsub.PublisherClient()
    topic_path = pubsub_client.topic_path(project_id, topic_id)
    pubsub_client.delete_topic(topic_path)
Example #12
def late_dpi_to_topic(row):
    # Write row to pubsub output topic
    publisher = pubsub.PublisherClient()
    topic_url = 'projects/{project_id}/topics/{topic}'.format(
        project_id='big-data-env',
        topic='late-dpi-topic',
    )
    publisher.publish(
        topic_url,
        ','.join([
            str(row.window.start),
            str(row.window.end),
            row.number[-4:],
            str(row['count'])
        ]).encode('utf-8'))
Example #13
def publish_message(project_name, topic_name, data):
    try:
        publisher = pubsub.PublisherClient()
        topic = 'projects/' + project_name + '/topics/' + topic_name
        publisher.publish(topic, data, placeholder='')
        print(data)
    except Exception as e:
        print("There was an error publishing weather data: {}".format(e))
Example #14
def main():
    publisher = pubsub.PublisherClient()
    topic_s = 'projects/{}/topics/{}'.format(PROJECT, TOPIC)
    tkrs_l = [
        'FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG', 'IBM', 'SPY', 'WMT', 'MSFT',
        'XOM', 'ORCL', 'NVDA'
    ]
    for tkr in tqdm(tkrs_l):
        publisher.publish(topic_s, ('You should predict: ' + tkr).encode('utf-8'))
Example #15
def publishWeather(sensor, weatherData):
    latitude = sensor['Location'].latitude
    longitude = sensor['Location'].longitude
    publisher = pubsub.PublisherClient()
    topic = 'projects/{project_id}/topics/{topic}'.format(
        project_id=os.getenv('GOOGLE_CLOUD_PROJECT', 'ml-demo-212200'),
        topic='iot-topic',
    )
    publisher.publish(topic,
                      json.dumps(weatherData).encode('utf-8'),
                      projectId='ml-demo-212200',
                      deviceRegistryId='iot-registry',
                      deviceNumId='000000000000000',
                      deviceId='esp32_virtual',
                      deviceRegistryLocation='us-central1')
Example #16
    def __init__(self, jobs_denylist=None, jobs_allowlist=None):
        """Initialization for PSQ Worker.

    Args:
      jobs_denylist (Optional[list[str]]): Jobs we will exclude from running
      jobs_allowlist (Optional[list[str]]): The only Jobs we will include to run
    """
        setup()
        psq_publisher = pubsub.PublisherClient()
        psq_subscriber = pubsub.SubscriberClient()
        datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
        try:
            self.psq = psq.Queue(
                psq_publisher,
                psq_subscriber,
                config.TURBINIA_PROJECT,
                name=config.PSQ_TOPIC,
                storage=psq.DatastoreStorage(datastore_client))
        except exceptions.GoogleCloudError as e:
            msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
            log.error(msg)
            raise TurbiniaException(msg)

        # Deregister jobs from denylist/allowlist.
        job_manager.JobsManager.DeregisterJobs(jobs_denylist, jobs_allowlist)
        disabled_jobs = list(
            config.DISABLED_JOBS) if config.DISABLED_JOBS else []
        disabled_jobs = [j.lower() for j in disabled_jobs]
        # Only actually disable jobs that have not been allowlisted.
        if jobs_allowlist:
            disabled_jobs = list(set(disabled_jobs) - set(jobs_allowlist))
        if disabled_jobs:
            log.info(
                'Disabling non-allowlisted jobs configured to be disabled in the '
                'config file: {0:s}'.format(', '.join(disabled_jobs)))
            job_manager.JobsManager.DeregisterJobs(jobs_denylist=disabled_jobs)

        # Check for valid dependencies/directories.
        dependencies = config.ParseDependencies()
        if config.DOCKER_ENABLED:
            try:
                check_docker_dependencies(dependencies)
            except TurbiniaException as e:
                log.warning(
                    'DOCKER_ENABLED=True is set in the config, but there is an '
                    'error checking for the docker daemon: {0:s}'.format(str(e)))
        check_system_dependencies(dependencies)
        check_directory(config.MOUNT_DIR_PREFIX)
        check_directory(config.OUTPUT_DIR)
        check_directory(config.TMP_DIR)
        register_job_timeouts(dependencies)

        jobs = job_manager.JobsManager.GetJobNames()
        log.info('Dependency check complete. The following jobs are enabled '
                 'for this worker: {0:s}'.format(','.join(jobs)))
        log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
        self.worker = psq.Worker(queue=self.psq)
Example #17
def waitOnFunctionsStart(splitFiles,
                         recv_topic,
                         project_id,
                         bucket,
                         topicId,
                         work_dir,
                         alignsDir,
                         uploadDir,
                         startTimeout,
                         finishTimeout,
                         checkInterval=30):
    reinvokeFutures = {}
    waitStartTime = timer()
    #for reinvoke
    publisher = pubsub.PublisherClient()
    topic_path = createTopic(publisher, project_id, topicId)
    fullUploadDir = os.path.join(work_dir, uploadDir)
    #create client to read/write to pubsub queue
    client = pubsub.SubscriberClient()
    subscription_path = client.subscription_path(project_id, recv_topic)
    recv_path = client.topic_path(project_id, recv_topic)
    create_subscription(client, subscription_path, recv_path)
    storageClient = storage.Client()
    waitIntervalTime = timer()
    unstartedSplitFiles = splitFiles
    #unstartedSplitFiles=listFunctionsNotStarted(splitFiles,bucket,work_dir,uploadDir)
    #wait on start
    while (not checkAllFunctionsStarted(unstartedSplitFiles, storageClient,
                                        bucket, work_dir, uploadDir)
           and timer() - waitStartTime < startTimeout):
        streaming_pull_future = client.subscribe(
            subscription_path,
            callback=lambda message: recv_callback(
                message, publisher, topic_path, bucket, work_dir, recv_topic,
                fullUploadDir, project_id, reinvokeFutures))
        try:
            if streaming_pull_future:
                streaming_pull_future.result(timeout=10)
        except:  # noqa
            streaming_pull_future.cancel()
            if timer() - waitIntervalTime > checkInterval:
                sys.stderr.write(
                    "Checking start functions at time (queue empty) {}\n".
                    format(timer() - waitStartTime))
                waitIntervalTime = timer()
                unstartedSplitFiles = listFunctionsNotStarted(
                    unstartedSplitFiles, bucket, work_dir, uploadDir)
    unstartedSplitFiles = listFunctionsNotStarted(unstartedSplitFiles, bucket,
                                                  work_dir, uploadDir)
    if unstartedSplitFiles:
        for unstartedSplitFile in unstartedSplitFiles:
            sys.stderr.write('{} not started\n'.format(unstartedSplitFile))
    else:
        sys.stderr.write(
            'Time after last message for functions to start is {}\n'.
            format(timer() - waitStartTime))
    return unstartedSplitFiles
Example #18
def pubsub_client():
    """Google Cloud PubSub client"""
    try:
        from google.cloud import pubsub

        return pubsub.PublisherClient()
    except Exception:
        LOGGER.exception("unable to initialise PubSub client")
    return None
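Because the helper returns None when the client cannot be initialised, callers are expected to guard the publish; a minimal sketch with a placeholder topic path (an assumption):

client = pubsub_client()
if client is not None:
    client.publish('projects/my-project/topics/my-topic', b'payload')
else:
    LOGGER.warning("PubSub client unavailable, message dropped")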
Example #19
def create_clients() -> None:
    global __subs_client
    global __pubs_client

    if __subs_client is None:
        __subs_client = pubsub.SubscriberClient()

    if __pubs_client is None:
        __pubs_client = pubsub.PublisherClient()
Example #20
    def process_item(self, item, spider):
        """We need to establish a an authorized connection to Google Cloud in order to upload to Google Pub/Sub.
        In order to host the spiders on Github, the service account credentials are housed on the Scrapy platform
        and dynamically created in the script."""

        # Pull all of the credential info from the Scrapy platform into a dictionary.
        cred_dict = {
            "auth_provider_x509_cert_url":
            spider.settings.get('auth_provider_x509_cert_url'),
            "auth_uri":
            spider.settings.get('auth_uri'),
            "client_email":
            spider.settings.get('client_email'),
            "client_id":
            spider.settings.get('client_id'),
            "client_x509_cert_url":
            spider.settings.get('client_x509_cert_url'),
            "private_key":
            spider.settings.get('private_key'),
            "private_key_id":
            spider.settings.get('private_key_id'),
            "project_id":
            spider.settings.get('project_id'),
            "token_uri":
            spider.settings.get('token_uri'),
            "type":
            spider.settings.get('account_type')
        }
        logging.info('Credentials downloaded from Scrapy server.')
        cred_dict['private_key'] = cred_dict['private_key'].replace(
            '\\n', '\n')

        # Build a Credentials object from the above dictionary. This will properly allow access as part of a
        # Google Cloud Client.
        credentials = service_account.Credentials.from_service_account_info(
            cred_dict)
        logging.info('Credentials object created.')

        # Create Publisher client.
        publisher = pubsub.PublisherClient(credentials=credentials)
        logging.info('Publisher Client created.')

        # Set location of proper publisher topic
        project_id = 'politics-data-tracker-1'
        topic_name = 'senate_pols'
        topic_path = publisher.topic_path(project_id, topic_name)
        data = u'This is a representative in the Senate.'  #Consider how to better use this.
        data = data.encode('utf-8')
        publisher.publish(topic_path,
                          data=data,
                          first_name=item['first_name'],
                          last_name=item['last_name'],
                          party=item['party'],
                          state=item['state'])
        logging.info('Published item: {0}'.format(item))

        return item
Example #21
class TweetStreamListener(tweepy.StreamListener):
    """
    A listener handles tweets that are received from the stream.
    This listener dumps the tweets into a Pub/Sub topic.
    """
    client = pubsub.PublisherClient()
    pubsub_topic = client.topic_path(GCP_PROJECT_NAME, PUBSUB_TOPIC_NAME)
    count = 0
    tweets = []
    batch_size = 1  #process-tweets.py file set up to handle one tweet at a time!
    total_tweets = TOTAL_TWEETS

    def write_to_pubsub(self, tweets):
        publish(self.client, self.pubsub_topic, tweets)

    def on_status(self, status):

        created_at = status.created_at.isoformat()
        id_str = status.id_str
        text = status.text
        source = status.source
        user_name = status.user.name
        user_screen_name = status.user.screen_name
        loc = status.user.location
        coords = status.coordinates
        lang = status.user.lang
        bio = status.user.description

        tw = dict(text=text,
                  bio=bio,
                  created_at=created_at,
                  tweet_id=id_str,
                  location=loc,
                  user_name=user_name,
                  user_screen_name=user_screen_name,
                  source=source,
                  coords=coords,
                  lang=lang)

        self.tweets.append(tw)

        if len(self.tweets) >= self.batch_size:
            self.write_to_pubsub(self.tweets)
            # print(self.tweets) # <- For debugging
            self.tweets = []

        self.count += 1
        if self.count >= self.total_tweets:
            return False
        if (self.count % 100) == 0:
            print("count is: {} at {}".format(self.count,
                                              datetime.datetime.now()))
            # Status message every 100 tweets.
        return True

    def on_error(self, status_code):
        print('ERROR:{}'.format(status_code))
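A sketch of wiring the listener into a stream, assuming tweepy 3.x (StreamListener was removed in tweepy 4) and an already-configured OAuthHandler named auth; both are assumptions:

listener = TweetStreamListener()
stream = tweepy.Stream(auth=auth, listener=listener)
stream.filter(track=['python'], languages=['en'])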
Example #22
def get_publisher():
    client = pubsub.PublisherClient()
    try:
        client.create_topic(TOPIC)
    except Exception as e:
        # already created
        pass

    return client
Example #23
def main():

    # Publish each asset in the configured asset list.
    publisher = pubsub.PublisherClient()
    topic = 'projects/{}/topics/{}'.format(PROJECT, TOPIC)
    for asset in data_settings.ASSET_LIST:
        asset = asset.encode('utf-8')
        message_future = publisher.publish(topic, data=asset)
        message_future.add_done_callback(pub_callback)
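pub_callback is not shown above; a hypothetical sketch of what such a done-callback might look like (not taken from the source):

def pub_callback(message_future):
    # result() raises if the publish failed; otherwise it returns the server-assigned message ID.
    try:
        print('Published message ID {}'.format(message_future.result()))
    except Exception as e:
        print('Publish failed: {}'.format(e))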
Example #24
def build_client(service_account_data=None,
                 scopes=None,
                 project=None,
                 client_type="bigquery"):
    """Builds bigquery or pubsub client

    Parameters
    ----------
    service_account_data : str or dict
        Your service account data as filename or dict (default is
        value of global SERVICE_ACCOUNT_DICT)
    scopes : list, optional
        GCP execution scopes (default is
        value of SERVICE_ACCOUNT_SCOPES)
    project : str, optional
        The GCP project
    client_type : str, optional
        Either `bigquery` or `pubsub` (default is `bigquery`)

    Returns
    -------
    google.cloud client
        Either `bigquery` or `pubsub` client
    """

    project = project or GCP_PROJECT
    service_account_data = service_account_data or SERVICE_ACCOUNT_DICT
    scopes = scopes or SERVICE_ACCOUNT_SCOPES

    if isinstance(service_account_data, dict):
        credentials = service_account.Credentials.from_service_account_info(
            service_account_data, scopes=scopes)

    elif isinstance(service_account_data, str):
        credentials = service_account.Credentials.from_service_account_file(
            service_account_data, scopes=scopes)

    else:
        raise AttributeError("Only file location (str) or json (dict) are "
                             "valid for --service_account_data.")

    if client_type == "bigquery":
        client = bigquery.Client(credentials=credentials, project=project)
    elif client_type == "pubsub":
        client = pubsub.PublisherClient(credentials=credentials)
    else:
        raise AttributeError("Only `bigquery` and `pubsub` are supported"
                             " values for --client_type")

    _LOG.info("{} client built successfully.".format(client_type))

    return client
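A usage sketch with placeholder values; the file path, scope, and project are assumptions for illustration:

publisher = build_client(
    service_account_data='/path/to/service-account.json',
    scopes=['https://www.googleapis.com/auth/pubsub'],
    project='my-project',
    client_type='pubsub')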
Example #25
    def publish(self, message):
        """Publish method to send a message to downstream queues."""
        if len(self.get_channels()) == 0:
            return
        data = self.encode(message)
        publisher = pubsub.PublisherClient()
        for channel in self.get_channels():
            path = publisher.topic_path(self.PROJECT_ID, channel)
            publish_future = publisher.publish(path, data=data)
            publish_future.result()
Example #26
def publish_tweets(keyword, messages):
    client = pubsub.PublisherClient()
    topic_path = client.topic_path(GOOGLE_CLOUD_PROJECT, PUB_SUB_TOPIC)
    for message in messages:
        data = message['tweet'].encode('utf-8')
        client.publish(topic_path,
                       data=data,
                       keyword=keyword,
                       timestamp=message['timestamp'],
                       location=message['location'])
Example #27
def send(_type, id, action):

    publisher = PubSub.PublisherClient()
    # publish() expects a fully qualified topic path, e.g. 'projects/<project>/topics/events-v1'.
    topic_name = "events-v1"

    attributes = {"type": _type, "id": id, "action": action}

    data = b"{}"

    return publisher.publish(topic_name, data, **attributes)
Example #28
def topic_path():
    publisher_client = pubsub.PublisherClient()
    topic_path = publisher_client.topic_path(PROJECT, TOPIC)
    try:
        publisher_client.delete_topic(topic_path)
    except Exception:
        pass
    topic = publisher_client.create_topic(topic_path)
    yield topic.name
    publisher_client.delete_topic(topic_path)
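The yield marks this as a pytest fixture (it would normally carry a @pytest.fixture decorator, not shown above); a minimal sketch of a test consuming it, with an assumed message body:

def test_publish(topic_path):
    publisher = pubsub.PublisherClient()
    future = publisher.publish(topic_path, b'hello')
    assert future.result()  # result() returns the server-assigned message ID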
Example #29
def get_publisher():
    """Get Google Pub/Sub publisher client."""
    client = pubsub.PublisherClient()
    try:
        client.create_topic(TOPIC)
    except Exception as e:
        # already created
        pass

    return client
Example #30
def tracking_article_view(article_id):
    # instantiate pubsub client
    credentials, gcp_project_id = google.auth.default()
    pubsub_client = pubsub.PublisherClient(credentials=credentials)
    
    # tracks the article clicked prior to redirecting the user
    user_id = check_or_set_user_id()
    redirect_url = track_click_and_get_url(gcp_project_id, pubsub_client, article_id, articles, user_id)

    return redirect(redirect_url)