def _backend_setup(self, server=True, *args, **kwargs):
  """
  Args:
    server (bool): Whether this is the client or a server.

  Raises:
    TurbiniaException: When there are errors creating the PSQ Queue.
  """
  log.debug(
      'Setting up PSQ Task Manager requirements on project {0:s}'.format(
          config.TURBINIA_PROJECT))
  self.server_pubsub = turbinia_pubsub.TurbiniaPubSub(config.PUBSUB_TOPIC)
  if server:
    self.server_pubsub.setup_subscriber()
  else:
    self.server_pubsub.setup_publisher()
  psq_publisher = pubsub.PublisherClient()
  psq_subscriber = pubsub.SubscriberClient()
  datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
  try:
    self.psq = psq.Queue(
        psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
        name=config.PSQ_TOPIC, storage=psq.DatastoreStorage(datastore_client))
  except exceptions.GoogleCloudError as e:
    msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
    log.error(msg)
    raise turbinia.TurbiniaException(msg)
def get_visitors_queue():
    client = pubsub.Client(project=current_app.config['PROJECT_ID'])
    # Create a queue specifically for processing books and pass in the
    # Flask application context. This ensures that tasks will have access
    # to any extensions / configuration specified to the app, such as
    # models.
    return psq.Queue(client, 'books', extra_context=current_app.app_context)
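A hedged usage sketch for a factory like this one: a request handler defers work onto the queue by dotted task name (as the reddit snippets below do with 'main.label_images_task'), so the web process never imports the task function. The route and task name here are illustrative, not taken from the source:

# Hypothetical Flask view that defers work onto the queue above.
@app.route('/visitors', methods=['POST'])
def log_visitor():
    q = get_visitors_queue()
    # Enqueue by dotted name; a separate worker process executes it later.
    q.enqueue('tasks.record_visitor', request.remote_addr)
    return 'Accepted', 202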
def get_daily_fetch_queue():
    # Uses current_app, so this must be called inside `with app.app_context():`.
    project = current_app.config['PROJECT_ID']
    return psq.Queue(
        publisher_client, subscriber_client, project, 'daily_fetch_queue',
        extra_context=current_app.app_context)
def __init__(self, jobs_denylist=None, jobs_allowlist=None):
  """Initialization for PSQ Worker.

  Args:
    jobs_denylist (Optional[list[str]]): Jobs we will exclude from running.
    jobs_allowlist (Optional[list[str]]): The only Jobs we will include to run.
  """
  setup()
  psq_publisher = pubsub.PublisherClient()
  psq_subscriber = pubsub.SubscriberClient()
  datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
  try:
    self.psq = psq.Queue(
        psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
        name=config.PSQ_TOPIC, storage=psq.DatastoreStorage(datastore_client))
  except exceptions.GoogleCloudError as e:
    msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
    log.error(msg)
    raise TurbiniaException(msg)

  # Deregister jobs from denylist/allowlist.
  job_manager.JobsManager.DeregisterJobs(jobs_denylist, jobs_allowlist)
  disabled_jobs = list(config.DISABLED_JOBS) if config.DISABLED_JOBS else []
  disabled_jobs = [j.lower() for j in disabled_jobs]
  # Only actually disable jobs that have not been allowlisted.
  if jobs_allowlist:
    disabled_jobs = list(set(disabled_jobs) - set(jobs_allowlist))
  if disabled_jobs:
    log.info(
        'Disabling non-allowlisted jobs configured to be disabled in the '
        'config file: {0:s}'.format(', '.join(disabled_jobs)))
    job_manager.JobsManager.DeregisterJobs(jobs_denylist=disabled_jobs)

  # Check for valid dependencies/directories.
  dependencies = config.ParseDependencies()
  if config.DOCKER_ENABLED:
    try:
      check_docker_dependencies(dependencies)
    except TurbiniaException as e:
      log.warning(
          'DOCKER_ENABLED=True is set in the config, but there is an error '
          'checking for the docker daemon: {0:s}'.format(str(e)))
  check_system_dependencies(dependencies)
  check_directory(config.MOUNT_DIR_PREFIX)
  check_directory(config.OUTPUT_DIR)
  check_directory(config.TMP_DIR)
  register_job_timeouts(dependencies)

  jobs = job_manager.JobsManager.GetJobNames()
  log.info(
      'Dependency check complete. The following jobs are enabled '
      'for this worker: {0:s}'.format(','.join(jobs)))
  log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
  self.worker = psq.Worker(queue=self.psq)
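The snippet stops after constructing the psq.Worker; nothing here actually starts consuming tasks. A minimal hedged sketch of the step that would follow, assuming a start method on the same class; only psq.Worker.listen(), the blocking loop that pulls tasks from Pub/Sub and executes them, is taken from the psq API:

def start(self):
  """Start the PSQ worker. Hypothetical method name; listen() blocks."""
  log.info('Starting PSQ worker')
  self.worker.listen()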
def get_moviess_queue():
    project = current_app.config['PROJECT_ID']
    # Create a queue specifically for processing movies and pass in the
    # Flask application context. This ensures that tasks will have access
    # to any extensions / configuration specified to the app, such as
    # models.
    return psq.Queue(
        publisher_client, subscriber_client, project, 'moviess',
        extra_context=current_app.app_context)
def __init__(self, jobs_blacklist=None, jobs_whitelist=None):
  """Initialization for PSQ Worker.

  Args:
    jobs_blacklist (Optional[list[str]]): Jobs we will exclude from running.
    jobs_whitelist (Optional[list[str]]): The only Jobs we will include to run.
  """
  config.LoadConfig()
  psq_publisher = pubsub.PublisherClient()
  psq_subscriber = pubsub.SubscriberClient()
  datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
  try:
    self.psq = psq.Queue(
        psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
        name=config.PSQ_TOPIC, storage=psq.DatastoreStorage(datastore_client))
  except exceptions.GoogleCloudError as e:
    msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
    log.error(msg)
    raise TurbiniaException(msg)

  # Deregister jobs from blacklist/whitelist.
  disabled_jobs = list(config.DISABLED_JOBS) if config.DISABLED_JOBS else []
  job_manager.JobsManager.DeregisterJobs(jobs_blacklist, jobs_whitelist)
  if disabled_jobs:
    log.info(
        'Disabling jobs that were configured to be disabled in the '
        'config file: {0:s}'.format(', '.join(disabled_jobs)))
    job_manager.JobsManager.DeregisterJobs(jobs_blacklist=disabled_jobs)

  # Check for valid dependencies/directories.
  dependencies = config.ParseDependencies()
  if config.DOCKER_ENABLED:
    check_docker_dependencies(dependencies)
  check_system_dependencies(dependencies)
  check_directory(config.MOUNT_DIR_PREFIX)
  check_directory(config.OUTPUT_DIR)
  check_directory(config.TMP_DIR)

  jobs = job_manager.JobsManager.GetJobNames()
  log.info(
      'Dependency check complete. The following jobs will be enabled '
      'for this worker: {0:s}'.format(','.join(jobs)))
  log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
  self.worker = psq.Worker(queue=self.psq)
def _backend_setup(self):
  log.debug(
      'Setting up PSQ Task Manager requirements on project {0:s}'.format(
          config.PROJECT))
  self.server_pubsub = turbinia_pubsub.TurbiniaPubSub(config.PUBSUB_TOPIC)
  self.server_pubsub.setup()
  psq_pubsub_client = pubsub.Client(project=config.PROJECT)
  datastore_client = datastore.Client(project=config.PROJECT)
  try:
    self.psq = psq.Queue(
        psq_pubsub_client, config.PSQ_TOPIC,
        storage=psq.DatastoreStorage(datastore_client))
  except GaxError as e:
    msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
    log.error(msg)
    raise turbinia.TurbiniaException(msg)
def _backend_setup(self):
  psq_publisher = pubsub.PublisherClient()
  psq_subscriber = pubsub.SubscriberClient()
  datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
  try:
    self.psq = psq.Queue(
        psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
        name=config.PSQ_TOPIC, storage=psq.DatastoreStorage(datastore_client))
  except exceptions.GoogleCloudError as e:
    msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
    log.error(msg)
    raise TurbiniaException(msg)
  log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
  self.worker = psq.Worker(queue=self.psq)
def __init__(self, *_, **__):
  """Initialization for PSQ Worker."""
  config.LoadConfig()
  psq_publisher = pubsub.PublisherClient()
  psq_subscriber = pubsub.SubscriberClient()
  datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
  try:
    self.psq = psq.Queue(
        psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
        name=config.PSQ_TOPIC, storage=psq.DatastoreStorage(datastore_client))
  except exceptions.GoogleCloudError as e:
    msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
    log.error(msg)
    raise TurbiniaException(msg)
  check_directory(config.MOUNT_DIR_PREFIX)
  check_directory(config.OUTPUT_DIR)
  check_directory(config.TMP_DIR)
  log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
  self.worker = psq.Worker(queue=self.psq)
def label_images(vision, storage, image_urls):
    # Reconstructed signature: the original excerpt begins mid-function, and
    # this is the function that label_images_task() below calls. How
    # `image_contents` is produced from the URLs is not shown in the excerpt.
    response = vision.detect_labels(image_contents)
    for image_url, labels in zip(image_urls, response):
        storage.add_labels(labels)
        storage.add_image(image_url, labels)


def label_images_task(image_urls):
    vision = VisionApi()
    storage = Storage()
    label_images(vision, storage, image_urls)


def scrape_reddit(subreddit, pages=10):
    after = None
    for _ in range(pages):
        # Fixed: the original hardcoded 'aww' and ignored the subreddit arg.
        posts, after = reddit.get_hot(subreddit, after=after)
        yield reddit.get_previews(posts)


def scrape_reddit_task(subreddit, pages=20):
    for image_urls in scrape_reddit(subreddit, pages):
        q = psq.Queue(pubsub.Client(), 'images')
        q.enqueue('main.label_images_task', image_urls)
        print("Enqueued {} images".format(len(image_urls)))


q = psq.Queue(pubsub.Client(), 'images')
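A hedged sketch of how the pipeline above might be driven: running scrape_reddit_task directly in the driver process pushes each batch of URLs onto the 'images' queue, where workers running label_images_task pick them up. The subreddit value is only an illustration:

# Hypothetical driver for the pipeline above.
if __name__ == '__main__':
    scrape_reddit_task('aww', pages=20)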
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from gcloud import datastore, pubsub

import psq

import tasks


PROJECT_ID = 'your-project-id'

pubsub_client = pubsub.Client(project=PROJECT_ID)
datastore_client = datastore.Client(project=PROJECT_ID)

q = psq.Queue(
    pubsub_client,
    storage=psq.DatastoreStorage(datastore_client))


def main():
    q.enqueue(tasks.slow_task)
    q.enqueue(tasks.print_task, "Hello, World")

    r = q.enqueue(tasks.adder, 1, 5)
    print(r.result(timeout=10))


if __name__ == '__main__':
    main()
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import datastore, pubsub

import psq

import tasks


PROJECT_ID = 'junyiacademytest1'

pubsub_client = pubsub.Client(project=PROJECT_ID)
datastore_client = datastore.Client(project=PROJECT_ID)
storage = psq.DatastoreStorage(datastore_client)

q = psq.Queue(pubsub_client, storage=storage)


def main():
    r = q.enqueue(tasks.adder, 1, 5)
    print(r.result(timeout=10))
    storage.delete_task(r.task_id)


if __name__ == '__main__':
    main()
import logging

from google.cloud import datastore
from google.cloud import pubsub_v1

import psq

import tasks


PROJECT_ID = 'your-project-id'  # CHANGE ME

publisher_client = pubsub_v1.PublisherClient()
subscriber_client = pubsub_v1.SubscriberClient()
datastore_client = datastore.Client(project=PROJECT_ID)

q = psq.Queue(
    publisher_client, subscriber_client, PROJECT_ID,
    storage=psq.DatastoreStorage(datastore_client))


def main():
    q.enqueue(tasks.slow_task)
    q.enqueue(tasks.print_task, "Hello, World")

    r = q.enqueue(tasks.adder, 1, 5)
    print(r.result(timeout=10))


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()
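The main() examples above enqueue functions from a tasks module that is never shown. A minimal hedged sketch of what such a module could look like; the names slow_task, print_task, and adder come from the enqueue calls above, while the bodies are assumptions:

# tasks.py -- hypothetical implementations matching the enqueue calls above.
import time


def slow_task():
    # Stand-in for a long-running job.
    time.sleep(5)


def print_task(message):
    print(message)


def adder(a, b):
    # Return values are stored by psq, so callers can read r.result().
    return a + b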
def get_books_queue():
    ps_client = pubsub.Client(project=current_app.config['PROJECT_ID'])
    return psq.Queue(ps_client, extra_context=current_app.app_context)
def get_scraper_queue():
    project = current_app.config['PROJECT_ID']
    return psq.Queue(
        publisher_client, subscriber_client, project, 'nogizaka_scraper',
        extra_context=current_app.app_context)
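The Flask-style factories above return queues bound to the application context; on the worker side the same queue object is consumed with psq.Worker, as in the Turbinia snippets. A minimal hedged sketch of a standalone worker script, assuming an app factory named create_app in a module named app (both assumptions):

# worker.py -- hypothetical worker process for the scraper queue.
import psq

from app import create_app, get_scraper_queue  # assumed module layout

app = create_app()
with app.app_context():
    q = get_scraper_queue()

# listen() blocks, pulling tasks from Pub/Sub and executing them.
psq.Worker(queue=q).listen()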