def update_celery(self, new_celery: celery.Celery) -> None:
    if self.app:
        self.celery.__dict__.update(vars(new_celery))
        self.celery.conf.update(self.app.config.get_namespace("CELERY_"))

        worker_process_init.connect(self._worker_process_init)
        task_postrun.connect(self._task_postrun)
        task_prerun.connect(self._task_prerun)
def init_app(self, app, sentry=None):
    self.app = app
    new_celery = celery.Celery(
        app.import_name,
        broker=app.config['CELERY_BROKER_URL'],
        backend=app.config['CELERY_RESULT_BACKEND'],
    )
    self.celery.__dict__.update(vars(new_celery))
    self.celery.conf.update(app.config)

    worker_process_init.connect(self._worker_process_init)
    task_postrun.connect(self._task_postrun)
    task_prerun.connect(self._task_prerun)
def init_app(self, app):
    self.app = app
    new_celery = celery.Celery(
        app.import_name,
        broker=app.config["CELERY_BROKER_URL"],
        backend=app.config["CELERY_RESULT_BACKEND"],
    )
    # XXX(dcramer): why the hell am I wasting time trying to make Celery work?
    self.celery.__dict__.update(vars(new_celery))
    self.celery.conf.update(app.config)

    worker_process_init.connect(self._worker_process_init)
    task_postrun.connect(self._task_postrun)
    task_prerun.connect(self._task_prerun)
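The three init_app variants above share one pattern: build a fresh Celery instance from the Flask config, then copy its state into a long-lived placeholder so module-level references stay valid. A minimal self-contained sketch of the surrounding extension class; the FlaskCelery name and config values are assumptions, not from the snippets:

import celery
from flask import Flask


class FlaskCelery(object):  # hypothetical wrapper class
    def __init__(self):
        self.app = None
        # Placeholder instance; init_app() overwrites its state in place,
        # so tasks registered against `self.celery` keep working.
        self.celery = celery.Celery(__name__)

    def init_app(self, app):
        self.app = app
        new_celery = celery.Celery(
            app.import_name,
            broker=app.config["CELERY_BROKER_URL"],
            backend=app.config["CELERY_RESULT_BACKEND"],
        )
        self.celery.__dict__.update(vars(new_celery))
        self.celery.conf.update(app.config)


app = Flask(__name__)
app.config["CELERY_BROKER_URL"] = "redis://localhost:6379/0"    # assumed
app.config["CELERY_RESULT_BACKEND"] = "redis://localhost:6379/1"  # assumed
ext = FlaskCelery()
ext.init_app(app)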
import django
from celery import platforms
from celery.signals import worker_process_init
from celery.task import task


@task
def placeholder():
    raise NotImplementedError


def cleanup_after_tasks(signum, frame):
    django.setup()


def install_pool_process_sighandlers(**kwargs):
    platforms.signals["TERM"] = cleanup_after_tasks
    platforms.signals["INT"] = cleanup_after_tasks


worker_process_init.connect(install_pool_process_sighandlers)
    specify that it's ok to overwrite certain files
    '''
    assert (
        raw_doc and normalized
    ), 'Raw and normalized documents must be provided to process_normalized'
    for p in settings.NORMALIZED_PROCESSING:
        extras = kwargs.get(p, {})
        get_processor(p).process_normalized(raw_doc, normalized, **extras)


def process_raw(raw_doc, kwargs):
    assert raw_doc, 'A raw document must be provided to process_raw'
    for p in settings.RAW_PROCESSING:
        extras = kwargs.get(p, {})
        get_processor(p).process_raw(raw_doc, **extras)


HarvesterResponse = get_processor(settings.RESPONSE_PROCESSOR).HarvesterResponseModel

all_processors = list(map(
    get_processor,
    set(settings.NORMALIZED_PROCESSING + settings.RAW_PROCESSING + [settings.RESPONSE_PROCESSOR]),
))

for processor in all_processors:
    processor.manager.setup()
    worker_process_init.connect(processor.manager.celery_setup)
        self._setup = False

    def clear_keyspace(self, force=False):
        assert force, 'clear_keyspace must be called with force'
        assert self.keyspace != settings.CASSANDRA_KEYSPACE, 'Cannot erase the keyspace in settings'
        management.delete_keyspace(self.keyspace)
        self.tear_down()
        return self.setup()

    def register_model(self, model):
        model.__keyspace__ = self.keyspace
        self._models.append(model)
        if self._setup:
            management.sync_table(model)
        return model

    def celery_setup(self, *args, **kwargs):
        self.tear_down()
        self.setup()


_manager = DatabaseManager()
setup = _manager.setup
tear_down = _manager.tear_down
register_model = _manager.register_model

worker_process_init.connect(_manager.celery_setup)
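A hedged usage sketch for the module-level register_model exported above; since it returns the model, it works as a class decorator. The Document model and its columns are hypothetical:

from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model


@register_model  # assigns __keyspace__ and, if the manager is set up, syncs the table
class Document(Model):  # hypothetical model
    id = columns.UUID(primary_key=True)
    source = columns.Text()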
    n += 1
    logger.info('request type: %s', type(self.request))
    logger.error('{0} Request: {1!r}'.format(n, self.request))


def update_loglevel(*args, **kwargs):
    app.log.redirect_stdouts(loglevel='INFO')


# It's not at all clear to me why these two signals work, or the correct
# timing at which to call the function to redirect the stdouts, but this
# worked, so I felt it was wise to just go with it . . .
after_setup_logger.connect(update_loglevel)
worker_process_init.connect(update_loglevel)

from djenga.celery.utils import auto_step


@auto_step(key=1)
def fly_to_the_moon(self):
    pass


@auto_step(key=2)
def shrink_the_moon(self):
    pass


@auto_step(key=3)
            self.handler.process_message('[STDERR]Operation timed out')
            self.on_term()
            if is_test:
                self.on_failure()
                self.after_return()
            raise

        if is_test:
            self.on_success()
            self.after_return()

        return result


def _initialize_process(**kwargs):
    """Initialize the octave instance.

    Function to be called when a worker process is spawned. We use this
    opportunity to actually launch octave and execute a quick MATL program.
    """
    global octave
    octave = OctaveSession(octaverc=config.OCTAVERC, paths=[config.MATL_WRAP_DIR])


# When a worker process is spawned, initialize octave
worker_process_init.connect(_initialize_process)
def mark_celery_running():
    """
    Marks celery as running by setting the `IS_CELERY_RUNNING` flag
    in the `django.conf` `settings` object.
    """
    worker_process_init.connect(_mark_celery_running)
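The private handler wired up above is not shown in this excerpt; a plausible sketch, assuming it does exactly what the docstring says:

from django.conf import settings


def _mark_celery_running(*args, **kwargs):
    # Assumed body: set the flag so application code can detect that it
    # is executing inside a Celery worker process.
    settings.IS_CELERY_RUNNING = True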
def monitor_report_config():
    boot_cmd = " ".join(sys.argv)
    if "celery worker" in boot_cmd:
        try:
            q_conf_index = sys.argv.index("-Q")
        except ValueError as e:
            sys.stdout.write(
                "[!]can't find -Q option in command: %s, skip celery monitor report config: %s\n" % (boot_cmd, e)
            )
            return

        try:
            queues = sys.argv[q_conf_index + 1]
        except IndexError as e:
            sys.stdout.write(
                "[!]can't find -Q value in command: %s, skip celery monitor report config: %s\n" % (boot_cmd, e)
            )
            return

        # Only report when one of the following queues is being consumed
        monitor_queues = ["er_execute", "er_schedule", "timeout_node"]
        if not any(monitor_queue in queues for monitor_queue in monitor_queues):
            sys.stdout.write(
                "[!]can't find er queue in command: %s, skip celery monitor report config\n" % boot_cmd
            )
            return

        from bk_monitor_report import MonitorReporter  # noqa
        from bk_monitor_report.contrib.celery import MonitorReportStep  # noqa
        from blueapps.core.celery import celery_app  # noqa

        reporter = MonitorReporter(
            data_id=env.BK_MONITOR_REPORT_DATA_ID,  # monitoring data ID
            access_token=env.BK_MONITOR_REPORT_ACCESS_TOKEN,  # custom report token
            target=env.BK_MONITOR_REPORT_TARGET,  # unique report identifier
            url=env.BK_MONITOR_REPORT_URL,  # report URL
            report_interval=env.BK_MONITOR_REPORT_INTERVAL,  # report interval, in seconds
        )

        # Prefork (multi-process) workers need special handling: start the
        # reporter inside each worker process rather than in the master.
        prefork_config_check = [("-P", "-P prefork"), ("--pool", "--pool=prefork")]
        if any(config[0] in boot_cmd and config[1] not in boot_cmd for config in prefork_config_check):
            MonitorReportStep.setup_reporter(reporter)
            celery_app.steps["worker"].add(MonitorReportStep)
        else:
            from celery.signals import worker_process_init  # noqa

            worker_process_init.connect(reporter.start, weak=False)
    elif "gunicorn wsgi" in boot_cmd or "node_timeout_process" in boot_cmd:
        from bk_monitor_report import MonitorReporter  # noqa

        reporter = MonitorReporter(
            data_id=env.BK_MONITOR_REPORT_DATA_ID,  # monitoring data ID
            access_token=env.BK_MONITOR_REPORT_ACCESS_TOKEN,  # custom report token
            target=env.BK_MONITOR_REPORT_TARGET,  # unique report identifier
            url=env.BK_MONITOR_REPORT_URL,  # report URL
            report_interval=env.BK_MONITOR_REPORT_INTERVAL,  # report interval, in seconds
        )
        reporter.start()
    else:
        sys.stdout.write("[!]unknown boot cmd: %s, skip monitor report config\n" % boot_cmd)
app = Celery('tasks', backend='redis://localhost:6379/0', broker='amqp://')
app.config_from_object('celeryconfig')


# This will force each child process to have a separate UUID
def _redo_uuid(**kwargs):
    import openpathsampling.netcdfplus as npl
    # logger.info('OLD UUID `%s`' % npl.StorableObject.INSTANCE_UUID)
    npl.StorableObject.initialize_uuid()
    logger.info('NEW UUID `%s`' % npl.StorableObject.INSTANCE_UUID)


# need to use registered instance for sender argument.
worker_process_init.connect(_redo_uuid)


@app.task
def add(x, y):
    return x + y


@app.task
def generate(engine, template, ensemble):
    engine.initialize('CPU')
    traj = engine.generate(template, ensemble.can_append)
    return traj


@app.task
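A usage sketch (assumed, not part of the source): with a worker, the amqp broker, and the Redis backend above running, tasks are queued asynchronously and their results resolved through the backend:

result = add.delay(2, 3)       # queue the task on the broker
print(result.get(timeout=10))  # -> 5, fetched from the Redis backend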
from __future__ import absolute_import

from celery import Celery
from app import config
from celery.app.log import Logging
from celery.signals import worker_process_init
import logging
import testcfg as cfg
from app.init import worker_init

# initialize worker
worker_init()

celery = Celery(include=[
    'app.sdk_client_tasks',
    'app.rest_client_tasks',
    'app.workload_manager',
    'app.stats',
    'app.admin_manager',
])
celery.config_from_object(config)

# setup celery process logger
log = Logging(celery)
log.setup(logfile=cfg.LOGDIR + '/celery-proc.log')


def stats_tasks_setup_logging(**kw):
    logger = logging.getLogger('app.stats')
    handler = logging.FileHandler(cfg.LOGDIR + '/celery-stats.log')
    formatter = logging.Formatter(logging.BASIC_FORMAT)  # you may want to customize this
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.propagate = False


worker_process_init.connect(stats_tasks_setup_logging)
    process, the sub-process will always inherit the latest version of
    MausConfiguration from the master process.
    @param kwargs Arguments - unused.
    """
    logger = logging.getLogger(__name__)
    if logger.isEnabledFor(logging.INFO):
        logger.info("Setting MAUS ErrorHandler to raise exceptions")
    ErrorHandler.DefaultHandler().on_error = 'raise'
    if not maus_cpp.globals.has_instance() and \
            MausConfiguration.configuration != '{}':
        maus_cpp.globals.birth(MausConfiguration.configuration)
    MausTransform.initialize(MausConfiguration.transform)
    MausTransform.birth(MausConfiguration.configuration)


# Bind the callback method to the Celery worker_process_init signal.
worker_process_init.connect(worker_process_init_callback)


def process_birth(pids, config_id, transform, configuration, run_number):
    """
    Create and birth a new transform. This is invoked in a sub-process
    via a call from the Celery master process. Any existing transform
    is death-ed first.
    @param pids List of process IDs whose process_birth method has been
    invoked. If this process is in the list then this method just
    returns (PID, None).
    @param config_id Configuration ID from client.
    @param transform Either a single name can be given - representing a
    single transform - or a list of transforms - representing a
    MapPyGroup. Sub-lists are treated as nested MapPyGroups. If None
    then the current transform is deathed and rebirthed.
    @param configuration Valid JSON configuration document.
    # retrieve option value
    workerTypes = sys.argv[opt_pos + 1].split(',')

    # create custom config
    config = BaseConfig(workerTypes)

    # remove from worker opts so we don't crash when it's started in main,
    # since a comma-separated list isn't acceptable here
    del sys.argv[opt_pos:opt_pos + 2]

if config is None:
    config = BaseConfig(cfg.WORKER_CONFIGS)

celery.config_from_object(config)


def setup_logging(**kw):
    setup_query_logger()


def setup_query_logger():
    logger = logging.getLogger('app.rest_client_tasks')
    handler = logging.FileHandler(cfg.LOGDIR + '/celery-query.log')
    formatter = logging.Formatter(logging.BASIC_FORMAT)  # you may want to customize this
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.propagate = False


worker_process_init.connect(setup_logging)
    Get mako templates to work on celery worker server's worker thread.

    The initialization of Mako templating is usually done when Django is
    initializing middleware packages as part of processing a server request.
    When this is run on a celery worker server, no such initialization is
    called. To make sure that we don't load this twice (just in case), we
    look for the result: the defining of the lookup paths for templates.
    """
    if 'main' not in middleware.lookup:
        TASK_LOG.info("Initializing Mako middleware explicitly")
        middleware.MakoMiddleware()


# Actually make the call to define the hook:
worker_process_init.connect(initialize_mako)


class UpdateProblemModuleStateError(Exception):
    """
    Error signaling a fatal condition while updating problem modules.
    Used when the current module cannot be processed and no more
    modules should be attempted.
    """
    pass


def _get_current_task():
    """Stub to make it easier to test without actually running Celery"""
    return current_task
celery.config_from_object(config)


def setup_logging(**kw):
    setup_stat_logger()
    setup_query_logger()


def setup_stat_logger():
    logger = logging.getLogger('app.stats')
    handler = logging.FileHandler(cfg.LOGDIR + '/celery-stats.log')
    formatter = logging.Formatter(logging.BASIC_FORMAT)  # you may want to customize this
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.propagate = False


def setup_query_logger():
    logger = logging.getLogger('app.rest_client_tasks')
    handler = logging.FileHandler(cfg.LOGDIR + '/celery-query.log')
    formatter = logging.Formatter(logging.BASIC_FORMAT)  # you may want to customize this
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.propagate = False


worker_process_init.connect(setup_logging)
        else:
            log.info("Connected...")
            break
    else:
        log.critical("Unable to connect to %s at %s: %r",
                     config.MONGO_DB, config.MONGO_URI, exc)
        raise exc


try:
    import uwsgi  # noqa
except ImportError:
    if os.getenv('CELERY_CONTEXT'):
        log.info('Celery context')
        from celery.signals import worker_process_init
        worker_process_init.connect(mongo_connect)
    else:
        log.debug('Not in uwsgi/celery context')
        mongo_connect()
else:
    log.info('Uwsgi context')
    from uwsgidecorators import postfork
    mongo_connect = postfork(mongo_connect)


def main(global_config, **settings):
    """This function returns a Pyramid WSGI application."""
    import mist.api.auth.middleware

    settings = {}
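mongo_connect is defined earlier in the module (the tail of its retry loop opens this excerpt); a minimal self-contained sketch of such a callable, assuming pymongo and the config names used above:

from pymongo import MongoClient


def mongo_connect(*args, **kwargs):
    # Open a fresh client per process: pymongo clients must not be shared
    # across fork(), which is why the uwsgi postfork and Celery
    # worker_process_init hooks above re-run this callable.
    client = MongoClient(config.MONGO_URI)
    client[config.MONGO_DB].command('ping')  # fail fast if unreachable
    return client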
from celery.signals import import_modules, worker_process_init, beat_init
from flask import session

from snms.core import signals
from snms.database import tsdb
from .core import SnmsCelery

__all__ = ('celery',)


def cassandra_init(**kwargs):
    """Initialize a clean Cassandra connection."""
    tsdb.restart()


worker_process_init.connect(cassandra_init)
beat_init.connect(cassandra_init)

#: The Celery instance for all SNMS tasks
celery = SnmsCelery('snms')


@signals.app_created.connect
def _load_default_modules(app, **kwargs):
    celery.loader.import_default_modules()  # load all tasks


@import_modules.connect
def _import_modules(*args, **kwargs):
    import snms.tasks
    signals.import_tasks.send()
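The same fork-safety concern motivates tsdb.restart(): sessions created in the master process do not survive the fork, so each worker opens its own. A sketch of the pattern with the raw cassandra-driver; the contact point and keyspace are assumptions:

from cassandra.cluster import Cluster
from celery.signals import worker_process_init

session = None


def _cassandra_reconnect(**kwargs):
    # Rebuild the cluster and session inside the freshly forked worker.
    global session
    cluster = Cluster(['127.0.0.1'])   # assumed contact point
    session = cluster.connect('snms')  # assumed keyspace


worker_process_init.connect(_cassandra_reconnect)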
    try:
        cassandra_host = connections['default'].settings_dict['HOST'].split(',')
        keyspace = connections['default'].settings_dict['NAME']
        user = connections['default'].settings_dict['USER']
        password = connections['default'].settings_dict['PASSWORD']
        auth_provider = PlainTextAuthProvider(username=user, password=password)
        if cql_cluster is not None:
            cql_cluster.shutdown()
        if cql_session is not None:
            cql_session.shutdown()
        connection.setup(cassandra_host, keyspace, auth_provider=auth_provider)
    except NoHostAvailable:
        pass


# Initialize worker context for both standard and periodic tasks, just in
# case the 'default' connection is defined.
worker_process_init.connect(cassandra_init)
beat_init.connect(cassandra_init)

app = Celery('scraper_module')

# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')

# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)