def _execute(self):
    dag_id = self.dag_id

    def signal_handler(signum, frame):
        logging.error("SIGINT (ctrl-c) received")
        sys.exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    utils.pessimistic_connection_handling()

    logging.basicConfig(level=logging.DEBUG)
    logging.info("Starting the scheduler")

    dagbag = models.DagBag(self.subdir, sync_to_db=True)
    executor = dagbag.executor
    executor.start()
    i = 0
    while (not self.test_mode) or i < 1:
        try:
            self.prioritize_queued(executor=executor, dagbag=dagbag)
        except Exception as e:
            logging.exception(e)

        i += 1
        try:
            if i % self.refresh_dags_every == 0:
                dagbag = models.DagBag(self.subdir, sync_to_db=True)
            else:
                dagbag.collect_dags(only_if_updated=True)
        except:
            logging.error("Failed at reloading the dagbag")
            if statsd:
                statsd.incr('dag_refresh_error', 1, 1)
            sleep(5)

        if dag_id:
            dags = [dagbag.dags[dag_id]]
        else:
            dags = [
                dag for dag in dagbag.dags.values()
                if not dag.parent_dag]

        paused_dag_ids = dagbag.paused_dags()
        for dag in dags:
            logging.debug("Scheduling {}".format(dag.dag_id))
            dag = dagbag.get_dag(dag.dag_id)
            if not dag or (dag.dag_id in paused_dag_ids):
                continue
            try:
                self.process_dag(dag, executor)
            except Exception as e:
                logging.exception(e)
        logging.debug(
            "Done queuing tasks, calling the executor's heartbeat")
        try:
            # We really just want the scheduler to never ever stop.
            executor.heartbeat()
            self.heartbeat()
        except Exception as e:
            logging.exception(e)
            logging.error("Tachycardia!")
    executor.end()

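Every entry point in these excerpts begins by calling utils.pessimistic_connection_handling(). For reference, here is a minimal sketch of what such a helper typically does, following SQLAlchemy's documented "pessimistic disconnect handling" recipe; this body is an assumption for illustration, not a verbatim copy of the Airflow implementation.

# Assumed sketch: ping each pooled connection on checkout and let the pool
# discard dead connections, per SQLAlchemy's pessimistic-disconnect recipe.
from sqlalchemy import event, exc
from sqlalchemy.pool import Pool


def pessimistic_connection_handling():
    @event.listens_for(Pool, "checkout")
    def ping_connection(dbapi_connection, connection_record, connection_proxy):
        # Issue a cheap query; if it fails, raise DisconnectionError so the
        # pool invalidates this connection and retries with a fresh one.
        cursor = dbapi_connection.cursor()
        try:
            cursor.execute("SELECT 1")
        except Exception:
            raise exc.DisconnectionError()
        cursor.close()
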
def _execute(self):
    dag_id = self.dag_id

    def signal_handler(signum, frame):
        logging.error("SIGINT (ctrl-c) received")
        sys.exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    utils.pessimistic_connection_handling()

    # Sleep time (seconds) between master runs
    logging.basicConfig(level=logging.DEBUG)
    logging.info("Starting a master scheduler")

    # This should get new code
    dagbag = models.DagBag(self.subdir)
    executor = dagbag.executor
    executor.start()
    i = 0
    while (not self.test_mode) or i < 1:
        i += 1
        if i % self.refresh_dags_every == 0:
            dagbag.collect_dags(only_if_updated=False)
        else:
            dagbag.collect_dags(only_if_updated=True)

        if dag_id:
            dags = [dagbag.dags[dag_id]]
        else:
            dags = [
                dag for dag in dagbag.dags.values()
                if not dag.parent_dag]

        paused_dag_ids = dagbag.paused_dags()
        for dag in dags:
            if dag.dag_id in paused_dag_ids:
                continue
            try:
                self.process_dag(dag, executor)
            except Exception as e:
                logging.exception(e)
        self.heartbeat()
    executor.end()

def run(args):
    utils.pessimistic_connection_handling()
    # Setting up logging
    log = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    args.execution_date = dateutil.parser.parse(args.execution_date)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    subdir = None
    if args.subdir:
        subdir = args.subdir.replace(
            "DAGS_FOLDER", conf.get("core", "DAGS_FOLDER"))
        subdir = os.path.expanduser(subdir)
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle:
        dagbag = DagBag(subdir)
        if args.dag_id not in dagbag.dags:
            msg = 'DAG [{0}] could not be found'.format(args.dag_id)
            logging.error(msg)
            raise AirflowException(msg)
        dag = dagbag.dags[args.dag_id]
        task = dag.get_task(task_id=args.task_id)
    else:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
        task = dag.get_task(task_id=args.task_id)

    task_start_date = None
    if args.task_start_date:
        task_start_date = dateutil.parser.parse(args.task_start_date)
        task.start_date = task_start_date
    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            task_start_date=task_start_date,
            ignore_dependencies=args.ignore_dependencies)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            job_id=args.job_id,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            force=args.force)
        executor.heartbeat()
        executor.end()

def run(args):
    utils.pessimistic_connection_handling()
    # Setting up logging
    log = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    args.execution_date = dateutil.parser.parse(args.execution_date)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    # store old log (to help with S3 appends)
    if os.path.exists(filename):
        with open(filename, 'r') as logfile:
            old_log = logfile.read()
    else:
        old_log = None

    subdir = process_subdir(args.subdir)
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle:
        dagbag = DagBag(subdir)
        if args.dag_id not in dagbag.dags:
            msg = 'DAG [{0}] could not be found'.format(args.dag_id)
            logging.error(msg)
            raise AirflowException(msg)
        dag = dagbag.dags[args.dag_id]
        task = dag.get_task(task_id=args.task_id)
    else:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
        task = dag.get_task(task_id=args.task_id)

    task_start_date = None
    if args.task_start_date:
        task_start_date = dateutil.parser.parse(args.task_start_date)
        task.start_date = task_start_date
    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            task_start_date=task_start_date,
            ignore_dependencies=args.ignore_dependencies,
            pool=args.pool)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            job_id=args.job_id,
            pool=args.pool,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            force=args.force)
        executor.heartbeat()
        executor.end()

    if conf.get('core', 'S3_LOG_FOLDER').startswith('s3:'):
        import boto
        s3_log = filename.replace(log, conf.get('core', 'S3_LOG_FOLDER'))
        bucket, key = s3_log.lstrip('s3:/').split('/', 1)
        if os.path.exists(filename):
            # get logs
            with open(filename, 'r') as logfile:
                new_log = logfile.read()

            # remove old logs (since they are already in S3)
            if old_log:
                new_log = new_log.replace(old_log, '')

            try:
                s3 = boto.connect_s3()
                s3_key = boto.s3.key.Key(s3.get_bucket(bucket), key)

                # append new logs to old S3 logs, if available
                if s3_key.exists():
                    old_s3_log = s3_key.get_contents_as_string().decode()
                    new_log = old_s3_log + '\n' + new_log

                # send log to S3
                s3_key.set_contents_from_string(new_log)
            except:
                print('Could not send logs to S3.')

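The variant above delegates the DAGS_FOLDER substitution to a process_subdir() helper instead of inlining it the way the earlier run() variants do. A plausible sketch of that helper, assuming it simply mirrors the inline logic shown above (replace the "DAGS_FOLDER" placeholder with the configured dags folder and expand the user's home directory); conf is the same Airflow configuration object used throughout these excerpts.

# Assumed sketch of process_subdir(), inferred from the inline subdir
# handling in the earlier run() variants; not a verbatim copy.
import os


def process_subdir(subdir):
    if subdir:
        # Allow "DAGS_FOLDER" as a shorthand for the configured dags folder.
        subdir = subdir.replace("DAGS_FOLDER", conf.get("core", "DAGS_FOLDER"))
        subdir = os.path.expanduser(subdir)
    return subdir
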
def _execute(self):
    dag_id = self.dag_id

    def signal_handler(signum, frame):
        self.logger.error("SIGINT (ctrl-c) received")
        sys.exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    utils.pessimistic_connection_handling()

    logging.basicConfig(level=logging.DEBUG)
    self.logger.info("Starting the scheduler")

    dagbag = models.DagBag(self.subdir, sync_to_db=True)
    executor = dagbag.executor
    executor.start()
    i = 0
    while not self.num_runs or self.num_runs > i:
        try:
            loop_start_dttm = datetime.now()
            try:
                self.prioritize_queued(executor=executor, dagbag=dagbag)
            except Exception as e:
                self.logger.exception(e)

            i += 1
            try:
                if i % self.refresh_dags_every == 0:
                    dagbag = models.DagBag(self.subdir, sync_to_db=True)
                else:
                    dagbag.collect_dags(only_if_updated=True)
            except:
                self.logger.error("Failed at reloading the dagbag")
                Stats.incr('dag_refresh_error', 1, 1)
                sleep(5)

            if dag_id:
                dags = [dagbag.dags[dag_id]]
            else:
                dags = [
                    dag for dag in dagbag.dags.values()
                    if not dag.parent_dag]

            paused_dag_ids = dagbag.paused_dags()
            for dag in dags:
                self.logger.debug("Scheduling {}".format(dag.dag_id))
                dag = dagbag.get_dag(dag.dag_id)
                if not dag or (dag.dag_id in paused_dag_ids):
                    continue
                try:
                    self.schedule_dag(dag)
                    self.process_dag(dag, executor)
                    self.manage_slas(dag)
                except Exception as e:
                    self.logger.exception(e)
            self.logger.info("Done queuing tasks, calling the executor's "
                             "heartbeat")
            duration_sec = (datetime.now() - loop_start_dttm).total_seconds()
            self.logger.info("Loop took: {} seconds".format(duration_sec))
            try:
                self.import_errors(dagbag)
            except Exception as e:
                self.logger.exception(e)
            try:
                dagbag.kill_zombies()
            except Exception as e:
                self.logger.exception(e)
            try:
                # We really just want the scheduler to never ever stop.
                executor.heartbeat()
                self.heartbeat()
            except Exception as e:
                self.logger.exception(e)
                self.logger.error("Tachycardia!")
        except Exception as deep_e:
            self.logger.exception(deep_e)
        finally:
            settings.Session.remove()
    executor.end()

def run(args, dag=None):
    utils.pessimistic_connection_handling()
    if dag:
        args.dag_id = dag.dag_id

    # Setting up logging
    log_base = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log_base + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())
    logging.root.handlers = []
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
    task = dag.get_task(task_id=args.task_id)

    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            task_start_date=args.task_start_date,
            ignore_dependencies=args.ignore_dependencies,
            pool=args.pool)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            job_id=args.job_id,
            pool=args.pool,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            force=args.force,
            pool=args.pool)
        executor.heartbeat()
        executor.end()

    # store logs remotely
    remote_base = conf.get('core', 'REMOTE_BASE_LOG_FOLDER')

    # deprecated as of March 2016
    if not remote_base and conf.get('core', 'S3_LOG_FOLDER'):
        warnings.warn(
            'The S3_LOG_FOLDER conf key has been replaced by '
            'REMOTE_BASE_LOG_FOLDER. Your conf still works but please '
            'update airflow.cfg to ensure future compatibility.',
            DeprecationWarning)
        remote_base = conf.get('core', 'S3_LOG_FOLDER')

    if os.path.exists(filename):
        # read log and remove old logs to get just the latest additions
        with open(filename, 'r') as logfile:
            log = logfile.read()

        remote_log_location = filename.replace(log_base, remote_base)
        # S3
        if remote_base.startswith('s3:/'):
            utils.S3Log().write(log, remote_log_location)
        # GCS
        elif remote_base.startswith('gs:/'):
            utils.GCSLog().write(
                log,
                remote_log_location,
                append=True)
        # Other
        elif remote_base:
            logging.error(
                'Unsupported remote log location: {}'.format(remote_base))

def run(self):
    utils.pessimistic_connection_handling()

    # Setting up logging
    self.start_date = datetime.now()
    # pickle = DagPickle(self.dag)
    self.save()
    identifier = self.id
    # logging.basicConfig(
    #     filename=filename,
    #     level=settings.LOGGING_LEVEL,
    #     format=settings.LOG_FORMAT)
    # print("Logging into: " + filename)
    # if we want to log to files. find way to do both!
    import logging
    log_folder = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log_folder + "/job_{self.id}".format(**locals())
    if not os.path.exists(directory):
        os.makedirs(directory)
    log_filename = "{directory}/job_{self.id}.run_log.txt".format(**locals())

    # Route this job's logs to a dedicated file handler
    task_logger = logging.getLogger('job_{}_logger'.format(self.id))
    task_logger.setLevel(logging.DEBUG)
    logformat = logging.Formatter(
        "%(asctime)s %(name)s %(levelname)s %(message)s")
    channel = logging.FileHandler(
        log_filename, mode='a', encoding=None, delay=False)
    channel.setFormatter(logformat)
    task_logger.addHandler(channel)
    # old_logging = logging
    logging = task_logger
    logging.info("logging rerouted to {log_filename}".format(**locals()))

    # session = settings.Session()
    # logging.info('Loading pickle id {args.pickle}'.format(**locals()))
    # dag_pickle = session.query(
    #     DagPickle).filter(DagPickle.id == args.pickle).first()
    # if not dag_pickle:
    #     raise AirflowException("Who hid the pickle!? [missing pickle]")
    # dag = dag_pickle.pickle

    # figure out how to traverse a dependency map
    session = settings.Session()
    # executor = DEFAULT_EXECUTOR
    from airflow.executors import LocalBashExecutor
    executor = LocalBashExecutor(logging=logging)
    executor.start()

    self.task_instances = []
    for task in self.dag.tasks:
        # make TI and kick off run
        ti = models.TaskInstance(task, datetime.now())
        ti.dag_id = self.dag.dag_id
        ti.job_id = self.id
        ti.state = State.QUEUED
        ti.save()
        self.task_instances.append(ti)
        logging.info("{} saved task instance {}".format(
            self.__class__.__name__, ti.id))

    for ti in sorted(
            self.task_instances,
            key=lambda x: x.priority_weight,
            reverse=True):
        logging.info(
            "{} queuing task {} with priority {} on executor {}".format(
                self.__class__.__name__, ti.id, ti.priority_weight,
                executor.__class__.__name__))
        # print("Sending to executor.")
        executor.queue_task_instance(ti)
        executor.heartbeat()

    executor.heartbeat()
    executor.end()

def run(args):
    utils.pessimistic_connection_handling()
    # Setting up logging
    log_base = os.path.expanduser(configuration.get('core', 'BASE_LOG_FOLDER'))
    directory = log_base + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    args.execution_date = dateutil.parser.parse(args.execution_date)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())
    subdir = process_subdir(args.subdir)
    logging.root.handlers = []
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle:
        dagbag = DagBag(subdir)
        if args.dag_id not in dagbag.dags:
            msg = 'DAG [{0}] could not be found in {1}'.format(
                args.dag_id, subdir)
            logging.error(msg)
            raise AirflowException(msg)
        dag = dagbag.dags[args.dag_id]
        task = dag.get_task(task_id=args.task_id)
    else:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(DagPickle).filter(
            DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
        task = dag.get_task(task_id=args.task_id)

    task_start_date = None
    if args.task_start_date:
        task_start_date = dateutil.parser.parse(args.task_start_date)
        task.start_date = task_start_date
    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            task_start_date=task_start_date,
            ignore_dependencies=args.ignore_dependencies,
            pool=args.pool)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            job_id=args.job_id,
            pool=args.pool,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print(('Pickled dag {dag} '
                       'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            force=args.force,
            pool=args.pool)
        executor.heartbeat()
        executor.end()

    # store logs remotely
    remote_base = configuration.get('core', 'REMOTE_BASE_LOG_FOLDER')

    # deprecated as of March 2016
    if not remote_base and configuration.get('core', 'S3_LOG_FOLDER'):
        warnings.warn(
            'The S3_LOG_FOLDER configuration key has been replaced by '
            'REMOTE_BASE_LOG_FOLDER. Your configuration still works but '
            'please update airflow.cfg to ensure future compatibility.',
            DeprecationWarning)
        remote_base = configuration.get('core', 'S3_LOG_FOLDER')

    if os.path.exists(filename):
        # read log and remove old logs to get just the latest additions
        with open(filename, 'r') as logfile:
            log = logfile.read()

        remote_log_location = filename.replace(log_base, remote_base)
        # S3
        if remote_base.startswith('s3:/'):
            utils.S3Log().write(log, remote_log_location)
        # GCS
        elif remote_base.startswith('gs:/'):
            utils.GCSLog().write(log, remote_log_location, append=True)
        # Other
        elif remote_base:
            logging.error(
                'Unsupported remote log location: {}'.format(remote_base))

def _execute(self):
    dag_id = self.dag_id

    def signal_handler(signum, frame):
        logging.error("SIGINT (ctrl-c) received")
        sys.exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    utils.pessimistic_connection_handling()

    # Sleep time (seconds) between master runs
    logging.basicConfig(level=logging.DEBUG)
    logging.info("Starting a master scheduler")

    session = settings.Session()
    TI = models.TaskInstance

    # This should get new code
    dagbag = models.DagBag(self.subdir)
    executor = dagbag.executor
    executor.start()
    while True:
        self.heartbeat()
        dagbag.collect_dags(only_if_updated=True)
        dags = [dagbag.dags[dag_id]] if dag_id else dagbag.dags.values()
        for dag in dags:
            logging.info(
                "Getting latest instance "
                "for all task in dag " + dag.dag_id)
            sq = session.query(
                TI.task_id,
                func.max(TI.execution_date).label('max_ti')
            ).filter(
                TI.dag_id == dag.dag_id
            ).group_by(TI.task_id).subquery('sq')
            qry = session.query(TI).filter(
                TI.dag_id == dag.dag_id,
                TI.task_id == sq.c.task_id,
                TI.execution_date == sq.c.max_ti,
            )
            latest_ti = qry.all()
            ti_dict = {ti.task_id: ti for ti in latest_ti}
            session.expunge_all()
            session.commit()

            for task in dag.tasks:
                if task.task_id not in ti_dict:
                    # Brand new task, let's get started
                    ti = TI(task, task.start_date)
                    ti.refresh_from_db()
                    if ti.is_runnable():
                        logging.debug(
                            'First run for {ti}'.format(**locals()))
                        executor.queue_command(ti.key, ti.command())
                else:
                    ti = ti_dict[task.task_id]
                    ti.task = task  # Hacky but worky
                    if ti.state == State.RUNNING:
                        continue  # Only one task at a time
                    elif ti.state == State.UP_FOR_RETRY:
                        # If task instance is up for retry, make sure
                        # the retry delay is met
                        if ti.is_runnable():
                            logging.debug('Queuing retry: ' + str(ti))
                            executor.queue_command(ti.key, ti.command())
                    else:
                        # Trying to run the next schedule
                        ti = TI(
                            task=task,
                            execution_date=(
                                ti.execution_date + task.schedule_interval)
                        )
                        ti.refresh_from_db()
                        if ti.is_runnable():
                            logging.debug('Queuing next run: ' + str(ti))
                            executor.queue_command(ti.key, ti.command())
        executor.heartbeat()
        session.close()
    executor.end()

from airflow import utils
from airflow.www import utils as wwwutils
from airflow.www.login import login_manager
import flask_login
from flask_login import login_required

QUERY_LIMIT = 100000
CHART_LIMIT = 200000

AUTHENTICATE = conf.getboolean('core', 'AUTHENTICATE')
if AUTHENTICATE is False:
    login_required = lambda x: x

dagbag = models.DagBag(conf.get('core', 'DAGS_FOLDER'))
utils.pessimistic_connection_handling()

app = Flask(__name__)
app.config['SQLALCHEMY_POOL_RECYCLE'] = 3600

login_manager.init_app(app)
app.secret_key = 'airflowified'

cache = Cache(
    app=app, config={'CACHE_TYPE': 'filesystem', 'CACHE_DIR': '/tmp'})

# Init for chartkick, the python wrapper for highcharts
ck = Blueprint(
    'ck_page', __name__,
    static_folder=chartkick.js(),
    static_url_path='/static')
app.register_blueprint(ck, url_prefix='/ck')

def _execute(self):
    dag_id = self.dag_id

    def signal_handler(signum, frame):
        self.logger.error("SIGINT (ctrl-c) received")
        sys.exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    utils.pessimistic_connection_handling()

    logging.basicConfig(level=logging.DEBUG)
    self.logger.info("Starting the scheduler")

    dagbag = models.DagBag(self.subdir, sync_to_db=True)
    executor = dagbag.executor
    executor.start()
    i = 0
    while not self.num_runs or self.num_runs > i:
        try:
            loop_start_dttm = datetime.now()
            try:
                self.prioritize_queued(executor=executor, dagbag=dagbag)
            except Exception as e:
                self.logger.exception(e)

            i += 1
            try:
                if i % self.refresh_dags_every == 0:
                    dagbag = models.DagBag(self.subdir, sync_to_db=True)
                else:
                    dagbag.collect_dags(only_if_updated=True)
            except:
                self.logger.error("Failed at reloading the dagbag")
                if statsd:
                    statsd.incr('dag_refresh_error', 1, 1)
                sleep(5)

            if dag_id:
                dags = [dagbag.dags[dag_id]]
            else:
                dags = [
                    dag for dag in dagbag.dags.values()
                    if not dag.parent_dag]

            paused_dag_ids = dagbag.paused_dags()
            for dag in dags:
                self.logger.debug("Scheduling {}".format(dag.dag_id))
                dag = dagbag.get_dag(dag.dag_id)
                if not dag or (dag.dag_id in paused_dag_ids):
                    continue
                try:
                    self.schedule_dag(dag)
                    self.process_dag(dag, executor)
                    self.manage_slas(dag)
                except Exception as e:
                    self.logger.exception(e)
            self.logger.info("Done queuing tasks, calling the executor's "
                             "heartbeat")
            duration_sec = (datetime.now() - loop_start_dttm).total_seconds()
            self.logger.info("Loop took: {} seconds".format(duration_sec))
            try:
                self.import_errors(dagbag)
            except Exception as e:
                self.logger.exception(e)
            try:
                dagbag.kill_zombies()
            except Exception as e:
                self.logger.exception(e)
            try:
                # We really just want the scheduler to never ever stop.
                executor.heartbeat()
                self.heartbeat()
            except Exception as e:
                self.logger.exception(e)
                self.logger.error("Tachycardia!")
        except Exception as deep_e:
            self.logger.exception(deep_e)
    executor.end()

def run(args):
    utils.pessimistic_connection_handling()
    # Setting up logging
    log = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    args.execution_date = dateutil.parser.parse(args.execution_date)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    # store old log (to help with S3 appends)
    if os.path.exists(filename):
        with open(filename, 'r') as logfile:
            old_log = logfile.read()
    else:
        old_log = None

    subdir = None
    if args.subdir:
        subdir = args.subdir.replace(
            "DAGS_FOLDER", conf.get("core", "DAGS_FOLDER"))
        subdir = os.path.expanduser(subdir)
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle:
        dagbag = DagBag(subdir)
        if args.dag_id not in dagbag.dags:
            msg = 'DAG [{0}] could not be found'.format(args.dag_id)
            logging.error(msg)
            raise AirflowException(msg)
        dag = dagbag.dags[args.dag_id]
        task = dag.get_task(task_id=args.task_id)
    else:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
        task = dag.get_task(task_id=args.task_id)

    task_start_date = None
    if args.task_start_date:
        task_start_date = dateutil.parser.parse(args.task_start_date)
        task.start_date = task_start_date
    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            task_start_date=task_start_date,
            ignore_dependencies=args.ignore_dependencies)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            job_id=args.job_id,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            force=args.force)
        executor.heartbeat()
        executor.end()

    if conf.get('core', 'S3_LOG_FOLDER').startswith('s3:'):
        import boto
        s3_log = filename.replace(log, conf.get('core', 'S3_LOG_FOLDER'))
        bucket, key = s3_log.lstrip('s3:/').split('/', 1)
        if os.path.exists(filename):
            # get logs
            with open(filename, 'r') as logfile:
                new_log = logfile.read()

            # remove old logs (since they are already in S3)
            if old_log:
                new_log = new_log.replace(old_log, '')

            try:
                s3 = boto.connect_s3()
                s3_key = boto.s3.key.Key(s3.get_bucket(bucket), key)

                # append new logs to old S3 logs, if available
                if s3_key.exists():
                    old_s3_log = s3_key.get_contents_as_string().decode()
                    new_log = old_s3_log + '\n' + new_log

                # send log to S3
                s3_key.set_contents_from_string(new_log)
            except:
                print('Could not send logs to S3.')
