def set_dirty(dag_id, session=None):
    """Mark the stat rows of a dag as dirty so they are refreshed on the next update().

    :param dag_id: the dag_id to mark dirty
    :param session: database session
    :return:
    """
    # Create a record for every possible state of this dag:
    # states missing from the stats table are inserted into the db
    DagStat.create(dag_id=dag_id, session=session)

    try:
        # Take row-level locks on all stat rows of the given dag
        stats = session.query(DagStat).filter(
            DagStat.dag_id == dag_id
        ).with_for_update().all()

        # Set the dirty flag on every row
        for stat in stats:
            stat.dirty = True
        session.commit()
    except Exception as e:
        session.rollback()
        log = LoggingMixin().log
        log.warning("Could not update dag stats for %s", dag_id)
        log.exception(e)
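# Usage sketch (assumptions: this function is exposed as DagStat.set_dirty, as in
# upstream Airflow 1.10, and a SQLAlchemy session comes from airflow.settings):
# mark one dag's stat rows so the next update() recomputes its counts.
from airflow import settings
from airflow.models import DagStat

session = settings.Session()
try:
    DagStat.set_dirty(dag_id='example_dag', session=session)  # hypothetical dag_id
finally:
    session.close()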
def load_login():
    log = LoggingMixin().log

    auth_backend = 'airflow.default_login'
    try:
        # Resolve the configured web authentication backend
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            auth_backend = conf.get('webserver', 'auth_backend')
    except (AirflowConfigException, XToolConfigException):
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            log.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated* behavior of importing airflow_login")
            auth_backend = "airflow_login"

    # Import the authentication module
    try:
        global login
        login = import_module(auth_backend)
    except ImportError as err:
        log.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            auth_backend, err
        )
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
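# Usage sketch: load_login() is expected to be called once at webserver start-up;
# afterwards the module-level `login` global holds the imported authentication
# backend (assumption: the configured backend imports cleanly).
load_login()
print("Loaded auth backend:", login.__name__)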
def update(dag_ids=None, dirty_only=True, session=None):
    """Update the DagRun count for each state of each dag and reset dirty to False.

    Updates the stats for dirty/out-of-sync dags

    :param dag_ids: dag_ids to be updated
    :type dag_ids: list
    :param dirty_only: only updated for marked dirty, defaults to True
    :type dirty_only: bool
    :param session: db session to use
    :type session: Session
    """
    try:
        qry = session.query(DagStat)
        if dag_ids:
            qry = qry.filter(DagStat.dag_id.in_(set(dag_ids)))
        # Restrict to rows marked dirty if requested
        if dirty_only:
            qry = qry.filter(DagStat.dirty == True)  # noqa

        # Take row-level locks on the selected rows
        qry = qry.with_for_update().all()

        # Collect the dag ids to recount
        dag_ids = set([dag_stat.dag_id for dag_stat in qry])

        # avoid querying with an empty IN clause
        if not dag_ids:
            session.commit()
            return

        # Count DagRuns per (dag_id, state) within the configured history window
        begin_time = datetime.now() - timedelta(
            days=configuration.getint('core', 'sql_query_history_days'))
        dagstat_states = set(itertools.product(dag_ids, State.dag_states))
        qry = (
            session.query(DagRun.dag_id, DagRun.state, func.count('*'))
            .filter(DagRun.dag_id.in_(dag_ids))
            .filter(DagRun.execution_date > begin_time)
            .group_by(DagRun.dag_id, DagRun.state)
        )
        counts = {(dag_id, state): count for dag_id, state, count in qry}

        # Write back the count for every (dag_id, state) pair and clear the dirty flag
        for dag_id, state in dagstat_states:
            count = counts.get((dag_id, state), 0)
            session.merge(
                DagStat(dag_id=dag_id, state=state, count=count, dirty=False)
            )

        session.commit()
    except Exception as e:
        session.rollback()
        log = LoggingMixin().log
        log.warning("Could not update dag stat table")
        log.exception(e)
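# Usage sketch (assumes the DagStat.update classmethod naming of upstream Airflow
# and a session from airflow.settings): recount DagRuns for two hypothetical dags
# even if their stat rows are not marked dirty.
from airflow import settings
from airflow.models import DagStat

session = settings.Session()
try:
    DagStat.update(dag_ids=['example_dag_a', 'example_dag_b'],
                   dirty_only=False, session=session)
finally:
    session.close()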
def _post_sendgrid_mail(mail_data):
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(apikey=os.environ.get('SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if 200 <= response.status_code < 300:
        log.info('Email with subject %s is successfully sent to recipients: %s',
                 mail_data['subject'], mail_data['personalizations'])
    else:
        log.warning('Failed to send out email with subject %s, status code: %s',
                    mail_data['subject'], response.status_code)
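# Usage sketch with hypothetical addresses: the dict mirrors the SendGrid v3
# mail-send request body, which _post_sendgrid_mail forwards unchanged.
# SENDGRID_API_KEY must be set in the environment for the call to succeed.
mail_data = {
    'personalizations': [{'to': [{'email': 'to@example.com'}]}],
    'from': {'email': 'from@example.com'},
    'subject': 'Airflow alert',
    'content': [{'type': 'text/plain', 'value': 'Task example_task failed.'}],
}
_post_sendgrid_mail(mail_data)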
def create(dag_id, session=None):
    """Insert the states missing from the stats table into the db.

    Creates the missing states in the stats table for the dag specified

    :param dag_id: dag id of the dag to create stats for
    :param session: database session
    :return:
    """
    # States already present in DagStat for this dag
    qry = session.query(DagStat).filter(DagStat.dag_id == dag_id).all()
    states = {dag_stat.state for dag_stat in qry}

    # Walk all possible states and pick those not yet in the database
    states_not_found = set(State.dag_states) - states
    for state in states_not_found:
        try:
            session.merge(DagStat(dag_id=dag_id, state=state))
            session.commit()
        except Exception as e:
            session.rollback()
            log = LoggingMixin().log
            log.warning("Could not create stat record")
            log.exception(e)
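# Usage sketch (assumes the DagStat.create naming and default column values of
# upstream Airflow): after seeding, every DagRun state should have a row with
# count 0 and dirty False for the dag.
from airflow import settings
from airflow.models import DagStat

session = settings.Session()
DagStat.create(dag_id='example_dag', session=session)
for row in session.query(DagStat).filter(DagStat.dag_id == 'example_dag'):
    print(row.state, row.count, row.dirty)
session.close()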
def _to_timestamp(cls, col):
    """
    Convert a column of a dataframe to UNIX timestamps if applicable

    :param col: A Series object representing a column of a dataframe.
    """
    # try and convert the column to datetimes
    # the column MUST have a four digit year somewhere in the string
    # there should be a better way to do this,
    # but just letting pandas try and convert every column without a format
    # caused it to convert floats as well
    # For example, a column of integers
    # between 0 and 10 is turned into timestamps
    # if the column cannot be converted,
    # just return the original column untouched
    try:
        col = pd.to_datetime(col)
    except ValueError:
        log = LoggingMixin().log
        log.warning("Could not convert field to timestamps: %s", col.name)
        return col

    # now convert the newly created datetimes into timestamps
    # we have to be careful here
    # because NaT cannot be converted to a timestamp
    # so we have to return NaN
    converted = []
    for i in col:
        try:
            converted.append(i.timestamp())
        except ValueError:
            converted.append(pd.np.NaN)
        except AttributeError:
            converted.append(pd.np.NaN)

    # return a new series that maintains the same index as the original
    return pd.Series(converted, index=col.index)
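# Usage sketch with a hypothetical dataframe: because of the cls argument,
# _to_timestamp is assumed to be a classmethod on its enclosing hook class
# (called SalesforceHook in upstream Airflow; treated as an assumption here).
import pandas as pd

from airflow.contrib.hooks.salesforce_hook import SalesforceHook

df = pd.DataFrame({'created_date': ['2021-01-01 00:00:00', '2021-01-02 12:30:00'],
                   'amount': [1.5, 2.5]})
df['created_date'] = SalesforceHook._to_timestamp(df['created_date'])
print(df)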
    broker_url,
    'broker_transport_options': broker_transport_options,
    # Storage backend for worker task results
    'result_backend': configuration.conf.get('celery', 'RESULT_BACKEND'),
    # Number of tasks a worker executes concurrently
    'worker_concurrency': configuration.conf.getint('celery', 'WORKER_CONCURRENCY'),
}

celery_ssl_active = False
try:
    celery_ssl_active = configuration.conf.getboolean('celery', 'SSL_ACTIVE')
except (AirflowConfigException, XToolConfigException) as e:
    log.warning("Celery Executor will run without SSL")

try:
    if celery_ssl_active:
        broker_use_ssl = {
            'keyfile': configuration.conf.get('celery', 'SSL_KEY'),
            'certfile': configuration.conf.get('celery', 'SSL_CERT'),
            'ca_certs': configuration.conf.get('celery', 'SSL_CACERT'),
            'cert_reqs': ssl.CERT_REQUIRED,
        }
        DEFAULT_CELERY_CONFIG['broker_use_ssl'] = broker_use_ssl
except (AirflowConfigException, XToolConfigException) as e:
    raise AirflowException('AirflowConfigException: SSL_ACTIVE is True, '
                           'please ensure SSL_KEY, '
                           'SSL_CERT and SSL_CACERT are set')
except Exception as e: