def test_masking_from_db(self):
    """Test secrets are masked when loaded directly from the DB"""
    from airflow.settings import Session

    session = Session()

    try:
        conn = Connection(
            conn_id=f"test-{os.getpid()}",
            conn_type="http",
            password="s3cr3t",
            extra='{"apikey":"masked too"}',
        )
        session.add(conn)
        session.flush()

        # Make sure we re-load it, not just get the cached object back
        session.expunge(conn)

        self.mask_secret.reset_mock()

        from_db = session.query(Connection).get(conn.id)
        from_db.extra_dejson

        assert self.mask_secret.mock_calls == [
            # We should have called it _again_ when loading from the DB
            mock.call("s3cr3t"),
            mock.call({"apikey": "masked too"}),
        ]
    finally:
        session.rollback()
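# A hypothetical sketch (not from the source) of the fixture this test assumes:
# self.mask_secret is a mock standing in for Airflow's real mask_secret hook.
# Assuming Connection imports mask_secret at module level, the patch target
# would be the name inside airflow.models.connection.
from unittest import mock

import pytest


@pytest.fixture(autouse=True)
def _patch_mask_secret(request):
    # Replace the masking hook with a mock so the test can assert on its calls.
    with mock.patch("airflow.models.connection.mask_secret") as mocked:
        request.instance.mask_secret = mocked
        yield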
def create_airflow_url(dag_id, start_date, end_date):
    """
    Creates the Airflow URL to redirect to.

    Gets the host_server from the webserver base_url (which may be a fabio URL
    or a host:port). Then queries the database for an execution date within the
    start and end date range. There can be multiple matches, so only the
    earliest result is used. If no result is found, the start date is used
    instead, which will just take you to the most recent dagrun for that dag in
    the UI.

    :param dag_id: Dag id name. String.
    :param start_date: Start date. String of form %Y-%m-%d %H:%M:%S.
    :param end_date: End date. String of form %Y-%m-%d %H:%M:%S.
    :return: Airflow URL to redirect to. String.
    """
    start_date = datetime.strptime(start_date, '%Y-%m-%d %H:%M:%S')
    end_date = datetime.strptime(end_date, '%Y-%m-%d %H:%M:%S')
    host_server = conf.get('webserver', 'base_url')
    session = Session()
    try:
        dagrun_query_result = session.query(DagRun) \
            .filter(DagRun.dag_id == dag_id) \
            .filter(DagRun.execution_date >= start_date) \
            .filter(DagRun.execution_date < end_date) \
            .order_by(DagRun.execution_date.asc()) \
            .first()
        execution_date = dagrun_query_result.execution_date.isoformat()
    except Exception:
        # No matching dagrun (or a query error): fall back to the start date.
        session.rollback()
        execution_date = start_date.isoformat()
    finally:
        session.close()
    url = '{0}/admin/airflow/graph?dag_id={1}&execution_date={2}'.format(
        host_server, dag_id, execution_date)
    return url
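# A minimal usage sketch (the dag_id and the one-day window below are
# assumptions, not values from the source): build a redirect URL to the graph
# view of the earliest run in the window.
redirect_url = create_airflow_url('example_dag',
                                  '2021-01-01 00:00:00',
                                  '2021-01-02 00:00:00')
print(redirect_url)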
def unpause_dag(dag):
    """
    Wrapper around airflow.bin.cli.unpause.

    When we deploy the Airflow dags they don't exist in the DagModel yet, so we
    need to check that the dag exists first and only then run the unpause.

    :param dag: DAG object
    """
    session = Session()
    try:
        dm = session.query(DagModel).filter(DagModel.dag_id == dag.dag_id).first()
        if dm:
            unpause(dag.default_args, dag)
    except Exception:
        session.rollback()
    finally:
        session.close()
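# A hedged sketch (the dags folder path below is an assumption) of how this
# helper might be used after a deploy: load the freshly deployed DAG files
# into a DagBag and unpause each DAG that is already registered in DagModel.
from airflow.models import DagBag

dag_bag = DagBag('/path/to/deployed/dags')
for deployed_dag in dag_bag.dags.values():
    unpause_dag(deployed_dag)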
def clear_dag(dag):
    """
    Delete all TaskInstances and DagRuns of the specified dag_id.

    :param dag: DAG object
    """
    session = Session()
    try:
        session.query(TaskInstance).filter(TaskInstance.dag_id == dag.dag_id).delete()
        session.query(DagRun).filter(DagRun.dag_id == dag.dag_id).delete()
        session.query(DagStat).filter(DagStat.dag_id == dag.dag_id).delete()
        session.commit()

        # Also remove the task logs for this dag from the base log folder.
        log_dir = conf.get('core', 'base_log_folder')
        full_dir = os.path.join(log_dir, dag.dag_id)
        shutil.rmtree(full_dir, ignore_errors=True)
    except Exception:
        session.rollback()
    finally:
        session.close()
def clear_dag_runs(dag_id, start_date, end_date):
    """
    Clears all the DagRuns and corrects the DagStats for an interval passed in
    the clear command, because the clear command only clears the TaskInstances.

    :param dag_id: Dag id name. String.
    :param start_date: Start date. String of form %Y-%m-%d %H:%M:%S.
    :param end_date: End date. String of form %Y-%m-%d %H:%M:%S.
    :return: None.
    """
    start_date = datetime.strptime(start_date, '%Y-%m-%d %H:%M:%S')
    end_date = datetime.strptime(end_date, '%Y-%m-%d %H:%M:%S')
    session = Session()
    try:
        dagrun_query = session.query(DagRun) \
            .filter(DagRun.dag_id == dag_id) \
            .filter(DagRun.execution_date >= start_date) \
            .filter(DagRun.execution_date < end_date)
        dagrun_query_result = dagrun_query.all()

        # fix DagStats: count the runs per state before they are deleted, then
        # subtract those counts from the stored totals
        for state in State.dag_states:
            removed_state_counts = dagrun_query.filter(
                DagRun.state == state).count()
            dagstat_query = session.query(DagStat) \
                .filter(DagStat.dag_id == dag_id) \
                .filter(DagStat.state == state)
            dagstat_query_result = dagstat_query.first()  # only one row every time
            dagstat_query_result.count = max(
                dagstat_query_result.count - removed_state_counts, 0)

        # remove the dagruns in this interval for the clear command
        for result in dagrun_query_result:
            session.delete(result)

        session.commit()
    except Exception:
        session.rollback()
    finally:
        session.close()
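# A hedged usage sketch (the dag_id and window are assumptions, not from the
# source): after clearing the TaskInstances for a window, drop the DagRuns and
# correct DagStat for that same window.
clear_dag_runs('example_dag', '2021-01-01 00:00:00', '2021-01-02 00:00:00')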
def notify(self, context=None, success=False):
    ts = context['ts']
    dag = context['dag']
    did = dag.dag_id
    if success:
        context['dagrun_status'] = 'SUCCESS'
        context['dagrun_class'] = 'success'
    else:
        context['dagrun_status'] = 'FAILED'
        context['dagrun_class'] = 'failed'
    context['elapsed_time'] = 'unknown'
    task_id = 'unknown'
    session = Session()
    try:
        task_id = context['task'].task_id
        logging.info('Context task_id {}'.format(task_id))

        # earliest start among the task instances of this dagrun
        start_time = session.query(TaskInstance)\
            .filter(TaskInstance.dag_id == did)\
            .filter(TaskInstance.execution_date == ts)\
            .filter(TaskInstance.start_date != None)\
            .order_by(TaskInstance.start_date.asc())\
            .first().start_date
        context['start_time'] = start_time
        end_time = datetime.now()
        context['end_time'] = end_time
        context['elapsed_time'] = self.td_format(
            end_time - start_time) if (start_time and end_time) else 'N/A'

        task_instances = session.query(TaskInstance)\
            .filter(TaskInstance.dag_id == did)\
            .filter(TaskInstance.execution_date == ts)\
            .filter(TaskInstance.state != State.REMOVED)\
            .order_by(TaskInstance.end_date.asc())\
            .all()
        tis = []
        for ti in task_instances:
            if ti.task_id == task_id:
                logging.info('Adjusting details for task_id: {}'.format(task_id))
                # fix status/end/duration for the task which is causing a notification
                ti.end_date = end_time
                ti.state = 'success' if success else 'failed'
                if not ti.duration:
                    # If the reporting task has no duration, make one based on the report time
                    ti.duration = self.td_format(ti.end_date - ti.start_date)
            if not ti.duration:
                # If other tasks are still running, make duration N/A
                ti.duration = 'N/A'
            elif not isinstance(ti.duration, str):
                ti.duration = self.td_format(timedelta(seconds=ti.duration))
            tis.append(ti)
        context['task_instances'] = tis

        operators = sorted(list(set([op.__class__ for op in dag.tasks])),
                           key=lambda x: x.__name__)
        context['operators'] = operators

        send_slack = self.args['send_slack_message'] if 'send_slack_message' in self.args else True
        if send_slack:
            slack_message = self.message_slack_success if success else self.message_slack_fail
            self.slack_api_params['text'] = context['task'].render_template(
                None, slack_message, context)
            self.sc.api_call('chat.postMessage', **self.slack_api_params)

        # don't spam email if multiple completions. Spamming Slack is OK ;-)
        state_key = context['dag'].dag_id + '.state'
        dag_state = Variable.get(state_key, deserialize_json=True, default_var={})
        if 'history' not in dag_state:
            dag_state['history'] = {}
        history = dag_state['history']
        if ts not in history:
            history[ts] = {}
        date = history[ts]
        sent_email_key = 'sent_success_email' if success else 'sent_failure_email'
        if sent_email_key not in date:
            date[sent_email_key] = False

        send_multiple_failures = self.get_value_from_args('send_multiple_failures', False)
        send_success_email = self.get_value_from_args('send_success_emails', True)
        if not success and date[sent_email_key] and not send_multiple_failures:
            logging.info(
                'Skipping failure email notification because one was already '
                'sent for {0} regarding date {1}'.format(did, ts))
            # nothing to do here
        else:
            subject = self.subject_success if success else self.subject_fail
            title = context['task'].render_template(None, subject, context)
            body = context['task'].render_template(None, self.message_completion(), context)
            email_list = context['task'].email
            # conditions to send an email are task failure, or
            # task success when the user wants to receive success emails
            if not success or (send_success_email and success):
                if success:
                    email_list = self.get_value_from_args('success_email', email_list)
                send_email(email_list, title, body)
                date[sent_email_key] = True
                Variable.set(state_key, dag_state, serialize_json=True)
    except Exception as e:
        logging.warning(
            'Problem reading task state when notifying result of task: {0}'
            '\nException reason: {1}'.format(task_id, e))
    finally:
        session.rollback()
        session.close()
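# A hedged wiring sketch (the Notifier class name, its constructor arguments,
# the example email address, and the dates are assumptions, not from the
# source): notify() is written as a task callback, so a failing task reports
# the failure and a final "end" task reports overall success.
from datetime import datetime

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator

notifier = Notifier(args={'send_slack_message': True, 'send_success_emails': True})

default_args = {
    'owner': 'airflow',
    'start_date': datetime(2021, 1, 1),
    'email': ['oncall@example.com'],
    # Any failed task triggers a failure notification.
    'on_failure_callback': lambda context: notifier.notify(context, success=False),
}

dag = DAG('example_dag', default_args=default_args, schedule_interval='@daily')

end = DummyOperator(
    task_id='end',
    # The last task in the DAG reports overall success.
    on_success_callback=lambda context: notifier.notify(context, success=True),
    dag=dag,
)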