Example no. 1
    def test_masking_from_db(self):
        """Test secrets are masked when loaded directly from the DB"""
        from airflow.settings import Session

        session = Session()

        try:
            conn = Connection(
                conn_id=f"test-{os.getpid()}",
                conn_type="http",
                password="******",
                extra='{"apikey":"masked too"}',
            )
            session.add(conn)
            session.flush()

            # Make sure we re-load it, not just get the cached object back
            session.expunge(conn)

            self.mask_secret.reset_mock()

            from_db = session.query(Connection).get(conn.id)
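            # accessing extra_dejson forces the extras to be parsed (and masked) on load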
            from_db.extra_dejson

            assert self.mask_secret.mock_calls == [
                # We should have called it _again_ when loading from the DB
                mock.call("s3cr3t"),
                mock.call({"apikey": "masked too"}),
            ]
        finally:
            session.rollback()
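The test relies on a `self.mask_secret` mock prepared elsewhere in the test class. A minimal
sketch of such a fixture, assuming the Connection model imports the hook as
`airflow.models.connection.mask_secret` (an assumption; the exact target depends on the Airflow version):

    import pytest
    from unittest import mock

    @pytest.fixture(autouse=True)
    def patch_mask_secret(self):
        # hypothetical fixture: replace the masking hook with a mock we can assert on
        with mock.patch('airflow.models.connection.mask_secret', autospec=True) as m:
            self.mask_secret = m
            yield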
Example no. 2
    def create_airflow_url(dag_id, start_date, end_date):
        """
        Build the Airflow URL to redirect to. The host server comes from the webserver base_url config
        (either a Fabio URL or host:port). The database is then queried for an execution date within the
        start/end range; if several DagRuns match, only the earliest is used. If none are found, the start
        date is used instead, which takes you to the most recent DagRun for that dag in the UI.
        :param dag_id: Dag id name. String.
        :param start_date: Start date. String of form %Y-%m-%d %H:%M:%S.
        :param end_date: End date. String of form %Y-%m-%d %H:%M:%S.
        :return: Airflow URL to redirect to. String.
        """
        start_date = datetime.strptime(start_date, '%Y-%m-%d %H:%M:%S')
        end_date = datetime.strptime(end_date, '%Y-%m-%d %H:%M:%S')
        host_server = conf.get('webserver', 'base_url')

        session = Session()
        try:
            dagrun_query_result = session.query(DagRun) \
                .filter(DagRun.dag_id == dag_id) \
                .filter(DagRun.execution_date >= start_date) \
                .filter(DagRun.execution_date < end_date) \
                .order_by(DagRun.execution_date.asc()) \
                .first()
            execution_date = dagrun_query_result.execution_date.isoformat()
        except Exception:
            # no matching DagRun (or a DB error): fall back to the start date
            session.rollback()
            execution_date = start_date.isoformat()
        finally:
            session.close()

        url = '{0}/admin/airflow/graph?dag_id={1}&execution_date={2}'.format(
            host_server, dag_id, execution_date)
        return url
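A hypothetical call with made-up values:

    url = create_airflow_url('example_dag', '2021-01-01 00:00:00', '2021-01-02 00:00:00')
    # e.g. http://airflow.example.com/admin/airflow/graph?dag_id=example_dag&execution_date=2021-01-01T06:00:00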
Example no. 3
def unpause_dag(dag):
    """
    Wrapper around airflow.bin.cli.unpause. Newly deployed dags do not exist in the DagModel yet,
    so check that the dag exists first and only then run the unpause.
    :param dag: DAG object
    """
    session = Session()
    try:
        dm = session.query(DagModel).filter(DagModel.dag_id == dag.dag_id).first()
        if dm:
            unpause(dag.default_args, dag)
    except Exception:
        session.rollback()
    finally:
        session.close()
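A usage sketch for a deploy step, assuming the dags come from a standard DagBag:

    from airflow.models import DagBag

    dag_bag = DagBag()  # loads dags from the configured dags folder
    for dag in dag_bag.dags.values():
        unpause_dag(dag)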
Example no. 4
def clear_dag(dag):
    """
    Delete all TaskInstances, DagRuns, and DagStats of the specified dag_id, and remove its log directory.
    :param dag: DAG object
    """
    session = Session()
    try:
        session.query(TaskInstance).filter(TaskInstance.dag_id == dag.dag_id).delete()
        session.query(DagRun).filter(DagRun.dag_id == dag.dag_id).delete()
        session.query(DagStat).filter(DagStat.dag_id == dag.dag_id).delete()
        session.commit()
        log_dir = conf.get('core', 'base_log_folder')
        full_dir = os.path.join(log_dir, dag.dag_id)
        shutil.rmtree(full_dir, ignore_errors=True)
    except Exception:
        session.rollback()
    finally:
        session.close()
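For example, resetting a single dag before a redeploy (hypothetical dag_id):

    from airflow.models import DagBag

    dag = DagBag().get_dag('example_dag')
    if dag:
        clear_dag(dag)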
Example no. 5
def clear_dag_runs(dag_id, start_date, end_date):
    """
    Clears all the DagRuns and corrects the DagStats for an interval passed to the clear command,
    because the clear command only clears the TaskInstances.
    :param dag_id: Dag id name. String.
    :param start_date: Start date. String of form %Y-%m-%d %H:%M:%S.
    :param end_date: End date. String of form %Y-%m-%d %H:%M:%S.
    :return: None.
    """
    start_date = datetime.strptime(start_date, '%Y-%m-%d %H:%M:%S')
    end_date = datetime.strptime(end_date, '%Y-%m-%d %H:%M:%S')
    session = Session()
    try:
        dagrun_query = session.query(DagRun) \
            .filter(DagRun.dag_id == dag_id) \
            .filter(DagRun.execution_date >= start_date) \
            .filter(DagRun.execution_date < end_date)
        # count the DagRuns per state *before* deleting them, so the DagStat
        # corrections below reflect what is actually removed
        removed_state_counts = {
            state: dagrun_query.filter(DagRun.state == state).count()
            for state in State.dag_states
        }
        # remove the dagruns in the interval for the clear command
        for result in dagrun_query.all():
            session.delete(result)
        # fix DagStats
        for state in State.dag_states:
            dagstat_query_result = session.query(DagStat) \
                .filter(DagStat.dag_id == dag_id) \
                .filter(DagStat.state == state) \
                .first()  # only one row per (dag_id, state)
            if dagstat_query_result:
                dagstat_query_result.count = max(
                    dagstat_query_result.count - removed_state_counts[state], 0)
        session.commit()
    except Exception:
        session.rollback()
    finally:
        session.close()
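Called with the same string format as create_airflow_url above (made-up interval):

    clear_dag_runs('example_dag', '2021-01-01 00:00:00', '2021-01-08 00:00:00')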
    def notify(self, context=None, success=False):

        ts = context['ts']
        dag = context['dag']
        did = dag.dag_id

        if success:
            context['dagrun_status'] = 'SUCCESS'
            context['dagrun_class'] = 'success'
        else:
            context['dagrun_status'] = 'FAILED'
            context['dagrun_class'] = 'failed'

        context['elapsed_time'] = 'unknown'
        task_id = 'unknown'

        session = Session()
        try:
            task_id = context['task'].task_id

            logging.info('Context task_id {}'.format(task_id))

            start_time = session.query(TaskInstance)\
                .filter(TaskInstance.dag_id == did)\
                .filter(TaskInstance.execution_date == ts)\
                .filter(TaskInstance.start_date.isnot(None))\
                .order_by(TaskInstance.start_date.asc())\
                .first().start_date

            context['start_time'] = start_time
            end_time = datetime.now()
            context['end_time'] = end_time
            context['elapsed_time'] = self.td_format(
                end_time - start_time) if (start_time and end_time) else 'N/A'

            task_instances = session.query(TaskInstance)\
                .filter(TaskInstance.dag_id == did)\
                .filter(TaskInstance.execution_date == ts)\
                .filter(TaskInstance.state != State.REMOVED)\
                .order_by(TaskInstance.end_date.asc())\
                .all()

            tis = []
            for ti in task_instances:
                if ti.task_id == task_id:
                    logging.info(
                        'Adjusting details for task_id: {}'.format(task_id))
                    # fix status/end/duration for the task which is causing a notification
                    ti.end_date = end_time
                    ti.state = 'success' if success else 'failed'
                    if not ti.duration:
                        # If the reporting task has no duration, make one based on the report time
                        ti.duration = self.td_format(ti.end_date -
                                                     ti.start_date)

                if not ti.duration:
                    # If other tasks are still running, make duration N/A
                    ti.duration = 'N/A'
                else:
                    if not isinstance(ti.duration, str):
                        ti.duration = self.td_format(
                            timedelta(seconds=ti.duration))

                tis.append(ti)

            context['task_instances'] = tis

            operators = sorted(list(set([op.__class__ for op in dag.tasks])),
                               key=lambda x: x.__name__)
            context['operators'] = operators

            send_slack = self.get_value_from_args('send_slack_message', True)
            if send_slack:
                slack_message = self.message_slack_success if success else self.message_slack_fail
                self.slack_api_params['text'] = context[
                    'task'].render_template(None, slack_message, context)
                self.sc.api_call('chat.postMessage', **self.slack_api_params)

            # don't spam email if multiple completions. spamming Slack is OK ;-)
            state_key = context['dag'].dag_id + '.state'
            dag_state = Variable.get(state_key,
                                     deserialize_json=True,
                                     default_var={})
            if 'history' not in dag_state:
                dag_state['history'] = {}
            history = dag_state['history']
            if ts not in history:
                history[ts] = {}
            date = history[ts]
            sent_email_key = 'sent_success_email' if success else 'sent_failure_email'
            if sent_email_key not in date:
                date[sent_email_key] = False

            send_multiple_failures = self.get_value_from_args(
                'send_multiple_failures', False)
            send_success_email = self.get_value_from_args(
                'send_success_emails', True)
            if not success and date[sent_email_key] and not send_multiple_failures:
                logging.info(
                    'Skipping failure email notification because one was already sent for {0} regarding date {1}'
                    .format(did, ts))
                # nothing to do here
            else:
                subject = self.subject_success if success else self.subject_fail
                title = context['task'].render_template(None, subject, context)
                body = context['task'].render_template(
                    None, self.message_completion(), context)
                email_list = context['task'].email
                # conditions to send an email are if task failure or
                # if task succeeds and user wants to receive success emails
                if not success or send_success_email:
                    if success:
                        email_list = self.get_value_from_args(
                            'success_email', email_list)
                    send_email(email_list, title, body)
                date[sent_email_key] = True
                Variable.set(state_key, dag_state, serialize_json=True)
        except Exception as e:
            logging.warning(
                'Problem reading task state when notifying result of task: {0}'
                '\nException reason: {1}'.format(task_id, e))
        finally:
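            # roll back so the in-memory TaskInstance tweaks above are never persisted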
            session.rollback()
            session.close()
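One plausible way to wire this method up, sketched with a hypothetical Notifier class
(the snippet does not show the real class or its constructor):

    notifier = Notifier(args={'send_slack_message': True})  # hypothetical class and args

    def _on_success(context):
        notifier.notify(context=context, success=True)

    def _on_failure(context):
        notifier.notify(context=context, success=False)

    dag = DAG('example_dag',
              default_args=default_args,
              on_success_callback=_on_success,
              on_failure_callback=_on_failure)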