    def test_read_with_empty_metadata(self):
        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(self.ti, 1, {})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        # offset is initialized to 0 when not provided, then advanced
        # past the single log line that was read.
        self.assertEqual(1, metadatas[0]['offset'])
        # last_log_timestamp will be initialized using the log reading time
        # if no last_log_timestamp is provided.
        self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)

        # case where offset is missing but metadata not empty.
        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(self.ti, 1, {'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        # offset should be initialized to 0 if not provided.
        self.assertEqual(0, metadatas[0]['offset'])
        # last_log_timestamp will be initialized using the log reading time
        # if no last_log_timestamp is provided.
        self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
def task_instance_info(dag_id, execution_date, task_id):
    """
    Returns a JSON with a task instance's public instance variables.
    The format for the exec_date is expected to be
    "YYYY-mm-DDTHH:MM:SS", for example: "2016-11-16T11:34:15". This will
    of course need to have been encoded for URL in the request.
    """

    # Convert string datetime into actual datetime
    try:
        execution_date = timezone.parse(execution_date)
    except ValueError:
        error_message = (
            'Given execution date, {}, could not be identified '
            'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format(
                execution_date))
        _log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400

        return response

    try:
        info = get_task_instance(dag_id, task_id, execution_date)
    except AirflowException as err:
        _log.info(err)
        response = jsonify(error="{}".format(err))
        response.status_code = err.status_code
        return response

    # JSONify and return.
    fields = {k: str(v)
              for k, v in vars(info).items()
              if not k.startswith('_')}
    return jsonify(fields)
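
The date handling above can be exercised directly against airflow.utils.timezone.parse. A minimal sketch, assuming a stock configuration where the default timezone is UTC:

from airflow.utils import timezone

# A naive ISO-8601 string is accepted and made timezone-aware using the
# configured default timezone (UTC unless overridden in airflow.cfg).
dt = timezone.parse("2016-11-16T11:34:15")
print(dt.isoformat())  # 2016-11-16T11:34:15+00:00

# An explicit offset in the string is preserved.
dt = timezone.parse("2015-11-16T14:34:15+00:00")

# Unparseable input raises a ValueError subclass, which the endpoint
# above converts into a 400 response.
try:
    timezone.parse("not-a-date")
except ValueError:
    pass
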
    def test_read_with_none_metadata(self):
        logs, metadatas = self.es_task_handler.read(self.ti, 1)
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(1, metadatas[0]['offset'])
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) < pendulum.now())
    def test_read(self):
        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(self.ti,
                                                    1,
                                                    {'offset': 0,
                                                     'last_log_timestamp': str(ts),
                                                     'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(1, metadatas[0]['offset'])
        self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
    def test_read_timeout(self):
        ts = pendulum.now().subtract(minutes=5)

        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(self.ti,
                                                    1,
                                                    {'offset': 0,
                                                     'last_log_timestamp': str(ts),
                                                     'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertTrue(metadatas[0]['end_of_log'])
        # offset was provided as 0 and should not change when no new logs are read.
        self.assertEqual(0, metadatas[0]['offset'])
        self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) == ts)
    def execute(self, context):
        if self.execution_date is not None:
            run_id = 'trig__{}'.format(self.execution_date)
            self.execution_date = timezone.parse(self.execution_date)
        else:
            run_id = 'trig__' + timezone.utcnow().isoformat()
        dro = DagRunOrder(run_id=run_id)
        if self.python_callable is not None:
            dro = self.python_callable(context, dro)
        if dro:
            trigger_dag(dag_id=self.trigger_dag_id,
                        run_id=dro.run_id,
                        conf=json.dumps(dro.payload),
                        execution_date=self.execution_date,
                        replace_microseconds=False)
        else:
            self.log.info("Criteria not met, moving on")
Example #7
def trigger_dag(dag_id):
    """
    Trigger a new dag run for a Dag with an execution date of now unless
    specified in the data.
    """
    data = request.get_json(force=True)

    run_id = None
    if 'run_id' in data:
        run_id = data['run_id']

    conf = None
    if 'conf' in data:
        conf = data['conf']

    execution_date = None
    if 'execution_date' in data and data['execution_date'] is not None:
        execution_date = data['execution_date']

        # Convert string datetime into actual datetime
        try:
            execution_date = timezone.parse(execution_date)
        except ValueError:
            error_message = (
                'Given execution date, {}, could not be identified '
                'as a date. Example date format: 2015-11-16T14:34:15+00:00'.
                format(execution_date))
            _log.info(error_message)
            response = jsonify({'error': error_message})
            response.status_code = 400

            return response

    try:
        dr = trigger.trigger_dag(dag_id, run_id, conf, execution_date)
    except AirflowException as err:
        _log.error(err)
        response = jsonify(error="{}".format(err))
        response.status_code = err.status_code
        return response

    if getattr(g, 'user', None):
        _log.info("User {} created {}".format(g.user, dr))

    response = jsonify(message="Created {}".format(dr))
    return response
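
For reference, a hypothetical client call against this endpoint; the URL below assumes the experimental REST API prefix (/api/experimental) and an example DAG id, so adjust both for a real deployment:

import json

import requests  # assumption: the requests library is installed

payload = {
    "run_id": "manual_run_1",
    "conf": {"key": "value"},
    "execution_date": "2015-11-16T14:34:15+00:00",
}
resp = requests.post(
    "http://localhost:8080/api/experimental/dags/example_dag/dag_runs",
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
)
print(resp.status_code, resp.json())  # 200 and {"message": "Created <DagRun ...>"} on success
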
Example #8
    def test_read(self):
        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'end_of_log': False
            })

        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(len(logs[0]), 1)
        self.assertEqual(self.test_message, logs[0][0][-1])
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual('1', metadatas[0]['offset'])
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
Example #11
def get_date_time_num_runs_dag_runs_form_data(request, session, dag):
    dttm = request.args.get('execution_date')
    if dttm:
        dttm = pendulum.parse(dttm)
    else:
        dttm = dag.latest_execution_date or timezone.utcnow()

    base_date = request.args.get('base_date')
    if base_date:
        base_date = timezone.parse(base_date)
    else:
        # The DateTimeField widget truncates milliseconds and would lose
        # the first dag run. Round to the next second.
        base_date = (dttm + timedelta(seconds=1)).replace(microsecond=0)

    default_dag_run = conf.getint('webserver',
                                  'default_dag_run_display_number')
    num_runs = request.args.get('num_runs')
    num_runs = int(num_runs) if num_runs else default_dag_run

    DR = models.DagRun
    drs = (session.query(DR).filter(
        DR.dag_id == dag.dag_id, DR.execution_date <= base_date).order_by(
            desc(DR.execution_date)).limit(num_runs).all())
    dr_choices = []
    dr_state = None
    for dr in drs:
        dr_choices.append((dr.execution_date.isoformat(), dr.run_id))
        if dttm == dr.execution_date:
            dr_state = dr.state

    # Happens if base_date was changed and the selected dag run is not in the results
    if not dr_state and drs:
        dr = drs[0]
        dttm = dr.execution_date
        dr_state = dr.state

    return {
        'dttm': dttm,
        'base_date': base_date,
        'num_runs': num_runs,
        'execution_date': dttm.isoformat(),
        'dr_choices': dr_choices,
        'dr_state': dr_state,
    }
    def test_read_nonexistent_log(self):
        ts = pendulum.now()
        # In ElasticMock, search is going to return all documents with matching index
        # and doc_type regardless of match filters, so we delete the log entry instead
        # of making a new TaskInstance to query.
        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(self.ti,
                                                    1,
                                                    {'offset': 0,
                                                     'last_log_timestamp': str(ts),
                                                     'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(0, metadatas[0]['offset'])
        # last_log_timestamp won't change if no log lines were read.
        self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) == ts)
    def test_read_nonexistent_log(self):
        ts = pendulum.now()
        # In ElasticMock, search is going to return all documents with matching index
        # and doc_type regardless of match filters, so we delete the log entry instead
        # of making a new TaskInstance to query.
        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(self.ti,
                                                    1,
                                                    {'offset': 0,
                                                     'last_log_timestamp': str(ts),
                                                     'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual('0', metadatas[0]['offset'])
        # last_log_timestamp won't change if no log lines were read.
        self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) == ts)
    def test_local_run(self):
        args = create_mock_args(
            task_id='print_the_context',
            dag_id='example_python_operator',
            subdir='/root/dags/example_python_operator.py',
            interactive=True,
            execution_date=timezone.parse('2018-04-27T08:39:51.298439+00:00'))

        reset(args.dag_id)

        with patch('argparse.Namespace', args) as mock_args:
            run(mock_args)
            dag = get_dag(mock_args)
            task = dag.get_task(task_id=args.task_id)
            ti = TaskInstance(task, args.execution_date)
            ti.refresh_from_db()
            state = ti.current_state()
            self.assertEqual(state, State.SUCCESS)
    def execute(self, context: Dict):
        if isinstance(self.execution_date, datetime.datetime):
            run_id = "trig__{}".format(self.execution_date.isoformat())
        elif isinstance(self.execution_date, str):
            run_id = "trig__{}".format(self.execution_date)
            self.execution_date = timezone.parse(
                self.execution_date)  # trigger_dag() expects datetime
        else:
            run_id = "trig__{}".format(timezone.utcnow().isoformat())

        # Ignore MyPy type for self.execution_date because it doesn't pick up the timezone.parse() for strings
        trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=self.execution_date,
            replace_microseconds=False,
        )
Example #16
    def test_read_timeout(self):
        ts = pendulum.now().subtract(minutes=5)

        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'end_of_log': False
            })
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertTrue(metadatas[0]['end_of_log'])
        # offset was provided as 0 and should not change when no new logs are read.
        self.assertEqual('0', metadatas[0]['offset'])
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) == ts)
Example #17
    def test_limit_and_offset(self, url, expected_import_error_ids, session):
        import_errors = [
            ImportError(
                filename=f"/tmp/file_{i}.py",
                stacktrace="Lorem ipsum",
                timestamp=timezone.parse(self.timestamp, timezone="UTC"),
            ) for i in range(1, 110)
        ]
        session.add_all(import_errors)
        session.commit()

        response = self.client.get(url)

        assert response.status_code == 200
        import_ids = [
            error["filename"] for error in response.json["import_errors"]
        ]
        self.assertEqual(import_ids, expected_import_error_ids)
Example #18
    def execute(self, context: Dict):
        if isinstance(self.execution_date, datetime.datetime):
            execution_date = self.execution_date
        elif isinstance(self.execution_date, str):
            execution_date = timezone.parse(self.execution_date)
            self.execution_date = execution_date
        else:
            execution_date = timezone.utcnow()

        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
        # Ignore MyPy type for self.execution_date because it doesn't pick up the timezone.parse() for strings
        trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=self.execution_date,
            replace_microseconds=False,
        )
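
For context, DagRun.generate_run_id() builds the run id from the run type and the ISO-formatted execution date in the Airflow versions these snippets target. A small sketch (the exact format is version-dependent):

from airflow.models.dagrun import DagRun
from airflow.utils import timezone
from airflow.utils.types import DagRunType

execution_date = timezone.parse("2020-01-01T00:00:00+00:00")
run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
print(run_id)  # e.g. "manual__2020-01-01T00:00:00+00:00"
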
Example #20
    def test_serialize(self, session):
        event_log_model = Log(event="TEST_EVENT",
                              task_instance=self._create_task_instance())
        session.add(event_log_model)
        session.commit()
        event_log_model.dttm = timezone.parse(self.default_time)
        log_model = session.query(Log).first()
        deserialized_log = event_log_schema.dump(log_model)
        self.assertEqual(
            deserialized_log, {
                "event_log_id": event_log_model.id,
                "event": "TEST_EVENT",
                "dag_id": "TEST_DAG_ID",
                "task_id": "TEST_TASK_ID",
                "execution_date": self.default_time,
                "owner": 'airflow',
                "when": self.default_time,
                "extra": None
            })
    def test_test(self):
        """Test the `airflow test` command"""
        args = create_mock_args(task_id='print_the_context',
                                dag_id='example_python_operator',
                                subdir=None,
                                execution_date=timezone.parse('2018-01-01'))

        saved_stdout = sys.stdout
        try:
            sys.stdout = out = io.StringIO()
            cli.test(args)

            output = out.getvalue()
            # Check that prints, and log messages, are shown
            self.assertIn(
                "'example_python_operator__print_the_context__20180101'",
                output)
        finally:
            sys.stdout = saved_stdout
Example #22
    def execute(self, context: Dict):
        if isinstance(self.execution_date, datetime.datetime):
            execution_date = self.execution_date
        elif isinstance(self.execution_date, str):
            execution_date = timezone.parse(self.execution_date)
            self.execution_date = execution_date
        else:
            execution_date = timezone.utcnow()

        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
        try:
            # Ignore MyPy type for self.execution_date
            # because it doesn't pick up the timezone.parse() for strings
            trigger_dag(
                dag_id=self.trigger_dag_id,
                run_id=run_id,
                conf=self.conf,
                execution_date=self.execution_date,
                replace_microseconds=False,
            )

        except DagRunAlreadyExists as e:
            if self.reset_dag_run:
                self.log.info("Clearing %s on %s", self.trigger_dag_id,
                              self.execution_date)

                # Get target dag object and call clear()

                dag_model = DagModel.get_current(self.trigger_dag_id)
                if dag_model is None:
                    raise DagNotFound(
                        f"Dag id {self.trigger_dag_id} not found in DagModel")

                dag_bag = DagBag(
                    dag_folder=dag_model.fileloc,
                    store_serialized_dags=settings.STORE_SERIALIZED_DAGS)

                dag = dag_bag.get_dag(self.trigger_dag_id)

                dag.clear(start_date=self.execution_date,
                          end_date=self.execution_date)
            else:
                raise e
Example #23
    def test_serialize(self, session):
        import_error = ImportError(
            filename="lorem.py",
            stacktrace="Lorem Ipsum",
            timestamp=timezone.parse(self.timestamp, timezone="UTC"),
        )
        session.add(import_error)
        session.commit()
        serialized_data = import_error_schema.dump(import_error)
        serialized_data["import_error_id"] = 1
        self.assertEqual(
            {
                "filename": "lorem.py",
                "import_error_id": 1,
                "stack_trace": "Lorem Ipsum",
                "timestamp": "2020-06-10T12:02:44+00:00",
            },
            serialized_data,
        )
Example #24
    def _read(self, ti, try_number, metadata=None):
        """
        Endpoint for streaming log.
        :param ti: task instance object
        :param try_number: try_number of the task instance
        :param metadata: log metadata,
                         can be used for streaming log reading and auto-tailing.
        :return: a list of log documents and metadata.
        """
        if not metadata:
            metadata = {'offset': 0}
        if 'offset' not in metadata:
            metadata['offset'] = 0

        offset = metadata['offset']
        log_id = self._render_log_id(ti, try_number)

        logs = self.es_read(log_id, offset, metadata)

        next_offset = offset if not logs else logs[-1].offset

        metadata['offset'] = next_offset
        # end_of_log_mark may contain characters like '\n', which are needed
        # to have the log uploaded but are not stored in Elasticsearch.
        metadata['end_of_log'] = False if not logs \
            else logs[-1].message == self.end_of_log_mark.strip()

        cur_ts = pendulum.now()
        # Assume end of log after not receiving new log for 5 min,
        # as executor heartbeat is 1 min and there might be some
        # delay before Elasticsearch makes the log available.
        if 'last_log_timestamp' in metadata:
            last_log_ts = timezone.parse(metadata['last_log_timestamp'])
            if cur_ts.diff(last_log_ts).in_minutes() >= 5 or 'max_offset' in metadata \
                    and offset >= metadata['max_offset']:
                metadata['end_of_log'] = True

        if offset != next_offset or 'last_log_timestamp' not in metadata:
            metadata['last_log_timestamp'] = str(cur_ts)

        message = '\n'.join([log.message for log in logs])

        return message, metadata
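
The metadata round-trip above implies a simple polling pattern for auto-tailing. A minimal sketch, where handler and ti are placeholders for a configured ElasticsearchTaskHandler and a TaskInstance, and read() is the public wrapper around _read():

import time

metadata = {}
while not metadata.get('end_of_log'):
    # read() returns one (log, metadata) pair per try number requested.
    logs, metadatas = handler.read(ti, 1, metadata)
    metadata = metadatas[0]
    if logs[0]:
        print(logs[0])  # the shape of logs[0] varies by Airflow version
    time.sleep(5)  # the handler declares end_of_log after ~5 min of silence
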
    def _read(self, ti, try_number, metadata=None):
        """
        Endpoint for streaming log.
        :param ti: task instance object
        :param try_number: try_number of the task instance
        :param metadata: log metadata,
                         can be used for streaming log reading and auto-tailing.
        :return: a list of log documents and metadata.
        """
        if not metadata:
            metadata = {'offset': 0}
        if 'offset' not in metadata:
            metadata['offset'] = 0

        offset = metadata['offset']
        log_id = self._render_log_id(ti, try_number)

        logs = self.es_read(log_id, offset)

        next_offset = offset if not logs else logs[-1].offset

        metadata['offset'] = next_offset
        # end_of_log_mark may contain characters like '\n', which are needed
        # to have the log uploaded but are not stored in Elasticsearch.
        metadata['end_of_log'] = False if not logs \
            else logs[-1].message == self.end_of_log_mark.strip()

        cur_ts = pendulum.now()
        # Assume end of log after not receiving new log for 5 min,
        # as executor heartbeat is 1 min and there might be some
        # delay before Elasticsearch makes the log available.
        if 'last_log_timestamp' in metadata:
            last_log_ts = timezone.parse(metadata['last_log_timestamp'])
            if cur_ts.diff(last_log_ts).in_minutes() >= 5:
                metadata['end_of_log'] = True

        if offset != next_offset or 'last_log_timestamp' not in metadata:
            metadata['last_log_timestamp'] = str(cur_ts)

        message = '\n'.join([log.message for log in logs])

        return message, metadata
Example #26
    def test_read_as_download_logs(self):
        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(
            self.ti,
            1,
            {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'download_logs': True,
                'end_of_log': False
            },
        )
        assert 1 == len(logs)
        assert len(logs) == len(metadatas)
        assert len(logs[0]) == 1
        assert self.test_message == logs[0][0][-1]
        assert not metadatas[0]['end_of_log']
        assert metadatas[0]['download_logs']
        assert '1' == metadatas[0]['offset']
        assert timezone.parse(metadatas[0]['last_log_timestamp']) > ts
Example #27
    def test_test(self):
        """Test the `airflow test` command"""
        args = create_mock_args(
            task_id='print_the_context',
            dag_id='example_python_operator',
            subdir=None,
            execution_date=timezone.parse('2018-01-01')
        )

        saved_stdout = sys.stdout
        try:
            sys.stdout = out = StringIO()
            cli.test(args)

            output = out.getvalue()
            # Check that prints, and log messages, are shown
            self.assertIn('END_DATE', output)
            self.assertIn("'example_python_operator__print_the_context__20180101'", output)
        finally:
            sys.stdout = saved_stdout
Example #28
def trigger_dag(dag_id):
    """
    Trigger a new dag run for a Dag with an execution date of now unless
    specified in the data.
    """
    data = request.get_json(force=True)

    run_id = None
    if "run_id" in data:
        run_id = data["run_id"]

    conf = None
    if "conf" in data:
        conf = data["conf"]

    execution_date = None
    if "execution_date" in data and data["execution_date"] is not None:
        execution_date = data["execution_date"]

        # Convert string datetime into actual datetime
        try:
            execution_date = timezone.parse(execution_date)
        except ValueError:
            error_message = (
                "Given execution date, {}, could not be identified "
                "as a date. Example date format: 2015-11-16T14:34:15+00:00".
                format(execution_date))
            response = jsonify({"error": error_message})
            response.status_code = 400

            return response

    try:
        dr = trigger.trigger_dag(dag_id, run_id, conf, execution_date)
    except AirflowException as err:
        response = jsonify(error="{}".format(err))
        response.status_code = err.status_code
        return response

    response = jsonify(message="Created {}".format(dr))
    return response
    def test_response_200(self, session):
        import_error = ImportError(
            filename="Lorem_ipsum.py",
            stacktrace="Lorem ipsum",
            timestamp=timezone.parse(self.timestamp, timezone="UTC"),
        )
        session.add(import_error)
        session.commit()

        response = self.client.get(f"/api/v1/importErrors/{import_error.id}",
                                   environ_overrides={'REMOTE_USER': "******"})

        assert response.status_code == 200
        response_data = response.json
        response_data["import_error_id"] = 1
        assert {
            "filename": "Lorem_ipsum.py",
            "import_error_id": 1,
            "stack_trace": "Lorem ipsum",
            "timestamp": "2020-06-10T12:00:00+00:00",
        } == response_data
    def test_read_with_match_phrase_query(self):
        similar_log_id = '{task_id}-{dag_id}-2016-01-01T00:00:00+00:00-1'.format(
            dag_id=TestElasticsearchTaskHandler.DAG_ID, task_id=TestElasticsearchTaskHandler.TASK_ID
        )
        another_test_message = 'another message'

        another_body = {'message': another_test_message, 'log_id': similar_log_id, 'offset': 1}
        self.es.index(index=self.index_name, doc_type=self.doc_type, body=another_body, id=1)

        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {'offset': '0', 'last_log_timestamp': str(ts), 'end_of_log': False, 'max_offset': 2}
        )
        assert 1 == len(logs)
        assert len(logs) == len(metadatas)
        assert self.test_message == logs[0][0][-1]
        assert another_test_message != logs[0]

        assert not metadatas[0]['end_of_log']
        assert '1' == metadatas[0]['offset']
        assert timezone.parse(metadatas[0]['last_log_timestamp']) > ts
Example #31
    def wrapper(*args, **kwargs):
        if current_user and hasattr(current_user, 'username'):
            user = current_user.username
        else:
            user = '******'

        log = models.Log(
            event=f.__name__,
            task_instance=None,
            owner=user,
            extra=str(list(request.args.items())),
            task_id=request.args.get('task_id'),
            dag_id=request.args.get('dag_id'))

        if 'execution_date' in request.args:
            log.execution_date = timezone.parse(request.args.get('execution_date'))

        with create_session() as session:
            session.add(log)
            session.commit()

        return f(*args, **kwargs)
Example #33
    def test_should_respond_200(self, session):
        log_model = Log(
            event='TEST_EVENT',
            task_instance=self._create_task_instance(),
        )
        log_model.dttm = timezone.parse(self.default_time)
        session.add(log_model)
        session.commit()
        event_log_id = log_model.id
        response = self.client.get(f"/api/v1/eventLogs/{event_log_id}",
                                   environ_overrides={'REMOTE_USER': "******"})
        assert response.status_code == 200
        assert response.json == {
            "event_log_id": event_log_id,
            "event": "TEST_EVENT",
            "dag_id": "TEST_DAG_ID",
            "task_id": "TEST_TASK_ID",
            "execution_date": self.default_time,
            "owner": 'airflow',
            "when": self.default_time,
            "extra": None,
        }
Example #34
    def test_serialize(self, session):
        import_error = [
            ImportError(
                filename="Lorem_ipsum.py",
                stacktrace="Lorem ipsum",
                timestamp=timezone.parse(self.timestamp, timezone="UTC"),
            ) for _ in range(2)
        ]
        session.add_all(import_error)
        session.commit()
        query = session.query(ImportError)
        query_list = query.all()
        serialized_data = (import_error_collection_schema.dump(
            ImportErrorCollection(import_errors=query_list,
                                  total_entries=2)).data, )
        # To maintain consistency in the key sequence across the db in tests
        serialized_data[0]["import_errors"][0]["import_error_id"] = 1
        serialized_data[0]["import_errors"][1]["import_error_id"] = 2
        self.assertEqual(
            {
                "import_errors": [
                    {
                        "filename": "Lorem_ipsum.py",
                        "import_error_id": 1,
                        "stack_trace": "Lorem ipsum",
                        "timestamp": "2020-06-10T12:02:44+00:00",
                    },
                    {
                        "filename": "Lorem_ipsum.py",
                        "import_error_id": 2,
                        "stack_trace": "Lorem ipsum",
                        "timestamp": "2020-06-10T12:02:44+00:00",
                    },
                ],
                "total_entries": 2,
            },
            serialized_data[0],
        )
Example #35
    def test_get_import_errors(self, session):
        import_error = [
            ImportError(
                filename="Lorem_ipsum.py",
                stacktrace="Lorem ipsum",
                timestamp=timezone.parse(self.timestamp, timezone="UTC"),
            ) for _ in range(2)
        ]
        session.add_all(import_error)
        session.commit()

        response = self.client.get("/api/v1/importErrors",
                                   environ_overrides={'REMOTE_USER': "******"})

        assert response.status_code == 200
        response_data = response.json
        self._normalize_import_errors(response_data['import_errors'])
        self.assertEqual(
            {
                "import_errors": [
                    {
                        "filename": "Lorem_ipsum.py",
                        "import_error_id": 1,
                        "stack_trace": "Lorem ipsum",
                        "timestamp": "2020-06-10T12:00:00+00:00",
                    },
                    {
                        "filename": "Lorem_ipsum.py",
                        "import_error_id": 2,
                        "stack_trace": "Lorem ipsum",
                        "timestamp": "2020-06-10T12:00:00+00:00",
                    },
                ],
                "total_entries":
                2,
            },
            response_data,
        )
Example #36
    def test_get_import_errors_order_by(self, session):
        import_error = [
            ImportError(
                filename=f"Lorem_ipsum{i}.py",
                stacktrace="Lorem ipsum",
                timestamp=timezone.parse(self.timestamp, timezone="UTC") +
                timedelta(days=-i),
            ) for i in range(1, 3)
        ]
        session.add_all(import_error)
        session.commit()

        response = self.client.get("/api/v1/importErrors?order_by=-timestamp",
                                   environ_overrides={'REMOTE_USER': "******"})

        assert response.status_code == 200
        response_data = response.json
        self._normalize_import_errors(response_data['import_errors'])
        assert {
            "import_errors": [
                {
                    "filename": "Lorem_ipsum1.py",
                    "import_error_id":
                    1,  # id normalized with self._normalize_import_errors
                    "stack_trace": "Lorem ipsum",
                    "timestamp": "2020-06-09T12:00:00+00:00",
                },
                {
                    "filename": "Lorem_ipsum2.py",
                    "import_error_id": 2,
                    "stack_trace": "Lorem ipsum",
                    "timestamp": "2020-06-08T12:00:00+00:00",
                },
            ],
            "total_entries":
            2,
        } == response_data
Example #37
def get_lineage(dag_id: str, execution_date: str):
    # Convert string datetime into actual datetime
    try:
        execution_date = timezone.parse(execution_date)
    except ValueError:
        error_message = (
            'Given execution date, {}, could not be identified '
            'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format(
                execution_date))
        log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400

        return response

    try:
        lineage = get_lineage_api(dag_id=dag_id, execution_date=execution_date)
    except AirflowException as err:
        log.error(err)
        response = jsonify(error=f"{err}")
        response.status_code = err.status_code
        return response
    else:
        return jsonify(lineage)
Example #38
def task_instance_info(dag_id, execution_date, task_id):
    """
    Returns a JSON with a task instance's public instance variables.
    The format for the exec_date is expected to be
    "YYYY-mm-DDTHH:MM:SS", for example: "2016-11-16T11:34:15". This will
    of course need to have been encoded for URL in the request.
    """

    # Convert string datetime into actual datetime
    try:
        execution_date = timezone.parse(execution_date)
    except ValueError:
        error_message = (
            'Given execution date, {}, could not be identified '
            'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format(
                execution_date))
        log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400

        return response

    try:
        info = get_task_instance(dag_id, task_id, execution_date)
    except AirflowException as err:
        log.info(err)
        response = jsonify(error="{}".format(err))
        response.status_code = err.status_code
        return response

    # JSONify and return.
    fields = {
        k: str(v)
        for k, v in vars(info).items() if not k.startswith('_')
    }
    return jsonify(fields)
    def test_read_with_match_phrase_query(self):
        similar_log_id = '{task_id}-{dag_id}-2016-01-01T00:00:00+00:00-1'.format(
            dag_id=TestElasticsearchTaskHandler.DAG_ID,
            task_id=TestElasticsearchTaskHandler.TASK_ID)
        another_test_message = 'another message'

        another_body = {'message': another_test_message, 'log_id': similar_log_id, 'offset': 1}
        self.es.index(index=self.index_name, doc_type=self.doc_type,
                      body=another_body, id=1)

        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(self.ti,
                                                    1,
                                                    {'offset': 0,
                                                     'last_log_timestamp': str(ts),
                                                     'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertNotEqual(another_test_message, logs[0])

        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual('1', metadatas[0]['offset'])
        self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
    def test_read_with_match_phrase_query(self):
        similar_log_id = '{task_id}-{dag_id}-2016-01-01T00:00:00+00:00-1'.format(
            dag_id=TestElasticsearchTaskHandler.DAG_ID,
            task_id=TestElasticsearchTaskHandler.TASK_ID)
        another_test_message = 'another message'

        another_body = {'message': another_test_message, 'log_id': similar_log_id, 'offset': 1}
        self.es.index(index=self.index_name, doc_type=self.doc_type,
                      body=another_body, id=1)

        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(self.ti,
                                                    1,
                                                    {'offset': 0,
                                                     'last_log_timestamp': str(ts),
                                                     'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertNotEqual(another_test_message, logs[0])

        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(1, metadatas[0]['offset'])
        self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
Example #41
def get_lineage(dag_id: str, execution_date: str):
    """Get Lineage details for a DagRun"""
    # Convert string datetime into actual datetime
    try:
        execution_dt = timezone.parse(execution_date)
    except ValueError:
        log.error("Given execution date could not be identified as a date.")
        error_message = (
            f'Given execution date, {execution_date}, could not be identified as a date. '
            f'Example date format: 2015-11-16T14:34:15+00:00')
        response = jsonify({'error': error_message})
        response.status_code = 400

        return response

    try:
        lineage = get_lineage_api(dag_id=dag_id, execution_date=execution_dt)
    except AirflowException as err:
        log.error(err)
        response = jsonify(error=f"{err}")
        response.status_code = err.status_code
        return response
    else:
        return jsonify(lineage)
Example #42
def dag_run_state(dag_id, execution_date):
    """
    Get dag run state by dag_id and execution_date.
    """
    try:
        execution_date = timezone.parse(execution_date)
    except ValueError:
        error_message = (
            'Given execution date, {}, could not be identified '
            'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format(
                execution_date))
        _log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400
        return response

    try:
        state = get_dag_run_state(dag_id, execution_date)
    except AirflowException as e:
        _log.error(e)
        response = jsonify(error="{}".format(e))
        response.status_code = getattr(e, 'status', 500)
        return response
    return jsonify(state)
Example #43
    def _create_test_dag_run(self,
                             state='running',
                             extra_dag=False,
                             commit=True):
        dag_runs = []
        dags = [DagModel(dag_id="TEST_DAG_ID")]
        dagrun_model_1 = DagRun(
            dag_id="TEST_DAG_ID",
            run_id="TEST_DAG_RUN_ID_1",
            run_type=DagRunType.MANUAL.value,
            execution_date=timezone.parse(self.default_time),
            start_date=timezone.parse(self.default_time),
            external_trigger=True,
            state=state,
        )
        dag_runs.append(dagrun_model_1)
        dagrun_model_2 = DagRun(
            dag_id="TEST_DAG_ID",
            run_id="TEST_DAG_RUN_ID_2",
            run_type=DagRunType.MANUAL.value,
            execution_date=timezone.parse(self.default_time_2),
            start_date=timezone.parse(self.default_time),
            external_trigger=True,
        )
        dag_runs.append(dagrun_model_2)
        if extra_dag:
            for i in range(3, 5):
                dags.append(DagModel(dag_id='TEST_DAG_ID_' + str(i)))
                dag_runs.append(
                    DagRun(
                        dag_id='TEST_DAG_ID_' + str(i),
                        run_id='TEST_DAG_RUN_ID_' + str(i),
                        run_type=DagRunType.MANUAL.value,
                        execution_date=timezone.parse(self.default_time_2),
                        start_date=timezone.parse(self.default_time),
                        external_trigger=True,
                    ))
        if commit:
            with create_session() as session:
                session.add_all(dag_runs)
                session.add_all(dags)
        return dag_runs
Example #44
    def apply(self, query, value):
        value = timezone.parse(value, timezone=timezone.utc)

        return super(UtcAwareFilterMixin, self).apply(query, value)
Example #45
    def apply(self, query, value):
        value = timezone.parse(value, timezone=timezone.utc)

        return super().apply(query, value)
def parse_execution_date(execution_date_str):
    """
    Parse execution date string to datetime object.
    """
    return timezone.parse(execution_date_str)
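
Usage sketch, assuming the default timezone is UTC so that naive strings come back UTC-aware:

dt = parse_execution_date("2018-01-01")
print(dt)  # 2018-01-01 00:00:00+00:00

dt = parse_execution_date("2018-04-27T08:39:51.298439+00:00")
print(dt.isoformat())  # 2018-04-27T08:39:51.298439+00:00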