def test_read_with_empty_metadata(self):
    ts = pendulum.now()
    logs, metadatas = self.es_task_handler.read(self.ti, 1, {})
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual(self.test_message, logs[0])
    self.assertFalse(metadatas[0]['end_of_log'])
    # offset should be initialized to 0 if not provided.
    self.assertEqual(1, metadatas[0]['offset'])
    # last_log_timestamp will be initialized using log reading time
    # if no last_log_timestamp is provided.
    self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)

    # case where offset is missing but metadata not empty.
    self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
    logs, metadatas = self.es_task_handler.read(self.ti, 1, {'end_of_log': False})
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual([''], logs)
    self.assertFalse(metadatas[0]['end_of_log'])
    # offset should be initialized to 0 if not provided.
    self.assertEqual(0, metadatas[0]['offset'])
    # last_log_timestamp will be initialized using log reading time
    # if no last_log_timestamp is provided.
    self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
def task_instance_info(dag_id, execution_date, task_id):
    """
    Returns a JSON with a task instance's public instance variables.

    The format for the exec_date is expected to be
    "YYYY-mm-DDTHH:MM:SS", for example: "2016-11-16T11:34:15". This will
    of course need to have been encoded for URL in the request.
    """
    # Convert string datetime into actual datetime
    try:
        execution_date = timezone.parse(execution_date)
    except ValueError:
        error_message = (
            'Given execution date, {}, could not be identified '
            'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format(
                execution_date))
        _log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400

        return response

    try:
        info = get_task_instance(dag_id, task_id, execution_date)
    except AirflowException as err:
        _log.info(err)
        response = jsonify(error="{}".format(err))
        response.status_code = err.status_code
        return response

    # JSONify and return.
    fields = {k: str(v)
              for k, v in vars(info).items()
              if not k.startswith('_')}
    return jsonify(fields)
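# A minimal client-side sketch for the view above, showing how the execution
# date must be URL-encoded before it is placed in the request path, as the
# docstring requires. The route and host used here are assumptions for
# illustration (an experimental-API style path), not part of the code above;
# only urllib.parse and requests, both standard/common libraries, are used.
import urllib.parse

import requests  # assumed to be available in the client environment

execution_date = "2016-11-16T11:34:15"
encoded_date = urllib.parse.quote(execution_date)  # ':' (and '+', if a tz offset is present) become percent-escapes

url = (
    "http://localhost:8080/api/experimental/dags/example_python_operator"
    f"/dag_runs/{encoded_date}/tasks/print_the_context"
)
response = requests.get(url)
print(response.json())  # the task instance's public attributes, stringified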
def test_read_with_none_metadata(self):
    logs, metadatas = self.es_task_handler.read(self.ti, 1)
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual(self.test_message, logs[0])
    self.assertFalse(metadatas[0]['end_of_log'])
    self.assertEqual(1, metadatas[0]['offset'])
    self.assertTrue(
        timezone.parse(metadatas[0]['last_log_timestamp']) < pendulum.now())
def test_read(self):
    ts = pendulum.now()
    logs, metadatas = self.es_task_handler.read(self.ti, 1,
                                                {'offset': 0,
                                                 'last_log_timestamp': str(ts),
                                                 'end_of_log': False})
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual(self.test_message, logs[0])
    self.assertFalse(metadatas[0]['end_of_log'])
    self.assertEqual(1, metadatas[0]['offset'])
    self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
def test_read_timeout(self):
    ts = pendulum.now().subtract(minutes=5)

    self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
    logs, metadatas = self.es_task_handler.read(self.ti, 1,
                                                {'offset': 0,
                                                 'last_log_timestamp': str(ts),
                                                 'end_of_log': False})
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual([''], logs)
    self.assertTrue(metadatas[0]['end_of_log'])
    # offset should be initialized to 0 if not provided.
    self.assertEqual(0, metadatas[0]['offset'])
    self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) == ts)
def execute(self, context):
    if self.execution_date is not None:
        run_id = 'trig__{}'.format(self.execution_date)
        self.execution_date = timezone.parse(self.execution_date)
    else:
        run_id = 'trig__' + timezone.utcnow().isoformat()
    dro = DagRunOrder(run_id=run_id)
    if self.python_callable is not None:
        dro = self.python_callable(context, dro)
    if dro:
        trigger_dag(dag_id=self.trigger_dag_id,
                    run_id=dro.run_id,
                    conf=json.dumps(dro.payload),
                    execution_date=self.execution_date,
                    replace_microseconds=False)
    else:
        self.log.info("Criteria not met, moving on")
def trigger_dag(dag_id): """ Trigger a new dag run for a Dag with an execution date of now unless specified in the data. """ data = request.get_json(force=True) run_id = None if 'run_id' in data: run_id = data['run_id'] conf = None if 'conf' in data: conf = data['conf'] execution_date = None if 'execution_date' in data and data['execution_date'] is not None: execution_date = data['execution_date'] # Convert string datetime into actual datetime try: execution_date = timezone.parse(execution_date) except ValueError: error_message = ( 'Given execution date, {}, could not be identified ' 'as a date. Example date format: 2015-11-16T14:34:15+00:00'. format(execution_date)) _log.info(error_message) response = jsonify({'error': error_message}) response.status_code = 400 return response try: dr = trigger.trigger_dag(dag_id, run_id, conf, execution_date) except AirflowException as err: _log.error(err) response = jsonify(error="{}".format(err)) response.status_code = err.status_code return response if getattr(g, 'user', None): _log.info("User {} created {}".format(g.user, dr)) response = jsonify(message="Created {}".format(dr)) return response
def test_read(self):
    ts = pendulum.now()
    logs, metadatas = self.es_task_handler.read(
        self.ti, 1, {
            'offset': 0,
            'last_log_timestamp': str(ts),
            'end_of_log': False
        })
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual(len(logs[0]), 1)
    self.assertEqual(self.test_message, logs[0][0][-1])
    self.assertFalse(metadatas[0]['end_of_log'])
    self.assertEqual('1', metadatas[0]['offset'])
    self.assertTrue(
        timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
def trigger_dag(dag_id): """ Trigger a new dag run for a Dag with an execution date of now unless specified in the data. """ data = request.get_json(force=True) run_id = None if 'run_id' in data: run_id = data['run_id'] conf = None if 'conf' in data: conf = data['conf'] execution_date = None if 'execution_date' in data and data['execution_date'] is not None: execution_date = data['execution_date'] # Convert string datetime into actual datetime try: execution_date = timezone.parse(execution_date) except ValueError: error_message = ( 'Given execution date, {}, could not be identified ' 'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format( execution_date)) _log.info(error_message) response = jsonify({'error': error_message}) response.status_code = 400 return response try: dr = trigger.trigger_dag(dag_id, run_id, conf, execution_date) except AirflowException as err: _log.error(err) response = jsonify(error="{}".format(err)) response.status_code = err.status_code return response if getattr(g, 'user', None): _log.info("User {} created {}".format(g.user, dr)) response = jsonify(message="Created {}".format(dr)) return response
def get_date_time_num_runs_dag_runs_form_data(request, session, dag):
    dttm = request.args.get('execution_date')
    if dttm:
        dttm = pendulum.parse(dttm)
    else:
        dttm = dag.latest_execution_date or timezone.utcnow()

    base_date = request.args.get('base_date')
    if base_date:
        base_date = timezone.parse(base_date)
    else:
        # The DateTimeField widget truncates milliseconds and would lose
        # the first dag run. Round to next second.
        base_date = (dttm + timedelta(seconds=1)).replace(microsecond=0)

    default_dag_run = conf.getint('webserver', 'default_dag_run_display_number')
    num_runs = request.args.get('num_runs')
    num_runs = int(num_runs) if num_runs else default_dag_run

    DR = models.DagRun
    drs = (
        session.query(DR)
        .filter(DR.dag_id == dag.dag_id, DR.execution_date <= base_date)
        .order_by(desc(DR.execution_date))
        .limit(num_runs)
        .all()
    )
    dr_choices = []
    dr_state = None
    for dr in drs:
        dr_choices.append((dr.execution_date.isoformat(), dr.run_id))
        if dttm == dr.execution_date:
            dr_state = dr.state

    # Happens if base_date was changed and the selected dag run is not in result
    if not dr_state and drs:
        dr = drs[0]
        dttm = dr.execution_date
        dr_state = dr.state

    return {
        'dttm': dttm,
        'base_date': base_date,
        'num_runs': num_runs,
        'execution_date': dttm.isoformat(),
        'dr_choices': dr_choices,
        'dr_state': dr_state,
    }
def test_read_nonexistent_log(self):
    ts = pendulum.now()
    # In ElasticMock, search is going to return all documents with matching index
    # and doc_type regardless of match filters, so we delete the log entry instead
    # of making a new TaskInstance to query.
    self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
    logs, metadatas = self.es_task_handler.read(self.ti, 1,
                                                {'offset': 0,
                                                 'last_log_timestamp': str(ts),
                                                 'end_of_log': False})
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual([''], logs)
    self.assertFalse(metadatas[0]['end_of_log'])
    self.assertEqual(0, metadatas[0]['offset'])
    # last_log_timestamp won't change if no log lines read.
    self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) == ts)
def test_read_nonexistent_log(self):
    ts = pendulum.now()
    # In ElasticMock, search is going to return all documents with matching index
    # and doc_type regardless of match filters, so we delete the log entry instead
    # of making a new TaskInstance to query.
    self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
    logs, metadatas = self.es_task_handler.read(self.ti, 1,
                                                {'offset': 0,
                                                 'last_log_timestamp': str(ts),
                                                 'end_of_log': False})
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual([''], logs)
    self.assertFalse(metadatas[0]['end_of_log'])
    self.assertEqual('0', metadatas[0]['offset'])
    # last_log_timestamp won't change if no log lines read.
    self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) == ts)
def test_local_run(self):
    args = create_mock_args(
        task_id='print_the_context',
        dag_id='example_python_operator',
        subdir='/root/dags/example_python_operator.py',
        interactive=True,
        execution_date=timezone.parse('2018-04-27T08:39:51.298439+00:00'))

    reset(args.dag_id)

    with patch('argparse.Namespace', args) as mock_args:
        run(mock_args)
        dag = get_dag(mock_args)
        task = dag.get_task(task_id=args.task_id)
        ti = TaskInstance(task, args.execution_date)
        ti.refresh_from_db()
        state = ti.current_state()
        self.assertEqual(state, State.SUCCESS)
def execute(self, context: Dict):
    if isinstance(self.execution_date, datetime.datetime):
        run_id = "trig__{}".format(self.execution_date.isoformat())
    elif isinstance(self.execution_date, str):
        run_id = "trig__{}".format(self.execution_date)
        self.execution_date = timezone.parse(
            self.execution_date)  # trigger_dag() expects datetime
    else:
        run_id = "trig__{}".format(timezone.utcnow().isoformat())

    # Ignore MyPy type for self.execution_date
    # because it doesn't pick up the timezone.parse() for strings
    trigger_dag(
        dag_id=self.trigger_dag_id,
        run_id=run_id,
        conf=self.conf,
        execution_date=self.execution_date,
        replace_microseconds=False,
    )
def test_read_timeout(self):
    ts = pendulum.now().subtract(minutes=5)

    self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
    logs, metadatas = self.es_task_handler.read(
        self.ti, 1, {
            'offset': 0,
            'last_log_timestamp': str(ts),
            'end_of_log': False
        })
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual([''], logs)
    self.assertTrue(metadatas[0]['end_of_log'])
    # offset should be initialized to 0 if not provided.
    self.assertEqual('0', metadatas[0]['offset'])
    self.assertTrue(
        timezone.parse(metadatas[0]['last_log_timestamp']) == ts)
def test_limit_and_offset(self, url, expected_import_error_ids, session):
    import_errors = [
        ImportError(
            filename=f"/tmp/file_{i}.py",
            stacktrace="Lorem ipsum",
            timestamp=timezone.parse(self.timestamp, timezone="UTC"),
        )
        for i in range(1, 110)
    ]
    session.add_all(import_errors)
    session.commit()

    response = self.client.get(url)

    assert response.status_code == 200
    import_ids = [pool["filename"] for pool in response.json["import_errors"]]
    self.assertEqual(import_ids, expected_import_error_ids)
def execute(self, context: Dict):
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    # Ignore MyPy type for self.execution_date
    # because it doesn't pick up the timezone.parse() for strings
    trigger_dag(
        dag_id=self.trigger_dag_id,
        run_id=run_id,
        conf=self.conf,
        execution_date=self.execution_date,
        replace_microseconds=False,
    )
def test_local_run(self):
    args = create_mock_args(
        task_id='print_the_context',
        dag_id='example_python_operator',
        subdir='/root/dags/example_python_operator.py',
        interactive=True,
        execution_date=timezone.parse('2018-04-27T08:39:51.298439+00:00')
    )

    reset(args.dag_id)

    with patch('argparse.Namespace', args) as mock_args:
        run(mock_args)
        dag = get_dag(mock_args)
        task = dag.get_task(task_id=args.task_id)
        ti = TaskInstance(task, args.execution_date)
        ti.refresh_from_db()
        state = ti.current_state()
        self.assertEqual(state, State.SUCCESS)
def test_serialize(self, session):
    event_log_model = Log(event="TEST_EVENT", task_instance=self._create_task_instance())
    session.add(event_log_model)
    session.commit()
    event_log_model.dttm = timezone.parse(self.default_time)
    log_model = session.query(Log).first()
    deserialized_log = event_log_schema.dump(log_model)
    self.assertEqual(
        deserialized_log,
        {
            "event_log_id": event_log_model.id,
            "event": "TEST_EVENT",
            "dag_id": "TEST_DAG_ID",
            "task_id": "TEST_TASK_ID",
            "execution_date": self.default_time,
            "owner": 'airflow',
            "when": self.default_time,
            "extra": None,
        },
    )
def test_test(self):
    """Test the `airflow test` command"""
    args = create_mock_args(task_id='print_the_context',
                            dag_id='example_python_operator',
                            subdir=None,
                            execution_date=timezone.parse('2018-01-01'))
    saved_stdout = sys.stdout
    try:
        sys.stdout = out = io.StringIO()
        cli.test(args)

        output = out.getvalue()
        # Check that prints, and log messages, are shown
        self.assertIn("'example_python_operator__print_the_context__20180101'", output)
    finally:
        sys.stdout = saved_stdout
def execute(self, context: Dict):
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Ignore MyPy type for self.execution_date
        # because it doesn't pick up the timezone.parse() for strings
        trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=self.execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if self.reset_dag_run:
            self.log.info("Clearing %s on %s", self.trigger_dag_id, self.execution_date)

            # Get target dag object and call clear()
            dag_model = DagModel.get_current(self.trigger_dag_id)
            if dag_model is None:
                raise DagNotFound(
                    f"Dag id {self.trigger_dag_id} not found in DagModel")

            dag_bag = DagBag(
                dag_folder=dag_model.fileloc,
                store_serialized_dags=settings.STORE_SERIALIZED_DAGS)
            dag = dag_bag.get_dag(self.trigger_dag_id)
            dag.clear(start_date=self.execution_date, end_date=self.execution_date)
        else:
            raise e
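# A minimal DAG sketch showing how the execute() behaviour above could be
# exercised. The argument names (trigger_dag_id, conf, execution_date,
# reset_dag_run) are taken from the attributes the method itself reads; the
# surrounding DAG boilerplate and the operator import path are assumptions for
# illustration and may differ between Airflow versions.
from airflow import DAG
from airflow.operators.dagrun_operator import TriggerDagRunOperator  # path assumed; newer releases use airflow.operators.trigger_dagrun
from airflow.utils import timezone

with DAG(
    dag_id="example_trigger_controller",
    start_date=timezone.datetime(2020, 1, 1),
    schedule_interval=None,
) as dag:
    trigger = TriggerDagRunOperator(
        task_id="trigger_target",
        trigger_dag_id="example_target_dag",
        conf={"message": "hello"},
        # A string is accepted here and converted with timezone.parse() in execute()
        execution_date="2020-01-01T00:00:00+00:00",
        # If the target run already exists, clear it instead of failing
        reset_dag_run=True,
    )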
def test_serialize(self, session):
    import_error = ImportError(
        filename="lorem.py",
        stacktrace="Lorem Ipsum",
        timestamp=timezone.parse(self.timestamp, timezone="UTC"),
    )
    session.add(import_error)
    session.commit()
    serialized_data = import_error_schema.dump(import_error)
    serialized_data["import_error_id"] = 1
    self.assertEqual(
        {
            "filename": "lorem.py",
            "import_error_id": 1,
            "stack_trace": "Lorem Ipsum",
            "timestamp": "2020-06-10T12:02:44+00:00",
        },
        serialized_data,
    )
def _read(self, ti, try_number, metadata=None):
    """
    Endpoint for streaming log.

    :param ti: task instance object
    :param try_number: try_number of the task instance
    :param metadata: log metadata, can be used for streaming log reading
        and auto-tailing.
    :return: a list of log documents and metadata.
    """
    if not metadata:
        metadata = {'offset': 0}
    if 'offset' not in metadata:
        metadata['offset'] = 0

    offset = metadata['offset']
    log_id = self._render_log_id(ti, try_number)

    logs = self.es_read(log_id, offset, metadata)

    next_offset = offset if not logs else logs[-1].offset

    metadata['offset'] = next_offset
    # end_of_log_mark may contain characters like '\n' which is needed to
    # have the log uploaded but will not be stored in elasticsearch.
    metadata['end_of_log'] = False if not logs \
        else logs[-1].message == self.end_of_log_mark.strip()

    cur_ts = pendulum.now()
    # Assume end of log after not receiving new log for 5 min,
    # as executor heartbeat is 1 min and there might be some
    # delay before Elasticsearch makes the log available.
    if 'last_log_timestamp' in metadata:
        last_log_ts = timezone.parse(metadata['last_log_timestamp'])
        if cur_ts.diff(last_log_ts).in_minutes() >= 5 or 'max_offset' in metadata \
                and offset >= metadata['max_offset']:
            metadata['end_of_log'] = True

    if offset != next_offset or 'last_log_timestamp' not in metadata:
        metadata['last_log_timestamp'] = str(cur_ts)

    message = '\n'.join([log.message for log in logs])

    return message, metadata
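# A minimal sketch of how a caller could use the metadata dict returned by
# _read() above to auto-tail a task log: the metadata from one call is fed
# back into the next call until the handler reports end_of_log. The handler,
# task instance and try_number below are placeholders rather than objects
# defined in this file; the loop relies only on the 'offset',
# 'last_log_timestamp' and 'end_of_log' keys that _read() itself maintains.
import time


def tail_task_log(handler, ti, try_number, poll_interval=5):
    """Yield chunks of log text until the handler signals end_of_log."""
    metadata = {}  # empty metadata: offset starts at 0 and the timestamp is initialized on first read
    while True:
        message, metadata = handler._read(ti, try_number, metadata)
        if message:
            yield message
        if metadata.get('end_of_log'):
            break
        time.sleep(poll_interval)  # wait for new documents to reach Elasticsearch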
def _read(self, ti, try_number, metadata=None):
    """
    Endpoint for streaming log.

    :param ti: task instance object
    :param try_number: try_number of the task instance
    :param metadata: log metadata, can be used for streaming log reading
        and auto-tailing.
    :return: a list of log documents and metadata.
    """
    if not metadata:
        metadata = {'offset': 0}
    if 'offset' not in metadata:
        metadata['offset'] = 0

    offset = metadata['offset']
    log_id = self._render_log_id(ti, try_number)

    logs = self.es_read(log_id, offset)

    next_offset = offset if not logs else logs[-1].offset

    metadata['offset'] = next_offset
    # end_of_log_mark may contain characters like '\n' which is needed to
    # have the log uploaded but will not be stored in elasticsearch.
    metadata['end_of_log'] = False if not logs \
        else logs[-1].message == self.end_of_log_mark.strip()

    cur_ts = pendulum.now()
    # Assume end of log after not receiving new log for 5 min,
    # as executor heartbeat is 1 min and there might be some
    # delay before Elasticsearch makes the log available.
    if 'last_log_timestamp' in metadata:
        last_log_ts = timezone.parse(metadata['last_log_timestamp'])
        if cur_ts.diff(last_log_ts).in_minutes() >= 5:
            metadata['end_of_log'] = True

    if offset != next_offset or 'last_log_timestamp' not in metadata:
        metadata['last_log_timestamp'] = str(cur_ts)

    message = '\n'.join([log.message for log in logs])

    return message, metadata
def test_read_as_download_logs(self):
    ts = pendulum.now()
    logs, metadatas = self.es_task_handler.read(
        self.ti,
        1,
        {
            'offset': 0,
            'last_log_timestamp': str(ts),
            'download_logs': True,
            'end_of_log': False,
        },
    )
    assert 1 == len(logs)
    assert len(logs) == len(metadatas)
    assert len(logs[0]) == 1
    assert self.test_message == logs[0][0][-1]
    assert not metadatas[0]['end_of_log']
    assert metadatas[0]['download_logs']
    assert '1' == metadatas[0]['offset']
    assert timezone.parse(metadatas[0]['last_log_timestamp']) > ts
def test_test(self):
    """Test the `airflow test` command"""
    args = create_mock_args(
        task_id='print_the_context',
        dag_id='example_python_operator',
        subdir=None,
        execution_date=timezone.parse('2018-01-01')
    )

    saved_stdout = sys.stdout
    try:
        sys.stdout = out = StringIO()
        cli.test(args)

        output = out.getvalue()
        # Check that prints, and log messages, are shown
        self.assertIn('END_DATE', output)
        self.assertIn("'example_python_operator__print_the_context__20180101'", output)
    finally:
        sys.stdout = saved_stdout
def trigger_dag(dag_id): """ Trigger a new dag run for a Dag with an execution date of now unless specified in the data. """ data = request.get_json(force=True) run_id = None if "run_id" in data: run_id = data["run_id"] conf = None if "conf" in data: conf = data["conf"] execution_date = None if "execution_date" in data and data["execution_date"] is not None: execution_date = data["execution_date"] # Convert string datetime into actual datetime try: execution_date = timezone.parse(execution_date) except ValueError: error_message = ( "Given execution date, {}, could not be identified " "as a date. Example date format: 2015-11-16T14:34:15+00:00". format(execution_date)) response = jsonify({"error": error_message}) response.status_code = 400 return response try: dr = trigger.trigger_dag(dag_id, run_id, conf, execution_date) except AirflowException as err: response = jsonify(error="{}".format(err)) response.status_code = err.status_code return response response = jsonify(message="Created {}".format(dr)) return response
def test_response_200(self, session):
    import_error = ImportError(
        filename="Lorem_ipsum.py",
        stacktrace="Lorem ipsum",
        timestamp=timezone.parse(self.timestamp, timezone="UTC"),
    )
    session.add(import_error)
    session.commit()

    response = self.client.get(
        f"/api/v1/importErrors/{import_error.id}",
        environ_overrides={'REMOTE_USER': "******"},
    )

    assert response.status_code == 200
    response_data = response.json
    response_data["import_error_id"] = 1
    assert {
        "filename": "Lorem_ipsum.py",
        "import_error_id": 1,
        "stack_trace": "Lorem ipsum",
        "timestamp": "2020-06-10T12:00:00+00:00",
    } == response_data
def test_read_with_match_phrase_query(self):
    similar_log_id = '{task_id}-{dag_id}-2016-01-01T00:00:00+00:00-1'.format(
        dag_id=TestElasticsearchTaskHandler.DAG_ID, task_id=TestElasticsearchTaskHandler.TASK_ID
    )
    another_test_message = 'another message'

    another_body = {'message': another_test_message, 'log_id': similar_log_id, 'offset': 1}
    self.es.index(index=self.index_name, doc_type=self.doc_type, body=another_body, id=1)

    ts = pendulum.now()
    logs, metadatas = self.es_task_handler.read(
        self.ti, 1, {'offset': '0', 'last_log_timestamp': str(ts), 'end_of_log': False, 'max_offset': 2}
    )
    assert 1 == len(logs)
    assert len(logs) == len(metadatas)
    assert self.test_message == logs[0][0][-1]
    assert another_test_message != logs[0]

    assert not metadatas[0]['end_of_log']
    assert '1' == metadatas[0]['offset']
    assert timezone.parse(metadatas[0]['last_log_timestamp']) > ts
def wrapper(*args, **kwargs):
    if current_user and hasattr(current_user, 'username'):
        user = current_user.username
    else:
        user = '******'

    log = models.Log(
        event=f.__name__,
        task_instance=None,
        owner=user,
        extra=str(list(request.args.items())),
        task_id=request.args.get('task_id'),
        dag_id=request.args.get('dag_id'))

    if 'execution_date' in request.args:
        log.execution_date = timezone.parse(request.args.get('execution_date'))

    with create_session() as session:
        session.add(log)
        session.commit()

    return f(*args, **kwargs)
def test_should_respond_200(self, session):
    log_model = Log(
        event='TEST_EVENT',
        task_instance=self._create_task_instance(),
    )
    log_model.dttm = timezone.parse(self.default_time)
    session.add(log_model)
    session.commit()
    event_log_id = log_model.id

    response = self.client.get(
        f"/api/v1/eventLogs/{event_log_id}",
        environ_overrides={'REMOTE_USER': "******"},
    )
    assert response.status_code == 200
    assert response.json == {
        "event_log_id": event_log_id,
        "event": "TEST_EVENT",
        "dag_id": "TEST_DAG_ID",
        "task_id": "TEST_TASK_ID",
        "execution_date": self.default_time,
        "owner": 'airflow',
        "when": self.default_time,
        "extra": None,
    }
def test_serialize(self, session):
    import_error = [
        ImportError(
            filename="Lorem_ipsum.py",
            stacktrace="Lorem ipsum",
            timestamp=timezone.parse(self.timestamp, timezone="UTC"),
        )
        for i in range(2)
    ]
    session.add_all(import_error)
    session.commit()
    query = session.query(ImportError)
    query_list = query.all()
    serialized_data = (
        import_error_collection_schema.dump(
            ImportErrorCollection(import_errors=query_list, total_entries=2)
        ).data,
    )
    # To maintain consistency in the key sequence across the db in tests
    serialized_data[0]["import_errors"][0]["import_error_id"] = 1
    serialized_data[0]["import_errors"][1]["import_error_id"] = 2
    self.assertEqual(
        {
            "import_errors": [
                {
                    "filename": "Lorem_ipsum.py",
                    "import_error_id": 1,
                    "stack_trace": "Lorem ipsum",
                    "timestamp": "2020-06-10T12:02:44+00:00",
                },
                {
                    "filename": "Lorem_ipsum.py",
                    "import_error_id": 2,
                    "stack_trace": "Lorem ipsum",
                    "timestamp": "2020-06-10T12:02:44+00:00",
                },
            ],
            "total_entries": 2,
        },
        serialized_data[0],
    )
def test_get_import_errors(self, session):
    import_error = [
        ImportError(
            filename="Lorem_ipsum.py",
            stacktrace="Lorem ipsum",
            timestamp=timezone.parse(self.timestamp, timezone="UTC"),
        )
        for _ in range(2)
    ]
    session.add_all(import_error)
    session.commit()

    response = self.client.get("/api/v1/importErrors", environ_overrides={'REMOTE_USER': "******"})

    assert response.status_code == 200
    response_data = response.json
    self._normalize_import_errors(response_data['import_errors'])
    self.assertEqual(
        {
            "import_errors": [
                {
                    "filename": "Lorem_ipsum.py",
                    "import_error_id": 1,
                    "stack_trace": "Lorem ipsum",
                    "timestamp": "2020-06-10T12:00:00+00:00",
                },
                {
                    "filename": "Lorem_ipsum.py",
                    "import_error_id": 2,
                    "stack_trace": "Lorem ipsum",
                    "timestamp": "2020-06-10T12:00:00+00:00",
                },
            ],
            "total_entries": 2,
        },
        response_data,
    )
def test_get_import_errors_order_by(self, session):
    import_error = [
        ImportError(
            filename=f"Lorem_ipsum{i}.py",
            stacktrace="Lorem ipsum",
            timestamp=timezone.parse(self.timestamp, timezone="UTC") + timedelta(days=-i),
        )
        for i in range(1, 3)
    ]
    session.add_all(import_error)
    session.commit()

    response = self.client.get(
        "/api/v1/importErrors?order_by=-timestamp", environ_overrides={'REMOTE_USER': "******"}
    )

    assert response.status_code == 200
    response_data = response.json
    self._normalize_import_errors(response_data['import_errors'])
    assert {
        "import_errors": [
            {
                "filename": "Lorem_ipsum1.py",
                "import_error_id": 1,  # id normalized with self._normalize_import_errors
                "stack_trace": "Lorem ipsum",
                "timestamp": "2020-06-09T12:00:00+00:00",
            },
            {
                "filename": "Lorem_ipsum2.py",
                "import_error_id": 2,
                "stack_trace": "Lorem ipsum",
                "timestamp": "2020-06-08T12:00:00+00:00",
            },
        ],
        "total_entries": 2,
    } == response_data
def get_lineage(dag_id: str, execution_date: str):
    # Convert string datetime into actual datetime
    try:
        execution_date = timezone.parse(execution_date)
    except ValueError:
        error_message = (
            'Given execution date, {}, could not be identified '
            'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format(
                execution_date))
        log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400

        return response

    try:
        lineage = get_lineage_api(dag_id=dag_id, execution_date=execution_date)
    except AirflowException as err:
        log.error(err)
        response = jsonify(error=f"{err}")
        response.status_code = err.status_code
        return response
    else:
        return jsonify(lineage)
def task_instance_info(dag_id, execution_date, task_id):
    """
    Returns a JSON with a task instance's public instance variables.

    The format for the exec_date is expected to be
    "YYYY-mm-DDTHH:MM:SS", for example: "2016-11-16T11:34:15". This will
    of course need to have been encoded for URL in the request.
    """
    # Convert string datetime into actual datetime
    try:
        execution_date = timezone.parse(execution_date)
    except ValueError:
        error_message = (
            'Given execution date, {}, could not be identified '
            'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format(
                execution_date))
        log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400

        return response

    try:
        info = get_task_instance(dag_id, task_id, execution_date)
    except AirflowException as err:
        log.info(err)
        response = jsonify(error="{}".format(err))
        response.status_code = err.status_code
        return response

    # JSONify and return.
    fields = {
        k: str(v)
        for k, v in vars(info).items()
        if not k.startswith('_')
    }
    return jsonify(fields)
def test_read_with_match_phrase_query(self):
    similar_log_id = '{task_id}-{dag_id}-2016-01-01T00:00:00+00:00-1'.format(
        dag_id=TestElasticsearchTaskHandler.DAG_ID,
        task_id=TestElasticsearchTaskHandler.TASK_ID)
    another_test_message = 'another message'

    another_body = {'message': another_test_message,
                    'log_id': similar_log_id,
                    'offset': 1}
    self.es.index(index=self.index_name, doc_type=self.doc_type,
                  body=another_body, id=1)

    ts = pendulum.now()
    logs, metadatas = self.es_task_handler.read(self.ti, 1,
                                                {'offset': 0,
                                                 'last_log_timestamp': str(ts),
                                                 'end_of_log': False})
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual(self.test_message, logs[0])
    self.assertNotEqual(another_test_message, logs[0])

    self.assertFalse(metadatas[0]['end_of_log'])
    self.assertEqual('1', metadatas[0]['offset'])
    self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
def test_read_with_match_phrase_query(self):
    similar_log_id = '{task_id}-{dag_id}-2016-01-01T00:00:00+00:00-1'.format(
        dag_id=TestElasticsearchTaskHandler.DAG_ID,
        task_id=TestElasticsearchTaskHandler.TASK_ID)
    another_test_message = 'another message'

    another_body = {'message': another_test_message,
                    'log_id': similar_log_id,
                    'offset': 1}
    self.es.index(index=self.index_name, doc_type=self.doc_type,
                  body=another_body, id=1)

    ts = pendulum.now()
    logs, metadatas = self.es_task_handler.read(self.ti, 1,
                                                {'offset': 0,
                                                 'last_log_timestamp': str(ts),
                                                 'end_of_log': False})
    self.assertEqual(1, len(logs))
    self.assertEqual(len(logs), len(metadatas))
    self.assertEqual(self.test_message, logs[0])
    self.assertNotEqual(another_test_message, logs[0])

    self.assertFalse(metadatas[0]['end_of_log'])
    self.assertEqual(1, metadatas[0]['offset'])
    self.assertTrue(timezone.parse(metadatas[0]['last_log_timestamp']) > ts)
def get_lineage(dag_id: str, execution_date: str):
    """Get Lineage details for a DagRun"""
    # Convert string datetime into actual datetime
    try:
        execution_dt = timezone.parse(execution_date)
    except ValueError:
        log.error("Given execution date could not be identified as a date.")
        error_message = (
            f'Given execution date, {execution_date}, could not be identified as a date. '
            f'Example date format: 2015-11-16T14:34:15+00:00')
        response = jsonify({'error': error_message})
        response.status_code = 400

        return response

    try:
        lineage = get_lineage_api(dag_id=dag_id, execution_date=execution_dt)
    except AirflowException as err:
        log.error(err)
        response = jsonify(error=f"{err}")
        response.status_code = err.status_code
        return response
    else:
        return jsonify(lineage)
def dag_run_state(dag_id, execution_date):
    """
    Get dag run state by dag_id and execution_date.
    """
    try:
        execution_date = timezone.parse(execution_date)
    except ValueError:
        error_message = (
            'Given execution date, {}, could not be identified '
            'as a date. Example date format: 2015-11-16T14:34:15+00:00'.format(
                execution_date))
        _log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400
        return response

    try:
        state = get_dag_run_state(dag_id, execution_date)
    except AirflowException as e:
        _log.error(e)
        response = jsonify(error="{}".format(e))
        response.status_code = getattr(e, 'status', 500)
        return response

    return jsonify(state)
def _create_test_dag_run(self, state='running', extra_dag=False, commit=True):
    dag_runs = []
    dags = [DagModel(dag_id="TEST_DAG_ID")]
    dagrun_model_1 = DagRun(
        dag_id="TEST_DAG_ID",
        run_id="TEST_DAG_RUN_ID_1",
        run_type=DagRunType.MANUAL.value,
        execution_date=timezone.parse(self.default_time),
        start_date=timezone.parse(self.default_time),
        external_trigger=True,
        state=state,
    )
    dag_runs.append(dagrun_model_1)
    dagrun_model_2 = DagRun(
        dag_id="TEST_DAG_ID",
        run_id="TEST_DAG_RUN_ID_2",
        run_type=DagRunType.MANUAL.value,
        execution_date=timezone.parse(self.default_time_2),
        start_date=timezone.parse(self.default_time),
        external_trigger=True,
    )
    dag_runs.append(dagrun_model_2)

    if extra_dag:
        for i in range(3, 5):
            dags.append(DagModel(dag_id='TEST_DAG_ID_' + str(i)))
            dag_runs.append(
                DagRun(
                    dag_id='TEST_DAG_ID_' + str(i),
                    run_id='TEST_DAG_RUN_ID_' + str(i),
                    run_type=DagRunType.MANUAL.value,
                    execution_date=timezone.parse(self.default_time_2),
                    start_date=timezone.parse(self.default_time),
                    external_trigger=True,
                ))
    if commit:
        with create_session() as session:
            session.add_all(dag_runs)
            session.add_all(dags)
    return dag_runs
def apply(self, query, value):
    value = timezone.parse(value, timezone=timezone.utc)
    return super(UtcAwareFilterMixin, self).apply(query, value)
def apply(self, query, value):
    value = timezone.parse(value, timezone=timezone.utc)
    return super().apply(query, value)
def parse_execution_date(execution_date_str):
    """
    Parse execution date string to datetime object.
    """
    return timezone.parse(execution_date_str)
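# A brief usage sketch for parse_execution_date() above. airflow.utils.timezone.parse
# accepts ISO 8601 strings and returns a timezone-aware datetime; a naive input is
# assumed to be interpreted in the configured default timezone (UTC unless changed),
# so the exact values printed depend on the Airflow configuration and this should be
# read as illustrative rather than exact output.
aware = parse_execution_date("2015-11-16T14:34:15+00:00")
naive_input = parse_execution_date("2016-11-16T11:34:15")

print(aware.isoformat())              # e.g. '2015-11-16T14:34:15+00:00'
print(naive_input.tzinfo is not None)  # True: the result carries timezone information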