def test_get_dag_runs_success_with_state_no_result(self):
    url_template = '/api/experimental/dags/{}/dag_runs?state=dummy'
    dag_id = 'example_bash_operator'

    # Create DagRun
    trigger_dag(dag_id=dag_id, run_id='test_get_dag_runs_success')

    response = self.app.get(url_template.format(dag_id))
    self.assertEqual(200, response.status_code)
    data = json.loads(response.data.decode('utf-8'))

    self.assertIsInstance(data, list)
    self.assertEqual(len(data), 0)
def execute(self, context):
    dro = DagRunOrder(run_id='trig__' + timezone.utcnow().isoformat())
    if self.python_callable is not None:
        dro = self.python_callable(context, dro)
    if dro:
        trigger_dag(dag_id=self.trigger_dag_id,
                    run_id=dro.run_id,
                    conf=json.dumps(dro.payload),
                    execution_date=self.execution_date,
                    replace_microseconds=False)
    else:
        self.log.info("Criteria not met, moving on")
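# Usage sketch for the DagRunOrder-style execute() above (Airflow 1.x).
# Assumptions: the legacy TriggerDagRunOperator from
# airflow.operators.dagrun_operator, an existing `dag` object, and made-up
# dag ids/params; the callable can veto the trigger by returning None.
from airflow.operators.dagrun_operator import TriggerDagRunOperator


def conditionally_trigger(context, dag_run_obj):
    if context['params']['condition_param']:
        dag_run_obj.payload = {'message': 'transferring data'}
        return dag_run_obj  # trigger proceeds with this run_id/payload
    return None  # skip the trigger ("Criteria not met, moving on")


trigger = TriggerDagRunOperator(
    task_id='conditionally_trigger',
    trigger_dag_id='example_trigger_target_dag',
    python_callable=conditionally_trigger,
    params={'condition_param': True},
    dag=dag,  # assumed to be defined in the surrounding module
)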
def test_task_instance_info(self):
    url_template = '/api/experimental/dags/{}/dag_runs/{}/tasks/{}'
    dag_id = 'example_bash_operator'
    task_id = 'also_run_this'
    execution_date = utcnow().replace(microsecond=0)
    datetime_string = quote_plus(execution_date.isoformat())
    wrong_datetime_string = quote_plus(
        datetime(1990, 1, 1, 1, 1, 1).isoformat()
    )

    # Create DagRun
    trigger_dag(dag_id=dag_id,
                run_id='test_task_instance_info_run',
                execution_date=execution_date)

    # Test Correct execution
    response = self.client.get(
        url_template.format(dag_id, datetime_string, task_id)
    )
    self.assertEqual(200, response.status_code)
    self.assertIn('state', response.data.decode('utf-8'))
    self.assertNotIn('error', response.data.decode('utf-8'))

    # Test error for nonexistent dag
    response = self.client.get(
        url_template.format('does_not_exist_dag', datetime_string, task_id)
    )
    self.assertEqual(404, response.status_code)
    self.assertIn('error', response.data.decode('utf-8'))

    # Test error for nonexistent task
    response = self.client.get(
        url_template.format(dag_id, datetime_string, 'does_not_exist_task')
    )
    self.assertEqual(404, response.status_code)
    self.assertIn('error', response.data.decode('utf-8'))

    # Test error for nonexistent dag run (wrong execution_date)
    response = self.client.get(
        url_template.format(dag_id, wrong_datetime_string, task_id)
    )
    self.assertEqual(404, response.status_code)
    self.assertIn('error', response.data.decode('utf-8'))

    # Test error for bad datetime format
    response = self.client.get(
        url_template.format(dag_id, 'not_a_datetime', task_id)
    )
    self.assertEqual(400, response.status_code)
    self.assertIn('error', response.data.decode('utf-8'))
def trigger_dag(dag_id):
    """
    Trigger a new dag run for a Dag
    """
    data = request.get_json(force=True)

    run_id = None
    if 'run_id' in data:
        run_id = data['run_id']

    conf = None
    if 'conf' in data:
        conf = data['conf']

    try:
        dr = trigger.trigger_dag(dag_id, run_id, conf)
    except AirflowException as err:
        logging.error(err)
        response = jsonify(error="{}".format(err))
        response.status_code = 404
        return response

    if getattr(g, 'user', None):
        logging.info("User {} created {}".format(g.user, dr))

    response = jsonify(message="Created {}".format(dr))
    return response
def test_get_dag_runs_success_with_capital_state_parameter(self):
    url_template = '/api/experimental/dags/{}/dag_runs?state=RUNNING'
    dag_id = 'example_bash_operator'

    # Create DagRun
    dag_run = trigger_dag(dag_id=dag_id, run_id='test_get_dag_runs_success')

    response = self.app.get(url_template.format(dag_id))
    self.assertEqual(200, response.status_code)
    data = json.loads(response.data.decode('utf-8'))

    self.assertIsInstance(data, list)
    self.assertEqual(len(data), 1)
    self.assertEqual(data[0]['dag_id'], dag_id)
    self.assertEqual(data[0]['id'], dag_run.id)
def test_get_dag_runs_success_with_state_parameter(self):
    url_template = '/api/experimental/dags/{}/dag_runs?state=running'
    dag_id = 'example_bash_operator'

    # Create DagRun
    dag_run = trigger_dag(dag_id=dag_id, run_id='test_get_dag_runs_success')

    response = self.app.get(url_template.format(dag_id))
    self.assertEqual(200, response.status_code)
    data = json.loads(response.data.decode('utf-8'))

    self.assertIsInstance(data, list)
    self.assertEqual(len(data), 1)
    self.assertEqual(data[0]['dag_id'], dag_id)
    self.assertEqual(data[0]['id'], dag_run.id)
def trigger_dag(dag_id):
    """
    Trigger a new dag run for a Dag with an execution date of now unless
    specified in the data.
    """
    data = request.get_json(force=True)

    run_id = None
    if 'run_id' in data:
        run_id = data['run_id']

    conf = None
    if 'conf' in data:
        conf = data['conf']

    execution_date = None
    if 'execution_date' in data and data['execution_date'] is not None:
        execution_date = data['execution_date']

        # Convert string datetime into actual datetime
        try:
            execution_date = datetime.strptime(execution_date,
                                               '%Y-%m-%dT%H:%M:%S')
        except ValueError:
            error_message = (
                'Given execution date, {}, could not be identified '
                'as a date. Example date format: 2015-11-16T14:34:15'
                .format(execution_date))
            _log.info(error_message)
            response = jsonify({'error': error_message})
            response.status_code = 400
            return response

    try:
        dr = trigger.trigger_dag(dag_id, run_id, conf, execution_date)
    except AirflowException as err:
        _log.error(err)
        response = jsonify(error="{}".format(err))
        response.status_code = 404
        return response

    if getattr(g, 'user', None):
        _log.info("User {} created {}".format(g.user, dr))

    response = jsonify(message="Created {}".format(dr))
    return response
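# Client-side sketch for the endpoint above. The route
# /api/experimental/dags/<dag_id>/dag_runs matches the url_template used in
# the tests in this section; host, port and payload values are assumptions.
import requests

resp = requests.post(
    'http://localhost:8080/api/experimental/dags/example_bash_operator/dag_runs',
    json={
        'run_id': 'manual_run_1',                 # optional
        'conf': {'key': 'value'},                 # optional
        'execution_date': '2015-11-16T14:34:15',  # optional; wrong format -> 400
    },
)
print(resp.status_code, resp.json())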
def test_get_dag_runs_success_with_capital_state_parameter(self):
    with conf_vars(
        {("core", "store_serialized_dags"): self.dag_serialzation}
    ):
        url_template = '/api/experimental/dags/{}/dag_runs?state=RUNNING'
        dag_id = 'example_bash_operator'

        # Create DagRun
        dag_run = trigger_dag(
            dag_id=dag_id, run_id='test_get_dag_runs_success')

        response = self.app.get(url_template.format(dag_id))
        self.assertEqual(200, response.status_code)
        data = json.loads(response.data.decode('utf-8'))

        self.assertIsInstance(data, list)
        self.assertEqual(len(data), 1)
        self.assertEqual(data[0]['dag_id'], dag_id)
        self.assertEqual(data[0]['id'], dag_run.id)
def trigger_dag_for_date(dag_id, execution_date):
    """
    Trigger a new dag run for a Dag with the given execution date.

    The format for the execution date is expected to be
    "YYYY-mm-DDTHH:MM:SS", for example: "2016-11-16T11:34:15". Colons in
    the URL should be escaped as %3A; Flask unescapes them automatically
    before they are passed into this method.
    """
    data = request.get_json(force=True)

    run_id = None
    if 'run_id' in data:
        run_id = data['run_id']

    conf = None
    if 'conf' in data:
        conf = data['conf']

    # Convert string datetime into actual datetime
    try:
        execution_date = datetime.strptime(execution_date,
                                           '%Y-%m-%dT%H:%M:%S')
    except ValueError:
        error_message = ('Given execution date, {}, could not be identified '
                         'as a date. Example date format: 2015-11-16T14:34:15'
                         .format(execution_date))
        _log.info(error_message)
        response = jsonify({'error': error_message})
        response.status_code = 400
        return response

    try:
        dr = trigger.trigger_dag(dag_id, run_id, conf, execution_date)
    except AirflowException as err:
        _log.error(err)
        response = jsonify(error="{}".format(err))
        response.status_code = 404
        return response

    if getattr(g, 'user', None):
        _log.info("User {} created {}".format(g.user, dr))

    response = jsonify(message="Created {}".format(dr))
    return response
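# Sketch of building a URL for the endpoint above: colons in the ISO
# timestamp are percent-encoded to %3A (mirroring the quote_plus usage in
# the tests); the exact route shape is an assumption.
from urllib.parse import quote_plus

execution_date = '2016-11-16T11:34:15'
encoded = quote_plus(execution_date)  # '2016-11-16T11%3A34%3A15'
url = '/api/experimental/dags/{}/dag_runs/{}'.format('example_bash_operator',
                                                     encoded)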
def trigger_dag(dag_id):
    """
    Trigger a new dag run for a Dag with an execution date of now unless
    specified in the data.
    """
    data = request.get_json(force=True)

    run_id = None
    if "run_id" in data:
        run_id = data["run_id"]

    conf = None
    if "conf" in data:
        conf = data["conf"]

    execution_date = None
    if "execution_date" in data and data["execution_date"] is not None:
        execution_date = data["execution_date"]

        # Convert string datetime into actual datetime
        try:
            execution_date = timezone.parse(execution_date)
        except ValueError:
            error_message = (
                "Given execution date, {}, could not be identified "
                "as a date. Example date format: 2015-11-16T14:34:15+00:00"
                .format(execution_date))
            response = jsonify({"error": error_message})
            response.status_code = 400
            return response

    try:
        dr = trigger.trigger_dag(dag_id, run_id, conf, execution_date)
    except AirflowException as err:
        response = jsonify(error="{}".format(err))
        response.status_code = err.status_code
        return response

    response = jsonify(message="Created {}".format(dr))
    return response
def start_pipeline(self, id):
    from airflow.api.common.experimental.trigger_dag import trigger_dag
    from airflow import models

    models.DagModel.get_dagmodel(id).set_is_paused(is_paused=False)
    run = trigger_dag(id)
    return dict(result=run.run_id if run else None)
def execute(self, context: Dict):
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Ignore MyPy type for self.execution_date
        # because it doesn't pick up the timezone.parse() for strings
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=self.execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if self.reset_dag_run:
            self.log.info("Clearing %s on %s", self.trigger_dag_id,
                          self.execution_date)

            # Get target dag object and call clear()
            dag_model = DagModel.get_current(self.trigger_dag_id)
            if dag_model is None:
                raise DagNotFound(
                    f"Dag id {self.trigger_dag_id} not found in DagModel")

            dag_bag = DagBag(dag_folder=dag_model.fileloc,
                             read_dags_from_db=True)
            dag = dag_bag.get_dag(self.trigger_dag_id)
            dag.clear(start_date=self.execution_date,
                      end_date=self.execution_date)
            dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]
        else:
            raise e

    if self.wait_for_completion:
        # wait for dag to complete
        while True:
            self.log.info(
                'Waiting for %s on %s to become allowed state %s ...',
                self.trigger_dag_id,
                dag_run.execution_date,
                self.allowed_states,
            )
            time.sleep(self.poke_interval)

            dag_run.refresh_from_db()
            state = dag_run.state
            if state in self.failed_states:
                raise AirflowException(
                    f"{self.trigger_dag_id} failed with failed states {state}"
                )
            if state in self.allowed_states:
                self.log.info("%s finished with allowed state %s",
                              self.trigger_dag_id, state)
                return
def trigger_dag(self, dag_id, run_id=None, conf=None, execution_date=None):
    dag_run = trigger_dag.trigger_dag(dag_id=dag_id,
                                      run_id=run_id,
                                      conf=conf,
                                      execution_date=execution_date)
    return "Created {}".format(dag_run)
def production_trigger__callable(
    *, dag_run: DagRun, files_path: Path, cdr_type_config: dict, **kwargs
):
    """
    Function that determines which files in files/ should be processed
    and triggers the correct ETL dag with config based on filename.

    Parameters
    ----------
    dag_run : DagRun
        Passed as part of the Dag context - contains the config.
    files_path : Path
        Location of files directory
    cdr_type_config : dict
        ETL config for each cdr type
    """
    session = get_session()

    for cdr_type, cfg in cdr_type_config.items():
        cdr_type = CDRType(cdr_type)
        source_type = cfg["source"]["source_type"]
        logger.info(f"Config for {cdr_type!r} ({source_type}): {cfg}")

        if source_type == "csv":
            filename_pattern = cfg["source"]["filename_pattern"]
            logger.info(f"Filename pattern: {filename_pattern!r}")

            all_files_found = find_files_matching_pattern(
                files_path=files_path, filename_pattern=filename_pattern
            )
            dates_found = {
                filename: extract_date_from_filename(filename, filename_pattern)
                for filename in all_files_found
            }
            unprocessed_files_and_dates = {
                filename: date
                for filename, date in dates_found.items()
                if ETLRecord.can_process(
                    cdr_type=cdr_type, cdr_date=date, session=session
                )
            }

            for file, cdr_date in unprocessed_files_and_dates.items():
                uuid = uuid1()
                cdr_date_str = cdr_date.strftime("%Y%m%d")
                execution_date = pendulum.Pendulum(
                    cdr_date.year, cdr_date.month, cdr_date.day
                )
                config = {
                    "cdr_type": cdr_type,
                    "cdr_date": cdr_date,
                    "file_name": file,
                    "template_path": f"etl/{cdr_type}",
                }
                trigger_dag(
                    f"etl_{cdr_type}",
                    execution_date=execution_date,
                    run_id=f"{cdr_type.upper()}_{cdr_date_str}-{str(uuid)}",
                    conf=config,
                    replace_microseconds=False,
                )
        elif source_type == "sql":
            source_table = cfg["source"]["table_name"]

            # Extract unprocessed dates from source_table
            # TODO: this requires a full parse of the existing data so may not
            # be the most efficient approach if a lot of data is present (esp.
            # data that has already been processed). If it turns out too
            # sluggish it might be good to think about a more efficient way to
            # determine dates with unprocessed data.
            dates_present = find_distinct_dates_in_table(
                session, source_table, event_time_col="event_time"
            )
            unprocessed_dates = [
                date
                for date in dates_present
                if ETLRecord.can_process(
                    cdr_type=cdr_type, cdr_date=date, session=session
                )
            ]
            logger.info(f"Dates found: {dates_present}")
            logger.info(f"Unprocessed dates: {unprocessed_dates}")

            for cdr_date in unprocessed_dates:
                uuid = uuid1()
                cdr_date_str = cdr_date.strftime("%Y%m%d")
                execution_date = pendulum.Pendulum(
                    cdr_date.year, cdr_date.month, cdr_date.day
                )
                config = {
                    "cdr_type": cdr_type,
                    "cdr_date": cdr_date,
                    "source_table": source_table,
                }
                trigger_dag(
                    f"etl_{cdr_type}",
                    execution_date=execution_date,
                    run_id=f"{cdr_type.upper()}_{cdr_date_str}-{str(uuid)}",
                    conf=config,
                    replace_microseconds=False,
                )
        else:
            raise ValueError(f"Invalid source type: '{source_type}'")
def trigger_dag(self, dag_id, run_id, conf):
    return trigger_dag.trigger_dag(dag_id=dag_id,
                                   run_id=run_id,
                                   conf=conf,
                                   replace_microseconds=False)
def request_ingest():
    authorization = request.headers.get('authorization')
    LOGGER.info('top of request_ingest.')
    assert authorization[:len('BEARER')].lower() == 'bearer', \
        'authorization is not BEARER'
    substr = authorization[len('BEARER'):].strip()
    if 'nexus' in substr:
        auth_dct = ast.literal_eval(substr)
        LOGGER.info('auth_dct: %s', auth_dct)
        assert 'nexus_token' in auth_dct, 'authorization has no nexus_token'
        auth_tok = auth_dct['nexus_token']
    else:
        auth_tok = substr
    # LOGGER.info('auth_tok: %s', auth_tok)  # reduce visibility of auth_tok

    # decode input
    data = request.get_json(force=True)
    LOGGER.debug('request_ingest data: {}'.format(str(data)))

    # Test and extract required parameters
    try:
        provider = _get_required_string(data, 'provider')
        submission_id = _get_required_string(data, 'submission_id')
        process = _get_required_string(data, 'process')
        full_path = _get_required_string(data, 'full_path')
    except HubmapApiInputException as e:
        return HubmapApiResponse.bad_request(
            'Must specify {} to request data be ingested'.format(str(e)))

    # necessary because config parser has made the corresponding string lower case
    process = process.lower()

    try:
        dag_id = config('ingest_map', process)
    except HubmapApiConfigException:
        return HubmapApiResponse.bad_request(
            '{} is not a known ingestion process'.format(process))

    try:
        check_ingest_parms(provider, submission_id, process, full_path)

        session = settings.Session()

        dagbag = DagBag('dags')

        if dag_id not in dagbag.dags:
            LOGGER.warning('Requested dag {} not among {}'.format(
                dag_id, [did for did in dagbag.dags]))
            LOGGER.warning('Dag dir full path {}'.format(
                os.path.abspath('dags')))
            return HubmapApiResponse.not_found(
                "Dag id {} not found".format(dag_id))

        dag = dagbag.get_dag(dag_id)

        # Produce one and only one run
        tz = pytz.timezone(config('core', 'timezone'))
        execution_date = datetime.now(tz)
        LOGGER.info('starting {} with execution_date: {}'.format(
            dag_id, execution_date))

        run_id = '{}_{}_{}'.format(submission_id, process,
                                   execution_date.isoformat())
        ingest_id = run_id
        fernet = Fernet(config('core', 'fernet_key').encode())
        crypt_auth_tok = fernet.encrypt(auth_tok.encode()).decode()

        conf = {
            'provider': provider,
            'submission_id': submission_id,
            'process': process,
            'dag_id': dag_id,
            'run_id': run_id,
            'ingest_id': ingest_id,
            'crypt_auth_tok': crypt_auth_tok,
            'src_path': config('connections', 'src_path'),
            'lz_path': full_path
        }

        if find_dag_runs(session, dag_id, run_id, execution_date):
            # The run already happened??
            raise AirflowException('The request happened twice?')

        try:
            dr = trigger_dag.trigger_dag(dag_id, run_id, conf,
                                         execution_date=execution_date)
        except AirflowException as err:
            LOGGER.error(err)
            raise AirflowException(
                "Attempt to trigger run produced an error: {}".format(err))

        LOGGER.info('dagrun follows: {}'.format(dr))
        # dag.create_dagrun(
        #     run_id=run['run_id'],
        #     execution_date=run['execution_date'],
        #     state=State.RUNNING,
        #     conf=conf,
        #     external_trigger=True
        # )
        # results.append(run['run_id'])
        session.close()
    except HubmapApiInputException as e:
        return HubmapApiResponse.bad_request(str(e))
    except ValueError as e:
        return HubmapApiResponse.server_error(str(e))
    except AirflowException as e:
        return HubmapApiResponse.server_error(str(e))
    except Exception as e:
        return HubmapApiResponse.server_error(str(e))

    return HubmapApiResponse.success({
        'ingest_id': ingest_id,
        'run_id': run_id
    })
def trigger_event(event, pipeline):
    from airflow.api.common.experimental.trigger_dag import trigger_dag
    from airflow import models

    dag_id = f'event_handler_{event}_pipeline_dag'
    models.DagModel.get_dagmodel(dag_id).set_is_paused(is_paused=False)
    trigger_dag(dag_id, conf=pipeline)
def trigger_dag(self, dag_id, run_id=None, conf=None):
    dr = trigger_dag.trigger_dag(dag_id=dag_id, run_id=run_id, conf=conf)
    return "Created {}".format(dr)
def trigger_dag(self, dag_id, execution_date):
    """
    Trigger a new dag run for a Dag with an execution date.
    """
    execution_date = timezone.parse(execution_date)
    trigger.trigger_dag(dag_id, None, None, execution_date)
def trigger_dag_python(**context):
    trigger_dag(dag_id='tutorial',
                run_id="triggered__" + str(datetime.utcnow()),
                conf={'process_id': '1'},
                replace_microseconds=False)
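# Sketch of wiring trigger_dag_python into a DAG. Assumptions: Airflow 1.x
# style PythonOperator where provide_context=True injects **context, and a
# `dag` object defined in the surrounding module.
from airflow.operators.python_operator import PythonOperator

run_trigger = PythonOperator(
    task_id='trigger_tutorial_dag',
    python_callable=trigger_dag_python,
    provide_context=True,
    dag=dag,
)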
def execute(self, context: Dict):
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    if self.trigger_run_id:
        run_id = self.trigger_run_id
    else:
        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)

    try:
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=self.execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if self.reset_dag_run:
            self.log.info("Clearing %s on %s", self.trigger_dag_id,
                          self.execution_date)

            # Get target dag object and call clear()
            dag_model = DagModel.get_current(self.trigger_dag_id)
            if dag_model is None:
                raise DagNotFound(
                    f"Dag id {self.trigger_dag_id} not found in DagModel")

            dag_bag = DagBag(dag_folder=dag_model.fileloc,
                             read_dags_from_db=True)
            dag = dag_bag.get_dag(self.trigger_dag_id)
            dag.clear(start_date=self.execution_date,
                      end_date=self.execution_date)
            dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]
        else:
            raise e

    # Store the execution date from the dag run (either created or found
    # above) to be used when creating the extra link on the webserver.
    ti = context['task_instance']
    ti.xcom_push(key=XCOM_EXECUTION_DATE_ISO,
                 value=dag_run.execution_date.isoformat())
    ti.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

    if self.wait_for_completion:
        # wait for dag to complete
        while True:
            self.log.info(
                'Waiting for %s on %s to become allowed state %s ...',
                self.trigger_dag_id,
                dag_run.execution_date,
                self.allowed_states,
            )
            time.sleep(self.poke_interval)

            dag_run.refresh_from_db()
            state = dag_run.state
            if state in self.failed_states:
                raise AirflowException(
                    f"{self.trigger_dag_id} failed with failed states {state}")
            if state in self.allowed_states:
                self.log.info("%s finished with allowed state %s",
                              self.trigger_dag_id, state)
                return
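# Minimal sketch of driving the operator above from a parent DAG, assuming
# it exposes the usual Airflow 2.x TriggerDagRunOperator constructor
# arguments; dag ids, dates and intervals are placeholders.
from datetime import datetime

from airflow import DAG
from airflow.operators.trigger_dagrun import TriggerDagRunOperator

with DAG(dag_id='parent_dag', start_date=datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    trigger = TriggerDagRunOperator(
        task_id='trigger_child',
        trigger_dag_id='child_dag',   # dag to fire
        conf={'message': 'hello'},    # becomes dag_run.conf in the child
        reset_dag_run=True,           # clear + rerun on DagRunAlreadyExists
        wait_for_completion=True,     # poll child run state (poke_interval)
        poke_interval=30,
    )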
def pollForFiles(**kwargs):
    # Create some local scope variables for use later in proc
    sftpName = kwargs['SFTP_Name']
    sftpConnName = kwargs['SFTP_Connection_Name']
    feedGroups = kwargs['Feed_Groups']

    # Connect to SFTP site using provided credentials - should be saved in Connections
    sourceHook = SFTPHook(ftp_conn_id=sftpConnName)

    # Create empty dictionary for storing files that match file masks
    fileMatches = {}

    # Loop through feed locations and their regex for this SFTP site.
    for i in feedGroups:
        fullPath = i['Feed_Group_Location']
        filePattern = i['Feed_Group_Regex']
        feedGroupName = i['Feed_Group_Name']
        try:
            directory = sourceHook.describe_directory(path=fullPath)
            for file in directory.keys():
                if re.match(filePattern, file):
                    fileMatches[os.path.join(fullPath, file)] = directory[file]
        except Exception as e:
            logging.error('Error attempting to poll feed group {} in directory {}'
                          .format(feedGroupName, fullPath))
            raise e

    # If we do not find a file that matches a file mask in any of the directories, exit.
    if not fileMatches:
        return 0

    # If no trigger files or renaming is utilized by the client when placing
    # files on SFTP, we have to resort to polling for files, waiting for a time
    # period and then comparing the size/modified time to see if they are ready
    # to pull down.
    time.sleep(SLEEP_TIME)

    for j in feedGroups:
        fullPath = j['Feed_Group_Location']
        filePattern = j['Feed_Group_Regex']
        feedGroupName = j['Feed_Group_Name']
        newFileMatches = {}
        try:
            newDirResults = sourceHook.describe_directory(fullPath)

            # Add only the files that match regular expression for this feed group
            for file in newDirResults.keys():
                if re.match(filePattern, file):
                    newFileMatches[os.path.join(fullPath, file)] = newDirResults[file]

            for file in newFileMatches.keys():
                # fullFilePath = os.path.join(fullPath, file)
                if file in fileMatches.keys():
                    if newFileMatches[file]['size'] == fileMatches[file]['size'] and \
                            newFileMatches[file]['modify'] == fileMatches[file]['modify']:
                        readyFile = file + '.ready'
                        # If file hasn't changed size or modified time since
                        # first look, set to ready for another process to pick
                        # up and transfer.
                        sourceHook.conn.rename(file, readyFile)
                        logging.info('SFTP: {} FeedGroup: {} File: {} is ready.'
                                     .format(sftpName, feedGroupName,
                                             os.path.basename(file)))
                        triggerConfig = {
                            'SFTP_Name': sftpName,
                            'SFTP_Connection_Name': sftpConnName,
                            'File_Name': readyFile,
                        }
                        triggerConfig.update(j)
                        trigger_dag(
                            dag_id='SingleFileTransferJob',
                            run_id='trig_{}'.format(timezone.utcnow().isoformat()),
                            conf=json.dumps(triggerConfig),
                            execution_date=None,
                            replace_microseconds=False
                        )
        except Exception as e:
            logging.error('Error attempting to rename files in feed group {} in directory {}'
                          .format(feedGroupName, fullPath))
            raise e
def trigger_dag(self, dag_id, run_id=None, conf=None, execution_date=None):
    dr = trigger_dag.trigger_dag(dag_id=dag_id, run_id=run_id, conf=conf,
                                 execution_date=execution_date)
    return "Created {}".format(dr)
def get_list_mongo_meta(**kwargs):
    meta_base = MongoClient("mongodb://" + globals()["META_MONGO_IP"] + ":" +
                            globals()["MONGO_PORT"] + "/")
    # meta_base = MongoHook(globals()["MONGO_META_CONN_ID"])
    data_to_process_list = meta_base.stats["swift"].find_one(
        {"type": "data_to_process_list"})
    swift_data_list = data_to_process_list["data_to_process"]

    for data_doc in swift_data_list:
        run_id = '%s_%s_%s:%s' % (
            data_doc["swift_user"], data_doc["swift_container"],
            data_doc["swift_id"],
            datetime.datetime.utcnow().replace(microsecond=0).isoformat())
        trigger_dag(dag_id=data_account[data_doc["swift_container"]]
                    [data_doc["content_type"]],
                    run_id=run_id,
                    conf=data_doc)
        logging.info('triggering dag %s with %s' % (run_id, data_doc))

        meta_base.stats["swift"].find_one_and_update(
            {"type": "data_to_process_list"},
            {"$pop": {"data_to_process": -1}})

        today = datetime.date.today()
        tomorrow = today + datetime.timedelta(days=1)
        today = datetime.datetime(today.year, today.month, today.day)
        tomorrow = datetime.datetime(tomorrow.year, tomorrow.month,
                                     tomorrow.day)
        if meta_base.stats["data_log"].find_one(
                {"date": {"$gte": today, "$lt": tomorrow}}) is None:
            meta_base.stats["data_log"].insert_one({
                "date": today,
                "data_processed": [{
                    "swift_id": data_doc["swift_id"],
                    "swift_container": data_doc["swift_container"],
                    "content_type": data_doc["content_type"]
                }]
            })
        else:
            meta_base.stats["data_log"].find_one_and_update(
                {"date": {"$gte": today, "$lt": tomorrow}},
                {"$push": {
                    "data_processed": {
                        "swift_id": data_doc["swift_id"],
                        "swift_container": data_doc["swift_container"],
                        "content_type": data_doc["content_type"]
                    }
                }})
        # Only the first pending item is handled per invocation (the $pop
        # above removes it from the queue), so return after processing it.
        return
    raise AirflowSkipException('No external dags triggered')