def test_get_dag_fileloc(self):
    """
    Test that fileloc is correctly set when we load example DAGs,
    specifically SubDAGs and packaged DAGs.
    """
    dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True)
    dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_zip.zip"))

    expected = {
        'example_bash_operator': 'airflow/example_dags/example_bash_operator.py',
        'example_subdag_operator': 'airflow/example_dags/example_subdag_operator.py',
        'example_subdag_operator.section-1': 'airflow/example_dags/subdags/subdag.py',
        'test_zip_dag': 'dags/test_zip.zip/test_zip.py'
    }

    for dag_id, path in expected.items():
        dag = dagbag.get_dag(dag_id)
        self.assertTrue(dag.fileloc.endswith(path))
def test_get_existing_dag(self):
    """
    Test that we're able to parse some example DAGs and retrieve them
    """
    dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True)

    some_expected_dag_ids = ["example_bash_operator",
                             "example_branch_operator"]

    for dag_id in some_expected_dag_ids:
        dag = dagbag.get_dag(dag_id)

        self.assertIsNotNone(dag)
        self.assertEqual(dag_id, dag.dag_id)

    self.assertGreaterEqual(dagbag.size(), 7)
def test_on_kill(self):
    """
    Test that ensures that clearing in the UI SIGTERMs the task
    """
    path = "/tmp/airflow_on_kill"
    try:
        os.unlink(path)
    except OSError:
        pass

    dagbag = models.DagBag(
        dag_folder=TEST_DAG_FOLDER,
        include_examples=False,
    )
    dag = dagbag.dags.get('test_on_kill')
    task = dag.get_task('task1')

    session = settings.Session()

    dag.clear()
    dag.create_dagrun(run_id="test",
                      state=State.RUNNING,
                      execution_date=DEFAULT_DATE,
                      start_date=DEFAULT_DATE,
                      session=session)
    ti = TI(task=task, execution_date=DEFAULT_DATE)
    job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True)

    runner = StandardTaskRunner(job1)
    runner.start()

    # Give the task some time to start up
    time.sleep(10)

    runner.terminate()

    # Wait a while for the result file to appear
    for _ in range(20):
        if os.path.exists(path):
            break
        time.sleep(2)

    with open(path, "r") as f:
        self.assertEqual("ON_KILL_TEST", f.readline())
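# A minimal sketch of the kind of fixture DAG the test above assumes lives in
# TEST_DAG_FOLDER under the id 'test_on_kill' (illustrative, not the actual
# fixture): a long-running task whose on_kill hook writes the ON_KILL_TEST
# marker that the assertion reads back after runner.terminate(). The operator
# subclass, its sleep duration, and the start_date are assumptions.
import datetime
import time

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator


class DummyWithOnKill(DummyOperator):
    def execute(self, context):
        # Stay running long enough for the test to SIGTERM us.
        time.sleep(60)

    def on_kill(self):
        # Leave a marker so the test can verify the hook actually ran.
        with open("/tmp/airflow_on_kill", "w") as f:
            f.write("ON_KILL_TEST")


dag = DAG(dag_id='test_on_kill', start_date=datetime.datetime(2016, 1, 1))
task1 = DummyWithOnKill(task_id='task1', dag=dag)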
def test_heartbeat_failed_fast(self, mock_getpid):
    """
    Test that task heartbeat will sleep when it fails fast
    """
    mock_getpid.return_value = 1

    heartbeat_records = []

    def heartbeat_recorder():
        heartbeat_records.append(timezone.utcnow())

    with create_session() as session:
        dagbag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag_id = 'test_heartbeat_failed_fast'
        task_id = 'test_heartbeat_failed_fast_op'
        dag = dagbag.get_dag(dag_id)
        task = dag.get_task(task_id)

        dag.create_dagrun(run_id="test_heartbeat_failed_fast_run",
                          state=State.RUNNING,
                          execution_date=DEFAULT_DATE,
                          start_date=DEFAULT_DATE,
                          session=session)
        ti = TI(task=task, execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        ti.state = State.RUNNING
        ti.hostname = get_hostname()
        ti.pid = 1
        session.commit()

        job = LocalTaskJob(task_instance=ti,
                           executor=TestExecutor(do_update=False))
        job.heartrate = 2
        job.heartbeat = heartbeat_recorder
        job._execute()

        self.assertGreater(len(heartbeat_records), 1)
        for i in range(1, len(heartbeat_records)):
            time1 = heartbeat_records[i - 1]
            time2 = heartbeat_records[i]
            self.assertGreaterEqual((time2 - time1).total_seconds(),
                                    job.heartrate)
def test_heartbeat_failed_fast(self, mock_getpid):
    """
    Test that task heartbeat will sleep when it fails fast
    """
    mock_getpid.return_value = 1
    self.mock_base_job_sleep.side_effect = time.sleep

    with create_session() as session:
        dagbag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag_id = 'test_heartbeat_failed_fast'
        task_id = 'test_heartbeat_failed_fast_op'
        dag = dagbag.get_dag(dag_id)
        task = dag.get_task(task_id)

        dag.create_dagrun(run_id="test_heartbeat_failed_fast_run",
                          state=State.RUNNING,
                          execution_date=DEFAULT_DATE,
                          start_date=DEFAULT_DATE,
                          session=session)
        ti = TI(task=task, execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        ti.state = State.RUNNING
        ti.hostname = get_hostname()
        ti.pid = 1
        session.commit()

        job = LocalTaskJob(task_instance=ti,
                           executor=MockExecutor(do_update=False))
        job.heartrate = 2
        heartbeat_records = []
        job.heartbeat_callback = lambda session: heartbeat_records.append(
            job.latest_heartbeat)
        job._execute()

        self.assertGreater(len(heartbeat_records), 2)
        for i in range(1, len(heartbeat_records)):
            time1 = heartbeat_records[i - 1]
            time2 = heartbeat_records[i]
            # Assert that the difference is small enough to avoid flakiness like:
            # AssertionError: 1.996401 not greater than or equal to 2
            delta = (time2 - time1).total_seconds()
            self.assertAlmostEqual(delta, job.heartrate, delta=0.006)
def show_columns(selectedId, selectedNodeProps):
    if not selectedId or not selectedNodeProps:
        return Alert('Select a task to watch the output!')

    output_table = selectedNodeProps.get('output_table', '')
    if not output_table:
        return Alert('This operator has no output_table property. '
                     'Is it a DataDriver workflow?')

    dag_id = selectedNodeProps.get('parent_dag_id', '')

    from airflow import models, settings
    dagbag = models.DagBag(settings.DAGS_FOLDER)
    fileloc = dagbag.get_dag(dag_id).fileloc

    db = _get_db_from_datadriver_dag(fileloc)
    if db is None:
        return Alert("Object named 'db' not found in {}".format(fileloc))

    logging.info("DB type is {} \t\t {}".format(type(db), db))
    try:
        df = db.retrieve_table(output_table)
    except Exception as e:
        return Error(
            f"An exception occurred when reading output_table {output_table}: {e}"
        )

    describe_datatable = plot.describe_dashtable(df)
    describe_bar_chart = plot.col_histograms(df)
    head_and_tail_tables = plot.head_and_tail_tables(df)

    tab_head_tail = Tab(label='Head and tail', children=[head_and_tail_tables])
    tab_bar_chart = Tab(label='Bar-chart by column', children=[describe_bar_chart])
    tab_describe = Tab(label='Describe frame', children=[describe_datatable])

    return Panel(head="Select a Tab to see the statistics on the task's output",
                 body=Tabs([tab_describe, tab_bar_chart, tab_head_tail],
                           colors={"border": "white",
                                   "primary": "#00a9c5",
                                   "background": "#e0e0e0"}))
def test_depends_on_past(self):
    dagbag = models.DagBag()
    dag = dagbag.get_dag('test_depends_on_past')
    dag.clear()
    task = dag.tasks[0]
    run_date = task.start_date + datetime.timedelta(days=5)
    ti = TI(task, run_date)

    # depends_on_past prevents the run
    task.run(start_date=run_date, end_date=run_date)
    ti.refresh_from_db()
    self.assertIs(ti.state, None)

    # ignore first depends_on_past to allow the run
    task.run(start_date=run_date, end_date=run_date,
             ignore_first_depends_on_past=True)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
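# A minimal sketch of the fixture DAG the test above assumes (illustrative,
# not the actual file): a single task with depends_on_past=True, so its first
# run stays unscheduled until ignore_first_depends_on_past=True is passed.
# The task id and start_date are assumptions.
import datetime

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator

dag = DAG(dag_id='test_depends_on_past',
          start_date=datetime.datetime(2016, 1, 1),
          default_args={'depends_on_past': True})
task = DummyOperator(task_id='test_dop_task', dag=dag)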
def _execute(self):
    dag_id = self.dag_id

    def signal_handler(signum, frame):
        logging.error("SIGINT (ctrl-c) received")
        sys.exit(1)

    signal.signal(signal.SIGINT, signal_handler)

    utils.pessimistic_connection_handling()

    # Sleep time (seconds) between master runs
    logging.basicConfig(level=logging.DEBUG)
    logging.info("Starting a master scheduler")

    # This should get new code
    dagbag = models.DagBag(self.subdir)
    executor = dagbag.executor
    executor.start()
    i = 0
    while (not self.test_mode) or i < 1:
        i += 1
        if i % self.refresh_dags_every == 0:
            dagbag.collect_dags(only_if_updated=False)
        else:
            dagbag.collect_dags(only_if_updated=True)
        if dag_id:
            dags = [dagbag.dags[dag_id]]
        else:
            dags = [dag for dag in dagbag.dags.values()
                    if not dag.parent_dag]
        paused_dag_ids = dagbag.paused_dags()
        for dag in dags:
            if dag.dag_id in paused_dag_ids:
                continue
            try:
                self.process_dag(dag, executor)
            except Exception as e:
                logging.exception(e)
        self.heartbeat()
    executor.end()
def _run_dag(self, dag_id=None):
    self.log.info("Attempting to run DAG: %s", self.dag_id)
    if not self.setup_called:
        raise AirflowException("Please make sure to call super.setUp() in your "
                               "test class!")
    dag_folder = self._get_dag_folder()
    dag_bag = models.DagBag(dag_folder=dag_folder, include_examples=False)
    self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    dag = dag_bag.get_dag(self.dag_id or dag_id)
    if dag is None:
        raise AirflowException(
            "The Dag {} could not be found. It's either an import problem or "
            "the dag {} was not symlinked to the DAGs folder. "
            "The content of the {} folder is {}".format(
                self.dag_id,
                self.dag_id + ".py",
                dag_folder,
                os.listdir(dag_folder)))
    dag.clear(reset_dag_runs=True)
    dag.run(ignore_first_depends_on_past=True, verbose=True)
def test_dag():
    with testing.postgresql.Postgresql() as postgresql:
        with patch.dict(os.environ, {
            'API_V1_DB_URL': postgresql.url(),
            'OUTPUT_FOLDER': 'tests/api_sync_v1/input'
        }):
            configuration.load_test_config()
            # The scheduler messages, which will show up if something happens
            # to screw up execution, are INFO level, so save us some headaches
            # by switching to that loglevel here
            logging.basicConfig(level=logging.INFO)
            bag = models.DagBag()
            dag = bag.get_dag(dag_id='open_skills_master.api_v1_sync')
            # expire old DAG runs, otherwise the max of 16 will automatically
            # get scheduled
            dag.dagrun_timeout = 1
            dag.clear()
            job = BackfillJob(
                dag=dag,
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
            )
            job.run()
            engine = create_engine(postgresql.url())
            session = sessionmaker(engine)()
            num_jobs = session.query(JobMaster).count()
            assert num_jobs > 1
            num_skills = session.query(SkillMaster).count()
            assert num_skills > 1
            num_importances = session.query(SkillImportance).count()
            assert num_importances > 1
            assert session.query(GeoTitleCount).count() > 1
            assert session.query(TitleCount).count() > 1

            # make sure non-temporal data doesn't
            # load twice for a different quarter
            new_date = datetime(2014, 5, 1)
            dag.clear(start_date=new_date, end_date=new_date)
            dag.run(start_date=new_date, end_date=new_date, local=True)
            assert session.query(JobMaster).count() == num_jobs
            assert session.query(SkillMaster).count() == num_skills
            assert session.query(SkillImportance).count() == num_importances
def test_dag_sample_w_template_actual_run():
    dagbag = models.DagBag(dag_folder=DAG_DIR, include_examples=False)
    dag = dagbag.get_dag(dag_id="dag_sample_w_template")  # type: models.DAG
    dag.run(
        start_date=DEFAULT_DATE,
        ignore_first_depends_on_past=True,
        verbose=True,
        executor=DebugExecutor(),
    )

    session = settings.Session()  # type: SASession
    dagruns = session.query(DagRun) \
        .filter(DagRun.dag_id == dag.dag_id) \
        .order_by(DagRun.execution_date) \
        .all()  # type: List[models.DagRun]
    assert len(dagruns) == 1
    assert dagruns[0].execution_date == DEFAULT_DATE
    assert dagruns[0].state == state.State.SUCCESS
def setUp(self):
    self.dagbag = models.DagBag(
        dag_folder=TEST_DAG_FOLDER,
        include_examples=False,
    )
    try:
        subprocess.check_output(['sudo', 'useradd', '-m', TEST_USER,
                                 '-g', str(os.getegid())])
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise unittest.SkipTest(
                "The 'useradd' command did not exist so unable to test "
                "impersonation; Skipping Test. These tests can only be run on a "
                "linux host that supports 'useradd'."
            )
        else:
            raise unittest.SkipTest(
                "The 'useradd' command exited non-zero; Skipping tests. Does the "
                "current user have permission to run 'useradd' without a password "
                "prompt (check sudoers file)?"
            )
def test_get_dag_fileloc(self):
    """
    Test that fileloc is correctly set when we load example DAGs,
    specifically SubDAGs and packaged DAGs.
    """
    dagbag = models.DagBag(include_examples=True)

    expected = {
        'example_bash_operator': 'airflow/example_dags/example_bash_operator.py',
        'example_subdag_operator': 'airflow/example_dags/example_subdag_operator.py',
        'example_subdag_operator.section-1': 'airflow/example_dags/subdags/subdag.py',
        'test_zip_dag': 'tests/dags/test_zip.zip/test_zip.py'
    }

    for dag_id, path in expected.items():
        dag = dagbag.get_dag(dag_id)
        self.assertTrue(dag.fileloc.endswith(path))
def test_kill_zombies(self, mock_ti_handle_failure):
    """
    Test that kill_zombies calls the TI's failure handler with proper context
    """
    dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True)
    with create_session() as session:
        session.query(TI).delete()
        dag = dagbag.get_dag('example_branch_operator')
        task = dag.get_task(task_id='run_this_first')

        ti = TI(task, DEFAULT_DATE, State.RUNNING)

        session.add(ti)
        session.commit()

        zombies = [SimpleTaskInstance(ti)]
        dagbag.kill_zombies(zombies)
        mock_ti_handle_failure.assert_called_once_with(
            ANY,
            conf.getboolean('core', 'unit_test_mode'),
            ANY
        )
def run_dag(self, dag_id: str, dag_folder: str = DEFAULT_DAG_FOLDER) -> None:
    """
    Runs an example DAG by its ID.

    :param dag_id: id of a DAG to be run
    :type dag_id: str
    :param dag_folder: directory where to look for the specific DAG. Relative to AIRFLOW_HOME.
    :type dag_folder: str
    """
    if os.environ.get("RUN_AIRFLOW_1_10"):
        # For system tests purposes we are mounting airflow/providers to the
        # /providers folder so that we can get example_dags from there
        dag_folder = dag_folder.replace("/opt/airflow/airflow/providers", "/providers")
        temp_dir = mkdtemp()
        os.rmdir(temp_dir)
        shutil.copytree(dag_folder, temp_dir)
        dag_folder = temp_dir
        self.correct_imports_for_airflow_1_10(temp_dir)
    self.log.info("Looking for DAG: %s in %s", dag_id, dag_folder)
    dag_bag = models.DagBag(dag_folder=dag_folder, include_examples=False)
    dag = dag_bag.get_dag(dag_id)
    if dag is None:
        raise AirflowException(
            "The Dag {dag_id} could not be found. It's either an import problem, "
            "a wrong dag_id, or the DAG is not in the provided dag_folder. "
            "The content of the {dag_folder} folder is {content}".format(
                dag_id=dag_id,
                dag_folder=dag_folder,
                content=os.listdir(dag_folder),
            ))

    self.log.info("Attempting to run DAG: %s", dag_id)
    dag.clear(reset_dag_runs=True)
    try:
        dag.run(ignore_first_depends_on_past=True, verbose=True)
    except Exception:
        self._print_all_log_files()
        raise
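# A hedged usage sketch for run_dag above, called from within a test case of
# the class that defines it; the DAG id and folder here are hypothetical
# illustrations, not values taken from the source.
def test_run_example_dag(self):
    self.run_dag(
        dag_id="example_complex",
        dag_folder="/opt/airflow/airflow/providers/google/cloud/example_dags",
    )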
def delete_dag(dag_id, keep_records_in_log=True):
    """
    Deletes all database records related to the given DAG id.

    :param dag_id: the dag_id of the DAG to delete
    :type dag_id: str
    :param keep_records_in_log: whether to keep records of the given dag_id in the
        Log table in the backend database (for reasons like auditing).
        The default value is True.
    :type keep_records_in_log: bool
    """
    session = settings.Session()

    DM = models.DagModel
    dag = session.query(DM).filter(DM.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    dagbag = models.DagBag()
    if dag_id in dagbag.dags:
        raise DagFileExists("Dag id {} is still in DagBag. "
                            "Remove the DAG file first.".format(dag_id))

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for m in models.Base._decl_class_registry.values():
        if hasattr(m, "dag_id"):
            if keep_records_in_log and m.__name__ == 'Log':
                continue
            cond = or_(m.dag_id == dag_id, m.dag_id.like(dag_id + ".%"))
            count += session.query(m).filter(cond).delete(
                synchronize_session='fetch')

    if dag.is_subdag:
        p, c = dag_id.rsplit(".", 1)
        for m in models.DagRun, models.TaskFail, models.TaskInstance:
            count += session.query(m).filter(m.dag_id == p,
                                             m.task_id == c).delete()

    session.commit()

    return count
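# A minimal usage sketch for delete_dag above, assuming it is importable from
# this module; the dag_id 'obsolete_dag' is a hypothetical example. The DAG
# file itself must be removed first, or DagFileExists is raised.
try:
    count = delete_dag('obsolete_dag', keep_records_in_log=False)
    print("Deleted {} database rows".format(count))
except DagNotFound:
    print("No DAG with that id is registered")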
def setUp(self):
    super(CompletionMonitorDagTest, self).setUp()
    models.Variable.set('DAG_ID', _DAG_ID)
    models.Variable.set('GCP_PROJECT_ID', _PROJECT_ID)
    models.Variable.set('QUEUE_LOCATION', _QUEUE_LOCATION)
    models.Variable.set('QUEUE_NAME', _QUEUE_NAME)
    models.Variable.set('TRY_COUNT_LIMIT', _TRY_COUNT_LIMIT)
    models.Variable.set('MONITOR_DATASET_ID', _MONITOR_DATASET_ID)
    models.Variable.set('MONITOR_TABLE_ID', _MONITOR_TABLE_ID)
    models.Variable.set('LAST_PROCESS_RESULT_QUERY_FILE_PATH', _QUERY_FILE_PATH)
    models.Variable.set('DESTINATION_PUBSUB_TOPIC', _TOPIC_NAME)
    models.Variable.set('TIMEZONE_UTC_OFFSET', _TIMEZONE_UTC_OFFSET)
    models.Variable.set('FEED_DATASET_ID', _FEED_DATASET_ID)
    models.Variable.set('ITEMS_TABLE_ID', _ITEMS_TABLE_ID)
    models.Variable.set('EXPIRATION_TRACKING_TABLE_ID',
                        _EXPIRATION_TRACKING_TABLE_ID)
    models.Variable.set('ITEM_RESULTS_TABLE_ID', _ITEM_RESULTS_TABLE_ID)
    models.Variable.set('LOCK_BUCKET', _LOCK_BUCKET)
    self.dag_bag = models.DagBag(dag_folder='./')
    self.dag = self.dag_bag.dags.get(_DAG_ID)
def _load_from_file(self, file_path):
    try:
        from airflow import models

        # Use the DagBag module to load all dags from the given file
        dag_bag = models.DagBag(file_path, include_examples=False)
        # The DagBag object now contains a 'dags' dict mapping each dag id
        # to its dag object
        return dag_bag.dags
    except Exception:
        logger.warning(
            "Failed to load dag from %s. Exception:", file_path, exc_info=True
        )
    except SystemExit:
        logger.warning(
            "Failed to load dag from %s, due to SystemExit",
            file_path,
            exc_info=True,
        )

    return None
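# A hedged usage sketch for _load_from_file above ('loader' stands in for an
# instance of the defining class, and the path is a hypothetical example):
# the method returns either a dict mapping dag ids to DAG objects or None,
# so callers have to guard against None before iterating.
dags = loader._load_from_file("/opt/airflow/dags/my_pipeline.py")
if dags:
    for dag_id, dag in dags.items():
        logger.info("Loaded %s with %d tasks", dag_id, len(dag.tasks))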
def test_mark_success_no_kill(self):
    """
    Test that ensures that mark_success in the UI doesn't cause
    the task to fail, and that the task exits
    """
    dagbag = models.DagBag(
        dag_folder=TEST_DAG_FOLDER,
        include_examples=False,
    )
    dag = dagbag.dags.get('test_mark_success')
    task = dag.get_task('task1')

    session = settings.Session()

    dag.clear()
    dag.create_dagrun(run_id="test",
                      state=State.RUNNING,
                      execution_date=DEFAULT_DATE,
                      start_date=DEFAULT_DATE,
                      session=session)
    ti = TI(task=task, execution_date=DEFAULT_DATE)
    ti.refresh_from_db()
    job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True)
    process = multiprocessing.Process(target=job1.run)
    process.start()
    ti.refresh_from_db()
    for _ in range(0, 50):
        if ti.state == State.RUNNING:
            break
        time.sleep(0.1)
        ti.refresh_from_db()
    self.assertEqual(State.RUNNING, ti.state)
    ti.state = State.SUCCESS
    session.merge(ti)
    session.commit()

    process.join(timeout=10)
    self.assertFalse(process.is_alive())
    ti.refresh_from_db()
    self.assertEqual(State.SUCCESS, ti.state)
def export_data_directly(
    self,
    since,
    include_logs,
    include_task_args,
    include_xcom,
    dag_ids,
    quantity,
    incomplete_offset,
    dags_only,
):
    from airflow import models, settings, conf
    from airflow.settings import STORE_SERIALIZED_DAGS
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    from dbnd_airflow_export.dbnd_airflow_export_plugin import get_airflow_data

    conf.set("core", "sql_alchemy_conn", value=self.sql_conn_string)
    dagbag = models.DagBag(
        self.dag_folder if self.dag_folder else settings.DAGS_FOLDER,
        include_examples=True,
        store_serialized_dags=STORE_SERIALIZED_DAGS,
    )

    engine = create_engine(self.sql_conn_string)
    session = sessionmaker(bind=engine)
    result = get_airflow_data(
        dagbag=dagbag,
        since=since,
        include_logs=include_logs,
        include_task_args=include_task_args,
        include_xcom=include_xcom,
        dag_ids=dag_ids,
        quantity=quantity,
        incomplete_offset=incomplete_offset,
        dags_only=dags_only,
        session=session(),
    )
    return result
def export_data_directly(sql_alchemy_conn, dag_folder, since, include_logs,
                         dag_ids, tasks):
    from airflow import models, settings, conf
    from airflow.settings import STORE_SERIALIZED_DAGS
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    conf.set("core", "sql_alchemy_conn", value=sql_alchemy_conn)
    dagbag = models.DagBag(
        dag_folder if dag_folder else settings.DAGS_FOLDER,
        include_examples=True,
        store_serialized_dags=STORE_SERIALIZED_DAGS,
    )

    engine = create_engine(sql_alchemy_conn)
    session = sessionmaker(bind=engine)
    return _handle_export_data(dagbag, since, include_logs, dag_ids, tasks,
                               session=session())
def setUp(self):
    self.dagbag = models.DagBag(include_examples=True)
    self.dag1 = self.dagbag.dags['example_bash_operator']
    self.dag2 = self.dagbag.dags['example_subdag_operator']

    self.execution_dates = [days_ago(2), days_ago(1)]

    drs = _create_dagruns(self.dag1, self.execution_dates,
                          state=State.RUNNING,
                          run_id_template="scheduled__{}")
    for dr in drs:
        dr.dag = self.dag1
        dr.verify_integrity()

    drs = _create_dagruns(self.dag2,
                          [self.dag2.default_args['start_date']],
                          state=State.RUNNING,
                          run_id_template="scheduled__{}")
    for dr in drs:
        dr.dag = self.dag2
        dr.verify_integrity()
def test_localtaskjob_double_trigger(self):
    dagbag = models.DagBag(
        dag_folder=TEST_DAG_FOLDER,
        include_examples=False,
    )
    dag = dagbag.dags.get('test_localtaskjob_double_trigger')
    task = dag.get_task('test_localtaskjob_double_trigger_task')

    session = settings.Session()

    dag.clear()
    dr = dag.create_dagrun(run_id="test",
                           state=State.SUCCESS,
                           execution_date=DEFAULT_DATE,
                           start_date=DEFAULT_DATE,
                           session=session)
    ti = dr.get_task_instance(task_id=task.task_id, session=session)
    ti.state = State.RUNNING
    ti.hostname = get_hostname()
    ti.pid = 1
    session.merge(ti)
    session.commit()

    ti_run = TI(task=task, execution_date=DEFAULT_DATE)
    ti_run.refresh_from_db()
    job1 = LocalTaskJob(task_instance=ti_run,
                        executor=SequentialExecutor())
    from airflow.task.task_runner.standard_task_runner import StandardTaskRunner
    with patch.object(StandardTaskRunner, 'start', return_value=None) as mock_method:
        job1.run()
        mock_method.assert_not_called()

    ti = dr.get_task_instance(task_id=task.task_id, session=session)
    self.assertEqual(ti.pid, 1)
    self.assertEqual(ti.state, State.RUNNING)

    session.close()
def list(self):
    title = "DAG Dependencies"

    if DAGDependenciesView.dagbag is None:
        DAGDependenciesView.dagbag = models.DagBag(settings.DAGS_FOLDER)

    if datetime.utcnow() > self.last_refresh + timedelta(
            seconds=self.refresh_interval):
        DAGDependenciesView.dagbag.collect_dags()
        self.nodes, self.edges = self._generate_graph()
        self.last_refresh = datetime.utcnow()

    return self.render_template(
        "dag_dependencies.html",
        title=title,
        nodes=self.nodes,
        edges=self.edges,
        last_refresh=self.last_refresh.strftime("%Y-%m-%d %H:%M:%S"),
        arrange=conf.get("webserver", "dag_orientation"),
        width=request.args.get("width", "100%"),
        height=request.args.get("height", "800"),
    )
def prepare_dagruns(self):
    dagbag = models.DagBag(include_examples=True)
    self.bash_dag = dagbag.dags['example_bash_operator']
    self.sub_dag = dagbag.dags['example_subdag_operator']
    self.xcom_dag = dagbag.dags['example_xcom']

    self.bash_dagrun = self.bash_dag.create_dagrun(
        run_id=self.run_id,
        execution_date=self.default_date,
        start_date=timezone.utcnow(),
        state=State.RUNNING)

    self.sub_dagrun = self.sub_dag.create_dagrun(
        run_id=self.run_id,
        execution_date=self.default_date,
        start_date=timezone.utcnow(),
        state=State.RUNNING)

    self.xcom_dagrun = self.xcom_dag.create_dagrun(
        run_id=self.run_id,
        execution_date=self.default_date,
        start_date=timezone.utcnow(),
        state=State.RUNNING)
def test_kill_zombies_does_nothing(self, mock_ti_handle_failure):
    """
    Test that kill_zombies does nothing when the job is running and
    has received a heartbeat
    """
    dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True)
    with create_session() as session:
        session.query(TI).delete()
        session.query(LJ).delete()
        dag = dagbag.get_dag('example_branch_operator')
        task = dag.get_task(task_id='run_this_first')

        ti = TI(task, DEFAULT_DATE, State.RUNNING)
        lj = LJ(ti)
        lj.latest_heartbeat = utcnow()
        lj.state = State.RUNNING
        lj.id = 1
        ti.job_id = lj.id

        session.add(lj)
        session.add(ti)
        session.commit()

        dagbag.kill_zombies()
        mock_ti_handle_failure.assert_not_called()
def setUp(self):
    configuration.conf.load_test_config()
    self.dagbag = models.DagBag(dag_folder='/dev/null', include_examples=True)
    self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    self.dag = DAG(TEST_DAG_ID, default_args=self.args)
def create(self, validated_data):
    # TODO: Import Jinja2
    # TODO: Build the params dictionary
    execution = Execution.objects.get(pk=validated_data['execution_id'])
    min_long, max_long, min_lat, max_lat = self.get_area(
        validated_data['parameters'])
    params = dict(self.get_kwargs(validated_data['parameters']))
    params['lat'] = (min_lat, max_lat)
    params['lon'] = (min_long, max_long)
    params['products'] = self.get_product(validated_data['parameters'])
    params['time_ranges'] = self.get_time_periods(
        validated_data['parameters'])
    params['execID'] = 'exec_{}'.format(str(validated_data['execution_id']))
    params['elimina_resultados_anteriores'] = True
    params['genera_mosaico'] = validated_data['generate_mosaic']
    # params['owner'] = Execution.executed_by.
    params['owner'] = "API-REST"

    # TODO: Load the template
    template_path = os.path.join(os.environ['TEMPLATE_PATH'],
                                 slugify(validated_data['algorithm_name']))
    generic_template_path = os.path.join(os.environ['TEMPLATE_PATH'],
                                         "generic-template")
    if (execution.version is not None
            and execution.version.publishing_state == Version.PUBLISHED_STATE
            and os.path.exists(template_path)):
        file_loader = FileSystemLoader(template_path)
        env = Environment(loader=file_loader)
        algorithm_template_path = '{}_{}.py'.format(
            slugify(validated_data['algorithm_name']),
            validated_data['version_id'])
        template = env.get_template(algorithm_template_path)
    else:
        file_loader = FileSystemLoader(generic_template_path)
        env = Environment(loader=file_loader)
        algorithm_template_path = '{}_{}.py'.format("generic-template", "1.0")
        params['algorithm_name'] = slugify(validated_data['algorithm_name'])
        params['algorithm_version'] = validated_data['version_id']
        template = env.get_template(algorithm_template_path)

    # TODO: Render the template
    airflow_dag_path = os.environ['AIRFLOW_DAG_PATH']
    execution_dag_path = '{}/exec_{}.py'.format(
        airflow_dag_path, str(validated_data['execution_id']))
    output = template.render(params=params)
    with open(execution_dag_path, 'w') as dag:
        dag.write("from airflow.operators import CompressFileSensor\n")
        dag.write("from cdcol_utils import other_utils\n")
        dag.write(output)
        dag.write(
            "\nsensor_fin_ejecucion = CompressFileSensor(task_id='sensor_fin_ejecucion',poke_interval=60, soft_fail=True,mode='reschedule', queue='util', dag=dag) \n"
        )
        dag.write(
            "comprimir_resultados = PythonOperator(task_id='comprimir_resultados',provide_context=True,python_callable=other_utils.compress_results,queue='util',op_kwargs={'execID': args['execID']},dag=dag) \n"
        )
        dag.write("sensor_fin_ejecucion >> comprimir_resultados \n")

    execution.dag_id = params['execID']
    execution.save()

    # TODO: Run the workflow
    bash_command1 = '/home/cubo/anaconda/bin/airflow list_dags'
    bash_command2 = '/home/cubo/anaconda/bin/airflow unpause ' + params['execID']
    subprocess.call(bash_command1.split())
    subprocess.call(bash_command2.split())
    dagbag = models.DagBag(settings.DAGS_FOLDER)
    dagbag.collect_dags()
    dagbag.process_file(filepath=execution_dag_path)
    args = argparse.Namespace()
    args.dag_id = params['execID']
    args.run_id = None
    args.exec_id = None
    args.conf = None
    args.exec_date = None
    args.subdir = None
    # cli.set_is_paused(False, args=args)
    cli.trigger_dag(args)

    # TODO: Update the execution in the database
    # time_ranges = self.get_time_periods(validated_data['parameters'])
    #
    # gtask_parameters = {}
    # gtask_parameters['execID'] = str(validated_data['execution_id'])
    # gtask_parameters['algorithm'] = validated_data['algorithm_name']
    # gtask_parameters['version'] = validated_data['version_id']
    # gtask_parameters['output_expression'] = ''
    # gtask_parameters['product'], gtask_parameters['bands'] = self.get_product(validated_data['parameters'])
    # gtask_parameters = dict(self.get_kwargs(validated_data['parameters']), **gtask_parameters)
    #
    # gtask = import_module(os.environ['GEN_TASK_MOD'])
    #
    # flower = os.environ['FLOWER']
    # for key in gtask_parameters:
    #     print 'param \'' + key + '\': ' + str(gtask_parameters[key])
    # result = gtask.generic_task(min_long=min_long, min_lat=min_lat, **gtask_parameters)
    # if validated_data['is_gif']:
    #     gtask_parameters['min_lat'] = int(min_lat)
    #     gtask_parameters['min_long'] = int(min_long)
    #     result = group(
    #         gtask.generic_task.s(time_ranges=[("01-01-" + str(A), +"31-12-" + str(A))], **gtask_parameters) for A in
    #         xrange(int(time_ranges[0][0].split('-')[2]), int(time_ranges[0][1].split('-')[2]) + 1)).delay()
    #     for each_result in result.results:
    #         new_task = {
    #             'uuid': each_result.id,
    #             'state': '1',
    #             'execution_id': gtask_parameters['execID'],
    #             'state_updated_at': str(datetime.datetime.now()),
    #             'created_at': str(datetime.datetime.now()),
    #             'updated_at': str(datetime.datetime.now()),
    #             'start_date': str(datetime.date.today()),
    #             'end_date': str(datetime.date.today()),
    #         }
    #         Task.objects.create(**new_task)
    # else:
    #     gtask_parameters['time_ranges'] = time_ranges
    #     result = group(gtask.generic_task.s(min_lat=Y, min_long=X, **gtask_parameters) for Y in
    #                    xrange(int(min_lat), int(max_lat)) for X in xrange(int(min_long), int(max_long))).delay()
    #     for each_result in result.results:
    #         # try:
    #         #     task = json.loads(urlopen(flower + '/api/task/info/' + each_result.id).read())
    #         # except:
    #         #     task = {'kwargs': ''}
    #         new_task = {
    #             'uuid': each_result.id,
    #             'state': '1',
    #             'execution_id': gtask_parameters['execID'],
    #             'state_updated_at': str(datetime.datetime.now()),
    #             'created_at': str(datetime.datetime.now()),
    #             'updated_at': str(datetime.datetime.now()),
    #             'start_date': str(datetime.date.today()),
    #             'end_date': str(datetime.date.today()),
    #             # 'parameters': json.dumps(each_result.__dict__),
    #         }
    #         Task.objects.create(**new_task)

    return validated_data
def setUp(self):
    self.dagbag = models.DagBag(include_examples=True)
    self.dag_id = 'example_bash_operator'
    self.dag = self.dagbag.dags[self.dag_id]
def setUp(self):
    self.dagbag = models.DagBag(
        dag_folder=DEV_NULL, include_examples=True)
    self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    self.dag = DAG(TEST_DAG_ID, default_args=self.args)