    def test_get_dag_fileloc(self):
        """
        Test that fileloc is correctly set when we load example DAGs,
        specifically SubDAGs and packaged DAGs.
        """
        dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True)
        dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_zip.zip"))

        expected = {
            'example_bash_operator': 'airflow/example_dags/example_bash_operator.py',
            'example_subdag_operator': 'airflow/example_dags/example_subdag_operator.py',
            'example_subdag_operator.section-1': 'airflow/example_dags/subdags/subdag.py',
            'test_zip_dag': 'dags/test_zip.zip/test_zip.py'
        }

        for dag_id, path in expected.items():
            dag = dagbag.get_dag(dag_id)
            self.assertTrue(dag.fileloc.endswith(path))
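A minimal standalone sketch of the same idea, assuming an illustrative dags folder and zip path; it shows that a packaged DAG's fileloc points inside the archive once the zip has been processed:

# Hypothetical sketch: load a zipped DAG and inspect where Airflow recorded its source.
import os

from airflow import models

dagbag = models.DagBag(dag_folder="/dev/null", include_examples=False)
dagbag.process_file(os.path.join("/path/to/dags", "test_zip.zip"))  # illustrative path
dag = dagbag.get_dag("test_zip_dag")
if dag is not None:
    # For packaged DAGs, fileloc points inside the archive,
    # e.g. ".../test_zip.zip/test_zip.py".
    print(dag.fileloc)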
Example #2
    def test_get_existing_dag(self):
        """
        Test that we're able to parse some example DAGs and retrieve them
        """
        dagbag = models.DagBag(dag_folder=self.empty_dir,
                               include_examples=True)

        some_expected_dag_ids = [
            "example_bash_operator", "example_branch_operator"
        ]

        for dag_id in some_expected_dag_ids:
            dag = dagbag.get_dag(dag_id)

            self.assertIsNotNone(dag)
            self.assertEqual(dag_id, dag.dag_id)

        self.assertGreaterEqual(dagbag.size(), 7)
    def test_on_kill(self):
        """
        Test that ensures that clearing a task in the UI sends
        SIGTERM to the task
        """
        path = "/tmp/airflow_on_kill"
        try:
            os.unlink(path)
        except OSError:
            pass

        dagbag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag = dagbag.dags.get('test_on_kill')
        task = dag.get_task('task1')

        session = settings.Session()

        dag.clear()
        dag.create_dagrun(run_id="test",
                          state=State.RUNNING,
                          execution_date=DEFAULT_DATE,
                          start_date=DEFAULT_DATE,
                          session=session)
        ti = TI(task=task, execution_date=DEFAULT_DATE)
        job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True)

        runner = StandardTaskRunner(job1)
        runner.start()

        # Give the task some time to start up
        time.sleep(10)
        runner.terminate()

        # Wait some time for the result
        for _ in range(20):
            if os.path.exists(path):
                break
            time.sleep(2)

        with open(path, "r") as f:
            self.assertEqual("ON_KILL_TEST", f.readline())
    def test_heartbeat_failed_fast(self, mock_getpid):
        """
        Test that task heartbeat will sleep when it fails fast
        """
        mock_getpid.return_value = 1

        heartbeat_records = []

        def heartbeat_recorder():
            heartbeat_records.append(timezone.utcnow())

        with create_session() as session:
            dagbag = models.DagBag(
                dag_folder=TEST_DAG_FOLDER,
                include_examples=False,
            )
            dag_id = 'test_heartbeat_failed_fast'
            task_id = 'test_heartbeat_failed_fast_op'
            dag = dagbag.get_dag(dag_id)
            task = dag.get_task(task_id)

            dag.create_dagrun(run_id="test_heartbeat_failed_fast_run",
                              state=State.RUNNING,
                              execution_date=DEFAULT_DATE,
                              start_date=DEFAULT_DATE,
                              session=session)
            ti = TI(task=task, execution_date=DEFAULT_DATE)
            ti.refresh_from_db()
            ti.state = State.RUNNING
            ti.hostname = get_hostname()
            ti.pid = 1
            session.commit()

            job = LocalTaskJob(task_instance=ti,
                               executor=TestExecutor(do_update=False))
            job.heartrate = 2
            job.heartbeat = heartbeat_recorder
            job._execute()
            self.assertGreater(len(heartbeat_records), 1)
            for i in range(1, len(heartbeat_records)):
                time1 = heartbeat_records[i - 1]
                time2 = heartbeat_records[i]
                self.assertGreaterEqual((time2 - time1).total_seconds(),
                                        job.heartrate)
    def test_heartbeat_failed_fast(self, mock_getpid):
        """
        Test that task heartbeat will sleep when it fails fast
        """
        mock_getpid.return_value = 1
        self.mock_base_job_sleep.side_effect = time.sleep

        with create_session() as session:
            dagbag = models.DagBag(
                dag_folder=TEST_DAG_FOLDER,
                include_examples=False,
            )
            dag_id = 'test_heartbeat_failed_fast'
            task_id = 'test_heartbeat_failed_fast_op'
            dag = dagbag.get_dag(dag_id)
            task = dag.get_task(task_id)

            dag.create_dagrun(run_id="test_heartbeat_failed_fast_run",
                              state=State.RUNNING,
                              execution_date=DEFAULT_DATE,
                              start_date=DEFAULT_DATE,
                              session=session)
            ti = TI(task=task, execution_date=DEFAULT_DATE)
            ti.refresh_from_db()
            ti.state = State.RUNNING
            ti.hostname = get_hostname()
            ti.pid = 1
            session.commit()

            job = LocalTaskJob(task_instance=ti,
                               executor=MockExecutor(do_update=False))
            job.heartrate = 2
            heartbeat_records = []
            job.heartbeat_callback = lambda session: heartbeat_records.append(
                job.latest_heartbeat)
            job._execute()
            self.assertGreater(len(heartbeat_records), 2)
            for i in range(1, len(heartbeat_records)):
                time1 = heartbeat_records[i - 1]
                time2 = heartbeat_records[i]
                # Assert that the difference is small enough to avoid:
                # AssertionError: 1.996401 not greater than or equal to 2
                delta = (time2 - time1).total_seconds()
                self.assertAlmostEqual(delta, job.heartrate, delta=0.006)
Example #6
def show_columns(selectedId, selectedNodeProps):
    if not selectedId or not selectedNodeProps:
        return Alert('Select a task to watch the output!')

    output_table = selectedNodeProps.get('output_table', '')
    if not output_table:
        return Alert(
            'This operator has no output_table property. Is it a DataDriver workflow?'
        )
    dag_id = selectedNodeProps.get('parent_dag_id', '')

    from airflow import models, settings

    dagbag = models.DagBag(settings.DAGS_FOLDER)
    fileloc = dagbag.get_dag(dag_id).fileloc
    db = _get_db_from_datadriver_dag(fileloc)
    if db is None:
        return Alert("Object named 'db' not found in {}".format(fileloc))

    logging.info("DB type is {} \t\t {}".format(type(db), db))
    try:
        df = db.retrieve_table(output_table)
    except Exception as e:
        return Error(
            f"An exception occured when reading output_table {output_table} : {e}"
        )

    describe_datatable = plot.describe_dashtable(df)
    describe_bar_chart = plot.col_histograms(df)
    head_and_tail_tables = plot.head_and_tail_tables(df)
    tab_head_tail = Tab(label='Head and tail', children=[head_and_tail_tables])
    tab_bar_chart = Tab(label='Bar-chart by column',
                        children=[describe_bar_chart])
    tab_describe = Tab(label='Describe frame', children=[describe_datatable])

    return Panel(
        head="Select a Tab to see the statistics on the task's output",
        body=Tabs([tab_describe, tab_bar_chart, tab_head_tail],
                  colors={
                      "border": "white",
                      "primary": "#00a9c5",
                      "background": "#e0e0e0"
                  }))
Example #7
    def test_depends_on_past(self):
        dagbag = models.DagBag()
        dag = dagbag.get_dag('test_depends_on_past')
        dag.clear()
        task = dag.tasks[0]
        run_date = task.start_date + datetime.timedelta(days=5)
        ti = TI(task, run_date)

        # depends_on_past prevents the run
        task.run(start_date=run_date, end_date=run_date)
        ti.refresh_from_db()
        self.assertIs(ti.state, None)

        # ignore first depends_on_past to allow the run
        task.run(start_date=run_date,
                 end_date=run_date,
                 ignore_first_depends_on_past=True)
        ti.refresh_from_db()
        self.assertEqual(ti.state, State.SUCCESS)
Example #8
    def _execute(self):
        dag_id = self.dag_id

        def signal_handler(signum, frame):
            logging.error("SIGINT (ctrl-c) received")
            sys.exit(1)

        signal.signal(signal.SIGINT, signal_handler)

        utils.pessimistic_connection_handling()

        # Sleep time (seconds) between master runs

        logging.basicConfig(level=logging.DEBUG)
        logging.info("Starting a master scheduler")

        # This should get new code
        dagbag = models.DagBag(self.subdir)
        executor = dagbag.executor
        executor.start()
        i = 0
        while (not self.test_mode) or i < 1:
            i += 1
            if i % self.refresh_dags_every == 0:
                dagbag.collect_dags(only_if_updated=False)
            else:
                dagbag.collect_dags(only_if_updated=True)
            if dag_id:
                dags = [dagbag.dags[dag_id]]
            else:
                dags = [
                    dag for dag in dagbag.dags.values() if not dag.parent_dag
                ]
            paused_dag_ids = dagbag.paused_dags()
            for dag in dags:
                if dag.dag_id in paused_dag_ids:
                    continue
                try:
                    self.process_dag(dag, executor)
                except Exception as e:
                    logging.exception(e)
            self.heartbeat()
        executor.end()
Example #9
    def _run_dag(self, dag_id=None):
        self.log.info("Attempting to run DAG: %s", self.dag_id)
        if not self.setup_called:
            raise AirflowException(
                "Please make sure to call super.setUp() in your "
                "test class!")
        dag_folder = self._get_dag_folder()
        dag_bag = models.DagBag(dag_folder=dag_folder, include_examples=False)
        self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        dag = dag_bag.get_dag(self.dag_id or dag_id)
        if dag is None:
            raise AirflowException(
                "The Dag {} could not be found. It's either an import problem or "
                "the dag {} was not symlinked to the DAGs folder. "
                "The content of the {} folder is {}".format(
                    self.dag_id, self.dag_id + ".py", dag_folder,
                    os.listdir(dag_folder)))
        dag.clear(reset_dag_runs=True)
        dag.run(ignore_first_depends_on_past=True, verbose=True)
Example #10
def test_dag():
    with testing.postgresql.Postgresql() as postgresql:
        with patch.dict(
                os.environ, {
                    'API_V1_DB_URL': postgresql.url(),
                    'OUTPUT_FOLDER': 'tests/api_sync_v1/input'
                }):
            configuration.load_test_config()
            # the scheduler messages, which will show up if something
            # happens to screw up execution, are INFO level so save us
            # some headaches by switching to that log level here
            logging.basicConfig(level=logging.INFO)
            bag = models.DagBag()
            dag = bag.get_dag(dag_id='open_skills_master.api_v1_sync')
            # expire old DAG runs, otherwise the max of 16 will automatically get scheduled
            dag.dagrun_timeout = 1
            dag.clear()
            job = BackfillJob(
                dag=dag,
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
            )
            job.run()
            engine = create_engine(postgresql.url())
            session = sessionmaker(engine)()
            num_jobs = session.query(JobMaster).count()
            assert num_jobs > 1
            num_skills = session.query(SkillMaster).count()
            assert num_skills > 1
            num_importances = session.query(SkillImportance).count()
            assert num_importances > 1
            assert session.query(GeoTitleCount).count() > 1
            assert session.query(TitleCount).count() > 1

            # make sure non-temporal data doesn't
            # load twice for a different quarter
            new_date = datetime(2014, 5, 1)
            dag.clear(start_date=new_date, end_date=new_date)
            dag.run(start_date=new_date, end_date=new_date, local=True)
            assert session.query(JobMaster).count() == num_jobs
            assert session.query(SkillMaster).count() == num_skills
            assert session.query(SkillImportance).count() == num_importances
Example #11
def test_dag_sample_w_template_actual_run():
    dagbag = models.DagBag(dag_folder=DAG_DIR, include_examples=False)
    dag = dagbag.get_dag(dag_id="dag_sample_w_template")  # type: models.DAG

    dag.run(
        start_date=DEFAULT_DATE,
        ignore_first_depends_on_past=True,
        verbose=True,
        executor=DebugExecutor(),
    )

    session = settings.Session()  # type: SASession
    dagruns = session.query(DagRun) \
        .filter(DagRun.dag_id == dag.dag_id) \
        .order_by(DagRun.execution_date) \
        .all()  # type: List[models.DagRun]

    assert len(dagruns) == 1
    assert dagruns[0].execution_date == DEFAULT_DATE
    assert dagruns[0].state == state.State.SUCCESS
    def setUp(self):
        self.dagbag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        try:
            subprocess.check_output(
                ['sudo', 'useradd', '-m', TEST_USER, '-g',
                 str(os.getegid())])
        except OSError as e:
            if e.errno == errno.ENOENT:
                raise unittest.SkipTest(
                    "The 'useradd' command did not exist so unable to test "
                    "impersonation; Skipping Test. These tests can only be run on a "
                    "linux host that supports 'useradd'.")
            else:
                raise unittest.SkipTest(
                    "The 'useradd' command exited non-zero; Skipping tests. Does the "
                    "current user have permission to run 'useradd' without a password "
                    "prompt (check sudoers file)?")
Example #13
    def test_get_dag_fileloc(self):
        """
        Test that fileloc is correctly set when we load example DAGs,
        specifically SubDAGs and packaged DAGs.
        """
        dagbag = models.DagBag(include_examples=True)

        expected = {
            'example_bash_operator':
            'airflow/example_dags/example_bash_operator.py',
            'example_subdag_operator':
            'airflow/example_dags/example_subdag_operator.py',
            'example_subdag_operator.section-1':
            'airflow/example_dags/subdags/subdag.py',
            'test_zip_dag': 'tests/dags/test_zip.zip/test_zip.py'
        }

        for dag_id, path in expected.items():
            dag = dagbag.get_dag(dag_id)
            self.assertTrue(dag.fileloc.endswith(path))
Example #14
    def test_kill_zombies(self, mock_ti_handle_failure):
        """
        Test that kill_zombies calls the TI's failure handler with the proper context
        """
        dagbag = models.DagBag(dag_folder=self.empty_dir,
                               include_examples=True)
        with create_session() as session:
            session.query(TI).delete()
            dag = dagbag.get_dag('example_branch_operator')
            task = dag.get_task(task_id='run_this_first')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)

            session.add(ti)
            session.commit()

            zombies = [SimpleTaskInstance(ti)]
            dagbag.kill_zombies(zombies)
            mock_ti_handle_failure.assert_called_once_with(
                ANY, conf.getboolean('core', 'unit_test_mode'), ANY)
Example #15
    def run_dag(self,
                dag_id: str,
                dag_folder: str = DEFAULT_DAG_FOLDER) -> None:
        """
        Runs an example DAG by its ID.

        :param dag_id: id of a DAG to be run
        :type dag_id: str
        :param dag_folder: directory where to look for the specific DAG. Relative to AIRFLOW_HOME.
        :type dag_folder: str
        """
        if os.environ.get("RUN_AIRFLOW_1_10"):
            # For system test purposes we mount airflow/providers to the /providers folder
            # so that we can get example_dags from there
            dag_folder = dag_folder.replace("/opt/airflow/airflow/providers",
                                            "/providers")
            temp_dir = mkdtemp()
            os.rmdir(temp_dir)
            shutil.copytree(dag_folder, temp_dir)
            dag_folder = temp_dir
            self.correct_imports_for_airflow_1_10(temp_dir)
        self.log.info("Looking for DAG: %s in %s", dag_id, dag_folder)
        dag_bag = models.DagBag(dag_folder=dag_folder, include_examples=False)
        dag = dag_bag.get_dag(dag_id)
        if dag is None:
            raise AirflowException(
                "The Dag {dag_id} could not be found. It's either an import problem,"
                "wrong dag_id or DAG is not in provided dag_folder."
                "The content of the {dag_folder} folder is {content}".format(
                    dag_id=dag_id,
                    dag_folder=dag_folder,
                    content=os.listdir(dag_folder),
                ))

        self.log.info("Attempting to run DAG: %s", dag_id)
        dag.clear(reset_dag_runs=True)
        try:
            dag.run(ignore_first_depends_on_past=True, verbose=True)
        except Exception:
            self._print_all_log_files()
            raise
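A hedged usage sketch of the run_dag helper above; the test class, its SystemTest base class, the dag_id, and the dag_folder are all illustrative assumptions:

# Hypothetical usage: run a provider example DAG end-to-end from a system test.
class ExampleGcsToGcsSystemTest(SystemTest):  # assumed base class exposing run_dag()
    def test_run_example_dag(self):
        self.run_dag(
            dag_id="example_gcs_to_gcs",  # illustrative dag_id
            dag_folder="/opt/airflow/airflow/providers/google/cloud/example_dags",
        )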
Example #16
def delete_dag(dag_id, keep_records_in_log=True):
    """
    :param dag_id: the dag_id of the DAG to delete
    :type dag_id: str
    :param keep_records_in_log: whether to keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :type keep_records_in_log: bool
    """
    session = settings.Session()

    DM = models.DagModel
    dag = session.query(DM).filter(DM.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    dagbag = models.DagBag()
    if dag_id in dagbag.dags:
        raise DagFileExists("Dag id {} is still in DagBag. "
                            "Remove the DAG file first.".format(dag_id))

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for m in models.Base._decl_class_registry.values():
        if hasattr(m, "dag_id"):
            if keep_records_in_log and m.__name__ == 'Log':
                continue
            cond = or_(m.dag_id == dag_id, m.dag_id.like(dag_id + ".%"))
            count += session.query(m).filter(cond).delete(
                synchronize_session='fetch')

    if dag.is_subdag:
        p, c = dag_id.rsplit(".", 1)
        for m in models.DagRun, models.TaskFail, models.TaskInstance:
            count += session.query(m).filter(m.dag_id == p,
                                             m.task_id == c).delete()

    session.commit()

    return count
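A brief, hedged usage sketch of delete_dag; the dag_id is illustrative, and the exception classes are assumed to be importable from airflow.exceptions as in Airflow 1.10.x:

# Hypothetical usage: purge metadata for a DAG whose file has already been removed.
from airflow.exceptions import DagFileExists, DagNotFound  # assumed import path

try:
    removed = delete_dag("obsolete_dag", keep_records_in_log=True)
    print("Deleted {} metadata rows".format(removed))
except DagNotFound:
    print("No DAG with that id exists in the metadata database.")
except DagFileExists:
    print("The DAG file still exists; remove it from the dags folder first.")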
Example #17
    def setUp(self):
        super(CompletionMonitorDagTest, self).setUp()
        models.Variable.set('DAG_ID', _DAG_ID)
        models.Variable.set('GCP_PROJECT_ID', _PROJECT_ID)
        models.Variable.set('QUEUE_LOCATION', _QUEUE_LOCATION)
        models.Variable.set('QUEUE_NAME', _QUEUE_NAME)
        models.Variable.set('TRY_COUNT_LIMIT', _TRY_COUNT_LIMIT)
        models.Variable.set('MONITOR_DATASET_ID', _MONITOR_DATASET_ID)
        models.Variable.set('MONITOR_TABLE_ID', _MONITOR_TABLE_ID)
        models.Variable.set('LAST_PROCESS_RESULT_QUERY_FILE_PATH',
                            _QUERY_FILE_PATH)
        models.Variable.set('DESTINATION_PUBSUB_TOPIC', _TOPIC_NAME)
        models.Variable.set('TIMEZONE_UTC_OFFSET', _TIMEZONE_UTC_OFFSET)
        models.Variable.set('FEED_DATASET_ID', _FEED_DATASET_ID)
        models.Variable.set('ITEMS_TABLE_ID', _ITEMS_TABLE_ID)
        models.Variable.set('EXPIRATION_TRACKING_TABLE_ID',
                            _EXPIRATION_TRACKING_TABLE_ID)
        models.Variable.set('ITEM_RESULTS_TABLE_ID', _ITEM_RESULTS_TABLE_ID)
        models.Variable.set('LOCK_BUCKET', _LOCK_BUCKET)
        self.dag_bag = models.DagBag(dag_folder='./')
        self.dag = self.dag_bag.dags.get(_DAG_ID)
Example #18
    def _load_from_file(self, file_path):
        try:
            from airflow import models

            # Use DagBag module to load all dags from a given file
            dag_bag = models.DagBag(file_path, include_examples=False)

            # Now the DagBag object contains the 'dags' dict, which maps each dag id to its dag object
            return dag_bag.dags
        except Exception:
            logger.warning(
                "Failed to load dag from %s. Exception:", file_path, exc_info=True
            )
        except SystemExit:
            logger.warning(
                "Failed to load dag from %s, due to SystemExit",
                file_path,
                exc_info=True,
            )

        return None
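A short, hedged usage sketch of the loader above, assuming loader is an instance of the enclosing class and the DAG file path is illustrative:

# Hypothetical usage: load DAGs from one file and report each DAG's task count.
dags = loader._load_from_file("/usr/local/airflow/dags/etl_pipeline.py")  # illustrative path
if dags:
    for dag_id, dag in dags.items():
        print("{}: {} task(s)".format(dag_id, len(dag.tasks)))
else:
    print("No DAGs could be loaded from the file.")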
Example #19
    def test_mark_success_no_kill(self):
        """
        Test that ensures that mark_success in the UI doesn't cause
        the task to fail, and that the task exits
        """
        dagbag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag = dagbag.dags.get('test_mark_success')
        task = dag.get_task('task1')

        session = settings.Session()

        dag.clear()
        dag.create_dagrun(run_id="test",
                          state=State.RUNNING,
                          execution_date=DEFAULT_DATE,
                          start_date=DEFAULT_DATE,
                          session=session)
        ti = TI(task=task, execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True)
        process = multiprocessing.Process(target=job1.run)
        process.start()
        ti.refresh_from_db()
        for _ in range(0, 50):
            if ti.state == State.RUNNING:
                break
            time.sleep(0.1)
            ti.refresh_from_db()
        self.assertEqual(State.RUNNING, ti.state)
        ti.state = State.SUCCESS
        session.merge(ti)
        session.commit()

        process.join(timeout=10)
        self.assertFalse(process.is_alive())
        ti.refresh_from_db()
        self.assertEqual(State.SUCCESS, ti.state)
Example #20
    def export_data_directly(
        self,
        since,
        include_logs,
        include_task_args,
        include_xcom,
        dag_ids,
        quantity,
        incomplete_offset,
        dags_only,
    ):
        from airflow import models, settings, conf
        from airflow.settings import STORE_SERIALIZED_DAGS
        from sqlalchemy import create_engine
        from sqlalchemy.orm import sessionmaker
        from dbnd_airflow_export.dbnd_airflow_export_plugin import get_airflow_data

        conf.set("core", "sql_alchemy_conn", value=self.sql_conn_string)
        dagbag = models.DagBag(
            self.dag_folder if self.dag_folder else settings.DAGS_FOLDER,
            include_examples=True,
            store_serialized_dags=STORE_SERIALIZED_DAGS,
        )

        engine = create_engine(self.sql_conn_string)
        session = sessionmaker(bind=engine)
        result = get_airflow_data(
            dagbag=dagbag,
            since=since,
            include_logs=include_logs,
            include_task_args=include_task_args,
            include_xcom=include_xcom,
            dag_ids=dag_ids,
            quantity=quantity,
            incomplete_offset=incomplete_offset,
            dags_only=dags_only,
            session=session(),
        )
        return result
Example #21
def export_data_directly(sql_alchemy_conn, dag_folder, since, include_logs,
                         dag_ids, tasks):
    from airflow import models, settings, conf
    from airflow.settings import STORE_SERIALIZED_DAGS
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    conf.set("core", "sql_alchemy_conn", value=sql_alchemy_conn)
    dagbag = models.DagBag(
        dag_folder if dag_folder else settings.DAGS_FOLDER,
        include_examples=True,
        store_serialized_dags=STORE_SERIALIZED_DAGS,
    )

    engine = create_engine(sql_alchemy_conn)
    session = sessionmaker(bind=engine)
    return _handle_export_data(dagbag,
                               since,
                               include_logs,
                               dag_ids,
                               tasks,
                               session=session())
Example #22
    def setUp(self):
        self.dagbag = models.DagBag(include_examples=True)
        self.dag1 = self.dagbag.dags['example_bash_operator']
        self.dag2 = self.dagbag.dags['example_subdag_operator']

        self.execution_dates = [days_ago(2), days_ago(1)]

        drs = _create_dagruns(self.dag1, self.execution_dates,
                              state=State.RUNNING,
                              run_id_template="scheduled__{}")
        for dr in drs:
            dr.dag = self.dag1
            dr.verify_integrity()

        drs = _create_dagruns(self.dag2,
                              [self.dag2.default_args['start_date']],
                              state=State.RUNNING,
                              run_id_template="scheduled__{}")

        for dr in drs:
            dr.dag = self.dag2
            dr.verify_integrity()
Example #23
    def test_localtaskjob_double_trigger(self):
        dagbag = models.DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag = dagbag.dags.get('test_localtaskjob_double_trigger')
        task = dag.get_task('test_localtaskjob_double_trigger_task')

        session = settings.Session()

        dag.clear()
        dr = dag.create_dagrun(run_id="test",
                               state=State.SUCCESS,
                               execution_date=DEFAULT_DATE,
                               start_date=DEFAULT_DATE,
                               session=session)
        ti = dr.get_task_instance(task_id=task.task_id, session=session)
        ti.state = State.RUNNING
        ti.hostname = get_hostname()
        ti.pid = 1
        session.merge(ti)
        session.commit()

        ti_run = TI(task=task, execution_date=DEFAULT_DATE)
        ti_run.refresh_from_db()
        job1 = LocalTaskJob(task_instance=ti_run,
                            executor=SequentialExecutor())
        from airflow.task.task_runner.standard_task_runner import StandardTaskRunner
        with patch.object(StandardTaskRunner, 'start',
                          return_value=None) as mock_method:
            job1.run()
            mock_method.assert_not_called()

        ti = dr.get_task_instance(task_id=task.task_id, session=session)
        self.assertEqual(ti.pid, 1)
        self.assertEqual(ti.state, State.RUNNING)

        session.close()
    def list(self):
        title = "DAG Dependencies"

        if DAGDependenciesView.dagbag is None:
            DAGDependenciesView.dagbag = models.DagBag(settings.DAGS_FOLDER)

        if datetime.utcnow() > self.last_refresh + timedelta(
            seconds=self.refresh_interval
        ):
            DAGDependenciesView.dagbag.collect_dags()
            self.nodes, self.edges = self._generate_graph()
            self.last_refresh = datetime.utcnow()

        return self.render_template(
            "dag_dependencies.html",
            title=title,
            nodes=self.nodes,
            edges=self.edges,
            last_refresh=self.last_refresh.strftime("%Y-%m-%d %H:%M:%S"),
            arrange=conf.get("webserver", "dag_orientation"),
            width=request.args.get("width", "100%"),
            height=request.args.get("height", "800"),
        )
Example #25
    def prepare_dagruns(self):
        dagbag = models.DagBag(include_examples=True)
        self.bash_dag = dagbag.dags['example_bash_operator']
        self.sub_dag = dagbag.dags['example_subdag_operator']
        self.xcom_dag = dagbag.dags['example_xcom']

        self.bash_dagrun = self.bash_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)

        self.sub_dagrun = self.sub_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)

        self.xcom_dagrun = self.xcom_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)
    def test_kill_zombies_doesn_nothing(self, mock_ti_handle_failure):
        """
        Test that kill_zombies does nothing when the job is running and has received a heartbeat
        """
        dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True)
        with create_session() as session:
            session.query(TI).delete()
            session.query(LJ).delete()
            dag = dagbag.get_dag('example_branch_operator')
            task = dag.get_task(task_id='run_this_first')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            lj = LJ(ti)
            lj.latest_heartbeat = utcnow()
            lj.state = State.RUNNING
            lj.id = 1
            ti.job_id = lj.id

            session.add(lj)
            session.add(ti)
            session.commit()

            dagbag.kill_zombies()
            mock_ti_handle_failure.assert_not_called()
    def setUp(self):
        configuration.conf.load_test_config()
        self.dagbag = models.DagBag(dag_folder='/dev/null',
                                    include_examples=True)
        self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=self.args)
    def create(self, validated_data):

        # TODO: Import Jinja2
        # TODO: Create the dictionary
        execution = Execution.objects.get(pk=validated_data['execution_id'])
        min_long, max_long, min_lat, max_lat = self.get_area(
            validated_data['parameters'])
        params = dict(self.get_kwargs(validated_data['parameters']))
        params['lat'] = (min_lat, max_lat)
        params['lon'] = (min_long, max_long)
        params['products'] = self.get_product(validated_data['parameters'])
        params['time_ranges'] = self.get_time_periods(
            validated_data['parameters'])
        params['execID'] = 'exec_{}'.format(str(
            validated_data['execution_id']))
        params['elimina_resultados_anteriores'] = True
        params['genera_mosaico'] = validated_data['generate_mosaic']

        # params['owner'] = Execution.executed_by.
        params['owner'] = "API-REST"
        # TODO: Load the template

        template_path = os.path.join(os.environ['TEMPLATE_PATH'],
                                     slugify(validated_data['algorithm_name']))
        generic_template_path = os.path.join(os.environ['TEMPLATE_PATH'],
                                             "generic-template")

        if execution.version is not None and execution.version.publishing_state == Version.PUBLISHED_STATE and os.path.exists(
                template_path):
            file_loader = FileSystemLoader(template_path)
            env = Environment(loader=file_loader)
            algorithm_template_path = '{}_{}.py'.format(
                slugify(validated_data['algorithm_name']),
                validated_data['version_id'])
            template = env.get_template(algorithm_template_path)
        else:
            file_loader = FileSystemLoader(generic_template_path)
            env = Environment(loader=file_loader)
            algorithm_template_path = '{}_{}.py'.format(
                "generic-template", "1.0")
            params['algorithm_name'] = slugify(
                validated_data['algorithm_name'])
            params['algorithm_version'] = validated_data['version_id']
            template = env.get_template(algorithm_template_path)

        # TODO: Render the template
        airflow_dag_path = os.environ['AIRFLOW_DAG_PATH']
        execution_dag_path = '{}/exec_{}.py'.format(
            airflow_dag_path, str(validated_data['execution_id']))
        output = template.render(params=params)
        with open(execution_dag_path, 'w') as dag:
            dag.write("from airflow.operators import CompressFileSensor\n")
            dag.write("from cdcol_utils import other_utils\n")
            dag.write(output)
            dag.write(
                "\nsensor_fin_ejecucion = CompressFileSensor(task_id='sensor_fin_ejecucion',poke_interval=60, soft_fail=True,mode='reschedule', queue='util', dag=dag) \n"
            )
            dag.write(
                "comprimir_resultados = PythonOperator(task_id='comprimir_resultados',provide_context=True,python_callable=other_utils.compress_results,queue='util',op_kwargs={'execID': args['execID']},dag=dag) \n"
            )
            dag.write("sensor_fin_ejecucion >> comprimir_resultados \n")
        dag.close()
        execution.dag_id = params['execID']
        execution.save()

        # TODO: Run the workflow
        bash_command1 = '/home/cubo/anaconda/bin/airflow list_dags'
        bash_command2 = '/home/cubo/anaconda/bin/airflow unpause ' + params[
            'execID']

        subprocess.call(bash_command1.split())
        subprocess.call(bash_command2.split())

        dagbag = models.DagBag(settings.DAGS_FOLDER)
        dagbag.collect_dags()
        dagbag.process_file(filepath=execution_dag_path)

        args = argparse.Namespace()
        args.dag_id = params['execID']
        args.run_id = None
        args.exec_id = None
        args.conf = None
        args.exec_date = None
        args.subdir = None
        #cli.set_is_paused(False, args=args)
        cli.trigger_dag(args)

        # TODO: Modify the execution record in the database

        # time_ranges = self.get_time_periods(validated_data['parameters'])
        #
        # gtask_parameters = {}
        # gtask_parameters['execID'] = str(validated_data['execution_id'])
        # gtask_parameters['algorithm'] = validated_data['algorithm_name']
        # gtask_parameters['version'] = validated_data['version_id']
        # gtask_parameters['output_expression'] = ''
        # gtask_parameters['product'], gtask_parameters['bands'] = self.get_product(validated_data['parameters'])
        # gtask_parameters = dict(self.get_kwargs(validated_data['parameters']), **gtask_parameters)
        #
        # gtask = import_module(os.environ['GEN_TASK_MOD'])
        # # flower = os.environ['FLOWER']

        # for key in gtask_parameters:
        #	print 'param \'' + key + '\': ' + str(gtask_parameters[key])

        # result = gtask.generic_task(min_long=min_long, min_lat=min_lat, **gtask_parameters)

        # if validated_data['is_gif']:
        #     gtask_parameters['min_lat'] = int(min_lat)
        #     gtask_parameters['min_long'] = int(min_long)
        #     result = group(
        #         gtask.generic_task.s(time_ranges=[("01-01-" + str(A), +"31-12-" + str(A))], **gtask_parameters) for A in
        #         xrange(int(time_ranges[0][0].split('-')[2]), int(time_ranges[0][1].split('-')[2]) + 1)).delay()
        #     for each_result in result.results:
        #         new_task = {
        #             'uuid': each_result.id,
        #             'state': '1',
        #             'execution_id': gtask_parameters['execID'],
        #             'state_updated_at': str(datetime.datetime.now()),
        #             'created_at': str(datetime.datetime.now()),
        #             'updated_at': str(datetime.datetime.now()),
        #             'start_date': str(datetime.date.today()),
        #             'end_date': str(datetime.date.today()),
        #
        #         }
        #         Task.objects.create(**new_task)
        # else:
        #     gtask_parameters['time_ranges'] = time_ranges
        #     result = group(gtask.generic_task.s(min_lat=Y, min_long=X, **gtask_parameters) for Y in
        #                    xrange(int(min_lat), int(max_lat)) for X in xrange(int(min_long), int(max_long))).delay()
        #     for each_result in result.results:
        #         # try:
        #         # 	task = json.loads(urlopen(flower + '/api/task/info/'+each_result.id).read())
        #         # except:
        #         # 	task = {'kwargs':''}
        #         new_task = {
        #             'uuid': each_result.id,
        #             'state': '1',
        #             'execution_id': gtask_parameters['execID'],
        #             'state_updated_at': str(datetime.datetime.now()),
        #             'created_at': str(datetime.datetime.now()),
        #             'updated_at': str(datetime.datetime.now()),
        #             'start_date': str(datetime.date.today()),
        #             'end_date': str(datetime.date.today()),
        #             # 'parameters': json.dumps(each_result.__dict__),
        #         }
        #         Task.objects.create(**new_task)

        return validated_data
Example #29
    def setUp(self):
        self.dagbag = models.DagBag(include_examples=True)
        self.dag_id = 'example_bash_operator'
        self.dag = self.dagbag.dags[self.dag_id]
Example #30
    def setUp(self):
        self.dagbag = models.DagBag(
            dag_folder=DEV_NULL, include_examples=True)
        self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=self.args)