Code example #1
File: cli.py Project: cjquinon/incubator-airflow
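An `airflow pool` CLI handler (apparently from an older Airflow release): depending on whether `--get`, `--set`, or `--delete` was passed, it looks up the named `Pool` row through a SQLAlchemy session and prints, creates or updates, or deletes it.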
def pool(args):
    session = settings.Session()
    if args.get or (args.set and args.set[0]) or args.delete:
        name = args.get or args.delete or args.set[0]
    pool = (
        session.query(Pool)
        .filter(Pool.pool == name)
        .first())
    if pool and args.get:
        print("{} ".format(pool))
        return
    elif not pool and (args.get or args.delete):
        print("No pool named {} found".format(name))
    elif not pool and args.set:
        pool = Pool(
            pool=name,
            slots=args.set[1],
            description=args.set[2])
        session.add(pool)
        session.commit()
        print("{} ".format(pool))
    elif pool and args.set:
        pool.slots = args.set[1]
        pool.description = args.set[2]
        session.commit()
        print("{} ".format(pool))
        return
    elif pool and args.delete:
        session.query(Pool).filter_by(pool=args.delete).delete()
        session.commit()
        print("Pool {} deleted".format(name))
Code example #2
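A near-identical variant of example #1, with the `Pool` query written on a single line.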
def pool(args):
    session = settings.Session()
    if args.get or (args.set and args.set[0]) or args.delete:
        name = args.get or args.delete or args.set[0]
    pool = (session.query(Pool).filter(Pool.pool == name).first())
    if pool and args.get:
        print("{} ".format(pool))
        return
    elif not pool and (args.get or args.delete):
        print("No pool named {} found".format(name))
    elif not pool and args.set:
        pool = Pool(pool=name, slots=args.set[1], description=args.set[2])
        session.add(pool)
        session.commit()
        print("{} ".format(pool))
    elif pool and args.set:
        pool.slots = args.set[1]
        pool.description = args.set[2]
        session.commit()
        print("{} ".format(pool))
        return
    elif pool and args.delete:
        session.query(Pool).filter_by(pool=args.delete).delete()
        session.commit()
        print("Pool {} deleted".format(name))
Code example #3
File: pool.py Project: rounakdatta/airflow
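A `create_pool` helper: it validates the name and the `slots` value, rejects names longer than the `Pool.pool` column allows, then either inserts a new `Pool` row or updates the existing one and commits.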
def create_pool(name, slots, description, session=None):
    """Create a pool with given parameters."""
    if not (name and name.strip()):
        raise AirflowBadRequest("Pool name shouldn't be empty")

    try:
        slots = int(slots)
    except ValueError:
        raise AirflowBadRequest(f"Bad value for `slots`: {slots}")

    # Get the length of the pool column
    pool_name_length = Pool.pool.property.columns[0].type.length
    if len(name) > pool_name_length:
        raise AirflowBadRequest(
            f"Pool name can't be more than {pool_name_length} characters")

    session.expire_on_commit = False
    pool = session.query(Pool).filter_by(pool=name).first()
    if pool is None:
        pool = Pool(pool=name, slots=slots, description=description)
        session.add(pool)
    else:
        pool.slots = slots
        pool.description = description

    session.commit()

    return pool
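For context, a minimal usage sketch of the helper above (it assumes Airflow's `create_session` from `airflow.utils.db`, as used in the later examples; the pool name, slot count, and description are made-up values):

from airflow.utils.db import create_session

with create_session() as session:
    # "etl_pool", slots=4 and the description are hypothetical values for illustration.
    etl_pool = create_pool(
        name="etl_pool",
        slots=4,
        description="Pool for ETL tasks",
        session=session,
    )
    print(etl_pool.pool, etl_pool.slots)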
Code example #4
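Creates the default pool if it does not exist yet, sizing it from the `core.default_pool_task_slot_count` configuration option.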
def add_default_pool_if_not_exists(session=None):
    """Add default pool if it does not exist."""
    if not Pool.get_pool(Pool.DEFAULT_POOL_NAME, session=session):
        default_pool = Pool(
            pool=Pool.DEFAULT_POOL_NAME,
            slots=conf.getint(section='core', key='default_pool_task_slot_count'),
            description="Default pool",
        )
        session.add(default_pool)
        session.commit()
Code example #5
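A test fixture that clears existing pools and then seeds `USER_POOL_COUNT` pools named `experimental_<n>`, keeping the last one as `self.pool`.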
    def _setup_attrs(self, _setup_attrs_base):
        clear_db_pools()
        self.pools = [Pool.get_default_pool()]
        for i in range(self.USER_POOL_COUNT):
            name = f'experimental_{i + 1}'
            pool = Pool(
                pool=name,
                slots=i,
                description=name,
            )
            self.session.add(pool)
            self.pools.append(pool)
        self.session.commit()
        self.pool = self.pools[-1]
Code example #6
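The same pool seeding as example #5, written as a classic `unittest` `setUp` method.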
    def setUp(self):
        super().setUp()
        clear_db_pools()
        self.pools = [Pool.get_default_pool()]
        for i in range(self.USER_POOL_COUNT):
            name = 'experimental_%s' % (i + 1)
            pool = Pool(
                pool=name,
                slots=i,
                description=name,
            )
            self.session.add(pool)
            self.pools.append(pool)
        self.session.commit()
        self.pool = self.pools[-1]
Code example #7
File: jobs.py Project: yiqideren/airflow
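A scheduler test: it makes sure a five-slot `test_queued_pool` exists, runs `SchedulerJob` against a pooled DAG, and asserts the task instance ends up in `State.FAILED`.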
    def test_scheduler_pooled_tasks(self):
        """
        Test that the scheduler handles queued tasks correctly
        See issue #1299
        """
        session = settings.Session()
        if not (session.query(Pool).filter(
                Pool.pool == 'test_queued_pool').first()):
            pool = Pool(pool='test_queued_pool', slots=5)
            session.merge(pool)
            session.commit()
        session.close()

        dag_id = 'test_scheduled_queued_tasks'
        dag = self.dagbag.get_dag(dag_id)
        dag.clear()

        scheduler = SchedulerJob(dag_id, num_runs=10)
        scheduler.run()

        task_1 = dag.tasks[0]
        ti = TI(task_1, dag.start_date)
        ti.refresh_from_db()
        self.assertEqual(ti.state, State.FAILED)

        dag.clear()
Code example #8
File: jobs.py Project: mpeshave/incubator-airflow
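A backfill test: it creates a single-slot pool, runs `BackfillJob` under a 30-second timeout (the pooled task could otherwise loop forever), and asserts the task instance reaches `State.SUCCESS`.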
    def test_backfill_pooled_tasks(self):
        """
        Test that queued tasks are executed by BackfillJob

        Test for https://github.com/airbnb/airflow/pull/1225
        """
        session = settings.Session()
        pool = Pool(pool='test_backfill_pooled_task_pool', slots=1)
        session.add(pool)
        session.commit()

        dag = self.dagbag.get_dag('test_backfill_pooled_task_dag')
        dag.clear()

        job = BackfillJob(dag=dag,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE)

        # run with timeout because this creates an infinite loop if not
        # caught
        with timeout(seconds=30):
            job.run()

        ti = TI(task=dag.get_task('test_backfill_pooled_task'),
                execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        self.assertEqual(ti.state, State.SUCCESS)
Code example #9
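A web-UI test that inserts a pool whose name contains a `<script>` tag and checks that the pool list page escapes it.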
def test_odd_name(session, admin_client, pool):
    pool['pool'] = 'test-pool<script></script>'
    session.add(Pool(**pool))
    session.commit()
    resp = admin_client.get('/pool/list/')
    check_content_in_response('test-pool&lt;script&gt;', resp)
    check_content_not_in_response('test-pool<script>', resp)
Code example #10
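Builds one clickstream DAG per workflow document fetched from MongoDB and, for each, creates a dedicated five-slot `redshift_loader_*` pool unless an identical pool already exists.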
def main(session=None):
    """Create clickstream DAG with branches for clickstream events grouped by type."""
    global default_args

    client = MongoClient()
    workflows = client.clickstream_configs()

    for workflow in workflows:
        default_args['app_id'] = workflow['_id']
        pool_name = "redshift_loader_{}_{}".format(workflow['_id'], 5)
        workflow['pool'] = pool_name

        # TODO: flip back to old schedule when done testing - 15 * * * *
        dag = DAG(dag_id=build_dag_id(workflow), default_args=default_args, schedule_interval='15 * * * *', catchup=False)
        globals()[workflow['_id']] = dag

        start = DummyOperator(task_id='start', dag=dag, resources=dict(organizationId='astronomer'))

        standard_events = StandardClickstreamEvents(workflow=workflow, dag=dag, upstream_task=start)
        standard_events.run()

        custom_events = CustomClickstreamEvents(workflow=workflow, dag=dag, upstream_task=start)
        custom_events.run()

        pool = Pool(pool=pool_name, slots=5)
        pool_query = session.query(Pool)
        pool_query = pool_query.filter(Pool.pool == pool_name)
        pool_query = pool_query.filter(Pool.slots == 5)
        pool_query_result = pool_query.limit(1).all()
        if len(pool_query_result) == 0:
            session.add(pool)
            session.commit()

    client.close()
    logger.info('Finished exporting clickstream DAGs.')
Code example #11
File: test_backfill_job.py Project: shivamx/airflow
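A later revision of the backfill test from example #8, using a `MockExecutor` and tolerating the `AirflowTaskTimeout` raised by the five-second timeout guard.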
    def test_backfill_pooled_tasks(self):
        """
        Test that queued tasks are executed by BackfillJob
        """
        session = settings.Session()
        pool = Pool(pool='test_backfill_pooled_task_pool', slots=1)
        session.add(pool)
        session.commit()
        session.close()

        dag = self.dagbag.get_dag('test_backfill_pooled_task_dag')
        dag.clear()

        executor = MockExecutor(do_update=True)
        job = BackfillJob(dag=dag,
                          start_date=DEFAULT_DATE,
                          end_date=DEFAULT_DATE,
                          executor=executor)

        # run with timeout because this creates an infinite loop if not
        # caught
        try:
            with timeout(seconds=5):
                job.run()
        except AirflowTaskTimeout:
            pass
        ti = TI(task=dag.get_task('test_backfill_pooled_task'),
                execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        self.assertEqual(ti.state, State.SUCCESS)
Code example #12
File: test_backfill_job.py Project: shivamx/airflow
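Verifies that a backfill never runs more task instances at once than the pool's two slots, that the pool is saturated at least once, and that the only concurrency-limit messages in the debug log are the "no available pool slot" ones.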
    def test_backfill_respect_pool_limit(self, mock_log):
        session = settings.Session()

        slots = 2
        pool = Pool(
            pool='pool_with_two_slots',
            slots=slots,
        )
        session.add(pool)
        session.commit()

        dag = self._get_dummy_dag(
            dag_id='test_backfill_respect_pool_limit',
            pool=pool.pool,
        )

        executor = MockExecutor()

        job = BackfillJob(
            dag=dag,
            executor=executor,
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE + datetime.timedelta(days=7),
        )

        job.run()

        self.assertTrue(0 < len(executor.history))

        pool_was_full_at_least_once = False
        num_running_task_instances = 0

        for running_task_instances in executor.history:
            self.assertLessEqual(len(running_task_instances), slots)
            num_running_task_instances += len(running_task_instances)
            if len(running_task_instances) == slots:
                pool_was_full_at_least_once = True

        self.assertEqual(8, num_running_task_instances)
        self.assertTrue(pool_was_full_at_least_once)

        times_dag_concurrency_limit_reached_in_debug = self._times_called_with(
            mock_log.debug,
            DagConcurrencyLimitReached,
        )

        times_pool_limit_reached_in_debug = self._times_called_with(
            mock_log.debug,
            NoAvailablePoolSlot,
        )

        times_task_concurrency_limit_reached_in_debug = self._times_called_with(
            mock_log.debug,
            TaskConcurrencyLimitReached,
        )

        self.assertEqual(0, times_task_concurrency_limit_reached_in_debug)
        self.assertEqual(0, times_dag_concurrency_limit_reached_in_debug)
        self.assertGreater(times_pool_limit_reached_in_debug, 0)
Code example #13
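A small factory helper (presumably the body of a test fixture): it builds a `Pool` from a `POOL` dict of defaults, lets keyword arguments override them, and commits the row.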
def factory(**values):
    pool = Pool(**{
        **POOL,
        **values
    })  # Passed in values override defaults.
    session.add(pool)
    session.commit()
    return pool
Code example #14
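Creates the pool named by dbnd's `airflow.dbnd_pool` configuration key and merges it with `slots=-1`, i.e. an effectively unlimited pool.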
def create_dbnd_pool():
    from airflow.utils.db import create_session
    from airflow.models import Pool

    print("Creating databand pool")
    with create_session() as session:
        pool_name = dbnd_config.get("airflow", "dbnd_pool")
        dbnd_pool = Pool(pool=pool_name, slots=-1)
        session.merge(dbnd_pool)
Code example #15
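A Flask test-client variant of the pool-API `setUp`: it builds the test app, clears existing pools, and seeds the same `experimental_<n>` pools as examples #5 and #6.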
    def setUp(self):
        super(TestPoolApiExperimental, self).setUp()
        app = application.create_app(testing=True)
        self.app = app.test_client()
        self.session = Session()

        clear_db_pools()
        self.pools = [Pool.get_default_pool()]
        for i in range(self.USER_POOL_COUNT):
            name = 'experimental_%s' % (i + 1)
            pool = Pool(
                pool=name,
                slots=i,
                description=name,
            )
            self.session.add(pool)
            self.pools.append(pool)
        self.session.commit()
        self.pool = self.pools[-1]
Code example #16
File: jobs.py Project: rrbarbosa/airflow
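A scheduler test for a full pool: with a single-slot pool and two DAG runs, it replays part of the scheduling loop by hand and asserts that only one task instance gets queued to the executor.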
    def test_scheduler_verify_pool_full(self, mock_pool_full):
        """
        Test task instances not queued when pool is full
        """
        mock_pool_full.return_value = False

        dag = DAG(
            dag_id='test_scheduler_verify_pool_full',
            start_date=DEFAULT_DATE)

        DummyOperator(
            task_id='dummy',
            dag=dag,
            owner='airflow',
            pool='test_scheduler_verify_pool_full')

        session = settings.Session()
        pool = Pool(pool='test_scheduler_verify_pool_full', slots=1)
        session.add(pool)
        orm_dag = DagModel(dag_id=dag.dag_id)
        orm_dag.is_paused = False
        session.merge(orm_dag)
        session.commit()

        scheduler = SchedulerJob()
        dag.clear()

        # Create 2 dagruns, which will create 2 task instances.
        dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dr)
        self.assertEquals(dr.execution_date, DEFAULT_DATE)
        dr = scheduler.create_dag_run(dag)
        self.assertIsNotNone(dr)
        queue = []
        scheduler._process_task_instances(dag, queue=queue)
        self.assertEquals(len(queue), 2)
        dagbag = SimpleDagBag([dag])

        # Recreated part of the scheduler here, to kick off tasks -> executor
        for ti_key in queue:
            task = dag.get_task(ti_key[1])
            ti = models.TaskInstance(task, ti_key[2])
            # Task starts out in the scheduled state. All tasks in the
            # scheduled state will be sent to the executor
            ti.state = State.SCHEDULED

            # Also save this task instance to the DB.
            session.merge(ti)
            session.commit()

        scheduler._execute_task_instances(dagbag,
                                          (State.SCHEDULED,
                                           State.UP_FOR_RETRY))

        self.assertEquals(len(scheduler.executor.queued_tasks), 1)
Code example #17
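Reads a `pools` mapping from a config dict and creates every pool that does not already exist; each value may be either a nested dict with `description` and `slots` or a bare slot count, as illustrated after the code.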
    def load_pools(
        config: dict,
        session: Session = None,
    ):
        pools: dict = config.get("pools", None)
        if pools is None:
            log.info("No pools found, skipping")
            return

        log.info("Loading pools from config...")
        for key in pools.keys():
            val = pools.get(key)

            pool = session.query(Pool).filter_by(pool=key).first()
            if pool is not None:
                log.info(f"Pool exists, skipping: {key}")
                continue

            log.info("Setting pool: " + key)
            pool = Pool(pool=key)
            if isinstance(val, dict):
                pool.description = val.get("description", "Loaded by zairflow")
                pool.slots = val.get("slots", -1)
            else:
                assert isinstance(val, (int, float))
                pool.description = "Loaded from zairflow init"
                pool.slots = val or -1
            session.add(pool)
        session.commit()
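As a rough illustration, a config dict of the shape `load_pools` expects might look like this (pool names and slot counts are hypothetical):

config = {
    "pools": {
        # Mapping form: explicit description and slot count.
        "etl_pool": {"description": "ETL tasks", "slots": 4},
        # Numeric form: bare slot count; load_pools fills in a default description.
        "adhoc_pool": 2,
    }
}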
Code example #18
File: utils.py Project: databand-ai/dbnd
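The dbnd pool helper again, with an existence check first: if no row with that name exists, it merges a `Pool` with `slots=-1` ("endless", per the comment) and commits.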
def create_airflow_pool(pool_name):
    from airflow.models import Pool
    from airflow.utils.db import create_session

    print("Creating Airflow pool '%s'" % pool_name)
    with create_session() as session:
        if session.query(Pool.pool).filter(Pool.pool == pool_name).scalar() is not None:
            return

        # -1 so we have endless pool
        dbnd_pool = Pool(pool=pool_name, slots=-1)
        session.merge(dbnd_pool)
        session.commit()
Code example #19
File: pool.py Project: ataki/incubator-airflow
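An earlier revision of the `create_pool` helper from example #3: the same create-or-update logic, but raising `PoolBadRequest` and without the name-length check.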
def create_pool(name, slots, description, session=None):
    """Create a pool with a given parameters."""
    if not (name and name.strip()):
        raise PoolBadRequest("Pool name shouldn't be empty")

    try:
        slots = int(slots)
    except ValueError:
        raise PoolBadRequest("Bad value for `slots`: %s" % slots)

    session.expire_on_commit = False
    pool = session.query(Pool).filter_by(pool=name).first()
    if pool is None:
        pool = Pool(pool=name, slots=slots, description=description)
        session.add(pool)
    else:
        pool.slots = slots
        pool.description = description

    session.commit()

    return pool
Code example #20
File: pool.py Project: zsiqxx/airflow
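The same helper as example #19, except that it raises `AirflowBadRequest`.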
def create_pool(name, slots, description, session=None):
    """Create a pool with a given parameters."""
    if not (name and name.strip()):
        raise AirflowBadRequest("Pool name shouldn't be empty")

    try:
        slots = int(slots)
    except ValueError:
        raise AirflowBadRequest("Bad value for `slots`: %s" % slots)

    session.expire_on_commit = False
    pool = session.query(Pool).filter_by(pool=name).first()
    if pool is None:
        pool = Pool(pool=name, slots=slots, description=description)
        session.add(pool)
    else:
        pool.slots = slots
        pool.description = description

    session.commit()

    return pool
Code example #21
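A trimmed `TestPoolApiExperimental.setUp` that seeds two `experimental_<n>` pools and keeps the first as `self.pool`.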
    def setUp(self):
        super(TestPoolApiExperimental, self).setUp()

        self.pools = []
        for i in range(2):
            name = 'experimental_%s' % (i + 1)
            pool = Pool(
                pool=name,
                slots=i,
                description=name,
            )
            self.session.add(pool)
            self.pools.append(pool)
        self.session.commit()
        self.pool = self.pools[0]
Code example #22
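Reads the table names from the `other` schema of a Postgres database, tries to create a single-slot `generate_tasks` pool, and stores the connection id, table list, and pool name in an Airflow Variable.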
def generate_init():
    """
    Function is checking names of tables in postgres other.
    Then information is combined with standard params and pushed as Airflow Variable object.
    Also pool is created.
    :return:
    """
    psql_hook = PostgresHook('airflow_docker_db')
    eng = psql_hook.get_sqlalchemy_engine()
    df = pd.read_sql(
        "select table_name from information_schema.tables where table_schema='other';",
        con=eng)
    table_list = df['table_name'].tolist()

    try:
        pool = Pool()
        pool.slots = 1
        pool.description = 'How many tasks can run at once'
        pool.pool = 'generate_tasks'
        session = Session()
        session.add(pool)
        session.commit()
    except Exception as ex:
        logging.info(f'Could not set pool. Details: {ex}')

    init_data = {
        'psql_conn_id': 'airflow_docker_db',
        'table_list': table_list,
        'pool': 'generate_tasks'
    }
    try:
        Variable.set(key='generate_tasks',
                     value=init_data,
                     serialize_json=True)
    except Exception as ex:
        logging.info(f'Could not set global variable. Details: {ex}')
Code example #23
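An older version of the `TestPoolApiExperimental.setUp` from example #21 that also loads the test config and builds the Flask test client.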
    def setUp(self):
        super(TestPoolApiExperimental, self).setUp()
        configuration.load_test_config()
        app = application.create_app(testing=True)
        self.app = app.test_client()
        self.session = Session()
        self.pools = []
        for i in range(2):
            name = 'experimental_%s' % (i + 1)
            pool = Pool(
                pool=name,
                slots=i,
                description=name,
            )
            self.session.add(pool)
            self.pools.append(pool)
        self.session.commit()
        self.pool = self.pools[0]
Code example #24
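Checks that the pool list page renders the running and queued slot counts for `test-pool` as links to the filtered `TaskInstanceModelView` list.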
def test_list(app, session, admin_client, pool):
    pool['pool'] = 'test-pool'
    session.add(Pool(**pool))
    session.commit()
    resp = admin_client.get('/pool/list/')
    # We should see this link
    with app.test_request_context():
        url = flask.url_for('TaskInstanceModelView.list',
                            _flt_3_pool='test-pool',
                            _flt_3_state='running')
        used_tag = flask.Markup("<a href='{url}'>{slots}</a>").format(url=url,
                                                                      slots=0)

        url = flask.url_for('TaskInstanceModelView.list',
                            _flt_3_pool='test-pool',
                            _flt_3_state='queued')
        queued_tag = flask.Markup("<a href='{url}'>{slots}</a>").format(
            url=url, slots=0)
    check_content_in_response(used_tag, resp)
    check_content_in_response(queued_tag, resp)
Code example #25
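A longer variant of example #7: the first `SchedulerJob` run (one pass, test executor) should leave the pooled task `QUEUED`, and a second run with the default executor should then move it to `FAILED`.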
    def test_scheduler_pooled_tasks(self):
        """
        Test that the scheduler handles queued tasks correctly
        See issue #1299
        """
        session = settings.Session()
        if not (session.query(Pool).filter(
                Pool.pool == 'test_queued_pool').first()):
            pool = Pool(pool='test_queued_pool', slots=5)
            session.merge(pool)
            session.commit()
        session.close()

        dag_id = 'test_scheduled_queued_tasks'
        dag = self.dagbag.get_dag(dag_id)
        dag.clear()

        scheduler = SchedulerJob(dag_id,
                                 num_runs=1,
                                 executor=TestExecutor(),
                                 **self.default_scheduler_args)
        scheduler.run()

        task_1 = dag.tasks[0]
        logging.info("Trying to find task {}".format(task_1))
        ti = TI(task_1, dag.start_date)
        ti.refresh_from_db()
        logging.error("TI is: {}".format(ti))
        self.assertEqual(ti.state, State.QUEUED)

        # now we use a DIFFERENT scheduler and executor
        # to simulate the num-runs CLI arg
        scheduler2 = SchedulerJob(dag_id,
                                  num_runs=5,
                                  executor=DEFAULT_EXECUTOR.__class__(),
                                  **self.default_scheduler_args)
        scheduler2.run()

        ti.refresh_from_db()
        self.assertEqual(ti.state, State.FAILED)
        dag.clear()
Code example #26
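Adds a single-slot Hive pool whose name and description come from a `pool_templates` dict; the `session` argument is presumably injected by Airflow's `provide_session` decorator.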
def create_hive_pool(session: Optional[Session] = None) -> None:
    pool = Pool(pool=pool_templates['hive_name'],
                slots=1,
                description=pool_templates['hive_description'])
    session.add(pool)
Code example #27
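Resizes the default pool inside a `create_session()` block; the context manager commits the change when the block exits without an error.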
def set_default_pool_slots(slots):
    with create_session() as session:
        default_pool = Pool.get_default_pool(session)
        default_pool.slots = slots
Code example #28
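A minimal test fixture: clear all pools, then add a single-slot `test_pool`.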
    def setUp(self):
        db.clear_db_pools()
        with create_session() as session:
            test_pool = Pool(pool='test_pool', slots=1)
            session.add(test_pool)
            session.commit()