Ejemplo n.º 1
0
    def test_dagrun_id_is_not_backfill(self):
        """
        Task instances whose dagrun ID is not a backfill dagrun ID should pass this dep.
        """
        dagrun = DagRun()
        dagrun.run_id = 'notbackfill_something'
        ti = Mock(get_dagrun=Mock(return_value=dagrun))
        self.assertTrue(DagrunIdDep().is_met(ti=ti))

        dagrun = DagRun()
        dagrun.run_id = None
        ti = Mock(get_dagrun=Mock(return_value=dagrun))
        self.assertTrue(DagrunIdDep().is_met(ti=ti))
Ejemplo n.º 2
0
 def test_dagrun_id_is_backfill(self):
     """
     Task instances whose dagrun ID is a backfill dagrun ID should fail this dep.
     """
     dagrun = DagRun()
     dagrun.run_id = BackfillJob.ID_PREFIX + '_something'
     ti = Mock(get_dagrun=Mock(return_value=dagrun))
     self.assertFalse(DagrunIdDep().is_met(ti=ti))
Ejemplo n.º 3
0
def create_dagrun_from_dbnd_run(
    databand_run,
    dag,
    execution_date,
    run_id,
    state=State.RUNNING,
    external_trigger=False,
    conf=None,
    session=None,
):
    """
    Create new DagRun and all relevant TaskInstances
    """
    dagrun = (session.query(DagRun).filter(
        DagRun.dag_id == dag.dag_id,
        DagRun.execution_date == execution_date).first())
    if dagrun is None:
        dagrun = DagRun(
            run_id=run_id,
            execution_date=execution_date,
            start_date=dag.start_date,
            _state=state,
            external_trigger=external_trigger,
            dag_id=dag.dag_id,
            conf=conf,
        )
        session.add(dagrun)
    else:
        logger.warning("Running with existing airflow dag run %s", dagrun)

    dagrun.dag = dag
    dagrun.run_id = run_id
    session.commit()

    # create the associated task instances
    # state is None at the moment of creation

    # dagrun.verify_integrity(session=session)
    # fetches [TaskInstance] again
    # tasks_skipped = databand_run.tasks_skipped

    # we can find a source of the completion, but also,
    # sometimes we don't know the source of the "complete"
    TI = TaskInstance
    tis = (session.query(TI).filter(TI.dag_id == dag.dag_id,
                                    TI.execution_date == execution_date).all())
    tis = {ti.task_id: ti for ti in tis}

    for af_task in dag.tasks:
        ti = tis.get(af_task.task_id)
        if ti is None:
            ti = TaskInstance(af_task, execution_date=execution_date)
            ti.start_date = timezone.utcnow()
            ti.end_date = timezone.utcnow()
            session.add(ti)
        task_run = databand_run.get_task_run_by_af_id(af_task.task_id)
        # all tasks part of the backfill are scheduled to dagrun

        # Set log file path to expected airflow log file path
        task_run.log.local_log_file.path = ti.log_filepath.replace(
            ".log", "/{0}.log".format(ti.try_number))
        if task_run.is_reused:
            # this task is completed and we don't need to run it anymore
            ti.state = State.SUCCESS

    session.commit()

    return dagrun