class TaskInstance(Base):  # type: ignore
    """Task instance class."""

    __tablename__ = "task_instance"

    task_id = Column(StringID(), primary_key=True)
    dag_id = Column(StringID(), primary_key=True)
    execution_date = Column(UtcDateTime, primary_key=True)
    start_date = Column(UtcDateTime)
    end_date = Column(UtcDateTime)
    duration = Column(Float)
    state = Column(String(20))
    _try_number = Column('try_number', Integer, default=0)
    max_tries = Column(Integer)
    hostname = Column(String(1000))
    unixname = Column(String(1000))
    job_id = Column(Integer)
    pool = Column(String(50), nullable=False)
    pool_slots = Column(Integer, default=1)
    queue = Column(String(256))
    priority_weight = Column(Integer)
    operator = Column(String(1000))
    queued_dttm = Column(UtcDateTime)
    queued_by_job_id = Column(Integer)
    pid = Column(Integer)
    executor_config = Column(PickleType(pickler=dill))
    external_executor_id = Column(StringID())
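# ``StringID`` and ``UtcDateTime`` are imported from the surrounding module and
# are not shown in this excerpt. A minimal sketch of ``StringID``, assuming the
# Airflow convention of 250-character identifiers:
from sqlalchemy import String

ID_LEN = 250  # assumed default identifier length

def StringID(*, length=ID_LEN, **kwargs) -> String:
    """Return a String type sized for identifiers such as dag_id and task_id."""
    return String(length=length, **kwargs)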
def upgrade():
    tables()
    dialect_name = op.get_bind().dialect.name

    op.drop_index('idx_task_fail_dag_task_date', table_name='task_fail')

    with op.batch_alter_table('task_fail') as batch_op:
        batch_op.add_column(
            sa.Column('map_index',
                      sa.Integer(),
                      server_default='-1',
                      nullable=False))
        batch_op.add_column(
            sa.Column('run_id', type_=StringID(), nullable=True))

    update_query = _update_value_from_dag_run(
        dialect_name=dialect_name,
        target_table=task_fail,
        target_column=task_fail.c.run_id,
        join_columns=['dag_id', 'execution_date'],
    )
    op.execute(update_query)
    with op.batch_alter_table('task_fail') as batch_op:
        batch_op.alter_column('run_id',
                              existing_type=StringID(),
                              existing_nullable=True,
                              nullable=False)
        batch_op.drop_column('execution_date')
        batch_op.create_foreign_key(
            'task_fail_ti_fkey',
            'task_instance',
            ['dag_id', 'task_id', 'run_id', 'map_index'],
            ['dag_id', 'task_id', 'run_id', 'map_index'],
            ondelete='CASCADE',
        )
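# ``_update_value_from_dag_run`` is not defined in this excerpt. A hedged sketch
# of what it plausibly builds, given how it is called above: an UPDATE that
# copies ``target_column`` from ``dag_run`` into ``target_table``, joined on
# ``join_columns``. SQLite lacks multi-table UPDATE, so a correlated subquery
# is used there instead.
def _update_value_from_dag_run(dialect_name, target_table, target_column, join_columns):
    condition = sa.and_(
        *[getattr(dag_run.c, col) == getattr(target_table.c, col) for col in join_columns]
    )
    if dialect_name == 'sqlite':
        # Correlated-subquery UPDATE for SQLite
        sub_q = sa.select([dag_run.c[target_column.name]]).where(condition)
        return target_table.update().values({target_column: sub_q})
    # Multi-table UPDATE everywhere else
    return target_table.update().where(condition).values(
        {target_column: dag_run.c[target_column.name]}
    )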
def downgrade():
    """Unapply Remove smart sensors"""
    op.create_table(
        'sensor_instance',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('task_id', StringID(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('execution_date', TIMESTAMP, nullable=False),
        sa.Column('state', sa.String(length=20), nullable=True),
        sa.Column('try_number', sa.Integer(), nullable=True),
        sa.Column('start_date', TIMESTAMP, nullable=True),
        sa.Column('operator', sa.String(length=1000), nullable=False),
        sa.Column('op_classpath', sa.String(length=1000), nullable=False),
        sa.Column('hashcode', sa.BigInteger(), nullable=False),
        sa.Column('shardcode', sa.Integer(), nullable=False),
        sa.Column('poke_context', sa.Text(), nullable=False),
        sa.Column('execution_context', sa.Text(), nullable=True),
        sa.Column('created_at', TIMESTAMP, default=func.now(), nullable=False),
        sa.Column('updated_at', TIMESTAMP, default=func.now(), nullable=False),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_index('ti_primary_key', 'sensor_instance', ['dag_id', 'task_id', 'execution_date'], unique=True)
    op.create_index('si_hashcode', 'sensor_instance', ['hashcode'], unique=False)
    op.create_index('si_shardcode', 'sensor_instance', ['shardcode'], unique=False)
    op.create_index('si_state_shard', 'sensor_instance', ['state', 'shardcode'], unique=False)
    op.create_index('si_updated_at', 'sensor_instance', ['updated_at'], unique=False)
def upgrade():

    conn = op.get_bind()
    inspector = Inspector.from_engine(conn)
    tables = inspector.get_table_names()
    if 'sensor_instance' in tables:
        return

    op.create_table(
        'sensor_instance',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('task_id', StringID(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('execution_date', TIMESTAMP, nullable=False),
        sa.Column('state', sa.String(length=20), nullable=True),
        sa.Column('try_number', sa.Integer(), nullable=True),
        sa.Column('start_date', TIMESTAMP, nullable=True),
        sa.Column('operator', sa.String(length=1000), nullable=False),
        sa.Column('op_classpath', sa.String(length=1000), nullable=False),
        sa.Column('hashcode', sa.BigInteger(), nullable=False),
        sa.Column('shardcode', sa.Integer(), nullable=False),
        sa.Column('poke_context', sa.Text(), nullable=False),
        sa.Column('execution_context', sa.Text(), nullable=True),
        sa.Column('created_at', TIMESTAMP, default=func.now(), nullable=False),
        sa.Column('updated_at', TIMESTAMP, default=func.now(), nullable=False),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_index('ti_primary_key', 'sensor_instance', ['dag_id', 'task_id', 'execution_date'], unique=True)
    op.create_index('si_hashcode', 'sensor_instance', ['hashcode'], unique=False)
    op.create_index('si_shardcode', 'sensor_instance', ['shardcode'], unique=False)
    op.create_index('si_state_shard', 'sensor_instance', ['state', 'shardcode'], unique=False)
    op.create_index('si_updated_at', 'sensor_instance', ['updated_at'], unique=False)
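# The inspector guard above makes the migration idempotent: re-running it
# against a database that already has ``sensor_instance`` is a no-op. The same
# check as a stand-alone helper (a sketch; later snippets in this collection
# use ``inspect(conn)``, which supersedes ``Inspector.from_engine``):
from sqlalchemy import inspect

def table_exists(conn, table_name: str) -> bool:
    """Return True if ``table_name`` already exists on this connection."""
    return table_name in inspect(conn).get_table_names()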
def upgrade():
    # See 0e2a74e0fc9f_add_time_zone_awareness
    timestamp = TIMESTAMP
    if op.get_bind().dialect.name == 'mssql':
        # We need to keep this as it was for this old migration on mssql
        timestamp = sa.DateTime()

    op.create_table(
        TABLE_NAME,
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('task_id', StringID(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=False),
        # use explicit server_default=None otherwise mysql implies defaults for first timestamp column
        sa.Column('execution_date',
                  timestamp,
                  nullable=False,
                  server_default=None),
        sa.Column('try_number', sa.Integer(), nullable=False),
        sa.Column('start_date', timestamp, nullable=False),
        sa.Column('end_date', timestamp, nullable=False),
        sa.Column('duration', sa.Integer(), nullable=False),
        sa.Column('reschedule_date', timestamp, nullable=False),
        sa.PrimaryKeyConstraint('id'),
        sa.ForeignKeyConstraint(
            ['task_id', 'dag_id', 'execution_date'],
            [
                'task_instance.task_id', 'task_instance.dag_id',
                'task_instance.execution_date'
            ],
            name='task_reschedule_dag_task_date_fkey',
        ),
    )
    op.create_index(INDEX_NAME,
                    TABLE_NAME, ['dag_id', 'task_id', 'execution_date'],
                    unique=False)
def _get_dagrun_table() -> Table:
    return Table(
        "dag_run",
        metadata,
        Column("id", Integer, primary_key=True),
        Column("dag_id", StringID(), nullable=False),
        Column("run_id", StringID(), nullable=False),
        Column("execution_date", TIMESTAMP, nullable=False),
    )
def _get_old_xcom_columns() -> Sequence[Column]:
    return [
        Column("key", StringID(length=512), nullable=False, primary_key=True),
        Column("value", LargeBinary),
        Column("timestamp", TIMESTAMP, nullable=False),
        Column("task_id", StringID(length=250), nullable=False, primary_key=True),
        Column("dag_id", StringID(length=250), nullable=False, primary_key=True),
        Column("execution_date", TIMESTAMP, nullable=False, primary_key=True),
    ]
def _get_new_xcom_columns() -> Sequence[Column]:
    return [
        Column("dag_run_id", Integer(), nullable=False),
        Column("task_id", StringID(), nullable=False),
        Column("key", StringID(length=512), nullable=False),
        Column("value", LargeBinary),
        Column("timestamp", TIMESTAMP, nullable=False),
        Column("dag_id", StringID(), nullable=False),
        Column("run_id", StringID(), nullable=False),
    ]
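# These two factories describe the xcom schema before and after the move from
# execution_date-keyed rows to run-keyed rows. A hypothetical sketch of how
# they might be consumed; each Table gets its own MetaData so the two
# definitions of "xcom" do not clash:
from sqlalchemy import MetaData

def _get_new_xcom_table() -> Table:
    return Table("xcom", MetaData(), *_get_new_xcom_columns())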
class TaskInstance(Base):  # type: ignore
    """Task Instance class."""

    __tablename__ = "task_instance"

    task_id = Column(StringID(), primary_key=True)
    dag_id = Column(StringID(), primary_key=True)
    execution_date = Column(sa.DateTime, primary_key=True)
    max_tries = Column(Integer)
    try_number = Column(Integer, default=0)
def tables():
    global task_instance, task_fail, dag_run
    metadata = sa.MetaData()
    task_instance = sa.Table(
        'task_instance',
        metadata,
        sa.Column('task_id', StringID()),
        sa.Column('dag_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('map_index', sa.Integer(), server_default='-1'),
        sa.Column('execution_date', TIMESTAMP),
    )
    task_fail = sa.Table(
        'task_fail',
        metadata,
        sa.Column('dag_id', StringID()),
        sa.Column('task_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('map_index', sa.Integer(), server_default='-1'),
        sa.Column('execution_date', TIMESTAMP),
    )
    dag_run = sa.Table(
        'dag_run',
        metadata,
        sa.Column('dag_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('execution_date', TIMESTAMP),
    )
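# These lightweight Table stubs, built on a fresh MetaData, let the migration
# construct UPDATE statements against the schema as it exists at this revision,
# independent of how the current ORM models happen to define the same tables.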
def upgrade():
    op.create_table(
        'task_fail',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('task_id', StringID(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('execution_date', sa.DateTime(), nullable=False),
        sa.Column('start_date', sa.DateTime(), nullable=True),
        sa.Column('end_date', sa.DateTime(), nullable=True),
        sa.Column('duration', sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
def upgrade():
    op.create_table(
        'dag_run',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=True),
        sa.Column('execution_date', sa.DateTime(), nullable=True),
        sa.Column('state', sa.String(length=50), nullable=True),
        sa.Column('run_id', StringID(), nullable=True),
        sa.Column('external_trigger', sa.Boolean(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('dag_id', 'execution_date'),
        sa.UniqueConstraint('dag_id', 'run_id'),
    )
def upgrade():
    """Apply Increase length of ``Flask-AppBuilder`` ``ab_view_menu.name`` column"""
    conn = op.get_bind()
    inspector = Inspector.from_engine(conn)
    tables = inspector.get_table_names()

    if "ab_view_menu" in tables:
        if conn.dialect.name == "sqlite":
            op.execute("PRAGMA foreign_keys=off")
            op.execute("""
            CREATE TABLE IF NOT EXISTS ab_view_menu_dg_tmp
            (
                id INTEGER NOT NULL PRIMARY KEY,
                name VARCHAR(250) NOT NULL UNIQUE
            );
            """)
            op.execute(
                "INSERT INTO ab_view_menu_dg_tmp(id, name) select id, name from ab_view_menu;"
            )
            op.execute("DROP TABLE ab_view_menu")
            op.execute(
                "ALTER TABLE ab_view_menu_dg_tmp rename to ab_view_menu;")
            op.execute("PRAGMA foreign_keys=on")
        else:
            op.alter_column(
                table_name='ab_view_menu',
                column_name='name',
                type_=StringID(length=250),
                nullable=False,
            )
def downgrade():
    """Unapply make xcom pkey columns non-nullable"""
    conn = op.get_bind()
    with op.batch_alter_table('xcom') as bop:
        if conn.dialect.name == 'mssql':
            bop.drop_constraint('pk_xcom', 'primary')
        bop.alter_column("key", type_=StringID(length=512), nullable=True)
        bop.alter_column("execution_date", type_=TIMESTAMP, nullable=True)
class DagRun(Base):  # type: ignore
    """
    DagRun describes an instance of a Dag. It can be created
    by the scheduler (for regular runs) or by an external trigger.
    """

    __tablename__ = "dag_run"

    id = Column(Integer, primary_key=True)
    dag_id = Column(StringID())
    execution_date = Column(UtcDateTime, default=timezone.utcnow)
    start_date = Column(UtcDateTime, default=timezone.utcnow)
    end_date = Column(UtcDateTime)
    _state = Column('state', String(50), default=State.RUNNING)
    run_id = Column(StringID())
    external_trigger = Column(Boolean, default=True)
    run_type = Column(String(50), nullable=False)
    conf = Column(PickleType)
def upgrade():
    """Apply make xcom pkey columns non-nullable"""
    conn = op.get_bind()
    with op.batch_alter_table('xcom') as bop:
        bop.alter_column("key", type_=StringID(length=512), nullable=False)
        bop.alter_column("execution_date", type_=TIMESTAMP, nullable=False)
        if conn.dialect.name == 'mssql':
            bop.create_primary_key(
                'pk_xcom', ['dag_id', 'task_id', 'key', 'execution_date'])
def upgrade():
    op.create_table(
        'dag_stats',
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('state', sa.String(length=50), nullable=False),
        sa.Column('count', sa.Integer(), nullable=False, default=0),
        sa.Column('dirty', sa.Boolean(), nullable=False, default=False),
        sa.PrimaryKeyConstraint('dag_id', 'state'),
    )
def upgrade():
    """Upgrade version."""
    json_type = sa.JSON
    conn = op.get_bind()

    if conn.dialect.name != "postgresql":
        # MySQL 5.7+ and MariaDB 10.2.3+ have JSON support. Rather than checking
        # versions, check for the JSON_VALID function existing.
        try:
            conn.execute("SELECT JSON_VALID(1)").fetchone()
        except (sa.exc.OperationalError, sa.exc.ProgrammingError):
            json_type = sa.Text

    op.create_table(
        'serialized_dag',
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('fileloc', sa.String(length=2000), nullable=False),
        sa.Column('fileloc_hash', sa.Integer(), nullable=False),
        sa.Column('data', json_type(), nullable=False),
        sa.Column('last_updated', sa.DateTime(), nullable=False),
        sa.PrimaryKeyConstraint('dag_id'),
    )
    op.create_index('idx_fileloc_hash', 'serialized_dag', ['fileloc_hash'])

    if conn.dialect.name == "mysql":
        conn.execute("SET time_zone = '+00:00'")
        cur = conn.execute("SELECT @@explicit_defaults_for_timestamp")
        res = cur.fetchall()
        if res[0][0] == 0:
            raise Exception(
                "Global variable explicit_defaults_for_timestamp needs to be on (1) for mysql"
            )

        op.alter_column(
            table_name="serialized_dag",
            column_name="last_updated",
            type_=mysql.TIMESTAMP(fsp=6),
            nullable=False,
        )
    else:
        # sqlite and mssql datetime are fine as is, so there is nothing to convert
        if conn.dialect.name in ("sqlite", "mssql"):
            return

        # we try to be database-agnostic, but not every db (e.g. sqlserver)
        # supports per-session time zones
        if conn.dialect.name == "postgresql":
            conn.execute("set timezone=UTC")

        op.alter_column(
            table_name="serialized_dag",
            column_name="last_updated",
            type_=sa.TIMESTAMP(timezone=True),
        )
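# The JSON_VALID probe above is feature detection rather than version checking.
# The same idea extracted into a reusable helper (hypothetical name, a sketch
# only):
def _best_json_type(conn):
    """Return sa.JSON where the server supports it, else fall back to sa.Text."""
    if conn.dialect.name == "postgresql":
        return sa.JSON
    try:
        conn.execute("SELECT JSON_VALID(1)").fetchone()
    except (sa.exc.OperationalError, sa.exc.ProgrammingError):
        return sa.Text
    return sa.JSON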
def upgrade():
    """Apply Add ``DagOwnerAttributes`` table"""
    op.create_table(
        'dag_owner_attributes',
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('owner', sa.String(length=500), nullable=False),
        sa.Column('link', sa.String(length=500), nullable=False),
        sa.ForeignKeyConstraint(['dag_id'], ['dag.dag_id'],
                                ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('dag_id', 'owner'),
    )
def upgrade():
    """Apply Add DagTags table"""
    op.create_table(
        'dag_tag',
        sa.Column('name', sa.String(length=100), nullable=False),
        sa.Column('dag_id', StringID(), nullable=False),
        sa.ForeignKeyConstraint(
            ['dag_id'],
            ['dag.dag_id'],
        ),
        sa.PrimaryKeyConstraint('name', 'dag_id'),
    )
def upgrade():
    """Apply Add ``RenderedTaskInstanceFields`` table"""
    json_type = sa.JSON
    conn = op.get_bind()

    if conn.dialect.name != "postgresql":
        # MySQL 5.7+ and MariaDB 10.2.3+ have JSON support. Rather than checking
        # versions, check for the JSON_VALID function existing.
        try:
            conn.execute("SELECT JSON_VALID(1)").fetchone()
        except (sa.exc.OperationalError, sa.exc.ProgrammingError):
            json_type = sa.Text

    op.create_table(
        TABLE_NAME,
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('task_id', StringID(), nullable=False),
        sa.Column('execution_date', sa.TIMESTAMP(timezone=True), nullable=False),
        sa.Column('rendered_fields', json_type(), nullable=False),
        sa.PrimaryKeyConstraint('dag_id', 'task_id', 'execution_date'),
    )
def downgrade():
    """Unapply Make XCom primary key columns non-nullable"""
    conn = op.get_bind()
    with op.batch_alter_table('xcom') as bop:
        # regardless of what the model defined, the `key` and `execution_date`
        # columns were always non-nullable for mysql, sqlite and postgres, so leave them alone

        if conn.dialect.name == 'mssql':
            bop.drop_constraint('pk_xcom', 'primary')
            # execution_date and key weren't nullable in the other databases
            bop.alter_column("key", type_=StringID(length=512), nullable=True)
            bop.alter_column("execution_date", type_=TIMESTAMP, nullable=True)
def upgrade():
    """Apply Add DagWarning model"""
    op.create_table(
        'dag_warning',
        sa.Column('dag_id', StringID(), primary_key=True),
        sa.Column('warning_type', sa.String(length=50), primary_key=True),
        sa.Column('message', sa.Text(), nullable=False),
        sa.Column('timestamp', TIMESTAMP, nullable=False),
        sa.ForeignKeyConstraint(
            ('dag_id', ),
            ['dag.dag_id'],
            name='dcw_dag_id_fkey',
            ondelete='CASCADE',
        ),
    )
def upgrade():
    tables()
    dialect_name = op.get_bind().dialect.name

    with op.batch_alter_table('rendered_task_instance_fields') as batch_op:
        batch_op.add_column(sa.Column('map_index', sa.Integer(), server_default='-1', nullable=False))
        rendered_task_instance_fields.append_column(
            sa.Column('map_index', sa.Integer(), server_default='-1', nullable=False)
        )
        batch_op.add_column(sa.Column('run_id', type_=StringID(), nullable=True))

    update_query = _multi_table_update(
        dialect_name, rendered_task_instance_fields, rendered_task_instance_fields.c.run_id
    )
    op.execute(update_query)
    with op.batch_alter_table(
        'rendered_task_instance_fields', copy_from=rendered_task_instance_fields
    ) as batch_op:
        if dialect_name == 'mssql':
            constraints = get_mssql_table_constraints(op.get_bind(), 'rendered_task_instance_fields')
            pk, _ = constraints['PRIMARY KEY'].popitem()
            batch_op.drop_constraint(pk, type_='primary')
        elif dialect_name != 'sqlite':
            batch_op.drop_constraint('rendered_task_instance_fields_pkey', type_='primary')
        batch_op.alter_column('run_id', existing_type=StringID(), existing_nullable=True, nullable=False)
        batch_op.drop_column('execution_date')
        batch_op.create_primary_key(
            'rendered_task_instance_fields_pkey', ['dag_id', 'task_id', 'run_id', 'map_index']
        )
        batch_op.create_foreign_key(
            'rtif_ti_fkey',
            'task_instance',
            ['dag_id', 'task_id', 'run_id', 'map_index'],
            ['dag_id', 'task_id', 'run_id', 'map_index'],
            ondelete='CASCADE',
        )
def tables(for_downgrade=False):
    import sqlalchemy_jsonfield

    global task_instance, rendered_task_instance_fields, dag_run
    metadata = sa.MetaData()
    task_instance = sa.Table(
        'task_instance',
        metadata,
        sa.Column('task_id', StringID()),
        sa.Column('dag_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('execution_date', TIMESTAMP),
    )
    rendered_task_instance_fields = sa.Table(
        'rendered_task_instance_fields',
        metadata,
        sa.Column('dag_id', StringID()),
        sa.Column('task_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('execution_date', TIMESTAMP),
        sa.Column('rendered_fields', sqlalchemy_jsonfield.JSONField(), nullable=False),
        sa.Column('k8s_pod_yaml', sqlalchemy_jsonfield.JSONField(), nullable=True),
    )

    if for_downgrade:
        rendered_task_instance_fields.append_column(
            sa.Column('map_index', sa.Integer(), server_default='-1'),
        )
        rendered_task_instance_fields.append_constraint(
            sa.ForeignKeyConstraint(
                ['dag_id', 'run_id'],
                ["dag_run.dag_id", "dag_run.run_id"],
                name='rtif_dag_run_fkey',
                ondelete="CASCADE",
            ),
        )
    dag_run = sa.Table(
        'dag_run',
        metadata,
        sa.Column('dag_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('execution_date', TIMESTAMP),
    )
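# ``for_downgrade=True`` appends ``map_index`` and the dag_run FK so that
# ``batch_alter_table(..., copy_from=rendered_task_instance_fields)`` hands
# Alembic the full target schema. This matters on SQLite, where batch mode
# recreates the table rather than altering it in place. Illustrative only:
#
#     tables(for_downgrade=True)
#     with op.batch_alter_table(
#         'rendered_task_instance_fields', copy_from=rendered_task_instance_fields
#     ) as batch_op:
#         ...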
def _create_dataset_dag_run_queue_table():
    op.create_table(
        'dataset_dag_run_queue',
        sa.Column('dataset_id', Integer, primary_key=True, nullable=False),
        sa.Column('target_dag_id', StringID(), primary_key=True, nullable=False),
        sa.Column('created_at', TIMESTAMP, default=func.now(), nullable=False),
        sa.ForeignKeyConstraint(
            ('dataset_id',),
            ['dataset.id'],
            name="ddrq_dataset_fkey",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ('target_dag_id',),
            ['dag.dag_id'],
            name="ddrq_dag_fkey",
            ondelete="CASCADE",
        ),
    )
def upgrade():
    conn = op.get_bind()
    inspector = inspect(conn)
    tables = inspector.get_table_names()

    if 'connection' not in tables:
        op.create_table(
            'connection',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('conn_id', StringID(), nullable=True),
            sa.Column('conn_type', sa.String(length=500), nullable=True),
            sa.Column('host', sa.String(length=500), nullable=True),
            sa.Column('schema', sa.String(length=500), nullable=True),
            sa.Column('login', sa.String(length=500), nullable=True),
            sa.Column('password', sa.String(length=500), nullable=True),
            sa.Column('port', sa.Integer(), nullable=True),
            sa.Column('extra', sa.String(length=5000), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'dag' not in tables:
        op.create_table(
            'dag',
            sa.Column('dag_id', StringID(), nullable=False),
            sa.Column('is_paused', sa.Boolean(), nullable=True),
            sa.Column('is_subdag', sa.Boolean(), nullable=True),
            sa.Column('is_active', sa.Boolean(), nullable=True),
            sa.Column('last_scheduler_run', sa.DateTime(), nullable=True),
            sa.Column('last_pickled', sa.DateTime(), nullable=True),
            sa.Column('last_expired', sa.DateTime(), nullable=True),
            sa.Column('scheduler_lock', sa.Boolean(), nullable=True),
            sa.Column('pickle_id', sa.Integer(), nullable=True),
            sa.Column('fileloc', sa.String(length=2000), nullable=True),
            sa.Column('owners', sa.String(length=2000), nullable=True),
            sa.PrimaryKeyConstraint('dag_id'),
        )
    if 'dag_pickle' not in tables:
        op.create_table(
            'dag_pickle',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('pickle', sa.PickleType(), nullable=True),
            sa.Column('created_dttm', sa.DateTime(), nullable=True),
            sa.Column('pickle_hash', sa.BigInteger(), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'import_error' not in tables:
        op.create_table(
            'import_error',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('timestamp', sa.DateTime(), nullable=True),
            sa.Column('filename', sa.String(length=1024), nullable=True),
            sa.Column('stacktrace', sa.Text(), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'job' not in tables:
        op.create_table(
            'job',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('dag_id', sa.String(length=250), nullable=True),
            sa.Column('state', sa.String(length=20), nullable=True),
            sa.Column('job_type', sa.String(length=30), nullable=True),
            sa.Column('start_date', sa.DateTime(), nullable=True),
            sa.Column('end_date', sa.DateTime(), nullable=True),
            sa.Column('latest_heartbeat', sa.DateTime(), nullable=True),
            sa.Column('executor_class', sa.String(length=500), nullable=True),
            sa.Column('hostname', sa.String(length=500), nullable=True),
            sa.Column('unixname', sa.String(length=1000), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
        op.create_index('job_type_heart',
                        'job', ['job_type', 'latest_heartbeat'],
                        unique=False)
    if 'log' not in tables:
        op.create_table(
            'log',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('dttm', sa.DateTime(), nullable=True),
            sa.Column('dag_id', StringID(), nullable=True),
            sa.Column('task_id', StringID(), nullable=True),
            sa.Column('event', sa.String(length=30), nullable=True),
            sa.Column('execution_date', sa.DateTime(), nullable=True),
            sa.Column('owner', sa.String(length=500), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'sla_miss' not in tables:
        op.create_table(
            'sla_miss',
            sa.Column('task_id', StringID(), nullable=False),
            sa.Column('dag_id', StringID(), nullable=False),
            sa.Column('execution_date', sa.DateTime(), nullable=False),
            sa.Column('email_sent', sa.Boolean(), nullable=True),
            sa.Column('timestamp', sa.DateTime(), nullable=True),
            sa.Column('description', sa.Text(), nullable=True),
            sa.PrimaryKeyConstraint('task_id', 'dag_id', 'execution_date'),
        )
    if 'slot_pool' not in tables:
        op.create_table(
            'slot_pool',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('pool', StringID(length=50), nullable=True),
            sa.Column('slots', sa.Integer(), nullable=True),
            sa.Column('description', sa.Text(), nullable=True),
            sa.PrimaryKeyConstraint('id'),
            sa.UniqueConstraint('pool'),
        )
    if 'task_instance' not in tables:
        op.create_table(
            'task_instance',
            sa.Column('task_id', StringID(), nullable=False),
            sa.Column('dag_id', StringID(), nullable=False),
            sa.Column('execution_date', sa.DateTime(), nullable=False),
            sa.Column('start_date', sa.DateTime(), nullable=True),
            sa.Column('end_date', sa.DateTime(), nullable=True),
            sa.Column('duration', sa.Integer(), nullable=True),
            sa.Column('state', sa.String(length=20), nullable=True),
            sa.Column('try_number', sa.Integer(), nullable=True),
            sa.Column('hostname', sa.String(length=1000), nullable=True),
            sa.Column('unixname', sa.String(length=1000), nullable=True),
            sa.Column('job_id', sa.Integer(), nullable=True),
            sa.Column('pool', sa.String(length=50), nullable=True),
            sa.Column('queue', sa.String(length=50), nullable=True),
            sa.Column('priority_weight', sa.Integer(), nullable=True),
            sa.PrimaryKeyConstraint('task_id', 'dag_id', 'execution_date'),
        )
        op.create_index('ti_dag_state',
                        'task_instance', ['dag_id', 'state'],
                        unique=False)
        op.create_index('ti_pool',
                        'task_instance', ['pool', 'state', 'priority_weight'],
                        unique=False)
        op.create_index('ti_state_lkp',
                        'task_instance',
                        ['dag_id', 'task_id', 'execution_date', 'state'],
                        unique=False)

    if 'user' not in tables:
        op.create_table(
            'user',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('username', StringID(), nullable=True),
            sa.Column('email', sa.String(length=500), nullable=True),
            sa.PrimaryKeyConstraint('id'),
            sa.UniqueConstraint('username'),
        )
    if 'variable' not in tables:
        op.create_table(
            'variable',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('key', StringID(), nullable=True),
            sa.Column('val', sa.Text(), nullable=True),
            sa.PrimaryKeyConstraint('id'),
            sa.UniqueConstraint('key'),
        )
    if 'chart' not in tables:
        op.create_table(
            'chart',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('label', sa.String(length=200), nullable=True),
            sa.Column('conn_id', sa.String(length=250), nullable=False),
            sa.Column('user_id', sa.Integer(), nullable=True),
            sa.Column('chart_type', sa.String(length=100), nullable=True),
            sa.Column('sql_layout', sa.String(length=50), nullable=True),
            sa.Column('sql', sa.Text(), nullable=True),
            sa.Column('y_log_scale', sa.Boolean(), nullable=True),
            sa.Column('show_datatable', sa.Boolean(), nullable=True),
            sa.Column('show_sql', sa.Boolean(), nullable=True),
            sa.Column('height', sa.Integer(), nullable=True),
            sa.Column('default_params', sa.String(length=5000), nullable=True),
            sa.Column('x_is_date', sa.Boolean(), nullable=True),
            sa.Column('iteration_no', sa.Integer(), nullable=True),
            sa.Column('last_modified', sa.DateTime(), nullable=True),
            sa.ForeignKeyConstraint(
                ['user_id'],
                ['user.id'],
            ),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'xcom' not in tables:
        op.create_table(
            'xcom',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('key', StringID(length=512), nullable=True),
            sa.Column('value', sa.PickleType(), nullable=True),
            sa.Column('timestamp',
                      sa.DateTime(),
                      default=func.now(),
                      nullable=False),
            sa.Column('execution_date', sa.DateTime(), nullable=False),
            sa.Column('task_id', StringID(), nullable=False),
            sa.Column('dag_id', StringID(), nullable=False),
            sa.PrimaryKeyConstraint('id'),
        )
def downgrade():
    """Unapply Change ``TaskInstance`` and ``TaskReschedule`` tables from execution_date to run_id."""
    dialect_name = op.get_bind().dialect.name
    dt_type = TIMESTAMP
    string_id_col_type = StringID()

    op.add_column('task_instance', sa.Column('execution_date', dt_type, nullable=True))
    op.add_column('task_reschedule', sa.Column('execution_date', dt_type, nullable=True))

    update_query = _multi_table_update(dialect_name, task_instance, task_instance.c.execution_date)
    op.execute(update_query)

    update_query = _multi_table_update(dialect_name, task_reschedule, task_reschedule.c.execution_date)
    op.execute(update_query)

    with op.batch_alter_table('task_reschedule', schema=None) as batch_op:
        batch_op.alter_column('execution_date', existing_type=dt_type, existing_nullable=True, nullable=False)

        # Can't drop PK index while there is a FK referencing it
        batch_op.drop_constraint('task_reschedule_ti_fkey', type_='foreignkey')
        batch_op.drop_constraint('task_reschedule_dr_fkey', type_='foreignkey')
        batch_op.drop_index('idx_task_reschedule_dag_task_run')

    with op.batch_alter_table('task_instance', schema=None) as batch_op:
        batch_op.drop_constraint('task_instance_pkey', type_='primary')
        batch_op.alter_column('execution_date', existing_type=dt_type, existing_nullable=True, nullable=False)
        batch_op.alter_column(
            'dag_id', existing_type=string_id_col_type, existing_nullable=False, nullable=True
        )

        batch_op.create_primary_key('task_instance_pkey', ['dag_id', 'task_id', 'execution_date'])

        batch_op.drop_constraint('task_instance_dag_run_fkey', type_='foreignkey')
        batch_op.drop_index('ti_dag_run')
        batch_op.drop_index('ti_state_lkp')
        batch_op.create_index('ti_state_lkp', ['dag_id', 'task_id', 'execution_date', 'state'])
        batch_op.create_index('ti_dag_date', ['dag_id', 'execution_date'], unique=False)

        batch_op.drop_column('run_id')

    with op.batch_alter_table('task_reschedule', schema=None) as batch_op:
        batch_op.drop_column('run_id')
        batch_op.create_index(
            'idx_task_reschedule_dag_task_date',
            ['dag_id', 'task_id', 'execution_date'],
            unique=False,
        )
        # Can only create FK once there is an index on these columns
        batch_op.create_foreign_key(
            'task_reschedule_dag_task_date_fkey',
            'task_instance',
            ['dag_id', 'task_id', 'execution_date'],
            ['dag_id', 'task_id', 'execution_date'],
            ondelete='CASCADE',
        )

    if dialect_name == "mssql":

        with op.batch_alter_table('dag_run', schema=None) as batch_op:
            batch_op.drop_constraint('dag_run_dag_id_execution_date_key', 'unique')
            batch_op.drop_constraint('dag_run_dag_id_run_id_key', 'unique')
            batch_op.drop_index('dag_id_state')
            batch_op.drop_index('idx_dag_run_running_dags')
            batch_op.drop_index('idx_dag_run_queued_dags')

            batch_op.alter_column('dag_id', existing_type=string_id_col_type, nullable=True)
            batch_op.alter_column('execution_date', existing_type=dt_type, nullable=True)
            batch_op.alter_column('run_id', existing_type=string_id_col_type, nullable=True)

            batch_op.create_index('dag_id_state', ['dag_id', 'state'], unique=False)
            batch_op.create_index('idx_dag_run_dag_id', ['dag_id'])
            batch_op.create_index(
                'idx_dag_run_running_dags',
                ["state", "dag_id"],
                mssql_where=sa.text("state='running'"),
            )
            batch_op.create_index(
                'idx_dag_run_queued_dags',
                ["state", "dag_id"],
                mssql_where=sa.text("state='queued'"),
            )
        op.execute(
            """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_execution_date
                    ON dag_run(dag_id,execution_date)
                    WHERE dag_id IS NOT NULL and execution_date is not null"""
        )
        op.execute(
            """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_run_id
                     ON dag_run(dag_id,run_id)
                     WHERE dag_id IS NOT NULL and run_id is not null"""
        )
    else:
        with op.batch_alter_table('dag_run', schema=None) as batch_op:
            batch_op.drop_index('dag_id_state')
            batch_op.alter_column('run_id', existing_type=sa.VARCHAR(length=250), nullable=True)
            batch_op.alter_column('execution_date', existing_type=dt_type, nullable=True)
            batch_op.alter_column('dag_id', existing_type=sa.VARCHAR(length=250), nullable=True)
            batch_op.create_index('dag_id_state', ['dag_id', 'state'], unique=False)
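# Note on the raw SQL in the mssql branch above: SQL Server treats NULLs as
# duplicates inside UNIQUE constraints, so "unique among non-null rows" has to
# be emulated with filtered unique indexes (WHERE ... IS NOT NULL) instead of
# plain unique constraints.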
def upgrade():
    """Apply Change ``TaskInstance`` and ``TaskReschedule`` tables from execution_date to run_id."""
    conn = op.get_bind()
    dialect_name = conn.dialect.name

    dt_type = TIMESTAMP
    string_id_col_type = StringID()

    if dialect_name == 'sqlite':
        naming_convention = {
            "uq": "%(table_name)s_%(column_0_N_name)s_key",
        }
        # The naming_convention forces the previously unnamed UNIQUE constraints to have the right name
        with op.batch_alter_table(
            'dag_run', naming_convention=naming_convention, recreate="always"
        ) as batch_op:
            batch_op.alter_column('dag_id', existing_type=string_id_col_type, nullable=False)
            batch_op.alter_column('run_id', existing_type=string_id_col_type, nullable=False)
            batch_op.alter_column('execution_date', existing_type=dt_type, nullable=False)
    elif dialect_name == 'mysql':
        with op.batch_alter_table('dag_run') as batch_op:
            batch_op.alter_column(
                'dag_id', existing_type=sa.String(length=ID_LEN), type_=string_id_col_type, nullable=False
            )
            batch_op.alter_column(
                'run_id', existing_type=sa.String(length=ID_LEN), type_=string_id_col_type, nullable=False
            )
            batch_op.alter_column('execution_date', existing_type=dt_type, nullable=False)
            batch_op.drop_constraint('dag_id', 'unique')
            batch_op.drop_constraint('dag_id_2', 'unique')
            batch_op.create_unique_constraint(
                'dag_run_dag_id_execution_date_key', ['dag_id', 'execution_date']
            )
            batch_op.create_unique_constraint('dag_run_dag_id_run_id_key', ['dag_id', 'run_id'])
    elif dialect_name == 'mssql':

        with op.batch_alter_table('dag_run') as batch_op:
            batch_op.drop_index('idx_not_null_dag_id_execution_date')
            batch_op.drop_index('idx_not_null_dag_id_run_id')

            batch_op.drop_index('dag_id_state')
            batch_op.drop_index('idx_dag_run_dag_id')
            batch_op.drop_index('idx_dag_run_running_dags')
            batch_op.drop_index('idx_dag_run_queued_dags')

            batch_op.alter_column('dag_id', existing_type=string_id_col_type, nullable=False)
            batch_op.alter_column('execution_date', existing_type=dt_type, nullable=False)
            batch_op.alter_column('run_id', existing_type=string_id_col_type, nullable=False)

            # _Somehow_ mssql was missing these constraints entirely
            batch_op.create_unique_constraint(
                'dag_run_dag_id_execution_date_key', ['dag_id', 'execution_date']
            )
            batch_op.create_unique_constraint('dag_run_dag_id_run_id_key', ['dag_id', 'run_id'])

            batch_op.create_index('dag_id_state', ['dag_id', 'state'], unique=False)
            batch_op.create_index('idx_dag_run_dag_id', ['dag_id'])
            batch_op.create_index(
                'idx_dag_run_running_dags',
                ["state", "dag_id"],
                mssql_where=sa.text("state='running'"),
            )
            batch_op.create_index(
                'idx_dag_run_queued_dags',
                ["state", "dag_id"],
                mssql_where=sa.text("state='queued'"),
            )
    else:
        # Make sure DagRun id columns are non-nullable
        with op.batch_alter_table('dag_run', schema=None) as batch_op:
            batch_op.alter_column('dag_id', existing_type=string_id_col_type, nullable=False)
            batch_op.alter_column('execution_date', existing_type=dt_type, nullable=False)
            batch_op.alter_column('run_id', existing_type=string_id_col_type, nullable=False)

    # First create column nullable
    op.add_column('task_instance', sa.Column('run_id', type_=string_id_col_type, nullable=True))
    op.add_column('task_reschedule', sa.Column('run_id', type_=string_id_col_type, nullable=True))

    # TaskReschedule has a FK to TaskInstance, so we have to update that before
    # we can drop the TI.execution_date column

    update_query = _multi_table_update(dialect_name, task_reschedule, task_reschedule.c.run_id)
    op.execute(update_query)

    with op.batch_alter_table('task_reschedule', schema=None) as batch_op:
        batch_op.alter_column(
            'run_id', existing_type=string_id_col_type, existing_nullable=True, nullable=False
        )

        batch_op.drop_constraint('task_reschedule_dag_task_date_fkey', 'foreignkey')
        if dialect_name == "mysql":
            # Mysql creates an index and a constraint -- we have to drop both
            batch_op.drop_index('task_reschedule_dag_task_date_fkey')
            batch_op.alter_column(
                'dag_id', existing_type=sa.String(length=ID_LEN), type_=string_id_col_type, nullable=False
            )
        batch_op.drop_index('idx_task_reschedule_dag_task_date')

    # Then update the new column by selecting the right value from DagRun
    # But first we will drop and recreate indexes to make it faster
    if dialect_name == 'postgresql':
        # Recreate task_instance, without execution_date and with dagrun.run_id
        op.execute(
            """
            CREATE TABLE new_task_instance AS SELECT
                ti.task_id,
                ti.dag_id,
                dag_run.run_id,
                ti.start_date,
                ti.end_date,
                ti.duration,
                ti.state,
                ti.try_number,
                ti.hostname,
                ti.unixname,
                ti.job_id,
                ti.pool,
                ti.queue,
                ti.priority_weight,
                ti.operator,
                ti.queued_dttm,
                ti.pid,
                ti.max_tries,
                ti.executor_config,
                ti.pool_slots,
                ti.queued_by_job_id,
                ti.external_executor_id,
                ti.trigger_id,
                ti.trigger_timeout,
                ti.next_method,
                ti.next_kwargs
            FROM task_instance ti
            INNER JOIN dag_run ON dag_run.dag_id = ti.dag_id AND dag_run.execution_date = ti.execution_date;
        """
        )
        op.drop_table('task_instance')
        op.rename_table('new_task_instance', 'task_instance')

        # Fix up columns after the 'create table as select'
        with op.batch_alter_table('task_instance', schema=None) as batch_op:
            batch_op.alter_column(
                'pool', existing_type=string_id_col_type, existing_nullable=True, nullable=False
            )
            batch_op.alter_column('max_tries', existing_type=sa.Integer(), server_default="-1")
            batch_op.alter_column(
                'pool_slots', existing_type=sa.Integer(), existing_nullable=True, nullable=False
            )
    else:
        update_query = _multi_table_update(dialect_name, task_instance, task_instance.c.run_id)
        op.execute(update_query)

    with op.batch_alter_table('task_instance', schema=None) as batch_op:
        if dialect_name != 'postgresql':
            # TODO: Is this right for non-postgres?
            if dialect_name == 'mssql':
                constraints = get_table_constraints(conn, "task_instance")
                pk, _ = constraints['PRIMARY KEY'].popitem()
                batch_op.drop_constraint(pk, type_='primary')
            elif dialect_name != 'sqlite':
                batch_op.drop_constraint('task_instance_pkey', type_='primary')
            batch_op.drop_index('ti_dag_date')
            batch_op.drop_index('ti_state_lkp')
            batch_op.drop_column('execution_date')

        # Then make it non-nullable
        batch_op.alter_column(
            'run_id', existing_type=string_id_col_type, existing_nullable=True, nullable=False
        )
        batch_op.alter_column(
            'dag_id', existing_type=string_id_col_type, existing_nullable=True, nullable=False
        )

        batch_op.create_primary_key('task_instance_pkey', ['dag_id', 'task_id', 'run_id'])
        batch_op.create_foreign_key(
            'task_instance_dag_run_fkey',
            'dag_run',
            ['dag_id', 'run_id'],
            ['dag_id', 'run_id'],
            ondelete='CASCADE',
        )

        batch_op.create_index('ti_dag_run', ['dag_id', 'run_id'])
        batch_op.create_index('ti_state_lkp', ['dag_id', 'task_id', 'run_id', 'state'])
        if dialect_name == 'postgresql':
            batch_op.create_index('ti_dag_state', ['dag_id', 'state'])
            batch_op.create_index('ti_job_id', ['job_id'])
            batch_op.create_index('ti_pool', ['pool', 'state', 'priority_weight'])
            batch_op.create_index('ti_state', ['state'])
            batch_op.create_foreign_key(
                'task_instance_trigger_id_fkey', 'trigger', ['trigger_id'], ['id'], ondelete="CASCADE"
            )
            batch_op.create_index('ti_trigger_id', ['trigger_id'])

    with op.batch_alter_table('task_reschedule', schema=None) as batch_op:
        batch_op.drop_column('execution_date')
        batch_op.create_index(
            'idx_task_reschedule_dag_task_run',
            ['dag_id', 'task_id', 'run_id'],
            unique=False,
        )
        # _Now_ that there is a unique constraint on these columns in TI, we can re-create the FK from TaskReschedule
        batch_op.create_foreign_key(
            'task_reschedule_ti_fkey',
            'task_instance',
            ['dag_id', 'task_id', 'run_id'],
            ['dag_id', 'task_id', 'run_id'],
            ondelete='CASCADE',
        )

        # https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-1785-database-engine-error?view=sql-server-ver15
        ondelete = 'CASCADE' if dialect_name != 'mssql' else 'NO ACTION'
        batch_op.create_foreign_key(
            'task_reschedule_dr_fkey',
            'dag_run',
            ['dag_id', 'run_id'],
            ['dag_id', 'run_id'],
            ondelete=ondelete,
        )
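# ``_multi_table_update`` is referenced throughout but never defined in this
# excerpt. A hedged sketch of the helper, matching how it is called above:
# choose the join condition based on which column is being back-filled, then
# emit a correlated-subquery UPDATE on SQLite or a multi-table UPDATE elsewhere.
def _multi_table_update(dialect_name, target, column):
    condition = dag_run.c.dag_id == target.c.dag_id
    if column is target.c.run_id:
        # Back-filling run_id: join on execution_date
        condition = sa.and_(condition, dag_run.c.execution_date == target.c.execution_date)
    else:
        # Back-filling execution_date: join on run_id
        condition = sa.and_(condition, dag_run.c.run_id == target.c.run_id)

    if dialect_name == 'sqlite':
        sub_q = sa.select([dag_run.c[column.name]]).where(condition)
        return target.update().values({column: sub_q})
    return target.update().where(condition).values({column: dag_run.c[column.name]})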
def upgrade():
    """Apply Add ``root_dag_id`` to ``DAG``"""
    op.add_column('dag', sa.Column('root_dag_id', StringID(), nullable=True))
    op.create_index('idx_root_dag_id', 'dag', ['root_dag_id'], unique=False)