class TaskInstance(Base):  # type: ignore
    """Task instance class."""

    __tablename__ = "task_instance"

    task_id = Column(StringID(), primary_key=True)
    dag_id = Column(StringID(), primary_key=True)
    execution_date = Column(UtcDateTime, primary_key=True)
    start_date = Column(UtcDateTime)
    end_date = Column(UtcDateTime)
    duration = Column(Float)
    state = Column(String(20))
    _try_number = Column('try_number', Integer, default=0)
    max_tries = Column(Integer)
    hostname = Column(String(1000))
    unixname = Column(String(1000))
    job_id = Column(Integer)
    pool = Column(String(50), nullable=False)
    pool_slots = Column(Integer, default=1)
    queue = Column(String(256))
    priority_weight = Column(Integer)
    operator = Column(String(1000))
    queued_dttm = Column(UtcDateTime)
    queued_by_job_id = Column(Integer)
    pid = Column(Integer)
    executor_config = Column(PickleType(pickler=dill))
    external_executor_id = Column(StringID())
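# NOTE: the snippets in this file all rely on a ``StringID`` type imported from
# the main Airflow code base. A minimal sketch of what such a helper could look
# like, assuming the default identifier length of 250; the real helper also
# attaches a case-sensitive collation variant for MySQL, which is omitted here.
from sqlalchemy import String


def StringID(*, length: int = 250, **kwargs) -> String:
    """String type sized for Airflow identifier columns (dag_id, task_id, run_id)."""
    return String(length=length, **kwargs)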
def upgrade():
    tables()
    dialect_name = op.get_bind().dialect.name

    op.drop_index('idx_task_fail_dag_task_date', table_name='task_fail')

    with op.batch_alter_table('task_fail') as batch_op:
        batch_op.add_column(sa.Column('map_index', sa.Integer(), server_default='-1', nullable=False))
        batch_op.add_column(sa.Column('run_id', type_=StringID(), nullable=True))

    update_query = _update_value_from_dag_run(
        dialect_name=dialect_name,
        target_table=task_fail,
        target_column=task_fail.c.run_id,
        join_columns=['dag_id', 'execution_date'],
    )
    op.execute(update_query)

    with op.batch_alter_table('task_fail') as batch_op:
        batch_op.alter_column('run_id', existing_type=StringID(), existing_nullable=True, nullable=False)
        batch_op.drop_column('execution_date')
        batch_op.create_foreign_key(
            'task_fail_ti_fkey',
            'task_instance',
            ['dag_id', 'task_id', 'run_id', 'map_index'],
            ['dag_id', 'task_id', 'run_id', 'map_index'],
            ondelete='CASCADE',
        )
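# NOTE: ``_update_value_from_dag_run`` above is defined elsewhere in the
# migration module. A minimal sketch of the idea, assuming the module-level
# ``dag_run`` table from ``tables()``: rows are matched on ``join_columns``,
# and SQLite gets a correlated-subquery UPDATE because most SQLite versions
# do not support a multi-table UPDATE.
import sqlalchemy as sa


def _update_value_from_dag_run(dialect_name, target_table, target_column, join_columns):
    """Copy dag_run.run_id into ``target_column``, matching rows on ``join_columns``."""
    condition = sa.and_(
        *[getattr(dag_run.c, name) == getattr(target_table.c, name) for name in join_columns]
    )
    if dialect_name == 'sqlite':
        # Correlated subquery: one dag_run row per target row.
        sub_q = sa.select([dag_run.c.run_id]).where(condition)
        return target_table.update().values({target_column: sub_q})
    return target_table.update().where(condition).values({target_column: dag_run.c.run_id})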
def downgrade(): """Unapply Remove smart sensors""" op.create_table( 'sensor_instance', sa.Column('id', sa.Integer(), nullable=False), sa.Column('task_id', StringID(), nullable=False), sa.Column('dag_id', StringID(), nullable=False), sa.Column('execution_date', TIMESTAMP, nullable=False), sa.Column('state', sa.String(length=20), nullable=True), sa.Column('try_number', sa.Integer(), nullable=True), sa.Column('start_date', TIMESTAMP, nullable=True), sa.Column('operator', sa.String(length=1000), nullable=False), sa.Column('op_classpath', sa.String(length=1000), nullable=False), sa.Column('hashcode', sa.BigInteger(), nullable=False), sa.Column('shardcode', sa.Integer(), nullable=False), sa.Column('poke_context', sa.Text(), nullable=False), sa.Column('execution_context', sa.Text(), nullable=True), sa.Column('created_at', TIMESTAMP, default=func.now, nullable=False), sa.Column('updated_at', TIMESTAMP, default=func.now, nullable=False), sa.PrimaryKeyConstraint('id'), ) op.create_index('ti_primary_key', 'sensor_instance', ['dag_id', 'task_id', 'execution_date'], unique=True) op.create_index('si_hashcode', 'sensor_instance', ['hashcode'], unique=False) op.create_index('si_shardcode', 'sensor_instance', ['shardcode'], unique=False) op.create_index('si_state_shard', 'sensor_instance', ['state', 'shardcode'], unique=False) op.create_index('si_updated_at', 'sensor_instance', ['updated_at'], unique=False)
def upgrade():
    conn = op.get_bind()
    inspector = Inspector.from_engine(conn)
    tables = inspector.get_table_names()
    if 'sensor_instance' in tables:
        return

    op.create_table(
        'sensor_instance',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('task_id', StringID(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('execution_date', TIMESTAMP, nullable=False),
        sa.Column('state', sa.String(length=20), nullable=True),
        sa.Column('try_number', sa.Integer(), nullable=True),
        sa.Column('start_date', TIMESTAMP, nullable=True),
        sa.Column('operator', sa.String(length=1000), nullable=False),
        sa.Column('op_classpath', sa.String(length=1000), nullable=False),
        sa.Column('hashcode', sa.BigInteger(), nullable=False),
        sa.Column('shardcode', sa.Integer(), nullable=False),
        sa.Column('poke_context', sa.Text(), nullable=False),
        sa.Column('execution_context', sa.Text(), nullable=True),
        sa.Column('created_at', TIMESTAMP, default=func.now(), nullable=False),
        sa.Column('updated_at', TIMESTAMP, default=func.now(), nullable=False),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_index('ti_primary_key', 'sensor_instance', ['dag_id', 'task_id', 'execution_date'], unique=True)
    op.create_index('si_hashcode', 'sensor_instance', ['hashcode'], unique=False)
    op.create_index('si_shardcode', 'sensor_instance', ['shardcode'], unique=False)
    op.create_index('si_state_shard', 'sensor_instance', ['state', 'shardcode'], unique=False)
    op.create_index('si_updated_at', 'sensor_instance', ['updated_at'], unique=False)
def upgrade():
    # See 0e2a74e0fc9f_add_time_zone_awareness
    timestamp = TIMESTAMP
    if op.get_bind().dialect.name == 'mssql':
        # We need to keep this as it was for this old migration on mssql
        timestamp = sa.DateTime()

    op.create_table(
        TABLE_NAME,
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('task_id', StringID(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=False),
        # use explicit server_default=None otherwise mysql implies defaults for first timestamp column
        sa.Column('execution_date', timestamp, nullable=False, server_default=None),
        sa.Column('try_number', sa.Integer(), nullable=False),
        sa.Column('start_date', timestamp, nullable=False),
        sa.Column('end_date', timestamp, nullable=False),
        sa.Column('duration', sa.Integer(), nullable=False),
        sa.Column('reschedule_date', timestamp, nullable=False),
        sa.PrimaryKeyConstraint('id'),
        sa.ForeignKeyConstraint(
            ['task_id', 'dag_id', 'execution_date'],
            ['task_instance.task_id', 'task_instance.dag_id', 'task_instance.execution_date'],
            name='task_reschedule_dag_task_date_fkey',
        ),
    )
    op.create_index(INDEX_NAME, TABLE_NAME, ['dag_id', 'task_id', 'execution_date'], unique=False)
def _get_dagrun_table() -> Table:
    return Table(
        "dag_run",
        metadata,
        Column("id", Integer, primary_key=True),
        Column("dag_id", StringID(), nullable=False),
        Column("run_id", StringID(), nullable=False),
        Column("execution_date", TIMESTAMP, nullable=False),
    )
def _get_old_xcom_columns() -> Sequence[Column]:
    return [
        Column("key", StringID(length=512), nullable=False, primary_key=True),
        Column("value", LargeBinary),
        Column("timestamp", TIMESTAMP, nullable=False),
        Column("task_id", StringID(length=250), nullable=False, primary_key=True),
        Column("dag_id", StringID(length=250), nullable=False, primary_key=True),
        Column("execution_date", TIMESTAMP, nullable=False, primary_key=True),
    ]
def _get_new_xcom_columns() -> Sequence[Column]:
    return [
        Column("dag_run_id", Integer(), nullable=False),
        Column("task_id", StringID(), nullable=False),
        Column("key", StringID(length=512), nullable=False),
        Column("value", LargeBinary),
        Column("timestamp", TIMESTAMP, nullable=False),
        Column("dag_id", StringID(), nullable=False),
        Column("run_id", StringID(), nullable=False),
    ]
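# NOTE: a sketch of how the three helpers above can be combined to re-key the
# xcom table on (dag_run_id, run_id). The temporary table name
# ``__airflow_tmp_xcom`` is an assumption here, and the real migration also
# contains dialect-specific handling (e.g. MSSQL rename quirks) omitted below.
import sqlalchemy as sa
from sqlalchemy import Table, select


def upgrade():
    dagrun = _get_dagrun_table()
    old_xcom = Table("xcom", metadata, *_get_old_xcom_columns())
    new_xcom = op.create_table("__airflow_tmp_xcom", *_get_new_xcom_columns())

    # Re-key every xcom row by joining its (dag_id, execution_date) identity
    # onto dag_run to pick up the surrogate id and run_id.
    rekeyed = select(
        [
            dagrun.c.id,
            old_xcom.c.task_id,
            old_xcom.c.key,
            old_xcom.c.value,
            old_xcom.c.timestamp,
            old_xcom.c.dag_id,
            dagrun.c.run_id,
        ]
    ).select_from(
        old_xcom.join(
            dagrun,
            sa.and_(
                old_xcom.c.dag_id == dagrun.c.dag_id,
                old_xcom.c.execution_date == dagrun.c.execution_date,
            ),
        )
    )
    op.execute(new_xcom.insert().from_select([c.name for c in new_xcom.columns], rekeyed))
    op.drop_table("xcom")
    op.rename_table("__airflow_tmp_xcom", "xcom")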
class TaskInstance(Base):  # type: ignore
    """Task Instance class."""

    __tablename__ = "task_instance"

    task_id = Column(StringID(), primary_key=True)
    dag_id = Column(StringID(), primary_key=True)
    execution_date = Column(sa.DateTime, primary_key=True)
    max_tries = Column(Integer)
    try_number = Column(Integer, default=0)
def tables():
    global task_instance, task_fail, dag_run
    metadata = sa.MetaData()
    task_instance = sa.Table(
        'task_instance',
        metadata,
        sa.Column('task_id', StringID()),
        sa.Column('dag_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('map_index', sa.Integer(), server_default='-1'),
        sa.Column('execution_date', TIMESTAMP),
    )
    task_fail = sa.Table(
        'task_fail',
        metadata,
        sa.Column('dag_id', StringID()),
        sa.Column('task_id', StringID()),
        sa.Column('run_id', StringID()),
        # map_index is an integer surrogate (-1 for unmapped tasks), matching task_instance
        sa.Column('map_index', sa.Integer(), server_default='-1'),
        sa.Column('execution_date', TIMESTAMP),
    )
    dag_run = sa.Table(
        'dag_run',
        metadata,
        sa.Column('dag_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('execution_date', TIMESTAMP),
    )
def upgrade():
    op.create_table(
        'task_fail',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('task_id', StringID(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('execution_date', sa.DateTime(), nullable=False),
        sa.Column('start_date', sa.DateTime(), nullable=True),
        sa.Column('end_date', sa.DateTime(), nullable=True),
        sa.Column('duration', sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
def upgrade():
    op.create_table(
        'dag_run',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('dag_id', StringID(), nullable=True),
        sa.Column('execution_date', sa.DateTime(), nullable=True),
        sa.Column('state', sa.String(length=50), nullable=True),
        sa.Column('run_id', StringID(), nullable=True),
        sa.Column('external_trigger', sa.Boolean(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('dag_id', 'execution_date'),
        sa.UniqueConstraint('dag_id', 'run_id'),
    )
def upgrade(): """Apply Increase length of ``Flask-AppBuilder`` ``ab_view_menu.name`` column""" conn = op.get_bind() inspector = Inspector.from_engine(conn) tables = inspector.get_table_names() if "ab_view_menu" in tables: if conn.dialect.name == "sqlite": op.execute("PRAGMA foreign_keys=off") op.execute(""" CREATE TABLE IF NOT EXISTS ab_view_menu_dg_tmp ( id INTEGER NOT NULL PRIMARY KEY, name VARCHAR(250) NOT NULL UNIQUE ); """) op.execute( "INSERT INTO ab_view_menu_dg_tmp(id, name) select id, name from ab_view_menu;" ) op.execute("DROP TABLE ab_view_menu") op.execute( "ALTER TABLE ab_view_menu_dg_tmp rename to ab_view_menu;") op.execute("PRAGMA foreign_keys=on") else: op.alter_column( table_name='ab_view_menu', column_name='name', type_=StringID(length=250), nullable=False, )
def downgrade(): """Unapply make xcom pkey columns non-nullable""" conn = op.get_bind() with op.batch_alter_table('xcom') as bop: if conn.dialect.name == 'mssql': bop.drop_constraint('pk_xcom', 'primary') bop.alter_column("key", type_=StringID(length=512), nullable=True) bop.alter_column("execution_date", type_=TIMESTAMP, nullable=True)
class DagRun(Base):  # type: ignore
    """
    DagRun describes an instance of a Dag. It can be created by the scheduler
    (for regular runs) or by an external trigger.
    """

    __tablename__ = "dag_run"

    id = Column(Integer, primary_key=True)
    dag_id = Column(StringID())
    execution_date = Column(UtcDateTime, default=timezone.utcnow)
    start_date = Column(UtcDateTime, default=timezone.utcnow)
    end_date = Column(UtcDateTime)
    _state = Column('state', String(50), default=State.RUNNING)
    run_id = Column(StringID())
    external_trigger = Column(Boolean, default=True)
    run_type = Column(String(50), nullable=False)
    conf = Column(PickleType)
def upgrade(): """Apply make xcom pkey columns non-nullable""" conn = op.get_bind() with op.batch_alter_table('xcom') as bop: bop.alter_column("key", type_=StringID(length=512), nullable=False) bop.alter_column("execution_date", type_=TIMESTAMP, nullable=False) if conn.dialect.name == 'mssql': bop.create_primary_key( 'pk_xcom', ['dag_id', 'task_id', 'key', 'execution_date'])
def upgrade():
    op.create_table(
        'dag_stats',
        sa.Column('dag_id', StringID(), nullable=False),
        sa.Column('state', sa.String(length=50), nullable=False),
        sa.Column('count', sa.Integer(), nullable=False, default=0),
        sa.Column('dirty', sa.Boolean(), nullable=False, default=False),
        sa.PrimaryKeyConstraint('dag_id', 'state'),
    )
def upgrade(): """Upgrade version.""" json_type = sa.JSON conn = op.get_bind() if conn.dialect.name != "postgresql": # Mysql 5.7+/MariaDB 10.2.3 has JSON support. Rather than checking for # versions, check for the function existing. try: conn.execute("SELECT JSON_VALID(1)").fetchone() except (sa.exc.OperationalError, sa.exc.ProgrammingError): json_type = sa.Text op.create_table( 'serialized_dag', sa.Column('dag_id', StringID(), nullable=False), sa.Column('fileloc', sa.String(length=2000), nullable=False), sa.Column('fileloc_hash', sa.Integer(), nullable=False), sa.Column('data', json_type(), nullable=False), sa.Column('last_updated', sa.DateTime(), nullable=False), sa.PrimaryKeyConstraint('dag_id'), ) op.create_index('idx_fileloc_hash', 'serialized_dag', ['fileloc_hash']) if conn.dialect.name == "mysql": conn.execute("SET time_zone = '+00:00'") cur = conn.execute("SELECT @@explicit_defaults_for_timestamp") res = cur.fetchall() if res[0][0] == 0: raise Exception( "Global variable explicit_defaults_for_timestamp needs to be on (1) for mysql" ) op.alter_column( table_name="serialized_dag", column_name="last_updated", type_=mysql.TIMESTAMP(fsp=6), nullable=False, ) else: # sqlite and mssql datetime are fine as is. Therefore, not converting if conn.dialect.name in ("sqlite", "mssql"): return # we try to be database agnostic, but not every db (e.g. sqlserver) # supports per session time zones if conn.dialect.name == "postgresql": conn.execute("set timezone=UTC") op.alter_column( table_name="serialized_dag", column_name="last_updated", type_=sa.TIMESTAMP(timezone=True), )
def upgrade(): """Apply Add ``DagOwnerAttributes`` table""" op.create_table( 'dag_owner_attributes', sa.Column('dag_id', StringID(), nullable=False), sa.Column('owner', sa.String(length=500), nullable=False), sa.Column('link', sa.String(length=500), nullable=False), sa.ForeignKeyConstraint(['dag_id'], ['dag.dag_id'], ondelete='CASCADE'), sa.PrimaryKeyConstraint('dag_id', 'owner'), )
def upgrade(): """Apply Add DagTags table""" op.create_table( 'dag_tag', sa.Column('name', sa.String(length=100), nullable=False), sa.Column('dag_id', StringID(), nullable=False), sa.ForeignKeyConstraint( ['dag_id'], ['dag.dag_id'], ), sa.PrimaryKeyConstraint('name', 'dag_id'), )
def upgrade(): """Apply Add ``RenderedTaskInstanceFields`` table""" json_type = sa.JSON conn = op.get_bind() if conn.dialect.name != "postgresql": # Mysql 5.7+/MariaDB 10.2.3 has JSON support. Rather than checking for # versions, check for the function existing. try: conn.execute("SELECT JSON_VALID(1)").fetchone() except (sa.exc.OperationalError, sa.exc.ProgrammingError): json_type = sa.Text op.create_table( TABLE_NAME, sa.Column('dag_id', StringID(), nullable=False), sa.Column('task_id', StringID(), nullable=False), sa.Column('execution_date', sa.TIMESTAMP(timezone=True), nullable=False), sa.Column('rendered_fields', json_type(), nullable=False), sa.PrimaryKeyConstraint('dag_id', 'task_id', 'execution_date'), )
def downgrade(): """Unapply Make XCom primary key columns non-nullable""" conn = op.get_bind() with op.batch_alter_table('xcom') as bop: # regardless of what the model defined, the `key` and `execution_date` # columns were always non-nullable for mysql, sqlite and postgres, so leave them alone if conn.dialect.name == 'mssql': bop.drop_constraint('pk_xcom', 'primary') # execution_date and key wasn't nullable in the other databases bop.alter_column("key", type_=StringID(length=512), nullable=True) bop.alter_column("execution_date", type_=TIMESTAMP, nullable=True)
def upgrade(): """Apply Add DagWarning model""" op.create_table( 'dag_warning', sa.Column('dag_id', StringID(), primary_key=True), sa.Column('warning_type', sa.String(length=50), primary_key=True), sa.Column('message', sa.Text(), nullable=False), sa.Column('timestamp', TIMESTAMP, nullable=False), sa.ForeignKeyConstraint( ('dag_id', ), ['dag.dag_id'], name='dcw_dag_id_fkey', ondelete='CASCADE', ), )
def upgrade():
    tables()
    dialect_name = op.get_bind().dialect.name

    with op.batch_alter_table('rendered_task_instance_fields') as batch_op:
        batch_op.add_column(sa.Column('map_index', sa.Integer(), server_default='-1', nullable=False))
        rendered_task_instance_fields.append_column(
            sa.Column('map_index', sa.Integer(), server_default='-1', nullable=False)
        )
        batch_op.add_column(sa.Column('run_id', type_=StringID(), nullable=True))

    update_query = _multi_table_update(
        dialect_name, rendered_task_instance_fields, rendered_task_instance_fields.c.run_id
    )
    op.execute(update_query)

    with op.batch_alter_table(
        'rendered_task_instance_fields', copy_from=rendered_task_instance_fields
    ) as batch_op:
        if dialect_name == 'mssql':
            constraints = get_mssql_table_constraints(op.get_bind(), 'rendered_task_instance_fields')
            pk, _ = constraints['PRIMARY KEY'].popitem()
            batch_op.drop_constraint(pk, type_='primary')
        elif dialect_name != 'sqlite':
            batch_op.drop_constraint('rendered_task_instance_fields_pkey', type_='primary')
        batch_op.alter_column('run_id', existing_type=StringID(), existing_nullable=True, nullable=False)
        batch_op.drop_column('execution_date')
        batch_op.create_primary_key(
            'rendered_task_instance_fields_pkey', ['dag_id', 'task_id', 'run_id', 'map_index']
        )
        batch_op.create_foreign_key(
            'rtif_ti_fkey',
            'task_instance',
            ['dag_id', 'task_id', 'run_id', 'map_index'],
            ['dag_id', 'task_id', 'run_id', 'map_index'],
            ondelete='CASCADE',
        )
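# NOTE: ``get_mssql_table_constraints`` above (referenced as
# ``get_table_constraints`` in the task_instance migration further below) comes
# from Airflow's migration utilities. A rough sketch of the shape it returns,
# assuming an INFORMATION_SCHEMA query: a mapping of constraint type to
# {constraint name: [column, ...]}, so callers can drop unnamed constraints.
from collections import defaultdict

import sqlalchemy as sa


def get_mssql_table_constraints(conn, table_name):
    """Return, e.g., {'PRIMARY KEY': {'pk_name': ['dag_id', 'task_id', ...]}}."""
    query = sa.text(
        """
        SELECT tc.CONSTRAINT_NAME, tc.CONSTRAINT_TYPE, ccu.COLUMN_NAME
        FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS tc
        JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE AS ccu
            ON ccu.CONSTRAINT_NAME = tc.CONSTRAINT_NAME
        WHERE tc.TABLE_NAME = :table_name
        """
    )
    constraint_dict = defaultdict(dict)
    for constraint_name, constraint_type, column in conn.execute(query, table_name=table_name):
        constraint_dict[constraint_type].setdefault(constraint_name, []).append(column)
    return constraint_dict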
def tables(for_downgrade=False):
    import sqlalchemy_jsonfield

    global task_instance, rendered_task_instance_fields, dag_run
    metadata = sa.MetaData()
    task_instance = sa.Table(
        'task_instance',
        metadata,
        sa.Column('task_id', StringID()),
        sa.Column('dag_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('execution_date', TIMESTAMP),
    )
    rendered_task_instance_fields = sa.Table(
        'rendered_task_instance_fields',
        metadata,
        sa.Column('dag_id', StringID()),
        sa.Column('task_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('execution_date', TIMESTAMP),
        sa.Column('rendered_fields', sqlalchemy_jsonfield.JSONField(), nullable=False),
        sa.Column('k8s_pod_yaml', sqlalchemy_jsonfield.JSONField(), nullable=True),
    )

    if for_downgrade:
        rendered_task_instance_fields.append_column(
            sa.Column('map_index', sa.Integer(), server_default='-1'),
        )
        rendered_task_instance_fields.append_constraint(
            ForeignKeyConstraint(
                ['dag_id', 'run_id'],
                ["dag_run.dag_id", "dag_run.run_id"],
                name='rtif_dag_run_fkey',
                ondelete="CASCADE",
            ),
        )
    dag_run = sa.Table(
        'dag_run',
        metadata,
        sa.Column('dag_id', StringID()),
        sa.Column('run_id', StringID()),
        sa.Column('execution_date', TIMESTAMP),
    )
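# NOTE: ``_multi_table_update``, used by the migrations above and below, is
# defined in the corresponding migration modules. A minimal sketch, assuming
# the module-level ``dag_run`` table from ``tables()``: rows are matched on
# dag_id plus whichever of run_id/execution_date is *not* being filled in, and
# SQLite falls back to a correlated subquery since it lacks multi-table UPDATE.
def _multi_table_update(dialect_name, target, column):
    condition = dag_run.c.dag_id == target.c.dag_id
    if column == target.c.run_id:
        condition = sa.and_(condition, dag_run.c.execution_date == target.c.execution_date)
    else:
        condition = sa.and_(condition, dag_run.c.run_id == target.c.run_id)

    if dialect_name == "sqlite":
        # Correlated subquery update for SQLite.
        sub_q = sa.select([getattr(dag_run.c, column.name)]).where(condition)
        return target.update().values({column: sub_q})
    return target.update().where(condition).values({column: getattr(dag_run.c, column.name)})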
def _create_dataset_dag_run_queue_table():
    op.create_table(
        'dataset_dag_run_queue',
        sa.Column('dataset_id', Integer, primary_key=True, nullable=False),
        sa.Column('target_dag_id', StringID(), primary_key=True, nullable=False),
        sa.Column('created_at', TIMESTAMP, default=func.now, nullable=False),
        sa.ForeignKeyConstraint(
            ('dataset_id',),
            ['dataset.id'],
            name="ddrq_dataset_fkey",
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ('target_dag_id',),
            ['dag.dag_id'],
            name="ddrq_dag_fkey",
            ondelete="CASCADE",
        ),
    )
def upgrade():
    conn = op.get_bind()
    inspector = inspect(conn)
    tables = inspector.get_table_names()

    if 'connection' not in tables:
        op.create_table(
            'connection',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('conn_id', StringID(), nullable=True),
            sa.Column('conn_type', sa.String(length=500), nullable=True),
            sa.Column('host', sa.String(length=500), nullable=True),
            sa.Column('schema', sa.String(length=500), nullable=True),
            sa.Column('login', sa.String(length=500), nullable=True),
            sa.Column('password', sa.String(length=500), nullable=True),
            sa.Column('port', sa.Integer(), nullable=True),
            sa.Column('extra', sa.String(length=5000), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'dag' not in tables:
        op.create_table(
            'dag',
            sa.Column('dag_id', StringID(), nullable=False),
            sa.Column('is_paused', sa.Boolean(), nullable=True),
            sa.Column('is_subdag', sa.Boolean(), nullable=True),
            sa.Column('is_active', sa.Boolean(), nullable=True),
            sa.Column('last_scheduler_run', sa.DateTime(), nullable=True),
            sa.Column('last_pickled', sa.DateTime(), nullable=True),
            sa.Column('last_expired', sa.DateTime(), nullable=True),
            sa.Column('scheduler_lock', sa.Boolean(), nullable=True),
            sa.Column('pickle_id', sa.Integer(), nullable=True),
            sa.Column('fileloc', sa.String(length=2000), nullable=True),
            sa.Column('owners', sa.String(length=2000), nullable=True),
            sa.PrimaryKeyConstraint('dag_id'),
        )
    if 'dag_pickle' not in tables:
        op.create_table(
            'dag_pickle',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('pickle', sa.PickleType(), nullable=True),
            sa.Column('created_dttm', sa.DateTime(), nullable=True),
            sa.Column('pickle_hash', sa.BigInteger(), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'import_error' not in tables:
        op.create_table(
            'import_error',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('timestamp', sa.DateTime(), nullable=True),
            sa.Column('filename', sa.String(length=1024), nullable=True),
            sa.Column('stacktrace', sa.Text(), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'job' not in tables:
        op.create_table(
            'job',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('dag_id', sa.String(length=250), nullable=True),
            sa.Column('state', sa.String(length=20), nullable=True),
            sa.Column('job_type', sa.String(length=30), nullable=True),
            sa.Column('start_date', sa.DateTime(), nullable=True),
            sa.Column('end_date', sa.DateTime(), nullable=True),
            sa.Column('latest_heartbeat', sa.DateTime(), nullable=True),
            sa.Column('executor_class', sa.String(length=500), nullable=True),
            sa.Column('hostname', sa.String(length=500), nullable=True),
            sa.Column('unixname', sa.String(length=1000), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
        op.create_index('job_type_heart', 'job', ['job_type', 'latest_heartbeat'], unique=False)
    if 'log' not in tables:
        op.create_table(
            'log',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('dttm', sa.DateTime(), nullable=True),
            sa.Column('dag_id', StringID(), nullable=True),
            sa.Column('task_id', StringID(), nullable=True),
            sa.Column('event', sa.String(length=30), nullable=True),
            sa.Column('execution_date', sa.DateTime(), nullable=True),
            sa.Column('owner', sa.String(length=500), nullable=True),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'sla_miss' not in tables:
        op.create_table(
            'sla_miss',
            sa.Column('task_id', StringID(), nullable=False),
            sa.Column('dag_id', StringID(), nullable=False),
            sa.Column('execution_date', sa.DateTime(), nullable=False),
            sa.Column('email_sent', sa.Boolean(), nullable=True),
            sa.Column('timestamp', sa.DateTime(), nullable=True),
            sa.Column('description', sa.Text(), nullable=True),
            sa.PrimaryKeyConstraint('task_id', 'dag_id', 'execution_date'),
        )
    if 'slot_pool' not in tables:
        op.create_table(
            'slot_pool',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('pool', StringID(length=50), nullable=True),
            sa.Column('slots', sa.Integer(), nullable=True),
            sa.Column('description', sa.Text(), nullable=True),
            sa.PrimaryKeyConstraint('id'),
            sa.UniqueConstraint('pool'),
        )
    if 'task_instance' not in tables:
        op.create_table(
            'task_instance',
            sa.Column('task_id', StringID(), nullable=False),
            sa.Column('dag_id', StringID(), nullable=False),
            sa.Column('execution_date', sa.DateTime(), nullable=False),
            sa.Column('start_date', sa.DateTime(), nullable=True),
            sa.Column('end_date', sa.DateTime(), nullable=True),
            sa.Column('duration', sa.Integer(), nullable=True),
            sa.Column('state', sa.String(length=20), nullable=True),
            sa.Column('try_number', sa.Integer(), nullable=True),
            sa.Column('hostname', sa.String(length=1000), nullable=True),
            sa.Column('unixname', sa.String(length=1000), nullable=True),
            sa.Column('job_id', sa.Integer(), nullable=True),
            sa.Column('pool', sa.String(length=50), nullable=True),
            sa.Column('queue', sa.String(length=50), nullable=True),
            sa.Column('priority_weight', sa.Integer(), nullable=True),
            sa.PrimaryKeyConstraint('task_id', 'dag_id', 'execution_date'),
        )
        op.create_index('ti_dag_state', 'task_instance', ['dag_id', 'state'], unique=False)
        op.create_index('ti_pool', 'task_instance', ['pool', 'state', 'priority_weight'], unique=False)
        op.create_index(
            'ti_state_lkp',
            'task_instance',
            ['dag_id', 'task_id', 'execution_date', 'state'],
            unique=False,
        )
    if 'user' not in tables:
        op.create_table(
            'user',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('username', StringID(), nullable=True),
            sa.Column('email', sa.String(length=500), nullable=True),
            sa.PrimaryKeyConstraint('id'),
            sa.UniqueConstraint('username'),
        )
    if 'variable' not in tables:
        op.create_table(
            'variable',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('key', StringID(), nullable=True),
            sa.Column('val', sa.Text(), nullable=True),
            sa.PrimaryKeyConstraint('id'),
            sa.UniqueConstraint('key'),
        )
    if 'chart' not in tables:
        op.create_table(
            'chart',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('label', sa.String(length=200), nullable=True),
            sa.Column('conn_id', sa.String(length=250), nullable=False),
            sa.Column('user_id', sa.Integer(), nullable=True),
            sa.Column('chart_type', sa.String(length=100), nullable=True),
            sa.Column('sql_layout', sa.String(length=50), nullable=True),
            sa.Column('sql', sa.Text(), nullable=True),
            sa.Column('y_log_scale', sa.Boolean(), nullable=True),
            sa.Column('show_datatable', sa.Boolean(), nullable=True),
            sa.Column('show_sql', sa.Boolean(), nullable=True),
            sa.Column('height', sa.Integer(), nullable=True),
            sa.Column('default_params', sa.String(length=5000), nullable=True),
            sa.Column('x_is_date', sa.Boolean(), nullable=True),
            sa.Column('iteration_no', sa.Integer(), nullable=True),
            sa.Column('last_modified', sa.DateTime(), nullable=True),
            sa.ForeignKeyConstraint(['user_id'], ['user.id']),
            sa.PrimaryKeyConstraint('id'),
        )
    if 'xcom' not in tables:
        op.create_table(
            'xcom',
            sa.Column('id', sa.Integer(), nullable=False),
            sa.Column('key', StringID(length=512), nullable=True),
            sa.Column('value', sa.PickleType(), nullable=True),
            sa.Column('timestamp', sa.DateTime(), default=func.now, nullable=False),
            sa.Column('execution_date', sa.DateTime(), nullable=False),
            sa.Column('task_id', StringID(), nullable=False),
            sa.Column('dag_id', StringID(), nullable=False),
            sa.PrimaryKeyConstraint('id'),
        )
def downgrade(): """Unapply Change ``TaskInstance`` and ``TaskReschedule`` tables from execution_date to run_id.""" dialect_name = op.get_bind().dialect.name dt_type = TIMESTAMP string_id_col_type = StringID() op.add_column('task_instance', sa.Column('execution_date', dt_type, nullable=True)) op.add_column('task_reschedule', sa.Column('execution_date', dt_type, nullable=True)) update_query = _multi_table_update(dialect_name, task_instance, task_instance.c.execution_date) op.execute(update_query) update_query = _multi_table_update(dialect_name, task_reschedule, task_reschedule.c.execution_date) op.execute(update_query) with op.batch_alter_table('task_reschedule', schema=None) as batch_op: batch_op.alter_column('execution_date', existing_type=dt_type, existing_nullable=True, nullable=False) # Can't drop PK index while there is a FK referencing it batch_op.drop_constraint('task_reschedule_ti_fkey', type_='foreignkey') batch_op.drop_constraint('task_reschedule_dr_fkey', type_='foreignkey') batch_op.drop_index('idx_task_reschedule_dag_task_run') with op.batch_alter_table('task_instance', schema=None) as batch_op: batch_op.drop_constraint('task_instance_pkey', type_='primary') batch_op.alter_column('execution_date', existing_type=dt_type, existing_nullable=True, nullable=False) batch_op.alter_column( 'dag_id', existing_type=string_id_col_type, existing_nullable=True, nullable=True ) batch_op.create_primary_key('task_instance_pkey', ['dag_id', 'task_id', 'execution_date']) batch_op.drop_constraint('task_instance_dag_run_fkey', type_='foreignkey') batch_op.drop_index('ti_dag_run') batch_op.drop_index('ti_state_lkp') batch_op.create_index('ti_state_lkp', ['dag_id', 'task_id', 'execution_date', 'state']) batch_op.create_index('ti_dag_date', ['dag_id', 'execution_date'], unique=False) batch_op.drop_column('run_id') with op.batch_alter_table('task_reschedule', schema=None) as batch_op: batch_op.drop_column('run_id') batch_op.create_index( 'idx_task_reschedule_dag_task_date', ['dag_id', 'task_id', 'execution_date'], unique=False, ) # Can only create FK once there is an index on these columns batch_op.create_foreign_key( 'task_reschedule_dag_task_date_fkey', 'task_instance', ['dag_id', 'task_id', 'execution_date'], ['dag_id', 'task_id', 'execution_date'], ondelete='CASCADE', ) if dialect_name == "mssql": with op.batch_alter_table('dag_run', schema=None) as batch_op: batch_op.drop_constraint('dag_run_dag_id_execution_date_key', 'unique') batch_op.drop_constraint('dag_run_dag_id_run_id_key', 'unique') batch_op.drop_index('dag_id_state') batch_op.drop_index('idx_dag_run_running_dags') batch_op.drop_index('idx_dag_run_queued_dags') batch_op.alter_column('dag_id', existing_type=string_id_col_type, nullable=True) batch_op.alter_column('execution_date', existing_type=dt_type, nullable=True) batch_op.alter_column('run_id', existing_type=string_id_col_type, nullable=True) batch_op.create_index('dag_id_state', ['dag_id', 'state'], unique=False) batch_op.create_index('idx_dag_run_dag_id', ['dag_id']) batch_op.create_index( 'idx_dag_run_running_dags', ["state", "dag_id"], mssql_where=sa.text("state='running'"), ) batch_op.create_index( 'idx_dag_run_queued_dags', ["state", "dag_id"], mssql_where=sa.text("state='queued'"), ) op.execute( """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_execution_date ON dag_run(dag_id,execution_date) WHERE dag_id IS NOT NULL and execution_date is not null""" ) op.execute( """CREATE UNIQUE NONCLUSTERED INDEX idx_not_null_dag_id_run_id ON dag_run(dag_id,run_id) WHERE dag_id IS 
NOT NULL and run_id is not null""" ) else: with op.batch_alter_table('dag_run', schema=None) as batch_op: batch_op.drop_index('dag_id_state') batch_op.alter_column('run_id', existing_type=sa.VARCHAR(length=250), nullable=True) batch_op.alter_column('execution_date', existing_type=dt_type, nullable=True) batch_op.alter_column('dag_id', existing_type=sa.VARCHAR(length=250), nullable=True) batch_op.create_index('dag_id_state', ['dag_id', 'state'], unique=False)
def upgrade(): """Apply Change ``TaskInstance`` and ``TaskReschedule`` tables from execution_date to run_id.""" conn = op.get_bind() dialect_name = conn.dialect.name dt_type = TIMESTAMP string_id_col_type = StringID() if dialect_name == 'sqlite': naming_convention = { "uq": "%(table_name)s_%(column_0_N_name)s_key", } # The naming_convention force the previously un-named UNIQUE constraints to have the right name with op.batch_alter_table( 'dag_run', naming_convention=naming_convention, recreate="always" ) as batch_op: batch_op.alter_column('dag_id', existing_type=string_id_col_type, nullable=False) batch_op.alter_column('run_id', existing_type=string_id_col_type, nullable=False) batch_op.alter_column('execution_date', existing_type=dt_type, nullable=False) elif dialect_name == 'mysql': with op.batch_alter_table('dag_run') as batch_op: batch_op.alter_column( 'dag_id', existing_type=sa.String(length=ID_LEN), type_=string_id_col_type, nullable=False ) batch_op.alter_column( 'run_id', existing_type=sa.String(length=ID_LEN), type_=string_id_col_type, nullable=False ) batch_op.alter_column('execution_date', existing_type=dt_type, nullable=False) batch_op.drop_constraint('dag_id', 'unique') batch_op.drop_constraint('dag_id_2', 'unique') batch_op.create_unique_constraint( 'dag_run_dag_id_execution_date_key', ['dag_id', 'execution_date'] ) batch_op.create_unique_constraint('dag_run_dag_id_run_id_key', ['dag_id', 'run_id']) elif dialect_name == 'mssql': with op.batch_alter_table('dag_run') as batch_op: batch_op.drop_index('idx_not_null_dag_id_execution_date') batch_op.drop_index('idx_not_null_dag_id_run_id') batch_op.drop_index('dag_id_state') batch_op.drop_index('idx_dag_run_dag_id') batch_op.drop_index('idx_dag_run_running_dags') batch_op.drop_index('idx_dag_run_queued_dags') batch_op.alter_column('dag_id', existing_type=string_id_col_type, nullable=False) batch_op.alter_column('execution_date', existing_type=dt_type, nullable=False) batch_op.alter_column('run_id', existing_type=string_id_col_type, nullable=False) # _Somehow_ mssql was missing these constraints entirely batch_op.create_unique_constraint( 'dag_run_dag_id_execution_date_key', ['dag_id', 'execution_date'] ) batch_op.create_unique_constraint('dag_run_dag_id_run_id_key', ['dag_id', 'run_id']) batch_op.create_index('dag_id_state', ['dag_id', 'state'], unique=False) batch_op.create_index('idx_dag_run_dag_id', ['dag_id']) batch_op.create_index( 'idx_dag_run_running_dags', ["state", "dag_id"], mssql_where=sa.text("state='running'"), ) batch_op.create_index( 'idx_dag_run_queued_dags', ["state", "dag_id"], mssql_where=sa.text("state='queued'"), ) else: # Make sure DagRun id columns are non-nullable with op.batch_alter_table('dag_run', schema=None) as batch_op: batch_op.alter_column('dag_id', existing_type=string_id_col_type, nullable=False) batch_op.alter_column('execution_date', existing_type=dt_type, nullable=False) batch_op.alter_column('run_id', existing_type=string_id_col_type, nullable=False) # First create column nullable op.add_column('task_instance', sa.Column('run_id', type_=string_id_col_type, nullable=True)) op.add_column('task_reschedule', sa.Column('run_id', type_=string_id_col_type, nullable=True)) # # TaskReschedule has a FK to TaskInstance, so we have to update that before # we can drop the TI.execution_date column update_query = _multi_table_update(dialect_name, task_reschedule, task_reschedule.c.run_id) op.execute(update_query) with op.batch_alter_table('task_reschedule', schema=None) as batch_op: batch_op.alter_column( 
'run_id', existing_type=string_id_col_type, existing_nullable=True, nullable=False ) batch_op.drop_constraint('task_reschedule_dag_task_date_fkey', 'foreignkey') if dialect_name == "mysql": # Mysql creates an index and a constraint -- we have to drop both batch_op.drop_index('task_reschedule_dag_task_date_fkey') batch_op.alter_column( 'dag_id', existing_type=sa.String(length=ID_LEN), type_=string_id_col_type, nullable=False ) batch_op.drop_index('idx_task_reschedule_dag_task_date') # Then update the new column by selecting the right value from DagRun # But first we will drop and recreate indexes to make it faster if dialect_name == 'postgresql': # Recreate task_instance, without execution_date and with dagrun.run_id op.execute( """ CREATE TABLE new_task_instance AS SELECT ti.task_id, ti.dag_id, dag_run.run_id, ti.start_date, ti.end_date, ti.duration, ti.state, ti.try_number, ti.hostname, ti.unixname, ti.job_id, ti.pool, ti.queue, ti.priority_weight, ti.operator, ti.queued_dttm, ti.pid, ti.max_tries, ti.executor_config, ti.pool_slots, ti.queued_by_job_id, ti.external_executor_id, ti.trigger_id, ti.trigger_timeout, ti.next_method, ti.next_kwargs FROM task_instance ti INNER JOIN dag_run ON dag_run.dag_id = ti.dag_id AND dag_run.execution_date = ti.execution_date; """ ) op.drop_table('task_instance') op.rename_table('new_task_instance', 'task_instance') # Fix up columns after the 'create table as select' with op.batch_alter_table('task_instance', schema=None) as batch_op: batch_op.alter_column( 'pool', existing_type=string_id_col_type, existing_nullable=True, nullable=False ) batch_op.alter_column('max_tries', existing_type=sa.Integer(), server_default="-1") batch_op.alter_column( 'pool_slots', existing_type=sa.Integer(), existing_nullable=True, nullable=False ) else: update_query = _multi_table_update(dialect_name, task_instance, task_instance.c.run_id) op.execute(update_query) with op.batch_alter_table('task_instance', schema=None) as batch_op: if dialect_name != 'postgresql': # TODO: Is this right for non-postgres? 
if dialect_name == 'mssql': constraints = get_table_constraints(conn, "task_instance") pk, _ = constraints['PRIMARY KEY'].popitem() batch_op.drop_constraint(pk, type_='primary') elif dialect_name not in ('sqlite'): batch_op.drop_constraint('task_instance_pkey', type_='primary') batch_op.drop_index('ti_dag_date') batch_op.drop_index('ti_state_lkp') batch_op.drop_column('execution_date') # Then make it non-nullable batch_op.alter_column( 'run_id', existing_type=string_id_col_type, existing_nullable=True, nullable=False ) batch_op.alter_column( 'dag_id', existing_type=string_id_col_type, existing_nullable=True, nullable=False ) batch_op.create_primary_key('task_instance_pkey', ['dag_id', 'task_id', 'run_id']) batch_op.create_foreign_key( 'task_instance_dag_run_fkey', 'dag_run', ['dag_id', 'run_id'], ['dag_id', 'run_id'], ondelete='CASCADE', ) batch_op.create_index('ti_dag_run', ['dag_id', 'run_id']) batch_op.create_index('ti_state_lkp', ['dag_id', 'task_id', 'run_id', 'state']) if dialect_name == 'postgresql': batch_op.create_index('ti_dag_state', ['dag_id', 'state']) batch_op.create_index('ti_job_id', ['job_id']) batch_op.create_index('ti_pool', ['pool', 'state', 'priority_weight']) batch_op.create_index('ti_state', ['state']) batch_op.create_foreign_key( 'task_instance_trigger_id_fkey', 'trigger', ['trigger_id'], ['id'], ondelete="CASCADE" ) batch_op.create_index('ti_trigger_id', ['trigger_id']) with op.batch_alter_table('task_reschedule', schema=None) as batch_op: batch_op.drop_column('execution_date') batch_op.create_index( 'idx_task_reschedule_dag_task_run', ['dag_id', 'task_id', 'run_id'], unique=False, ) # _Now_ there is a unique constraint on the columns in TI we can re-create the FK from TaskReschedule batch_op.create_foreign_key( 'task_reschedule_ti_fkey', 'task_instance', ['dag_id', 'task_id', 'run_id'], ['dag_id', 'task_id', 'run_id'], ondelete='CASCADE', ) # https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-1785-database-engine-error?view=sql-server-ver15 ondelete = 'CASCADE' if dialect_name != 'mssql' else 'NO ACTION' batch_op.create_foreign_key( 'task_reschedule_dr_fkey', 'dag_run', ['dag_id', 'run_id'], ['dag_id', 'run_id'], ondelete=ondelete, )
def upgrade(): """Apply Add ``root_dag_id`` to ``DAG``""" op.add_column('dag', sa.Column('root_dag_id', StringID(), nullable=True)) op.create_index('idx_root_dag_id', 'dag', ['root_dag_id'], unique=False)