def test_0_10_0_schedule_wipe(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(__file__, "snapshot_0_10_0_wipe_schedules/postgres/pg_dump.txt"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        with pytest.raises(DagsterInstanceMigrationRequired):
            with DagsterInstance.from_config(tempdir) as instance:
                instance.optimize_for_dagit(statement_timeout=500)

        with DagsterInstance.from_config(tempdir) as instance:
            instance.upgrade()

        with DagsterInstance.from_config(tempdir) as upgraded_instance:
            assert len(upgraded_instance.all_stored_job_state()) == 0

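# _reconstruct_from_file is referenced throughout but not defined in this
# snippet. A minimal sketch, assuming it simply wraps the inline psql/pg_dump
# pattern used by the older tests below -- the helper name and positional
# arguments match the call sites, but the `username` and `password` parameters
# are illustrative:
def _reconstruct_from_file(hostname, conn_string, path, username="test", password="test"):
    # Reset the public schema so the snapshot loads into a clean database
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    # Replay the pg_dump snapshot via psql
    env = os.environ.copy()
    env["PGPASSWORD"] = password
    subprocess.check_call(
        ["psql", "-h", hostname, "-p", "5432", "-U", username, "-f", path],
        env=env,
    )
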
def test_0_12_0_add_mode_column(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(__file__, "snapshot_0_11_16_pre_add_mode_column/postgres/pg_dump.txt"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Ensure that you don't get a migration-required exception if you aren't
        # querying the migration-required column.
        assert len(instance.get_runs()) == 1

        @solid
        def basic():
            pass

        @pipeline
        def noop_pipeline():
            basic()

        # Ensure that you don't get a migration-required exception when running a
        # pipeline pre-migration.
        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success
        assert len(instance.get_runs()) == 2

        # Ensure the migration-required exception is raised once you do query the
        # migration-required column.
        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_migration_regex("run", current_revision="7cba9eeaaf1d"),
        ):
            instance.get_runs(filters=PipelineRunsFilter(mode="the_mode"))

        instance.upgrade()

        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success
        assert len(instance.get_runs()) == 3

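# _migration_regex is likewise referenced but not defined here. It builds a
# regex that pytest.raises(..., match=...) checks against the
# DagsterInstanceMigrationRequired message for a given storage and alembic
# revision. A hedged sketch -- the exact wording of the real message belongs to
# dagster and may differ between versions:
import re

def _migration_regex(storage_name, current_revision):
    warning = re.escape(
        "Instance is out of date and must be migrated (Postgres {} storage requires migration).".format(
            storage_name
        )
    )
    if current_revision:
        revision = "Database is at revision {}".format(re.escape(current_revision))
    else:
        revision = "Database is at revision None"
    return "{}.*{}".format(warning, revision)
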
def test_0_6_6_postgres(hostname, conn_string):
    # Init a fresh postgres with a 0.6.6 snapshot
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    env = os.environ.copy()
    env["PGPASSWORD"] = "******"
    subprocess.check_call(
        [
            "psql",
            "-h",
            hostname,
            "-p",
            "5432",
            "-U",
            "test",
            "-f",
            file_relative_path(__file__, "snapshot_0_6_6/postgres/pg_dump.txt"),
        ],
        env=env,
    )

    run_id = "089287c5-964d-44c0-b727-357eb7ba522e"

    with seven.TemporaryDirectory() as tempdir:
        # Create the dagster.yaml
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Runs will appear in the DB, but event logs need migration
        runs = instance.get_runs()
        assert len(runs) == 1
        assert instance.get_run_by_id(run_id)
        assert instance.all_logs(run_id) == []

        # Post migration, event logs appear in the DB
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1
        assert instance.get_run_by_id(run_id)
        assert len(instance.all_logs(run_id)) == 89

def test_0_10_6_add_bulk_actions_table(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(__file__, "snapshot_0_10_6_add_bulk_actions_table/postgres/pg_dump.txt"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        with pytest.raises(DagsterInstanceMigrationRequired):
            with DagsterInstance.from_config(tempdir) as instance:
                instance.get_backfills()

        with DagsterInstance.from_config(tempdir) as instance:
            instance.upgrade()

        with DagsterInstance.from_config(tempdir) as upgraded_instance:
            assert len(upgraded_instance.get_backfills()) == 0

@contextmanager
def instance_for_test(overrides=None, set_dagster_home=True, temp_dir=None):
    with ExitStack() as stack:
        if not temp_dir:
            temp_dir = stack.enter_context(tempfile.TemporaryDirectory())

        # If using the default run launcher, wait for any grpc processes that created
        # runs during test disposal to finish, since they might also be using this
        # instance's tempdir
        instance_overrides = merge_dicts(
            {
                "run_launcher": {
                    "class": "DefaultRunLauncher",
                    "module": "dagster.core.launcher.default_run_launcher",
                    "config": {
                        "wait_for_processes": True,
                    },
                },
                "telemetry": {"enabled": False},
            },
            (overrides if overrides else {}),
        )

        if set_dagster_home:
            stack.enter_context(
                environ({"DAGSTER_HOME": temp_dir, "DAGSTER_DISABLE_TELEMETRY": "yes"})
            )

        with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
            yaml.dump(instance_overrides, fd, default_flow_style=False)

        with DagsterInstance.from_config(temp_dir) as instance:
            try:
                yield instance
            except:  # pylint: disable=bare-except
                sys.stderr.write(
                    "Test raised an exception, attempting to clean up instance:"
                    + serializable_error_info_from_exc_info(sys.exc_info()).to_string()
                    + "\n"
                )
                raise
            finally:
                cleanup_test_instance(instance)

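# instance_for_test yields, so it is assumed to be a @contextmanager (the
# decorator is restored above). Typical usage -- `my_pipeline` is a stand-in
# for any pipeline under test:
#
#     with instance_for_test(overrides={"telemetry": {"enabled": False}}) as instance:
#         result = execute_pipeline(my_pipeline, instance=instance)
#         assert result.success
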
def test_0_9_22_postgres_pre_run_partition(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(__file__, "snapshot_0_9_22_pre_run_partition/postgres/pg_dump.txt"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        @solid
        def simple_solid(_):
            return 1

        @pipeline
        def simple_pipeline():
            simple_solid()

        tags = {PARTITION_NAME_TAG: "my_partition", PARTITION_SET_TAG: "my_partition_set"}

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_migration_regex("run", current_revision="3e0770016702"),
        ):
            execute_pipeline(simple_pipeline, tags=tags, instance=instance)

        # ensure the migration is run
        instance.upgrade()

        result = execute_pipeline(simple_pipeline, tags=tags, instance=instance)
        assert result.success

def test_0_13_12_add_start_time_end_time(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(__file__, "snapshot_0_13_12_pre_start_time_end_time/postgres/pg_dump.txt"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Ensure that you don't get a migration-required exception if you aren't
        # querying the migration-required columns.
        assert len(instance.get_runs()) == 1

        # Ensure that you don't get a migration-required exception when running a
        # pipeline pre-migration.
        result = execute_pipeline(reconstructable(get_the_job), instance=instance)
        assert result.success
        assert len(instance.get_runs()) == 2

        instance.upgrade()
        instance.reindex()

        result = execute_pipeline(reconstructable(get_the_job), instance=instance)
        assert result.success
        assert len(instance.get_runs()) == 3

        latest_run_record = instance.get_run_records()[0]
        assert latest_run_record.end_time > latest_run_record.start_time

        # Verify that historical records also get updated via the data migration
        earliest_run_record = instance.get_run_records()[-1]
        assert earliest_run_record.end_time > earliest_run_record.start_time

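# get_the_job is defined elsewhere in the real test module; reconstructable()
# needs a module-scope callable, so a minimal stand-in consistent with the call
# sites (all names here are illustrative) might be:
@solid
def noop_solid_for_the_job(_):
    pass

@pipeline
def the_job():
    noop_solid_for_the_job()

def get_the_job():
    return the_job
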
def test_compute_log_manager_from_config(s3_bucket):
    s3_prefix = 'foobar'

    dagster_yaml = '''
compute_logs:
  module: dagster_aws.s3.compute_log_manager
  class: S3ComputeLogManager
  config:
    bucket: "{s3_bucket}"
    local_dir: "/tmp/cool"
    prefix: "{s3_prefix}"
'''.format(s3_bucket=s3_bucket, s3_prefix=s3_prefix)

    with seven.TemporaryDirectory() as tempdir:
        with open(os.path.join(tempdir, 'dagster.yaml'), 'wb') as f:
            f.write(six.ensure_binary(dagster_yaml))

        instance = DagsterInstance.from_config(tempdir)
        assert instance.compute_log_manager._s3_bucket == s3_bucket  # pylint: disable=protected-access
        assert instance.compute_log_manager._s3_prefix == s3_prefix  # pylint: disable=protected-access

def test_compute_log_manager_from_config(gcs_bucket):
    gcs_prefix = "foobar"

    dagster_yaml = """
compute_logs:
  module: dagster_gcp.gcs.compute_log_manager
  class: GCSComputeLogManager
  config:
    bucket: "{bucket}"
    local_dir: "/tmp/cool"
    prefix: "{prefix}"
""".format(bucket=gcs_bucket, prefix=gcs_prefix)

    with tempfile.TemporaryDirectory() as tempdir:
        with open(os.path.join(tempdir, "dagster.yaml"), "wb") as f:
            f.write(six.ensure_binary(dagster_yaml))

        instance = DagsterInstance.from_config(tempdir)
        assert isinstance(instance.compute_log_manager, GCSComputeLogManager)

def test_compute_log_manager_from_config(mock_s3_bucket):
    s3_prefix = "foobar"

    dagster_yaml = """
compute_logs:
  module: dagster_aws.s3.compute_log_manager
  class: S3ComputeLogManager
  config:
    bucket: "{s3_bucket}"
    local_dir: "/tmp/cool"
    prefix: "{s3_prefix}"
""".format(s3_bucket=mock_s3_bucket.name, s3_prefix=s3_prefix)

    with tempfile.TemporaryDirectory() as tempdir:
        with open(os.path.join(tempdir, "dagster.yaml"), "wb") as f:
            f.write(dagster_yaml.encode("utf-8"))

        instance = DagsterInstance.from_config(tempdir)
        assert (
            instance.compute_log_manager._s3_bucket  # pylint: disable=protected-access
            == mock_s3_bucket.name
        )
        assert instance.compute_log_manager._s3_prefix == s3_prefix  # pylint: disable=protected-access

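# The mock_s3_bucket fixture used above is defined elsewhere (conftest). A
# minimal sketch using moto's fake S3 -- the fixture name matches the call
# site, but the bucket name and implementation are assumptions:
import boto3
import pytest
from moto import mock_s3

@pytest.fixture
def mock_s3_bucket():
    with mock_s3():
        # Returns an s3.Bucket resource, so tests can read mock_s3_bucket.name
        yield boto3.resource("s3").create_bucket(Bucket="test-bucket")
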
def test_instigators_table_backcompat(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(__file__, "snapshot_0_14_6_instigators_table.sql"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        assert not instance.schedule_storage.has_instigators_table()

        instance.upgrade()

        assert instance.schedule_storage.has_instigators_table()

def test_0_9_22_postgres_pre_asset_partition(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(__file__, "snapshot_0_9_22_pre_asset_partition/postgres/pg_dump.txt"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        @solid
        def asset_solid(_):
            yield AssetMaterialization(
                asset_key=AssetKey(["path", "to", "asset"]), partition="partition_1"
            )
            yield Output(1)

        @pipeline
        def asset_pipeline():
            asset_solid()

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_migration_regex("run", current_revision="c9159e740d7e"),
        ):
            execute_pipeline(asset_pipeline, instance=instance)

        # ensure the migration is run
        instance.upgrade()

        result = execute_pipeline(asset_pipeline, instance=instance)
        assert result.success

def test_0_11_0_add_asset_details(hostname, conn_string):
    _reconstruct_from_file(
        hostname,
        conn_string,
        file_relative_path(__file__, "snapshot_0_11_0_pre_asset_details/postgres/pg_dump.txt"),
    )

    with tempfile.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        with DagsterInstance.from_config(tempdir) as instance:
            storage = instance._event_storage

            with pytest.raises(
                DagsterInstanceMigrationRequired,
                match=_migration_regex("event log", current_revision="3e71cf573ba6"),
            ):
                storage.get_asset_tags(AssetKey(["test"]))
                storage.all_asset_keys()

            instance.upgrade()

            storage.get_asset_tags(AssetKey(["test"]))
            storage.all_asset_keys()

def test_0_7_6_postgres_pre_add_pipeline_snapshot(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    env = os.environ.copy()
    env["PGPASSWORD"] = "******"
    subprocess.check_call(
        [
            "psql",
            "-h",
            hostname,
            "-p",
            "5432",
            "-U",
            "test",
            "-f",
            file_relative_path(
                __file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/postgres/pg_dump.txt"
            ),
        ],
        env=env,
    )

    run_id = "d5f89349-7477-4fab-913e-0925cef0a959"

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
            DagsterInstanceMigrationRequired, match=_migration_regex("run", current_revision=None)
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        # ensure the migration is run
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1
        assert runs[0].run_id == run_id

        run = instance.get_run_by_id(run_id)
        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None

        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id
        new_run = instance.get_run_by_id(new_run_id)
        assert new_run.pipeline_snapshot_id

def test_0_7_6_postgres_pre_event_log_migration(hostname, conn_string):
    engine = create_engine(conn_string)
    engine.execute("drop schema public cascade;")
    engine.execute("create schema public;")

    env = os.environ.copy()
    env["PGPASSWORD"] = "******"
    subprocess.check_call(
        [
            "psql",
            "-h",
            hostname,
            "-p",
            "5432",
            "-U",
            "test",
            "-f",
            file_relative_path(
                __file__, "snapshot_0_7_6_pre_event_log_migration/postgres/pg_dump.txt"
            ),
        ],
        env=env,
    )

    run_id = "ca7f1e33-526d-4f75-9bc5-3e98da41ab97"

    with seven.TemporaryDirectory() as tempdir:
        with open(file_relative_path(__file__, "dagster.yaml"), "r") as template_fd:
            with open(os.path.join(tempdir, "dagster.yaml"), "w") as target_fd:
                template = template_fd.read().format(hostname=hostname)
                target_fd.write(template)

        instance = DagsterInstance.from_config(tempdir)

        # Runs will appear in the DB, but event logs need migration
        runs = instance.get_runs()
        assert len(runs) == 1
        assert instance.get_run_by_id(run_id)

        # Make sure the schema is migrated
        instance.upgrade()

        assert isinstance(instance._event_storage, SqlEventLogStorage)

        events_by_id = instance._event_storage.get_logs_for_run_by_log_id(run_id)
        assert len(events_by_id) == 40

        # Before the data migration, no event log rows have a step_key
        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(run_id, record_id)
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) == 0

        # run the event_log data migration
        migrate_event_log_data(instance=instance)

        # After the data migration, step_key is populated from the event payloads
        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(run_id, record_id)
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) > 0

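# migrate_event_log_data is dagster's event-log data migration entry point
# (presumably imported from dagster.core.storage.event_log.migration); it walks
# the stored event payloads and backfills denormalized columns such as
# step_key, which is what the assertions above exercise.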