import os
import tempfile
from contextlib import contextmanager

import click

# DagsterInstance, is_dagster_home_set, and the remaining dagster / dagster-airflow
# names used below are assumed to be imported from the surrounding modules.


# Generator-based helper: it yields inside `with` blocks, so it needs the
# @contextmanager decorator to be usable as a context manager itself.
@contextmanager
def _get_instance():
    if is_dagster_home_set():
        with DagsterInstance.get() as instance:
            yield instance
    else:
        with tempfile.TemporaryDirectory() as tempdir:
            click.echo(
                f"Using temporary directory {tempdir} for storage. This will be removed when dagit exits.\n"
                "To persist information across sessions, set the environment variable DAGSTER_HOME to a directory to use.\n"
            )
            with DagsterInstance.local_temp(tempdir) as instance:
                yield instance
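
# A minimal usage sketch, assuming _get_instance is entered as a context manager
# (per the @contextmanager decorator above). Wrapped in a function so it does not
# run on import; get_runs() and run_id are standard DagsterInstance / PipelineRun APIs.
def _example_get_instance_usage():
    with _get_instance() as instance:
        for run in instance.get_runs():
            print(run.run_id)
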
@contextmanager
def get_instance_for_service(service_name):
    if is_dagster_home_set():
        with DagsterInstance.get() as instance:
            yield instance
    else:
        # Make the temp dir in the cwd, since default temp dir roots have issues
        # with filesystem-notification-based event log watching.
        with tempfile.TemporaryDirectory(dir=os.getcwd()) as tempdir:
            click.echo(
                f"Using temporary directory {tempdir} for storage. This will be removed when {service_name} exits.\n"
                "To persist information across sessions, set the environment variable DAGSTER_HOME to a directory to use.\n"
            )
            with DagsterInstance.local_temp(tempdir) as instance:
                yield instance
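
# A hedged usage sketch for get_instance_for_service; the service name string is
# illustrative. root_directory is assumed here to point at the instance's storage
# root (DAGSTER_HOME if set, otherwise the temporary directory created above).
def _example_service_instance_usage():
    with get_instance_for_service("dagit") as instance:
        print(instance.root_directory)
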
def _make_airflow_dag(
    recon_repo,
    job_name,
    run_config=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    operator=DagsterPythonOperator,
):
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)
    check.str_param(job_name, "job_name")
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)
    mode = check.opt_str_param(mode, "mode")

    # Default to use the (persistent) system temp directory rather than a TemporaryDirectory,
    # which would not be consistent between Airflow task invocations.
    if instance is None:
        if is_dagster_home_set():
            instance = DagsterInstance.get()
        else:
            instance = DagsterInstance.local_temp(tempdir=seven.get_system_temp_directory())

    check.inst_param(instance, "instance", DagsterInstance)

    # Only used for Airflow; internally we continue to use pipeline.name
    dag_id = check.opt_str_param(dag_id, "dag_id", _rename_for_airflow(job_name))

    dag_description = check.opt_str_param(
        dag_description, "dag_description", _make_dag_description(job_name)
    )
    check.class_param(operator, "operator", superclass=BaseOperator)

    dag_kwargs = dict(
        {"default_args": DEFAULT_ARGS},
        **check.opt_dict_param(dag_kwargs, "dag_kwargs", key_type=str),
    )

    op_kwargs = check.opt_dict_param(op_kwargs, "op_kwargs", key_type=str)

    dag = DAG(dag_id=dag_id, description=dag_description, **dag_kwargs)
    pipeline = recon_repo.get_definition().get_pipeline(job_name)

    if mode is None:
        mode = pipeline.get_default_mode_name()

    execution_plan = create_execution_plan(pipeline, run_config, mode=mode)

    tasks = {}

    coalesced_plan = coalesce_execution_steps(execution_plan)

    # Build one Airflow operator per coalesced solid handle, each responsible for
    # the corresponding group of execution plan steps.
    for solid_handle, solid_steps in coalesced_plan.items():
        step_keys = [step.key for step in solid_steps]

        operator_parameters = DagsterOperatorParameters(
            recon_repo=recon_repo,
            pipeline_name=job_name,
            run_config=run_config,
            mode=mode,
            task_id=solid_handle,
            step_keys=step_keys,
            dag=dag,
            instance_ref=instance.get_ref(),
            op_kwargs=op_kwargs,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline_snapshot_id=pipeline.get_pipeline_snapshot_id()
            ),
        )
        task = operator(operator_parameters)

        tasks[solid_handle] = task

        # Wire Airflow task dependencies from the Dagster step inputs: any step
        # input coming from a different solid handle makes that handle's task
        # an upstream dependency of this one.
        for solid_step in solid_steps:
            for step_input in solid_step.step_inputs:
                for key in step_input.dependency_keys:
                    prev_solid_handle = execution_plan.get_step_by_key(key).solid_handle.to_string()
                    if solid_handle != prev_solid_handle:
                        tasks[prev_solid_handle].set_downstream(task)

    return (dag, [tasks[solid_handle] for solid_handle in coalesced_plan.keys()])
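
# A hedged sketch of calling _make_airflow_dag directly; "my_module" and
# "define_repo" are hypothetical stand-ins for a module containing a repository
# factory function, and "my_pipeline" is a hypothetical pipeline in that repository.
def _example_make_airflow_dag():
    recon_repo = ReconstructableRepository.for_module("my_module", "define_repo")
    dag, tasks = _make_airflow_dag(
        recon_repo=recon_repo,
        job_name="my_pipeline",
    )
    return dag, tasks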