def test_pass_on_injected_format(self):
    task = TTextDataTask(
        text_data=target(
            scenario_path("data/some_unknown_ext.myext"), config=file.txt
        )
    )
    assert_run_task(task)

def test_definition_inplace_param(self):
    @task
    def t_f_call(a=parameter[int]):
        assert a == 6

    t_f_call(a=6)
    assert_run_task(t_f_call.t(a=6))

def test__by_day_simple_local(self):
    with dbnd_config(
        {
            ProductionIdsAndData.task_env: "local",
            FetchIds.task_enabled_in_prod: True,
            FetchData.task_enabled_in_prod: True,
        }
    ):
        assert_run_task(ByDayExamplePipeline(period="2d"))

def test_params_inherited_parse(self):
    target = DummyWrapper(
        override={
            DummyTask.expected_param: "0.1",
            DummyTask.expected_timedelta_param: "4d",
        }
    )
    assert_run_task(target)

def test_simple_defaults(self):
    @task
    def t_f_defaults(a=5):
        assert a == 5

    t_f_defaults()
    assert_run_task(t_f_defaults.t())

def test_custom_decorator_usage(self):
    @my_experiment
    def run_splits(previous_exp=1):
        logging.warning("Running splits!!! %s", previous_exp)
        return 1, 2, 1

    @my_experiment
    def my_experiement(alpha=0.2, previous_exp=1):
        logging.warning("My previous exp = %s", previous_exp)
        logging.warning(" Running some splits")
        t = run_splits.t()
        t.dbnd_run()
        logging.warning(" Done some splits")
        return 1, 2, t.result.read_pickle()

    my_exp = my_experiement.t(alpha=0.4)
    # we can't support creating the same task under different dags
    # for the second time: the dag will not be added to the task
    # my_exp2 = my_experiement.t(previous_exp=my_exp.my_ratio)
    assert_run_task(my_exp)

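# A minimal sketch of the custom decorator the test above assumes; the real
# `my_experiment` is defined in the shared test fixtures and likely carries
# experiment-specific defaults. At its simplest it can be dbnd's @task
# decorator re-exported under a domain-specific name, which is enough for the
# decorated functions to become trackable tasks.
from dbnd import task

my_experiment = task
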
def test_custom_partition_from_ctor(self):
    task = TTask(
        task_output_path_format=(
            "{root}/{env_label}/{task_family}{task_class_version}_custom/"
            "{output_name}{output_ext}/date={task_target_date}"
        )
    )
    assert_run_task(task)
    assert "TTask_custom/t_output.csv/" in str(task.t_output)

def test_generated_output_dict(self):
    def _get_all_splits(task, task_output):
        # type: (Task, ParameterBase) -> dict
        result = {}
        target = task_output.build_target(task)
        for i in range(task.parts):
            name = "part_%s" % i
            result[name] = (
                target.partition(name="train_%s" % name),
                target.partition(name="test_%s" % name),
            )
        return result

    class TGeneratedOutputs(PythonTask):
        parts = parameter.value(3)
        splits = output.csv.folder(output_factory=_get_all_splits)

        def run(self):
            for key, split in self.splits.items():
                train, test = split
                train.write(key)
                test.write(key)

    assert_run_task(TGeneratedOutputs())

def test_spark_inline_same_context(self):
    from pyspark.sql import SparkSession
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    with SparkSession.builder.getOrCreate() as sc:
        with config({SparkLocalEngineConfig.enable_spark_context_inplace: True}):
            assert_run_task(word_count_inline.t(text=__file__))

def test_inline_call_with_inline_band(self, target_1_2):
    @task
    def t_f_2nd(a):
        # also, no typing
        return t_f_b(a)

    @band
    def t_f_inline_band(a):
        # also, no typing
        return t_f_2nd(a)

    @task
    def t_f_1st(a):
        # type: (DataList[str]) -> List[str]
        x = t_f_inline_band(a)
        assert x == ["s_1", "s_2"]
        return x

    @band
    def t_f_band(a):
        # type: (DataList[str]) -> FileTarget
        x = t_f_1st(a)
        assert isinstance(x, FileTarget)
        return x

    assert_run_task(t_f_band.t(a=target_1_2))

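# Hypothetical sketch of the shared helper task the inline-call tests in this
# module use; the real t_f_b lives in the module fixtures. It simply echoes
# the list it receives, which is why callers can assert x == ["s_1", "s_2"]
# when target_1_2 contains those two lines.
from typing import List

from dbnd import task


@task
def t_f_b(a):
    # type: (List[str]) -> List[str]
    return a
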
def test_simple_no_call(self):
    @task
    def t_f_nocall(a=5):
        assert a == 6

    t_f_nocall(a=6)
    assert_run_task(t_f_nocall.t(a=6))

def test_deco_ret_task(self):
    @band
    def ret_dict():
        v = TTask(t_param=1)
        return v

    assert_run_task(ret_dict.t())

def test_task_artifacts(self, matplot_figure, tmpdir):
    lorem = (
        "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, "
        "sed diam nonummy nibh euismod tincidunt\n"
    )
    data = tmpdir / "data.txt"
    data.write(lorem)
    artifact_dir = tmpdir.mkdir("dir")
    sub_file = artifact_dir.mkdir("subdir").join("sub_file")
    sub_file.write(lorem)

    class TTaskArtifacts(TTask):
        def run(self):
            self.log_artifact("my_tmp_file", str(data))
            self.log_artifact("my_figure", matplot_figure)
            self.log_artifact("my_dir", str(artifact_dir) + "/")
            super(TTaskArtifacts, self).run()

    task = TTaskArtifacts()
    assert_run_task(task)
    actual = task._meta_output.list_partitions()
    actual_strings = list(map(str, actual))
    assert any("my_tmp_file" in os.path.basename(s) for s in actual_strings)
    assert any("my_figure" in os.path.basename(s) for s in actual_strings)
    assert any("sub_file" in os.path.basename(s) for s in actual_strings)

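# Hypothetical conftest-style sketch of the matplot_figure fixture the test
# above assumes; the real fixture is provided elsewhere in the suite. Any
# matplotlib Figure object works for log_artifact.
import matplotlib

matplotlib.use("Agg")  # headless backend, safe for CI

import matplotlib.pyplot as plt
import pytest


@pytest.fixture
def matplot_figure():
    fig, ax = plt.subplots()
    ax.plot([1, 2, 3], [1, 4, 9])  # arbitrary figure content
    return fig
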
def test_simple_with_call(self):
    @task()
    def t_f_call(a=5):
        assert a == 6

    t_f_call(a=6)
    assert_run_task(t_f_call.t(a=6))

def test_definition_inplace_output(self):
    @task
    def t_f_call(a=parameter[int], f_output=output[Target]):
        f_output.write(str(a))
        return None

    assert_run_task(t_f_call.t(a=6))

def test_task_input_via_band1(self, file_on_disk):
    class TTaskWithInputTask1(PipelineTask):
        t_output = output.data

        def band(self):
            self.t_output = TTaskWithInput(t_input=file_on_disk.path)

    assert_run_task(TTaskWithInputTask1())

def test_wine_quality_deco_simple_all(self):
    task = wine_quality_decorators.predict_wine_quality.t(
        alpha=0.5,
        override={wine_quality_decorators.fetch_data.t.task_env: "local_prod"},
    )
    assert_run_task(task)

def test_wine_quality_deco_simple_all(self):
    with dbnd_config(
        {"local_prod": {"_from": "local", "env_label": "prod", "production": True}}
    ):
        task = wine_quality.predict_wine_quality.t(
            alpha=0.5, override={wine_quality.fetch_data.t.task_env: "local_prod"}
        )
        assert_run_task(task)

def test_inline_call_with_res(self, target_1_2):
    @task
    def t_f_parent(a):
        # type: (DataList[str]) -> None
        x = t_f_b(a)
        assert x == ["s_1", "s_2"]

    assert_run_task(t_f_parent.t(a=target_1_2))

def test_prod_immutable_output_example(self):
    with dbnd_config(
        {
            FetchIds.task_enabled_in_prod: True,
            FetchData.task_enabled_in_prod: True,
        }
    ):
        task = ProductionIdsAndData(
            task_env=get_databand_context().env.clone(production=True)
        )
        assert_run_task(task)

def test_word_count_inline(self):
    with dbnd_config(disable_tracker_api()):
        assert_run_task(
            word_count_inline.t(
                text=TEXT_FILE,
                task_version=str(random.random()),
                override=conf_override,
            )
        )

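# Hypothetical module-level fixtures the word-count tests rely on; the real
# values are environment-specific and built in the test setup. TEXT_FILE only
# needs to point at a readable text file (other tests in this section simply
# use __file__), and an empty override dict is a valid no-op default.
TEXT_FILE = __file__
conf_override = {}
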
def test_wine_quality_deco_search(self):
    task = wine_quality_decorators.predict_wine_quality_parameter_search.t(
        alpha_step=0.5,
        override={
            wine_quality_decorators.predict_wine_quality.t.data: data_repo.wines
        },
    )
    assert_run_task(task)

def test_custom_partition(self):
    class CustomOutputsTTask(TTask):
        _conf__base_output_path_fmt = (
            "{root}/{env_label}/{task_family}{task_class_version}_custom/"
            "{output_name}{output_ext}/date={task_target_date}"
        )

    task = CustomOutputsTTask()
    assert_run_task(task)
    assert "CustomOutputsTTask_custom/t_output.csv/" in str(task.t_output)

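# Hedged sketch of a third variant: the same custom output layout driven
# through configuration rather than the constructor or a class attribute.
# The key reuses task_output_path_format from the ctor test above and the
# {parameter-reference: value} dbnd_config shape used elsewhere in this suite.
def test_custom_partition_from_config(self):
    with dbnd_config(
        {
            TTask.task_output_path_format: (
                "{root}/{env_label}/{task_family}{task_class_version}_custom/"
                "{output_name}{output_ext}/date={task_target_date}"
            )
        }
    ):
        assert_run_task(TTask())
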
def test_input_filename(self, target_1_2):
    @task
    def t_f_path(a):
        # type: (PathStr) -> str
        assert target_1_2.path == a
        return a

    t_f_path(a=target_1_2.path)
    assert_run_task(t_f_path.t(a=target_1_2))

def test_run_all_task(self):
    # this code runs outside of a @band context, so we need to call .task()
    # explicitly; otherwise the function would be executed in place
    task = T22_function_with_different_inputs.f_test_flow.task()
    # 'task' is a Task object, the definition of the pipeline; it has not
    # been executed yet
    assert_run_task(task)
    actual = task.result.load(str)
    assert actual == "OK"

def test_word_count_inline(self):
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    assert_run_task(
        word_count_inline.t(
            text=config.get("livy_tests", "text"),
            task_version=str(random.random()),
            override=conf_override,
        )
    )

def test_io(self):
    from dbnd_test_scenarios.spark.spark_io_inline import dataframes_io_pandas_spark

    assert_run_task(
        dataframes_io_pandas_spark.t(
            text=TEXT_FILE,
            task_version=str(random.random()),
            override=conf_override,
        )
    )

def test_band_ret_task(self):
    class TMultipleOutputsPipeline(PipelineTask):
        t_types = parameter.value([1, 2])
        t_output = output

        def band(self):
            self.t_output = {t: TTask(t_param=t).t_output for t in self.t_types}

    task = TMultipleOutputsPipeline()
    assert_run_task(task)

def test_word_count_inline(self):
    from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

    assert_run_task(
        word_count_inline.t(
            text=TEXT_FILE,
            task_version=str(random.random()),
            override=conf_override,
        )
    )

def test_spark_inline_same_context(self):
    from pyspark.sql import SparkSession
    from dbnd_examples.orchestration.dbnd_spark.word_count import word_count_inline
    from dbnd_spark.local.local_spark_config import SparkLocalEngineConfig

    with SparkSession.builder.getOrCreate() as sc:
        with config({SparkLocalEngineConfig.enable_spark_context_inplace: True}):
            task_instance = word_count_inline.t(text=__file__)
            assert_run_task(task_instance)
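
# Hedged sketch of what an inline word-count task like the ones imported above
# could look like; the real implementations live in dbnd_test_scenarios and
# dbnd_examples and likely use dbnd's Spark-specific decorator. A minimal
# version is a plain @task function that grabs the active Spark session itself,
# which is why the enable_spark_context_inplace tests can share the session
# they opened.
from operator import add

from pyspark.sql import SparkSession

from dbnd import task


@task
def word_count_inline_sketch(text):
    # type: (str) -> int
    session = SparkSession.builder.getOrCreate()
    lines = session.read.text(text).rdd.map(lambda r: r[0])
    counts = (
        lines.flatMap(lambda x: x.split(" "))
        .map(lambda x: (x, 1))
        .reduceByKey(add)
    )
    # number of distinct words in the input file
    return counts.count()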