コード例 #1
0
ファイル: test_local.py プロジェクト: turbaszek/dbnd
    def test_copy(self):
        """Copy a file into the ``newdir`` sub-folder and check both paths exist afterwards."""
        source_path = os.path.join(self.path, "src.txt")
        copy_path = os.path.join(self.path, "newdir", "dest.txt")

        # Create an empty source file: open it for writing and close it right away.
        empty_file = target(source_path).open("w")
        empty_file.close()

        self.fs.copy(source_path, copy_path)

        # The copy must leave the source in place and create the destination.
        for expected_path in (source_path, copy_path):
            assert os.path.exists(expected_path)
コード例 #2
0
def t_f_pathlib(a, b):
    # type: (Path, Path) -> str
    """Write "ok" into the file at ``b`` and return ``a`` as a string.

    Both arguments must be ``Path`` instances; anything else fails an
    assertion whose message is the offending type.
    """
    for path_arg in (a, b):
        assert isinstance(path_arg, Path), type(path_arg)

    out_path = str(b)
    target(out_path).mkdir_parent()
    with open(out_path, "w") as out_file:
        out_file.write("ok")
    return str(a)
コード例 #3
0
 def _process_xcom_value(value):
     """Turn operator, XCom and jinja arguments into targets; return anything else as is.

     Task ids of operator and XCom values are appended to ``upstream_task_ids``.
     """
     if isinstance(value, BaseOperator):
         value = build_xcom_str(value)
         upstream_task_ids.append(value.task_id)

     # NOTE(review): if build_xcom_str() returns an XComStr, the same task id
     # is appended a second time here -- confirm this is intended.
     if isinstance(value, XComStr):
         upstream_task_ids.append(value.task_id)
         return target("xcom://%s" % value)

     is_jinja = self._is_jinja_arg(value)
     return target("jinja://%s" % value) if is_jinja else value
コード例 #4
0
    def test_create_dataset(self):
        """Concatenate the sample frame 100x per round and write each round to a generated target."""
        frame = target(sample_path("sample_data.csv")).as_pandas.read_csv()

        for exponent in range(2, 5):
            replication = 100 ** exponent
            logger.info("Sampling %s", replication)
            frame = pd.concat([frame] * 100)

            t = target(sample_generated(replication, file.csv))
            t.as_pandas.to_csv(frame)

        # Only the last generated file is reported, after the loop has finished.
        file_size = os.stat(t.path).st_size
        logger.info("Sample %s -> %s ", replication, file_size)
コード例 #5
0
    def test_cache(self, pandas_data_frame):
        """A cached parquet write returns the very same frame object on a no-copy read."""
        cached_target = target(self.path)
        cached_target.as_pandas.to_parquet(pandas_data_frame, cache=True)

        from_cache = cached_target.as_pandas.read_parquet(no_copy_on_read=True)
        assert id(pandas_data_frame) == id(from_cache)

        # Validate a real read through a fresh target built on the same path.
        fresh_target = target(cached_target.path)
        from_disk = fresh_target.as_pandas.read_parquet()

        assert_frame_equal(from_disk, pandas_data_frame)
コード例 #6
0
    def test_dict_parse(self):
        """Dict[str, Path] values are Targets after init and Paths at runtime."""
        p = parameter[Dict[str, Path]]._p  # type: ParameterDefinition
        init_value = p.calc_init_value({"a": target("/a"), "b": target("/b")})

        # Init keeps the values as targets ...
        for parsed in init_value.values():
            assert isinstance(parsed, Target)

        runtime_value = p.calc_runtime_value(init_value, None)

        # ... and the runtime value exposes them as paths.
        for resolved in runtime_value.values():
            assert isinstance(resolved, Path)
コード例 #7
0
    def test_list_parse(self):
        """List[Path] items are Targets after init and Paths at runtime."""
        p = parameter[List[Path]]._p  # type: ParameterDefinition
        init_value = p.calc_init_value([target("/a"), target("/b")])

        # Init keeps the items as targets ...
        for parsed_item in init_value:
            assert isinstance(parsed_item, Target)

        runtime_value = p.calc_runtime_value(init_value, None)

        # ... and the runtime value exposes them as paths.
        for resolved_item in runtime_value:
            assert isinstance(resolved_item, Path)
コード例 #8
0
ファイル: test_local_target.py プロジェクト: cxz/dbnd
 def test_copy(self):
     """Write a file, copy it, and check that both files exist with equal content."""
     source = target(self.path)
     writer = source.open("w")
     writer.write("test")
     writer.close()

     # Before the copy only the source exists.
     assert os.path.exists(self.path)
     assert not os.path.exists(self.copy)

     source.copy(self.copy)

     # After the copy both files exist.
     for existing_path in (self.path, self.copy):
         assert os.path.exists(existing_path)

     original_content = source.open("r").read()
     copied_content = target(self.copy).open("r").read()
     assert original_content == copied_content
コード例 #9
0
    def test_task_metrics_simple(self, tmpdir, pandas_data_frame):
        """Metrics logged through the tracker are read back as user metrics from the folder."""
        metrics_folder = target(str(tmpdir))

        task_run = Mock()
        task_run.meta_files = TaskRunMetaFiles(metrics_folder)
        tr_tracker = TaskRunTracker(
            task_run=task_run, tracking_store=FileTrackingStore()
        )
        tr_tracker.settings.features.get_value_meta_conf = Mock(
            return_value=ValueMetaConf.enabled()
        )

        logged_metrics = (
            ("a", 1),
            ("a_string", "1"),
            ("a_list", [1, 3]),
            ("a_tuple", (1, 2)),
        )
        for metric_key, metric_value in logged_metrics:
            tr_tracker.log_metric(metric_key, metric_value)

        reader = TaskRunMetricsFileStoreReader(metrics_folder)
        user_metrics = reader.get_all_metrics_values(MetricSource.user)

        # The numeric string is expected back as a float; list and tuple as their string form.
        expected_metrics = {
            "a": 1.0,
            "a_list": "[1, 3]",
            "a_string": 1.0,
            "a_tuple": "(1, 2)",
        }
        assert user_metrics == expected_metrics
コード例 #10
0
    def __init__(self, task_run):
        """Resolve the log file locations used for this task run attempt.

        Sets the local task log, the heartbeat log, the remote log (``None``
        when the task environment is a ``LocalEnvConfig``) and, only when the
        ``DBND__LOG_SPARK`` environment variable is set, the local spark log.
        """
        super(TaskRunLogManager, self).__init__(task_run)

        # Main task log: the task's own airflow log file when it has one,
        # otherwise a per-attempt file under the local task run root.
        if not hasattr(task_run.task, "airflow_log_file"):
            self.local_log_file = self.task_run.local_task_run_root.partition(
                name="%s.log" % task_run.attempt_number
            )
        else:
            self.local_log_file = target(task_run.task.airflow_log_file)

        # NOTE(review): local_spark_log_file is not assigned at all when the
        # variable is unset -- confirm that readers of it account for that.
        if os.getenv("DBND__LOG_SPARK"):
            self.local_spark_log_file = self.task_run.local_task_run_root.partition(
                name="%s-spark.log" % task_run.attempt_number
            )

        self.local_heartbeat_log_file = self.task_run.local_task_run_root.partition(
            name="%s.heartbeat.log" % task_run.attempt_number
        )

        if isinstance(self.task.task_env, LocalEnvConfig):
            self.remote_log_file = None
        else:
            self.remote_log_file = self.task_run.attempt_folder.partition(
                "%s.log" % task_run.attempt_number
            )

        # file handler for task log
        # if set -> we are in the context of capturing
        self._log_task_run_into_file_active = False
コード例 #11
0
 def test_pass_on_injected_format(self):
     """Run the text task on a ``.myext`` file whose format is passed explicitly via ``config``."""
     text_target = target(
         scenario_path("data/some_unknown_ext.myext"), config=file.txt
     )
     assert_run_task(TTextDataTask(text_data=text_target))
コード例 #12
0
def raise_failure(failure):
    """Trigger the failure scenario selected by ``failure``.

    ``"missing_params"`` instantiates ``TTaskMissingParamsMultiple`` and
    ``"read_error"`` reads the ``"not_exists"`` target; any other value
    raises a plain ``Exception``.
    """
    if failure == "missing_params":
        return TTaskMissingParamsMultiple()
    if failure == "read_error":
        return target("not_exists").read()
    raise Exception("just an error")
コード例 #13
0
ファイル: test_pandas_io.py プロジェクト: databand-ai/dbnd
    def test_pandas_task(self, tmpdir, pandas_data_frame):
        """Chain two pandas tasks: the second one reads the JSON output of the first."""

        class PandasTask(PythonTask):
            some_param = parameter[str]
            p_input = data.target
            p_output = output.json

            def run(self):
                # Read the input as a frame, log it, and pass it on unchanged.
                frame = self.p_input.read_df()
                logger.warning("Data at run %s: %s", self.task_name, frame)
                frame.to_target(self.p_output)

        csv_target = target(str(tmpdir.join("pandas.csv")))
        pandas_data_frame.to_target(csv_target)

        first_task = PandasTask(
            p_input=csv_target.path, some_param=1, task_name="first"
        )
        second_task = PandasTask(
            p_input=first_task.p_output, some_param=2, task_name="second"
        )

        # Only the second task is run explicitly.
        second_task.dbnd_run()

        assert second_task.p_output
        print(second_task.p_output.read_df())
コード例 #14
0
ファイル: gdbserver.py プロジェクト: mprc-pku/riscv-tests
def main():
    """Parse the command line, build the target and run all tests of this module.

    Stores the parsed arguments in the module-level ``parsed`` and returns
    whatever ``testlib.run_all_tests`` returns.
    """
    # TODO: remove global
    global parsed  # pylint: disable=global-statement

    usage_example = """
            Example command line from the real world:
            Run all RegsTest cases against a physical FPGA, with custom openocd command:
            ./gdbserver.py --freedom-e300 --server_cmd "$HOME/SiFive/openocd/src/openocd -s $HOME/SiFive/openocd/tcl -d" Simple
            """
    parser = argparse.ArgumentParser(
        description="Test that gdb can talk to a RISC-V target.",
        epilog=usage_example)
    targets.add_target_options(parser)
    testlib.add_test_run_options(parser)

    parsed = parser.parse_args()
    target = targets.target(parsed)

    testlib.print_log_names = parsed.print_log_names

    # An xlen given on the command line overrides the target's own value.
    if parsed.xlen:
        target.xlen = parsed.xlen

    return testlib.run_all_tests(sys.modules[__name__], target, parsed)
コード例 #15
0
    def load_task_params_from_task_band(self, task_band, task_params):
        """Rebuild the parameter values, taking values from the task band JSON where present.

        A parameter is passed through untouched when its name is missing from
        the task band or is ``"result"``. Every other parameter is replaced by
        a new ``ParameterValue`` built from the task band entry. Returns the
        new list in the original order.
        """
        task_band_value = target(task_band).as_object.read_json()

        source = "task_band.json"
        found = []
        new_params = []

        for p_value in task_params:
            param_name = p_value.name
            if param_name in task_band_value and param_name != "result":
                value = p_value.parameter.calc_init_value(
                    task_band_value[param_name]
                )
                found.append(param_name)
                new_params.append(
                    ParameterValue(
                        parameter=p_value.parameter,
                        source=source,
                        source_value=value,
                        value=value,
                    )
                )
            else:
                new_params.append(p_value)

        message = (
            "Loading task '{task_family}' from {task_band}:\n"
            "\tfields taken:\t{found}"
        ).format(
            task_family=self.task_family,
            task_band=task_band,
            found=",".join(found),
        )
        logger.info(message)
        return new_params
コード例 #16
0
    def test_task_metrics_simple(self, tmpdir, pandas_data_frame):
        """Logged metrics and a logged dataframe are read back from the metrics folder."""
        metrics_folder = target(str(tmpdir))

        task_run = Mock()
        task_run.meta_files = TaskRunMetaFiles(metrics_folder)
        tr_tracker = TaskRunTracker(
            task_run=task_run, tracking_store=FileTrackingStore()
        )

        logged_metrics = (
            ("a", 1),
            ("a_string", "1"),
            ("a_list", [1, 3]),
            ("a_tuple", (1, 2)),
        )
        for metric_key, metric_value in logged_metrics:
            tr_tracker.log_metric(metric_key, metric_value)
        tr_tracker.log_dataframe("df", pandas_data_frame)

        reader = TaskRunMetricsFileStoreReader(metrics_folder)
        actual = reader.get_all_metrics_values()
        print(actual)

        # The dataframe is expected to contribute preview, schema and shape entries.
        expected_metrics = {
            "a": 1.0,
            "a_list": "[1, 3]",
            "a_string": 1.0,
            "a_tuple": "(1, 2)",
            "df.preview": "Names  Births",
            "df.schema": "{",
            "df.shape": "[5, 2]",
            "df.shape_0_": 5.0,
            "df.shape_1_": 2.0,
        }
        assert actual == expected_metrics
コード例 #17
0
ファイル: test_pandas_formats.py プロジェクト: turbaszek/dbnd
    def validate(self, df, target_config, read_kwargs=None, write_kwargs=None):
        """Round-trip ``df`` through targets of ``target_config`` using each read/write path.

        ``read_kwargs`` and ``write_kwargs`` are forwarded to every read and
        write call; both default to no extra arguments.
        """
        target_read_f, target_write_f = _get_read_write_functions(target_config)
        read_kwargs = read_kwargs or {}
        write_kwargs = write_kwargs or {}

        t = self.get_target(target_config)

        # regular write - should automatically select format
        df.to_target(t, **write_kwargs)

        # automatic read: equal content, but a different object
        auto_read = t.read_df(**read_kwargs)
        assert_frame_equal(auto_read, df)
        assert id(auto_read) != id(df)

        # explicit read function: equal content, but a different object
        explicit_read = target_read_f(t, **read_kwargs)
        assert_frame_equal(explicit_read, df)
        assert id(explicit_read) != id(df)

        # a different target object built on the same path
        same_path_target = target(t.path)
        assert_frame_equal(target_read_f(same_path_target, **read_kwargs), df)

        # write via the explicit write function into a second, not yet existing target
        second_target = self.get_target(target_config, name="local_file_2")
        assert not second_target.exists()
        target_write_f(second_target, df, **write_kwargs)
        assert_frame_equal(target_read_f(second_target, **read_kwargs), df)
コード例 #18
0
    def test_task_metrics_histograms(self, tmpdir, pandas_data_frame):
        """Logging a dataframe with value meta enabled yields histogram metrics for its columns."""
        metrics_folder = target(str(tmpdir))

        task_run = Mock()
        task_run.meta_files = TaskRunMetaFiles(metrics_folder)
        tr_tracker = TaskRunTracker(
            task_run=task_run, tracking_store=FileTrackingStore()
        )
        tr_tracker.settings.tracking.get_value_meta_conf = Mock(
            return_value=ValueMetaConf.enabled()
        )
        tr_tracker.log_data("df", pandas_data_frame, meta_conf=ValueMetaConf.enabled())

        reader = TaskRunMetricsFileStoreReader(metrics_folder)
        hist_metrics = reader.get_all_metrics_values(MetricSource.histograms)

        # NOTE(review): expected_preview is built but never asserted against in
        # this test -- confirm whether a preview check is missing.
        expected_preview = "\n".join(
            [
                "   Names  Births  Married",
                "     Bob     968     True",
                " Jessica     155    False",
                "    Mary      77     True",
                "    John     578    False",
                "     Mel     973     True",
            ]
        )

        # std value varies in different py versions due to float precision fluctuation
        assert hist_metrics["df.Births.std"] == pytest.approx(428.4246)
コード例 #19
0
ファイル: tracking_store_file.py プロジェクト: lbtanh/dbnd
    def log_artifact(self, task_run, name, artifact, artifact_target):
        """Store ``artifact`` at ``artifact_target`` and return that target.

        A string artifact is treated as a source location: a ``DirTarget`` is
        copied, anything else is read and written over. A matplotlib
        ``Figure`` is saved into an in-memory buffer and written in binary
        mode. Any other type raises ``DatabandRuntimeError``. The parent
        folder of ``artifact_target`` is created before the type is checked.
        """
        artifact_target.mkdir_parent()

        if isinstance(artifact, six.string_types):
            from targets.dir_target import DirTarget

            source = target(artifact)
            if not isinstance(source, DirTarget):
                artifact_target.write(source.read())
            else:
                source.copy(artifact_target)
            return artifact_target

        if PYPLOT_INSTALLED and isinstance(artifact, Figure):
            buffer = BytesIO()
            artifact.savefig(buffer)
            # Rewind so that read() returns everything savefig() wrote.
            buffer.seek(0)
            artifact_target.write(buffer.read(), mode="wb")
            return artifact_target

        message = (
            "Could not recognize artifact of type %s, must be string or matplotlib Figure"
            % type(artifact)
        )
        raise DatabandRuntimeError(message)
コード例 #20
0
ファイル: cmd_execute.py プロジェクト: kalebinn/dbnd
def execute(ctx, dbnd_run, disable_tracking_api, expected_dbnd_version,
            expected_python_version):
    """Execute databand primitives"""
    # NOTE(review): the docstring above is left untouched on purpose; it is
    # presumably surfaced as the CLI help text -- confirm before rewording it.
    #
    # Loads the run dumped at ``dbnd_run`` (with tracking disabled through the
    # environment) and stores it on ``ctx.obj`` together with the
    # ``disable_tracking_api`` flag.

    # The version comparison only runs when both expected versions were given.
    if expected_python_version and expected_dbnd_version:
        version_checks = (
            ("Python", expected_python_version, get_python_version()),
            ("dbnd", expected_dbnd_version, get_dbnd_version()),
        )
        for component, expected, actual in version_checks:
            if expected == actual:
                continue
            # Each fragment ends with a space: adjacent string literals are
            # concatenated verbatim, and the missing space used to produce
            # "Execution willcontinue".
            warn(
                "You submitted job using {component} {expected} but the Spark "
                "cluster uses {component} {actual}. To assure execution "
                "consistency use the same version in both places. Execution "
                "will continue but it may fail due to version "
                "mismatch.".format(
                    component=component, expected=expected, actual=actual),
                DbndVersionsClashWarning,
            )

    from targets import target

    with env_context(**{ENV_DBND__TRACKING: "False"}):
        run = RunExecutor.load_run(dump_file=target(dbnd_run),
                                   disable_tracking_api=disable_tracking_api)
        ctx.obj = {"run": run, "disable_tracking_api": disable_tracking_api}
コード例 #21
0
 def _local_multitarget():
     """Build a MultiTarget with one target per sub-target, placed under the local sync cache folder."""
     cache_root = os.path.join(DBND_LOCAL_ROOT, LOCAL_SYNC_CACHE_NAME)
     local_targets = []
     for subtarget in my_multitarget.targets:
         # Drop leading slashes from the sub-target path before passing it on.
         relative_path = subtarget.path.lstrip("/")
         local_targets.append(target(cache_root, relative_path))
     return MultiTarget(local_targets)
コード例 #22
0
ファイル: task_meta_factory.py プロジェクト: kalebinn/dbnd
    def load_task_params_from_task_band(self, task_band, task_params):
        """Rebuild the parameter mapping, taking values from the task band JSON where present.

        A parameter is passed through untouched when its name is missing from
        the task band or equals ``RESULT_PARAM``. Every other parameter is
        replaced by a new ``ParameterValue`` built from the task band entry.
        Returns the new mapping.
        """
        task_band_value = target(task_band).as_object.read_json()

        source = "task_band.json"
        found = []
        new_params = {}

        for name, p_value in iteritems(task_params):
            if name in task_band_value and name != RESULT_PARAM:
                value = p_value.parameter.calc_init_value(task_band_value[name])
                found.append(name)
                new_parameter_value = ParameterValue(
                    parameter=p_value.parameter,
                    source=source,
                    source_value=value,
                    value=value,
                )
                # Keyed by the name reported by the new value itself.
                new_params[new_parameter_value.name] = new_parameter_value
            else:
                new_params[name] = p_value

        message = (
            "Loading task '{task_family}' from {task_band}:\n"
            "\tfields taken:\t{found}"
        ).format(
            task_family=self.task_family,
            task_band=task_band,
            found=",".join(found),
        )
        logger.info(message)
        return new_params
コード例 #23
0
    def test_no_cache_for_csv(self, pandas_data_frame):
        """A CSV written with ``cache=True`` still comes back as a different object on read."""
        csv_target = target(self.path)
        csv_target.as_pandas.to_csv(pandas_data_frame, index=False, cache=True)

        reread = csv_target.as_pandas.read_csv(no_copy_on_read=True)
        assert id(reread) != id(pandas_data_frame)
コード例 #24
0
 def test_word_count_pyspark(self):
     """Run the PySpark word count task and print the first part file of its counters output."""
     logging.info("Running %s", WordCountPySparkTask)

     # A random task_version is passed on every invocation.
     task = WordCountPySparkTask(
         text=TEXT_FILE,
         task_version=str(random.random()),
         override=conf_override,
     )
     task.dbnd_run()

     first_part = target(task.counters.path, "part-00000")
     print(first_part.read())
コード例 #25
0
 def test_dbnd_run(self, tmpdir):
     """Run TTask in a dbnd subprocess and check that the requested output exists afterwards."""
     output = targets.target(tmpdir.join("task_output"))

     # Task family first, then one "-r key=value" pair per parameter.
     args = [TTask.get_task_family()]
     args += ["-r", "t_param=10"]
     args += ["-r", "t_output=" + output.path]

     run_dbnd_subprocess_test(args)
     assert output.exists()
コード例 #26
0
    def _build_base_pod(self) -> k8s.V1Pod:
        """Read the YAML at ``self.pod_yaml`` and deserialize it into a ``V1Pod``."""
        from kubernetes.client import ApiClient

        pod_yaml_text = target(self.pod_yaml).read()
        pod_dict = yaml.safe_load(pod_yaml_text)
        if not pod_dict:
            # A falsy load result is replaced by an empty dict.
            pod_dict = {}

        # __deserialize_model is private to ApiClient, hence the name-mangled access.
        deserialize_model = ApiClient()._ApiClient__deserialize_model
        return deserialize_model(pod_dict, k8s.V1Pod)
コード例 #27
0
def partner_file_data_location(name, task_target_date):
    """Return a target for the big partner file when ``random()`` is below 0.2, else the small one.

    ``name`` and ``task_target_date`` are accepted but not used here.
    """
    use_big_file = random() < 0.2
    partner_file = "data/big_file.csv" if use_big_file else "data/small_file.csv"
    return target(partner_file)
コード例 #28
0
ファイル: test_custom_parameters.py プロジェクト: lbtanh/dbnd
    def test_universal_feature_store_hdf5(self, tmpdir, sample_feature_store):
        """Save a feature store to an ``.hdf5`` target and load it back with equal features."""
        hdf5_target = target(tmpdir, "output.hdf5")
        value_type = MyFeatureStoreValueTypeUniversal()

        value_type.save_to_target(target=hdf5_target, value=sample_feature_store)
        loaded = value_type.load_from_target(target=hdf5_target)

        assert_frame_equal(loaded.features, sample_feature_store.features)
コード例 #29
0
    def sync(self, local_file):
        """Sync ``local_file`` through ``self._sync`` and return the result as a string.

        A value that is not already a ``Target`` is wrapped with ``target()``
        first. A falsy ``local_file`` returns ``None`` without syncing.
        """
        if local_file:
            if isinstance(local_file, Target):
                file_target = local_file
            else:
                file_target = target(local_file)
            return str(self._sync(file_target))

        #  should return None, not empty string to be compatible with airflow code
        return None
コード例 #30
0
ファイル: cmd_execute.py プロジェクト: ipattarapong/dbnd
def execute(ctx, dbnd_run, disable_tracking_api):
    """Execute databand primitives"""
    # NOTE(review): the docstring above is left untouched on purpose; it is
    # presumably surfaced as the CLI help text -- confirm before rewording it.
    from dbnd._core.run.databand_run import DatabandRun
    from targets import target

    # Load the run from its dump file and share it through the context object.
    loaded_run = DatabandRun.load_run(
        dump_file=target(dbnd_run),
        disable_tracking_api=disable_tracking_api,
    )
    ctx.obj = dict(run=loaded_run, disable_tracking_api=disable_tracking_api)
コード例 #31
0
ファイル: gdbserver.py プロジェクト: riscv/riscv-tests
def main():
    """Parse the command line, build the target and run all tests of this module.

    Stores the parsed arguments in the module-level ``parsed`` and returns
    whatever ``testlib.run_all_tests`` returns.
    """
    # TODO: remove global
    global parsed   # pylint: disable=global-statement

    usage_example = """
            Example command line from the real world:
            Run all RegsTest cases against a physical FPGA, with custom openocd command:
            ./gdbserver.py --freedom-e300 --server_cmd "$HOME/SiFive/openocd/src/openocd -s $HOME/SiFive/openocd/tcl -d" Simple
            """
    parser = argparse.ArgumentParser(
            description="Test that gdb can talk to a RISC-V target.",
            epilog=usage_example)
    targets.add_target_options(parser)
    testlib.add_test_run_options(parser)

    parsed = parser.parse_args()
    target = targets.target(parsed)
    testlib.print_log_names = parsed.print_log_names

    return testlib.run_all_tests(sys.modules[__name__], target, parsed)