def dws_lineage_table(self, line):
     """Return the workspace's lineage as a pandas DataFrame.

     ``line`` is the magic command's argument string. It may contain
     ``--snapshot <hash-or-tag>``, which is passed through to
     ``make_lineage_table``.

     Returns ``None`` when argument parsing exits early or when the magics
     are disabled (a Markdown notice is displayed in the latter case).
     Otherwise returns a DataFrame with the columns 'Lineage Type',
     'Details' and 'Inputs', indexed by 'Resource'.
     """
     import pandas as pd  # TODO: support case where pandas wasn't installed

     arg_parser = DwsMagicParseArgs(
         "dws_lineage_table",
         description="Show a table of lineage for the workspace's resources")
     arg_parser.add_argument(
         '--snapshot',
         type=str,
         default=None,
         help="If specified, print lineage as of the specified snapshot hash or tag")

     try:
         parsed = arg_parser.parse_magic_line(line)
     except DwsMagicArgParseExit:
         # user asked for help
         return

     if self.disabled:
         notice = Markdown(
             "DWS magic commands are disabled. To enable, set `DWS_MAGIC_DISABLE` to `False` and restart kernel."
         )
         display(notice)
         return

     # Materialize the rows before handing them to pandas.
     lineage_rows = list(
         make_lineage_table(self.dws_jupyter_info.workspace_dir,
                            parsed.snapshot))
     column_names = ['Resource', 'Lineage Type', 'Details', 'Inputs']
     table = pd.DataFrame(lineage_rows, columns=column_names)
     return table.set_index('Resource')
Ejemplo n.º 2
0
    def test_import(self):
        """Check lineage tables and graphs with an imported git resource.

        The set of references in the lineage table is verified three times:
        before any snapshot, as of the ``tag1`` snapshot, and after the
        exported resource has been updated and pulled. A lineage graph is
        also written before and after the snapshot.
        """

        def lineage_refs(**table_kwargs):
            # Collect the first element of every lineage table row.
            rows = make_lineage_table(WS_DIR, verbose=True, **table_kwargs)
            return frozenset(row[0] for row in rows)

        self._setup_exported_resource()
        # NOTE: os.mkdir(WS_DIR) is deliberately not called in this variant
        # (it was commented out) -- presumably the repo setup creates it.
        self._setup_initial_repo(create_resources="code,results",
                                 hostname="test-host")
        self._run_dws(['add', 'git', '--imported', EXPORTED_RESOURCE_DIR])
        with open(CODE_FILE, "w") as code_file:
            code_file.write("print('hello')\n")

        # Record one results step that reads from the imported resource.
        input_csv = join(EXPORTED_RESOURCE_DIR, 'im_data.csv')
        step = LineageBuilder()
        step = step.with_workspace_directory(WS_DIR)
        step = step.with_step_name("code.py")
        step = step.with_parameters({"a": 5})
        step = step.with_input_path(input_csv)
        step = step.as_results_step(RESULTS_DIR)
        with step.eval() as lineage:
            lineage.write_results({"accuracy": 0.95, "recall": 0.8})

        # Before any snapshot is taken.
        refs_with_results = frozenset((
            'results',
            'exported-resource:/im_data.csv',
            'source-data:/data.csv',
        ))
        self.assertEqual(refs_with_results, lineage_refs())
        pre_snapshot_html = join(TEMPDIR, 'graph_pre_snapshot.html')
        make_lineage_graph(pre_snapshot_html, WS_DIR, verbose=True)

        # The same references are expected as of the tagged snapshot.
        self._run_dws(["snapshot", "tag1"])
        self.assertEqual(refs_with_results, lineage_refs(tag_or_hash='tag1'))
        post_snapshot_html = join(TEMPDIR, 'graph_post_snapshot.html')
        make_lineage_graph(post_snapshot_html,
                           WS_DIR,
                           tag_or_hash='tag1',
                           verbose=True)

        # verify that an update of the exported resource followed by a pull works correctly
        self._update_exported_resource()
        self._run_dws(['pull'])
        refs_after_pull = lineage_refs()
        expected_after_pull = frozenset((
            'exported-resource:/im_data.csv',
            'source-data:/data2.csv',
        ))
        self.assertEqual(expected_after_pull, refs_after_pull)
Ejemplo n.º 3
0
    def test_import(self):
        """Check lineage tables and graphs with an rclone-imported resource.

        The exported resource is added to the workspace as an imported rclone
        resource under ``./exported-resource``. A results step that reads
        ``im_data.csv`` from it is recorded, and the set of references in the
        lineage table is verified both before a snapshot and as of the
        ``tag1`` snapshot. A lineage graph is written in both states.
        """
        self._setup_exported_resource()
        os.mkdir(WS_DIR)
        self._setup_initial_repo(git_resources="code,results",
                                 hostname="test-host")
        # Location inside the workspace where the imported resource is added.
        imported_dir = join(WS_DIR, 'exported-resource')
        # Echo the equivalent command line to make the test output easier to follow.
        print(
            "cmd would be: dws add rclone --imported 'localfs:%s' ./exported-resource"
            % EXPORTED_RESOURCE_DIR)
        self._run_dws([
            'add', 'rclone', '--imported', 'localfs:' + EXPORTED_RESOURCE_DIR,
            './exported-resource'
        ])
        with open(CODE_FILE, "w") as f:
            f.write("print('hello')\n")
        # Build a results step for "code.py" whose input is the imported CSV.
        builder = (LineageBuilder().with_workspace_directory(
            WS_DIR).with_step_name("code.py").with_parameters({
                "a": 5
            }).with_input_path(join(
                imported_dir, 'im_data.csv')).as_results_step(RESULTS_DIR))
        with builder.eval() as lineage:
            lineage.write_results({"accuracy": 0.95, "recall": 0.8})
        # Pre-snapshot check: compare the first element of each table row
        # against the expected set of references.
        tlist = make_lineage_table(WS_DIR, verbose=True)
        expected_refs = frozenset([
            'results', 'exported-resource:/im_data.csv',
            'source-data:/data.csv'
        ])
        actual_refs = frozenset([t[0] for t in tlist])
        self.assertEqual(expected_refs, actual_refs)
        graph_output_file = join(TEMPDIR, 'graph_pre_snapshot.html')
        make_lineage_graph(graph_output_file, WS_DIR, verbose=True)

        # Post-snapshot check: the same references are expected as of 'tag1'.
        self._run_dws(["snapshot", "tag1"])
        tlist = make_lineage_table(WS_DIR, tag_or_hash='tag1', verbose=True)
        actual_refs = frozenset([t[0] for t in tlist])
        self.assertEqual(expected_refs, actual_refs)
        graph_output_file = join(TEMPDIR, 'graph_post_snapshot.html')
        make_lineage_graph(graph_output_file,
                           WS_DIR,
                           tag_or_hash='tag1',
                           verbose=True)