Example #1
0
    def test_sync_hdfs_store(self, mock_get_fs_fn):
        """Exercise the sync function of an ``HDFSStore`` against a mock filesystem.

        Files are created in the store's local output directory and synced;
        after each sync the most recent ``upload`` call on the mock filesystem
        must target the matching path under the run's remote root. Finally,
        two existing files get a newer timestamp and the test expects exactly
        two additional uploads from the next sync.

        :param mock_get_fs_fn: mock whose return value is replaced by a
            factory yielding the mock filesystem (presumably injected by a
            ``mock.patch`` decorator outside this view -- confirm).
        """
        fake_fs = mock.Mock()
        mock_get_fs_fn.return_value = lambda: fake_fs

        store = HDFSStore('/user/test/output')

        run_id = 'run_001'
        open_local_output_dir = store.get_local_output_dir_fn(run_id)
        sync = store.sync_fn(run_id)
        remote_root = store.get_run_path(run_id)

        # Access/modification times applied to the files: first the initial
        # stamp, then a slightly later one to mark a file as modified.
        initial_times = (1330712280, 1330712280)
        later_times = (1330712292, 1330712292)

        def stamp_file(path, times=None):
            # Create the file if it is missing, then set its atime/mtime.
            with open(path, 'a'):
                os.utime(path, times)

        def sync_and_check_last_upload(local_dir, relative_path):
            # Sync, then verify the latest upload targeted the remote path.
            sync(local_dir)
            expected_remote = os.path.join(remote_root, relative_path)
            fake_fs.upload.assert_called_with(expected_remote, mock.ANY)

        with open_local_output_dir() as local_dir:
            file_a = os.path.join(local_dir, 'a.txt')
            file_b = os.path.join(local_dir, 'b.txt')

            # Top-level files, added one at a time.
            stamp_file(file_a, initial_times)
            sync_and_check_last_upload(local_dir, 'a.txt')

            stamp_file(file_b, initial_times)
            sync_and_check_last_upload(local_dir, 'b.txt')

            # A file inside a nested directory.
            nested_dir = os.path.join(local_dir, 'subdir')
            os.mkdir(nested_dir)
            stamp_file(os.path.join(nested_dir, 'c.txt'), initial_times)
            sync_and_check_last_upload(local_dir, 'subdir/c.txt')

            # Re-stamping files must not upload anything until the next sync.
            stamp_file(file_a, later_times)
            stamp_file(file_b, later_times)
            assert fake_fs.upload.call_count == 3

            # The next sync is expected to upload exactly the two re-stamped
            # files (3 earlier uploads + 2 = 5).
            sync(local_dir)
            assert fake_fs.upload.call_count == 5
Example #2
0
    def test_hdfs_store_parse_url(self, mock_get_filesystem_fn):
        """Check how ``HDFSStore`` interprets different root URL forms.

        For each accepted root the test verifies the path prefix, the
        conversion between localized and full paths, and the ``host`` /
        ``port`` entries of ``_hdfs_kwargs``. It also checks that a
        ``file://`` root raises ``ValueError`` and that explicit train / val /
        test paths are reported with the ``hdfs://`` prefix.

        :param mock_get_filesystem_fn: unused in the body (presumably injected
            by a ``mock.patch`` decorator outside this view -- confirm).
        """
        localized_path = '/user/test/output'

        # Each entry: (root, expected prefix, expected full URL, host, port).
        url_cases = [
            # Case 1: full path
            ('hdfs://namenode01:8020/user/test/output',
             'hdfs://namenode01:8020',
             'hdfs://namenode01:8020/user/test/output',
             'namenode01', 8020),
            # Case 2: no host and port
            ('hdfs:///user/test/output',
             'hdfs://',
             'hdfs:///user/test/output',
             'default', 0),
            # Case 3: no prefix
            ('/user/test/output',
             'hdfs://',
             'hdfs:///user/test/output',
             'default', 0),
            # Case 4: no namespace
            # NOTE(review): this input is identical to case 1 -- confirm
            # whether a different URL was intended here.
            ('hdfs://namenode01:8020/user/test/output',
             'hdfs://namenode01:8020',
             'hdfs://namenode01:8020/user/test/output',
             'namenode01', 8020),
        ]
        for hdfs_root, prefix, full_url, host, port in url_cases:
            store = HDFSStore(hdfs_root)
            assert store.path_prefix() == prefix, hdfs_root
            assert store.get_full_path(localized_path) == full_url, hdfs_root
            assert store.get_localized_path(
                full_url) == localized_path, hdfs_root
            assert store._hdfs_kwargs['host'] == host, hdfs_root
            assert store._hdfs_kwargs['port'] == port, hdfs_root

        # Case 5: bad prefix
        with pytest.raises(ValueError):
            hdfs_root = 'file:///user/test/output'
            HDFSStore(hdfs_root)

        # Each entry: (root, train path, val path, test path) as passed in.
        override_cases = [
            # Case 6: override paths, no prefix
            ('/user/prefix',
             '/user/train_path',
             '/user/val_path',
             '/user/test_path'),
            # Case 7: override paths, prefix
            ('hdfs:///user/prefix',
             'hdfs:///user/train_path',
             'hdfs:///user/val_path',
             'hdfs:///user/test_path'),
        ]
        for hdfs_root, train_path, val_path, test_path in override_cases:
            store = HDFSStore(hdfs_root,
                              train_path=train_path,
                              val_path=val_path,
                              test_path=test_path)
            # Both cases expect the same prefixed paths back.
            train_result = store.get_train_data_path()
            assert train_result == 'hdfs:///user/train_path', hdfs_root
            val_result = store.get_val_data_path()
            assert val_result == 'hdfs:///user/val_path', hdfs_root
            test_result = store.get_test_data_path()
            assert test_result == 'hdfs:///user/test_path', hdfs_root