Example #1
def test_log_artifacts(hdfs_system_mock):
    os.environ['MLFLOW_KERBEROS_TICKET_CACHE'] = '/tmp/krb5cc_22222222'
    os.environ['MLFLOW_KERBEROS_USER'] = '******'

    repo = HdfsArtifactRepository('hdfs:/some_path/maybe/path')

    with TempDir() as root_dir:
        with open(root_dir.path("file_one.txt"), "w") as f:
            f.write('PyArrow Works once')

        os.mkdir(root_dir.path("subdir"))
        with open(root_dir.path("subdir/file_two.txt"), "w") as f:
            f.write('PyArrow Works two')

        repo.log_artifacts(root_dir._path)

        # the Kerberos settings picked up from the environment should be passed
        # through to the mocked HDFS client
        hdfs_system_mock.assert_called_once_with(extra_conf=None,
                                                 host='default',
                                                 kerb_ticket='/tmp/krb5cc_22222222', port=0,
                                                 user='******')

        # both files should be opened for writing on HDFS under the artifact root
        # and their contents written through the mocked file handles
        open_mock = hdfs_system_mock.return_value.open
        open_mock.assert_has_calls(calls=[call('/some_path/maybe/path/file_one.txt', 'wb'),
                                          call('/some_path/maybe/path/subdir/file_two.txt', 'wb')],
                                   any_order=True)
        write_mock = open_mock.return_value.__enter__.return_value.write
        write_mock.assert_has_calls(calls=[call(b'PyArrow Works once'),
                                           call(b'PyArrow Works two')],
                                    any_order=True)
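
The snippets do not show their imports or where the hdfs_system_mock argument comes from. A minimal sketch of the scaffolding they appear to assume follows; the mlflow module paths and the patch target 'pyarrow.hdfs.HadoopFileSystem' are assumptions and may differ between MLflow and pyarrow versions.

# Sketch of the shared scaffolding assumed by the examples (module paths and
# patch target are assumptions; adjust to the MLflow/pyarrow versions in use).
import os
import sys
from tempfile import NamedTemporaryFile
from unittest import mock
from unittest.mock import call

import pytest

from mlflow.entities import FileInfo
from mlflow.store.artifact.hdfs_artifact_repo import HdfsArtifactRepository
from mlflow.utils.file_utils import TempDir


# Each test is presumably decorated like this, so the patched HDFS client is
# injected as its first argument (hdfs_system_mock, or `_` when unused):
@mock.patch('pyarrow.hdfs.HadoopFileSystem')
def test_log_artifacts(hdfs_system_mock):
    ...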
Example #2
def test_list_artifacts_nested(hdfs_system_mock):
    repo = HdfsArtifactRepository('hdfs:://host/some/path')

    expected = [FileInfo('model/conda.yaml', False, 33),
                FileInfo('model/model.pkl', False, 33),
                FileInfo('model/MLmodel', False, 33)]

    # the mocked ls listing returns absolute hdfs:// URIs; list_artifacts is
    # expected to turn them into paths relative to the artifact root
    hdfs_system_mock.return_value.ls.return_value = [{
            'kind': 'file',
            'name': 'hdfs://host/some/path/model/conda.yaml',
            'size': 33,
            },
            {
            'kind': 'file',
            'name': 'hdfs://host/some/path/model/model.pkl',
            'size': 33,
            },
            {
            'kind': 'file',
            'name': 'hdfs://host/some/path/model/MLmodel',
            'size': 33,
            }]

    actual = repo.list_artifacts('model')

    assert actual == expected
Example #3
def test_list_artifacts_root(hdfs_system_mock):
    repo = HdfsArtifactRepository('hdfs://host/some/path')

    expected = [FileInfo('model', True, 0)]

    hdfs_system_mock.return_value.ls.return_value = [{
            'kind': 'directory',
            'name': 'hdfs://host/some/path/model',
            'size': 0,
            }]

    actual = repo.list_artifacts()

    assert actual == expected
Example #4
def test_log_artifact_viewfs(hdfs_system_mock):
    repo = HdfsArtifactRepository('viewfs://host_name/mypath')

    with TempDir() as tmp_dir:
        local_file = tmp_dir.path('sample_file')
        with open(local_file, "w") as f:
            f.write('PyArrow Works')

        repo.log_artifact(local_file, 'more_path/some')

        # for viewfs URIs the scheme and host name are forwarded together as the host
        hdfs_system_mock.assert_called_once_with(extra_conf=None,
                                                 host="viewfs://host_name",
                                                 kerb_ticket=None, port=0,
                                                 user=None)

        open_mock = hdfs_system_mock.return_value.open
        open_mock.assert_called_once_with('/mypath/more_path/some/sample_file', 'wb')

        write_mock = open_mock.return_value.__enter__.return_value.write
        write_mock.assert_called_once_with(b'PyArrow Works')
Example #5
def test_log_artifact_with_kerberos_setup(hdfs_system_mock):
    if sys.platform == 'win32':
        pytest.skip()
    os.environ['MLFLOW_KERBEROS_TICKET_CACHE'] = '/tmp/krb5cc_22222222'
    os.environ['MLFLOW_KERBEROS_USER'] = '******'

    repo = HdfsArtifactRepository('hdfs:/some/maybe/path')

    with NamedTemporaryFile() as tmp_local_file:
        tmp_local_file.write(b'PyArrow Works')
        tmp_local_file.seek(0)

        repo.log_artifact(tmp_local_file.name, 'test_hdfs/some/path')

        hdfs_system_mock.assert_called_once_with(extra_conf=None,
                                                 host='default',
                                                 kerb_ticket='/tmp/krb5cc_22222222', port=0,
                                                 user='******')

        # TODO: refactor this magic ...
        write_mock = hdfs_system_mock.return_value.open.return_value.__enter__.return_value.write
        write_mock.assert_called_once_with(b'PyArrow Works')
Example #6
def test_log_artifact_with_invalid_local_dir(_):
    # the patched HDFS client argument is unused (hence `_`); the test only
    # checks that a missing local file raises an error
    repo = HdfsArtifactRepository('hdfs://host_name:8020/maybe/path')

    with pytest.raises(Exception,
                       match="No such file or directory: '/not/existing/local/path'"):
        repo.log_artifact('/not/existing/local/path', 'test_hdfs/some/path')
Example #7
def test_delete_artifacts(hdfs_system_mock):
    delete_mock = hdfs_system_mock.return_value.delete
    repo = HdfsArtifactRepository('hdfs:/some_path/maybe/path')
    repo.delete_artifacts('artifacts')
    delete_mock.assert_called_once_with('/some_path/maybe/path/artifacts', recursive=True)
Example #8
def test_list_artifacts_empty_hdfs_dir(hdfs_system_mock):
    hdfs_system_mock.return_value.exists.return_value = False

    repo = HdfsArtifactRepository('hdfs:/some_path/maybe/path')
    actual = repo.list_artifacts()
    assert actual == []