Example #1
0
def test_get_samples(client: Client):
    """Fetch the "safecast" sample as raw records and as a DataFrame."""
    # Plain call: only require that at least one record comes back.
    records = client.get_sample("safecast")
    assert len(records) > 0

    # DataFrame form: ask for a fixed number of rows through the ``limit``
    # query parameter and expect exactly that many.
    row_limit = 10
    frame = client.get_sample(
        "safecast",
        as_df=True,
        params={"limit": row_limit},
    )
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == row_limit
Example #2
0
def test_list_samples(client: Client):
    """Check the item types returned by ``list_samples`` and param forwarding.

    Without ``include_details`` every entry is expected to be a string; with
    ``include_details=True`` every entry is expected to be a dict. An unknown
    query parameter is expected to surface as ``BadRequest``.
    """
    samples = client.list_samples()
    assert len(samples) > 0
    assert all(isinstance(s, str) for s in samples)

    samples = client.list_samples(include_details=True)
    # Guard against a vacuous pass: ``all()`` over an empty result is True.
    assert len(samples) > 0
    assert all(isinstance(s, dict) for s in samples)

    # check to make sure query params propagate to the request
    with pytest.raises(BadRequest):
        client.list_samples(params={"bad_param": "yes"})
Example #3
0
def test_detect_entities(client: Client):
    """Run entity detection on one email record and check the NER labels."""
    record = {"email": "*****@*****.**"}
    results = client.detect_entities(record)

    # One input record is expected to produce exactly one result.
    assert len(results) == 1

    # Drill into the NER output for the "email" field of that result.
    email_ner = results[0]["metadata"]["fields"]["email"]["ner"]
    assert len(email_ner["labels"]) == 3
Example #4
0
def test_send_bulk(reader, client: Client):
    """``Project.send_bulk`` should delegate to ``Client._write_records``.

    The ``detection_mode`` argument is expected to end up merged into the
    ``params`` passed to the client, alongside the caller-supplied entries.
    """
    client._write_records = Mock()
    project = Project(name="proj", client=client, project_id=123)
    payload = [{"one": "test", "two": "test"}]

    project.send_bulk(
        payload,
        detection_mode="all",
        params={"test": "param"},
        headers={"test": "two"},
    )

    # Built from fresh literals so the expectation does not alias anything
    # that was handed to ``send_bulk``.
    expected_kwargs = {
        "project": "proj",
        "reader": reader.return_value,
        "headers": {"test": "two"},
        "params": {"test": "param", "detection_mode": "all"},
    }
    client._write_records.assert_called_with(**expected_kwargs)
Example #5
0
def test_send(client: Client):
    """``Project.send`` should forward records and options to the sync writer.

    The ``detection_mode`` argument is expected to be merged into ``params``.
    """
    stub_response = {"data": {"success": [], "failure": []}}
    client._write_record_sync = Mock(return_value=stub_response)
    project = Project(name="proj", client=client, project_id=123)
    payload = [{"one": "test", "two": "test"}]

    project.send(
        payload,
        detection_mode="all",
        params={"test": "param"},
        headers={"test": "two"},
    )

    # Project name and records go positionally; the rest as keywords.
    expected_kwargs = {
        "headers": {"test": "two"},
        "params": {"test": "param", "detection_mode": "all"},
    }
    client._write_record_sync.assert_called_with(
        "proj", payload, **expected_kwargs
    )
Example #6
0
def test_send_dataframe(client: Client):
    """Exercise ``Project.send_dataframe`` sampling and option forwarding.

    Covers an absolute sample size, rejected inputs, a fractional sample,
    and the default of sending the whole frame.
    """
    client._write_records = Mock()
    frame = pd.DataFrame([{f"foo_{i}": "bar"} for i in range(50)])
    project = Project(name="proj", client=client, project_id=123)

    def recorded_kwargs(call_index):
        # Entries of ``mock_calls`` are (name, args, kwargs) triples.
        return client._write_records.mock_calls[call_index][2]

    # Absolute sample size, plus custom headers and params.
    project.send_dataframe(
        frame,
        sample=25,
        detection_mode="none",
        headers={"X-Test-Gretel": "one"},
        params={"test-param": "two"},
    )
    first = recorded_kwargs(0)
    assert len(first["reader"].df) == 25
    assert first["headers"] == {"X-Test-Gretel": "one"}
    assert first["params"] == {"test-param": "two", "detection_mode": "none"}

    # Sample sizes that must be rejected for this 50-row frame.
    for bad_sample in (0, 100, -1):
        with pytest.raises(ValueError):
            project.send_dataframe(frame, sample=bad_sample)
    # A plain list in place of a DataFrame must be rejected as well.
    with pytest.raises(ValueError):
        project.send_dataframe([1, 2])

    # Fractional sample: 0.1 of the 50 rows is expected to give 5 rows.
    project.send_dataframe(frame, sample=0.1)
    assert len(recorded_kwargs(1)["reader"].df) == 5

    # No sample argument: all 50 rows are expected to be sent.
    project.send_dataframe(frame)
    assert len(recorded_kwargs(2)["reader"].df) == 50
Example #7
0
def test_iter_records(client: Client):
    """``Project.iter_records`` should call the client with the project name."""
    client._iter_records = Mock()
    project = Project(name="proj", client=client, project_id=123)

    project.iter_records()

    # Entries of ``mock_calls`` are (name, args, kwargs); inspect the kwargs.
    first_call = project.client._iter_records.mock_calls[0]
    call_kwargs = first_call[2]
    assert call_kwargs["project"] == "proj"
Example #8
0
def test_bulk_record_summary_count(client: Client):
    """The bulk-send summary should count every record that was submitted."""
    records = client.get_sample("safecast", params={"limit": 150})

    with temporary_project(client) as project:
        result = project.send_bulk(records)
        # Compare against the length measured after the send, not before.
        submitted = len(records)
        assert result.records_sent == submitted
Example #9
0
def test_get_sample_not_found(client: Client):
    """Requesting an unknown sample name should raise ``NotFound``."""
    missing_name = "this_sample_not_found"
    with pytest.raises(NotFound):
        client.get_sample(missing_name)