def test_analyze_resource_detailed():
    """Check the top-level report of a detailed analysis of the sample CSV.

    Verifies the order of the report keys (including ``correlations``, which
    is expected for a detailed run) and the summary counters.
    """
    resource = Resource({"path": "data/analysis-data.csv"})
    analysis = resource.analyze(detailed=True)
    assert list(analysis.keys()) == [
        "variableTypes",
        "notNullRows",
        "rowsWithNullValues",
        "fieldStats",
        "correlations",
        "averageRecordSizeInBytes",
        "timeTaken",
        "hash",
        "bytes",
        "fields",
        "rows",
    ]
    # The expected size is platform dependent. It is computed first because
    # ``assert a == 85 if IS_UNIX else 86`` parses as
    # ``assert ((a == 85) if IS_UNIX else 86)``, which on non-Unix platforms
    # asserts the always-truthy constant 86 and checks nothing.
    expected_record_size = 85 if IS_UNIX else 86
    assert round(analysis["averageRecordSizeInBytes"]) == expected_record_size
    assert analysis["fields"] == 11
    assert analysis["rows"] == 9
    assert analysis["rowsWithNullValues"] == 2
    assert analysis["notNullRows"] == 7
    assert analysis["variableTypes"] == {
        "boolean": 2,
        "integer": 2,
        "number": 2,
        "string": 5,
    }
def test_analyze_resource_detailed_descriptive_statistics_variables_correlation():
    """Check the correlation entries reported for the sample CSV.

    The report must list the four correlated fields in order, and the
    ``average_grades`` entry must pair each field name with the expected
    coefficient.
    """
    report = Resource({"path": "data/analysis-data.csv"}).analyze(detailed=True)
    field_order = ["parent_age", "parent_salary", "house_area", "average_grades"]
    assert list(report["correlations"].keys()) == field_order

    # (fieldName, corr) expected at each position of the "average_grades" list.
    expected_entries = [
        ("parent_age", -0.09401771232099933),
        ("parent_salary", 0.4241304392492213),
        ("house_area", 0.14354348594097088),
        ("average_grades", 1.0),
    ]
    grade_correlations = report["correlations"]["average_grades"]
    for position, (field_name, coefficient) in enumerate(expected_entries):
        entry = grade_correlations[position]
        assert entry["fieldName"] == field_name
        assert entry["corr"] == coefficient
def test_analyze_resource_with_invalid_data():
    """Check the non-detailed analysis report for ``data/invalid.csv``.

    Without ``detailed=True`` the report is expected to carry empty
    ``fieldStats`` and ``variableTypes`` while still counting rows and fields.
    """
    resource = Resource({"path": "data/invalid.csv"})
    analysis = resource.analyze()
    # The expected size is platform dependent. It is computed first because
    # ``assert a == 12 if IS_UNIX else 14`` parses as
    # ``assert ((a == 12) if IS_UNIX else 14)``, which on non-Unix platforms
    # asserts the always-truthy constant 14 and checks nothing.
    expected_record_size = 12 if IS_UNIX else 14
    assert round(analysis["averageRecordSizeInBytes"]) == expected_record_size
    assert analysis["fields"] == 4
    assert analysis["fieldStats"] == {}
    assert analysis["rows"] == 4
    assert analysis["rowsWithNullValues"] == 3
    assert analysis["notNullRows"] == 1
    assert analysis["variableTypes"] == {}
def test_analyze_resource_detailed_with_invalid_data():
    """Check the detailed analysis report for ``data/invalid.csv``.

    With ``detailed=True`` the report is expected to include per-field
    statistics and variable type counts in addition to the row counters.
    """
    resource = Resource({"path": "data/invalid.csv"})
    analysis = resource.analyze(detailed=True)
    # The expected size is platform dependent. It is computed first because
    # ``assert a == 12 if IS_UNIX else 14`` parses as
    # ``assert ((a == 12) if IS_UNIX else 14)``, which on non-Unix platforms
    # asserts the always-truthy constant 14 and checks nothing.
    expected_record_size = 12 if IS_UNIX else 14
    assert round(analysis["averageRecordSizeInBytes"]) == expected_record_size
    assert analysis["fields"] == 4
    assert list(
        analysis["fieldStats"].keys()) == ["id", "name", "field3", "name2"]
    assert analysis["rows"] == 4
    assert analysis["rowsWithNullValues"] == 3
    assert analysis["notNullRows"] == 1
    assert analysis["variableTypes"] == {"integer": 3, "string": 1}
def test_analyze_resource_detailed_non_numeric_data_identification():
    """Check that string columns of inline data are reported as categorical.

    Each column's stats must have type ``"categorical"`` and list the set of
    distinct values found in that column.
    """
    rows = [
        ["gender", "country"],
        ["male", "usa"],
        ["female", "usa"],
        ["male", "italy"],
        ["female", "italy"],
        ["female", "italy"],
    ]
    field_stats = Resource(rows).analyze(detailed=True)["fieldStats"]

    # Column name -> distinct values expected in its stats.
    expected_values = {
        "gender": {"male", "female"},
        "country": {"usa", "italy"},
    }
    for column, values in expected_values.items():
        assert field_stats[column]["type"] == "categorical"
        assert field_stats[column]["values"] == values
def test_analyze_resource_detailed_numeric_descriptive_statistics():
    """Check every descriptive statistic reported for ``parent_age``."""
    report = Resource({"path": "data/analysis-data.csv"}).analyze(detailed=True)
    parent_age = report["fieldStats"]["parent_age"]

    # Statistic name -> exact expected value, checked in this order.
    expected = {
        "bounds": [39, 67],
        "max": 57,
        "mean": 52.666666666666664,
        "median": 52,
        "min": 48,
        "missingValues": 0,
        "mode": 57,
        "quantiles": [49.5, 52.0, 56.5],
        "stdev": 3.391164991562634,
        "uniqueValues": 7,
        "variance": 11.5,
        "outliers": [],
    }
    for statistic, value in expected.items():
        assert parent_age[statistic] == value, statistic
def test_analyze_resource_detailed_with_empty_rows():
    """Check the detailed report for inline data that has only a header row.

    The report is expected to have zero rows and no ``correlations`` key.
    """
    header_only = [["a", "b"]]
    report = Resource(header_only).analyze(detailed=True)
    expected_keys = [
        "variableTypes",
        "notNullRows",
        "rowsWithNullValues",
        "fieldStats",
        "averageRecordSizeInBytes",
        "timeTaken",
        "hash",
        "bytes",
        "fields",
        "rows",
    ]
    reported_keys = list(report.keys())
    assert reported_keys == expected_keys
    assert report["rows"] == 0
def test_analyze_resource_detailed_numeric_values_descriptive_summray():
    """Check the names and order of the statistics reported for a numeric field."""
    report = Resource({"path": "data/analysis-data.csv"}).analyze(detailed=True)
    reported_statistics = list(report["fieldStats"]["parent_age"].keys())
    expected_statistics = [
        "type",
        "mean",
        "median",
        "mode",
        "variance",
        "quantiles",
        "stdev",
        "max",
        "min",
        "bounds",
        "uniqueValues",
        "outliers",
        "missingValues",
    ]
    assert reported_statistics == expected_statistics
def test_analyze_resource_detailed_numeric_descriptive_statistics_with_missingValues():
    """Check the statistics of ``average_grades``, a field with missing values.

    ``stdev`` and ``variance`` are compared after rounding to the nearest
    integer; every other statistic is compared exactly.
    """
    report = Resource({"path": "data/analysis-data.csv"}).analyze(detailed=True)
    grades = report["fieldStats"]["average_grades"]

    assert grades["bounds"] == [81, 96]
    assert grades["max"] == 10000.0
    assert grades["mean"] == 1503.28
    assert grades["median"] == 86.91
    assert grades["min"] == 84.65
    assert grades["missingValues"] == 2
    assert grades["mode"] == 86.79
    assert grades["quantiles"] == [86.79, 86.91, 90.39]
    rounded_stdev = round(grades["stdev"])
    assert rounded_stdev == 3747
    assert grades["uniqueValues"] == 6
    rounded_variance = round(grades["variance"])
    assert rounded_variance == 14037774
    assert grades["outliers"] == [10000.0]
def test_analyze_resource():
    """Check the top-level report of a non-detailed analysis of the sample CSV.

    Verifies the order of the report keys (no ``correlations`` key is expected
    here), the summary counters, and that ``variableTypes`` is empty.
    """
    resource = Resource({"path": "data/analysis-data.csv"})
    analysis = resource.analyze()
    assert list(analysis.keys()) == [
        "variableTypes",
        "notNullRows",
        "rowsWithNullValues",
        "fieldStats",
        "averageRecordSizeInBytes",
        "timeTaken",
        "hash",
        "bytes",
        "fields",
        "rows",
    ]
    # The expected size is platform dependent. It is computed first because
    # ``assert a == 85 if IS_UNIX else 86`` parses as
    # ``assert ((a == 85) if IS_UNIX else 86)``, which on non-Unix platforms
    # asserts the always-truthy constant 86 and checks nothing.
    expected_record_size = 85 if IS_UNIX else 86
    assert round(analysis["averageRecordSizeInBytes"]) == expected_record_size
    assert analysis["fields"] == 11
    assert analysis["rows"] == 9
    assert analysis["rowsWithNullValues"] == 2
    assert analysis["notNullRows"] == 7
    assert analysis["variableTypes"] == {}
def test_analyze_resource_detailed_non_numeric_values_summary():
    """Check that the ``gender`` field's stats expose only ``type`` and ``values``."""
    report = Resource({"path": "data/analysis-data.csv"}).analyze(detailed=True)
    gender_stats = report["fieldStats"]["gender"]
    reported_keys = list(gender_stats.keys())
    assert reported_keys == ["type", "values"]
def test_analyze_resource_detailed_descriptive_statistics_with_outliers():
    """Check the bounds and the outlier list reported for ``average_grades``."""
    report = Resource({"path": "data/analysis-data.csv"}).analyze(detailed=True)
    grades = report["fieldStats"]["average_grades"]
    expected_bounds = [81, 96]
    expected_outliers = [10000.0]
    assert grades["bounds"] == expected_bounds
    assert grades["outliers"] == expected_outliers