def test_resample():
    """Consolidating daily data at a monthly frequency should reduce row count."""
    raw = mdpp.load_data(
        "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv",
        csv_style={"sep": ",", "decimal": "."},
    )
    monthly = mdpp.data_consolidation(raw, datetime_column="Date", freq="M")
    # Fewer rows than the daily input, but not collapsed to a single row.
    assert 1 < len(monthly) < len(raw)
def test_list_of_dicts():
    """A list of column-oriented dicts loads into a dimensioned object."""
    records = [
        {"col_1": [3, 2, 1, 0], "col_2": [3, 2, 1, 0]},
        {"col_1": [4, 2, 1, 0], "col_2": [3, 2, 1, 0]},
        {"col_1": [5, 2, 1, 0], "col_2": [3, 2, 1, 0]},
    ]
    loaded = mdpp.load_data(records, data_orientation="columns")
    assert loaded.ndim
def test_more_files():
    """Loading the same source twice should yield exactly double the rows."""
    url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"
    load_kwargs = {
        "request_datatype_suffix": ".json",
        "predicted_table": "txs",
        "data_orientation": "index",
    }

    once = mdpp.load_data(url, **load_kwargs)
    twice = mdpp.load_data([url, url], **load_kwargs)

    assert len(twice) == 2 * len(once)
def test_local_files():
    """Round-trip several file formats (xls, xlsx, csv, json, parquet, hdf).

    Writes temporary fixture files in the working directory, loads them back
    through ``mdpp.load_data`` and checks the results. Temporary files are
    always removed in ``finally``, even when a loader raises.
    """
    test_files = Path(__file__).parent / "test_files"
    xls = mdpp.load_data(test_files / "file_example_xls.xls")
    xlsx = mdpp.load_data(test_files / "file_example_xlsx.xlsx")

    df_imported = pd.read_csv(
        "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"
    )
    df_part = df_imported.iloc[:10, :]

    df_imported.to_csv("tested.csv")
    df_part.to_csv("tested2.csv")
    df_imported.to_parquet("tested.parquet", compression="gzip")
    df_imported.to_hdf("tested.h5", key="df")

    loaded_data = requests.get("https://blockchain.info/unconfirmed-transactions?format=json").content
    with open("tested.json", "w") as outfile:
        json.dump(json.loads(loaded_data), outfile)

    try:
        # NOTE: the previous `except Exception: pass` here swallowed loader
        # failures, after which the final assert crashed with a NameError on
        # the never-assigned locals instead of reporting the real error.
        # Let exceptions propagate so pytest shows the actual failure.
        df_csv = mdpp.load_data("tested.csv")
        df_csv_joined = mdpp.load_data(["tested.csv", "tested2.csv"])
        df_json = mdpp.data_consolidation(
            mdpp.load_data("tested.json", request_datatype_suffix=".json", predicted_table="txs")
        )
        df_parquet = mdpp.load_data("tested.parquet")
        df_hdf = mdpp.load_data("tested.h5")

    finally:
        # Best-effort cleanup: a file may be missing if its writer failed.
        for name in ("tested.csv", "tested2.csv", "tested.json", "tested.parquet", "tested.h5"):
            if os.path.exists(name):
                os.remove(name)

    assert all(
        [
            xls.ndim,
            xlsx.ndim,
            df_csv.ndim,
            df_json.ndim,
            df_parquet.ndim,
            df_hdf.ndim,
            # The joined frame is the full csv plus the 10-row partial copy.
            len(df_csv_joined) == len(df_csv) + 10,
        ]
    )
def test_integration():
    """End-to-end pipeline: load -> consolidate -> preprocess (two variants)."""
    # Load data from a JSON web endpoint.
    raw = mdpp.load_data(
        "https://blockchain.info/unconfirmed-transactions?format=json",
        request_datatype_suffix=".json",
        predicted_table="txs",
        data_orientation="index",
    )

    # Normalize into the library's defined format (numeric pandas dataframe),
    # dropping/replacing NaN values as configured.
    consolidated = mdpp.data_consolidation(
        raw,
        predicted_column="weight",
        remove_nans_threshold=0.9,
        remove_nans_or_replace="interpolate",
    )

    # preprocess_data also returns the last undifferenced value and the
    # scaler for inverse transform; neither is needed here, hence the `_`s.
    preprocessed_df, _, _ = mdpp.preprocess_data(
        consolidated,
        remove_outliers=True,
        smoothit=(11, 2),
        correlation_threshold=False,
        data_transform=True,
        standardizeit="standardize",
    )

    # Same call on the raw ndarray with a different option combination.
    preprocessed_arr, _, _ = mdpp.preprocess_data(
        consolidated.values,
        remove_outliers=True,
        smoothit=(11, 2),
        correlation_threshold=0.9,
        data_transform="difference",
        standardizeit="01",
    )

    assert not np.isnan(np.min(preprocessed_df.values)) and not np.isnan(np.min(preprocessed_arr))
def test_exceptions():
    """Invalid inputs to load_data must raise the documented exception types.

    The previous version appended to the checklist only inside ``except``, so
    a call that (wrongly) raised nothing was never recorded and ``all(...)``
    passed vacuously. The helper below also fails the check in that case.
    """

    def _raises(expected, func, *args):
        # True only when `func(*args)` raises an instance of `expected`;
        # returning normally or raising another type is a failure.
        try:
            func(*args)
        except expected:
            return True
        except Exception:
            return False
        return False

    checks = [
        # Nonexistent local path.
        _raises(FileNotFoundError, mdpp.load_data, "testfile"),
        # A path that exists but is not a loadable data file.
        _raises(TypeError, mdpp.load_data, test_dir_path / "data_test"),
        # JSON endpoint without the required datatype suffix hint.
        _raises(TypeError, mdpp.load_data, "https://blockchain.info/unconfirmed-transactions?format=json"),
    ]

    assert all(checks)
def test_tuple():
    """Nested tuples of records load into a dimensioned object."""
    rows = (("Jon", "Smith", 21), ("Mark", "Brown", 38), ("Maria", "Lee", 42))
    assert mdpp.load_data(rows).ndim
def test_list():
    """Nested lists of records load into a dimensioned object."""
    rows = [["Jon", "Smith", 21], ["Mark", "Brown", 38], ["Maria", "Lee", 42]]
    assert mdpp.load_data(rows).ndim
def test_dict():
    """A plain dict of columns loads with index orientation."""
    table = {"col_1": [3, 2, 1, 0], "col_2": [3, 2, 1, 0]}
    assert mdpp.load_data(table, data_orientation="index").ndim
def test_numpys_and_pandas():
    """Sequences of ndarrays or of DataFrames both load successfully."""
    from_arrays = mdpp.load_data([np.random.randn(20, 3), np.random.randn(25, 3)])
    from_frames = mdpp.load_data(
        (pd.DataFrame(np.random.randn(20, 3)), pd.DataFrame(np.random.randn(25, 3)))
    )
    assert from_arrays.ndim and from_frames.ndim
def test_numpy_and_dataframe():
    """A bare ndarray and a DataFrame both load successfully."""
    from_array = mdpp.load_data(np.random.randn(100, 3))
    from_frame = mdpp.load_data(pd.DataFrame(np.random.randn(100, 3)))
    assert from_array.ndim and from_frame.ndim
def test_test_data():
    """load_data("test") should yield a dimensioned object."""
    sample = mdpp.load_data("test")
    assert sample.ndim