Exemple #1
0
def test_fwf_file_chunksize():
    setup_fwf_file(overwrite=True)

    # Tests __next__ and correctness of reader as an iterator
    rdf_reader = pd.read_fwf(TEST_FWF_FILENAME, chunksize=5)
    pd_reader = pandas.read_fwf(TEST_FWF_FILENAME, chunksize=5)

    for modin_df, pd_df in zip(rdf_reader, pd_reader):
        df_equals(modin_df, pd_df)

    # Tests that get_chunk works correctly
    rdf_reader = pd.read_fwf(TEST_FWF_FILENAME, chunksize=1)
    pd_reader = pandas.read_fwf(TEST_FWF_FILENAME, chunksize=1)

    modin_df = rdf_reader.get_chunk(1)
    pd_df = pd_reader.get_chunk(1)

    df_equals(modin_df, pd_df)

    # Tests that read works correctly
    rdf_reader = pd.read_fwf(TEST_FWF_FILENAME, chunksize=1)
    pd_reader = pandas.read_fwf(TEST_FWF_FILENAME, chunksize=1)

    modin_df = rdf_reader.read()
    pd_df = pd_reader.read()

    df_equals(modin_df, pd_df)
Exemple #2
0
def test_fwf_file_parse_dates():
    dates = pandas.date_range("2000", freq="h", periods=10)
    fwf_data = "col1 col2        col3 col4"
    for i in range(10, 20):
        fwf_data = fwf_data + "\n{col1}   {col2}  {col3}   {col4}".format(
            col1=str(i),
            col2=str(dates[i - 10].date()),
            col3=str(i),
            col4=str(dates[i - 10].time()),
        )

    setup_fwf_file(overwrite=True, fwf_data=fwf_data)

    pandas_df = pandas.read_fwf(TEST_FWF_FILENAME,
                                parse_dates=[["col2", "col4"]])
    modin_df = pd.read_fwf(TEST_FWF_FILENAME, parse_dates=[["col2", "col4"]])
    df_equals(modin_df, pandas_df)

    pandas_df = pandas.read_fwf(TEST_FWF_FILENAME,
                                parse_dates={"time": ["col2", "col4"]})
    modin_df = pd.read_fwf(TEST_FWF_FILENAME,
                           parse_dates={"time": ["col2", "col4"]})
    df_equals(modin_df, pandas_df)

    teardown_fwf_file()
Exemple #3
0
def test_fwf_file_colspecs_widths(kwargs):
    setup_fwf_file(overwrite=True)

    modin_df = pd.read_fwf(TEST_FWF_FILENAME, **kwargs)
    pandas_df = pd.read_fwf(TEST_FWF_FILENAME, **kwargs)

    df_equals(modin_df, pandas_df)
Exemple #4
0
def test_fwf_file_skiprows():
    setup_fwf_file(overwrite=True)

    pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, skiprows=2)
    modin_df = pd.read_fwf(TEST_FWF_FILENAME, skiprows=2)
    df_equals(modin_df, pandas_df)

    pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, usecols=[0, 4, 7], skiprows=[2, 5])
    modin_df = pd.read_fwf(TEST_FWF_FILENAME, usecols=[0, 4, 7], skiprows=[2, 5])
    df_equals(modin_df, pandas_df)
Exemple #5
0
def test_fwf_file_skipfooter():
    setup_fwf_file(overwrite=True)

    pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, skipfooter=2)
    modin_df = pd.read_fwf(TEST_FWF_FILENAME, skipfooter=2)

    df_equals(modin_df, pandas_df)
Exemple #6
0
def test_fwf_file():
    setup_fwf_file()

    colspecs = [(0, 6), (8, 20), (21, 33), (34, 43)]
    with pytest.warns(UserWarning):
        df = pd.read_fwf(TEST_FWF_FILENAME, colspecs=colspecs, header=None, index_col=0)
        assert isinstance(df, pd.DataFrame)

    teardown_fwf_file()
Exemple #7
0
def test_fwf_file():
    fwf_data = """id8141  360.242940  149.910199 11950.7
id1594  444.953632  166.985655 11788.4
id1849  364.136849  183.628767 11806.2
id1230  413.836124  184.375703 11916.8
id1948  502.953953  173.237159 12468.3"""

    setup_fwf_file(True, fwf_data=fwf_data)

    colspecs = [(0, 6), (8, 20), (21, 33), (34, 43)]
    df = pd.read_fwf(TEST_FWF_FILENAME, colspecs=colspecs, header=None, index_col=0)
    assert isinstance(df, pd.DataFrame)

    teardown_fwf_file()
Exemple #8
0
def test_fwf_file_index_col():
    fwf_data = """a       b           c          d
id8141  360.242940  149.910199 11950.7
id1594  444.953632  166.985655 11788.4
id1849  364.136849  183.628767 11806.2
id1230  413.836124  184.375703 11916.8
id1948  502.953953  173.237159 12468.3"""

    setup_fwf_file(overwrite=True, fwf_data=fwf_data)

    pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, index_col="c")
    modin_df = pd.read_fwf(TEST_FWF_FILENAME, index_col="c")
    df_equals(modin_df, pandas_df)

    teardown_fwf_file()
Exemple #9
0
def test_fwf_file_kwargs():
    fwf_data = """ACW000116041961TAVG -142  k  183  k  419  k  720  k 1075  k 1546  k 1517  k 1428  k 1360  k 1121  k  457  k  -92  k
ACW000116041962TAVG   60  k   32  k -207  k  582  k  855  k 1328  k 1457  k 1340  k 1110  k  941  k  270  k -179  k
ACW000116041963TAVG -766  k -606  k -152  k  488  k 1171  k 1574  k 1567  k 1543  k 1279  k  887  k  513  k -161  k
ACW000116041964TAVG    9  k -138  k    2  k  685  k 1166  k 1389  k 1453  k 1504  k 1168  k  735  k  493  k   59  k
ACW000116041965TAVG   -9  k -158  k  -15  k  537  k  934  k 1447  k 1434  k 1424  k 1324  k  921  k  -22  k -231  k
ACW000116041966TAVG -490  k -614  k  108  k  246  k 1082  k 1642  k 1620  k 1471  k 1195  k  803  k  329  k    2  k
ACW000116041967TAVG -270  k   36  k  397  k  481  k 1052  k 1373  k 1655  k 1598  k 1318  k  997  k  559  k  -96  k
ACW000116041968TAVG -306  k -183  k  220  k  714  k  935  k 1635  k 1572  k 1718  k 1331  k  781  k  180  k  -56  k
ACW000116041969TAVG -134  k -494  k -185  k  497  k  962  k 1634  k 1687  k 1773  k 1379  k  932  k  321  k -275  k
ACW000116041970TAVG -483  k -704  k  -75  k  261  k 1093  k 1724  k 1470  k 1609  k 1163  k  836  k  300  k   73  k
ACW000116041971TAVG   -6  k   83  k  -40  k  472  k 1180  k 1411  k 1700  k 1600  k 1165  k  908  k  361  k  383  k
ACW000116041972TAVG -377  k   -4  k  250  k  556  k 1117  k 1444  k 1778  k 1545  k 1073  k  797  k  481  k  404  k
ACW000116041973TAVG   61  k  169  k  453  k  472  k 1075  k 1545  k 1866  k 1579  k 1199  k  563  k  154  k   11  k
ACW000116041974TAVG  191  k  209  k  339  k  748  k 1094  k 1463  k 1498  k 1541  k 1319  k  585  k  428  k  335  k
ACW000116041975TAVG  346  k   88  k  198  k  488  k 1165  k 1483  k 1756  k 1906  k 1374  k  845  k  406  k  387  k
ACW000116041976TAVG -163  k  -62  k -135  k  502  k 1128  k 1461  k 1822  k 1759  k 1136  k  715  k  458  k -205  k
ACW000116041977TAVG -192  k -279  k  234  k  332  k 1128  k 1566  k 1565  k 1556  k 1126  k  949  k  421  k  162  k
ACW000116041978TAVG   55  k -354  k   66  k  493  k 1155  k 1552  k 1564  k 1555  k 1061  k  932  k  688  k -464  k
ACW000116041979TAVG -618  k -632  k   35  k  474  k  993  k 1566  k 1484  k 1483  k 1229  k  647  k  412  k  -40  k
ACW000116041980TAVG -340  k -500  k  -35  k  524  k 1071  k 1534  k 1655  k 1502  k 1269  k  660  k  138  k  125  k"""
    with open(TEST_FWF_FILENAME, "w") as f:
        f.write(fwf_data)

    column_specs = [
        (0, 11),
        (11, 15),
        (19, 24),
        (27, 32),
        (35, 40),
        (43, 48),
        (51, 56),
        (59, 64),
        (67, 72),
        (75, 80),
        (83, 88),
        (91, 96),
        (99, 104),
        (107, 112),
    ]
    column_names = ["stationID", "year", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    nan_values = ["-9999"]
    with pytest.warns(UserWarning):
        modin_df = pd.read_fwf(
            TEST_FWF_FILENAME,
            colspecs=column_specs,
            names=column_names,
            na_values=nan_values,
            index_col=["stationID", "year"],
        )
    pandas_df = pd.read_fwf(
        TEST_FWF_FILENAME,
        colspecs=column_specs,
        names=column_names,
        na_values=nan_values,
        index_col=["stationID", "year"],
    )

    df_equals(modin_df, pandas_df)

    teardown_fwf_file()