Example #1
0
    def setUp(self):
        self.seriesd = common.getSeriesData()
        self.tsd = common.getTimeSeriesData()

        self.frame = self.klass(self.seriesd)
        self.intframe = self.klass(
            dict((k, v.astype(int)) for k, v in self.seriesd.iteritems()))

        self.tsframe = self.klass(self.tsd)

        self.mixed_frame = self.frame.copy()
        self.mixed_frame['foo'] = 'bar'

        self.ts1 = common.makeTimeSeries()
        self.ts2 = common.makeTimeSeries()[5:]
        self.ts3 = common.makeTimeSeries()[-5:]
        self.ts4 = common.makeTimeSeries()[1:-1]

        self.ts_dict = {
            'col1': self.ts1,
            'col2': self.ts2,
            'col3': self.ts3,
            'col4': self.ts4,
        }
        self.empty = self.klass({})

        self.unsortable = self.klass(
            {
                'foo': [1] * 1000,
                datetime.today(): [1] * 1000,
                'bar': ['bar'] * 1000,
                datetime.today() + timedelta(1): ['bar'] * 1000
            },
            index=np.arange(1000))
Example #2
0
def float_frame():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

                       A         B         C         D
    P7GACiRnxd -0.465578 -0.361863  0.886172 -0.053465
    qZKh6afn8n -0.466693 -0.373773  0.266873  1.673901
    tkp0r6Qble  0.148691 -0.059051  0.174817  1.598433
    wP70WOCtv8  0.133045 -0.581994 -0.992240  0.261651
    M2AeYQMnCz -1.207959 -0.185775  0.588206  0.563938
    QEPzyGDYDo -0.381843 -0.758281  0.502575 -0.565053
    r78Jwns6dn -0.653707  0.883127  0.682199  0.206159
    ...              ...       ...       ...       ...
    IHEGx9NO0T -0.277360  0.113021 -1.018314  0.196316
    lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999
    qa66YMWQa5  1.110525  0.475310 -0.747865  0.032121
    yOa0ATsmcE -0.431457  0.067094  0.096567 -0.264962
    65znX3uRNG  1.528446  0.160416 -0.109635 -0.032987
    eCOBvKqf3e  0.235281  1.622222  0.781255  0.392871
    xSucinXxuV -1.263557  0.252799 -0.552247  0.400426

    [30 rows x 4 columns]
    """
    return DataFrame(tm.getSeriesData())
Example #3
0
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    kmPME4WKhO  0.979578  0.998274 -0.776367  0.897607
    CPyopdXTiz  0.048119 -0.257174  0.836426  0.111266
    0kJZQndAj0  0.274357 -0.281135 -0.344238  0.834541
    tqdwQsaHG8 -0.979716 -0.519897  0.582031  0.144710
    ...              ...       ...       ...       ...
    7FhZTWILQj -2.906357  1.261039 -0.780273 -0.537237
    4pUDPM4eGq -2.042512 -0.464382 -0.382080  1.132612
    B8dUgUzwTi -1.506637 -0.364435  1.087891  0.297653
    hErlVYjVv9  1.477453 -0.495515 -0.713867  1.438427
    1BKN3o7YLs  0.127535 -0.349812 -0.881836  0.489827
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData())
    df.A = df.A.astype('float32')
    df.B = df.B.astype('float32')
    df.C = df.C.astype('float16')
    df.D = df.D.astype('float64')
    return df
Example #4
0
def test_convert_r_matrix():

    is_na = robj.baseenv.get("is.na")

    seriesd = _test.getSeriesData()
    frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
    # Null data
    frame["E"] = [np.nan for item in frame["A"]]

    r_dataframe = convert_to_r_matrix(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx(True, "E"))

    for column in frame[["A", "B", "C", "D"]]:
        coldata = r_dataframe.rx(True, column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata),
                              original_data)

    # Pandas bug 1282
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    # FIXME: Ugly, this whole module needs to be ported to nose/unittest
    try:
        wrong_matrix = convert_to_r_matrix(frame)
    except TypeError:
        pass
    except Exception:
        raise
Example #5
0
def float_frame2():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['D', 'C', 'B', 'A']
    """
    return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A'])
Example #6
0
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo'].

                       A         B         C         D  foo
    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
    ahp2KvwiM8 -1.533729 -0.142519 -0.154666  1.302623  bar
    3WSJ7BUCGd  2.484964  0.213829  0.034778 -2.327831  bar
    khdAmufk0U -0.193480 -0.743518 -0.077987  0.153646  bar
    LE2DZiFlrE -0.193566 -1.343194 -0.107321  0.959978  bar
    HJXSJhVn7b  0.142590  1.257603 -0.659409 -0.223844  bar
    ...              ...       ...       ...       ...  ...
    9a1Vypttgw -1.316394  1.601354  0.173596  1.213196  bar
    h5d1gVFbEy  0.609475  1.106738 -0.155271  0.294630  bar
    mK9LsTQG92  1.303613  0.857040 -1.019153  0.369468  bar
    oOLksd9gKH  0.558219 -0.134491 -0.289869 -0.951033  bar
    9jgoOjKyHg  0.058270 -0.496110 -0.413212 -0.852659  bar
    jZLDHclHAO  0.096298  1.267510  0.549206 -0.005235  bar
    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar

    [30 rows x 5 columns]
    """
    df = DataFrame(tm.getSeriesData())
    df["foo"] = "bar"
    return df
Example #7
0
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing

                       A         B         C         D
    ABwBzA0ljw -1.128865 -0.897161  0.046603  0.274997
    DJiRzmbyQF  0.728869  0.233502  0.722431 -0.890872
    neMgPD5UBF  0.486072 -1.027393 -0.031553  1.449522
    0yWA4n8VeX -1.937191 -1.142531  0.805215 -0.462018
    3slYUbbqU1  0.153260  1.164691  1.489795 -0.545826
    soujjZ0A08       NaN       NaN       NaN       NaN
    7W6NLGsjB9       NaN       NaN       NaN       NaN
    ...              ...       ...       ...       ...
    uhfeaNkCR1 -0.231210 -0.340472  0.244717 -0.901590
    n6p7GYuBIV -0.419052  1.922721 -0.125361 -0.727717
    ZhzAeY6p1y  1.234374 -1.425359 -0.827038 -0.633189
    uWdPsORyUh  0.046738 -0.980445 -1.102965  0.605503
    3DJA6aN590 -0.091018 -1.684734 -1.100900  0.215947
    2GBPAzdbMk -2.883405 -1.021071  1.209877  1.633083
    sHadBoyVHw -2.223032 -0.326384  0.258931  0.245517

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData())
    # set some NAs
    df.loc[5:10] = np.nan
    df.loc[15:20, -2:] = np.nan
    return df
Example #8
0
    def setUp(self):
        self.ts = tm.makeTimeSeries()

        self.seriesd = tm.getSeriesData()
        self.tsd = tm.getTimeSeriesData()
        self.frame = DataFrame(self.seriesd)
        self.tsframe = DataFrame(self.tsd)

        self.df = df()
        self.df_mixed_floats = DataFrame(
            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
             'C': np.random.randn(8),
             'D': np.array(
                 np.random.randn(8), dtype='float32')})

        self.mframe = mframe()

        self.three_group = DataFrame(
            {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
                   'foo', 'foo', 'foo'],
             'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
                   'two', 'two', 'one'],
             'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
                   'dull', 'shiny', 'shiny', 'shiny'],
             'D': np.random.randn(11),
             'E': np.random.randn(11),
             'F': np.random.randn(11)})
Example #9
0
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']

                A  B  C  D
    vpBeWjM651  1  0  1  0
    5JyxmrP1En -1  0  0  0
    qEDaoD49U2 -1  1  0  0
    m66TkTfsFe  0  0  0  0
    EHPaNzEUFm -1  0 -1  0
    fpRJCevQhi  2  0  0  0
    OlQvnmfi3Q  0  0 -2  0
    ...        .. .. .. ..
    uB1FPlz4uP  0  0  0  1
    EcSe6yNzCU  0  0 -1  0
    L50VudaiI8 -1  1 -2  0
    y3bpw4nwIp  0 -1  0  0
    H0RdLLwrCT  1  1  0  0
    rY82K0vMwm  0  0  0  0
    1OPIUjnkjk  2  0  0  0

    [30 rows x 4 columns]
    """
    df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
    # force these all to int64 to avoid platform testing issues
    return DataFrame({c: s for c, s in df.items()}, dtype=np.int64)
Example #10
0
def float_frame():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

                       A         B         C         D
    P7GACiRnxd -0.465578 -0.361863  0.886172 -0.053465
    qZKh6afn8n -0.466693 -0.373773  0.266873  1.673901
    tkp0r6Qble  0.148691 -0.059051  0.174817  1.598433
    wP70WOCtv8  0.133045 -0.581994 -0.992240  0.261651
    M2AeYQMnCz -1.207959 -0.185775  0.588206  0.563938
    QEPzyGDYDo -0.381843 -0.758281  0.502575 -0.565053
    r78Jwns6dn -0.653707  0.883127  0.682199  0.206159
    ...              ...       ...       ...       ...
    IHEGx9NO0T -0.277360  0.113021 -1.018314  0.196316
    lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999
    qa66YMWQa5  1.110525  0.475310 -0.747865  0.032121
    yOa0ATsmcE -0.431457  0.067094  0.096567 -0.264962
    65znX3uRNG  1.528446  0.160416 -0.109635 -0.032987
    eCOBvKqf3e  0.235281  1.622222  0.781255  0.392871
    xSucinXxuV -1.263557  0.252799 -0.552247  0.400426

    [30 rows x 4 columns]
    """
    return DataFrame(tm.getSeriesData())
Example #11
0
    def test_convert_r_matrix(self):

        is_na = robj.baseenv.get("is.na")

        seriesd = tm.getSeriesData()
        frame = pd.DataFrame(seriesd, columns=["D", "C", "B", "A"])
        # Null data
        frame["E"] = [np.nan for item in frame["A"]]

        r_dataframe = com.convert_to_r_matrix(frame)

        assert np.array_equal(com.convert_robj(r_dataframe.rownames), frame.index)
        assert np.array_equal(com.convert_robj(r_dataframe.colnames), frame.columns)
        assert all(is_na(item) for item in r_dataframe.rx(True, "E"))

        for column in frame[["A", "B", "C", "D"]]:
            coldata = r_dataframe.rx(True, column)
            original_data = frame[column]
            assert np.array_equal(com.convert_robj(coldata), original_data)

        # Pandas bug 1282
        frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

        try:
            wrong_matrix = com.convert_to_r_matrix(frame)
        except TypeError:
            pass
        except Exception:
            raise
Example #12
0
def test_convert_r_matrix():

    is_na = robj.baseenv.get("is.na")

    seriesd = _test.getSeriesData()
    frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
    #Null data
    frame["E"] = [np.nan for item in frame["A"]]

    r_dataframe = convert_to_r_matrix(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx(True, "E"))

    for column in frame[["A", "B", "C", "D"]]:
        coldata = r_dataframe.rx(True, column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata), original_data)

    # Pandas bug 1282
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    #FIXME: Ugly, this whole module needs to be ported to nose/unittest
    try:
        wrong_matrix = convert_to_r_matrix(frame)
    except TypeError:
        pass
    except Exception:
        raise
Example #13
0
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing

                       A         B         C         D
    ABwBzA0ljw -1.128865 -0.897161  0.046603  0.274997
    DJiRzmbyQF  0.728869  0.233502  0.722431 -0.890872
    neMgPD5UBF  0.486072 -1.027393 -0.031553  1.449522
    0yWA4n8VeX -1.937191 -1.142531  0.805215 -0.462018
    3slYUbbqU1  0.153260  1.164691  1.489795 -0.545826
    soujjZ0A08       NaN       NaN       NaN       NaN
    7W6NLGsjB9       NaN       NaN       NaN       NaN
    ...              ...       ...       ...       ...
    uhfeaNkCR1 -0.231210 -0.340472  0.244717 -0.901590
    n6p7GYuBIV -0.419052  1.922721 -0.125361 -0.727717
    ZhzAeY6p1y  1.234374 -1.425359 -0.827038 -0.633189
    uWdPsORyUh  0.046738 -0.980445 -1.102965  0.605503
    3DJA6aN590 -0.091018 -1.684734 -1.100900  0.215947
    2GBPAzdbMk -2.883405 -1.021071  1.209877  1.633083
    sHadBoyVHw -2.223032 -0.326384  0.258931  0.245517

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData())
    # set some NAs
    df.loc[5:10] = np.nan
    df.loc[15:20, -2:] = np.nan
    return df
Example #14
0
def test_convert_r_dataframe():

    is_na = robj.baseenv.get("is.na")

    seriesd = _test.getSeriesData()
    frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

    #Null data
    frame["E"] = [np.nan for item in frame["A"]]
    # Some mixed type data
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    r_dataframe = convert_to_r_dataframe(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx2("E"))

    for column in frame[["A", "B", "C", "D"]]:
        coldata = r_dataframe.rx2(column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata), original_data)

    for column in frame[["D", "E"]]:
        for original, converted in zip(frame[column], r_dataframe.rx2(column)):

            if pd.isnull(original):
                assert is_na(converted)
            else:
                assert original == converted
def float_frame():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].
    """
    return DataFrame(tm.getSeriesData())
Example #16
0
def bool_frame_with_na():
    """
    Fixture for DataFrame of booleans with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing

                    A      B      C      D
    zBZxY2IDGd  False  False  False  False
    IhBWBMWllt  False   True   True   True
    ctjdvZSR6R   True  False   True   True
    AVTujptmxb  False   True  False   True
    G9lrImrSWq  False  False  False   True
    sFFwdIUfz2    NaN    NaN    NaN    NaN
    s15ptEJnRb    NaN    NaN    NaN    NaN
    ...           ...    ...    ...    ...
    UW41KkDyZ4   True   True  False  False
    l9l6XkOdqV   True  False  False  False
    X2MeZfzDYA  False   True  False  False
    xWkIKU7vfX  False   True  False   True
    QOhL6VmpGU  False  False  False   True
    22PwkRJdat  False   True  False  False
    kfboQ3VeIK   True  False   True  False

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData()) > 0
    df = df.astype(object)
    # set some NAs
    df.loc[5:10] = np.nan
    df.loc[15:20, -2:] = np.nan
    return df
Example #17
0
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']

                A  B  C  D
    vpBeWjM651  1  0  1  0
    5JyxmrP1En -1  0  0  0
    qEDaoD49U2 -1  1  0  0
    m66TkTfsFe  0  0  0  0
    EHPaNzEUFm -1  0 -1  0
    fpRJCevQhi  2  0  0  0
    OlQvnmfi3Q  0  0 -2  0
    ...        .. .. .. ..
    uB1FPlz4uP  0  0  0  1
    EcSe6yNzCU  0  0 -1  0
    L50VudaiI8 -1  1 -2  0
    y3bpw4nwIp  0 -1  0  0
    H0RdLLwrCT  1  1  0  0
    rY82K0vMwm  0  0  0  0
    1OPIUjnkjk  2  0  0  0

    [30 rows x 4 columns]
    """
    df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
    # force these all to int64 to avoid platform testing issues
    return DataFrame({c: s for c, s in df.items()}, dtype=np.int64)
Example #18
0
def bool_frame_with_na():
    """
    Fixture for DataFrame of booleans with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing

                    A      B      C      D
    zBZxY2IDGd  False  False  False  False
    IhBWBMWllt  False   True   True   True
    ctjdvZSR6R   True  False   True   True
    AVTujptmxb  False   True  False   True
    G9lrImrSWq  False  False  False   True
    sFFwdIUfz2    NaN    NaN    NaN    NaN
    s15ptEJnRb    NaN    NaN    NaN    NaN
    ...           ...    ...    ...    ...
    UW41KkDyZ4   True   True  False  False
    l9l6XkOdqV   True  False  False  False
    X2MeZfzDYA  False   True  False  False
    xWkIKU7vfX  False   True  False   True
    QOhL6VmpGU  False  False  False   True
    22PwkRJdat  False   True  False  False
    kfboQ3VeIK   True  False   True  False

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData()) > 0
    df = df.astype(object)
    # set some NAs
    df.loc[5:10] = np.nan
    df.loc[15:20, -2:] = np.nan
    return df
Example #19
0
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ccQkqOHX75 -1  1    0    0
    DN0iXaoDLd  0  1    0    0
    ...        .. ..  ...  ...
    Dfb141wAaQ  1  1  254  254
    IPD8eQOVu5  0  1    0    0
    CcaKulsCmv  0  1    0    0
    rIBa8gu7E5  0  1    0    0
    RP6peZmh5o  0  1    1    1
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
    df.A = df.A.astype('int32')
    df.B = np.ones(len(df.B), dtype='uint64')
    df.C = df.C.astype('uint8')
    df.D = df.C.astype('int64')
    return df
Example #20
0
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ccQkqOHX75 -1  1    0    0
    DN0iXaoDLd  0  1    0    0
    ...        .. ..  ...  ...
    Dfb141wAaQ  1  1  254  254
    IPD8eQOVu5  0  1    0    0
    CcaKulsCmv  0  1    0    0
    rIBa8gu7E5  0  1    0    0
    RP6peZmh5o  0  1    1    1
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
    df.A = df.A.astype("int32")
    df.B = np.ones(len(df.B), dtype="uint64")
    df.C = df.C.astype("uint8")
    df.D = df.C.astype("int64")
    return df
Example #21
0
def test_convert_r_dataframe():

    is_na = robj.baseenv.get("is.na")

    seriesd = _test.getSeriesData()
    frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

    # Null data
    frame["E"] = [np.nan for item in frame["A"]]
    # Some mixed type data
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    r_dataframe = convert_to_r_dataframe(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx2("E"))

    for column in frame[["A", "B", "C", "D"]]:
        coldata = r_dataframe.rx2(column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata), original_data)

    for column in frame[["D", "E"]]:
        for original, converted in zip(frame[column],
                                       r_dataframe.rx2(column)):

            if pd.isnull(original):
                assert is_na(converted)
            else:
                assert original == converted
Example #22
0
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo'].

                       A         B         C         D  foo
    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
    ahp2KvwiM8 -1.533729 -0.142519 -0.154666  1.302623  bar
    3WSJ7BUCGd  2.484964  0.213829  0.034778 -2.327831  bar
    khdAmufk0U -0.193480 -0.743518 -0.077987  0.153646  bar
    LE2DZiFlrE -0.193566 -1.343194 -0.107321  0.959978  bar
    HJXSJhVn7b  0.142590  1.257603 -0.659409 -0.223844  bar
    ...              ...       ...       ...       ...  ...
    9a1Vypttgw -1.316394  1.601354  0.173596  1.213196  bar
    h5d1gVFbEy  0.609475  1.106738 -0.155271  0.294630  bar
    mK9LsTQG92  1.303613  0.857040 -1.019153  0.369468  bar
    oOLksd9gKH  0.558219 -0.134491 -0.289869 -0.951033  bar
    9jgoOjKyHg  0.058270 -0.496110 -0.413212 -0.852659  bar
    jZLDHclHAO  0.096298  1.267510  0.549206 -0.005235  bar
    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar

    [30 rows x 5 columns]
    """
    df = DataFrame(tm.getSeriesData())
    df['foo'] = 'bar'
    return df
Example #23
0
    def setUp(self):
        self.seriesd = common.getSeriesData()
        self.tsd = common.getTimeSeriesData()

        self.frame = self.klass(self.seriesd)
        self.intframe = self.klass(dict((k, v.astype(int))
                                        for k, v in self.seriesd.iteritems()))

        self.tsframe = self.klass(self.tsd)

        self.mixed_frame = self.frame.copy()
        self.mixed_frame['foo'] = 'bar'

        self.ts1 = common.makeTimeSeries()
        self.ts2 = common.makeTimeSeries()[5:]
        self.ts3 = common.makeTimeSeries()[-5:]
        self.ts4 = common.makeTimeSeries()[1:-1]

        self.ts_dict = {
            'col1' : self.ts1,
            'col2' : self.ts2,
            'col3' : self.ts3,
            'col4' : self.ts4,
        }
        self.empty = self.klass({})

        self.unsortable = self.klass(
            {'foo' : [1] * 1000,
             datetime.today() : [1] * 1000,
             'bar' : ['bar'] * 1000,
             datetime.today() + timedelta(1) : ['bar'] * 1000},
            index=np.arange(1000))
Example #24
0
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    kmPME4WKhO  0.979578  0.998274 -0.776367  0.897607
    CPyopdXTiz  0.048119 -0.257174  0.836426  0.111266
    0kJZQndAj0  0.274357 -0.281135 -0.344238  0.834541
    tqdwQsaHG8 -0.979716 -0.519897  0.582031  0.144710
    ...              ...       ...       ...       ...
    7FhZTWILQj -2.906357  1.261039 -0.780273 -0.537237
    4pUDPM4eGq -2.042512 -0.464382 -0.382080  1.132612
    B8dUgUzwTi -1.506637 -0.364435  1.087891  0.297653
    hErlVYjVv9  1.477453 -0.495515 -0.713867  1.438427
    1BKN3o7YLs  0.127535 -0.349812 -0.881836  0.489827
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData())
    df.A = df.A.astype("float32")
    df.B = df.B.astype("float32")
    df.C = df.C.astype("float16")
    df.D = df.D.astype("float64")
    return df
Example #25
0
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo'].
    """
    df = DataFrame(tm.getSeriesData())
    df['foo'] = 'bar'
    return df
Example #26
0
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']
    """
    df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
    # force these all to int64 to avoid platform testing issues
    return DataFrame({c: s for c, s in df.items()}, dtype=np.int64)
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo'].
    """
    df = DataFrame(tm.getSeriesData())
    df['foo'] = 'bar'
    return df
Example #28
0
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']
    """
    df = DataFrame({k: v.astype(int)
                   for k, v in compat.iteritems(tm.getSeriesData())})
    # force these all to int64 to avoid platform testing issues
    return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64)
Example #29
0
    def setup_method(self, method):
        self.ts = tm.makeTimeSeries()

        self.seriesd = tm.getSeriesData()
        self.tsd = tm.getTimeSeriesData()
        self.frame = DataFrame(self.seriesd)
        self.tsframe = DataFrame(self.tsd)

        self.df = DataFrame({
            'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
            'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
            'C':
            np.random.randn(8),
            'D':
            np.random.randn(8)
        })

        self.df_mixed_floats = DataFrame({
            'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
            'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
            'C':
            np.random.randn(8),
            'D':
            np.array(np.random.randn(8), dtype='float32')
        })

        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        self.mframe = DataFrame(np.random.randn(10, 3),
                                index=index,
                                columns=['A', 'B', 'C'])

        self.three_group = DataFrame({
            'A': [
                'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', 'foo',
                'foo', 'foo'
            ],
            'B': [
                'one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', 'two',
                'two', 'one'
            ],
            'C': [
                'dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
                'dull', 'shiny', 'shiny', 'shiny'
            ],
            'D':
            np.random.randn(11),
            'E':
            np.random.randn(11),
            'F':
            np.random.randn(11)
        })
Example #30
0
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    df = DataFrame(tm.getSeriesData())
    # set some NAs
    df.loc[5:10] = np.nan
    df.loc[15:20, -2:] = np.nan
    return df
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    df = DataFrame(tm.getSeriesData())
    # set some NAs
    df.loc[5:10] = np.nan
    df.loc[15:20, -2:] = np.nan
    return df
Example #32
0
def mixed_float_frame2():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].
    """
    df = DataFrame(tm.getSeriesData())
    df.D = df.D.astype('float16')
    df.C = df.C.astype('float32')
    df.B = df.B.astype('float64')
    return df
Example #33
0
def mixed_float_frame2():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].
    """
    df = DataFrame(tm.getSeriesData())
    df.D = df.D.astype('float32')
    df.C = df.C.astype('float32')
    df.B = df.B.astype('float16')
    df.D = df.D.astype('float64')
    return df
def bool_frame_with_na():
    """
    Fixture for DataFrame of booleans with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    df = DataFrame(tm.getSeriesData()) > 0
    df = df.astype(object)
    # set some NAs
    df.loc[5:10] = np.nan
    df.loc[15:20, -2:] = np.nan
    return df
Example #35
0
def bool_frame_with_na():
    """
    Fixture for DataFrame of booleans with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    df = DataFrame(tm.getSeriesData()) > 0
    df = df.astype(object)
    # set some NAs
    df.loc[5:10] = np.nan
    df.loc[15:20, -2:] = np.nan
    return df
Example #36
0
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].
    """
    df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
    df.A = df.A.astype('int32')
    df.B = np.ones(len(df.B), dtype='uint64')
    df.C = df.C.astype('uint8')
    df.D = df.C.astype('int64')
    return df
Example #37
0
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].
    """
    df = DataFrame({k: v.astype(int)
                   for k, v in compat.iteritems(tm.getSeriesData())})
    df.A = df.A.astype('int32')
    df.B = np.ones(len(df.B), dtype='uint64')
    df.C = df.C.astype('uint8')
    df.D = df.C.astype('int64')
    return df
Example #38
0
    def test_ix_assign_column_mixed(self):
        # GH #1142
        df = DataFrame(tm.getSeriesData())
        df['foo'] = 'bar'

        orig = df.loc[:, 'B'].copy()
        df.loc[:, 'B'] = df.loc[:, 'B'] + 1
        tm.assert_series_equal(df.B, orig + 1)

        # GH 3668, mixed frame with series value
        df = DataFrame({
            'x': np.arange(10),
            'y': np.arange(10, 20),
            'z': 'bar'
        })
        expected = df.copy()

        for i in range(5):
            indexer = i * 2
            v = 1000 + i * 200
            expected.loc[indexer, 'y'] = v
            assert expected.loc[indexer, 'y'] == v

        df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
        tm.assert_frame_equal(df, expected)

        # GH 4508, making sure consistency of assignments
        df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
        df.loc[[
            0,
            2,
        ], 'b'] = [100, -100]
        expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({'a': list(range(4))})
        df['b'] = np.nan
        df.loc[[1, 3], 'b'] = [100, -100]
        expected = DataFrame({
            'a': [0, 1, 2, 3],
            'b': [np.nan, 100, np.nan, -100]
        })
        tm.assert_frame_equal(df, expected)

        # ok, but chained assignments are dangerous
        # if we turn off chained assignment it will work
        with option_context('chained_assignment', None):
            df = DataFrame({'a': list(range(4))})
            df['b'] = np.nan
            df['b'].loc[[1, 3]] = [100, -100]
            tm.assert_frame_equal(df, expected)
Example #39
0
def test_convert_r_matrix():

    seriesd = _test.getSeriesData()
    frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

    r_dataframe = convert_to_r_matrix(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)

    for column in r_dataframe.colnames:
        coldata = r_dataframe.rx(True, column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata), original_data)
Example #40
0
def test_rename_inplace():
    test_data = pandas.DataFrame(tm.getSeriesData())
    modin_df = pd.DataFrame(test_data)

    df_equals(
        modin_df.rename(columns={"C": "foo"}),
        test_data.rename(columns={"C": "foo"}),
    )

    frame = test_data.copy()
    modin_frame = modin_df.copy()
    frame.rename(columns={"C": "foo"}, inplace=True)
    modin_frame.rename(columns={"C": "foo"}, inplace=True)

    df_equals(modin_frame, frame)
Example #41
0
    def test_frame_operators(self):
        seriesd = tm.getSeriesData()
        frame = pd.DataFrame(seriesd)
        frame2 = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

        garbage = np.random.random(4)
        colSeries = pd.Series(garbage, index=np.array(frame.columns))

        idSum = frame + frame
        seriesSum = frame + colSeries

        for col, series in idSum.items():
            for idx, val in series.items():
                origVal = frame[col][idx] * 2
                if not np.isnan(val):
                    assert val == origVal
                else:
                    assert np.isnan(origVal)

        for col, series in seriesSum.items():
            for idx, val in series.items():
                origVal = frame[col][idx] + colSeries[col]
                if not np.isnan(val):
                    assert val == origVal
                else:
                    assert np.isnan(origVal)

        added = frame2 + frame2
        expected = frame2 * 2
        tm.assert_frame_equal(added, expected)

        df = pd.DataFrame({'a': ['a', None, 'b']})
        tm.assert_frame_equal(df + df,
                              pd.DataFrame({'a': ['aa', np.nan, 'bb']}))

        # Test for issue #10181
        for dtype in ('float', 'int64'):
            frames = [
                pd.DataFrame(dtype=dtype),
                pd.DataFrame(columns=['A'], dtype=dtype),
                pd.DataFrame(index=[0], dtype=dtype),
            ]
            for df in frames:
                assert (df + df).equals(df)
                tm.assert_frame_equal(df + df, df)
Example #42
0
    def test_frame_operators(self):
        seriesd = tm.getSeriesData()
        frame = pd.DataFrame(seriesd)
        frame2 = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

        garbage = np.random.random(4)
        colSeries = pd.Series(garbage, index=np.array(frame.columns))

        idSum = frame + frame
        seriesSum = frame + colSeries

        for col, series in idSum.items():
            for idx, val in series.items():
                origVal = frame[col][idx] * 2
                if not np.isnan(val):
                    assert val == origVal
                else:
                    assert np.isnan(origVal)

        for col, series in seriesSum.items():
            for idx, val in series.items():
                origVal = frame[col][idx] + colSeries[col]
                if not np.isnan(val):
                    assert val == origVal
                else:
                    assert np.isnan(origVal)

        added = frame2 + frame2
        expected = frame2 * 2
        tm.assert_frame_equal(added, expected)

        df = pd.DataFrame({'a': ['a', None, 'b']})
        tm.assert_frame_equal(df + df,
                              pd.DataFrame({'a': ['aa', np.nan, 'bb']}))

        # Test for issue #10181
        for dtype in ('float', 'int64'):
            frames = [
                pd.DataFrame(dtype=dtype),
                pd.DataFrame(columns=['A'], dtype=dtype),
                pd.DataFrame(index=[0], dtype=dtype),
            ]
            for df in frames:
                assert (df + df).equals(df)
                tm.assert_frame_equal(df + df, df)
Example #43
0
    def test_ix_assign_column_mixed(self):
        # GH #1142
        df = DataFrame(tm.getSeriesData())
        df['foo'] = 'bar'

        orig = df.loc[:, 'B'].copy()
        df.loc[:, 'B'] = df.loc[:, 'B'] + 1
        tm.assert_series_equal(df.B, orig + 1)

        # GH 3668, mixed frame with series value
        df = DataFrame({'x': np.arange(10),
                        'y': np.arange(10, 20),
                        'z': 'bar'})
        expected = df.copy()

        for i in range(5):
            indexer = i * 2
            v = 1000 + i * 200
            expected.loc[indexer, 'y'] = v
            assert expected.loc[indexer, 'y'] == v

        df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
        tm.assert_frame_equal(df, expected)

        # GH 4508, making sure consistency of assignments
        df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
        df.loc[[0, 2, ], 'b'] = [100, -100]
        expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({'a': list(range(4))})
        df['b'] = np.nan
        df.loc[[1, 3], 'b'] = [100, -100]
        expected = DataFrame({'a': [0, 1, 2, 3],
                              'b': [np.nan, 100, np.nan, -100]})
        tm.assert_frame_equal(df, expected)

        # ok, but chained assignments are dangerous
        # if we turn off chained assignment it will work
        with option_context('chained_assignment', None):
            df = DataFrame({'a': list(range(4))})
            df['b'] = np.nan
            df['b'].loc[[1, 3]] = [100, -100]
            tm.assert_frame_equal(df, expected)
Example #44
0
def test_astype():
    td = pandas.DataFrame(tm.getSeriesData())
    modin_df = pd.DataFrame(td.values, index=td.index, columns=td.columns)
    expected_df = pandas.DataFrame(td.values,
                                   index=td.index,
                                   columns=td.columns)

    modin_df_casted = modin_df.astype(np.int32)
    expected_df_casted = expected_df.astype(np.int32)
    df_equals(modin_df_casted, expected_df_casted)

    modin_df_casted = modin_df.astype(np.float64)
    expected_df_casted = expected_df.astype(np.float64)
    df_equals(modin_df_casted, expected_df_casted)

    modin_df_casted = modin_df.astype(str)
    expected_df_casted = expected_df.astype(str)
    df_equals(modin_df_casted, expected_df_casted)

    modin_df_casted = modin_df.astype("category")
    expected_df_casted = expected_df.astype("category")
    df_equals(modin_df_casted, expected_df_casted)

    dtype_dict = {"A": np.int32, "B": np.int64, "C": str}
    modin_df_casted = modin_df.astype(dtype_dict)
    expected_df_casted = expected_df.astype(dtype_dict)
    df_equals(modin_df_casted, expected_df_casted)

    # Ignore lint because this is testing bad input
    bad_dtype_dict = {"B": np.int32, "B": np.int64, "B": str}  # noqa F601
    modin_df_casted = modin_df.astype(bad_dtype_dict)
    expected_df_casted = expected_df.astype(bad_dtype_dict)
    df_equals(modin_df_casted, expected_df_casted)

    modin_df = pd.DataFrame(index=["row1"], columns=["col1"])
    modin_df["col1"]["row1"] = 11
    modin_df_casted = modin_df.astype(int)
    expected_df = pandas.DataFrame(index=["row1"], columns=["col1"])
    expected_df["col1"]["row1"] = 11
    expected_df_casted = expected_df.astype(int)
    df_equals(modin_df_casted, expected_df_casted)

    with pytest.raises(KeyError):
        modin_df.astype({"not_exists": np.uint8})
Example #45
0
def test_rename_axis_inplace():
    test_frame = pandas.DataFrame(tm.getSeriesData())
    modin_df = pd.DataFrame(test_frame)

    result = test_frame.copy()
    modin_result = modin_df.copy()
    no_return = result.rename_axis("foo", inplace=True)
    modin_no_return = modin_result.rename_axis("foo", inplace=True)

    assert no_return is modin_no_return
    df_equals(modin_result, result)

    result = test_frame.copy()
    modin_result = modin_df.copy()
    no_return = result.rename_axis("bar", axis=1, inplace=True)
    modin_no_return = modin_result.rename_axis("bar", axis=1, inplace=True)

    assert no_return is modin_no_return
    df_equals(modin_result, result)
Example #46
0
    def setUp(self):
        self.ts = tm.makeTimeSeries()

        self.seriesd = tm.getSeriesData()
        self.tsd = tm.getTimeSeriesData()
        self.frame = DataFrame(self.seriesd)
        self.tsframe = DataFrame(self.tsd)

        self.df = DataFrame(
            {
                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
                "C": np.random.randn(8),
                "D": np.random.randn(8),
            }
        )

        self.df_mixed_floats = DataFrame(
            {
                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
                "C": np.random.randn(8),
                "D": np.array(np.random.randn(8), dtype="float32"),
            }
        )

        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["first", "second"],
        )
        self.mframe = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])

        self.three_group = DataFrame(
            {
                "A": ["foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar", "foo", "foo", "foo"],
                "B": ["one", "one", "one", "two", "one", "one", "one", "two", "two", "two", "one"],
                "C": ["dull", "dull", "shiny", "dull", "dull", "shiny", "shiny", "dull", "shiny", "shiny", "shiny"],
                "D": np.random.randn(11),
                "E": np.random.randn(11),
                "F": np.random.randn(11),
            }
        )
Example #47
0
    def test_ix_assign_column_mixed(self):
        # GH #1142
        df = DataFrame(tm.getSeriesData())
        df['foo'] = 'bar'

        orig = df.ix[:, 'B'].copy()
        df.ix[:, 'B'] = df.ix[:, 'B'] + 1
        assert_series_equal(df.B, orig + 1)

        # GH 3668, mixed frame with series value
        df = DataFrame({'x':range(10), 'y':range(10,20),'z' : 'bar'})
        expected = df.copy()
        expected.ix[0, 'y'] = 1000
        expected.ix[2, 'y'] = 1200
        expected.ix[4, 'y'] = 1400
        expected.ix[6, 'y'] = 1600
        expected.ix[8, 'y'] = 1800

        df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100
        assert_frame_equal(df,expected)
Example #48
0
    def test_ix_assign_column_mixed(self):
        # GH #1142
        df = DataFrame(tm.getSeriesData())
        df['foo'] = 'bar'

        orig = df.ix[:, 'B'].copy()
        df.ix[:, 'B'] = df.ix[:, 'B'] + 1
        assert_series_equal(df.B, orig + 1)

        # GH 3668, mixed frame with series value
        df = DataFrame({'x':range(10), 'y':range(10,20),'z' : 'bar'})
        expected = df.copy()
        expected.ix[0, 'y'] = 1000
        expected.ix[2, 'y'] = 1200
        expected.ix[4, 'y'] = 1400
        expected.ix[6, 'y'] = 1600
        expected.ix[8, 'y'] = 1800

        df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100
        assert_frame_equal(df,expected)
Example #49
0
    def setup_method(self, method):
        self.ts = tm.makeTimeSeries()

        self.seriesd = tm.getSeriesData()
        self.tsd = tm.getTimeSeriesData()
        self.frame = DataFrame(self.seriesd)
        self.tsframe = DataFrame(self.tsd)

        self.df = df()
        self.df_mixed_floats = DataFrame({
            'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
            'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
            'C':
            np.random.randn(8),
            'D':
            np.array(np.random.randn(8), dtype='float32')
        })

        self.mframe = mframe()

        self.three_group = DataFrame({
            'A': [
                'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', 'foo',
                'foo', 'foo'
            ],
            'B': [
                'one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', 'two',
                'two', 'one'
            ],
            'C': [
                'dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
                'dull', 'shiny', 'shiny', 'shiny'
            ],
            'D':
            np.random.randn(11),
            'E':
            np.random.randn(11),
            'F':
            np.random.randn(11)
        })
Example #50
0
    def test_ix_assign_column_mixed(self):
        # GH #1142
        df = DataFrame(tm.getSeriesData())
        df['foo'] = 'bar'

        orig = df.ix[:, 'B'].copy()
        df.ix[:, 'B'] = df.ix[:, 'B'] + 1
        assert_series_equal(df.B, orig + 1)

        # GH 3668, mixed frame with series value
        df = DataFrame({'x':lrange(10), 'y':lrange(10,20),'z' : 'bar'})
        expected = df.copy()

        for i in range(5):
            indexer = i*2
            v = 1000 + i*200
            expected.ix[indexer, 'y'] = v
            self.assert_(expected.ix[indexer, 'y'] == v)

        df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100
        assert_frame_equal(df,expected)

        # GH 4508, making sure consistency of assignments
        df = DataFrame({'a':[1,2,3],'b':[0,1,2]})
        df.ix[[0,2,],'b'] = [100,-100]
        expected = DataFrame({'a' : [1,2,3], 'b' : [100,1,-100] })
        assert_frame_equal(df,expected)

        df = pd.DataFrame({'a': lrange(4) })
        df['b'] = np.nan
        df.ix[[1,3],'b'] = [100,-100]
        expected = DataFrame({'a' : [0,1,2,3], 'b' : [np.nan,100,np.nan,-100] })
        assert_frame_equal(df,expected)

        # ok, but chained assignments are dangerous
        df = pd.DataFrame({'a': lrange(4) })
        df['b'] = np.nan
        df['b'].ix[[1,3]] = [100,-100]
        assert_frame_equal(df,expected)
Example #51
0
    def setUp(self):
        self.ts = tm.makeTimeSeries()

        self.seriesd = tm.getSeriesData()
        self.tsd = tm.getTimeSeriesData()
        self.frame = DataFrame(self.seriesd)
        self.tsframe = DataFrame(self.tsd)

        self.df = DataFrame(
            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
             'C': np.random.randn(8),
             'D': np.random.randn(8)})

        self.df_mixed_floats = DataFrame(
            {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
             'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
             'C': np.random.randn(8),
             'D': np.array(
                 np.random.randn(8), dtype='float32')})

        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
                                                                  'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        self.mframe = DataFrame(np.random.randn(10, 3), index=index,
                                columns=['A', 'B', 'C'])

        self.three_group = DataFrame(
            {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
                   'foo', 'foo', 'foo'],
             'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
                   'two', 'two', 'one'],
             'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
                   'dull', 'shiny', 'shiny', 'shiny'],
             'D': np.random.randn(11),
             'E': np.random.randn(11),
             'F': np.random.randn(11)})
Example #52
0

def _skip_if_no_xlsxwriter():
    try:
        import xlsxwriter  # NOQA
    except ImportError:
        raise nose.SkipTest('xlsxwriter not installed, skipping')


def _skip_if_no_excelsuite():
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()
    _skip_if_no_openpyxl()


_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
_frame = DataFrame(_seriesd)[:10]
_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10]
_tsframe = tm.makeTimeDataFrame()[:5]
_mixed_frame = _frame.copy()
_mixed_frame['foo'] = 'bar'


class SharedItems(object):
    def setUp(self):
        self.dirpath = tm.get_data_path()
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
        self.xlsx1 = os.path.join(self.dirpath, 'test.xlsx')
Example #53
0
import os
import sys
import unittest
from textwrap import dedent

from numpy import nan
from numpy.random import randn
import numpy as np

from pandas import DataFrame, Series, Index
import pandas.core.format as fmt
import pandas.util.testing as tm
import pandas
import pandas as pd

_frame = DataFrame(tm.getSeriesData())


def curpath():
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth


class TestDataFrameFormatting(unittest.TestCase):
    def setUp(self):
        self.frame = _frame.copy()

    def test_repr_embedded_ndarray(self):
        arr = np.empty(10, dtype=[('err', object)])
        for i in range(len(arr)):
            arr['err'][i] = np.random.randn(i)
Example #54
0
import warnings

from numpy import nan
from numpy.random import randn
import numpy as np

from pandas import DataFrame, Series, Index
from pandas.util.py3compat import lzip
import pandas.core.format as fmt
import pandas.util.testing as tm
import pandas
import pandas as pd
from pandas.core.config import (set_option, get_option,
                                option_context, reset_option)

_frame = DataFrame(tm.getSeriesData())

def curpath():
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth

class TestDataFrameFormatting(unittest.TestCase):
    _multiprocess_can_split_ = True
    def setUp(self):
        self.warn_filters = warnings.filters
        warnings.filterwarnings('ignore',
                                category=FutureWarning,
                                module=".*format")

        self.frame = _frame.copy()
Example #55
0
    def test_invert(self):
        _seriesd = tm.getSeriesData()
        df = pd.DataFrame(_seriesd)

        assert_frame_equal(-(df < 0), ~(df < 0))
Example #56
0

def _skip_if_no_xlsxwriter():
    try:
        import xlsxwriter  # NOQA
    except ImportError:
        raise nose.SkipTest('xlsxwriter not installed, skipping')


def _skip_if_no_excelsuite():
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()
    _skip_if_no_openpyxl()


_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
_frame = DataFrame(_seriesd)[:10]
_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10]
_tsframe = tm.makeTimeDataFrame()[:5]
_mixed_frame = _frame.copy()
_mixed_frame['foo'] = 'bar'


class SharedItems(object):
    def setUp(self):
        self.dirpath = tm.get_data_path()
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
        self.xlsx1 = os.path.join(self.dirpath, 'test.xlsx')
Example #57
0
 def setUp(self):
     self.frame = DataFrame(tm.getSeriesData())
     self.mixed_frame = self.frame.copy()
     self.mixed_frame['foo'] = 'bar'
def frame():
    return DataFrame(tm.getSeriesData())
def seriesd():
    return tm.getSeriesData()
Example #60
0
 def setUp(self):
     self.frame = DataFrame(tm.getSeriesData())
     self.mixed_frame = self.frame.copy()
     self.mixed_frame['foo'] = 'bar'