Beispiel #1
0
 def test_non_unique_idvars(self):
     """Check that wide_to_long raises ValueError for non-unique id vars."""
     # GH16382
     # Every row carries the same value in 'x', so using it as the id
     # variable (i) cannot identify rows uniquely.
     values = [1, 2, 3, 4, 5]
     wide = pd.DataFrame({
         'A_A1': list(values),
         'B_B1': list(values),
         'x': [1] * len(values)
     })
     stubs = ['A_A', 'B_B']
     with pytest.raises(ValueError):
         wide_to_long(wide, stubs, i='x', j='colname')
Beispiel #2
0
 def test_invalid_suffixtype(self):
     """Check the result when stub suffixes are strings, not numbers.

     If all stub names end with a string but a numeric suffix is assumed,
     an empty data frame is returned. The expected frame therefore has no
     rows, keeps the original wide columns next to the (empty) stub columns,
     and is indexed by ('id', 'year') with levels [0, 1] and [].
     """
     df = pd.DataFrame({
         'Aone': [1.0, 2.0],
         'Atwo': [3.0, 4.0],
         'Bone': [5.0, 6.0],
         'X': ['X1', 'X2']
     })
     df['id'] = df.index
     exp_data = {
         'X': '',
         'Aone': [],
         'Atwo': [],
         'Bone': [],
         'id': [],
         'year': [],
         'A': [],
         'B': []
     }
     exp_frame = pd.DataFrame(exp_data)
     exp_frame = exp_frame.set_index(
         ['id', 'year'])[['X', 'Aone', 'Atwo', 'Bone', 'A', 'B']]
     # set_levels returns a new MultiIndex; assign it back rather than
     # relying on the ``inplace`` argument, which newer pandas releases
     # deprecated and then removed.
     exp_frame.index = exp_frame.index.set_levels([[0, 1], []])
     long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year')
     # Column order is not part of the contract here, so sort both sides.
     tm.assert_frame_equal(long_frame.sort_index(axis=1),
                           exp_frame.sort_index(axis=1))
Beispiel #3
0
 def test_num_string_disambiguation(self):
     """Check number value_vars are told apart from string value_vars.

     'Arating' and 'Arating_old' start with the stub 'A', yet the expected
     frame keeps them as ordinary columns repeated for each year.
     """
     ratings = [91, 92, 93]
     wide = pd.DataFrame({
         'A11': ['a11', 'a22', 'a33'],
         'A12': ['a21', 'a22', 'a23'],
         'B11': ['b11', 'b12', 'b13'],
         'B12': ['b21', 'b22', 'b23'],
         'BB11': [1, 2, 3],
         'BB12': [4, 5, 6],
         'Arating': list(ratings),
         'Arating_old': list(ratings)
     })
     wide['id'] = wide.index

     stubs = ['A', 'B', 'BB']
     expected = pd.DataFrame({
         'Arating': ratings * 2,
         'Arating_old': ratings * 2,
         'A': ['a11', 'a22', 'a33', 'a21', 'a22', 'a23'],
         'B': ['b11', 'b12', 'b13', 'b21', 'b22', 'b23'],
         'BB': [1, 2, 3, 4, 5, 6],
         'id': [0, 1, 2] * 2,
         'year': ['11'] * 3 + ['12'] * 3
     })
     expected = expected.set_index(['id', 'year'])
     expected = expected[['Arating', 'Arating_old'] + stubs]

     result = wide_to_long(wide, stubs, i='id', j='year')

     # Compare with columns sorted so that column order does not matter.
     result_sorted = result.sort_index(axis=1)
     expected_sorted = expected.sort_index(axis=1)
     tm.assert_frame_equal(result_sorted, expected_sorted)
Beispiel #4
0
 def test_invalid_separator(self):
     """Check the result when the separator matches no column.

     If an invalid separator is supplied, an empty data frame is returned.
     The expected frame therefore has no rows, keeps the original wide
     columns next to the (empty) stub columns, and is indexed by
     ('id', 'year') with levels [0, 1] and [].
     """
     sep = 'nope!'
     df = pd.DataFrame({
         'A2010': [1.0, 2.0],
         'A2011': [3.0, 4.0],
         'B2010': [5.0, 6.0],
         'X': ['X1', 'X2']
     })
     df['id'] = df.index
     exp_data = {
         'X': '',
         'A2010': [],
         'A2011': [],
         'B2010': [],
         'id': [],
         'year': [],
         'A': [],
         'B': []
     }
     exp_frame = pd.DataFrame(exp_data)
     exp_frame = exp_frame.set_index(
         ['id', 'year'])[['X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
     # set_levels returns a new MultiIndex; assign it back rather than
     # relying on the ``inplace`` argument, which newer pandas releases
     # deprecated and then removed.
     exp_frame.index = exp_frame.index.set_levels([[0, 1], []])
     long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
     # Column order is not part of the contract here, so sort both sides.
     tm.assert_frame_equal(long_frame.sort_index(axis=1),
                           exp_frame.sort_index(axis=1))
Beispiel #5
0
 def test_character_overlap(self):
     """Check handling of overlapping characters in id_vars and value_vars.

     'BBBX' and 'BBBZ' share leading characters with the 'B' and 'BB'
     stubs; the expected frame carries them through as ordinary columns
     while 'A', 'B' and 'BB' are reshaped.
     """
     passthrough = [91, 92, 93]
     wide = pd.DataFrame({
         'A11': ['a11', 'a22', 'a33'],
         'A12': ['a21', 'a22', 'a23'],
         'B11': ['b11', 'b12', 'b13'],
         'B12': ['b21', 'b22', 'b23'],
         'BB11': [1, 2, 3],
         'BB12': [4, 5, 6],
         'BBBX': list(passthrough),
         'BBBZ': list(passthrough)
     })
     wide['id'] = wide.index

     stubs = ['A', 'B', 'BB']
     expected = pd.DataFrame({
         'BBBX': passthrough * 2,
         'BBBZ': passthrough * 2,
         'A': ['a11', 'a22', 'a33', 'a21', 'a22', 'a23'],
         'B': ['b11', 'b12', 'b13', 'b21', 'b22', 'b23'],
         'BB': [1, 2, 3, 4, 5, 6],
         'id': [0, 1, 2] * 2,
         'year': ['11'] * 3 + ['12'] * 3
     })
     expected = expected.set_index(['id', 'year'])
     expected = expected[['BBBX', 'BBBZ'] + stubs]

     result = wide_to_long(wide, stubs, i='id', j='year')

     # Compare with columns sorted so that column order does not matter.
     result_sorted = result.sort_index(axis=1)
     expected_sorted = expected.sort_index(axis=1)
     tm.assert_frame_equal(result_sorted, expected_sorted)
Beispiel #6
0
 def test_unbalanced(self):
     """Check that stubs may have a varying number of time variables.

     'A' has columns for 2010 and 2011 while 'B' only has 2010, so the
     expected 'B' values for year '2011' are NaN.
     """
     wide = pd.DataFrame({
         'A2010': [1.0, 2.0],
         'A2011': [3.0, 4.0],
         'B2010': [5.0, 6.0],
         'X': ['X1', 'X2']
     })
     wide['id'] = wide.index

     expected = pd.DataFrame({
         'X': ['X1', 'X1', 'X2', 'X2'],
         'A': [1.0, 3.0, 2.0, 4.0],
         'B': [5.0, np.nan, 6.0, np.nan],
         'id': [0, 0, 1, 1],
         'year': ['2010', '2011'] * 2
     })
     expected = expected.set_index(['id', 'year'])
     expected = expected[["X", "A", "B"]]

     result = wide_to_long(wide, ['A', 'B'], i='id', j='year')
     tm.assert_frame_equal(result, expected)
Beispiel #7
0
 def test_escapable_characters(self):
     """Check stub names containing escapable characters (parentheses)."""
     # Seed the global NumPy RNG so the random 'X' column is reproducible.
     np.random.seed(123)
     noise = np.random.randn(3)
     wide = pd.DataFrame({
         "A(quarterly)1970": dict(enumerate(["a", "b", "c"])),
         "A(quarterly)1980": dict(enumerate(["d", "e", "f"])),
         "B(quarterly)1970": dict(enumerate([2.5, 1.2, .7])),
         "B(quarterly)1980": dict(enumerate([3.2, 1.3, .1])),
         "X": {row: value for row, value in zip(range(3), noise)}
     })
     wide["id"] = wide.index

     stubs = ["A(quarterly)", "B(quarterly)"]
     # Each id appears once per year, so 'X' is repeated for both years.
     expected = DataFrame({
         "X": noise.tolist() * 2,
         "A(quarterly)": ['a', 'b', 'c', 'd', 'e', 'f'],
         "B(quarterly)": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
         "year": ['1970'] * 3 + ['1980'] * 3,
         "id": [0, 1, 2] * 2
     })
     expected = expected.set_index(['id', 'year'])
     expected = expected[["X"] + stubs]

     result = wide_to_long(wide, stubs, i="id", j="year")
     tm.assert_frame_equal(result, expected)
Beispiel #8
0
 def test_multiple_id_columns(self):
     """Check reshaping stub 'ht' with two id columns ('famid', 'birth')."""
     # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
     id_cols = ['famid', 'birth']
     wide = pd.DataFrame({
         'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
         'birth': [1, 2, 3] * 3,
         'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
         'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
     })

     # Each (famid, birth) pair contributes one row per age '1' and '2'.
     expected = pd.DataFrame({
         'ht': [
             2.8, 3.4, 2.9, 3.8, 2.2, 2.9,
             2.0, 3.2, 1.8, 2.8, 1.9, 2.4,
             2.2, 3.3, 2.3, 3.4, 2.1, 2.9
         ],
         'famid': [1] * 6 + [2] * 6 + [3] * 6,
         'birth': [1, 1, 2, 2, 3, 3] * 3,
         'age': ['1', '2'] * 9
     })
     expected = expected.set_index(id_cols + ['age'])
     expected = expected[['ht']]

     result = wide_to_long(wide, 'ht', i=id_cols, j='age')
     tm.assert_frame_equal(result, expected)