def test_multi_dims_time_series_and_uni(self):
    """CSV output for a timestamp + state query matches the fixture rendered via ``to_csv``.

    The expected frame keeps only the wins metric, is indexed by the
    timestamp and the state display value, and uses display labels for the
    index names and the column header.
    """
    dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
    result = CSV(slicer.metrics.wins).transform(cont_uni_dim_df, slicer, dimensions, [])

    # Replace the raw state level with its display value in the index.
    # drop=True: the raw level is not needed as a column because only the
    # wins metric is selected afterwards (same pattern as the sibling tests).
    expected = (
        cont_uni_dim_df.copy()
        .set_index(fd('state_display'), append=True)
        .reset_index(fd('state'), drop=True)[[fm('wins')]]
    )
    expected.index.names = ['Timestamp', 'State']
    expected.columns = ['Wins']

    self.assertEqual(expected.to_csv(), result)
def test_uni_dim(self):
    """CSV output for a single candidate dimension matches the fixture rendered via ``to_csv``."""
    actual = CSV(slicer.metrics.wins).transform(
        uni_dim_df, slicer, [slicer.dimensions.candidate], []
    )

    # Index the fixture by the candidate display value instead of the raw key.
    by_display = uni_dim_df.copy().set_index(fd('candidate_display'), append=True)
    expected = by_display.reset_index(fd('candidate'), drop=True)[[fm('wins')]]
    expected.index.names = ['Candidate']
    expected.columns = ['Wins']

    self.assertEqual(expected.to_csv(), actual)
def test_pivoted_multi_dims_time_series_and_uni(self):
    """CSV output pivoted on state has one column per state display value, indexed by timestamp."""
    transformer = CSV(slicer.metrics.votes, pivot=[slicer.dimensions.state])
    actual = transformer.transform(
        cont_uni_dim_df,
        slicer,
        [slicer.dimensions.timestamp, slicer.dimensions.state],
        [],
    )

    # Index by (timestamp, state display) and keep only the votes metric.
    votes = cont_uni_dim_df.copy()
    votes = votes.set_index(fd('state_display'), append=True)
    votes = votes.reset_index(fd('state'), drop=True)[[fm('votes')]]

    # Move the state display level from the index into the columns.
    expected = votes.unstack(level=[1])
    expected.index.names = ['Timestamp']
    expected.columns = ['California', 'Texas']

    self.assertEqual(expected.to_csv(), actual)
def test_uni_dim(self):
    """Pandas output for a single candidate dimension equals the relabelled fixture frame."""
    actual = Pandas(slicer.metrics.wins).transform(
        uni_dim_df, slicer, [slicer.dimensions.candidate], []
    )

    # Index the fixture by the candidate display value instead of the raw key.
    by_display = uni_dim_df.copy().set_index(fd('candidate_display'), append=True)
    expected = by_display.reset_index(fd('candidate'), drop=True)[[fm('wins')]]
    expected.index.names = ['Candidate']
    expected.columns = ['Wins']
    expected.columns.name = 'Metrics'

    pandas.testing.assert_frame_equal(expected, actual)
def test_pivoted_multi_dims_time_series_and_uni_with_sort_index_desc(self):
    """Pandas output pivoted on state and sorted on column 0 descending equals the pivoted fixture sorted by index descending."""
    transformer = Pandas(
        slicer.metrics.votes,
        pivot=[slicer.dimensions.state],
        sort=[0],
        ascending=[False],
    )
    actual = transformer.transform(
        cont_uni_dim_df,
        slicer,
        [slicer.dimensions.timestamp, slicer.dimensions.state],
        [],
    )

    # Index by (timestamp, state display) and keep only the votes metric.
    votes = cont_uni_dim_df.copy()
    votes = votes.set_index(fd('state_display'), append=True)
    votes = votes.reset_index(fd('state'), drop=True)[[fm('votes')]]

    # Pivot the state display level into the columns, then relabel.
    pivoted = votes.unstack(level=[1])
    pivoted.index.names = ['Timestamp']
    pivoted.columns = ['California', 'Texas']
    pivoted.columns.names = ['State']

    expected = pivoted.sort_index(ascending=False)

    pandas.testing.assert_frame_equal(expected, actual)
def test_multi_dims_time_series_and_cat_sort_index_level_0_asc(self):
    """Pandas output with ``sort=[0]`` equals the fixture sorted by its Timestamp values.

    The expected frame is built by flattening the index, sorting on the
    ``Timestamp`` column and rebuilding the (Timestamp, State) index.
    """
    dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
    result = Pandas(slicer.metrics.wins, sort=[0]).transform(cont_uni_dim_df, slicer, dimensions, [])

    # Replace the raw state level with its display value in the index.
    # drop=True: the raw level is not needed as a column because only the
    # wins metric is selected afterwards (same pattern as the sibling tests).
    expected = (
        cont_uni_dim_df.copy()
        .set_index(fd('state_display'), append=True)
        .reset_index(fd('state'), drop=True)[[fm('wins')]]
    )
    expected.index.names = ['Timestamp', 'State']
    expected.columns = ['Wins']
    expected.columns.name = 'Metrics'

    # Sort on the flattened frame, then restore the two-level index.
    expected = expected.reset_index()
    expected = expected.sort_values(['Timestamp'])
    expected = expected.set_index(['Timestamp', 'State'])

    pandas.testing.assert_frame_equal(expected, result)
def test_pivoted_multi_dims_time_series_and_cat_sort_index_and_values(self):
    """Pandas output with ``sort=[0, 2]`` / ``ascending=[False, True]`` equals the fixture sorted by Timestamp descending, then Wins ascending.

    NOTE(review): the test name says "pivoted" but no ``pivot`` argument is
    passed to ``Pandas`` here -- confirm whether the name or the setup is
    the intended one.
    """
    dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
    result = Pandas(slicer.metrics.wins, sort=[0, 2], ascending=[False, True]) \
        .transform(cont_uni_dim_df, slicer, dimensions, [])

    # Replace the raw state level with its display value in the index.
    # drop=True: the raw level is not needed as a column because only the
    # wins metric is selected afterwards (same pattern as the sibling tests).
    expected = (
        cont_uni_dim_df.copy()
        .set_index(fd('state_display'), append=True)
        .reset_index(fd('state'), drop=True)[[fm('wins')]]
    )
    expected.index.names = ['Timestamp', 'State']
    expected.columns = ['Wins']
    expected.columns.name = 'Metrics'

    # Sort on the flattened frame, then restore the two-level index.
    expected = expected.reset_index()
    expected = expected.sort_values(['Timestamp', 'Wins'], ascending=[False, True])
    expected = expected.set_index(['Timestamp', 'State'])

    pandas.testing.assert_frame_equal(expected, result)
def test_uni_dim_no_display_definition(self):
    """ReactTable output for a candidate dimension without a display definition carries only raw dimension values."""
    import copy

    # Work on copies so the shared slicer dimension and fixture frame stay intact.
    candidate = copy.copy(slicer.dimensions.candidate)
    uni_dim_df_copy = uni_dim_df.copy()
    del uni_dim_df_copy[fd(slicer.dimensions.candidate.display.key)]
    del candidate.display

    actual = ReactTable(slicer.metrics.wins).transform(uni_dim_df_copy, slicer, [candidate], [])

    # Wins for candidates 1..11, in order.
    wins_per_candidate = [2, 0, 0, 4, 0, 0, 4, 0, 0, 2, 0]
    expected_rows = [
        {
            '$d$candidate': {'raw': str(candidate_id)},
            '$m$wins': {'display': str(wins), 'raw': wins},
        }
        for candidate_id, wins in enumerate(wins_per_candidate, start=1)
    ]
    expected = {
        'columns': [
            {'Header': 'Candidate', 'accessor': '$d$candidate'},
            {'Header': 'Wins', 'accessor': '$m$wins'},
        ],
        'data': expected_rows,
    }

    self.assertEqual(expected, actual)
def test_time_series_ref(self):
    """CSV output with an election-over-election reference has a 'Votes (EoE)' column next to 'Votes'."""
    dimensions = [slicer.dimensions.timestamp, slicer.dimensions.state]
    references = [ElectionOverElection(slicer.dimensions.timestamp)]
    actual = CSV(slicer.metrics.votes).transform(cont_uni_dim_ref_df, slicer, dimensions, references)

    # Index by (timestamp, state display); keep the metric and its reference column.
    by_display = cont_uni_dim_ref_df.copy().set_index(fd('state_display'), append=True)
    expected = by_display.reset_index(fd('state'), drop=True)[[fm('votes'), fm('votes_eoe')]]
    expected.index.names = ['Timestamp', 'State']
    expected.columns = ['Votes', 'Votes (EoE)']

    self.assertEqual(expected.to_csv(), actual)
def test_uni_dim_no_display_definition(self):
    """DataTablesJS output for a candidate dimension without a display definition renders only the raw value."""
    import copy

    # Work on copies so the shared slicer dimension and fixture frame stay intact.
    candidate = copy.copy(slicer.dimensions.candidate)
    uni_dim_df_copy = uni_dim_df.copy()
    del uni_dim_df_copy[fd(slicer.dimensions.candidate.display.key)]
    del candidate.display

    actual = DataTablesJS(slicer.metrics.wins).transform(uni_dim_df_copy, slicer, [candidate], [])

    # Wins for candidates 1..11, in order.
    wins_per_candidate = [2, 0, 0, 4, 0, 0, 4, 0, 0, 2, 0]
    expected_rows = [
        {
            'candidate': {'value': candidate_id},
            'wins': {'display': str(wins), 'value': wins},
        }
        for candidate_id, wins in enumerate(wins_per_candidate, start=1)
    ]
    expected = {
        'columns': [
            {
                'data': 'candidate',
                'render': {'_': 'value'},
                'title': 'Candidate',
            },
            {
                'data': 'wins',
                'render': {'_': 'value', 'display': 'display'},
                'title': 'Wins',
            },
        ],
        'data': expected_rows,
    }

    self.assertEqual(expected, actual)
def test_uni_dim_no_display_definition(self):
    """CSV output for a candidate dimension without a display definition keeps the fixture's own index."""
    import copy

    # Work on copies so the shared slicer dimension and fixture frame stay intact.
    candidate = copy.copy(slicer.dimensions.candidate)
    uni_dim_df_copy = uni_dim_df.copy()
    del uni_dim_df_copy[fd(slicer.dimensions.candidate.display.key)]
    del candidate.display

    actual = CSV(slicer.metrics.wins).transform(uni_dim_df_copy, slicer, [candidate], [])

    # No display column to swap in: only the labels change.
    wins_only = uni_dim_df_copy.copy()
    expected = wins_only[[fm('wins')]]
    expected.index.names = ['Candidate']
    expected.columns = ['Wins']

    self.assertEqual(expected.to_csv(), actual)
name=cat_dim_df.index.name)))

# uni_dim_df plus one extra row whose index label is None.
# NOTE(review): DataFrame.append is no longer available in pandas >= 2.0;
# confirm the pinned pandas version before touching these fixtures.
uni_dim_nans_df = uni_dim_df.append(
    pd.DataFrame([[None, 300, 2]],
                 columns=uni_dim_df.columns,
                 index=pd.Index([None], name=uni_dim_df.index.name)))


def add_nans(df):
    """Return a one-row frame with ``df``'s columns and a single None index label.

    The index is named after the second level of ``df``'s index. The row
    values are fixed at ``[None, 300, 2]`` (presumably display, votes and
    wins -- confirm against the fixture's column order).
    """
    return pd.DataFrame([[None, 300, 2]],
                        columns=df.columns,
                        index=pd.Index([None], name=df.index.names[1]))


# cont_uni_dim_df with one None-labelled row added per timestamp group,
# re-sorted by index.
cont_uni_dim_nans_df = cont_uni_dim_df \
    .append(cont_uni_dim_df.groupby(level=fd('timestamp')).apply(add_nans)) \
    .sort_index()


def totals(df):
    """Return a one-row frame holding ``None`` followed by the column sums of ``df``.

    The row is labelled None in an index named after the second level of
    ``df``'s index. Assumes ``df.sum()`` yields one value per remaining
    column so the row width matches ``df.columns`` -- TODO confirm.
    """
    return pd.DataFrame([[None] + list(df.sum())],
                        columns=df.columns,
                        index=pd.Index([None], name=df.index.names[1]))


# cont_uni_dim_nans_df with one totals row added per timestamp group.
cont_uni_dim_nans_totals_df = cont_uni_dim_nans_df \
    .append(cont_uni_dim_nans_df.groupby(level=fd('timestamp')).apply(totals)) \
    .sort_index() \
    .sort_index(level=[0, 1], ascending=False)  # This sorts the DF so that the first instance of NaN is the totals
    (1, 1): True,
    (1, 2): False,
    (1, 3): False,
    (2, 4): True,
    (2, 5): False,
    (3, 4): True,
    (3, 6): False,
    (4, 7): True,
    (4, 8): False,
    (5, 7): True,
    (5, 9): False,
    (6, 10): True,
    (6, 11): False,
}

# Column names for the politics fixture frame: nine keys wrapped with fd()
# followed by the two keys wrapped with fm().
df_columns = [fd('timestamp'),
              fd('candidate'), fd('candidate_display'),
              fd('political_party'),
              fd('election'), fd('election_display'),
              fd('state'), fd('state_display'),
              fd('winner'),
              fm('votes'),
              fm('wins')]


def PoliticsRow(timestamp, candidate, candidate_display, political_party, election, election_display, state,
                state_display, winner, votes, wins):
    """Pack the arguments into a tuple, in the same order as ``df_columns``."""
    return (
        timestamp, candidate, candidate_display, political_party, election, election_display, state, state_display,
        winner, votes, wins
    )
    (1, 2): False,
    (1, 3): False,
    (2, 4): True,
    (2, 5): False,
    (3, 4): True,
    (3, 6): False,
    (4, 7): True,
    (4, 8): False,
    (5, 7): True,
    (5, 9): False,
    (6, 10): True,
    (6, 11): False,
}

# Column names for the politics fixture frame: nine keys wrapped with fd()
# followed by the two keys wrapped with fm().
df_columns = [
    fd('timestamp'),
    fd('candidate'), fd('candidate_display'),
    fd('political_party'),
    fd('election'), fd('election_display'),
    fd('state'), fd('state_display'),
    fd('winner'),
    fm('votes'),
    fm('wins')
]


def PoliticsRow(timestamp, candidate, candidate_display, political_party, election, election_display, state, state_display, winner,
columns=cat_dim_df.columns,
index=pd.Index([None], name=cat_dim_df.index.name)))

# uni_dim_df plus one extra row whose index label is None.
# NOTE(review): DataFrame.append is no longer available in pandas >= 2.0;
# confirm the pinned pandas version before touching these fixtures.
uni_dim_nans_df = uni_dim_df.append(
    pd.DataFrame([[None, 300, 2]],
                 columns=uni_dim_df.columns,
                 index=pd.Index([None], name=uni_dim_df.index.name)))


def add_nans(df):
    """Return a one-row frame with ``df``'s columns and a single None index label.

    The index is named after the second level of ``df``'s index. The row
    values are fixed at ``[None, 300, 2]`` (presumably display, votes and
    wins -- confirm against the fixture's column order).
    """
    return pd.DataFrame([[None, 300, 2]],
                        columns=df.columns,
                        index=pd.Index([None], name=df.index.names[1]))


# cont_uni_dim_df with one None-labelled row added per timestamp group,
# re-sorted by index.
cont_uni_dim_nans_df = cont_uni_dim_df \
    .append(cont_uni_dim_df.groupby(level=fd('timestamp')).apply(add_nans)) \
    .sort_index()


def totals(df):
    """Return a one-row frame holding ``None`` followed by the column sums of ``df``.

    The row is labelled None in an index named after the second level of
    ``df``'s index. Assumes ``df.sum()`` yields one value per remaining
    column so the row width matches ``df.columns`` -- TODO confirm.
    """
    return pd.DataFrame([[None] + list(df.sum())],
                        columns=df.columns,
                        index=pd.Index([None], name=df.index.names[1]))


# cont_uni_dim_nans_df with one totals row added per timestamp group.
cont_uni_dim_nans_totals_df = cont_uni_dim_nans_df \
    .append(cont_uni_dim_nans_df.groupby(level=fd('timestamp')).apply(totals)) \
    .sort_index() \
    .sort_index(level=[0, 1], ascending=False)  # This sorts the DF so that the first instance of NaN is the totals