def test_run_1(self):
     # Same expected table as test_run_2, except _do_multiprocessing() is
     # replaced by a MagicMock that hands back canned per-input counts.
     notes = [1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1]
     counts = {1: 5, 2: 2, 3: 2, 4: 1, 5: 1}
     in_series = [Series(notes) for _ in range(3)]

     columns = {pos: Series(counts) for pos in range(3)}
     columns[u"all"] = Series({1: 15, 2: 6, 3: 6, 4: 3, 5: 3})
     expected = DataFrame(columns)

     exp = FrequencyExperimenter(in_series)
     fake_pool = mock.MagicMock(
         return_value=[(pos, Series(counts)) for pos in range(3)]
     )
     exp._do_multiprocessing = fake_pool
     actual = exp.run()

     # each input Series must be submitted as its own one-element job list
     fake_pool.assert_called_once_with(
         experimenter_func,
         [[(pos, each)] for pos, each in enumerate(in_series)],
     )
     self.assertEqual(len(expected.columns), len(actual.columns))
     for label in expected.columns:
         want = expected.loc[:, label]
         got = actual.loc[:, label]
         self.assertSequenceEqual(list(want.index), list(got.index))
         self.assertSequenceEqual(list(want.values), list(got.values))
Example #2
0
 def test_run_2(self):
     """Two DataFrames with 'column' set to 'a': only 'a' is counted in each."""
     frames = [
         pandas.DataFrame({'a': self.in_a, 'b': self.in_b})
         for _ in range(2)
     ]
     results = FrequencyExperimenter(frames, {'column': 'a'}).run()
     self.assertEqual(2, len(results))

     # unittest calls this assertion assertItemsEqual on Python 2 and
     # assertCountEqual on Python 3
     assert_same_items = (self.assertItemsEqual if six.PY2
                          else self.assertCountEqual)
     want = self.freq_a
     for result in results:
         freqs = result['frequency.FrequencyExperimenter']
         self.assertEqual(1, len(freqs.columns))
         assert_same_items(list(want.index), list(freqs['a'].index))
         for key in want.index:
             self.assertEqual(want[key], freqs['a'][key])
Example #3
0
 def test_run_3(self):
     """Two DataFrames where the 'column' setting picks one MultiIndex level.

     Both inputs hold the columns 'a'..'d' under the top-level labels 'one'
     and 'two'. In the first frame 'one' covers ('a', 'b'); in the second it
     covers ('c', 'd'). With ``{'column': 'one'}`` each result is expected to
     hold exactly the two columns filed under 'one', and each column is
     compared against its own expected frequencies.
     """
     in_series = [self.in_a, self.in_b, self.in_c, self.in_d]
     in_df = [
         pandas.DataFrame(in_series,
                          index=[['one', 'one', 'two', 'two'],
                                 ['a', 'b', 'c', 'd']]).T,
         pandas.DataFrame(in_series,
                          index=[['two', 'two', 'one', 'one'],
                                 ['a', 'b', 'c', 'd']]).T,
     ]
     actual = FrequencyExperimenter(in_df, {'column': 'one'}).run()
     self.assertEqual(2, len(actual))
     left_df = actual[0]['frequency.FrequencyExperimenter']
     right_df = actual[1]['frequency.FrequencyExperimenter']
     self.assertEqual(2, len(left_df.columns))
     self.assertEqual(2, len(right_df.columns))

     # unittest calls this assertion assertItemsEqual on Python 2 and
     # assertCountEqual on Python 3
     assert_same_items = (self.assertItemsEqual if six.PY2
                          else self.assertCountEqual)

     def check_column(expected, observed, allow_nan=False):
         """Compare one result column against its own expected Series.

         The index and the iteration both come from ``expected``, so the
         same comparison runs on Python 2 and Python 3. With ``allow_nan``
         an expected NaN must be matched by a NaN (NaN != NaN, so a plain
         assertEqual cannot be used for those entries).
         """
         assert_same_items(list(expected.index), list(observed.index))
         for each in expected.index:
             if allow_nan and numpy.isnan(expected[each]):  # pylint: disable=no-member
                 self.assertTrue(numpy.isnan(observed[each]))  # pylint: disable=no-member
             else:
                 self.assertEqual(expected[each], observed[each])

     # left, columns 'a' and 'b'
     check_column(self.freq_a, left_df['a'])
     check_column(self.freq_b, left_df['b'])
     # right, columns 'c' and 'd'  ('d' is the only column with NaN)
     check_column(self.freq_c, right_df['c'])
     check_column(self.freq_d, right_df['d'], allow_nan=True)
 def test_run_2(self):
     # Counterpart of test_run_1 without the MagicMock: the same three
     # Series go through the real run() and the same table is expected.
     notes = [1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1]
     counts = {1: 5, 2: 2, 3: 2, 4: 1, 5: 1}
     in_series = [Series(notes) for _ in range(3)]

     columns = {pos: Series(counts) for pos in range(3)}
     columns[u"all"] = Series({1: 15, 2: 6, 3: 6, 4: 3, 5: 3})
     expected = DataFrame(columns)

     actual = FrequencyExperimenter(in_series).run()

     self.assertEqual(len(expected.columns), len(actual.columns))
     for label in expected.columns:
         want = expected.loc[:, label]
         got = actual.loc[:, label]
         self.assertSequenceEqual(list(want.index), list(got.index))
         self.assertSequenceEqual(list(want.values), list(got.values))
 def test_run_4(self):
     # Same data as test_run_3, but the inputs arrive in a dict, so the
     # result columns are expected under the dict's keys.
     in_series = {
         u"hello": Series([1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1]),
         u"zello": Series([1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1, 4, 4, 3, 5, 1, 1, 1]),
         u"jello": Series([1, 2, 1, 1, 3, 1, 3, 2, 1]),
     }
     expected = DataFrame(
         {
             u"hello": Series({1: 5, 2: 2, 3: 2, 4: 1, 5: 1}),
             u"zello": Series({1: 8, 2: 2, 3: 3, 4: 3, 5: 2}),
             u"jello": Series({1: 5, 2: 2, 3: 2}),
             u"all": Series({1: 18, 2: 6, 3: 7, 4: 4, 5: 3}),
         }
     )
     actual = FrequencyExperimenter(in_series).run()

     # NaN != NaN, so swap every NaN for the same stand-in on both sides
     # before comparing values
     nan_stand_in = 4000
     actual = actual.fillna(value=nan_stand_in)
     expected = expected.fillna(value=nan_stand_in)

     self.assertEqual(len(expected.columns), len(actual.columns))
     for label in expected.columns:
         want = expected.loc[:, label]
         got = actual.loc[:, label]
         self.assertSequenceEqual(list(want.index), list(got.index))
         self.assertSequenceEqual(list(want.values), list(got.values))
 def test_run_3(self):
     # More complicated arithmetic: three Series of different lengths, one
     # of which never contains the values 4 and 5.
     in_series = [
         Series([1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1]),
         Series([1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1, 4, 4, 3, 5, 1, 1, 1]),
         Series([1, 2, 1, 1, 3, 1, 3, 2, 1]),
     ]
     expected = DataFrame(
         {
             0: Series({1: 5, 2: 2, 3: 2, 4: 1, 5: 1}),
             1: Series({1: 8, 2: 2, 3: 3, 4: 3, 5: 2}),
             2: Series({1: 5, 2: 2, 3: 2}),
             u"all": Series({1: 18, 2: 6, 3: 7, 4: 4, 5: 3}),
         }
     )
     actual = FrequencyExperimenter(in_series).run()

     # NaN != NaN, so swap every NaN for the same stand-in on both sides
     # before comparing values
     nan_stand_in = 4000
     actual = actual.fillna(value=nan_stand_in)
     expected = expected.fillna(value=nan_stand_in)

     self.assertEqual(len(expected.columns), len(actual.columns))
     for label in expected.columns:
         want = expected.loc[:, label]
         got = actual.loc[:, label]
         self.assertSequenceEqual(list(want.index), list(got.index))
         self.assertSequenceEqual(list(want.values), list(got.values))
Example #7
0
 def test_run_1(self):
     """A single DataFrame with no 'column' setting: both columns are counted."""
     in_df = pandas.DataFrame({'a': self.in_a, 'b': self.in_b})
     results = FrequencyExperimenter(in_df).run()
     self.assertEqual(1, len(results))

     freqs = results[0]['frequency.FrequencyExperimenter']
     self.assertEqual(2, len(freqs.columns))
     # index first, then values, for 'a' and then for 'b'
     for name, want in (('a', self.freq_a), ('b', self.freq_b)):
         got = freqs[name]
         self.assertSequenceEqual(list(want.index), list(got.index))
         self.assertSequenceEqual(list(want.values), list(got.values))