Esempio n. 1
0
    def test_to_dict_without_categories(self):
        """Exercise ``ScatterChart.to_dict_without_categories``.

        The call is expected to raise ``NeedToInjectCoordinatesException``
        until coordinates have been injected; afterwards it must return a
        dict whose only key is ``'data'``, holding one record per term.
        """
        term_doc_matrix = get_term_doc_matrix_without_categories()
        chart = ScatterChart(term_doc_matrix=term_doc_matrix,
                             minimum_term_frequency=0)

        # No coordinates injected yet, so exporting must be refused.
        with self.assertRaises(NeedToInjectCoordinatesException):
            chart.to_dict_without_categories()

        # Column sums of the raw matrix, and per-column counts of
        # non-zero entries (values are collapsed to 0/1 before summing).
        column_totals = term_doc_matrix.get_term_doc_mat().sum(axis=0).A1
        binarized = term_doc_matrix.get_term_doc_mat().astype(bool).astype(int)
        column_nonzero_counts = binarized.sum(axis=0).A1

        chart.inject_coordinates(original_x=column_totals,
                                 original_y=column_nonzero_counts,
                                 x_coords=scale(column_totals),
                                 y_coords=scale(column_nonzero_counts))

        exported = chart.to_dict_without_categories()
        self.assertIsInstance(exported, dict)
        self.assertEqual(set(exported.keys()), {'data'})
        self.assertEqual(len(exported['data']), term_doc_matrix.get_num_terms())

        expected_last_record = {
            'cat': 4,
            'cat25k': 735,
            'ox': 4,
            'oy': 3,
            'term': 'speak',
            'x': 1.0,
            'y': 1.0,
        }
        self.assertEqual(exported['data'][-1], expected_last_record)
Esempio n. 2
0
 def test_inject_coordinates_original(self):
     """Check that original coordinates given to ``inject_coordinates`` are exported.

     Scaled coordinates (each series divided by its own maximum) are
     injected together with the unscaled series as ``original_x`` /
     ``original_y``. The test then checks the key set of the first exported
     record and the ``ox`` / ``oy`` values of the record for the term 'and'.
     """
     tdm = build_hamlet_jz_term_doc_mat()
     freq_df = tdm.get_term_freq_df()
     scatter_chart = ScatterChart(term_doc_matrix=tdm,
                                  minimum_term_frequency=0)
     # Builtin ``float`` is used because the ``np.float`` alias (which was
     # just ``float``) was removed in NumPy 1.24.
     x = freq_df[freq_df.columns[1]].astype(float)
     y = freq_df[freq_df.columns[0]].astype(float)
     scatter_chart.inject_coordinates(x / x.max(), y / y.max(), original_x=x, original_y=y)
     j = scatter_chart.to_dict('hamlet')
     # Convert to a real set so the comparison does not depend on the
     # set-like behaviour of the keys view.
     self.assertEqual(set(j['data'][0].keys()),
                      {'x', 'os', 'y', 'ncat25k', 'neut', 'cat25k', 'ox', 'neut25k', 'extra25k', 'extra', 'oy',
                       'term',
                       's', 'bg'})
     and_term = [t for t in j['data'] if t['term'] == 'and'][0]
     self.assertEqual(and_term['ox'], 0)
     self.assertEqual(and_term['oy'], 1)
 def test_inject_coordinates_original(self):
     """Verify ``original_x`` / ``original_y`` survive into ``to_dict`` output.

     Each frequency column is divided by its maximum before being injected
     as a coordinate; the undivided series are passed as the original
     coordinates. Assertions cover the first record's key set and the
     ``ox`` / ``oy`` values stored for the term 'and'.
     """
     tdm = build_hamlet_jz_term_doc_mat()
     freq_df = tdm.get_term_freq_df()
     scatter_chart = ScatterChart(term_doc_matrix=tdm,
                                  minimum_term_frequency=0)
     # ``np.float`` no longer exists (removed in NumPy 1.24); it was an
     # alias of the builtin ``float``, so the dtype is unchanged.
     x = freq_df[freq_df.columns[1]].astype(float)
     y = freq_df[freq_df.columns[0]].astype(float)
     scatter_chart.inject_coordinates(x / x.max(), y / y.max(), original_x=x, original_y=y)
     j = scatter_chart.to_dict('hamlet')
     # Compare set against set rather than a keys view against a set.
     self.assertEqual(set(j['data'][0].keys()),
                      {'x', 'os', 'y', 'ncat25k', 'neut', 'cat25k', 'ox', 'neut25k', 'extra25k', 'extra', 'oy',
                       'term',
                       's', 'bg'})
     and_term = [t for t in j['data'] if t['term'] == 'and'][0]
     self.assertEqual(and_term['ox'], 0)
     self.assertEqual(and_term['oy'], 1)
    def test_to_dict_without_categories(self):
        """Check the category-free export of a ``ScatterChart``.

        Exporting before coordinates are injected is expected to raise
        ``NeedToInjectCoordinatesException``. After injection the export
        must be a dict with the single key ``'data'`` and one entry per term.
        """
        matrix = get_term_doc_matrix_without_categories()
        chart = ScatterChart(term_doc_matrix=matrix, minimum_term_frequency=0)

        # Exporting without coordinates must fail.
        with self.assertRaises(NeedToInjectCoordinatesException):
            chart.to_dict_without_categories()

        # Column sums, then column-wise counts of non-zero cells.
        raw_x = matrix.get_term_doc_mat().sum(axis=0).A1
        nonzero_mask = matrix.get_term_doc_mat().astype(bool).astype(int)
        raw_y = nonzero_mask.sum(axis=0).A1

        chart.inject_coordinates(
            original_x=raw_x,
            original_y=raw_y,
            x_coords=scale(raw_x),
            y_coords=scale(raw_y),
        )

        result = chart.to_dict_without_categories()
        self.assertIsInstance(result, dict)
        self.assertEqual(set(result.keys()), {'data'})
        self.assertEqual(len(result['data']), matrix.get_num_terms())

        final_record = result['data'][-1]
        self.assertEqual(
            final_record,
            {
                'cat': 4,
                'cat25k': 735,
                'ox': 4,
                'oy': 3,
                'term': 'speak',
                'x': 1.0,
                'y': 1.0,
            },
        )
Esempio n. 5
0
 def test_inject_coordinates(self):
     """Check which inputs ``ScatterChart.inject_coordinates`` rejects.

     ``CoordinatesNotRightException`` is expected when either coordinate
     sequence is empty, when either series is passed without being divided
     by its maximum, and when either series is negated. The final call,
     with both series divided by their maxima, must not raise.
     """
     tdm = build_hamlet_jz_term_doc_mat()
     freq_df = tdm.get_term_freq_df()
     scatter_chart = ScatterChart(term_doc_matrix=tdm,
                                  minimum_term_frequency=0)
     # Empty sequences on either or both axes.
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates([], [])
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(freq_df[freq_df.columns[0]], [])
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates([], freq_df[freq_df.columns[0]])
     # Builtin ``float`` replaces the ``np.float`` alias, which was removed
     # in NumPy 1.24; the resulting dtype is the same.
     x = freq_df[freq_df.columns[1]].astype(float)
     y = freq_df[freq_df.columns[0]].astype(float)
     # At least one axis not divided by its maximum.
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(x, y)
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(x, y / y.max())
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(x / x.max(), y)
     # At least one axis negated.
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(-x / x.max(), -y / y.max())
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(-x / x.max(), y / y.max())
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(x / x.max(), -y / y.max())
     # Both axes divided by their maxima: must be accepted.
     scatter_chart.inject_coordinates(x / x.max(), y / y.max())
 def test_inject_coordinates(self):
     """Verify the input validation of ``ScatterChart.inject_coordinates``.

     The test expects ``CoordinatesNotRightException`` for empty inputs,
     for series that have not been divided by their maximum, and for
     negated series. Injecting both series divided by their maxima is
     expected to succeed.
     """
     tdm = build_hamlet_jz_term_doc_mat()
     freq_df = tdm.get_term_freq_df()
     scatter_chart = ScatterChart(term_doc_matrix=tdm,
                                  minimum_term_frequency=0)
     # Rejected: one or both coordinate sequences empty.
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates([], [])
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(freq_df[freq_df.columns[0]], [])
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates([], freq_df[freq_df.columns[0]])
     # ``np.float`` was only an alias of builtin ``float`` and was removed
     # in NumPy 1.24, so the builtin is used directly.
     x = freq_df[freq_df.columns[1]].astype(float)
     y = freq_df[freq_df.columns[0]].astype(float)
     # Rejected: one or both series left undivided.
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(x, y)
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(x, y / y.max())
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(x / x.max(), y)
     # Rejected: one or both series negated.
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(-x / x.max(), -y / y.max())
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(-x / x.max(), y / y.max())
     with self.assertRaises(CoordinatesNotRightException):
         scatter_chart.inject_coordinates(x / x.max(), -y / y.max())
     # Accepted: both series divided by their maxima.
     scatter_chart.inject_coordinates(x / x.max(), y / y.max())