Exemple #1
0
    def test_guess_data_type_time(self):
        """Date and date-time strings are guessed as ``TimeVariable``.

        Each layout below is checked the same way: the guessed column type
        must be ``TimeVariable`` and no value map may be returned.
        """
        layouts = (
            # date only
            ["2019-10-10"] * 3 + ["2019-10-01"],
            # date and time joined by "T"
            ["2019-10-10T12:08:51"] * 3 + ["2019-10-01T12:08:51"],
            # date and time joined by a space
            ["2019-10-10 12:08:51"] * 3 + ["2019-10-01 12:08:51"],
            # date and time without seconds
            ["2019-10-10 12:08"] * 3 + ["2019-10-01 12:08"],
        )
        for in_values in layouts:
            valuemap, _, coltype = guess_data_type(in_values)
            self.assertEqual(TimeVariable, coltype)
            self.assertIsNone(valuemap)
Exemple #2
0
 def _unknown_column(data: np.ndarray, col: int, **_) -> _ColumnProperties:
     """Build column properties for a column by guessing its type.

     The column's values and mask are obtained from
     ``_TableBuilder._values_mask`` and handed to ``guess_data_type``;
     the guessed value map, values and column type are packed into a
     ``_ColumnProperties`` together with the original values.  Extra
     keyword arguments are accepted and ignored.
     """
     raw_values, mask = _TableBuilder._values_mask(data, col)
     guessed_map, guessed_values, guessed_type = guess_data_type(
         raw_values, mask)
     return _ColumnProperties(
         valuemap=guessed_map,
         values=guessed_values,
         coltype=guessed_type,
         orig_values=raw_values,
     )
Exemple #3
0
    def test_guess_data_type_continuous(self):
        """Numbers and numeric strings are guessed as ``ContinuousVariable``.

        For every case no value map may be returned and the returned values
        must equal the expected numeric sequence.
        """
        cases = (
            # (input values, expected returned values)
            (list(range(1, 100)), np.array(list(range(1, 100)))),
            ([1, 2, 3, 1, 2, 3], [1, 2, 3, 1, 2, 3]),
            # numeric strings come back as numbers
            (["1", "2", "3", "1", "2", "3"], [1, 2, 3, 1, 2, 3]),
        )
        for in_values, expected in cases:
            valuemap, values, coltype = guess_data_type(in_values)
            self.assertEqual(ContinuousVariable, coltype)
            self.assertIsNone(valuemap)
            np.testing.assert_array_equal(expected, values)
Exemple #4
0
    def test_guess_data_type_discrete(self):
        """Columns with few distinct values are guessed as ``DiscreteVariable``.

        For every case the value map must list the distinct values and the
        returned values must equal the input.
        """
        # just below the threshold for string variable:
        # 25 distinct values among 100 entries
        near_threshold = [f"{i}a" for i in range(24)] + ["a"] * 76

        cases = (
            # (input values, expected value map, expected returned values)
            ([1, 2, 1, 2], [1, 2], [1, 2, 1, 2]),
            (["1", "2", "1", "2", "a"],
             ["1", "2", "a"],
             ['1', '2', '1', '2', 'a']),
            # NOTE(review): this expects plain ``sorted`` order of the
            # distinct values -- confirm it matches the ordering that
            # guess_data_type actually produces for mixed digit/letter values.
            (near_threshold, sorted(set(near_threshold)), near_threshold),
        )
        for in_values, expected_map, expected_values in cases:
            valuemap, values, coltype = guess_data_type(in_values)
            self.assertEqual(DiscreteVariable, coltype)
            self.assertEqual(expected_map, valuemap)
            np.testing.assert_array_equal(expected_values, values)
Exemple #5
0
 def test_guess_data_type_values_order(self):
     """
     Check that the value map of a discrete column is in natural order,
     i.e. "something2" comes before "something12".
     """
     # Numeric suffixes in the order the values appear in the column.
     column_suffixes = (1, 12, 2, 1, 20, 1, 2, 12, 1, 12)
     in_values = [f"something{n}" for n in column_suffixes]
     # Distinct values, ordered by their numeric suffix.
     expected_order = [f"something{n}" for n in (1, 2, 12, 20)]

     valuemap, _, coltype = guess_data_type(in_values)

     self.assertEqual(DiscreteVariable, coltype)
     self.assertListEqual(expected_order, valuemap)
Exemple #6
0
    def test_guess_data_type_string(self):
        """Columns with too many distinct values are guessed as ``StringVariable``.

        For every case no value map may be returned and the returned values
        must equal the input.
        """
        cases = (
            # too many different values for discrete: all 90 are distinct
            [f"{i}a" for i in range(90)],
            # more than len(values)**0.7 distinct values
            [f"{i}a" for i in range(25)] + ["a"] * 75,
            # more than 100 different values - exactly 101; this is the
            # case when the len(values)**0.7 rule alone would vote for the
            # DiscreteVariable
            [f"{i}a" for i in range(100)] + ["a"] * 999,
        )
        for in_values in cases:
            valuemap, values, coltype = guess_data_type(in_values)
            self.assertEqual(StringVariable, coltype)
            self.assertIsNone(valuemap)
            np.testing.assert_array_equal(in_values, values)