Ejemplo n.º 1
0
    def test_same_data_arrays(self):
        # Two independently constructed arrays holding the same values
        # must produce equal hashes.
        values = [[1, 2], [3, 4]]
        left = np.array(values)
        right = np.array(values)

        self.assertEqual(hash_numpy_array(left), hash_numpy_array(right))
Ejemplo n.º 2
0
    def test_different_data_arrays(self):
        # Same shape, different element placement: the hashes must differ.
        baseline = np.array([[1, 2], [3, 4]])
        rearranged = np.array([[1, 3], [2, 4]])

        hashes = [hash_numpy_array(arr) for arr in (baseline, rearranged)]

        self.assertNotEqual(hashes[0], hashes[1])
Ejemplo n.º 3
0
    def test_different_data_arrays(self):
        # Arrays with differing contents are expected to hash differently.
        arrays = (
            np.array([[1, 2], [3, 4]]),
            np.array([[1, 3], [2, 4]]),
        )

        hash_a, hash_b = (hash_numpy_array(arr) for arr in arrays)

        self.assertNotEqual(hash_a, hash_b)
Ejemplo n.º 4
0
    def test_same_data_arrays(self):
        # Equal contents in two distinct array objects -> equal hashes.
        def build():
            return np.array([[1, 2], [3, 4]])

        array_a, array_b = build(), build()

        hash_a = hash_numpy_array(array_a)
        hash_b = hash_numpy_array(array_b)

        self.assertEqual(hash_a, hash_b)
Ejemplo n.º 5
0
    def test_transpose_arrays(self):
        # A default-ordered array and a Fortran-ordered array holding its
        # transposed values are expected to hash to the same value.
        row_major = np.array([[1, 2], [3, 4]])
        col_major = np.asfortranarray(np.array([[1, 3], [2, 4]]))

        self.assertEqual(hash_numpy_array(row_major),
                         hash_numpy_array(col_major))
Ejemplo n.º 6
0
    def test_transpose_arrays(self):
        # The second array carries the transposed values of the first and is
        # converted to Fortran order before hashing; both hashes must match.
        original = np.array([[1, 2], [3, 4]])
        transposed_values = np.array([[1, 3], [2, 4]])
        fortran_ordered = np.asfortranarray(transposed_values)

        hashes = [
            hash_numpy_array(original),
            hash_numpy_array(fortran_ordered),
        ]

        self.assertEqual(hashes[0], hashes[1])
Ejemplo n.º 7
0
    def test_f_contiguous_array(self):
        # Hashing an array converted to Fortran order must yield a value
        # rather than None.
        fortran_array = np.asfortranarray(np.array([[1, 2], [3, 4]]))

        # Named `result` so the builtin `hash` is not shadowed.
        result = hash_numpy_array(fortran_array)

        self.assertIsNotNone(result)
Ejemplo n.º 8
0
    def test_f_contiguous_array(self):
        # A Fortran-ordered input must still be hashable: the returned value
        # is only checked for not being None.
        source = np.array([[1, 2], [3, 4]])
        source = np.asfortranarray(source)

        self.assertIsNotNone(hash_numpy_array(source))
Ejemplo n.º 9
0
    def fit(self,
            X,
            y,
            task=MULTICLASS_CLASSIFICATION,
            metric='acc_metric',
            feat_type=None,
            dataset_name=None):
        """Set up the run for the given data and delegate to ``_fit``.

        Outside shared mode the backend's directories are deleted before
        being (re)created. In shared mode the dataset name is instead taken
        from a data manager loaded through the backend, when that load
        succeeds.

        Parameters
        ----------
        X
            Feature data. ``X.shape[1]`` is compared against
            ``len(feat_type)`` when ``feat_type`` is given, and ``X`` is
            hashed to derive a dataset name when none is available.
        y
            Targets, handed unchanged to ``XYDataManager``.
        task
            Task identifier forwarded to ``XYDataManager``.
        metric
            Metric forwarded to ``XYDataManager``; a string is first looked
            up in ``STRING_TO_METRIC``.
        feat_type
            Optional sequence with one string per column of ``X``; each
            entry must be ``categorical`` or ``numerical``
            (case-insensitive).
        dataset_name
            Optional name for the dataset.

        Returns
        -------
        Whatever ``self._fit`` returns for the constructed data manager.

        Raises
        ------
        ValueError
            If ``feat_type`` has the wrong length, contains a non-string,
            or contains an unsupported feature type.
        """
        if self._shared_mode:
            # If this fails, it's likely that this is the first call to get
            # the data manager; the dataset name passed in is then kept.
            try:
                dataset_name = self._backend.load_datamanager().name
            except IOError:
                pass
        else:
            self._backend.context.delete_directories()

        self._backend.context.create_directories()

        if dataset_name is None:
            dataset_name = hash_numpy_array(X)

        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        self._dataset_name = dataset_name
        self._stopwatch.start_task(self._dataset_name)

        self._logger = self._get_logger(dataset_name)

        if isinstance(metric, str):
            metric = STRING_TO_METRIC[metric]

        # All feat_type checks only apply when the caller supplied one; the
        # order (length, element type, allowed value) decides which error
        # is reported first.
        if feat_type is not None:
            if len(feat_type) != X.shape[1]:
                raise ValueError('Array feat_type does not have same number of '
                                 'variables as X has features. %d vs %d.' %
                                 (len(feat_type), X.shape[1]))
            if not all(isinstance(entry, str) for entry in feat_type):
                raise ValueError('Array feat_type must only contain strings.')
            allowed_types = ('categorical', 'numerical')
            for entry in feat_type:
                if entry.lower() not in allowed_types:
                    raise ValueError('Only `Categorical` and `Numerical` are '
                                     'valid feature types, you passed `%s`' %
                                     entry)

        self._data_memory_limit = None
        data_manager_kwargs = dict(task=task,
                                   metric=metric,
                                   feat_type=feat_type,
                                   dataset_name=dataset_name,
                                   encode_labels=False)
        loaded_data_manager = XYDataManager(X, y, **data_manager_kwargs)

        return self._fit(loaded_data_manager)
Ejemplo n.º 10
0
    def fit(self, X, y,
            task=MULTICLASS_CLASSIFICATION,
            metric='acc_metric',
            feat_type=None,
            dataset_name=None):
        """Prepare backend state, validate ``feat_type`` and call ``_fit``.

        When not in shared mode, the backend's directories are deleted and
        then created again. In shared mode the name of a data manager loaded
        from the backend replaces ``dataset_name``, unless loading raises
        ``IOError``. If no dataset name is available afterwards, one is
        derived by hashing ``X``.

        A string ``metric`` is resolved through ``STRING_TO_METRIC``. When
        ``feat_type`` is given it must have ``X.shape[1]`` entries, all of
        them strings equal to ``categorical`` or ``numerical`` ignoring
        case; otherwise ``ValueError`` is raised.

        Returns the result of ``self._fit`` applied to an ``XYDataManager``
        built from the arguments.
        """
        shared = self._shared_mode
        if not shared:
            self._backend.context.delete_directories()
        if shared:
            # If this fails, it's likely that this is the first call to get
            # the data manager
            try:
                stored_manager = self._backend.load_datamanager()
                dataset_name = stored_manager.name
            except IOError:
                pass

        self._backend.context.create_directories()

        dataset_name = (hash_numpy_array(X) if dataset_name is None
                        else dataset_name)

        self._backend.save_start_time(self._seed)
        self._stopwatch = StopWatch()
        self._dataset_name = dataset_name
        self._stopwatch.start_task(self._dataset_name)

        self._logger = self._get_logger(dataset_name)

        if isinstance(metric, str):
            metric = STRING_TO_METRIC[metric]

        has_feat_type = feat_type is not None
        if has_feat_type and len(feat_type) != X.shape[1]:
            raise ValueError('Array feat_type does not have same number of '
                             'variables as X has features. %d vs %d.' %
                             (len(feat_type), X.shape[1]))
        if has_feat_type:
            # First pass: every entry has to be a string before any value
            # is inspected, so a non-string is always reported first.
            for candidate in feat_type:
                if not isinstance(candidate, str):
                    raise ValueError(
                        'Array feat_type must only contain strings.')
            # Second pass: only the two known feature types are accepted.
            for candidate in feat_type:
                if candidate.lower() in ['categorical', 'numerical']:
                    continue
                raise ValueError('Only `Categorical` and `Numerical` are '
                                 'valid feature types, you passed `%s`'
                                 % candidate)

        self._data_memory_limit = None
        loaded_data_manager = XYDataManager(
            X, y,
            task=task,
            metric=metric,
            feat_type=feat_type,
            dataset_name=dataset_name,
            encode_labels=False,
        )

        return self._fit(loaded_data_manager)