def test_same_data_arrays(self):
    """Two arrays created from identical data must hash identically."""
    left = np.array([[1, 2], [3, 4]])
    right = np.array([[1, 2], [3, 4]])
    self.assertEqual(hash_numpy_array(left), hash_numpy_array(right))
def test_different_data_arrays(self):
    """Arrays with differing element order must produce distinct hashes."""
    left = np.array([[1, 2], [3, 4]])
    right = np.array([[1, 3], [2, 4]])
    self.assertNotEqual(hash_numpy_array(left), hash_numpy_array(right))
def test_transpose_arrays(self):
    """A C-ordered array and an F-ordered array whose in-memory byte
    sequences coincide must hash to the same value, per the assertion
    below.
    """
    row_major = np.array([[1, 2], [3, 4]])
    # np.asfortranarray converts the logical transpose to column-major
    # storage, giving it the same underlying layout as ``row_major``.
    col_major = np.asfortranarray(np.array([[1, 3], [2, 4]]))
    self.assertEqual(hash_numpy_array(row_major),
                     hash_numpy_array(col_major))
def test_f_contiguous_array(self):
    """Hashing a Fortran-contiguous array must succeed and return a value.

    Fix: the local result variable was named ``hash``, shadowing the
    Python builtin; renamed to ``array_hash``.
    """
    array = np.array([[1, 2], [3, 4]])
    array = np.asfortranarray(array)
    array_hash = hash_numpy_array(array)
    self.assertIsNotNone(array_hash)
def fit(self, X, y, task=MULTICLASS_CLASSIFICATION, metric='acc_metric',
        feat_type=None, dataset_name=None):
    """Prepare backend state, validate inputs, and fit on (X, y).

    Parameters are forwarded into an ``XYDataManager`` which is then
    passed to ``self._fit``.  ``feat_type``, when given, must contain
    one of 'categorical'/'numerical' (case-insensitive) per column of X.
    Raises ``ValueError`` on an invalid ``feat_type``.
    """
    if self._shared_mode:
        # Shared mode: a previous run may already have stored a data
        # manager; reuse its dataset name when one exists.  An IOError
        # most likely means this is the first call to get the data
        # manager, so it is deliberately ignored.
        try:
            dataset_name = self._backend.load_datamanager().name
        except IOError:
            pass
    else:
        self._backend.context.delete_directories()
    self._backend.context.create_directories()

    if dataset_name is None:
        # Derive a deterministic name from the input data.
        dataset_name = hash_numpy_array(X)

    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    self._dataset_name = dataset_name
    self._stopwatch.start_task(self._dataset_name)
    self._logger = self._get_logger(dataset_name)

    # Accept metric either as a string key or as a metric object.
    if isinstance(metric, str):
        metric = STRING_TO_METRIC[metric]

    if feat_type is not None:
        if len(feat_type) != X.shape[1]:
            raise ValueError('Array feat_type does not have same number of '
                             'variables as X has features. %d vs %d.' %
                             (len(feat_type), X.shape[1]))
        if not all([isinstance(f, str) for f in feat_type]):
            raise ValueError('Array feat_type must only contain strings.')
        for ft in feat_type:
            if ft.lower() not in ['categorical', 'numerical']:
                raise ValueError('Only `Categorical` and `Numerical` are '
                                 'valid feature types, you passed `%s`' % ft)

    self._data_memory_limit = None
    data_manager = XYDataManager(X, y,
                                 task=task,
                                 metric=metric,
                                 feat_type=feat_type,
                                 dataset_name=dataset_name,
                                 encode_labels=False)
    return self._fit(data_manager)
def fit(self, X, y, task=MULTICLASS_CLASSIFICATION, metric='acc_metric',
        feat_type=None, dataset_name=None):
    """Set up the backend, validate arguments, and fit on (X, y).

    Wraps the inputs in an ``XYDataManager`` and delegates to
    ``self._fit``.  Raises ``ValueError`` if ``feat_type`` does not
    match X's column count, contains non-strings, or names a type
    other than 'categorical'/'numerical' (case-insensitive).
    """

    def _check_feat_type(declared):
        # Validate the per-column feature-type declarations.
        if len(declared) != X.shape[1]:
            raise ValueError('Array feat_type does not have same number of '
                             'variables as X has features. %d vs %d.' %
                             (len(declared), X.shape[1]))
        if not all([isinstance(f, str) for f in declared]):
            raise ValueError('Array feat_type must only contain strings.')
        for entry in declared:
            if entry.lower() not in ['categorical', 'numerical']:
                raise ValueError('Only `Categorical` and `Numerical` are '
                                 'valid feature types, you passed `%s`'
                                 % entry)

    if not self._shared_mode:
        self._backend.context.delete_directories()
    else:
        # If loading fails, it's likely that this is the first call to
        # get the data manager — fall through and keep dataset_name.
        try:
            stored = self._backend.load_datamanager()
            dataset_name = stored.name
        except IOError:
            pass
    self._backend.context.create_directories()

    # No name supplied or recovered: derive one from the data itself.
    if dataset_name is None:
        dataset_name = hash_numpy_array(X)

    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    self._dataset_name = dataset_name
    self._stopwatch.start_task(self._dataset_name)
    self._logger = self._get_logger(dataset_name)

    # Metric may arrive as a string key; resolve it to a metric object.
    metric = STRING_TO_METRIC[metric] if isinstance(metric, str) else metric

    if feat_type is not None:
        _check_feat_type(feat_type)

    self._data_memory_limit = None
    manager = XYDataManager(X, y,
                            task=task,
                            metric=metric,
                            feat_type=feat_type,
                            dataset_name=dataset_name,
                            encode_labels=False)
    return self._fit(manager)