def max_k(self, max_k):
    """Validates `max_k` and stores it in `self._max_k`.

    Args:
        max_k (int): Integer that must be at least 1 and not smaller
            than `self.min_k`.

    Raises:
        e.TypeError: If `max_k` is not an integer.
        e.ValueError: If `max_k` is below 1 or below `self.min_k`.

    """

    # The checks run in a fixed order: type, absolute bound, relative bound
    if not isinstance(max_k, int):
        raise e.TypeError('`max_k` should be an integer')
    elif max_k < 1:
        raise e.ValueError('`max_k` should be >= 1')
    elif max_k < self.min_k:
        raise e.ValueError('`max_k` should be >= `min_k`')
    else:
        self._max_k = max_k
def last(self, last):
    """Validates `last` and stores it in `self._last`.

    Args:
        last (int): Integer greater than or equal to -1 (the value -1
            itself is accepted).

    Raises:
        e.TypeError: If `last` is not an integer.
        e.ValueError: If `last` is smaller than -1.

    """

    if not isinstance(last, int):
        raise e.TypeError('`last` should be an integer')

    # -1 passes this bound, so the message states `>= -1` rather than `> -1`
    if last < -1:
        raise e.ValueError('`last` should be >= -1')

    self._last = last
def pred(self, pred):
    """Validates `pred` and stores it in `self._pred`.

    Args:
        pred (int): Integer that is not smaller than `c.NIL` (the value
            `c.NIL` itself is accepted).

    Raises:
        e.TypeError: If `pred` is not an integer.
        e.ValueError: If `pred` is smaller than `c.NIL`.

    """

    if not isinstance(pred, int):
        raise e.TypeError('`pred` should be an integer')

    # `NIL` itself passes this bound, so the message says "larger than or equal to"
    if pred < c.NIL:
        raise e.ValueError(
            '`pred` should have a value larger than or equal to `NIL`, e.g., -1')

    self._pred = pred
def _read_distances(self, file_name):
    """Loads pre-computed distances from a file into `self.pre_distances`.

    The text after the last dot of `file_name` selects the loader:
    `csv` uses `loader.load_csv` and `txt` uses `loader.load_txt`.

    Args:
        file_name (str): File to be loaded.

    Raises:
        e.ArgumentError: If the extension is neither `csv` nor `txt`.
        e.ValueError: If the selected loader returns `None`.

    """

    logger.debug('Running private method: read_distances().')

    # Everything after the last dot is treated as the extension
    suffix = file_name.rsplit('.', 1)[-1]

    # Rejects unsupported extensions before touching any loader
    if suffix not in ('csv', 'txt'):
        raise e.ArgumentError('File extension not recognized. It should be either `.csv` or .txt`')

    # Picks the loader that matches the extension
    read = loader.load_csv if suffix == 'csv' else loader.load_txt
    matrix = read(file_name)

    # A `None` result means the loader could not produce the distances
    if matrix is None:
        raise e.ValueError('Pre-computed distances could not been properly loaded')

    self.pre_distances = matrix
def size(self, size):
    """Validates `size` and stores it in `self._size`.

    Args:
        size (int): Integer greater than zero.

    Raises:
        e.TypeError: If `size` is not an integer.
        e.ValueError: If `size` is smaller than 1.

    """

    # The type is verified first so the comparison only ever sees integers
    if isinstance(size, int):
        if size < 1:
            raise e.ValueError('`size` should be > 0')

        self._size = size
    else:
        raise e.TypeError('`size` should be an integer')
def n_features(self, n_features):
    """Validates `n_features` and stores it in `self._n_features`.

    Args:
        n_features (int): Non-negative integer.

    Raises:
        e.TypeError: If `n_features` is not an integer.
        e.ValueError: If `n_features` is negative.

    """

    is_integer = isinstance(n_features, int)
    if not is_integer:
        raise e.TypeError('`n_features` should be an integer')

    # Only evaluated once the value is known to be an integer
    is_negative = n_features < 0
    if is_negative:
        raise e.ValueError('`n_features` should be >= 0')

    self._n_features = n_features
def test_value_error():
    """Checks that `exception.ValueError` can be raised and caught by its own type."""

    # The instance is built outside the `try` so only the raise is guarded
    raised = exception.ValueError('error')

    try:
        raise raised
    except exception.ValueError:
        return None
def mean_absolute_error(labels, preds):
    """Calculates the Mean Absolute Error (MAE) between true and predicted labels.

    Both inputs are converted to floating-point arrays before the subtraction,
    so unsigned-integer inputs cannot wrap around and boolean inputs are accepted.

    Args:
        labels (np.array | list): List or numpy array holding the true labels.
        preds (np.array | list): List or numpy array holding the predicted labels.

    Returns:
        The non-negative MAE, i.e., the sum of absolute differences divided by
        the number of true labels.

    Raises:
        e.ValueError: If `labels` holds no samples.

    """

    # Casting to float avoids fixed-width integer wraparound (e.g., uint8 1 - 2 = 255)
    labels = np.asarray(labels, dtype=float)
    preds = np.asarray(preds, dtype=float)

    # Number of testing samples to be evaluated
    n = float(len(labels))

    if n <= 0:
        raise e.ValueError('`n` should be a positive real number.')

    return np.sum(np.abs(labels - preds)) / n
def idx(self, idx):
    """Validates `idx` and stores it in `self._idx`.

    Args:
        idx (int): Non-negative integer.

    Raises:
        e.TypeError: If `idx` is not an integer.
        e.ValueError: If `idx` is negative.

    """

    # Type check comes first; the bound check relies on it
    if not isinstance(idx, int):
        raise e.TypeError('`idx` should be an integer')
    elif idx < 0:
        raise e.ValueError('`idx` should be >= 0')
    else:
        self._idx = idx
def label(self, label):
    """Validates `label` and stores it in `self._label`.

    Args:
        label (int): Non-negative integer.

    Raises:
        e.TypeError: If `label` is not an integer.
        e.ValueError: If `label` is negative.

    """

    if isinstance(label, int):
        # Integers still have to respect the lower bound
        if label < 0:
            raise e.ValueError('`label` should be >= 0')

        self._label = label
    else:
        raise e.TypeError('`label` should be an integer')
def n_plateaus(self, n_plateaus):
    """Validates `n_plateaus` and stores it in `self._n_plateaus`.

    Args:
        n_plateaus (int): Non-negative integer.

    Raises:
        e.TypeError: If `n_plateaus` is not an integer.
        e.ValueError: If `n_plateaus` is negative.

    """

    is_integer = isinstance(n_plateaus, int)
    if not is_integer:
        raise e.TypeError('`n_plateaus` should be an integer')

    # Only evaluated once the value is known to be an integer
    is_negative = n_plateaus < 0
    if is_negative:
        raise e.ValueError('`n_plateaus` should be >= 0')

    self._n_plateaus = n_plateaus
def root(self, root):
    """Validates `root` and stores it in `self._root`.

    Args:
        root (int): Non-negative integer.

    Raises:
        e.TypeError: If `root` is not an integer.
        e.ValueError: If `root` is negative.

    """

    # Type check comes first; the bound check relies on it
    if not isinstance(root, int):
        raise e.TypeError('`root` should be an integer')
    elif root < 0:
        raise e.ValueError('`root` should be >= 0')
    else:
        self._root = root
def predicted_label(self, predicted_label):
    """Validates `predicted_label` and stores it in `self._predicted_label`.

    Args:
        predicted_label (int): Non-negative integer.

    Raises:
        e.TypeError: If `predicted_label` is not an integer.
        e.ValueError: If `predicted_label` is negative.

    """

    if isinstance(predicted_label, int):
        # Integers still have to respect the lower bound
        if predicted_label < 0:
            raise e.ValueError('`predicted_label` should be >= 0')

        self._predicted_label = predicted_label
    else:
        raise e.TypeError('`predicted_label` should be an integer')
def cluster_label(self, cluster_label):
    """Validates `cluster_label` and stores it in `self._cluster_label`.

    Args:
        cluster_label (int): Non-negative integer.

    Raises:
        e.TypeError: If `cluster_label` is not an integer.
        e.ValueError: If `cluster_label` is negative.

    """

    is_integer = isinstance(cluster_label, int)
    if not is_integer:
        raise e.TypeError('`cluster_label` should be an integer')

    # Only evaluated once the value is known to be an integer
    is_negative = cluster_label < 0
    if is_negative:
        raise e.ValueError('`cluster_label` should be >= 0')

    self._cluster_label = cluster_label
def mean_squared_error(labels, preds, square_root=False):
    """Calculates the Mean Squared Error (MSE) between true and predicted labels.

    Both inputs are converted to floating-point arrays before the subtraction,
    so fixed-width integer inputs cannot wrap around or overflow when squared.

    Args:
        labels (np.array | list): List or numpy array holding the true labels.
        preds (np.array | list): List or numpy array holding the predicted labels.
        square_root (bool): Whether to return the square root of the MSE (RMSE).

    Returns:
        The non-negative MSE (sum of squared differences divided by the number
        of true labels), or its square root when `square_root` is truthy.

    Raises:
        e.ValueError: If `labels` holds no samples.

    """

    # Casting to float avoids fixed-width integer overflow (e.g., uint8 20**2 = 144)
    labels = np.asarray(labels, dtype=float)
    preds = np.asarray(preds, dtype=float)

    # Number of testing samples to be evaluated
    n = float(len(labels))

    if n <= 0:
        raise e.ValueError('`n` should be a positive real number.')

    mse = np.sum((labels - preds) ** 2) / n

    if square_root:
        # Root-mean squared error (RMSE)
        return mse ** 0.5

    return mse
def best_k(self, best_k):
    """Validates `best_k` and stores it in `self._best_k`.

    Args:
        best_k (int): Non-negative integer.

    Raises:
        e.TypeError: If `best_k` is not an integer.
        e.ValueError: If `best_k` is negative.

    """

    # Type check comes first; the bound check relies on it
    if not isinstance(best_k, int):
        raise e.TypeError('`best_k` should be an integer')
    elif best_k < 0:
        raise e.ValueError('`best_k` should be >= 0')
    else:
        self._best_k = best_k
def n_clusters(self, n_clusters):
    """Validates `n_clusters` and stores it in `self._n_clusters`.

    Args:
        n_clusters (int): Non-negative integer.

    Raises:
        e.TypeError: If `n_clusters` is not an integer.
        e.ValueError: If `n_clusters` is negative.

    """

    if isinstance(n_clusters, int):
        # Integers still have to respect the lower bound
        if n_clusters < 0:
            raise e.ValueError('`n_clusters` should be >= 0')

        self._n_clusters = n_clusters
    else:
        raise e.TypeError('`n_clusters` should be an integer')
def parse_loader(data):
    """Parses data in OPF file format that was pre-loaded (.csv, .txt or .json).

    The second column is read as the label and every column from the third
    onwards as a feature. Labels are required to be 1, 2, ..., n.

    Args:
        data (np.array): Numpy array holding the data in OPF file format.

    Returns:
        Arrays holding the features and the labels cast to `int`, or
        `(None, None)` when a `TypeError` is raised while parsing.

    Raises:
        e.ValueError: If there are fewer than two distinct labels or the
            labels are not sequential starting at 1.

    """

    logger.info('Parsing data ...')

    # Tries to parse the dataframe
    try:
        # From third columns beyond, we should have the features
        X = data[:, 2:]

        # Second column should be the label
        Y = data[:, 1]

        # Calculates the amount of samples per class
        _, counts = np.unique(Y, return_counts=True)

        # If there is only one class
        if len(counts) < 2:
            raise e.ValueError(
                'Parsed data should have at least two distinct labels')

        # Comparing the class count with the maximum alone lets label sets such
        # as {0, 2} through, so the smallest label is also required to be 1
        if len(counts) != np.max(Y) or np.min(Y) != 1:
            raise e.ValueError(
                'Parsed data should have sequential labels, e.g., 1, 2, ..., n'
            )

        logger.info('Data parsed.')

        return X, Y.astype(int)

    # If dataframe could not be parsed
    except TypeError as error:
        logger.error(error)

        return None, None
def _read_distances(self, file_path):
    """Loads pre-computed distances from a file and returns them.

    The text after the last dot of `file_path` selects the loader:
    `csv` uses `loader.load_csv` and `txt` uses `loader.load_txt`.

    Args:
        file_path (str): File to be loaded.

    Returns:
        A matrix with pre-computed distances.

    Raises:
        e.ArgumentError: If the extension is neither `csv` nor `txt`.
        e.ValueError: If the selected loader returns `None`.

    """

    logger.debug('Running private method: read_distances().')

    # The piece after the last dot (or the whole string when there is no dot)
    suffix = file_path.rpartition('.')[2]

    is_csv = suffix == 'csv'
    is_txt = suffix == 'txt'

    # Anything other than the two supported extensions is rejected up front
    if not (is_csv or is_txt):
        raise e.ArgumentError(
            'File extension not recognized. It should be either `.csv` or .txt`'
        )

    # Delegates to the loader matching the extension
    if is_csv:
        loaded = loader.load_csv(file_path)
    else:
        loaded = loader.load_txt(file_path)

    # A `None` result means the loader could not produce the distances
    if loaded is None:
        raise e.ValueError(
            'Pre-computed distances could not been properly loaded')

    return loaded
def parse_loader(data):
    """Parses data in OPF file format that was pre-loaded (.csv, .txt or .json).

    The second column is read as the label and every column from the third
    onwards as a feature. Labels are required to be 0, 1, ..., n-1; a single
    label only triggers a warning.

    Args:
        data (np.array): Numpy array holding the data in OPF file format.

    Returns:
        Arrays holding the features and the labels cast to `int`, or
        `(None, None)` when a `TypeError` is raised while parsing.

    Raises:
        e.ValueError: If the labels are not sequential starting at 0.

    """

    logger.info('Parsing data ...')

    try:
        # From third columns beyond, we should have the features
        X = data[:, 2:]

        # Second column should be the label
        Y = data[:, 1]

        # Calculates the amount of samples per class
        _, counts = np.unique(Y, return_counts=True)

        # If there is only one class
        if len(counts) == 1:
            logger.warning('Parsed data only have a single label.')

        # Comparing the class count with the maximum alone lets label sets such
        # as {-1, 1} through, so the smallest label is also required to be 0
        if len(counts) != (np.max(Y) + 1) or np.min(Y) != 0:
            raise e.ValueError('Parsed data should have sequential labels, e.g., 0, 1, ..., n-1')

        logger.info('Data parsed.')

        return X, Y.astype(int)

    except TypeError as error:
        logger.error(error)

        return None, None
def policy(self, policy):
    """Validates `policy` and stores it in `self._policy`.

    Args:
        policy (str): Either `min` or `max`.

    Raises:
        e.ValueError: If `policy` is neither `min` nor `max`.

    """

    allowed = ['min', 'max']

    # Accepted values are stored right away; everything else is rejected
    if policy in allowed:
        self._policy = policy
        return

    raise e.ValueError('`policy` should be `min` or `max`')