def test_label_encoder():
    """Check LabelEncoder transform / inverse_transform on integer labels."""
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])

    # The fitted classes are expected to be the sorted distinct labels.
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    raw_labels = [0, 1, 4, 4, 5, -1, -1]
    encoded = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(raw_labels), encoded)
    assert_array_equal(encoder.inverse_transform(encoded), raw_labels)

    # The label 6 was not part of the data passed to fit.
    assert_raises(ValueError, encoder.transform, [0, 6])
def test_label_encoder_empty_array():
    """Check that a fitted LabelEncoder maps empty input to empty output."""
    encoder = LabelEncoder()
    encoder.fit(np.array(["1", "2", "1", "2", "2"]))

    # Empty transform.
    forward = encoder.transform([])
    assert_array_equal(np.array([]), forward)

    # Empty inverse transform.
    backward = encoder.inverse_transform([])
    assert_array_equal(np.array([]), backward)
def test_label_encoder_negative_ints():
    """Check LabelEncoder round-trips a label set that includes -1."""
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    raw_labels = [0, 1, 4, 4, 5, -1, -1]
    encoded = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(raw_labels), encoded)
    assert_array_equal(encoder.inverse_transform(encoded), raw_labels)

    # 6 is absent from the fitted labels.
    assert_raises(ValueError, encoder.transform, [0, 6])
def test_label_encoder_negative_ints():
    """Check LabelEncoder on integer labels containing a negative value."""
    fitted = LabelEncoder()
    fitted.fit([1, 1, 4, 5, -1, 0])
    expected_classes = [-1, 0, 1, 4, 5]
    assert_array_equal(fitted.classes_, expected_classes)

    samples = [0, 1, 4, 4, 5, -1, -1]
    sample_codes = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(fitted.transform(samples), sample_codes)
    assert_array_equal(fitted.inverse_transform(sample_codes), samples)

    # Transforming a label that was not fitted must raise.
    assert_raises(ValueError, fitted.transform, [0, 6])
def test_label_encoder_empty_array(values):
    """Check empty transform / inverse_transform after fitting on ``values``.

    ``values`` is supplied by the caller (presumably a pytest
    parametrization that is not visible here).
    """
    encoder = LabelEncoder()
    encoder.fit(values)

    # Empty transform.
    forward = encoder.transform([])
    assert_array_equal(np.array([]), forward)

    # Empty inverse transform.
    backward = encoder.inverse_transform([])
    assert_array_equal(np.array([]), backward)
def test_label_encoder():
    """Check LabelEncoder transform / inverse_transform on integer labels."""
    fitted = LabelEncoder()
    fitted.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(fitted.classes_, [-1, 0, 1, 4, 5])

    samples = [0, 1, 4, 4, 5, -1, -1]
    sample_codes = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(fitted.transform(samples), sample_codes)
    assert_array_equal(fitted.inverse_transform(sample_codes), samples)

    # 6 is not among the fitted labels, so transform must raise.
    assert_raises(ValueError, fitted.transform, [0, 6])
def test_label_encoder_string_labels():
    """Check LabelEncoder transform / inverse_transform on string labels."""
    encoder = LabelEncoder()
    encoder.fit(["paris", "paris", "tokyo", "amsterdam"])
    assert_array_equal(encoder.classes_, ["amsterdam", "paris", "tokyo"])

    cities = ["tokyo", "tokyo", "paris"]
    city_codes = [2, 2, 1]
    assert_array_equal(encoder.transform(cities), city_codes)
    assert_array_equal(encoder.inverse_transform(city_codes), cities)

    # "london" was not passed to fit.
    assert_raises(ValueError, encoder.transform, ["london"])
def test_label_encoder_string_labels():
    """Check LabelEncoder with non-numeric (string) labels."""
    fitted = LabelEncoder()
    fitted.fit(["paris", "paris", "tokyo", "amsterdam"])
    expected_classes = ["amsterdam", "paris", "tokyo"]
    assert_array_equal(fitted.classes_, expected_classes)

    samples = ["tokyo", "tokyo", "paris"]
    sample_codes = [2, 2, 1]
    assert_array_equal(fitted.transform(samples), sample_codes)
    assert_array_equal(fitted.inverse_transform(sample_codes), samples)

    # A label that was never fitted must be rejected.
    assert_raises(ValueError, fitted.transform, ["london"])
def test_label_encoder():
    # Exercise LabelEncoder.transform / inverse_transform on integer labels,
    # then check that a bare string passed to transform is rejected.
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    raw_labels = [0, 1, 4, 4, 5, -1, -1]
    encoded = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(raw_labels), encoded)
    assert_array_equal(encoder.inverse_transform(encoded), raw_labels)
    assert_raises(ValueError, encoder.transform, [0, 6])

    # Re-fit the same encoder on strings; a scalar string is not a valid
    # input for transform.
    encoder.fit(["apple", "orange"])
    msg = "bad input shape"
    assert_raise_message(ValueError, msg, encoder.transform, "apple")
def test_label_encoder(values, classes, unknown):
    # Check LabelEncoder's fit + transform, inverse_transform and
    # fit_transform on caller-supplied data (presumably a pytest
    # parametrization that is not visible here).
    expected_codes = [1, 0, 2, 0, 2]

    stepwise = LabelEncoder()
    stepwise.fit(values)
    assert_array_equal(stepwise.classes_, classes)
    assert_array_equal(stepwise.transform(values), expected_codes)
    assert_array_equal(stepwise.inverse_transform([1, 0, 2, 0, 2]), values)

    # fit_transform on a fresh encoder must give the same codes.
    combined = LabelEncoder()
    codes = combined.fit_transform(values)
    assert_array_equal(codes, expected_codes)

    with pytest.raises(ValueError, match="unseen labels"):
        combined.transform(unknown)
def test_label_encoder():
    # LabelEncoder: integer round trip, unknown-label failure, and rejection
    # of a scalar string argument.
    fitted = LabelEncoder()
    fitted.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(fitted.classes_, [-1, 0, 1, 4, 5])

    samples = [0, 1, 4, 4, 5, -1, -1]
    sample_codes = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(fitted.transform(samples), sample_codes)
    assert_array_equal(fitted.inverse_transform(sample_codes), samples)
    assert_raises(ValueError, fitted.transform, [0, 6])

    # The same encoder instance is fitted again, this time on strings.
    fitted.fit(["apple", "orange"])
    msg = "bad input shape"
    assert_raise_message(ValueError, msg, fitted.transform, "apple")
def test_label_encoder(values, classes, unknown):
    # Test LabelEncoder's transform, fit_transform and inverse_transform
    # methods against the caller-supplied ``values`` / ``classes``.
    codes = [1, 0, 2, 0, 2]

    first = LabelEncoder()
    first.fit(values)
    assert_array_equal(first.classes_, classes)
    assert_array_equal(first.transform(values), codes)
    assert_array_equal(first.inverse_transform([1, 0, 2, 0, 2]), values)

    second = LabelEncoder()
    assert_array_equal(second.fit_transform(values), codes)

    # Labels outside the fitted set must be reported as unseen.
    with pytest.raises(ValueError, match="unseen labels"):
        second.transform(unknown)
def test_label_encoder_errors():
    # Check that invalid arguments yield ValueError.

    # An encoder that was never fitted must reject both directions.
    le = LabelEncoder()
    with pytest.raises(ValueError):
        le.transform([])
    with pytest.raises(ValueError):
        le.inverse_transform([])

    # Fail on unseen labels
    le = LabelEncoder()
    le.fit([1, 2, 3, -1, 1])
    msg = "contains previously unseen labels"
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform([-2])
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform([-2, -3, -4])

    # Fail on inverse_transform("")
    # ``match`` is a regular expression: the parentheses must be escaped,
    # otherwise "()" is an empty group and the shape suffix is never checked.
    msg = r"bad input shape \(\)"
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform("")
class CustomNNCategorical(CustomNNBase):
    """
    Custom sk-style classifier implementing an MLP with keras.

    Labels are one-hot encoded through a LabelEncoder, the network ends in a
    softmax layer, and Cohen's kappa is tracked through ``Cohen_kappa_logger``
    rather than as a keras metric. No custom loss for now @todo
    """

    def __init__(self,
                 hidden=[200, 100, 50, 20],
                 dropout=[0.1, 0.1],
                 reg=[0.05, 0.05],
                 h_act=[relu],
                 epoch=500,
                 batch_size=32,
                 cbEarly="metric",
                 loss="categorical_crossentropy",
                 optimizer='adam',
                 metrics=['cohen_kappa'],
                 kappa_weights="quadratic",
                 validation=0.2,
                 smooth_cb=True):
        '''
        :param hidden: number of units of each hidden Dense layer
        :param dropout: dropout[0] is applied to the input, dropout[i + 1]
            after hidden layer i; missing entries mean no dropout
        :param reg: L2 regularization factor per hidden layer (kernel and
            bias); missing entries mean no regularization
        :param h_act: hidden activations, cycled over the hidden layers
        :param epoch: number of epochs passed to ``model.fit``
        :param batch_size: batch size passed to ``model.fit``
        :param cbEarly: "metric" or an EarlyStopping instance
        :param loss: "categorical_crossentropy", lossOCC or lossOCCQuadratic
        :param optimizer: optimizer passed to ``model.compile``
        :param metrics: ["accuracy"] or ['cohen_kappa'] (single entry only)
        :param kappa_weights: compatible with sk (ex: "quadratic", None),
            ignored if metrics != 'cohen_kappa'
        :param validation: float (validation fraction), tuple
            (X_val, y_val) or None
        :param smooth_cb: if True EarlyStopping uses val_cohen_kappa smoothed
            (left avg window 3), only with val_cohen_kappa
        :note restore_best_weights requires keras 2.2.3
        :note the list defaults are only stored and read by this class,
            never mutated in place
        '''
        assert loss in ["categorical_crossentropy", lossOCC, lossOCCQuadratic]
        CustomNNBase.__init__(self, epoch, loss, optimizer, metrics,
                              batch_size)
        assert (len(hidden) > 0 and len(hidden) + 1 >= len(dropout)
                and len(hidden) >= len(reg) and len(hidden) >= len(h_act))
        self.hidden = hidden
        self.dropout = dropout
        self.reg = reg
        self.h_act = h_act
        self.validation = validation
        self.final_activation = softmax
        self.cbEarly = cbEarly
        self.smooth_cb = smooth_cb
        self.cbReduceLR = ReduceLROnPlateau(monitor='loss',
                                            factor=0.8,
                                            patience=3,
                                            verbose=0,
                                            mode='auto',
                                            min_delta=0.0001,
                                            cooldown=0,
                                            min_lr=0)
        self.kappa_weights = kappa_weights
        if len(self.metrics) > 1:
            # Raising a plain string is itself a TypeError in Python 3.
            raise NotImplementedError("TODO")

    def __compile(self, input_shape, output_shape):
        """Build and compile the Sequential MLP (stored in ``self.model``)."""

        def item_or_none(seq, idx):
            # Per-layer settings may be shorter than the layer list.
            return seq[idx] if idx < len(seq) else None

        # @TODO ALSO USE L1 FOR BETTER FEATURE SELECTION
        reg = [regularizers.l2(factor) for factor in self.reg]
        # Cycle the activations over the hidden layers. Indexing modulo the
        # length always yields one activation per layer, whereas repeating
        # the list round(n_hidden / n_act) times could come up short.
        h_act = [self.h_act[i % len(self.h_act)]
                 for i in range(len(self.hidden))]

        self.model = Sequential()
        self.model.add(InputLayer(input_shape=(input_shape, )))
        input_dropout = item_or_none(self.dropout, 0)
        if input_dropout is not None:
            self.model.add(Dropout(input_dropout))
        for i, units in enumerate(self.hidden):
            layer_reg = item_or_none(reg, i)
            self.model.add(
                Dense(units,
                      activation=h_act[i],
                      kernel_regularizer=layer_reg,
                      bias_regularizer=layer_reg))
            # dropout[0] is for the input, hence the shift by one.
            layer_dropout = item_or_none(self.dropout, i + 1)
            if layer_dropout is not None:
                self.model.add(Dropout(layer_dropout))
        self.model.add(Dense(output_shape, activation=self.final_activation))
        self.model.compile(optimizer=self.optimizer,
                           loss=self.loss,
                           metrics=self.metrics)

    def __category_to_output(self, y, refit=True):
        """One-hot encode labels ``y``.

        :param y: labels
        :param refit: if True (default) a new LabelEncoder is fitted on ``y``
            and stored in ``self.label_encoder``; if False the already fitted
            encoder is reused, so that validation labels share the training
            encoding and do not replace the encoder used by ``predict``
        """
        if refit:
            self.label_encoder = LabelEncoder()
            self.label_encoder.fit(y)
        codes = self.label_encoder.transform(y)
        return to_categorical(codes,
                              num_classes=len(self.label_encoder.classes_))

    def __output_to_category(self, output):
        """Map network outputs (one row per sample) back to original labels."""
        pred = [np.argmax(row) for row in output]
        return self.label_encoder.inverse_transform(pred)

    def cohen_kappa_metric_keras(self, y_true, y_pred):
        '''
        Do not work as a metric because kappa is not linear and keras make a
        weighted avg of batches score
        :deprecated @see Cohen_kappa_logger
        '''
        raise NotImplementedError("deprecated @see Cohen_kappa_logger")

    def cohen_kappa_score(self, y_true, y_pred):
        ''':deprecated @see Cohen_kappa_logger'''
        raise NotImplementedError("deprecated @see Cohen_kappa_logger")

    def break_on_epoch_n(self, threshold, sec=60):
        """Record the number of epochs run; sleep ``sec`` seconds if it
        exceeds ``threshold`` (cool down)."""
        self.n_epoch = len(self.history.history["loss"])
        if self.n_epoch > threshold:
            sleep(sec)  # cool down

    def _fit_val(self, X, output):
        """Run ``model.fit`` with the validation mode selected by
        ``self.validation`` and store the result in ``self.history``."""
        fit_kwargs = dict(epochs=self.epoch,
                          batch_size=self.batch_size,
                          callbacks=self.callback_list,
                          verbose=0)
        if isinstance(self.validation, float):
            fit_kwargs["validation_split"] = self.validation
        elif isinstance(self.validation, tuple):
            assert self.validation[0].shape[1] == X.shape[
                1], "X_validation must be transformed with prep first"
            # Keep the constructor parameter untouched: the encoded labels
            # only live in the fit call.
            fit_kwargs["validation_data"] = (
                self.validation[0],
                self.__category_to_output(self.validation[1], refit=False))
        elif self.validation is not None:
            raise ValueError("unknown validation type")
        self.history = self.model.fit(X, output, **fit_kwargs)

    def _kappa_disambiguation(self, X, output):
        '''
        Configure metric plotting, early stopping and the kappa logger
        according to ``self.metrics[0]``.

        :param X: training features
        :param output: one-hot encoded training labels
        :return: (X, output); reduced to the training part when
            ``self.validation`` is a float
        '''
        self.metric_plot = None
        # for cbEarly, is enough from observation @todo in init
        self.patience = 20
        if self.metrics[0] == "accuracy":
            self.metric_plot = "acc"
            # The accuracy path was never finished.
            raise NotImplementedError(
                "min_delta must be redefined according to val_acc")
        elif self.metrics[0] == 'cohen_kappa':
            # 'cohen_kappa_metric' cannot be supported
            # @see explication in Cohen_kappa_logger
            # NOTE(review): this overwrites the constructor parameter, so a
            # second call to fit fails on self.metrics[0] - confirm intent.
            self.metrics = None
            self.metric_plot = 'cohen_kappa'
            if self.cbEarly == "metric":
                if self.validation:
                    monitor = ("val_cohen_kappa_smoothed"
                               if self.smooth_cb else "val_cohen_kappa")
                elif not self.smooth_cb:
                    monitor = "cohen_kappa"
                else:
                    raise ValueError("No cohen_kappa_smoothed")
                print("monitor", monitor)
                self.cbEarly = EarlyStopping(
                    monitor=monitor,
                    min_delta=0.00000001,
                    patience=self.patience,  # a large patience is necessary!
                    verbose=0,
                    mode='max',
                    restore_best_weights=True)

            # Without validation data the logger receives None for both.
            # NOTE(review): confirm Cohen_kappa_logger accepts None here.
            X_val = y_val = None
            if isinstance(self.validation, float):
                X, X_val, output, y_val = train_test_split(
                    X, output, test_size=self.validation)
            elif isinstance(self.validation, tuple):
                assert self.validation[0].shape[1] == X.shape[
                    1], "X_validation must be transformed with prep first"
                X_val = self.validation[0]
                y_val = self.__category_to_output(self.validation[1],
                                                  refit=False)
            elif self.validation is not None:
                raise ValueError("unknown validation type")
            # self.validation is kept: val_loss is needed for the callbacks.
            self.kappa_logger = Cohen_kappa_logger(
                output_to_category=self.__output_to_category,
                X_train=X,
                y_train=output,
                X_val=X_val,
                y_val=y_val,
                kappa_weights=self.kappa_weights)
        else:
            print(self.metrics[0])
            raise NotImplementedError("not implemented")
        return X, output

    def fit(self, X, y=None):
        '''
        Encode ``y``, build the network and train it.

        :param X: training features, one row per sample
        :param y: training labels
        :return: self
        '''
        output = self.__category_to_output(y)
        X, output = self._kappa_disambiguation(X, output)
        output_shape = output.shape[1]
        input_shape = X.shape[1]
        self.__compile(input_shape, output_shape)
        candidates = [self.kappa_logger, self.cbReduceLR, self.cbEarly]
        self.callback_list = [cb for cb in candidates if cb]
        self._fit_val(X, output)
        self.break_on_epoch_n(50)
        return self

    def predict(self, X, y=None):
        """Predict original labels for ``X``.

        :raises RuntimeError: if ``fit`` has not produced ``self.history``
        """
        if not hasattr(self, "history"):
            raise RuntimeError("Call fit first.")
        preds = self.model.predict(X)
        return self.__output_to_category(preds)

    def plot_history(self, plotname="NN", saving_file=None):
        '''
        Plot the training history (metric, validation metric, loss and
        smoothed kappa or learning rate) on a 2x2 grid.

        :param plotname: title of the figure
        :param saving_file: filename where to save plots; when None the
            figure is shown interactively instead
        :return: plt, to avoid garbage collection and closing of the windows
            (None when the figure was saved to ``saving_file``)
        '''
        import matplotlib.pyplot as plt

        records = self.history.history
        interactive = saving_file is None
        if interactive:
            plt.ion()
            plt.show()
        fig = plt.figure()
        plt.grid(True)
        plt.title(plotname)

        if self.metric_plot in records:
            plt.subplot(221)
            plt.plot(records[self.metric_plot])
            plt.ylabel(self.metric_plot + " ")
            if interactive:
                plt.draw()

        val_key = "val_" + self.metric_plot
        if val_key in records:
            plt.subplot(222)
            plt.plot(records[val_key])
            plt.ylabel(val_key + " ")
            if interactive:
                plt.draw()

        plt.subplot(223)
        plt.plot(records['loss'])
        plt.ylabel('"loss" ' + " " + plotname)
        if interactive:
            plt.draw()

        plt.subplot(224)
        if "val_cohen_kappa_smoothed" in records:
            plt.plot(records['val_cohen_kappa_smoothed'])
            plt.ylabel("val_cohen_kappa_smoothed")
        elif 'lr' in records:
            # Only plotted when the history actually recorded it.
            plt.plot(records['lr'])
            plt.ylabel('"lr"' + " " + plotname)
        if interactive:
            plt.draw()
            plt.pause(1)

        if saving_file:
            fig.savefig(saving_file)
            plt = None  # send to garbage
        return plt
predictions = np.argmax(model_sum_best_probs,axis=1) correct = np.sum(predictions==labels_val) accuracy = correct / len(dog_val.dataset) print('Current Ensemble predictive accuracy:i ' + str(accuracy)) if best_ensemble_val_acc < accuracy: best_ensemble_val_acc = accuracy best_model = model_idx+1 print('Record best ensemble predictive accuracy: ' + str(best_ensemble_val_acc)) print('Record best model ensemble: ' + str(model_names[:best_model]) +'\n') #print(best_ensemble_val_acc) predictions = np.argmax(model_sum_best_probs,axis=1) correct = np.sum(predictions==labels_val) accuracy = correct / len(dog_val.dataset) correct_filenames = filenames_val[predictions==labels_val] correct_labels = le.inverse_transform(labels_val[predictions==labels_val]) print('Correct classifications:') print('file name, dog breed') print(np.vstack((correct_filenames[:10], correct_labels[:10])).T) incorrect_filenames = filenames_val[predictions!=labels_val] predicted_labels = le.inverse_transform(predictions[predictions!=labels_val][:10]) true_labels = le.inverse_transform(labels_val[predictions!=labels_val][:10]) print('incorrect filenames, predictions, true labels') dummy = np.vstack((incorrect_filenames[:10], predicted_labels[:10])) print(np.vstack((dummy,true_labels)))
def r_precision(S:np.ndarray, y:np.ndarray, metric:str='distance',
                average:str='weighted', return_y_pred:int=0,
                verbose:int=0, n_jobs:int=1) -> dict:
    """ Calculate R-Precision (recall at R-th position).

    Parameters
    ----------
    S : ndarray or CSR matrix
        Distance (similarity) matrix

    y : ndarray
        Target (ground truth) labels

    metric : 'distance' or 'similarity', optional, default: 'distance'
        Define, whether `S` is a distance or similarity matrix.
        Only sparse similarity matrices are implemented so far, so the
        default value currently raises ``NotImplementedError``.

    average : 'weighted', 'macro' or None, optional, default: 'weighted'
        Ignored. Weighted and macro precisions are returned.

    return_y_pred : int, optional, default: 0
        If > 0, return the labels of the `return_y_pred` nearest neighbors

    verbose : int, optional, default: 0
        Increasing level of output.

    n_jobs : int, optional, default: 1
        Number of parallel processes to use.

    Returns
    -------
    r_precision : dictionary with following keys:
        macro : float
            Macro R-Precision.
        weighted : float
            Weighted R-Precision.
        per_item : ndarray
            R-Precision at the object.
        relevant_items : ndarray
            Relevant items per class.
        y_true : ndarray
            Target labels (req. for weighting).
        y_pred : ndarray
            Labels of some k-nearest neighbors

    Raises
    ------
    NotImplementedError
        If `metric` is not 'similarity' or `S` is not sparse.
    """
    io.check_distance_matrix_shape(S)
    io.check_distance_matrix_shape_fits_labels(S, y)
    io.check_valid_metric_parameter(metric)
    log = ConsoleLogging()
    n, _ = S.shape
    S_is_sparse = issparse(S)
    if metric != 'similarity' or not S_is_sparse:
        raise NotImplementedError("Only sparse similarity matrices so far.")

    # Map labels to 0..n(labels)-1
    le = LabelEncoder()
    # Add int.min as the label for misclassifications. It is spelled out
    # explicitly because casting NaN to int is undefined behavior.
    incorr_orig = np.array([np.iinfo(int).min], dtype=int)
    le.fit(np.append(y, incorr_orig))
    y = le.transform(y)
    incorrect = le.transform(incorr_orig)

    # Number of relevant items, i.e. number of each label
    relevant_items = np.bincount(y) - 1  # one less for self class

    # R-Precision for each item
    r_prec = np.zeros(n, dtype=float)

    # Classify each point in test set
    if verbose:
        log.message("Creating shared memory data.")
    n_random_pred = mp.Value(ctypes.c_int)
    n_random_pred.value = 0
    if verbose and log:
        log.message("Spawning processes for prediction.")
    y_pred = np.zeros((n, return_y_pred), dtype=float)
    kwargs = {'y_pred' : return_y_pred,
              'incorrect' : incorrect}
    with mp.Pool(processes=n_jobs,
                 initializer=_load_shared_csr,
                 initargs=(S, y, n_random_pred, relevant_items)) as pool:
        results = pool.imap(func=partial(_r_prec_worker, **kwargs),
                            iterable=range(n),
                            chunksize=int(1e2))
        for i, r in enumerate(results):
            if verbose and ((i+1)%int(1e7 / 10**verbose) == 0 or i == n-1):
                log.message("Classification: {} of {} on {}.".format(
                            i+1, n, mp.current_process().name), flush=True)
            try:
                # Worker returned (r_precision, neighbor labels).
                r_prec[i] = r[0]
                y_pred[i, :] = r[1]
            except (TypeError, IndexError):
                # Worker returned a bare scalar R-Precision.
                r_prec[i] = r
    # Leaving the ``with`` block terminates the pool; join() is only valid
    # once the pool is closed or terminated.
    pool.join()

    if verbose and log:
        log.message("Retrieving nearest neighbors.")
    # Work-around for new scikit-learn requirement of 1D arrays for
    # LabelEncoder
    y_pred = np.asarray(
        [le.inverse_transform(col) for col in y_pred.T.astype(int)]).T

    if verbose and log:
        log.message("Finishing.")
    if n_random_pred.value:
        log.warning(("{} queries were classified randomly, because all "
                     "distances were non-finite numbers or there were no other "
                     "objects in the same class.").format(n_random_pred.value))

    return_dict = {'macro' : r_prec.mean(),
                   'weighted' : np.average(r_prec, weights=relevant_items[y]),
                   'per_item' : r_prec,
                   'relevant_items' : relevant_items,
                   'y_true' : y,
                   'y_pred' : y_pred}
    return return_dict
class ColumnEnsembleClassifier(BaseClassifier):
    """Applies estimators to columns of an array or pandas DataFrame.

    Each estimator is fitted on its own column (subset) of the input, and
    the class probabilities predicted by all estimators are averaged to
    form a single output.

    Parameters
    ----------
    estimators : list of tuples
        List of (name, estimator, column(s)) tuples specifying the estimator
        objects to be applied to subsets of the data.

        name : string
            Like in Pipeline and FeatureUnion, this allows the estimator and
            its parameters to be set using ``set_params`` and searched in
            grid search.
        estimator : estimator or {'drop'}
            Estimator must support `fit` and `predict_proba`. The special
            string 'drop' is accepted as well, to indicate that the
            column(s) are skipped.
        column(s) : string or int, array-like of string or int, slice, \
boolean mask array or callable
            A callable is called with ``X`` to obtain the column
            specification.

    remainder : {'drop'} or estimator, default 'drop'
        By default, only the specified columns in `estimators` are used and
        the non-specified columns are dropped. By setting ``remainder`` to
        be an estimator, the remaining non-specified columns are handled by
        that estimator, which must support `fit` and `predict_proba`.

    verbose : boolean, default False
        Stored on the instance; not read by the methods of this class.
    """

    def __init__(self, estimators, remainder='drop', verbose=False):
        self.estimators = estimators
        self.remainder = remainder
        self.verbose = verbose

    @property
    def _estimators(self):
        # (name, estimator) view of ``estimators`` without the columns.
        return [(name, estim) for name, estim, _ in self.estimators]

    @_estimators.setter
    def _estimators(self, value):
        # Re-attach the existing column specification to the new pairs.
        self.estimators = [
            (name, estim, col) for ((name, estim), (_, _, col))
            in zip(value, self.estimators)]

    # from metaestimators.py
    def _get_params(self, attr, deep=True):
        out = super().get_params(deep=deep)
        if not deep:
            return out
        estimators = getattr(self, attr)
        # ``estimators`` is a list of (name, estimator) pairs.
        out.update(estimators)
        for name, estimator in estimators:
            if hasattr(estimator, 'get_params'):
                for key, value in estimator.get_params(deep=True).items():
                    out['%s__%s' % (name, key)] = value
        return out

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """
        return self._get_params('_estimators', deep=deep)

    # from metaestimators.py
    def _set_params(self, attr, **params):
        # Ensure strict ordering of parameter setting:
        # 1. All steps
        if attr in params:
            setattr(self, attr, params.pop(attr))
        # 2. Step replacement
        items = getattr(self, attr)
        names = []
        if items:
            names, _ = zip(*items)
        for name in list(params.keys()):
            if '__' not in name and name in names:
                self._replace_estimator(attr, name, params.pop(name))
        # 3. Step parameters and other initialisation arguments
        super().set_params(**params)
        return self

    # from metaestimators.py
    def _replace_estimator(self, attr, name, new_val):
        # assumes `name` is a valid estimator name
        new_estimators = list(getattr(self, attr))
        for i, (estimator_name, _) in enumerate(new_estimators):
            if estimator_name == name:
                new_estimators[i] = (name, new_val)
                break
        setattr(self, attr, new_estimators)

    def set_params(self, **kwargs):
        """Set the parameters of this estimator.

        Valid parameter keys can be listed with ``get_params()``.

        Returns
        -------
        self
        """
        self._set_params('_estimators', **kwargs)
        return self

    def _validate_estimators(self):
        """Check estimator names and that each estimator is usable.

        Raises
        ------
        TypeError
            If an estimator is neither 'drop' nor an object implementing
            both `fit` and `predict_proba`.
        """
        if not self.estimators:
            return

        names, estimators, _ = zip(*self.estimators)
        self._validate_names(names)

        # validate estimators
        for t in estimators:
            if t == 'drop':
                continue
            # Both methods are needed: `fit` here, `predict_proba` when
            # predicting.
            if not (hasattr(t, "fit") and hasattr(t, "predict_proba")):
                raise TypeError(
                    "All estimators should implement fit and predict_proba "
                    "or can be 'drop' "
                    "specifiers. '%s' (type %s) doesn't." % (t, type(t)))

    def _validate_names(self, names):
        """Check that names are unique, do not clash with constructor
        arguments and do not contain ``__``."""
        if len(set(names)) != len(names):
            raise ValueError('Names provided are not unique: '
                             '{0!r}'.format(list(names)))
        invalid_names = set(names).intersection(self.get_params(deep=False))
        if invalid_names:
            raise ValueError('Estimator names conflict with constructor '
                             'arguments: {0!r}'.format(sorted(invalid_names)))
        invalid_names = [name for name in names if '__' in name]
        if invalid_names:
            raise ValueError('Estimator names must not contain __: got '
                             '{0!r}'.format(invalid_names))

    def _validate_column_callables(self, X):
        """
        Converts callable column specifications.
        """
        columns = []
        for _, _, column in self.estimators:
            if callable(column):
                column = column(X)
            columns.append(column)
        self._columns = columns

    def _validate_remainder(self, X):
        """
        Validates ``remainder`` and defines ``_remainder`` targeting
        the remaining columns.
        """
        is_estimator = (hasattr(self.remainder, "fit")
                        and hasattr(self.remainder, "predict_proba"))
        # Compare for equality: ``not in ('drop')`` is a substring test on a
        # plain string and raises TypeError for estimator objects.
        if self.remainder != 'drop' and not is_estimator:
            raise ValueError(
                "The remainder keyword needs to be 'drop', '%s' was passed "
                "instead" % (self.remainder,))

        n_columns = X.shape[1]
        cols = []
        for columns in self._columns:
            cols.extend(_get_column_indices(X, columns))
        # None (rather than an empty list) marks "no remaining columns".
        remaining_idx = sorted(set(range(n_columns)) - set(cols)) or None

        self._remainder = ('remainder', self.remainder, remaining_idx)

    def _iter(self, replace_strings=False):
        """
        Generate (name, estimator, column) tuples.

        Uses the user specified estimators updated with the converted column
        specifications and, when columns remain, appended with the remainder
        entry. With ``replace_strings=True``, 'drop' entries and entries with
        an empty column selection are skipped.
        """
        # interleave the validated column specifiers
        estimators = [
            (name, estims, column) for (name, estims, _), column
            in zip(self.estimators, self._columns)
        ]
        # add estimator tuple for remainder
        if self._remainder[2] is not None:
            estimators = chain(estimators, [self._remainder])

        for name, trans, column in estimators:
            if replace_strings:
                # skip in case of 'drop'
                if trans == 'drop':
                    continue
                elif _is_empty_column_selection(column):
                    continue

            yield (name, trans, column)

    def fit(self, X, y, input_checks=True):
        """Fit all estimators, fit the data

        Parameters
        ----------
        X : array-like or DataFrame of shape [n_samples, n_dimensions,
            n_length]
            Input data, of which specified subsets are used to fit the
            estimators.

        y : array-like, shape (n_samples, ...)
            Targets for supervised learning. They are label-encoded before
            being passed to the estimators.

        input_checks : boolean, default True
            Not used by this method.

        Returns
        -------
        self

        Raises
        ------
        AttributeError
            If ``estimators`` is None or empty.
        """
        if self.estimators is None or len(self.estimators) == 0:
            raise AttributeError('Invalid `estimators` attribute, `estimators`'
                                 ' should be a list of (string, estimator)'
                                 ' tuples')

        self._validate_estimators()
        self._validate_column_callables(X)
        self._validate_remainder(X)

        self.le_ = LabelEncoder().fit(y)
        self.classes_ = self.le_.classes_
        transformed_y = self.le_.transform(y)

        for name, estim, column in self._iter(replace_strings=True):
            estim.fit(_get_column(X, column), transformed_y)

        return self

    def _collect_probas(self, X):
        """Stack ``predict_proba`` of every active estimator along axis 0."""
        return np.asarray([
            estim.predict_proba(_get_column(X, column))
            for (name, estim, column) in self._iter(replace_strings=True)
        ])

    # TODO: check if it is fitted
    def predict_proba(self, X, input_checks=True):
        """Predict class probabilities for X in 'soft' voting,
        i.e. the average over the estimators' probabilities."""
        avg = np.average(self._collect_probas(X), axis=0)
        return avg

    def _predict(self, X):
        """Collect results from the estimators' predict_proba calls."""
        return self._collect_probas(X)

    def predict(self, X, input_checks=True):
        """Predict the class with the highest averaged probability,
        mapped back to the original labels."""
        maj = np.argmax(self.predict_proba(X), axis=1)
        return self.le_.inverse_transform(maj)
def predict(self):
    """Run the model on audio files from the selected directory and export
    a predicted-vs-actual comparison table.

    Labels are derived from the file names, MFCC features are extracted with
    librosa, a random subset of the rows is fed to ``self.model`` and the
    comparison is written to a CSV file and printed. A warning dialog is
    shown if the directory does not exist.
    """
    # Emotion code (characters [6:-16] of the file name) -> emotion name.
    emotions = {
        '02': 'calm',
        '03': 'happy',
        '04': 'sad',
        '05': 'angry',
        '06': 'fearful',
    }
    # Files with these codes or name prefixes get no feature row.
    skipped_codes = ('01', '07', '08')
    skipped_prefixes = ('su', 'n', 'd', 'A')

    try:
        # Raises if the path has not been defined.
        path = self.path.get()
        mylist = os.listdir(path)

        # Build the labels: the number at [18:-4] is even for female and
        # odd for male recordings.
        feeling_list = []
        for item in mylist:
            emotion = emotions.get(item[6:-16])
            if emotion is None:
                continue
            gender = 'female' if int(item[18:-4]) % 2 == 0 else 'male'
            feeling_list.append(gender + '_' + emotion)
        labels = pd.DataFrame(feeling_list)

        # Extract one MFCC feature vector per retained file.
        # NOTE(review): the label filter and the feature filter differ, so
        # rows only line up if no file matches one filter but not the
        # other - confirm against the data set.
        df = pd.DataFrame(columns=['feature'])
        bookmark = 0
        for name in mylist:
            if name[6:-16] in skipped_codes:
                continue
            if name.startswith(skipped_prefixes):
                continue
            X, sample_rate = librosa.load(os.path.join(path, name),
                                          res_type='kaiser_fast',
                                          duration=2.5,
                                          sr=22050 * 2,
                                          offset=0.5)
            sample_rate = np.array(sample_rate)
            mfccs = np.mean(librosa.feature.mfcc(y=X,
                                                 sr=sample_rate,
                                                 n_mfcc=13),
                            axis=0)
            df.loc[bookmark] = [mfccs]
            bookmark += 1

        df3 = pd.DataFrame(df['feature'].values.tolist())

        # Store the features and the matching emotion in one table; the
        # label ends up as the last column.
        newdf = pd.concat([df3, labels], axis=1)
        rnewdf = shuffle(newdf)
        rnewdf = rnewdf.fillna(0)

        # Randomly hold out rows; only the test part is used here.
        holdout_mask = np.random.rand(len(rnewdf)) < 0.2
        test = rnewdf[~holdout_mask]

        # Features are all columns but the last, the label is the last one.
        X_test = np.array(test.iloc[:, :-1])
        # Flatten to 1-D: LabelEncoder expects a 1-D label array.
        y_labels = np.array(test.iloc[:, -1:]).ravel()

        lb = LabelEncoder()
        y_test = np_utils.to_categorical(lb.fit_transform(y_labels))

        print('提取测试集...')
        x_testcnn = np.expand_dims(X_test, axis=2)
        print(x_testcnn)
        print('测试...')
        preds = self.model.predict(x_testcnn, batch_size=32, verbose=1)

        predicted_codes = preds.argmax(axis=1).astype(int).flatten()
        predictions = lb.inverse_transform(predicted_codes)
        preddf = pd.DataFrame({'predicted_values': predictions})

        actual_codes = y_test.argmax(axis=1).astype(int).flatten()
        actualvalues = lb.inverse_transform(actual_codes)
        actualdf = pd.DataFrame({'actual_values': actualvalues})

        finaldf = actualdf.join(preddf)
        finaldf.to_csv('H:\\预测实际对照表.csv', index=False)
        showinfo("提示", "表格打印完成,已保存到H盘目录下")
        print('\n\n输出预测值与实际值的对比表格:\n\n')
        print(
            finaldf.groupby('actual_values').count().join(
                finaldf.groupby('predicted_values').count()))
    except FileNotFoundError:
        showwarning('warning', '该路径不存在,请重新输入')