Esempio n. 1
0
def test_label_encoder():
    """Round-trip integer labels through LabelEncoder and reject unseen ones."""
    raw_labels = [0, 1, 4, 4, 5, -1, -1]
    encoded_labels = [1, 2, 3, 3, 4, 0, 0]

    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])
    assert_array_equal(encoder.transform(raw_labels), encoded_labels)
    assert_array_equal(encoder.inverse_transform(encoded_labels), raw_labels)
    # 6 was not part of the fitted labels, so transform is expected to fail.
    assert_raises(ValueError, encoder.transform, [0, 6])
Esempio n. 2
0
def test_label_encoder_empty_array():
    """Check that a fitted LabelEncoder maps empty input to empty output."""
    encoder = LabelEncoder()
    encoder.fit(np.array(["1", "2", "1", "2", "2"]))
    empty = np.array([])
    # transform of an empty sequence
    assert_array_equal(empty, encoder.transform([]))
    # inverse_transform of an empty sequence
    assert_array_equal(empty, encoder.inverse_transform([]))
Esempio n. 3
0
def test_label_encoder_negative_ints():
    """Verify LabelEncoder on a label set that includes a negative integer."""
    fit_labels = [1, 1, 4, 5, -1, 0]
    decoded = [0, 1, 4, 4, 5, -1, -1]
    encoded = [1, 2, 3, 3, 4, 0, 0]

    encoder = LabelEncoder()
    encoder.fit(fit_labels)
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])
    assert_array_equal(encoder.transform(decoded), encoded)
    assert_array_equal(encoder.inverse_transform(encoded), decoded)
    # A label absent from the fitted set must be rejected.
    assert_raises(ValueError, encoder.transform, [0, 6])
def test_label_encoder_negative_ints():
    """Encode and decode integer labels, one of which is negative."""
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])

    expected_classes = [-1, 0, 1, 4, 5]
    assert_array_equal(encoder.classes_, expected_classes)

    codes = encoder.transform([0, 1, 4, 4, 5, -1, -1])
    assert_array_equal(codes, [1, 2, 3, 3, 4, 0, 0])

    labels = encoder.inverse_transform([1, 2, 3, 3, 4, 0, 0])
    assert_array_equal(labels, [0, 1, 4, 4, 5, -1, -1])

    # transform is expected to raise for the unfitted label 6.
    assert_raises(ValueError, encoder.transform, [0, 6])
def test_label_encoder_empty_array(values):
    """Check empty-input handling of a LabelEncoder fitted on ``values``.

    ``values`` is supplied by the caller (presumably a pytest
    parametrization that is not visible in this snippet).
    """
    encoder = LabelEncoder()
    encoder.fit(values)
    # Empty input to transform gives an empty result.
    assert_array_equal(np.array([]), encoder.transform([]))
    # Empty input to inverse_transform gives an empty result as well.
    assert_array_equal(np.array([]), encoder.inverse_transform([]))
Esempio n. 6
0
def test_label_encoder():
    """Test LabelEncoder's transform and inverse_transform methods."""
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    originals = [0, 1, 4, 4, 5, -1, -1]
    codes = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(originals), codes)
    assert_array_equal(encoder.inverse_transform(codes), originals)

    # Transforming a label that was not fitted (6) must raise.
    assert_raises(ValueError, encoder.transform, [0, 6])
Esempio n. 7
0
def test_label_encoder_string_labels():
    """Test LabelEncoder's transform and inverse_transform methods with
    non-numeric labels."""
    cities = ["tokyo", "tokyo", "paris"]
    city_codes = [2, 2, 1]

    encoder = LabelEncoder()
    encoder.fit(["paris", "paris", "tokyo", "amsterdam"])
    assert_array_equal(encoder.classes_, ["amsterdam", "paris", "tokyo"])
    assert_array_equal(encoder.transform(cities), city_codes)
    assert_array_equal(encoder.inverse_transform(city_codes), cities)
    # "london" was never fitted, so transform is expected to raise.
    assert_raises(ValueError, encoder.transform, ["london"])
Esempio n. 8
0
def test_label_encoder_string_labels():
    """Exercise LabelEncoder with string (non-numeric) labels."""
    encoder = LabelEncoder()
    encoder.fit(["paris", "paris", "tokyo", "amsterdam"])

    assert_array_equal(encoder.classes_, ["amsterdam", "paris", "tokyo"])

    encoded = encoder.transform(["tokyo", "tokyo", "paris"])
    assert_array_equal(encoded, [2, 2, 1])

    decoded = encoder.inverse_transform([2, 2, 1])
    assert_array_equal(decoded, ["tokyo", "tokyo", "paris"])

    # A city outside the fitted set must be rejected.
    assert_raises(ValueError, encoder.transform, ["london"])
Esempio n. 9
0
def test_label_encoder():
    """Test LabelEncoder's transform and inverse_transform methods."""
    originals = [0, 1, 4, 4, 5, -1, -1]
    codes = [1, 2, 3, 3, 4, 0, 0]

    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])
    assert_array_equal(encoder.transform(originals), codes)
    assert_array_equal(encoder.inverse_transform(codes), originals)
    assert_raises(ValueError, encoder.transform, [0, 6])

    # After refitting on strings, a bare string passed to transform is
    # expected to be rejected with a "bad input shape" message.
    encoder.fit(["apple", "orange"])
    assert_raise_message(ValueError, "bad input shape",
                         encoder.transform, "apple")
def test_label_encoder(values, classes, unknown):
    """Exercise fit/transform, inverse_transform and fit_transform.

    ``values``, ``classes`` and ``unknown`` are supplied by the caller
    (presumably a pytest parametrization that is not visible here).
    """
    expected_codes = [1, 0, 2, 0, 2]

    fitted = LabelEncoder()
    fitted.fit(values)
    assert_array_equal(fitted.classes_, classes)
    assert_array_equal(fitted.transform(values), expected_codes)
    assert_array_equal(fitted.inverse_transform(expected_codes), values)

    # A fresh encoder using fit_transform must produce the same codes.
    fresh = LabelEncoder()
    assert_array_equal(fresh.fit_transform(values), expected_codes)

    with pytest.raises(ValueError, match="unseen labels"):
        fresh.transform(unknown)
Esempio n. 11
0
def test_label_encoder():
    """Check LabelEncoder round-trips and its error on scalar string input."""
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    encoded = encoder.transform([0, 1, 4, 4, 5, -1, -1])
    assert_array_equal(encoded, [1, 2, 3, 3, 4, 0, 0])

    decoded = encoder.inverse_transform([1, 2, 3, 3, 4, 0, 0])
    assert_array_equal(decoded, [0, 1, 4, 4, 5, -1, -1])

    assert_raises(ValueError, encoder.transform, [0, 6])

    # Refit on strings; a bare string argument is expected to be rejected.
    encoder.fit(["apple", "orange"])
    expected_message = "bad input shape"
    assert_raise_message(ValueError, expected_message,
                         encoder.transform, "apple")
Esempio n. 12
0
def test_label_encoder(values, classes, unknown):
    """Test LabelEncoder's transform, fit_transform and inverse_transform.

    The three arguments come from the caller (presumably a pytest
    parametrization outside this snippet).
    """
    codes = [1, 0, 2, 0, 2]

    encoder = LabelEncoder()
    encoder.fit(values)
    assert_array_equal(encoder.classes_, classes)
    assert_array_equal(encoder.transform(values), codes)
    assert_array_equal(encoder.inverse_transform(codes), values)

    # Start over with a new encoder and go through fit_transform instead.
    encoder = LabelEncoder()
    assert_array_equal(encoder.fit_transform(values), codes)

    with pytest.raises(ValueError, match="unseen labels"):
        encoder.transform(unknown)
Esempio n. 13
0
def test_label_encoder_errors():
    """Check that invalid LabelEncoder calls raise ValueError."""
    # An encoder that has not been fitted is expected to reject both calls.
    le = LabelEncoder()
    with pytest.raises(ValueError):
        le.transform([])
    with pytest.raises(ValueError):
        le.inverse_transform([])

    # Fail on unseen labels
    le = LabelEncoder()
    le.fit([1, 2, 3, -1, 1])
    msg = "contains previously unseen labels"
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform([-2])
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform([-2, -3, -4])

    # Fail on inverse_transform("")
    # ``match`` is interpreted as a regular expression, so the literal
    # parentheses must be escaped; an unescaped "()" is an empty group and
    # the parentheses in the message would never actually be checked.
    msg = r"bad input shape \(\)"
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform("")
Esempio n. 14
0
class CustomNNCategorical(CustomNNBase):
    """
    Base for custom sk classifier implementing NN using keras.

    Implements an MLP for classification: a ``Sequential`` stack of ``Dense``
    layers with optional ``Dropout`` and L2 regularisation, closed by a
    softmax layer. Cohen's kappa is tracked through a ``Cohen_kappa_logger``
    callback rather than a Keras metric. No custom loss for now (@todo).
    """
    def __init__(self,
                 hidden=[200, 100, 50, 20],
                 dropout=[0.1, 0.1],
                 reg=[0.05, 0.05],
                 h_act=[relu],
                 epoch=500,
                 batch_size=32,
                 cbEarly="metric",
                 loss="categorical_crossentropy",
                 optimizer='adam',
                 metrics=['cohen_kappa'],
                 kappa_weights="quadratic",
                 validation=0.2,
                 smooth_cb=True):
        '''
        :param hidden: number of units of each hidden layer
        :param dropout: dropout rates; dropout[0] is applied to the input,
         the following ones after each hidden layer
        :param reg: L2 regularization factors, one per hidden layer
        :param h_act: hidden activations, repeated to cover all hidden layers
        :param epoch: number of training epochs
        :param batch_size: training batch size
        :param cbEarly: "metric" or an EarlyStopping instance
        :param loss: "categorical_crossentropy", lossOCC or lossOCCQuadratic
        :param optimizer: optimizer passed to the model compilation
        :param metrics: "Accuracy" or 'cohen_kappa' (a single entry)
        :param kappa_weights: compatible with sk(ex:"quadratic", None) ignored if metrics != 'cohen_kappa'
        :param validation: float (validation fraction), tuple (X_val, y_val)
         or None
        :param smooth_cb: if True EarlyStopping use val_cohen_kappa smoothed (left avg window 3),
         only with val_cohen_kappa

        :note restore_best_weights requires keras 2.2.3
        :note the list defaults are kept for interface compatibility; this
         class only reads them and never mutates them in place.
        '''
        assert loss in ["categorical_crossentropy", lossOCC, lossOCCQuadratic]
        CustomNNBase.__init__(self, epoch, loss, optimizer, metrics,
                              batch_size)
        # 'categorical_crossentropy', OCC.lossOCCQuadratic, lossOCC
        assert (len(hidden) > 0 and len(hidden) + 1 >= len(dropout)
                and len(hidden) >= len(reg) and len(hidden) >= len(h_act))

        self.hidden = hidden
        self.dropout = dropout
        self.reg = reg
        self.h_act = h_act
        self.validation = validation

        self.final_activation = softmax

        self.cbEarly = cbEarly
        self.smooth_cb = smooth_cb

        self.cbReduceLR = ReduceLROnPlateau(monitor='loss',
                                            factor=0.8,
                                            patience=3,
                                            verbose=0,
                                            mode='auto',
                                            min_delta=0.0001,
                                            cooldown=0,
                                            min_lr=0)

        self.kappa_weights = kappa_weights
        if len(self.metrics) > 1:
            # Raising a plain string is itself a TypeError in Python 3.
            raise NotImplementedError("TODO")

    def __compile(self, input_shape, output_shape):
        """Build and compile ``self.model`` for the given layer sizes.

        :param input_shape: number of input features
        :param output_shape: number of units of the final softmax layer
        """
        def item_or_none(seq, idx):
            # Optional per-layer settings may be shorter than ``hidden``.
            return seq[idx] if idx < len(seq) else None

        reg = [regularizers.l2(i) for i in self.reg
               ]  #@TODO ALSO USE L1 FOR BETTER FEATURE SELECTION
        # Ceiling division: the repeated list must cover every hidden layer
        # (round() could come up short, e.g. 5 layers with 2 activations).
        repeats = -(-len(self.hidden) // len(self.h_act))
        h_act = self.h_act * repeats

        self.model = Sequential()

        self.model.add(InputLayer(input_shape=(input_shape, )))
        input_dropout = item_or_none(self.dropout, 0)
        if input_dropout is not None:
            self.model.add(Dropout(input_dropout))

        for i, units in enumerate(self.hidden):
            self.model.add(
                Dense(units,
                      activation=h_act[i],
                      kernel_regularizer=item_or_none(reg, i),
                      bias_regularizer=item_or_none(reg, i)))
            layer_dropout = item_or_none(self.dropout, i + 1)  # first for input
            if layer_dropout is not None:
                self.model.add(Dropout(layer_dropout))

        self.model.add(Dense(output_shape, activation=self.final_activation))

        # ``_keras_metrics`` is set by ``_kappa_disambiguation``; fall back to
        # the user-supplied metrics if it has not been set.
        self.model.compile(optimizer=self.optimizer,
                           loss=self.loss,
                           metrics=getattr(self, "_keras_metrics",
                                           self.metrics))

    def __category_to_output(self, y, fit=True):
        """One-hot encode class labels.

        :param y: class labels
        :param fit: if True (training labels) a new ``LabelEncoder`` is
         fitted and stored; if False (e.g. validation labels) the already
         fitted encoder is reused so that codes and the one-hot width stay
         consistent with the training labels
        :return: one-hot encoded target produced by ``to_categorical``
        """
        if fit:
            self.label_encoder = LabelEncoder()
            codes = self.label_encoder.fit_transform(y)
        else:
            codes = self.label_encoder.transform(y)
        return to_categorical(codes,
                              num_classes=len(self.label_encoder.classes_))

    def __output_to_category(self, output):
        """Map network outputs back to the original class labels (argmax per row)."""
        pred = [np.argmax(i) for i in output]
        pred = self.label_encoder.inverse_transform(pred)
        return pred

    def cohen_kappa_metric_keras(self, y_true, y_pred):
        '''
        Do not work as a metric because kappa is not linear and keras make a weighted avg of batches score
        :deprecated @see Cohen_kappa_logger
        '''
        raise NotImplementedError("deprecated @see Cohen_kappa_logger")

    def cohen_kappa_score(self, y_true, y_pred):
        '''
        :deprecated @see Cohen_kappa_logger
        '''
        raise NotImplementedError("deprecated @see Cohen_kappa_logger")

    def break_on_epoch_n(self, threshold, sec=60):
        """Record the number of trained epochs and sleep ``sec`` seconds if it
        exceeds ``threshold``."""
        self.n_epoch = len(self.history.history["loss"])
        if self.n_epoch > threshold:
            sleep(sec)  # cool down

    def _fit_val(self, X, output):
        """Train ``self.model`` according to the type of ``self.validation``.

        :param X: training features
        :param output: one-hot encoded training targets
        :raises ValueError: if ``self.validation`` is not a float, a tuple or
         None
        """
        fit_kwargs = dict(epochs=self.epoch,
                          batch_size=self.batch_size,
                          callbacks=self.callback_list,
                          verbose=0)

        if isinstance(self.validation, float):
            fit_kwargs["validation_split"] = self.validation
        elif isinstance(self.validation, tuple):
            assert self.validation[0].shape[1] == X.shape[
                1], "X_validation must be transformed with prep first"
            # Encode into a local tuple: overwriting ``self.validation`` would
            # leave one-hot targets behind and break any later fit.
            fit_kwargs["validation_data"] = (
                self.validation[0],
                self.__category_to_output(self.validation[1], fit=False))
        elif self.validation is not None:
            raise ValueError("unknown validation type")

        self.history = self.model.fit(X, output, **fit_kwargs)

    def _kappa_disambiguation(self, X, output):
        '''
        Set up the metric dependent state (plot key, early stopping callback,
        kappa logger) before training.

        :param X: training features
        :param output: one-hot encoded training targets
        :return: (X, output) to train on; when ``self.validation`` is a float
         these are the training part of a ``train_test_split``
        '''
        self.metric_plot = None
        self.patience = 20  #for cbEarly is enoughfrom observation  @todo in init

        metric = self.metrics[0]
        if metric == "accuracy":
            self.metric_plot = "acc"
            # Accuracy monitoring was never finished: min_delta has to be
            # chosen for val_acc and smoothing is not available for it.
            raise NotImplementedError(
                "min_delta must be redefined according to val_acc")
        if metric != 'cohen_kappa':
            raise NotImplementedError("not implemented: %r" % (metric, ))

        # 'cohen_kappa_metric' cannot be supported @see explication in
        # Cohen_kappa_logger, so the model is compiled without Keras metrics.
        # ``self.metrics`` itself is left untouched so a second fit still works.
        self._keras_metrics = None
        self.metric_plot = 'cohen_kappa'
        if self.cbEarly == "metric":
            if self.validation:
                monitor = "val_cohen_kappa_smoothed" if self.smooth_cb else "val_cohen_kappa"
            elif self.smooth_cb:
                raise ValueError("No cohen_kappa_smoothed")
            else:
                monitor = "cohen_kappa"
            print("monitor", monitor)
            self.cbEarly = EarlyStopping(
                monitor=monitor,
                min_delta=0.00000001,
                patience=self.patience,  # a large patience is necessary!
                verbose=0,
                mode='max',
                restore_best_weights=True)

        # Without validation data the logger receives None for both values
        # (previously these names were unbound in that case).
        # NOTE(review): confirm Cohen_kappa_logger accepts X_val=None.
        X_val = y_val = None
        if isinstance(self.validation, float):
            X, X_val, output, y_val = train_test_split(
                X, output, test_size=self.validation)
        elif isinstance(self.validation, tuple):
            assert self.validation[0].shape[1] == X.shape[
                1], "X_validation must be transformed with prep first"
            X_val = self.validation[0]
            # Reuse the encoder fitted on the training labels.
            y_val = self.__category_to_output(self.validation[1], fit=False)
        elif self.validation is not None:
            raise ValueError("unknown validation type")

        #             self.validation = None # can slightly reduce computation but need val_loss for callback LRReduceOnPlateau

        self.kappa_logger = Cohen_kappa_logger(
            output_to_category=self.__output_to_category,
            X_train=X,
            y_train=output,
            X_val=X_val,
            y_val=y_val,
            kappa_weights=self.kappa_weights)

        return X, output

    def fit(self, X, y=None):
        '''
        Encode the labels, build the network and train it.

        :param X: training features, indexed as X.shape[1] for the input size
        :param y: class labels
        :return: self
        '''
        output = self.__category_to_output(y)

        X, output = self._kappa_disambiguation(X, output)

        output_shape = output.shape[1]
        input_shape = X.shape[1]
        self.__compile(input_shape, output_shape)

        self.callback_list = [
            cb for cb in (self.kappa_logger, self.cbReduceLR, self.cbEarly)
            if cb
        ]

        self._fit_val(X, output)

        self.break_on_epoch_n(50)
        return self

    def predict(self, X, y=None):
        """Predict class labels for ``X``.

        :raises RuntimeError: if the estimator has no training history yet
        """
        if not hasattr(self, "history"):
            raise RuntimeError("Call fit first.")

        preds = self.model.predict(X)
        return self.__output_to_category(preds)

    def plot_history(self, plotname="NN", saving_file=None):
        '''
        Plot the training curves recorded in ``self.history``.

        :param plotname: title of the figure
        :param saving_file: filename where to save plots; when None the
         figure is shown interactively instead
        :return plt , to avoid garbage collection and closing of the windows
         (None when the figure was saved to ``saving_file``)
        '''
        import matplotlib.pyplot as plt

        records = self.history.history
        interactive = saving_file is None

        if interactive:
            plt.ion()
            plt.show()

        fig = plt.figure()
        plt.grid(True)
        plt.title(plotname)

        if self.metric_plot in records:
            plt.subplot(221)
            plt.plot(records[self.metric_plot])
            plt.ylabel(self.metric_plot + "  ")
            if interactive:
                plt.draw()

        val_key = "val_" + self.metric_plot
        if val_key in records:
            plt.subplot(222)
            plt.plot(records[val_key])
            plt.ylabel(val_key + "  ")
            if interactive:
                plt.draw()

        plt.subplot(223)
        plt.plot(records['loss'])
        plt.ylabel('"loss" ' + "  " + plotname)
        if interactive:
            plt.draw()

        plt.subplot(224)
        if "val_cohen_kappa_smoothed" in records:
            plt.plot(records['val_cohen_kappa_smoothed'])
            plt.ylabel("val_cohen_kappa_smoothed")
        else:
            plt.plot(records['lr'])
            plt.ylabel('"lr"' + "  " + plotname)
        if interactive:
            plt.draw()
            plt.pause(1)

        if saving_file:
            fig.savefig(saving_file)
            plt = None  # send to garbage

        return plt
Esempio n. 15
0
    predictions = np.argmax(model_sum_best_probs,axis=1)
    correct = np.sum(predictions==labels_val)
    accuracy = correct / len(dog_val.dataset)
    print('Current Ensemble predictive accuracy:i ' + str(accuracy))

    if best_ensemble_val_acc < accuracy:
        best_ensemble_val_acc = accuracy
        best_model = model_idx+1
    print('Record best ensemble predictive accuracy: ' + str(best_ensemble_val_acc))
    print('Record best model ensemble: ' + str(model_names[:best_model]) +'\n')
    #print(best_ensemble_val_acc)

# Evaluate the final ensemble on the validation set and print a few examples
# of correct and incorrect classifications.
predictions = np.argmax(model_sum_best_probs, axis=1)
is_correct = predictions == labels_val
is_wrong = predictions != labels_val

correct = np.sum(is_correct)
accuracy = correct / len(dog_val.dataset)
correct_filenames = filenames_val[is_correct]
correct_labels = le.inverse_transform(labels_val[is_correct])

print('Correct classifications:')
print('file name, dog breed')
correct_table = np.vstack((correct_filenames[:10], correct_labels[:10]))
print(correct_table.T)

incorrect_filenames = filenames_val[is_wrong]
# Only the first ten misclassified samples are decoded back to label names.
predicted_labels = le.inverse_transform(predictions[is_wrong][:10])
true_labels = le.inverse_transform(labels_val[is_wrong][:10])
print('incorrect filenames, predictions, true labels')

dummy = np.vstack((incorrect_filenames[:10], predicted_labels[:10]))
print(np.vstack((dummy, true_labels)))
def r_precision(S:np.ndarray, y:np.ndarray, metric:str='distance',
                average:str='weighted', return_y_pred:int=0,
                verbose:int=0, n_jobs:int=1) -> dict:
    """ Calculate R-Precision (recall at R-th position).

    Parameters
    ----------
    S : ndarray or CSR matrix
        Distance (similarity) matrix

    y : ndarray
        Target (ground truth) labels

    metric : 'distance' or 'similarity', optional, default: 'distance'
        Define, whether `S` is a distance or similarity matrix.
        Currently only sparse similarity matrices are implemented, so the
        default value raises ``NotImplementedError``.

    average : 'weighted', 'macro' or None, optional, default: 'weighted'
        Ignored. Weighted and macro precisions are returned.

    return_y_pred : int, optional, default: 0
        If > 0, return the labels of the `return_y_pred` nearest neighbors

    verbose : int, optional, default: 0
        Increasing level of output.

    n_jobs : int, optional, default: 1
        Number of parallel processes to use.

    Returns
    -------
    r_precision : dictionary with following keys:
        macro : float
            Macro R-Precision.

        weighted : float
            Weighted R-Precision.

        per_item : ndarray
            R-Precision at the object.

        relevant_items : ndarray
            Relevant items per class.

        y_true : ndarray
            Target labels (req. for weighting).

        y_pred : ndarray
            Labels of some k-nearest neighbors

    Raises
    ------
    NotImplementedError
        If `metric` is not 'similarity' or `S` is not sparse.
    """
    io.check_distance_matrix_shape(S)
    io.check_distance_matrix_shape_fits_labels(S, y)
    io.check_valid_metric_parameter(metric)
    log = ConsoleLogging()
    n, _ = S.shape
    if metric != 'similarity' or not issparse(S):
        raise NotImplementedError("Only sparse similarity matrices so far.")

    # Map labels to 0..n(labels)-1
    le = LabelEncoder()
    # Add int.min as the label used for misclassifications. It is taken from
    # iinfo directly: casting NaN to int is platform dependent and warns in
    # recent NumPy versions.
    incorr_orig = np.array([np.iinfo(int).min], dtype=int)
    le.fit(np.append(y, incorr_orig))
    y = le.transform(y)
    incorrect = le.transform(incorr_orig)
    # Number of relevant items, i.e. number of each label
    relevant_items = np.bincount(y) - 1 # one less for self class
    # R-Precision for each item (np.float was removed from NumPy)
    r_prec = np.zeros(n, dtype=float)

    # Classify each point in test set
    if verbose:
        log.message("Creating shared memory data.")
    n_random_pred = mp.Value(ctypes.c_int)
    n_random_pred.value = 0
    if verbose and log:
        log.message("Spawning processes for prediction.")
    y_pred = np.zeros((n, return_y_pred), dtype=float)
    kwargs = {'y_pred' : return_y_pred,
              'incorrect' : incorrect}
    # Progress is reported every `report_every` items; clamp to 1 so that a
    # large `verbose` cannot turn the modulo into a division by zero.
    report_every = max(1, int(1e7 / 10**verbose)) if verbose else 1
    with mp.Pool(processes=n_jobs,
                 initializer=_load_shared_csr,
                 initargs=(S, y, n_random_pred, relevant_items)) as pool:
        for i, r in enumerate(
            pool.imap(
                func=partial(_r_prec_worker, **kwargs),
                iterable=range(n),
                chunksize=int(1e2))):
            if verbose and ((i+1) % report_every == 0 or i == n-1):
                log.message("Classification: {} of {} on {}.".format(
                            i+1, n, mp.current_process().name), flush=True)
            try:
                # Worker returned (precision, neighbor labels)
                r_prec[i] = r[0]
                y_pred[i, :] = r[1]
            except (TypeError, IndexError):
                # Worker returned a bare scalar precision
                r_prec[i] = r
    pool.join()

    if verbose and log:
        log.message("Retrieving nearest neighbors.")
    # Work-around for new scikit-learn requirement of 1D arrays for LabelEncoder
    y_pred = np.asarray([le.inverse_transform(col) for col in y_pred.T.astype(int)]).T
    if verbose and log:
        log.message("Finishing.")
    if n_random_pred.value:
        log.warning(("{} queries were classified randomly, because all "
                     "distances were non-finite numbers or there were no other "
                     "objects in the same class.").format(n_random_pred.value))
    return_dict = {'macro' : r_prec.mean(),
                   'weighted' : np.average(r_prec, weights=relevant_items[y]),
                   'per_item' : r_prec,
                   'relevant_items' : relevant_items,
                   'y_true' : y,
                   'y_pred' : y_pred}
    return return_dict
Esempio n. 17
0
class ColumnEnsembleClassifier(BaseClassifier):
    """Applies estimators to columns of an array or pandas DataFrame.

        This estimator allows different columns or column subsets of the input
        to be transformed separately and the features generated by each transformer
        will be ensembled to form a single output.

        Parameters
        ----------
        estimators : list of tuples
            List of (name, transformer, column(s)) tuples specifying the
            transformer objects to be applied to subsets of the data.

            name : string
                Like in Pipeline and FeatureUnion, this allows the transformer and
                its parameters to be set using ``set_params`` and searched in grid
                search.
            Estimator : estimator or {'drop'}
                Estimator must support `fit` and `predict_proba`. Special-cased
                strings 'drop' and 'passthrough' are accepted as well, to
                indicate to drop the columns
            column(s) : string or int, array-like of string or int, slice, \
                boolean mask array or callable


        remainder : {'drop', 'passthrough'} or estimator, default 'drop'
            By default, only the specified columns in `transformers` are
            transformed and combined in the output, and the non-specified
            columns are dropped. (default of ``'drop'``).
            By specifying ``remainder='passthrough'``, all remaining columns that
            were not specified in `transformers` will be automatically passed
            through. This subset of columns is concatenated with the output of
            the transformers.
            By setting ``remainder`` to be an estimator, the remaining
            non-specified columns will use the ``remainder`` estimator. The
            estimator must support :term:`fit` and :term:`transform`.

    """
    def __init__(self, estimators, remainder='drop', verbose=False):
        """Store the constructor arguments on the instance without modification."""
        self.estimators = estimators
        self.remainder = remainder
        self.verbose = verbose

    @property
    def _estimators(self):
        """Return ``self.estimators`` as ``(name, estimator)`` pairs.

        The third element of each entry (the column specification) is dropped.
        """
        return [(name, estim) for name, estim, _ in self.estimators]

    @_estimators.setter
    def _estimators(self, value):
        """Rebuild ``self.estimators`` from ``(name, estimator)`` pairs.

        The pairs in ``value`` are matched by position (``zip``) with the
        current ``self.estimators`` entries, whose column specification is
        reused for the new triples.
        """
        self.estimators = [(name, estim, col)
                           for ((name, estim),
                                (_, _, col)) in zip(value, self.estimators)]

    # from metaestimators.py
    def _get_params(self, attr, deep=True):
        """Collect parameters, optionally including those of nested estimators.

        Parameters
        ----------
        attr : str
            Name of the attribute holding ``(name, estimator)`` pairs.
        deep : bool
            If False, only the result of the parent ``get_params`` is returned.

        Returns
        -------
        dict
            Parent parameters, plus (when ``deep``) one entry per named
            estimator and ``<name>__<key>`` entries for each estimator that
            exposes ``get_params``.
        """
        params = super().get_params(deep=deep)
        if deep:
            named_estimators = getattr(self, attr)
            params.update(named_estimators)
            for est_name, est in named_estimators:
                if not hasattr(est, 'get_params'):
                    continue
                nested = est.get_params(deep=True)
                for key, value in nested.items():
                    params['%s__%s' % (est_name, key)] = value
        return params

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Delegates to ``_get_params`` using the ``_estimators`` attribute.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """
        return self._get_params('_estimators', deep=deep)

    # from metaestimators.py
    def _set_params(self, attr, **params):
        """Apply ``params`` in a fixed order and return ``self``.

        The order is: replace the whole ``attr`` list, then replace individual
        named estimators, then hand every remaining parameter to the parent
        ``set_params``.
        """
        # Step 1: wholesale replacement of the estimator list.
        if attr in params:
            setattr(self, attr, params.pop(attr))

        # Step 2: replacement of single estimators addressed by their name.
        current = getattr(self, attr)
        known_names = [est_name for est_name, _ in current] if current else []
        for key in list(params):
            if '__' not in key and key in known_names:
                self._replace_estimator(attr, key, params.pop(key))

        # Step 3: nested estimator parameters and constructor arguments.
        super().set_params(**params)
        return self

    # from metaestimators.py
    def _replace_estimator(self, attr, name, new_val):
        """Replace the first estimator called ``name`` in the ``attr`` list.

        Assumes ``name`` is a valid estimator name; if no entry matches, the
        attribute is simply re-assigned an unchanged copy of the list.
        """
        updated = list(getattr(self, attr))
        first_match = next(
            (position
             for position, (est_name, _) in enumerate(updated)
             if est_name == name),
            None)
        if first_match is not None:
            updated[first_match] = (name, new_val)
        setattr(self, attr, updated)

    def set_params(self, **kwargs):
        """Set the parameters of this estimator.

        Valid parameter keys can be listed with ``get_params()``.
        Delegates to ``_set_params`` using the ``_estimators`` attribute.

        Returns
        -------
        self
        """
        self._set_params('_estimators', **kwargs)
        return self

    def _validate_estimators(self):
        """Validate the names and the interface of the configured estimators.

        Does nothing when ``self.estimators`` is empty. Otherwise the names
        are checked by ``_validate_names`` and every estimator other than the
        string ``'drop'`` must provide both ``fit`` and ``predict_proba``.

        Raises
        ------
        TypeError
            If an estimator lacks ``fit`` or ``predict_proba``.
        """
        if not self.estimators:
            return

        names, estimators, _ = zip(*self.estimators)

        self._validate_names(names)

        # validate estimators
        for t in estimators:
            if t == 'drop':
                continue
            # Both methods are required; the former ``or`` let objects with
            # only one of them through.
            if not (hasattr(t, "fit") and hasattr(t, "predict_proba")):
                raise TypeError(
                    "All estimators should implement fit and predict_proba "
                    "or can be 'drop' "
                    "specifiers. '%s' (type %s) doesn't." % (t, type(t)))

    def _validate_names(self, names):
        """Check that estimator names are unique, do not shadow constructor
        arguments and do not contain a double underscore.

        Raises
        ------
        ValueError
            If any of the three checks fails.
        """
        unique_names = set(names)
        if len(unique_names) != len(names):
            raise ValueError('Names provided are not unique: '
                             '{0!r}'.format(list(names)))

        clashes = unique_names.intersection(self.get_params(deep=False))
        if clashes:
            raise ValueError('Estimator names conflict with constructor '
                             'arguments: {0!r}'.format(sorted(clashes)))

        dunder_names = [candidate for candidate in names if '__' in candidate]
        if dunder_names:
            raise ValueError('Estimator names must not contain __: got '
                             '{0!r}'.format(dunder_names))

    # Resolve callable column specifications against the input data.
    def _validate_column_callables(self, X):
        """
        Resolve callable column specifications.

        Every column entry of ``self.estimators`` that is callable is called
        with ``X``; the resulting list is stored in ``self._columns``.
        """
        self._columns = [
            spec(X) if callable(spec) else spec
            for _, _, spec in self.estimators
        ]

    def _validate_remainder(self, X):
        """
        Validates ``remainder`` and defines ``_remainder`` targeting
        the remaining columns.

        ``self._remainder`` is set to ``('remainder', self.remainder, idx)``
        where ``idx`` is the sorted list of column indices not claimed by any
        entry of ``self._columns``, or None when no column is left.

        Raises
        ------
        ValueError
            If ``remainder`` is neither the string 'drop' nor an object with
            a ``fit`` or ``predict_proba`` attribute.
        """
        is_estimator = (hasattr(self.remainder, "fit")
                        or hasattr(self.remainder, "predict_proba"))
        # Compare for equality: ``not in ('drop')`` was a substring test on a
        # plain string, which raised TypeError for estimator objects and
        # accepted fragments such as 'd' or ''.
        is_drop = isinstance(self.remainder, str) and self.remainder == 'drop'
        if not (is_drop or is_estimator):
            raise ValueError(
                "The remainder keyword needs to be 'drop', '%s' was passed instead"
                % self.remainder)

        n_columns = X.shape[1]
        cols = []
        for columns in self._columns:
            cols.extend(_get_column_indices(X, columns))
        remaining_idx = sorted(set(range(n_columns)) - set(cols)) or None

        self._remainder = ('remainder', self.remainder, remaining_idx)

    def _iter(self, replace_strings=False):
        """Generate ``(name, estimator, column)`` tuples.

        The tuples come from ``self.estimators`` with each column specifier
        replaced by the matching entry of ``self._columns``. The
        ``self._remainder`` tuple is appended when its column list is not
        ``None``.

        Parameters
        ----------
        replace_strings : bool, default=False
            When true, entries whose estimator is ``'drop'`` and entries
            with an empty column selection are skipped.
        """
        # Pair every user-supplied tuple with its resolved column specifier.
        entries = []
        for (name, estim, _), column in zip(self.estimators, self._columns):
            entries.append((name, estim, column))

        if self._remainder[2] is not None:
            entries.append(self._remainder)

        for name, trans, column in entries:
            if replace_strings and (trans == 'drop'
                                    or _is_empty_column_selection(column)):
                continue
            yield (name, trans, column)

    def fit(self, X, y, input_checks=True):
        """Fit every non-dropped estimator on its columns of ``X``.

        The targets are label-encoded first; the encoder is kept as ``le_``
        and its classes as ``classes_``.

        Parameters
        ----------
        X : array-like or DataFrame of shape [n_samples, n_dimensions, n_length]
            Input data, of which specified subsets are used to fit the
            estimators.

        y : array-like, shape (n_samples, ...)
            Targets for supervised learning.

        input_checks : bool, default=True
            Currently unused by this method.

        Returns
        -------
        self

        Raises
        ------
        AttributeError
            If ``estimators`` is ``None`` or empty.
        """
        has_estimators = (self.estimators is not None
                          and len(self.estimators) > 0)
        if not has_estimators:
            raise AttributeError('Invalid `estimators` attribute, `estimators`'
                                 ' should be a list of (string, estimator)'
                                 ' tuples')

        # NOTE: X is used as given; no input check is applied here.
        self._validate_estimators()
        self._validate_column_callables(X)
        self._validate_remainder(X)

        encoder = LabelEncoder().fit(y)
        self.le_ = encoder
        self.classes_ = encoder.classes_
        encoded_y = encoder.transform(y)

        for _, estimator, cols in self._iter(replace_strings=True):
            estimator.fit(_get_column(X, cols), encoded_y)

        return self

    def _collect_probas(self, X):
        """Stack the ``predict_proba`` output of every active estimator.

        Each estimator yielded by ``_iter(replace_strings=True)`` is applied
        to its own columns of ``X``; the results are returned as one array
        with the estimators along the first axis.
        """
        probas = []
        for _, estimator, cols in self._iter(replace_strings=True):
            probas.append(estimator.predict_proba(_get_column(X, cols)))
        return np.asarray(probas)

    # TODO: check if it is fitted
    def predict_proba(self, X, input_checks=True):
        """Predict class probabilities for X by 'soft' voting.

        Returns the unweighted average, over estimators, of the arrays
        gathered by ``_collect_probas``. ``input_checks`` is currently
        unused by this method.
        """
        per_estimator = self._collect_probas(X)
        return np.average(per_estimator, axis=0)

    def _predict(self, X):
        """Collect results from the estimators' ``predict_proba`` calls.

        NOTE(review): despite its name this method gathers ``predict_proba``
        output (not ``predict`` output) for every estimator yielded by
        ``_iter(replace_strings=True)``, stacked along the first axis.
        """
        outputs = [
            estimator.predict_proba(_get_column(X, cols))
            for _, estimator, cols in self._iter(replace_strings=True)
        ]
        return np.asarray(outputs)

    def predict(self, X, input_checks=True):
        """Predict class labels for X.

        Takes the column index of the highest averaged probability in each
        row of ``predict_proba(X)`` and maps it back to the original labels
        with ``le_.inverse_transform``. ``input_checks`` is currently unused
        by this method.
        """
        probabilities = self.predict_proba(X)
        winning_idx = np.argmax(probabilities, axis=1)
        return self.le_.inverse_transform(winning_idx)
Esempio n. 18
0
    def predict(self):
        """Run ``self.model`` on audio files and save a comparison table.

        Reads a directory path from ``self.path``, derives a
        ``<gender>_<emotion>`` label from each file name, extracts MFCC
        features with librosa, evaluates the model on a random subset of
        the rows and writes the predicted and actual labels to a CSV file
        on the H: drive. A ``FileNotFoundError`` raised anywhere in the
        process is reported through a warning dialog instead of
        propagating.
        """
        # File-name emotion code -> emotion name; other codes get no label.
        emotions = {
            '02': 'calm',
            '03': 'happy',
            '04': 'sad',
            '05': 'angry',
            '06': 'fearful',
        }

        try:
            # Raises if the path has not been defined.
            path = self.path.get()
            mylist = os.listdir(path)

            feeling_list = []
            for item in mylist:
                emotion = emotions.get(item[6:-16])
                if emotion is None:
                    continue
                # An even trailing number marks a female speaker, odd a male.
                gender = 'female' if int(item[18:-4]) % 2 == 0 else 'male'
                feeling_list.append(gender + '_' + emotion)

            labels = pd.DataFrame(feeling_list)

            # NOTE(review): labels are produced only for codes 02-06, while
            # features are extracted for every file that passes the filter
            # below; if the two selections differ, rows will not line up.
            # Confirm against the data set.
            features = []
            for fname in mylist:
                if (fname[6:-16] in ('01', '07', '08')
                        or fname.startswith(('su', 'n', 'd', 'A'))):
                    continue
                X, sample_rate = librosa.load(os.path.join(path, fname),
                                              res_type='kaiser_fast',
                                              duration=2.5,
                                              sr=22050 * 2,
                                              offset=0.5)
                sample_rate = np.array(sample_rate)
                mfccs = np.mean(librosa.feature.mfcc(y=X,
                                                     sr=sample_rate,
                                                     n_mfcc=13),
                                axis=0)
                features.append(mfccs)

            df3 = pd.DataFrame(features)

            # Put features and labels in one table; the label column is the
            # last one.
            newdf = pd.concat([df3, labels], axis=1)

            rnewdf = shuffle(newdf)
            rnewdf = rnewdf.fillna(0)

            # Randomly select the evaluation rows: those whose uniform draw
            # is not below 0.2.
            newdf1 = np.random.rand(len(rnewdf)) < 0.2
            test = rnewdf[~newdf1]

            # Features are every column but the last; the label is the last.
            testfeatures = test.iloc[:, :-1]

            testlabel = test.iloc[:, -1:]

            X_test = np.array(testfeatures)
            y_test = np.array(testlabel)

            lb = LabelEncoder()

            y_test = np_utils.to_categorical(lb.fit_transform(y_test))

            print('提取测试集...')
            # Add an axis at position 2 before feeding the model.
            x_testcnn = np.expand_dims(X_test, axis=2)

            print(x_testcnn)

            print('测试...')
            preds = self.model.predict(x_testcnn, batch_size=32, verbose=1)

            preds1 = preds.argmax(axis=1)

            abc = preds1.astype(int).flatten()

            predictions = (lb.inverse_transform((abc)))

            preddf = pd.DataFrame({'predicted_values': predictions})
            actual = y_test.argmax(axis=1)
            abc123 = actual.astype(int).flatten()

            actualvalues = (lb.inverse_transform((abc123)))

            actualdf = pd.DataFrame({'actual_values': actualvalues})

            finaldf = actualdf.join(preddf)

            finaldf.to_csv('H:\\预测实际对照表.csv', index=False)
            showinfo("提示", "表格打印完成,已保存到H盘目录下")
            print('\n\n输出预测值与实际值的对比表格:\n\n')

            print(
                finaldf.groupby('actual_values').count().join(
                    finaldf.groupby('predicted_values').count()))

        except FileNotFoundError:

            showwarning('warning', '该路径不存在,请重新输入')