def return_prediction(data, clf_filename='clf_rf'):
    """Predict the category of an object with a previously trained classifier.

    Parameters:
        data (list): The 4 numeric parameters (height, width, depth, weight)
            of the object to classify.
        clf_filename (str): File name of a trained classifier, without its
            extension (a joblib file is assumed). (Default: 'clf_rf')

    Returns:
        str: The predicted category ('mlp', 'deco' or 'meuble'), or an error
        message when the values are too far from the training distribution.
    """
    # The scaler/classifier expect a 2-D input: wrap the single sample.
    sample = [data]

    print("directory", os.getcwd())

    # Scale with the scaler fitted on the training set; the relative path
    # differs depending on the working directory, hence the fallback.
    try:
        scaled = scale_test(sample, filename='src/scaler_mdm.joblib')
    except FileNotFoundError:
        scaled = scale_test(sample, filename='../src/scaler_mdm.joblib')

    # Load the trained classifier (same two-location fallback).
    try:
        classifier = load('src/' + clf_filename + '.joblib')
    except FileNotFoundError:
        classifier = load('../src/' + clf_filename + '.joblib')

    print(scaled)
    prediction = classifier.predict(scaled)

    # Rebuild a OneHotEncoder from the persisted category array so the
    # numeric prediction can be decoded back into a label.
    decoder = OneHotEncoder()
    decoder.drop_idx_ = None
    try:
        decoder.categories_ = np.load('src/classes_onehot.npy',
                                      allow_pickle=True)
    except FileNotFoundError:
        decoder.categories_ = np.load('../src/classes_onehot.npy',
                                      allow_pickle=True)

    # inverse_transform raises ValueError on inputs it cannot decode
    # (e.g. outliers far from anything seen during training).
    try:
        decoded = decoder.inverse_transform(prediction)
    except ValueError:
        return "Values are too different from training dataset"

    # inverse_transform returns a nested structure; flatten and keep the
    # single predicted label.
    labels = list(chain.from_iterable(decoded))
    print("Catégorie prédite : ", labels[0])
    return labels[0]
def encoder_for(field):
    """Return a one-hot encoder for a categorical *field*, else 'passthrough'.

    NOTE(review): this function reads the free variable ``self`` — it is
    presumably defined as a closure inside a method of the owning class;
    confirm against the enclosing scope.
    """
    if field.get('optype') == 'categorical':
        one_hot = OneHotEncoder()
        # Seed the encoder with the categories recorded in the field
        # mapping instead of fitting it on data.
        mapped = self.field_mapping[field.get('name')][1]
        one_hot.categories_ = np.array([mapped.categories])
        one_hot._legacy_mode = False
        return one_hot
    # Non-categorical fields are passed through unchanged.
    return 'passthrough'
def complete_y_tranformation(self, Y):
    """Convert ``Y`` to a float32 matrix and optionally one-hot encode it.

    Parameters:
        Y: Array-like of targets; anything supporting ``.astype`` /
            ``.reshape`` (a numpy array).

    Returns:
        tuple: ``(Y, y_encoder)`` where ``Y`` is a 2-D float32 array
        (one-hot encoded when ``self.encode_Y`` is true) and ``y_encoder``
        is the fitted ``OneHotEncoder`` or ``None``.
    """
    y_encoder = None
    Y = Y.astype(np.float32)
    # Ensure Y is a column matrix: (n_samples, 1).
    if len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)
    # Optionally one-hot encode the targets.
    if self.encode_Y:
        # NOTE(review): ``sparse=False`` was renamed ``sparse_output`` in
        # scikit-learn >= 1.2 — kept as-is to match the project's pin.
        y_encoder = OneHotEncoder(sparse=False, categories="auto",
                                  handle_unknown='ignore')
        # Fix: dropped the dead ``y_encoder.categories_ = np.array([])``
        # assignment — fit_transform recomputes ``categories_`` anyway.
        Y = y_encoder.fit_transform(Y)
    return Y, y_encoder
def fit(self, pipeline_config, X_train, X_valid, Y_train, Y_valid, categorical_features):
    """One-hot encode categorical X columns and (optionally) the targets.

    Parameters:
        pipeline_config: Pipeline configuration (unused here; kept for the
            pipeline-node interface).
        X_train, X_valid: Feature matrices; ``X_valid`` may be ``None``.
        Y_train, Y_valid: Target arrays; ``Y_valid`` may be ``None``.
        categorical_features: Iterable of booleans, one per X column,
            flagging which columns are categorical.

    Returns:
        dict: Transformed data plus the fitted encoders
        ('one_hot_encoder', 'y_one_hot_encoder'); 'categorical_features'
        is reset to ``None`` since the columns have been encoded.
    """
    ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore")
    encoder = ColumnTransformer(transformers=[
        ("ohe", ohe, [i for i, f in enumerate(categorical_features) if f])
    ], remainder="passthrough")
    # Default categories_ for downstream consumers when the categorical
    # branch below is skipped (sparse input / no categorical columns).
    encoder.categories_ = np.array([])
    encoder.categorical_features = categorical_features

    # Encode X only for dense input with at least one categorical column.
    if any(categorical_features) and not scipy.sparse.issparse(X_train):
        X_train = encoder.fit_transform(X_train)
        if (X_valid is not None):
            X_valid = encoder.transform(X_valid)
        # Surface the fitted OneHotEncoder's categories on the transformer.
        encoder.categories_ = encoder.transformers_[0][1].categories_

    # Y to matrix: float32, shaped (n_samples, 1) when 1-D.
    y_encoder = None
    Y_train = Y_train.astype(np.float32)
    if len(Y_train.shape) == 1:
        Y_train = Y_train.reshape(-1, 1)
    if Y_valid is not None and len(Y_valid.shape) == 1:
        Y_valid = Y_valid.reshape(-1, 1)

    # Optionally one-hot encode the targets.
    if self.encode_Y and not scipy.sparse.issparse(Y_train):
        y_encoder = OneHotEncoder(sparse=False, categories="auto", handle_unknown='ignore')
        # Fix: dropped the dead ``y_encoder.categories_ = np.array([])``
        # assignment — fit_transform recomputes ``categories_`` anyway.
        Y_train = y_encoder.fit_transform(Y_train)
        if Y_valid is not None:
            Y_valid = y_encoder.transform(Y_valid)

    return {
        'X_train': X_train,
        'X_valid': X_valid,
        'one_hot_encoder': encoder,
        'Y_train': Y_train,
        'Y_valid': Y_valid,
        'y_one_hot_encoder': y_encoder,
        'categorical_features': None
    }