def radiusNeighborClassifier():
    """Tune a RadiusNeighborsClassifier on the validation set and store its test score.

    Every radius in 100, 200, ..., 1000 is evaluated with both ``'uniform'``
    and ``'distance'`` weighting.  The combination with the highest validation
    accuracy is refitted on the training data and scored on the test data.

    Reads the module-level ``trainData``, ``trainLabel``, ``validationData``,
    ``validationLabel``, ``testData`` and ``testLabel``; writes the test score
    to ``guideToGraph['Radius Neighbor']``.

    Raises:
        ValueError: if no candidate scores above zero on the validation set,
            so there is no configuration to evaluate on the test set.
    """
    best_score = 0
    best_radius = None
    best_weights = None

    # ``range`` (instead of ``xrange``) keeps this runnable on Python 2 and 3.
    for radius in range(100, 1001, 100):
        scores = {}
        for weights in ('uniform', 'distance'):
            candidate = RadiusNeighborsClassifier(radius=radius,
                                                  weights=weights,
                                                  algorithm='auto',
                                                  p=2,
                                                  metric='minkowski')
            candidate.fit(trainData, trainLabel)
            scores[weights] = candidate.score(validationData, validationLabel)

        round_best = max(scores['uniform'], scores['distance'])
        if round_best > best_score:
            best_score = round_best
            best_radius = radius
            # Ties go to 'uniform', exactly as before.
            if scores['distance'] > scores['uniform']:
                best_weights = 'distance'
            else:
                best_weights = 'uniform'

    if best_radius is None:
        # Previously this fell through with placeholder parameters and failed
        # inside scikit-learn with an unrelated-looking error.
        raise ValueError(
            'No radius/weights combination scored above zero on the '
            'validation set')

    neighTest = RadiusNeighborsClassifier(radius=best_radius,
                                          weights=best_weights,
                                          algorithm='auto',
                                          p=2,
                                          metric='minkowski')
    neighTest.fit(trainData, trainLabel)
    guideToGraph['Radius Neighbor'] = neighTest.score(testData, testLabel)
def radius_neighbors_clustering(X_train, X_test, y_train, y_test, parameters,
                                evaluation_metrics):
    """Fit a RadiusNeighborsClassifier and record its accuracy on the test split.

    :param X_train: training samples, one row per sample.
    :param X_test: test samples.
    :param y_train: training labels.
    :param y_test: test labels.
    :param parameters: raw parameters; passed through ``prepare_parameters``,
        whose result must provide ``"radius"``, ``"distance"`` and
        ``"minkowski_p"``.
    :param evaluation_metrics: dict that receives the ``"accuracy"`` entry.
    :return: the same ``evaluation_metrics`` dict, updated in place.
    """
    # prepare_parameters adapts the caller's parameters (mainly the distance)
    # to what the estimator expects.
    modified_parameters = prepare_parameters(parameters)

    classifier_kwargs = {
        "n_jobs": -1,
        "radius": modified_parameters["radius"],
        "metric": modified_parameters["distance"],
        "p": modified_parameters["minkowski_p"],
    }
    if modified_parameters["distance"] == "mahalanobis":
        # Mahalanobis needs the inverse covariance of the *features*.
        # rowvar=False makes np.cov treat columns as variables; the default
        # would build an (n_samples x n_samples) matrix instead.
        classifier_kwargs["algorithm"] = "brute"
        classifier_kwargs["metric_params"] = {
            "VI": np.linalg.inv(np.cov(X_train, rowvar=False))
        }

    classifier = RadiusNeighborsClassifier(**classifier_kwargs).fit(
        X_train, y_train)
    # score() predicts internally, so no separate predict() call is needed.
    evaluation_metrics["accuracy"] = classifier.score(X_test, y_test)
    return evaluation_metrics
def createRadiusNeighborsClassifier(params=None):
    """Log that the radius-neighbors classifier is unavailable and return an empty result.

    The function currently always returns
    ``{"estimator": None, "params": None}`` after logging an error.
    """
    info("Creating Radius Neighbors Classifier", ind=4)
    error("This doesn't work")
    return {"estimator": None, "params": None}

    # NOTE: everything below is unreachable because of the return above.
    # It is the intended construction path, kept for when the estimator is
    # re-enabled.

    ## Params
    params = mergeParams(RadiusNeighborsClassifier(), params)
    tuneParams = getRadiusNeighborsClassifierParams()
    grid = tuneParams['grid']

    info("With Parameters", ind=4)
    chosen = {}
    for key in ('algorithm', 'leaf_size', 'metric', 'radius', 'weights'):
        chosen[key] = setParam(key, params, grid, force=False)
        info("Param: {0} = {1}".format(key, chosen[key]), ind=6)

    ## Estimator
    reg = RadiusNeighborsClassifier(**chosen)
    return {"estimator": reg, "params": tuneParams}
def palabra(directorio):
    """Build a string of recognised characters from the audio files in *directorio*.

    A RadiusNeighborsClassifier (radius 0.12) is fitted on the module-level
    ``X`` and ``Y``.  For every file whose duration exceeds 0.5 s a
    spectrogram of channel 1 is computed; each spectrogram row is reduced to
    a point by ``findForm``, classified, and mapped to a character with
    ``valores``.  Consecutive repeats of a character are collapsed.

    :param directorio: directory containing the audio files.
    :return: the per-file words, each preceded by a space separator.
    """
    neigh = RadiusNeighborsClassifier(radius=0.12)
    neigh.fit(X, Y)

    words = ''
    for filename in os.listdir(directorio):
        word = ' '
        fs, x = wv.read(os.path.join(directorio, filename))
        if (len(x) / fs) > 0.5:
            spectrum, freqs, t, im = plt.specgram(x[:, 1],
                                                  NFFT=1024,
                                                  Fs=fs,
                                                  sides='onesided')
            for row in spectrum:
                # NOTE(review): freqs is indexed by the row length here;
                # confirm this cannot run past the end of freqs.
                point = findForm(
                    np.linspace(0, freqs[len(row)] / 1000, len(row)), row)
                if point[0] != 0 and point[1] != 0:
                    try:
                        print(point)
                        val = neigh.predict([point])
                        char = valores(val[0])
                        print(char)
                        if word[-1] != char:
                            word = word + char
                    except Exception:
                        # Still best-effort, but no longer swallows
                        # KeyboardInterrupt / SystemExit as the bare
                        # ``except:`` did.
                        print('No neighbors found for the given radius')
        words = words + ' ' + word
    return words
def train_models(dict_of_dicts):
    """Train and pickle one RadiusNeighborsClassifier per key of *dict_of_dicts*.

    Each value of ``dict_of_dicts`` maps a label to an iterable of samples.
    Every sample becomes one single-feature training row carrying that label.
    Keys without any sample are skipped.  The fitted model is written to
    ``<workspace.model_folder>/<midi_key>_rrn_model.sav``.

    :param dict_of_dicts: mapping ``midi_key -> {label: [samples, ...]}``.
    """
    for midi_key, midi_dict in dict_of_dicts.items():
        samples = []
        labels = []
        for label in midi_dict:
            # The label is repeated once per sample.
            for value in midi_dict[label]:
                samples.append(value)
                labels.append(label)

        if not samples:
            continue

        neigh = RadiusNeighborsClassifier(radius=get_radius(midi_key),
                                          weights='distance',
                                          outlier_label=[7])
        neigh.fit(np.array(samples).reshape(-1, 1), np.array(labels))

        filename = workspace.model_folder + '/' + str(
            midi_key) + '_rrn_model.sav'
        # The context manager closes the file even if pickling fails.
        with open(filename, 'wb') as model_file:
            pickle.dump(neigh, model_file)
def train_model_for_prediction(path_to_csv,
                               path_to_json_dir,
                               company,
                               department,
                               classifier_id='random_forest',
                               needs_type='manual',
                               ratio_cleaner_val=None,
                               random_state=None,
                               remove_ratios=False,
                               remove_needs=False):
    """
    Trains the specified model to make predictions to be used in production.

    :param path_to_csv: String. Path to csv file of all the shifts.
    :param path_to_json_dir: String. Path to a directory containing all json schedules.
    :param company: String. Name of the company.
    :param department: String. Name of the department in the company.
    :param classifier_id: String. The classifier to use for training and prediction.
        'random_forest', 'k_neighbors', or 'radius_neighbors'.
    :param needs_type: String. Specifies type of needs to use. 'manual', 'avg', or 'median'.
    :param ratio_cleaner_val: The cutoff point for ratios to be removed from the data.
        The max, 7.0, removes all ratios. The min, 0.0, removes none.
        Recommended values to experiment with are 2.0, 1.5, and 1.0. Use None to ignore this.
    :param random_state: Int. Seed to remember the split of the data in StratifiedShuffleSplit.
    :param remove_ratios: Bool. Whether or not to remove the ratios from the features.
    :param remove_needs: Bool. Whether or not to remove the needs from the features.
    :return: Tuple of an instance of a trained RandomForestClassifier, KNeighborsClassifier,
        or RadiusNeighborsClassifier, AND the interpretation_keys.
    :raises ValueError: if classifier_id is not one of the supported identifiers.
    """
    # Validate first so a bad identifier fails before the data preparation.
    valid_classifier_ids = ('random_forest', 'k_neighbors', 'radius_neighbors')
    if classifier_id not in valid_classifier_ids:
        raise ValueError(
            "Invalid classifier_id specified: {!r}. Must be 'random_forest', "
            "'k_neighbors', or 'radius_neighbors'.".format(classifier_id))

    (prepared_train_features, prepared_train_targets, prepared_test_features,
     prepared_test_targets, interpretation_keys) = prepare_features_targets(
         path_to_csv,
         path_to_json_dir,
         company,
         department,
         needs_type=needs_type,
         ratio_cleaner_val=ratio_cleaner_val,
         random_state=random_state,
         remove_ratios=remove_ratios,
         remove_needs=remove_needs)

    if classifier_id == 'random_forest':
        classifier = RandomForestClassifier(n_estimators=500,
                                            max_leaf_nodes=16,
                                            n_jobs=1)
    elif classifier_id == 'k_neighbors':
        classifier = KNeighborsClassifier()
    else:  # 'radius_neighbors', guaranteed by the validation above
        classifier = RadiusNeighborsClassifier()

    classifier.fit(prepared_train_features, prepared_train_targets)

    # run the test data through it to gauge effectiveness.
    test_predictions = classifier.predict(prepared_test_features)
    print_model_analysis(test_predictions, prepared_test_targets)

    return classifier, interpretation_keys
def Radius_Neighbors(input_file, Output):
    """Fit a RadiusNeighborsClassifier on a CSV file and report its training-set metrics.

    The file is read with ``,`` as delimiter, skipping its last column.
    Column 0 holds the target and the remaining columns are the features.
    The classifier is fitted and evaluated on the same data.  Metrics are
    printed, written with the per-sample predictions to
    ``Output + "Raidus_Neighbors_metrics.txt"``, and a confusion matrix is
    saved to ``Output + "Radius_Neighbors_confusion_matrix.png"``.

    :param input_file: path of the comma-separated input file.
    :param Output: prefix (typically a directory ending with a separator)
        for the generated files.
    """
    lvltrace.lvltrace("LVLEntree dans Radius_Neighbors")
    ncol = tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol - 1))
    X = data[:, 1:]
    y = data[:, 0]

    # RadiusNeighborsClassifier has no ``n_neighbors`` parameter; the
    # neighbourhood is controlled by ``radius`` (default 1.0).
    clf = RadiusNeighborsClassifier()
    clf.fit(X, y)
    y_pred = clf.predict(X)

    # Compute each metric once and reuse it for console and file output.
    accuracy = metrics.accuracy_score(y, y_pred)
    precision = metrics.precision_score(y, y_pred)
    recall = metrics.recall_score(y, y_pred)
    f1 = metrics.f1_score(y, y_pred)

    banner = "#########################################################################################################\n"
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(banner)
    print("Radius Neighbors Accuracy ")
    print("classification accuracy: %s" % accuracy)
    print("precision: %s" % precision)
    print("recall: %s" % recall)
    print("f1 score: %s" % f1)
    print("\n")
    print(banner)

    # The file name spelling is kept as-is so existing output paths do not change.
    results = Output + "Raidus_Neighbors_metrics.txt"
    with open(results, "w") as report:
        report.write("Radius Neighbors estimator accuracy\n")
        report.write("Classification Accuracy Score: %f\n" % accuracy)
        report.write("Precision Score: %f\n" % precision)
        report.write("Recall Score: %f\n" % recall)
        report.write("F1 Score: %f\n" % f1)
        report.write("\n")
        report.write("True Value, Predicted Value, Iteration\n")
        for iteration, (truth, predicted) in enumerate(zip(y, y_pred), 1):
            report.write("%f,%f,%i\n" % (truth, predicted, iteration))

    title = "Radius Neighbors"
    save = Output + "Radius_Neighbors_confusion_matrix.png"
    plot_confusion_matrix(y, y_pred, title, save)
    lvltrace.lvltrace("LVLSortie dans Radius_Neighbors")
def SequentialRadiusNeighborsClassifier(epsilon, X_train, X_test, Y_train):
    """Label the test points one at a time, growing the training set as it goes.

    On each round the still-unlabelled test point closest to the current
    training set is selected.  If it has at least one training neighbour
    within ``epsilon`` it receives the radius-neighbours prediction;
    otherwise it gets a brand-new label (current maximum label + 1).  The
    point and its label are then appended to the training data used in the
    following rounds.

    :param epsilon: radius used by the RadiusNeighborsClassifier.
    :param X_train: initial training samples (not modified; a copy is grown).
    :param X_test: samples to label.
    :param Y_train: labels of ``X_train``.
    :return: list of labels aligned with ``X_test``.
    """
    train_points = np.copy(X_train)
    train_labels = np.copy(Y_train)
    radius_clf = RadiusNeighborsClassifier(radius=epsilon)

    n_test = len(X_test)
    Y_predict = [-1] * n_test
    known_labels = list(set(Y_train))
    pending = list(range(n_test))

    for _round in range(n_test):
        # Distance of every pending test point to its nearest training point.
        nearest = NearestNeighbors(n_neighbors=1)
        nearest.fit(train_points)
        distances = nearest.kneighbors(X_test[pending])[0]
        mean_distances = [np.mean(row) for row in distances]

        # Take the closest pending point and remove it from the queue.
        position = mean_distances.index(min(mean_distances))
        chosen = pending.pop(position)
        query = X_test[chosen].reshape(1, -1)

        radius_clf.fit(train_points, train_labels)
        neighbor_ids = radius_clf.radius_neighbors(query)[1][0]
        if neighbor_ids.size > 0:
            label = radius_clf.predict(query)[0]
        else:
            # No neighbour in range: open a new class.
            label = max(known_labels) + 1
            known_labels.append(label)

        Y_predict[chosen] = label
        train_points = np.append(train_points, [X_test[chosen]], axis=0)
        train_labels = np.append(train_labels, [label], axis=0)

    return Y_predict
def start(self):
    """
    01. Set the data location, the feature-generation settings and the
    candidate estimators, then continue with load_raw_data.
    """
    # Data location and feature-generation configuration.
    self.data_dir = '../data/raw_data'
    self.trans_primitives = ['weekday', 'hour', 'time_since_previous']
    self.agg_primitives = [
        'mean', 'max', 'min', 'std', 'count',
        'percent_true', 'last', 'time_since_last', 'mode',
    ]
    self.ignore_cols = [
        'num_contacts', 'num_referrals', 'num_successful_referrals',
    ]
    self.feature_windows = [10, 30, 60, 90]  # [10,20,30]
    self.max_feature_depth = 2

    # Candidate estimators as (short name, instance) pairs, built in a
    # fixed order.
    leading = [
        ('cbc', CatBoostClassifier()),
        ('lgbmc', LGBMClassifier()),
        ('gbc', GradientBoostingClassifier(validation_fraction=0.15,
                                           n_iter_no_change=50)),
        ('et', ExtraTreeClassifier()),
        ('abc', AdaBoostClassifier()),
        ('rfc', RandomForestClassifier()),
        ('bc', BaggingClassifier()),
        ('etc', ExtraTreesClassifier()),
        ('gnb', GaussianNB()),
        ('mlpc', MLPClassifier()),
        ('gpc', GaussianProcessClassifier()),
        ('dtc', DecisionTreeClassifier()),
        ('qda', QuadraticDiscriminantAnalysis()),
        ('lr', LogisticRegression()),
    ]
    k_nearest = [('knn%d' % k, KNeighborsClassifier(k)) for k in (3, 6, 12)]
    trailing = [
        ('nc', NearestCentroid()),
        ('rnc', RadiusNeighborsClassifier()),
        ('lp', LabelPropagation()),
        ('pac', PassiveAggressiveClassifier()),
        ('rc', RidgeClassifier()),
        ('sgdc', SGDClassifier()),
        ('svg', SVC()),
        ('ngbc', NGBClassifier(Dist=Bernoulli)),
    ]
    self.estimators = leading + k_nearest + trailing

    self.next(self.load_raw_data)
def r_neighbors_classifier(self,
                           n_neighbours=5,
                           weights='uniform',
                           algorithm='auto',
                           leaf_size=30,
                           p=2,
                           metric='minkowski',
                           radius=1.0):
    """
    Fit a classifier implementing a vote among neighbors within a given radius.

    The fitted estimator is stored on the instance; nothing is returned.

    :param n_neighbours: Accepted for backward compatibility only and not
        used: RadiusNeighborsClassifier has no neighbour-count parameter,
        the neighbourhood is defined by ``radius``.
    :param weights: Weight function used in prediction, inputs:
        uniform: All points in each neighborhood are weighted equally.
        distance: Weight points by the inverse of their distance, closer
        points will have a greater influence.
    :param algorithm: Algorithm used to compute the nearest neighbors, inputs:
        ball_tree: Fast generalized N-point problems.
        kd_tree: Euclidean tree of n-dimensions.
        brute: Brute-force search.
        auto: Will try to decide the most appropriate algorithm given the
        values passed to fit.
    :param leaf_size: Leaf size passed to the tree. This can affect the
        computation speed/time.
    :param p: Parameter for the Minkowski metric.
    :param metric: Distance metric to use for the tree, inputs:
        euclidean, manhattan, chebyshev, minkowski, seuclidean, mahalanobis
    :param radius: Range of parameter space used for the neighbour queries.
    :return: None
    """
    model = RadiusNeighborsClassifier(radius=radius,
                                      weights=weights,
                                      algorithm=algorithm,
                                      leaf_size=leaf_size,
                                      p=p,
                                      metric=metric)
    model.fit(self.__x_train, self.__y_train)
    self.__model = model
def runRNC(X_train, y_train, X_test, R=1.0, weights='uniform', outlier=None):
    """Fit a radius-neighbours classifier and return its predictions for X_test.

    :param X_train: training samples.
    :param y_train: training labels.
    :param X_test: samples to classify.
    :param R: neighbourhood radius.
    :param weights: weighting scheme forwarded to the classifier.
    :param outlier: forwarded as ``outlier_label``.
    :return: predicted labels for ``X_test``.
    """
    fitted = RadiusNeighborsClassifier(
        R, weights=weights, outlier_label=outlier).fit(X_train, y_train)
    return fitted.predict(X_test)
def __init__(self, feature_length, num_classes):
    """Create the wrapper around a distance-weighted RadiusNeighborsClassifier.

    :param feature_length: forwarded to the parent initialiser.
    :param num_classes: forwarded to the parent initialiser and stored on
        the instance.
    """
    super().__init__(feature_length, num_classes)
    self.num_classes = num_classes

    ###
    # BUILD YOUR MODEL
    # An empty string is not a valid metric name; use scikit-learn's
    # default metric explicitly.
    self.model = RadiusNeighborsClassifier(weights='distance',
                                           metric='minkowski')
def rnn_model(train_input, train_target, test_input, test_target, radius=3.0):
    """Fit a RadiusNeighborsClassifier and print its train and test accuracy.

    :param train_input: training samples.
    :param train_target: training labels.
    :param test_input: test samples.
    :param test_target: test labels.
    :param radius: neighbourhood radius; defaults to the previously
        hard-coded 3.0.
    """
    r_neigh = RadiusNeighborsClassifier(radius=radius)
    r_neigh.fit(train_input, train_target)
    # The messages report the radius actually used; they used to claim r=1.
    print("R-NN (r=%s) accuracy for training set: %s" %
          (radius, r_neigh.score(train_input, train_target)))
    print("R-NN (r=%s) accuracy for testing set: %s" %
          (radius, r_neigh.score(test_input, test_target)))
def run_main():
    """Train and evaluate the classifier selected by PARAM on TF-IDF text features.

    Documents are loaded from ``PARAM.data_path``, cleaned, vectorised and
    split 80/20 (``random_state=0``).  The classifier named by
    ``PARAM.classifier`` ('knn', 'rnn' or 'nc') is fitted and evaluated.
    When ``PARAM.model_path`` is set, the vectoriser, the classifier and
    its accuracy are saved there.

    Raises:
        ValueError: if ``PARAM.classifier`` is not a supported name.
    """
    data = load_files(PARAM.data_path, encoding="utf-8")
    X = clean_docs(data.data, True)
    y = data.target
    y_names = data.target_names

    vectorizer = tfidf_vectorize(X,
                                 max_features=PARAM.max_features,
                                 min_df=PARAM.min_df,
                                 max_df=PARAM.max_df,
                                 analyzer=PARAM.analyzer,
                                 ngram_range=(1, 2))
    features = vectorizer.transform(X).toarray()
    X_train, X_test, y_train, y_test = train_test_split(features,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    selected = PARAM.classifier
    if selected == 'knn':
        classifier = KNeighborsClassifier(n_neighbors=PARAM.n_neighbors,
                                          weights=PARAM.weight_func)
    elif selected == 'rnn':
        classifier = RadiusNeighborsClassifier(radius=PARAM.radius)
    elif selected == 'nc':
        classifier = NearestCentroid()
    else:
        raise ValueError("[%s] not supported" % (PARAM.classifier))

    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    print_eval(y_test, y_pred)

    if not PARAM.model_path:
        return
    model_map = {
        'clf_name': PARAM.classifier,
        'accuracy': get_accuracy(y_test, y_pred),
        'vectorizer': vectorizer,
        'classifier': classifier,
    }
    save_model_map(model_map, PARAM.model_path)
def __init__(self,
             regression=True,
             radius=1.0,
             weights='distance',
             algorithm='auto',
             leaf_size=30,
             p=2,
             metric='minkowski',
             outlier_label=None,
             metric_params=None):
    """Store the settings and build the wrapped radius-neighbours estimator.

    :param regression: build a RadiusNeighborsRegressor when true,
        otherwise a RadiusNeighborsClassifier.
    :param radius: neighbourhood radius.
    :param weights: weighting scheme.
    :param algorithm: neighbour-search algorithm.
    :param leaf_size: leaf size for tree-based algorithms.
    :param p: power parameter of the Minkowski metric.
    :param metric: distance metric.
    :param outlier_label: label for samples without neighbours; only
        forwarded to the classifier.
    :param metric_params: extra keyword arguments for the metric.
    """
    self._regression = regression
    self._radius = radius
    self._weights = weights
    self._algorithm = algorithm
    self._leaf_size = leaf_size
    self._p = p
    self._metric = metric
    self._metric_params = metric_params
    self._outlier_label = outlier_label

    # Keyword arguments are required here. The classifier's positional
    # order differs from the regressor's (outlier_label comes before
    # metric_params), so the old positional call put metric_params into
    # outlier_label and never forwarded outlier_label at all.
    shared_kwargs = dict(radius=radius,
                         weights=weights,
                         algorithm=algorithm,
                         leaf_size=leaf_size,
                         p=p,
                         metric=metric,
                         metric_params=metric_params)
    if regression:
        self._model = RadiusNeighborsRegressor(**shared_kwargs)
    else:
        self._model = RadiusNeighborsClassifier(outlier_label=outlier_label,
                                                **shared_kwargs)

    super().__init__()
def append_without_dublicates(usual, y, knowledge):
    """Extend ``usual['data']`` with the entries of *y* that are not close to existing ones.

    When ``usual['data']`` is empty, *y* is appended unchanged.  Otherwise
    both the existing data and *y* are normalised, a radius-neighbours
    classifier (radius 2, euclidean) is fitted on the existing rows, and
    every entry of *y* that is not predicted as the outlier label ``-1``
    (i.e. has an existing neighbour in range) is removed from *y* **in
    place** before the remainder is appended.

    :param usual: dict with ``'data'``, ``'events'`` and ``'fields'`` entries.
    :param y: list of new entries; may be shortened by this call.
    :param knowledge: passed to ``get_maxims_and_averages``.
    :return: None
    """
    if not len(usual['data']):
        usual['data'] += y
        return

    maxims, averages = get_maxims_and_averages(knowledge)
    events = usual['events']
    fields = usual['fields']
    known_rows = normalize_fit_input(usual['data'], events, fields, averages,
                                     maxims)
    candidate_rows = normalize_fit_input(y, events, fields, averages, maxims)

    detector = RadiusNeighborsClassifier(radius=2,
                                         metric='euclidean',
                                         outlier_label=-1)
    # All known rows share one class; only the outlier label matters.
    detector.fit(sparse.csr_matrix(known_rows), [0] * len(known_rows))
    labels = detector.predict(sparse.csr_matrix(candidate_rows))

    # Walk backwards so removals do not shift the indices still to visit.
    for index in reversed(range(len(labels))):
        if labels[index] != -1:
            y.pop(index)

    usual['data'] += y
def set_classifier(self, classifier_name):
    """
    Setter for clf

    Builds the scikit-learn classifier that corresponds to the given name
    and stores it in ``self.clf``.

    Parameter
    ---------
    classifier_name : string
        One of "svm_linear", "svm_poly", "naive_bayes", "decision_tree",
        "nearest_centroid", "k_neighbors" or "radius_neighbors".

    Raises
    ------
    ClassifierNotExistException
        If ``classifier_name`` is not one of the names above.
    """
    # class_weight="auto" was removed from scikit-learn; "balanced" is its
    # replacement.
    if classifier_name == "svm_linear":
        self.clf = svm.SVC(kernel="linear", class_weight="balanced")
    elif classifier_name == "svm_poly":
        self.clf = svm.SVC(kernel="poly", class_weight="balanced")
    elif classifier_name == "naive_bayes":
        self.clf = GaussianNB()
    elif classifier_name == "decision_tree":
        self.clf = tree.DecisionTreeClassifier()
    elif classifier_name == "nearest_centroid":
        self.clf = NearestCentroid()
    elif classifier_name == "k_neighbors":
        self.clf = KNeighborsClassifier(n_neighbors=100)
    elif classifier_name == "radius_neighbors":
        self.clf = RadiusNeighborsClassifier(radius=1.0, outlier_label=1)
    else:
        raise ClassifierNotExistException(classifier_name)
def set_up_radius(self):
    """Fit a radius-neighbours classifier on the training split and report on the test split.

    The classifier (radius ``self.radius``) is stored in
    ``self.classifier``.  The confusion matrix is printed and the
    classification report is returned.
    """
    clf = RadiusNeighborsClassifier(radius=self.radius)
    self.classifier = clf
    clf.fit(self.X_train, self.y_train)
    predicted = clf.predict(self.X_test)

    print("confusion_matrix: \n")
    print(confusion_matrix(self.y_test, predicted))
    print("classification_report\n")
    report = classification_report(self.y_test, predicted)
    return report
def trainAlgo(self):
    """Create the radius-neighbours model from ``self.param`` and fit it.

    ``self.outputData['Y']`` is reduced to class indices by taking the
    argmax along axis 1 before fitting on ``self.inputData['X']``.
    """
    hyper = {
        name: self.param[name]
        for name in ('radius', 'weights', 'algorithm', 'p')
    }
    self.model = RadiusNeighborsClassifier(**hyper)

    class_indices = np.argmax(self.outputData['Y'], axis=1)
    self.model.fit(self.inputData['X'], class_indices)
def __init__(self, method, n_neighbors, weights, radius):
    """Create the neighbour-based estimator selected by *method*.

    :param method: 'knn_class' (k-nearest neighbours), 'knn_rad'
        (radius neighbours) or 'knn_cent' (nearest centroid).
    :param n_neighbors: neighbour count, used by 'knn_class' only.
    :param weights: weighting scheme, used by 'knn_class' only.
    :param radius: neighbourhood radius, used by 'knn_rad' only.
    """
    if method == 'knn_class':
        estimator = neighbors.KNeighborsClassifier(n_neighbors,
                                                   weights=weights)
    elif method == 'knn_rad':
        estimator = RadiusNeighborsClassifier(radius=radius)
    elif method == 'knn_cent':
        estimator = NearestCentroid()
    else:
        # NOTE(review): any other method leaves ``self.clf`` unset.
        return
    self.clf = estimator
def __init__(self, name, **kwargs):
    """Wrap a RadiusNeighborsClassifier built from *kwargs* under the given name.

    :param name: forwarded to the parent initialiser.
    :param kwargs: constructor arguments for RadiusNeighborsClassifier.
    """
    from sklearn.neighbors import RadiusNeighborsClassifier

    def signal_proba(model, X):
        # The second predict_proba column is used as the signal probability.
        class_probabilities = RadiusNeighborsClassifier.predict_proba(model, X)
        return class_probabilities[:, 1]

    self._range = [0, 1]
    self._prob_func = signal_proba
    super().__init__(name, RadiusNeighborsClassifier(**kwargs))
def train_with_model(fea_c1, fea_c2, model):
    """Train the requested model on one feature-combination file and pickle it.

    The training data is read from
    ``data/BEST&MOST<fea_c2>/train-<fea_c1><fea_c2>.arff`` and the fitted
    model is written to ``model/<fea_c1><fea_c2>-<model>.pkl``.  Nothing is
    trained when the training file is missing or the model file already
    exists; a message is printed instead.

    :param fea_c1: first feature-class identifier.
    :param fea_c2: second feature-class identifier.
    :param model: one of 'knn', 'rn', 'bnb', 'gnb', 'dtree', 'rdforest',
        'lsvc', 'qda'.
    :raises ValueError: if *model* is not a known model name.
    """
    train_filename = "data/BEST&MOST{}/train-{}{}.arff".format(
        fea_c2, fea_c1, fea_c2)
    if not os.path.exists(train_filename):
        print("No such file: {}".format(train_filename))
        return

    model_filename = "model/{}{}-{}.pkl".format(fea_c1, fea_c2, model)
    if os.path.exists(model_filename):
        print("This model had been trained: " + model_filename)
        return

    # Lambdas keep construction lazy: only the requested estimator is built.
    factories = {
        'knn': lambda: KNeighborsClassifier(),
        'rn': lambda: RadiusNeighborsClassifier(radius=5.0),
        'bnb': lambda: BernoulliNB(),
        'gnb': lambda: GaussianNB(),
        'dtree': lambda: tree.DecisionTreeClassifier(),
        'rdforest': lambda: RandomForestClassifier(n_estimators=10),
        'lsvc': lambda: LinearSVC(random_state=0, tol=1e-5),
        'qda': lambda: QuadraticDiscriminantAnalysis(),
    }
    if model not in factories:
        # Fail before anything is written. Previously an unknown name left
        # an empty .pkl file behind, which made later runs report the model
        # as already trained.
        raise ValueError("Unknown model: {}".format(model))

    x_train, y_train = load_data(train_filename)
    # An optional PCA step (x_train = pca(x_train)) used to be sketched
    # here; it is not applied.

    mol = factories[model]()
    mol.fit(x_train, y_train)

    with open(model_filename, 'wb') as f:
        pickle.dump(mol, f)
def get_classifiers():
    """Return the classifier families to compare.

    Each family is a dict with a ``"name"`` and a ``"methods"`` list of
    ``(estimator, display name)`` pairs.
    """
    # basic classifiers
    basic = {
        "name": "Basic",
        "methods": [
            (DummyClassifier(random_state=0), "Dummy"),
            (LogisticRegression(), "Logit"),
            (GaussianNB(), "Naive Bayes"),
            (LinearSVC(C=1), "SVC"),
        ],
    }

    # decision trees
    trees = {
        "name": "Decision Tree",
        "methods": [
            (DecisionTreeClassifier(random_state=0), "Decision Tree"),
            (ExtraTreeClassifier(random_state=0), "Extra Tree"),
        ],
    }

    # nearest-neighbour classifiers
    neighbours = {
        "name": "Nearest Neighbors",
        "methods": [
            (KNeighborsClassifier(n_neighbors=25, weights="distance"), "KNN"),
            (RadiusNeighborsClassifier(radius=20.0, outlier_label=1),
             "Radius NN"),
            (NearestCentroid(), "Nearest Centroid"),
        ],
    }

    # ensemble models
    ensembles = {
        "name": "Ensemble",
        "methods": [
            (AdaBoostClassifier(), "Ada Boost"),
            (BaggingClassifier(n_estimators=50, max_features=3), "Bagging"),
            (ExtraTreesClassifier(n_estimators=50, criterion="entropy"),
             "Extra Trees"),
            (GradientBoostingClassifier(max_depth=5, random_state=0),
             "Gradient Boosting"),
            (RandomForestClassifier(n_estimators=100), "Random Forest"),
        ],
    }

    # discriminant analysis
    discriminant = {
        "name": "Discriminant Analysis",
        "methods": [
            (LinearDiscriminantAnalysis(), "LDA"),
            (QuadraticDiscriminantAnalysis(), "QDA"),
        ],
    }

    # neural net
    neural = {
        "name": "Complex Architecture",
        "methods": [(MLPClassifier(alpha=0.1, tol=1e-8), "Neural Network")],
    }

    return [basic, trees, neighbours, ensembles, discriminant, neural]
def test_requires_probabilistic_classifier(self):
    """
    DiscriminationThreshold must reject a non-probabilistic classifier
    """
    estimator_cls = RadiusNeighborsClassifier
    assert is_classifier(estimator_cls)
    assert not is_probabilistic(estimator_cls)

    expected = "requires a probabilistic binary classifier"
    with pytest.raises(yb.exceptions.YellowbrickError, match=expected):
        DiscriminationThreshold(estimator_cls())
def train_model(self, X_train, y_train, modelpath):
    """Fit a radius-neighbours classifier with the instance settings and save it.

    :param X_train: training samples.
    :param y_train: training labels.
    :param modelpath: destination passed to ``self.save_model``.
    :return: the fitted classifier.
    """
    settings = {
        'radius': self.radius,
        'weights': self.weights,
        'algorithm': self.algorithm,
        'p': self.power_param,
        'outlier_label': self.outlier_label,
    }
    fitted = RadiusNeighborsClassifier(**settings)
    fitted.fit(X_train, y_train)
    self.save_model(fitted, modelpath)
    return fitted
def clusterFacetSamplesRNN(self, reduceRadius=3):
    """
    Thin out the samples of each facet using radius nearest neighbours.

    For every facet the samples are visited in order: a sample that has not
    been discarded is kept, and all samples within ``reduceRadius`` of it
    are discarded.  The kept points and their corresponding normals are
    saved in self.objsamplepnts_refcls and self.objsamplenrmls_refcls (one
    stacked array per facet, or an empty (0, 0) array when the facet has no
    samples).

    :param reduceRadius: the neighbors that fall inside the reduceRadius
        will be removed
    :return: None

    author: weiwei
    date: 20161130, osaka
    """
    n_facets = self.facets.shape[0]
    # The builtin ``object`` replaces the ``np.object`` alias, which was
    # removed in NumPy 1.24.
    self.objsamplepnts_refcls = np.empty(shape=(n_facets, ), dtype=object)
    self.objsamplenrmls_refcls = np.empty(shape=(n_facets, ), dtype=object)

    for i, facet in enumerate(self.facets):
        kept_points = []
        kept_normals = []
        X = self.objsamplepnts_ref[i]
        nX = X.shape[0]
        if nX > 0:
            # The labels are irrelevant; the classifier is only used for
            # its radius query.
            neigh = RadiusNeighborsClassifier(radius=1.0)
            neigh.fit(X, range(nX))
            neigharrays = neigh.radius_neighbors(X,
                                                 radius=reduceRadius,
                                                 return_distance=False)
            delset = set()
            for j in range(nX):
                if j not in delset:
                    kept_points.append(np.array(X[j]))
                    kept_normals.append(
                        np.array(self.objsamplenrmls_ref[i][j]))
                    # Everything near a kept sample is dropped.
                    delset.update(neigharrays[j].tolist())

        if kept_points:
            self.objsamplepnts_refcls[i] = np.vstack(kept_points)
            self.objsamplenrmls_refcls[i] = np.vstack(kept_normals)
        else:
            self.objsamplepnts_refcls[i] = np.empty(shape=(0, 0))
            self.objsamplenrmls_refcls[i] = np.empty(shape=(0, 0))
def get_hyperparameters_model():
    """Return the search configuration for the radius-neighbours classifier.

    The result maps ``'radius_neighbors_classifier'`` to a dict holding a
    default-constructed estimator under ``'model'`` and an empty
    ``'param_distributions'`` dict.
    """
    return {
        'radius_neighbors_classifier': {
            'model': RadiusNeighborsClassifier(),
            'param_distributions': {},
        }
    }
def get_classifiers():
    """Return the enabled classifiers as a list of ``(name, estimator)`` pairs.

    All known classifiers are instantiated with their entry from ``params``;
    only those named in ``enabled_classifier_names`` are returned.
    """
    enabled_classifier_names = [
        "RandomForest", "GaussianNB", "MLP", "SGD", "LinearSVC", "LINSVC",
        "RBFSVC"
    ]
    # disabled:
    # GradientBoosting - takes too long / not training
    # DecisionTree - long, not great results
    # RadiusNeighbors - didn't work

    params = {
        "GradientBoosting": {"verbose": 1},
        "RandomForest": {"n_estimators": 50},
        "GaussianNB": {},
        "RadiusNeighbors": {},
        "MLP": {},
        "SGD": {},
        "LinearSVC": {},
        "LINSVC": {"kernel": "linear"},
        "RBFSVC": {"kernel": "rbf"},
        "DecisionTree": {},
    }

    classifiers = {
        "GradientBoosting":
        GradientBoostingClassifier(**params["GradientBoosting"]),
        "RandomForest": RandomForestClassifier(**params["RandomForest"]),
        "GaussianNB": GaussianNB(**params["GaussianNB"]),
        "RadiusNeighbors":
        RadiusNeighborsClassifier(**params["RadiusNeighbors"]),
        "MLP": MLPClassifier(**params["MLP"]),
        "SGD": SGDClassifier(**params["SGD"]),
        "LinearSVC": LinearSVC(**params["LinearSVC"]),
        # "LINSVC" was enabled and parameterised but never instantiated, so
        # it was silently missing from the result.
        "LINSVC": SVC(**params["LINSVC"]),
        "RBFSVC": SVC(**params["RBFSVC"]),
        "DecisionTree": DecisionTreeClassifier(**params["DecisionTree"]),
    }

    return [(name, classifier) for name, classifier in classifiers.items()
            if name in enabled_classifier_names]
def test_model_knn_classifier_binary_class_radius(self):
    """Convert a fitted binary RadiusNeighborsClassifier to ONNX and dump it for comparison."""
    fitted, features = self._fit_model_binary_classification(
        RadiusNeighborsClassifier())
    input_spec = [("input", FloatTensorType([None, features.shape[1]]))]
    onnx_model = convert_sklearn(fitted,
                                 "KNN classifier binary",
                                 input_spec,
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(onnx_model)
    dump_data_and_model(features.astype(numpy.float32),
                        fitted,
                        onnx_model,
                        basename="SklearnRadiusNeighborsClassifierBinary")
def radius(X_train, X_test, y_train, y_test, string, valor):
    """Fit a distance-weighted radius-neighbours classifier and evaluate it.

    :param X_train: training samples.
    :param X_test: test samples.
    :param y_train: training labels; ``.values.ravel()`` is applied, so it
        must expose a ``values`` attribute.
    :param y_test: test labels.
    :param string: ``"prob"`` to return class probabilities; any other
        value is used as a tag in the printed messages.
    :param valor: neighbourhood radius.
    :return: the ``predict_proba`` result for ``X_test`` when ``string`` is
        ``"prob"``; otherwise None, after printing the micro and macro
        F1 scores.
    """
    clf = RadiusNeighborsClassifier(radius=valor,
                                    weights='distance',
                                    n_jobs=-1)
    clf.fit(X_train, y_train.values.ravel())

    if string == "prob":
        return clf.predict_proba(X_test)

    y_predito = clf.predict(X_test)
    micro = f1_score(y_test, y_predito, average='micro')
    macro = f1_score(y_test, y_predito, average='macro')
    print("O f1Score micro do RadiusKnn ", string, " com ", valor,
          " de raio é: ", micro)
    print("O f1Score macro do RadiusKnn ", string, " com ", valor,
          " de raio é: ", macro)