def append_without_dublicates(usual, y, knowledge):
    """Append rows from ``y`` to ``usual['data']``, dropping rows that
    already have a close neighbour (within radius 2) in the existing data.

    ``usual`` is mutated in place; ``knowledge`` supplies the statistics
    used for normalisation.
    """
    if not usual['data']:
        # Nothing to compare against yet: take everything.
        usual['data'] += y
        return
    maxims, averages = get_maxims_and_averages(knowledge)
    known = normalize_fit_input(usual['data'], usual['events'],
                                usual['fields'], averages, maxims)
    candidates = normalize_fit_input(y, usual['events'], usual['fields'],
                                     averages, maxims)
    clf = RadiusNeighborsClassifier(radius=2, metric='euclidean',
                                    outlier_label=-1)
    # All existing rows share one dummy label; a candidate predicted as that
    # label has a neighbour within the radius and counts as a duplicate.
    clf.fit(sparse.csr_matrix(known), [0] * len(known))
    labels = clf.predict(sparse.csr_matrix(candidates))
    # Remove duplicates from the tail so earlier indices stay valid.
    for idx in reversed(range(len(labels))):
        if labels[idx] != -1:
            y.pop(idx)
    usual['data'] += y
def train_models(dict_of_dicts):
    """Fit one RadiusNeighborsClassifier per midi key and pickle it to
    ``workspace.model_folder``.

    Keys whose sample lists are all empty are skipped; samples are reshaped
    into a single feature column before fitting.
    """
    for midi_key, midi_dict in dict_of_dicts.items():
        X, y = [], []
        for label in midi_dict:
            for sample in midi_dict[label]:
                X.append(sample)
                y.append(label)  # label repeated once per sample
        if not X:
            continue
        neigh = RadiusNeighborsClassifier(radius=get_radius(midi_key),
                                          weights='distance',
                                          outlier_label=[7])
        neigh.fit(np.array(X).reshape(-1, 1), np.array(y))
        filename = workspace.model_folder + '/' + str(
            midi_key) + '_rrn_model.sav'
        pickle.dump(neigh, open(filename, 'wb'))
def SequentialRadiusNeighborsClassifier(epsilon, X_train, X_test, Y_train):
    """Sequentially label test samples with a radius-``epsilon`` classifier.

    Test points are consumed in order of increasing distance to the current
    training set; every prediction is folded back into the training set
    before the next point is chosen.  A point with no neighbour inside
    ``epsilon`` opens a brand-new label (max existing label + 1).

    Returns a list of predicted labels aligned with ``X_test``.
    """
    X_train_temp = np.copy(X_train)
    Y_train_temp = np.copy(Y_train)
    Reps = RadiusNeighborsClassifier(radius=epsilon)
    test_size = len(X_test)
    Y_predict = [-1 for x in range(test_size)]
    Y_current = list(set(Y_train))  # labels seen so far
    test_index = [x for x in range(test_size)]  # test rows still unlabelled
    for test_time in range(test_size):
        # Pick the remaining test point closest to the (growing) train set.
        Knn_temp = NearestNeighbors(n_neighbors=1)
        Knn_temp.fit(X_train_temp)
        min_distances = Knn_temp.kneighbors(X_test[test_index])[0]
        min_distances = [np.mean(x) for x in min_distances]
        optimal_indice = min_distances.index(min(min_distances))
        optimal_test = test_index[optimal_indice]
        test_index.remove(optimal_test)
        Reps.fit(X_train_temp, Y_train_temp)
        # Indices of training neighbours within epsilon of the chosen point.
        predict_set = Reps.radius_neighbors(X_test[optimal_test].reshape(
            1, -1))[1]
        predict_set = predict_set[0]
        if predict_set.size > 0:
            y_predict = Reps.predict(X_test[optimal_test].reshape(1, -1))
            y_predict = y_predict[0]
        else:
            # No neighbour in range: start a new cluster label.
            y_predict = max(Y_current) + 1
            Y_current.append(y_predict)
        Y_predict[optimal_test] = y_predict
        # Grow the training data with the newly labelled point.
        X_train_temp = np.append(X_train_temp, [X_test[optimal_test]],
                                 axis=0)
        Y_train_temp = np.append(Y_train_temp, [y_predict], axis=0)
    return Y_predict
def Radius_Neighbors(input_file,Output): lvltrace.lvltrace("LVLEntree dans Radius_Neighbors") ncol=tools.file_col_coma(input_file) data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1)) X = data[:,1:] y = data[:,0] n_samples, n_features = X.shape clf = RadiusNeighborsClassifier(n_neighbors=1) clf.fit(X, y) y_pred = clf.predict(X) print "#########################################################################################################\n" print "Radius Neighbors Accuracy " print "classification accuracy:", metrics.accuracy_score(y, y_pred) print "precision:", metrics.precision_score(y, y_pred) print "recall:", metrics.recall_score(y, y_pred) print "f1 score:", metrics.f1_score(y, y_pred) print "\n" print "#########################################################################################################\n" results = Output+"Raidus_Neighbors_metrics.txt" file = open(results, "w") file.write("Radius Neighbors estimator accuracy\n") file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y, y_pred)) file.write("Precision Score: %f\n"%metrics.precision_score(y, y_pred)) file.write("Recall Score: %f\n"%metrics.recall_score(y, y_pred)) file.write("F1 Score: %f\n"%metrics.f1_score(y, y_pred)) file.write("\n") file.write("True Value, Predicted Value, Iteration\n") for n in xrange(len(y)): file.write("%f,%f,%i\n"%(y[n],y_pred[n],(n+1))) file.close() title = "Radius Neighbors" save = Output + "Radius_Neighbors_confusion_matrix.png" plot_confusion_matrix(y, y_pred,title,save) lvltrace.lvltrace("LVLSortie dans Radius_Neighbors")
def r_neighbors_classifier(self, n_neighbours=5, weights='uniform',
                           algorithm='auto', leaf_size=30, p=2,
                           metric='minkowski'):
    """Fit a radius-based nearest-neighbour classifier on the stored
    training split and keep it in ``self.__model``.

    NOTE(review): ``n_neighbours`` is forwarded as ``n_neighbors`` to
    RadiusNeighborsClassifier, which in current scikit-learn does not
    accept that parameter (it uses ``radius``) -- confirm the installed
    version, or whether ``radius`` was intended.

    :param n_neighbours: Number of neighbours to use
    :param weights: Weight function used in prediction, inputs:
                    uniform: All points in each neighborhood are weighted
                    equally.
                    distance: Weight points by the inverse of their
                    distance, closer points will have a greater influence.
    :param algorithm: Algorithm used to compute the nearest neighbors,
                      inputs:
                      ball_tree: Fast generalized N-point problems.
                      KDTree: Euclidean tree of n-dimensions.
                      brute: Brute-force search.
                      auto: Will try to decide the most appropriate
                      algorithm given the fit function.
    :param leaf_size: Leaf size passed to the tree. This can affect the
                      computation speed/time.
    :param p: Parameter for the Minkowski metric.
    :param metric: Distance metric to use for the tree, inputs:
                   euclidean, manhattan, chebyshev, minkowski,
                   seuclidean, mahalanobis
    :return: None -- the fitted model is stored on ``self.__model``.
    """
    model = RadiusNeighborsClassifier(n_neighbors=n_neighbours,
                                      weights=weights,
                                      algorithm=algorithm,
                                      leaf_size=leaf_size,
                                      p=p,
                                      metric=metric)
    model.fit(self.__x_train, self.__y_train)
    self.__model = model
def rnn_model(train_input, train_target, test_input, test_target):
    """Fit a radius-neighbours classifier (radius 3.0) and print its mean
    accuracy on the training and testing splits."""
    r_neigh = RadiusNeighborsClassifier(radius=3.0)
    r_neigh.fit(train_input, train_target)
    # BUG FIX: the messages previously claimed r=1 while the model is
    # built with radius=3.0.
    print("R-NN (r=3) accuracy for training set: %s" %
          (r_neigh.score(train_input, train_target)))
    print("R-NN (r=3) accuracy for testing set: %s" %
          (r_neigh.score(test_input, test_target)))
def palabra(directorio):
    """Transcribe each long-enough WAV file in ``directorio`` into a word
    by classifying spectrogram formant points with a radius-0.12
    classifier trained on the global (X, Y) data.

    Returns all recognised words joined by spaces.
    """
    global X, Y
    words = ''
    neigh = RadiusNeighborsClassifier(radius=0.12)
    neigh.fit(X, Y)
    for filename in os.listdir(directorio):
        word = ' '
        fs, x = wv.read(directorio + '/' + filename)
        n = len(x)
        # Skip clips shorter than half a second.
        if ((n / fs) > 0.5):
            spectrum, freqs, t, im = plt.specgram(x[:, 1], NFFT=1024,
                                                  Fs=fs, sides='onesided')
            for i in range(len(spectrum)):
                point = findForm(
                    np.linspace(0, freqs[len(spectrum[i])] / 1000,
                                len(spectrum[i])), spectrum[i])
                if point[0] != 0 and point[1] != 0:
                    try:
                        print(point)
                        val = neigh.predict([point])
                        char = valores(val[0])
                        print(char)
                        # Collapse consecutive repeats of the same letter.
                        if word[-1] != char:
                            word = word + char
                    # BUG FIX: narrowed from a bare ``except:``, which also
                    # swallowed KeyboardInterrupt/SystemExit.
                    except Exception:
                        print('No neighbors found for the given radius')
        words = words + ' ' + word
    return words
def train_model(self, X_train, y_train, modelpath):
    """Fit a RadiusNeighborsClassifier using this object's hyper-parameters,
    persist it to ``modelpath`` and return the fitted model."""
    clf = RadiusNeighborsClassifier(radius=self.radius,
                                    weights=self.weights,
                                    algorithm=self.algorithm,
                                    p=self.power_param,
                                    outlier_label=self.outlier_label)
    clf.fit(X_train, y_train)
    self.save_model(clf, modelpath)
    return clf
def clusterFacetSamplesRNN(self, reduceRadius=3):
    """
    cluster the samples of each facet using radius nearest neighbours
    the cluster center and their correspondent normals
    will be saved in self.objsamplepnts_refcls and
    self.objsamplenrmals_refcls

    :param: reduceRadius: the neighbors that fall inside the reduceradius
            will be removed
    :return: None

    author: weiwei
    date: 20161130, osaka
    """
    self.objsamplepnts_refcls = np.ndarray(shape=(self.facets.shape[0], ),
                                           dtype=np.object)
    self.objsamplenrmls_refcls = np.ndarray(shape=(self.facets.shape[0], ),
                                            dtype=np.object)
    for i, facet in enumerate(self.facets):
        self.objsamplepnts_refcls[i] = []
        self.objsamplenrmls_refcls[i] = []
        X = self.objsamplepnts_ref[i]
        nX = X.shape[0]
        if nX > 0:
            # The classifier is used only for its radius_neighbors query;
            # the labels (range(nX)) are dummies.
            neigh = RadiusNeighborsClassifier(radius=1.0)
            neigh.fit(X, range(nX))
            neigharrays = neigh.radius_neighbors(X, radius=reduceRadius,
                                                 return_distance=False)
            delset = set([])
            for j in range(nX):
                if j not in delset:
                    # Keep sample j as a cluster representative...
                    self.objsamplepnts_refcls[i].append(np.array(X[j]))
                    self.objsamplenrmls_refcls[i].append(
                        np.array(self.objsamplenrmls_ref[i][j]))
                    # ...and mark all its radius-neighbours for removal.
                    delset.update(neigharrays[j].tolist())
        # Pack kept samples into arrays; empty facets get (0, 0) arrays.
        if self.objsamplepnts_refcls[i]:
            self.objsamplepnts_refcls[i] = np.vstack(
                self.objsamplepnts_refcls[i])
            self.objsamplenrmls_refcls[i] = np.vstack(
                self.objsamplenrmls_refcls[i])
        else:
            self.objsamplepnts_refcls[i] = np.empty(shape=(0, 0))
            self.objsamplenrmls_refcls[i] = np.empty(shape=(0, 0))
def knnClassifier():
    """Compare a 1-NN classifier against a radius-500 distance-weighted
    neighbours classifier on the configured feature vectors, printing each
    test-set score."""
    trainData, trainLabel = featureArray(conf['train']['feature_vector'])
    testData, testLabel = featureArray(conf['test']['feature_vector'])
    nearest = KNeighborsClassifier(n_neighbors=1, algorithm='auto', p=2)
    nearest.fit(trainData, trainLabel)
    print(nearest.score(testData, testLabel))
    radiusClf = RadiusNeighborsClassifier(radius=500, weights='distance',
                                          algorithm='auto', p=2,
                                          metric='minkowski')
    radiusClf.fit(trainData, trainLabel)
    print(radiusClf.score(testData, testLabel))
class r07525032_RadiusNeighbors(classification):
    """Radius-neighbours classification wrapper driven by ``self.param``."""

    def trainAlgo(self):
        # Collapse one-hot targets to class indices before fitting.
        labels = np.argmax(self.outputData['Y'], axis=1)
        self.model = RadiusNeighborsClassifier(
            radius=self.param['radius'],
            weights=self.param['weights'],
            algorithm=self.param['algorithm'],
            p=self.param['p'])
        self.model.fit(self.inputData['X'], labels)

    def predictAlgo(self):
        predicted = self.model.predict(self.inputData['X'])
        # Restore the one-hot encoding expected downstream.
        self.result['Y'] = to_categorical(predicted)
class _RadiusNeighborsClassifierImpl:
    """Thin wrapper delegating fit/predict to the wrapped ``Op`` estimator
    built from the supplied hyper-parameters."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        # Forward the labels only when they were supplied.
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
def knnClassifier():
    """Compare a 1-NN classifier with a radius-500 distance-weighted
    neighbours classifier on the configured feature vectors and print the
    test-set score of each."""
    trainData, trainLabel = featureArray(conf['train']['feature_vector'])
    testData, testLabel = featureArray(conf['test']['feature_vector'])
    oneNearest = KNeighborsClassifier(n_neighbors=1, algorithm='auto', p=2)
    oneNearest.fit(trainData, trainLabel)
    print(oneNearest.score(testData, testLabel))
    radiusModel = RadiusNeighborsClassifier(radius=500,
                                            weights='distance',
                                            algorithm='auto',
                                            p=2,
                                            metric='minkowski')
    radiusModel.fit(trainData, trainLabel)
    print(radiusModel.score(testData, testLabel))
def clusterFacetSamplesRNN(self, reduceRadius=3):
    """
    cluster the samples of each facet using radius nearest neighbours
    the cluster center and their correspondent normals
    will be saved in self.objsamplepnts_refcls and
    self.objsamplenrmals_refcls

    :param: reduceRadius: the neighbors that fall inside the reduceradius
            will be removed
    :return: None

    author: weiwei
    date: 20161130, osaka
    """
    self.objsamplepnts_refcls = np.ndarray(shape=(self.facets.shape[0],),
                                           dtype=np.object)
    self.objsamplenrmls_refcls = np.ndarray(shape=(self.facets.shape[0],),
                                            dtype=np.object)
    for i, facet in enumerate(self.facets):
        self.objsamplepnts_refcls[i] = []
        self.objsamplenrmls_refcls[i] = []
        X = self.objsamplepnts_ref[i]
        nX = X.shape[0]
        if nX > 0:
            # The classifier only serves the radius_neighbors query;
            # the labels (range(nX)) are dummies.
            neigh = RadiusNeighborsClassifier(radius=1.0)
            neigh.fit(X, range(nX))
            neigharrays = neigh.radius_neighbors(X, radius=reduceRadius,
                                                 return_distance=False)
            delset = set([])
            for j in range(nX):
                if j not in delset:
                    # Keep sample j as a cluster representative...
                    self.objsamplepnts_refcls[i].append(np.array(X[j]))
                    self.objsamplenrmls_refcls[i].append(
                        np.array(self.objsamplenrmls_ref[i][j]))
                    # ...and mark its radius-neighbours for removal.
                    delset.update(neigharrays[j].tolist())
        # Pack kept samples into arrays; empty facets get (0, 0) arrays.
        if self.objsamplepnts_refcls[i]:
            self.objsamplepnts_refcls[i] = np.vstack(
                self.objsamplepnts_refcls[i])
            self.objsamplenrmls_refcls[i] = np.vstack(
                self.objsamplenrmls_refcls[i])
        else:
            self.objsamplepnts_refcls[i] = np.empty(shape=(0, 0))
            self.objsamplenrmls_refcls[i] = np.empty(shape=(0, 0))
def _RadiusNeighborsClassifier(*, train, test, x_predict=None, metrics,
                               radius=1.0, weights='uniform',
                               algorithm='auto', leaf_size=30, p=2,
                               metric='minkowski', outlier_label=None,
                               metric_params=None, n_jobs=None, **kwargs):
    """Fit, score and optionally predict with a RadiusNeighborsClassifier.

    ``train``/``test`` are (X, y) pairs; ``metrics`` selects the reported
    score ('accuracy', 'f1' or 'jaccard').  Returns
    ``(model_name, score, predictions_or_None)``.

    For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.RadiusNeighborsClassifier.html#sklearn.neighbors.RadiusNeighborsClassifier
    """
    model = RadiusNeighborsClassifier(radius=radius, weights=weights,
                                      algorithm=algorithm,
                                      leaf_size=leaf_size, p=p,
                                      metric=metric,
                                      outlier_label=outlier_label,
                                      metric_params=metric_params,
                                      n_jobs=n_jobs, **kwargs)
    model.fit(train[0], train[1])
    model_name = 'Radius Neighbors Classifier'
    y_hat = model.predict(test[0])
    # BUG FIX: an unrecognized ``metrics`` value previously left
    # ``accuracy`` unbound, raising UnboundLocalError at the return.
    if metrics == 'accuracy':
        accuracy = accuracy_score(test[1], y_hat)
    elif metrics == 'f1':
        accuracy = f1_score(test[1], y_hat)
    elif metrics == 'jaccard':
        accuracy = jaccard_score(test[1], y_hat)
    else:
        raise ValueError(
            "metrics must be 'accuracy', 'f1' or 'jaccard', got %r"
            % (metrics,))
    if x_predict is None:
        return (model_name, accuracy, None)
    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
def knnClassifier(xTrain, yTrain, xTest, yTest): # Create KNeighbor & RadiusNeighbor Classifiers knnKNeighbors = KNeighborsClassifier() knnRadiusNeighbors = RadiusNeighborsClassifier() # Fit data knnKNeighbors.fit(xTrain, yTrain) knnRadiusNeighbors.fit(xTrain, yTrain) # Find matches between predicted & actual values matchesKNeighbors = [i for i,j in zip(knnKNeighbors.predict(xTest), yTest) if i == j] matchesRadiusNeighbors = [i for i,j in zip(knnKNeighbors.predict(xTest), yTest) if i == j] print "Accuracy of KNeighbors: ", (float(len(matchesKNeighbors))/len(yTest)) * 100 print "Accuracy of RadiusNeighbors: ", (float(len(matchesRadiusNeighbors))/len(yTest)) * 100
def draw(self):
    """Draw the estimated floorplan in the current figure."""
    xy = self.dimred.transform(self._fingerprints)
    x_lo, x_hi = xy[:, 0].min(), xy[:, 0].max()
    y_lo, y_hi = xy[:, 1].min(), xy[:, 1].max()
    xx, yy = np.meshgrid(np.arange(x_lo, x_hi, 1.0),
                         np.arange(y_lo, y_hi, 1.0))
    # Grid cells with no fingerprint within radius 3 get outlier label 0.
    clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
    clf.fit(xy, self._label)
    grid = np.c_[xx.ravel(), yy.ravel()]
    label = clf.predict(grid).reshape(xx.shape)
    plt.pcolormesh(xx, yy, label)
    plt.scatter(xy[:, 0], xy[:, 1], c=self._label, vmin=0)
def runRNC(X_train, y_train, X_test, R=1.0, weights="uniform", outlier=None):
    """Fit a radius-R neighbours classifier on the training data and
    return its predictions for ``X_test``."""
    rnc = RadiusNeighborsClassifier(R, weights=weights,
                                    outlier_label=outlier)
    rnc.fit(X_train, y_train)
    return rnc.predict(X_test)
def draw(self):
    """Draw the estimated floorplan in the current figure."""
    xy = self.dimred.transform(self._fingerprints)
    xs, ys = xy[:, 0], xy[:, 1]
    xx, yy = np.meshgrid(np.arange(xs.min(), xs.max(), 1.0),
                         np.arange(ys.min(), ys.max(), 1.0))
    # Cells with no fingerprint within radius 3 fall back to label 0.
    clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
    clf.fit(xy, self._label)
    label = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.pcolormesh(xx, yy, label)
    plt.scatter(xs, ys, c=self._label, vmin=0)
def radius_neighbors_clustering(X_train, X_test, y_train, y_test, parameters,
                                evaluation_metrics):
    """Fit a radius-neighbours classifier described by ``parameters``,
    score it on the test split and record the accuracy in
    ``evaluation_metrics`` (which is mutated and returned)."""
    # Normalise the raw parameters (mainly the distance metric).
    params = prepare_parameters(parameters)
    if params["distance"] == "mahalanobis":
        # Mahalanobis needs an explicit inverse covariance matrix and is
        # only supported with brute-force search.
        # NOTE(review): np.cov(X_train) treats rows as variables; confirm
        # the expected orientation of X_train.
        clf = RadiusNeighborsClassifier(
            n_jobs=-1,
            radius=params["radius"],
            metric=params["distance"],
            p=params["minkowski_p"],
            algorithm="brute",
            metric_params={"VI": np.linalg.inv(np.cov(X_train))})
    else:
        clf = RadiusNeighborsClassifier(
            n_jobs=-1,
            radius=params["radius"],
            metric=params["distance"],
            p=params["minkowski_p"])
    fitted = clf.fit(X_train, y_train)
    y_pred = fitted.predict(X_test)
    evaluation_metrics["accuracy"] = fitted.score(X_test, y_test)
    return evaluation_metrics
def runRNC(X_train, y_train, X_test, R=1.0, weights='uniform', outlier=None):
    """Train a radius-R neighbours classifier and return predictions for
    the test matrix."""
    fitted = RadiusNeighborsClassifier(R, weights=weights,
                                       outlier_label=outlier).fit(X_train,
                                                                  y_train)
    return fitted.predict(X_test)
def test_model_knn_iris_classifier_multi_reg2_weight_radius(self):
    """ONNX conversion round-trip for a multi-output
    RadiusNeighborsClassifier (brute force, distance weights) fitted on a
    small iris slice."""
    iris = datasets.load_iris()
    X = iris.data.astype(numpy.float32)
    y = iris.target.astype(numpy.int64)
    # Build two binary targets from the class index (multi-label setup).
    y = numpy.vstack([(y + 1) % 2, y % 2]).T
    model = RadiusNeighborsClassifier(
        algorithm='brute', weights='distance')
    model.fit(X[:13], y[:13])
    # Convert with the cdist optimisation and without ZipMap wrapping.
    onx = to_onnx(model, X[:1],
                  options={id(model): {'optim': 'cdist', 'zipmap': False}},
                  target_opset=TARGET_OPSET)
    dump_data_and_model(
        X.astype(numpy.float32)[:11], model, onx,
        basename="SklearnRadiusNeighborsClassifierMReg2-Out0")
def radiusNeighborsClassification(self, X_train, y_train):
    """Train and return a radius-1.0 nearest-neighbour classifier.

    Parameters
    ----------
    X_train: Array shape [n_samples, n_features] for training the model
        with features
    y_train: Array shape [n_samples] for training the model with features
        of that target

    Returns
    -------
    model: The trained nearest neighbor model.
    """
    clf = RadiusNeighborsClassifier(radius=1.0, weights='uniform')
    clf.fit(X_train, y_train)
    return clf
def KNN_diabetes_demo():
    """Compare two KNN variants and a radius-neighbours classifier on the
    Pima Indians diabetes data set, printing hold-out and 10-fold
    cross-validation scores."""
    import numpy as np
    import pandas as pd
    # get data (first 8 columns are features, column 8 is the outcome)
    data = pd.read_csv("D:/PythonProjects/pima-indians-diabetes.csv")
    print(data.shape)
    print("data.head(): ", data.head())  # first 5 rows
    # split input and outcome
    X = data.iloc[:, 0:8]
    Y = data.iloc[:, 8]
    # split the training data and test data
    from sklearn.model_selection import train_test_split
    # random_state is a seed which will decide the way of splitting
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                        test_size=0.2,
                                                        random_state=22)
    # prediction
    from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
    model1 = KNeighborsClassifier(n_neighbors=2)
    model1.fit(X_train, Y_train)
    score1 = model1.score(X_test, Y_test)
    model2 = KNeighborsClassifier(n_neighbors=2, weights='distance')
    model2.fit(X_train, Y_train)
    score2 = model2.score(X_test, Y_test)
    # BUG FIX: RadiusNeighborsClassifier has no ``n_neighbors`` parameter
    # (passing it raises TypeError); only ``radius`` applies here.
    model3 = RadiusNeighborsClassifier(radius=500.0)
    model3.fit(X_train, Y_train)
    score3 = model3.score(X_test, Y_test)
    # compare the results of the three models
    print(score1, score2, score3)
    # cross validation
    from sklearn.model_selection import cross_val_score
    result1 = cross_val_score(model1, X, Y, cv=10)
    result2 = cross_val_score(model2, X, Y, cv=10)
    result3 = cross_val_score(model3, X, Y, cv=10)
    print(result1.mean(), result2.mean(), result3.mean())
class Adaptive_KNN_Model(IMachineLearning):
    """Radius-based ("adaptive") nearest-neighbour model; performs
    regression or classification depending on the ``regression`` flag."""

    def __init__(self, regression=True, radius=1.0, weights='distance',
                 algorithm='auto', leaf_size=30, p=2, metric='minkowski',
                 outlier_label=None, metric_params=None):
        self._regression = regression
        self._radius = radius
        self._weights = weights
        self._algorithm = algorithm
        self._leaf_size = leaf_size
        self._p = p
        self._metric = metric
        self._metric_params = metric_params
        self._outlier_label = outlier_label
        # BUG FIX: arguments are now passed by keyword.  Previously
        # ``metric_params`` was passed positionally to the classifier,
        # landing in the ``outlier_label`` slot, and ``outlier_label``
        # itself was never forwarded.
        if regression:
            self._model = RadiusNeighborsRegressor(
                radius=radius, weights=weights, algorithm=algorithm,
                leaf_size=leaf_size, p=p, metric=metric,
                metric_params=metric_params)
        else:
            self._model = RadiusNeighborsClassifier(
                radius=radius, weights=weights, algorithm=algorithm,
                leaf_size=leaf_size, p=p, metric=metric,
                outlier_label=outlier_label, metric_params=metric_params)
        return super().__init__()

    def train(self, xData, yData):
        """Fit the underlying model; both inputs must be pandas objects.

        Raises ValueError on invalid argument types.
        """
        if not isinstance(xData, pd.DataFrame):
            raise ValueError('Invalid xData')
        if not isinstance(yData, pd.DataFrame) and not isinstance(
                yData, Series):
            raise ValueError('Invalid yData')
        self._xData = xData
        self._yData = yData
        self._model = self._model.fit(self._xData, self._yData)

    def predict(self, xData):
        """Predict targets for a DataFrame of features; raises ValueError
        for non-DataFrame input."""
        if isinstance(xData, str):
            raise ValueError('Invalid Argument')
        if not isinstance(xData, pd.DataFrame):
            raise ValueError("Invalid Argument")
        self._prd = self._model.predict(xData)
        return self._prd
def radius(X_train, X_test, y_train, y_test, string, valor):
    """Radius-KNN with distance weights.

    When ``string == "prob"`` returns class probabilities for ``X_test``;
    otherwise prints (and computes) micro/macro F1 scores of the
    predictions.
    """
    # FIX: the classifier was previously constructed and fitted twice
    # (once per branch); build it once and branch on the output only.
    clf = RadiusNeighborsClassifier(radius=valor, weights='distance',
                                    n_jobs=-1)
    clf.fit(X_train, y_train.values.ravel())
    if string == "prob":
        return clf.predict_proba(X_test)
    y_predito = clf.predict(X_test)
    micro = f1_score(y_test, y_predito, average='micro')
    macro = f1_score(y_test, y_predito, average='macro')
    print("O f1Score micro do RadiusKnn ", string, " com ", valor,
          " de raio é: ", micro)
    print("O f1Score macro do RadiusKnn ", string, " com ", valor,
          " de raio é: ", macro)
def givenAddressTime(lat, long, time):
    """Predict the dispatch call type for the given coordinates and time
    using a radius-10 distance-weighted neighbours classifier trained on
    the SFPD dispatch subset."""
    # Single-row frame holding the query point.
    query = pd.DataFrame({
        'latitude': [lat],
        'longitude': [long],
        'received_timestamp': [time]
    })
    df = pd.read_csv('./data/sfpd_dispatch_data_subset.csv', sep=',')
    features = df[['latitude', 'longitude', 'received_timestamp']]
    # Timestamps become floats so they are usable in distance computations.
    # NOTE(review): assigning into this column selection may raise pandas'
    # SettingWithCopyWarning -- confirm this is intended.
    features['received_timestamp'] = features['received_timestamp'].apply(
        convertTimestamp)
    targets = df[['call_type']]
    neighbor = RadiusNeighborsClassifier(
        radius=10.0,
        weights='distance',
        outlier_label="Error, cannot predict this far")
    neighbor.fit(features, targets.values.ravel())
    return neighbor.predict(query)
def radiusNeighborClassifier():
    """Grid-search the radius (100..1000) and weighting scheme on the
    validation split, then score the best combination on the test split
    and record it under 'Radius Neighbor' in guideToGraph."""
    bestScore = 0
    bestParams = ['0', '0']
    for radiusValue in xrange(100, 1001, 100):
        uniformClf = RadiusNeighborsClassifier(radius=radiusValue,
                                               weights='uniform',
                                               algorithm='auto', p=2,
                                               metric='minkowski')
        uniformClf.fit(trainData, trainLabel)
        distanceClf = RadiusNeighborsClassifier(radius=radiusValue,
                                                weights='distance',
                                                algorithm='auto', p=2,
                                                metric='minkowski')
        distanceClf.fit(trainData, trainLabel)
        scoreUniform = uniformClf.score(validationData, validationLabel)
        scoreDistance = distanceClf.score(validationData, validationLabel)
        better = max(scoreUniform, scoreDistance)
        if better > bestScore:
            bestScore = better
            bestParams[0] = str(radiusValue)
            bestParams[1] = ('distance' if scoreDistance > scoreUniform
                             else 'uniform')
    finalClf = RadiusNeighborsClassifier(radius=int(bestParams[0]),
                                         weights=bestParams[1],
                                         algorithm='auto', p=2,
                                         metric='minkowski')
    finalClf.fit(trainData, trainLabel)
    guideToGraph['Radius Neighbor'] = finalClf.score(testData, testLabel)
def Radius_Neighbors(input_file,Output,test_size):
    """Train a RadiusNeighborsClassifier on a train/test split of the CSV
    ``input_file`` (label in the first used column), write the metrics to
    a text file under ``Output`` and save a confusion-matrix PNG.

    A ValueError raised during prediction (no neighbour within the tiny
    0.001 radius) is caught and reported in the metrics file instead of
    propagating.
    """
    lvltrace.lvltrace("LVLEntree dans radius_kneighbors split_test")
    try:
        ncol=tools.file_col_coma(input_file)
        data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
        X = data[:,1:]
        y = data[:,0]
        n_samples, n_features = X.shape
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
        print X_train.shape, X_test.shape
        clf = RadiusNeighborsClassifier(radius=0.001, weights='uniform', algorithm='auto')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        print "Radius Neighbors accuracy "
        print "classification accuracy:", metrics.accuracy_score(y_test, y_pred)
        print "precision:", metrics.precision_score(y_test, y_pred)
        print "recall:", metrics.recall_score(y_test, y_pred)
        print "f1 score:", metrics.f1_score(y_test, y_pred)
        print "\n"
        # NOTE: "Raidus" typo kept -- other tooling may expect this name.
        results = Output+"Raidus_Neighbors_metrics_test.txt"
        file = open(results, "w")
        file.write("Radius Neighbors estimator accuracy\n")
        file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y_test, y_pred))
        file.write("Precision Score: %f\n"%metrics.precision_score(y_test, y_pred))
        file.write("Recall Score: %f\n"%metrics.recall_score(y_test, y_pred))
        file.write("F1 Score: %f\n"%metrics.f1_score(y_test, y_pred))
        file.write("\n")
        file.write("True Value, Predicted Value, Iteration\n")
        for n in xrange(len(y_test)):
            file.write("%f,%f,%i\n"%(y_test[n],y_pred[n],(n+1)))
        file.close()
        title = "Radius Neighbors %f"%test_size
        save = Output + "Radius_Neighbors_confusion_matrix"+"_%s.png"%test_size
        plot_confusion_matrix(y_test, y_pred,title,save)
    except (ValueError):
        # Typically: no training sample within the radius for some test row.
        results = Output+"Raidus_Neighbors_metrics_test.txt"
        file = open(results, "w")
        file.write("In configuration.py file: No neighbors found for test samples, you can try using larger radius, give a label for outliers, consider or removing them from your dataset.")
        file.close()
    lvltrace.lvltrace("LVLSortie dans radius_kneighbors split_test")
def main(): X_train_all, t_train_all, train_all_ids = create_data_matrix(0, 3086, TRAIN_DIR) X_train, X_valid, t_train, t_valid = train_test_split(X_train_all, t_train_all, test_size=0.20, random_state=37) X_test_all, t_test_all, test_all_ids = create_data_matrix(0, 3724, TEST_DIR) sv = svm.SVC(kernel='poly') sv.fit(X_train, t_train) print "SVM Score was: %f" % clf.score(X_valid, t_valid) rf = RandomForestClassifier(n_estimators=30, min_samples_split=1, random_state=37) rf.fit(X_train, t_train) print "RandomForest Score was: %f" % (rf.score(X_valid, t_valid)) lr = LogisticRegression(penalty='l2',solver='newton-cg',max_iter=500) lr.fit(X_train, t_train) print "LogisticRegression Score was: %f" % (lr.score(X_valid, t_valid)) clf = GaussianNB() clf.fit(X_train, t_train) print "GaussianNB Score was: %f" % (clf.score(X_valid, t_valid)) nn = KNeighborsClassifier(n_neighbors=6, weights='uniform') nn.fit(X_train, t_train) score = nn.score(X_valid, t_valid) print "KNeighbors Score was: %f" % (score) rnc = RadiusNeighborsClassifier(radius=6,outlier_label=8, p=2) rnc.fit(X_train, t_train) print "RadiusNeighbors Score was: %f" % (rnc.score(X_valid, t_valid)) # Get predictions rf = RandomForestClassifier(n_estimators=30, min_samples_split=1) rf.fit(X_train_all, t_train_all) test_predictions = rf.predict(X_test_all) write_to_file("prediction.csv", test_all_ids, test_predictions)
def radius_neighbors_clustering(X_train, X_test, y_train, y_test, parameters):
    """Fit and score a radius-neighbours classifier, measuring CPU time of
    both phases.

    Returns ``(accuracy, train_cpu_seconds, test_cpu_seconds)``.
    """
    clf = RadiusNeighborsClassifier(n_jobs=-1,
                                    radius=parameters["radius"],
                                    metric=parameters["distance"])
    train_start = time.process_time()
    fitted = clf.fit(X_train, y_train)
    train_end = time.process_time()
    test_start = time.process_time()
    y_pred = fitted.predict(X_test)
    test_end = time.process_time()
    accuracy = fitted.score(X_test, y_test)
    return accuracy, train_end - train_start, test_end - test_start
def radiusNeighborClassifier():
    """Search radius values 100..1000 with uniform vs distance weighting
    on the validation split; score the winning configuration on the test
    split and store it in guideToGraph['Radius Neighbor']."""
    topScore = 0
    winner = ['0', '0']
    for r in xrange(100, 1001, 100):
        clfUniform = RadiusNeighborsClassifier(radius=r, weights='uniform',
                                               algorithm='auto', p=2,
                                               metric='minkowski')
        clfUniform.fit(trainData, trainLabel)
        clfDistance = RadiusNeighborsClassifier(radius=r,
                                                weights='distance',
                                                algorithm='auto', p=2,
                                                metric='minkowski')
        clfDistance.fit(trainData, trainLabel)
        accUniform = clfUniform.score(validationData, validationLabel)
        accDistance = clfDistance.score(validationData, validationLabel)
        if max(accUniform, accDistance) > topScore:
            topScore = max(accUniform, accDistance)
            winner[0] = str(r)
            winner[1] = 'distance' if accDistance > accUniform else 'uniform'
    finalModel = RadiusNeighborsClassifier(radius=int(winner[0]),
                                           weights=winner[1],
                                           algorithm='auto', p=2,
                                           metric='minkowski')
    finalModel.fit(trainData, trainLabel)
    guideToGraph['Radius Neighbor'] = finalModel.score(testData, testLabel)
X_test = sample2 y_test = labels[272:, i] else: X_train = training y_train = labels[:172, i] X_test = sampletest y_test = labels[172:, i] posterior = np.empty([100, 72, 6]) box = np.zeros([6, 6]) for j in range(4, 5): for k in range(1, 2): accuracy = np.zeros(100) for m in range(0, 100): rnc = RadiusNeighborsClassifier(radius=j, leaf_size=k) rnc.fit(X_train, y_train) y_pred = rnc.predict(X_test) n = 0 for i in range(0, len(y_pred)): if y_pred[i] == y_test[i]: # print i, y_pred[i], y_test[i] n = n + 1 accuracy[m] = accuracy[m] + 1 box[y_test[i] - 1, y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1 # posterior[m] = knc.predict_proba(X_test) print j, k, np.mean(accuracy) / 0.72, np.std(accuracy) / 0.72 # print 30, 20, sum(accuracy[0:8])/8.0, sum(accuracy[8:18])/10.0, sum(accuracy[18:30])/12.0, sum(accuracy[56:72])/16.0, sum(accuracy[30:43])/13.0, sum(accuracy[43:56])/13.0, sum(accuracy)/72.0 """ means = np.empty([72,6]) stds = np.empty([72,6])
def par(X_tr, y_tr, X_te, r):
    """Predict labels for ``X_te`` with a radius-``r`` neighbours
    classifier fitted on (X_tr, y_tr)."""
    clf = RadiusNeighborsClassifier(radius=r)
    clf.fit(X_tr, y_tr)
    return clf.predict(X_te)
X_test = sample2 y_test = labels[272:, i] else: X_train = training y_train = labels[:172, i] X_test = sampletest y_test = labels[172:, i] posterior = np.empty([100, 72, 6]) box = np.zeros([6, 6]) for j in range(4, 5): for k in range(1, 2): accuracy = np.zeros(100) for m in range(0, 100): rnc = RadiusNeighborsClassifier(radius=j, leaf_size=k) rnc.fit(X_train, y_train) y_pred = rnc.predict(X_test) n = 0 for i in range(0, len(y_pred)): if y_pred[i] == y_test[i]: #print i, y_pred[i], y_test[i] n = n + 1 accuracy[m] = accuracy[m] + 1 box[y_test[i] - 1, y_pred[i] - 1] = box[y_test[i] - 1, y_pred[i] - 1] + 1 #posterior[m] = knc.predict_proba(X_test) print j, k, np.mean(accuracy) / 0.72, np.std(accuracy) / 0.72 #print 30, 20, sum(accuracy[0:8])/8.0, sum(accuracy[8:18])/10.0, sum(accuracy[18:30])/12.0, sum(accuracy[56:72])/16.0, sum(accuracy[30:43])/13.0, sum(accuracy[43:56])/13.0, sum(accuracy)/72.0 ''' means = np.empty([72,6])
matrix = confusion_matrix(y_test, y_test_pred) score = gradBoost.score(X_test, y_test) no_selection_performance.append( ('Gradient Boosting Classifier', score, matrix)) print('K Nearest Neighbors') kNeigh = KNeighborsClassifier(n_neighbors=3) kNeigh.fit(X_train, y_train) y_test_pred = kNeigh.predict(X_test) matrix = confusion_matrix(y_test, y_test_pred) score = kNeigh.score(X_test, y_test) no_selection_performance.append(('K Nearest Neighbours', score, matrix)) print('Radius Nearest Neighbors') rNeigh = RadiusNeighborsClassifier(radius=42.0) rNeigh.fit(X_train, y_train) y_test_pred = rNeigh.predict(X_test) matrix = confusion_matrix(y_test, y_test_pred) score = rNeigh.score(X_test, y_test) no_selection_performance.append(('Radius Nearest Neighbours', score, matrix)) print('Decision Tree Classifier') dTree = DecisionTreeClassifier(random_state=0) dTree.fit(X_train, y_train) y_test_pred = dTree.predict(X_test) matrix = confusion_matrix(y_test, y_test_pred) score = dTree.score(X_test, y_test) no_selection_performance.append(('Decision Tree Classifier', score, matrix)) print('Bagging (with K Nearest Neighbors)') bagging = BaggingClassifier(KNeighborsClassifier(),
__author__ = 'Administrator'
from sklearn.neighbors import RadiusNeighborsClassifier
import src.Utils.FeatureExtractor as FE
import src.Utils.Predict as Pre

if __name__ == "__main__":
    # Train on dataset1 features, then evaluate predictions on dataset2.
    temp = FE.ExtractFeatureFile("../../ins/data/dataset1.feature")
    temp1 = FE.ExtractFeatureFile("../../ins/data/dataset2.feature")
    clf = RadiusNeighborsClassifier(radius=1.0)
    # NOTE(review): assumes temp[0] is the feature matrix and temp[1] the
    # labels -- confirm against FeatureExtractor.
    print clf.fit(temp[0], temp[1])
    Pre.Predict(clf, temp1)
class Model(object):
    """
    Text-classification system built on scikit-learn.
    For reference see: http://scikit-learn.org/stable/

    This Model class is based on the Data class.
    Defines training and test data, builds the classification model and
    provides evaluation methods.

    Parameter
    ---------
    data : Data, optional
        Contains a data object with filled data.real_data.

    data_list : array, shape = [data1 object, data2 object, ...]
        Contains data objects with filled data.real_data.

    Attributes
    ----------
    clf : classifier object from the sklearn modules.
        Contains a selected classifier object from an sklearn module.
        see reference:
        http://scikit-learn.org/stable/supervised_learning.html#supervised-learning

    classifier_list : array, shape = [string classifier1 name, ...]
        Contains names of all available classification algorithms.

    __train_data_set : boolean
        Contains a boolean value that describes if train_data is set.

    train_data : Data
        Contains the data object that is set as training data.

    test_data : Data
        Contains the data object that is set as test data.

    train_targets : numpy array of shape [n_samples]
        Contains the class labels of the training data. A sample is a
        textpair object; its class label is found in textpair.target.

    train_samples : numpy array of shape [n_samples, n_features]
        Contains the feature values of the training data. A sample is a
        textpair object; its feature values are found in the
        textpair.features hash. After vectorize() they are stored in
        textpair.feature_vector.

    test_targets : numpy array of shape [n_samples]
        Contains the class labels of the test data. A sample is a textpair
        object; its class label is found in textpair.target.

    test_samples : numpy array of shape [n_samples, n_features]
        Contains the feature values of the test data. A sample is a
        textpair object; its feature values are found in the
        textpair.features hash. After vectorize() they are stored in
        textpair.feature_vector.
    """

    def __init__(self, data=None, data_list=None):
        # No classifier selected yet; set_classifier() must be called first.
        self.clf = None
        # Accept either one data object or a list of them.
        if data is not None:
            self.data_list = [data]
        elif data_list is not None:
            self.data_list = data_list
        self.classifier_list = ["svm_linear", "svm_poly", "naive_bayes",
                                "decision_tree", "nearest_centroid",
                                "k_neighbors", "radius_neighbors"]
        # Name-mangled flag: tracks whether training data has been set.
        self.__train_data_set = False

    def set_train_data(self, data_name):
        """Setter for training data.

        Walk through data_list and set the data object with matching
        data.name as train_data; also materialise train_samples and
        train_targets via fill_feature_target().

        Parameter
        ---------
        data_name : string
            Contains the name of the data object, that should be set as
            train_data for the model.
        """
        data_in_list = False
        for data in self.data_list:
            if data.name == data_name:
                print data_name + " is in model_data_list"
                self.train_data = data
                self.train_samples, self.train_targets = self.fill_feature_target(data)
                print data_name + " is set as train_data"
                data_in_list = True
        if data_in_list:
            self.__train_data_set = True
        else:
            print data_name + " not in model_data_list "

    def set_test_data(self, data_name):
        """Setter for test data.

        Walk through data_list and set the data object with matching
        data.name as test_data.

        Notes
        -----
        Training data has to be set before test data, due to the fact that
        some features need skeletons that have to be built before seeing
        the test data.
        see reference: bag_of_pos.py, bag_of_words.py, tf_idf.py

        Parameter
        ---------
        data_name : string
            Contains the name of the data object, that should be set as
            test_data for the model.
        """
        # If the requested set is the training set, reuse it directly.
        if self.__train_data_set and self.train_data.name == data_name:
            self.test_data = self.train_data
            print "train_data and test_data from one data_set"
        elif not self.__train_data_set:
            print "please set train_data first"
        else:
            data_in_list = False
            for data in self.data_list:
                if data.name == data_name:
                    print data_name + " is in model_data_list"
                    self.test_data = data
                    self.test_samples, self.test_targets = self.fill_feature_target(data)
                    data_in_list = True
                    print data_name + " is set as test_data"
            if not data_in_list:
                print data_name + " not in model_data_list "

    def fill_feature_target(self, data):
        """
        Fill the feature samples and target values.

        The classifier objects from sklearn need numpy arrays for
        classification.
        Shape of the data class labels : numpy array of shape [n_samples]
        Shape of the data feature values : numpy array of shape
        [n_samples, n_features]
        Vectorize() textpair feature values, for building the required
        numpy arrays.

        Note
        ----
        Check __train_data_set first, because there is no need to attach
        the same features for test data manually in main.py. This will be
        performed automatically in here.

        Parameter
        ---------
        data : Data
            Contains a Data object whose data.real_data should be
            vectorized.
        """
        sample_list = []
        target_list = []
        if self.__train_data_set:
            # Share the bow model fitted on the training data with the
            # incoming (test) data for features that need it.
            for feature in self.train_data.features_fit:
                if feature == "bag_of_words" or feature == "bag_of_pos" or feature == "tf_idf":
                    data.bow_model = self.train_data.bow_model
            print self.train_data.features_fit
            data.attach_feature_list(self.train_data.features_fit)
            for textpair in data.real_data.values():
                textpair.vectorize()
                target_list.append(textpair.target)
                sample_list.append(textpair.feature_vector)
            return np.array(sample_list), np.array(target_list)
        else:
            for textpair in data.real_data.values():
                textpair.vectorize()
                target_list.append(textpair.target)
                sample_list.append(textpair.feature_vector)
            return np.array(sample_list), np.array(target_list)

    def set_classifier(self, classifier_name):
        """
        Setter for clf.

        Builds an instance of the classifier object with the corresponding
        name.

        Parameter
        ---------
        classifier_name : string
            Contains the corresponding name of the wanted classifier from
            sklearn.

        Raises
        ------
        ClassifierNotExistException
            If classifier_name is not one of classifier_list.
        """
        if classifier_name == "svm_linear":
            self.clf = svm.SVC(kernel="linear", class_weight="auto")
        elif classifier_name == "svm_poly":
            self.clf = svm.SVC(kernel="poly", class_weight="auto")
        elif classifier_name == "naive_bayes":
            self.clf = GaussianNB()
        elif classifier_name == "decision_tree":
            self.clf = tree.DecisionTreeClassifier()
        elif classifier_name == "nearest_centroid":
            self.clf = NearestCentroid()
        elif classifier_name == "k_neighbors":
            self.clf = KNeighborsClassifier(n_neighbors=100)
        elif classifier_name == "radius_neighbors":
            # outlier_label=1: samples with no neighbour inside the radius
            # are labelled 1 instead of raising.
            self.clf = RadiusNeighborsClassifier(radius=1.0, outlier_label=1)
        else:
            raise ClassifierNotExistException(classifier_name)

    def train(self, fraction):
        """
        Train the model.

        Trains the classifier with the wanted fraction of the training
        data.

        Parameter
        -------
        fraction : int
            Contains a number from 0 to 100. Defines the fraction of the
            training data that will be used for training the classifier.
        """
        if self.clf is None:
            raise NoClassifierException
        elif self.train_targets.size == 0 and self.train_samples.size == 0:
            raise EmptyFeaturesEmptyTargetsException
        else:
            # Number of samples that corresponds to `fraction` percent.
            count = int(round((float(len(self.train_targets)) / float(100)) * float(fraction), 0))
            self.clf.fit(self.train_samples[:count], self.train_targets[:count])

    def predict(self, sample):
        """
        Predict a given sample.

        Makes a prediction for a given sample. The classifier needs a
        numpy array with the feature values of a sample.

        Note
        ----
        Requires a trained (fitted) model.

        Parameters
        ----------
        samples : numpy array of shape [n_samples, n_features]

        Returns
        -------
        self.clf.predict(sample) : int
            Contains the prediction value from the model. It is the
            predicted class label. For a textpair object it can be 0 or 1.
        """
        if self.clf is None:
            raise NoClassifierException
        elif self.test_targets.size == 0 and self.test_samples.size == 0:
            raise EmptyFeaturesEmptyTargetsException
        else:
            return self.clf.predict(sample)

    def evaluate_cross_validation(self, folds):
        """
        Evaluation through a cross-validation.

        Performs a cross-validation on the set training data with measured
        accuracy. It requires a given number of folds.

        Note
        ----
        Cross-validation is performed on the training data, not on the
        test data. So set your data as training data if you want to
        perform a cross-validation.

        Parameter
        ---------
        folds : int
            Contains the number of folds for the cross-validation.

        Returns
        -------
        accuracy_list : array, shape = [float acc score1, float acc score2, ...]
            Contains the accuracy scores of all iterations.
        acc_mean : float
            Contains the accuracy mean of all the iterations.
        """
        if self.clf is None:
            raise NoClassifierException
        elif self.train_targets.size == 0 and self.train_samples.size == 0:
            raise EmptyFeaturesEmptyTargetsException
        elif folds > len(self.train_samples):
            raise FoldSizeToBigException(folds, self.train_samples)
        else:
            # Old sklearn.cross_validation-style KFold(n, n_folds=...).
            kf = KFold(len(self.train_samples), n_folds=folds)
            accuracy_list = []
            for train, test in kf:
                x_train, x_test, y_train, y_test = self.train_samples[train], self.train_samples[test], \
                    self.train_targets[train], self.train_targets[test]
                self.clf.fit(x_train, y_train)
                accuracy_list.append(accuracy_score(np.array(y_test), np.array(self.clf.predict(x_test))))
            # Manual mean over the per-fold accuracies.
            n = 0
            sum_values = 0
            for acc_value in accuracy_list:
                sum_values = sum_values + acc_value
                n += 1
            acc_mean = (sum_values / n)
            return accuracy_list, acc_mean

    def evaluate_classification_report(self, fraction):
        """
        A detailed classification report.

        For an easy way to measure how well your trained model performs,
        the given method uses your set data objects and gives an accuracy
        score output on the shell.

        Note
        ----
        There are two scenarios :
        1. training data and test data are from the same data object.
           (means their names are the same !) - Normalization
        2. training data and test data are from different data objects.
           + Normalization

        The first scenario will use the given fraction and divide the
        training data in train and test data for the classification. If
        fraction is 100 then it will be trained and tested on the same
        data object. With a fraction of 80 it will be trained on 80
        percent and tested on 20 percent of the given data object. There
        is no Normalization implemented for this scenario !

        The second scenario needs a fraction of 100, to use the whole
        training data for the training ! Working with normalized values.

        Parameter
        ---------
        fraction : int
            Contains a number from 0 to 100. Defines the fraction of the
            training data that will be used for training the classifier.
        """
        if self.clf is None:
            raise NoClassifierException
        elif self.train_targets.size == 0 and self.train_samples.size == 0:
            raise EmptyFeaturesEmptyTargetsException
        else:
            # if trained on 100 % fraction, it will be tested on 100 %
            # fraction, then train and test data are the same
            # if count_predict is 0 (with 100% count_train), then
            # self.targets[-count_predict:] == self.targets[:] = True
            if self.test_data.name == self.train_data.name:
                print "train_data and test_data from one data_set"
                count_train = int(round((float(len(self.train_targets)) / float(100)) * float(fraction), 0))
                count_predict = len(self.train_targets) - count_train
                print "count_train:", count_train
                print "count_predict:", count_predict
                # Summarize placed in here, because data objects are equal
                # and divided in this method. So training and test data are
                # defined in here.
                print "##########train_data summarize##########"
                summarize_textpair(self.train_data.real_data.values()[:count_train])
                print "##########test_data summarize##########"
                summarize_textpair(self.train_data.real_data.values()[-count_predict:])
                # setting train and test data
                train_samples = self.train_samples[:count_train]
                train_targets = self.train_targets[:count_train]
                test_samples = self.train_samples[-count_predict:]
                test_targets = self.train_targets[-count_predict:]
                # Training
                self.clf.fit(train_samples, train_targets)
                # Testing
                test_targets_predicted = self.clf.predict(test_samples)
                # calculating baseline (majority-class share of the labels;
                # "null"/"eins" are German for zero/one)
                null = 0
                eins = 0
                for i in test_targets:
                    if i == 0:
                        null += 1
                    else:
                        eins += 1
                if null > eins:
                    baseline = float(null)/(float(null)+float(eins))
                else:
                    baseline = float(eins)/(float(null)+float(eins))
                print "Anzahl 0:", null
                print "Anzahl 1:", eins
                print "Baseline:", baseline
                print "-------------------------------"
                # Calculating accuracy score of predicted samples
                print "accuracy_score: ", accuracy_score(test_targets, test_targets_predicted)
            else:
                # Normalization
                norma = preprocessing.normalize(self.train_samples)
                count_train = int(round((float(len(self.train_targets)) / float(100)) * float(fraction), 0))
                print "count_train:", count_train
                print "count_predict:", len(self.test_targets)
                # Setting train and test data
                # without normalization take this one instead
                # train_samples = self.train_samples[:count_train]
                train_samples = norma[:count_train]
                train_targets = self.train_targets[:count_train]
                # without normalization take this one instead
                # test_samples = self.test_samples
                test_samples = preprocessing.normalize(self.test_samples)
                test_targets = self.test_targets
                # Training
                self.clf.fit(train_samples, train_targets)
                # Testing
                test_targets_predicted = self.clf.predict(test_samples)
                # Calculating baseline
                null = 0
                eins = 0
                for i in test_targets:
                    if i == 0:
                        null += 1
                    else:
                        eins += 1
                if null > eins:
                    baseline = float(null)/(float(null)+float(eins))
                else:
                    baseline = float(eins)/(float(null)+float(eins))
                print "Anzahl 0:", null
                print "Anzahl 1:", eins
                print "Baseline:", baseline
                print "-------------------------------"
                # Calculating accuracy score of predicted samples
                print "accuracy_score: ", accuracy_score(test_targets, test_targets_predicted)
# NOTE(review): fragment — `knn_classifier`, `t1`, the train/test arrays and
# `calc_accuracy` are defined earlier in the file, outside this excerpt.
# Each stanza times prediction (and sometimes fitting) of one classifier and
# reports accuracy via calc_accuracy(name, elapsed).
estimated_classes = knn_classifier.predict(test_data)
t2 = time.time()
calc_accuracy(estimated_classes, test_labels, 'KNN Classifier with k = 5',
              t2 - t1)
knn_classifier = KNeighborsClassifier(n_neighbors=10)
t1 = time.time()
knn_classifier.fit(train_data, train_labels)
estimated_classes = knn_classifier.predict(test_data)
t2 = time.time()
calc_accuracy(estimated_classes, test_labels, 'KNN Classifier with k = 10',
              t2 - t1)
# Radius-based neighbour voting used as a Parzen-window-style estimator.
parzen_classifier = RadiusNeighborsClassifier(radius=2.8)
t1 = time.time()
parzen_classifier.fit(train_data, train_labels)
estimated_classes = parzen_classifier.predict(test_data)
t2 = time.time()
calc_accuracy(estimated_classes, test_labels, 'Parzen Estimator', t2 - t1)
gnb = GaussianNB()
t1 = time.time()
gnb.fit(train_data, train_labels)
t2 = time.time()
print('time to learn: ', t2 - t1)
t1 = time.time()
# (variable name typo "calsses" kept as in the original)
estimated_calsses = gnb.predict(test_data)
t2 = time.time()
calc_accuracy(estimated_calsses, test_labels, 'Gaussian Naive Bayes', t2 - t1)
# NOTE(review): fragment — `knn`, `timer`, the train/test arrays and
# `plot_confusion_matrix` are defined earlier in the file, outside this
# excerpt. Pattern: time fit and predict separately, print test accuracy,
# then plot the (unnormalised) confusion matrix.
with timer():
    knn.fit(train_data, train_labels)
with timer():
    decision_labels = knn.predict(test_data)
# Fraction of correctly classified test samples.
print(np.sum(decision_labels.flat == test_labels.flat) / len(test_labels))
class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
cnf_matrix = confusion_matrix(test_labels, decision_labels)
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')
plt.show()
# Radius-neighbours ("Parzen"-style) classifier, same evaluation pattern.
parzen = RadiusNeighborsClassifier(radius=3.0)
with timer():
    parzen.fit(train_data, train_labels)
with timer():
    decision_labels = parzen.predict(test_data)
print(np.sum(decision_labels.flat == test_labels.flat) / len(test_labels))
class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
cnf_matrix = confusion_matrix(test_labels, decision_labels)
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')
plt.show()
gnb = GaussianNB()
with timer():
    gnb.fit(train_data, train_labels)
# NOTE(review): excerpt truncated here; the body of this `with` continues
# beyond the visible chunk.
with timer():
plt.title("C: %d, Gamma: %d" %(C,gamma)) #plt.xticks(()) #plt.yticks(()) #plt.axis([-3, 3, -3, 3]) biz['svm_pred']=(biz.expensive>clf.predict(X)).astype(int) plt.scatter(x=biz[biz.svm_pred==1].X,y=biz[biz.svm_pred==1].Y, s=20, c='g') print("Prop of expensive businesses seen as gentrifiers [%.2f]" %(biz['svm_pred'].sum()/biz.expensive.sum())) print("Prop of expensive businesses seen as gentrifiers [%.2f]" %(biz['svm_pred'].sum()/len(biz.expensive))) #biz['gentrifier']=(biz.expensive>biz.svm_pred).astype(int) ##################################################################################################################################### ################################################################ Nearest Neighbor ################################################### r=.00025 #A block is .001 and two blocks are .003; therefore, .011 scans about 8 blocks in diameter. neigh = RadiusNeighborsClassifier(radius=r) #from qGis nneighbor analysis neigh.fit(X, Y) predictions=neigh.predict(X) plt.scatter(X.iloc[:,0], X.iloc[:,1], s=30, c=Y, cmap=plt.cm.Paired); plt.title('True labels') plt.subplots_adjust(left=0, bottom=0, right=1, top=.95, wspace=0, hspace=0) plt.figure(); plt.scatter(X.iloc[:,0], X.iloc[:,1], s=30, c=predictions, cmap=plt.cm.Paired); plt.title('Predicted labels, rad=%.3f' %r) plt.subplots_adjust(left=0, bottom=0, right=1, top=.95, wspace=0, hspace=0) biz['rnn_gentrifier']=(biz.expensive>predictions).astype(int) plt.scatter(x=biz[biz.rnn_gentrifier==1].X,y=biz[biz.rnn_gentrifier==1].Y, s=20, c='g') print("Prop of expensive businesses seen as gentrifiers [%.2f]" %((biz.rnn_gentrifier.sum()/biz.expensive.sum()))) print("Prop of expensive businesses seen as gentrifiers [%.2f]" %((biz.rnn_gentrifier.sum()/len(biz.expensive)))) #####################################################################################################################################
        # NOTE(review): fragment — this `if` is the tail of a classify/lookup
        # method whose signature is outside this excerpt; `coltag`,
        # `screenspace_x`, `screenspace_y` and MOCAP_ROGE_DATA come from the
        # surrounding code. (Python 2: dict.has_key.)
        if self._clasifyData.has_key(coltag):
            try:
                # Radius-neighbour lookup of the marker tag for this
                # screen-space point; remember the point under that tag.
                tag = self._clasifyData[coltag]['neigh'].predict([[screenspace_x,screenspace_y]])
                tag = tag[0]
                self._clasifyData[coltag]['data'][tag] = [screenspace_x,screenspace_y]
            except ValueError:
                # No neighbour within the radius -> treat as rogue data.
                return MOCAP_ROGE_DATA
            return tag
        return MOCAP_ROGE_DATA

    def updateBoxesForNextFrame(self):
        """Refit each tag classifier on the centroids seen this frame."""
        for clotag,data in self._clasifyData.items():
            centroids = []
            labels = []
            for tag,centroid in data['data'].items():
                centroids.append(centroid)
                labels.append(tag)
            self._clasifyData[clotag]['neigh'].fit(centroids,labels)

# Standalone smoke test of RadiusNeighborsClassifier at module level.
X = [[229.5, 500.5], [127.0, 497.0]]#[[0,0], [1,1], [2,2], [3,3]]
y = [1, 5]#[5, 1, 3, 4]
neigh = RadiusNeighborsClassifier(radius=1.0)
neigh.fit(X, y)
print(neigh.predict([[229.5, 500.5]]))
# 'sum_elev_0_30_25', 'sum_elev_30_60_25', 'sum_elev_60_90_25', # 'elev_0_30_weak', 'elev_30_60_weak', 'elev_60_90_weak', # 'sum_elev_0_30_weak', 'sum_elev_30_60_weak', 'sum_elev_60_90_weak'] # scaler_iid = MinMaxScaler() # scaler_iid.partial_fit(train_data[cols_iid]) # scaler_iid.partial_fit(test_data[cols_iid]) # train_input_iid = scaler_iid.transform(train_data[cols_iid]) # test_input_iid = scaler_iid.transform(test_data[cols_iid]) forest = ensemble.RandomForestClassifier(n_estimators=100, max_depth=3, random_state=57) bayes = GaussianNB(priors=[0.25, 0.25, 0.25, 0.25]) svm = svm.SVC(probability=True, C=0.01, gamma=1, random_state=1289) last_clf.fit(meta_train_data, meta_train_true_labels) forest.fit(train_input, train_data['true_class']) bayes.fit(train_input, train_data['true_class']) #bayes.fit(train_input_iid, train_data['true_class']) svm.fit(train_input, train_data['true_class']) test_proba_forest = forest.predict_proba(test_input) test_proba_bayes = bayes.predict_proba(test_input) #test_proba_bayes = bayes.predict_proba(test_input_iid) test_proba_svm = svm.predict_proba(test_input) test_proba = np.concatenate( (test_proba_forest, test_proba_bayes, test_proba_svm), axis=1) # test_proba = np.concatenate((test_proba_forest, test_proba_svm), axis=1) # test_proba = (test_proba_forest + test_proba_svm + test_proba_bayes)/3
def radius_knn(data, response, rad): X_train, X_test, y_train, y_test = train_test_split(data, response) neigh = RadiusNeighborsClassifier(radius=rad) d = neigh.fit(X_train, y_train).score(X_test, y_test) print 'knn radius classifier accuracy: ' + str(d)
import window_s_p_ft as win
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.cross_validation import train_test_split

# Average the radius-NN accuracy over `stop` random 80/20 train/test splits
# of the student data, first for all specialisations at once, then (below)
# one-vs-rest per specialisation.
total_score = 0
stop = 1000
for x in range(stop):
    clf = RadiusNeighborsClassifier(radius=100.0)
    data = win.getStudents()
    data_train, data_test = train_test_split(data, test_size=0.2)
    # Labels are each student's specialisation; features are the grades.
    data_train_labels = [s.spec for s in data_train]
    data_test_labels = [s.spec for s in data_test]
    data_train = [s.grades for s in data_train]
    data_test = [s.grades for s in data_test]
    clf.fit(data_train, data_train_labels)
    total_score += clf.score(data_test, data_test_labels)
total_score = total_score / stop
print('all')
print(total_score)
specs = ['FK', 'FM', 'MN', 'OE']
for sp in specs:
    total_score = 0
    for x in range(stop):
        clf = RadiusNeighborsClassifier(radius=100.0)
        data = win.getStudents()
        data_train, data_test = train_test_split(data, test_size=0.2)
        # One-vs-rest relabelling: the current spec against 'NOT <spec>'.
        data_train_labels = [s.spec if s.spec == sp else 'NOT ' + sp for s in data_train]
        data_test_labels = [s.spec if s.spec == sp else 'NOT ' + sp for s in data_test]
        # NOTE(review): excerpt ends here; the rest of this loop body
        # (fit/score for the one-vs-rest case) continues beyond this view.
        data_train = [s.grades for s in data_train]