def append_without_duplicates(usual, y, knowledge):
    # Append only the rows of y that have no existing neighbor within the radius.
    if len(usual['data']) == 0:
        usual['data'] += y
        return

    maxims, averages = get_maxims_and_averages(knowledge)
    usual_to_fit = normalize_fit_input(usual['data'], usual['events'],
                                       usual['fields'], averages, maxims)

    new_data_to_fit = normalize_fit_input(y, usual['events'], usual['fields'],
                                          averages, maxims)

    classifier = RadiusNeighborsClassifier(radius=2,
                                           metric='euclidean',
                                           outlier_label=-1)

    classifier.fit(sparse.csr_matrix(usual_to_fit), [0] * len(usual_to_fit))

    labels = classifier.predict(sparse.csr_matrix(new_data_to_fit))

    for i in range(len(labels) - 1, -1, -1):
        if labels[i] != -1:
            y.pop(i)

    usual['data'] += y
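
The same trick works standalone: fit a RadiusNeighborsClassifier on the existing rows with a single dummy class and outlier_label=-1, then keep only the new rows that come back labelled -1, i.e. far from everything already stored. A minimal self-contained sketch with toy data, skipping the normalization helpers:

import numpy as np
from sklearn.neighbors import RadiusNeighborsClassifier

existing = np.array([[0.0, 0.0], [10.0, 10.0]])
candidates = np.array([[0.1, 0.1], [5.0, 5.0]])

dedup = RadiusNeighborsClassifier(radius=2, metric='euclidean', outlier_label=-1)
dedup.fit(existing, [0] * len(existing))   # one dummy class for all stored rows

labels = dedup.predict(candidates)
unique_rows = candidates[labels == -1]     # only points far from every stored row
print(unique_rows)                         # [[5. 5.]]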
def runRNC(X_train, y_train, X_test, R=1.0, weights="uniform", outlier=None):
    # initialize the classifier
    model = RadiusNeighborsClassifier(R, weights=weights, outlier_label=outlier)
    rnc = model.fit(X_train, y_train)
    predictions = rnc.predict(X_test)

    return predictions
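
For instance, on toy 1-D data (with outlier left as None, sklearn raises a ValueError if a test point has no neighbor within R):

X_train = [[0.0], [1.0], [10.0]]
y_train = [0, 0, 1]
X_test = [[0.5], [9.5]]
print(runRNC(X_train, y_train, X_test, R=1.0))  # -> [0 1]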
Example #4
def train_models(dict_of_dicts):
    for midi_key, midi_dict in dict_of_dicts.items():
        X = []
        y = []
        for x in midi_dict:
            # an empty list simply contributes no samples
            for t in midi_dict[x]:
                X.append(t)
                y.append(x)  # the label is repeated once per sample
        if X:
            rad = get_radius(midi_key)

            neigh = RadiusNeighborsClassifier(radius=rad,
                                              weights='distance',
                                              outlier_label=[7])

            X = np.array(X)
            y = np.array(y)
            neigh.fit(X.reshape(-1, 1), y)
            filename = workspace.model_folder + '/' + str(
                midi_key) + '_rrn_model.sav'
            pickle.dump(neigh, open(filename, 'wb'))
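
Loading one of the pickled models back follows the same pattern in reverse; a sketch, assuming the same workspace object and some midi_key, and remembering that the features were trained as a column vector:

import pickle

filename = workspace.model_folder + '/' + str(midi_key) + '_rrn_model.sav'
with open(filename, 'rb') as f:
    neigh = pickle.load(f)

predictions = neigh.predict(np.array([60.1, 61.9]).reshape(-1, 1))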
def radius_neighbors_clustering(X_train, X_test, y_train, y_test, parameters,
                                evaluation_metrics):
    # prepare the parameters before building the classifier (this mainly concerns the distance metric)
    modified_parameters = prepare_parameters(parameters)

    if modified_parameters["distance"] != "mahalanobis":
        initial_classifier = RadiusNeighborsClassifier(
            n_jobs=-1,
            radius=modified_parameters["radius"],
            metric=modified_parameters["distance"],
            p=modified_parameters["minkowski_p"])
    else:
        initial_classifier = RadiusNeighborsClassifier(
            n_jobs=-1,
            radius=modified_parameters["radius"],
            metric=modified_parameters["distance"],
            p=modified_parameters["minkowski_p"],
            algorithm="brute",
            metric_params={"VI": np.linalg.inv(np.cov(X_train))})

    classifier = initial_classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    evaluation_metrics["accuracy"] = classifier.score(X_test, y_test)

    return evaluation_metrics
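
As a sanity check, the Mahalanobis branch boils down to the pattern below: compute the inverse feature covariance yourself and hand it to the classifier via metric_params (toy data; algorithm='brute' matches the branch above):

import numpy as np
from sklearn.neighbors import RadiusNeighborsClassifier

rng = np.random.RandomState(0)
X_train = rng.randn(50, 3)
y_train = rng.randint(0, 2, size=50)

VI = np.linalg.inv(np.cov(X_train, rowvar=False))  # shape (n_features, n_features)
clf = RadiusNeighborsClassifier(radius=5.0, metric="mahalanobis",
                                algorithm="brute", metric_params={"VI": VI})
clf.fit(X_train, y_train)
print(clf.predict(X_train[:3]))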
Example #6
def createRadiusNeighborsClassifier(params=None):
    info("Creating Radius Neighbors Classifier", ind=4)
    error("This doesn't work")
    return {"estimator": None, "params": None}

    ## Params
    params = mergeParams(RadiusNeighborsClassifier(), params)
    tuneParams = getRadiusNeighborsClassifierParams()
    grid = tuneParams['grid']

    info("With Parameters", ind=4)
    algorithm = setParam('algorithm', params, grid, force=False)
    info("Param: algorithm = {0}".format(algorithm), ind=6)

    leaf_size = setParam('leaf_size', params, grid, force=False)
    info("Param: leaf_size = {0}".format(leaf_size), ind=6)

    metric = setParam('metric', params, grid, force=False)
    info("Param: metric = {0}".format(metric), ind=6)

    radius = setParam('radius', params, grid, force=False)
    info("Param: radius = {0}".format(radius), ind=6)

    weights = setParam('weights', params, grid, force=False)
    info("Param: weights = {0}".format(weights), ind=6)

    ## Estimator
    reg = RadiusNeighborsClassifier(algorithm=algorithm,
                                    leaf_size=leaf_size,
                                    metric=metric,
                                    radius=radius,
                                    weights=weights)

    return {"estimator": reg, "params": tuneParams}
Example #7
    def __init__(self, feature_length, num_classes):
        super().__init__(feature_length, num_classes)
        self.num_classes = num_classes

        ###
        # BUILD YOUR MODEL
        self.model = RadiusNeighborsClassifier(weights='distance',
                                               metric='minkowski')
Example #8
def palabra(directorio):
    global X, Y
    words = ''
    neigh = RadiusNeighborsClassifier(radius=0.12)
    neigh.fit(X, Y)
    for filename in os.listdir(directorio):
        word = ' '
        fs, x = wv.read(directorio + '/' + filename)
        n = len(x)
        if ((n / fs) > 0.5):
            spectrum, freqs, t, im = plt.specgram(x[:, 1],
                                                  NFFT=1024,
                                                  Fs=fs,
                                                  sides='onesided')
            for i in range(len(spectrum)):
                point = findForm(
                    np.linspace(0, freqs[len(spectrum[i])] / 1000,
                                len(spectrum[i])), spectrum[i])
                if point[0] != 0 and point[1] != 0:
                    try:
                        print(point)
                        val = neigh.predict([point])
                        char = valores(val[0])
                        print(char)
                        if word[-1] != char:
                            word = word + char
                    except ValueError:
                        # sklearn raises ValueError when no training point lies within the radius
                        print('No neighbors found for the given radius')
        words = words + ' ' + word
    return words
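
Instead of catching the ValueError, the classifier can be given an outlier_label up front, which turns "no neighbors within the radius" into an ordinary prediction. A sketch of the same prediction step under that assumption (reusing the X, Y globals and the valores helper from above):

neigh = RadiusNeighborsClassifier(radius=0.12, outlier_label=-1)
neigh.fit(X, Y)
val = neigh.predict([point])
if val[0] == -1:
    print('No neighbors found for the given radius')
else:
    print(valores(val[0]))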
def Radius_Neighbors(input_file,Output):
    lvltrace.lvltrace("LVLEntree dans Radius_Neighbors")
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]
    y = data[:,0]
    n_samples, n_features = X.shape
    clf = RadiusNeighborsClassifier(radius=1.0)  # the classifier takes a radius, not n_neighbors
    clf.fit(X, y)
    y_pred = clf.predict(X)
    print "#########################################################################################################\n"
    print "Radius Neighbors Accuracy "
    print "classification accuracy:", metrics.accuracy_score(y, y_pred)
    print "precision:", metrics.precision_score(y, y_pred)
    print "recall:", metrics.recall_score(y, y_pred)
    print "f1 score:", metrics.f1_score(y, y_pred)
    print "\n"
    print "#########################################################################################################\n"
    results = Output+"Raidus_Neighbors_metrics.txt"
    file = open(results, "w")
    file.write("Radius Neighbors estimator accuracy\n")
    file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y, y_pred))
    file.write("Precision Score: %f\n"%metrics.precision_score(y, y_pred))
    file.write("Recall Score: %f\n"%metrics.recall_score(y, y_pred))
    file.write("F1 Score: %f\n"%metrics.f1_score(y, y_pred))
    file.write("\n")
    file.write("True Value, Predicted Value, Iteration\n")
    for n in xrange(len(y)):
        file.write("%f,%f,%i\n"%(y[n],y_pred[n],(n+1)))
    file.close()
    title = "Radius Neighbors"
    save = Output + "Radius_Neighbors_confusion_matrix.png"
    plot_confusion_matrix(y, y_pred,title,save)
    lvltrace.lvltrace("LVLSortie dans Radius_Neighbors")
Example #11
    def set_classifier(self, classifier_name):
        """ Setter for clf

        Building instances of classifier objects with corresponding name.

        Parameter
        ---------
        classifier_name : string
            Contains the corresponding name of the wanted classifier from
            sklearn.
        """
        if classifier_name == "svm_linear":
            self.clf = svm.SVC(kernel="linear", class_weight="auto")
        elif classifier_name == "svm_poly":
            self.clf = svm.SVC(kernel="poly", class_weight="auto")
        elif classifier_name == "naive_bayes":
            self.clf = GaussianNB()
        elif classifier_name == "decision_tree":
            self.clf = tree.DecisionTreeClassifier()
        elif classifier_name == "nearest_centroid":
            self.clf = NearestCentroid()
        elif classifier_name == "k_neighbors":
            self.clf = KNeighborsClassifier(n_neighbors=100)
        elif classifier_name == "radius_neighbors":
            self.clf = RadiusNeighborsClassifier(radius=1.0, outlier_label=1)
        else:
            raise ClassifierNotExistException(classifier_name)
Example #12
 def r_neighbors_classifier(self,
                            radius=1.0,
                            weights='uniform',
                            algorithm='auto',
                            leaf_size=30,
                            p=2,
                            metric='minkowski'):
     """
     Classifier implementing a radius-based nearest neighbors vote.
     :param radius: Range of parameter space within which neighbors vote
     :param weights: Weight function used in prediction, inputs:
                             uniform: All points in each neighborhood are weighted equally.
                             distance: Weight points by the inverse of their distance; closer points have a greater influence.
     :param algorithm: Algorithm used to compute the nearest neighbors, inputs:
                             ball_tree: Fast generalized N-point problems.
                             kd_tree: k-dimensional Euclidean tree.
                             brute: Brute-force search.
                             auto: Attempts to pick the most appropriate algorithm from the fit data.
     :param leaf_size: Leaf size passed to the tree. This can affect computation speed and memory.
     :param p: Parameter for the Minkowski metric.
     :param metric: Distance metric to use for the tree, inputs:
                             euclidean, manhattan, chebyshev, minkowski, seuclidean, mahalanobis
     :return: None; the fitted model is stored on the instance
     """
     model = RadiusNeighborsClassifier(radius=radius,
                                       weights=weights,
                                       algorithm=algorithm,
                                       leaf_size=leaf_size,
                                       p=p,
                                       metric=metric)
     model.fit(self.__x_train, self.__y_train)
     self.__model = model
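
One pitfall this method avoids: RadiusNeighborsClassifier has no n_neighbors parameter (passing one raises a TypeError); that parameter belongs to KNeighborsClassifier. The two constructors differ exactly here:

from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)    # vote among the 5 closest points
rnn = RadiusNeighborsClassifier(radius=1.0)  # vote among all points within distance 1.0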
Example #15
    def train_model(self, X_train, y_train, modelpath):
        model = RadiusNeighborsClassifier(radius=self.radius,
                                          weights=self.weights,
                                          algorithm=self.algorithm,
                                          p=self.power_param,
                                          outlier_label=self.outlier_label)

        model.fit(X_train, y_train)
        self.save_model(model, modelpath)
        return model
class Adaptive_KNN_Model(IMachineLearning):
    """
    This class performs adaptive (radius-based) K-Nearest-Neighbors regression or classification
    """
    def __init__(self,
                 regression=True,
                 radius=1.0,
                 weights='distance',
                 algorithm='auto',
                 leaf_size=30,
                 p=2,
                 metric='minkowski',
                 outlier_label=None,
                 metric_params=None):
        self._regression = regression
        self._radius = radius
        self._weights = weights
        self._algorithm = algorithm
        self._leaf_size = leaf_size
        self._p = p
        self._metric = metric
        self._metric_params = metric_params
        self._outlier_label = outlier_label
        if regression:
            self._model = RadiusNeighborsRegressor(radius=radius,
                                                   weights=weights,
                                                   algorithm=algorithm,
                                                   leaf_size=leaf_size,
                                                   p=p,
                                                   metric=metric,
                                                   metric_params=metric_params)
        else:
            self._model = RadiusNeighborsClassifier(radius=radius,
                                                    weights=weights,
                                                    algorithm=algorithm,
                                                    leaf_size=leaf_size,
                                                    p=p,
                                                    metric=metric,
                                                    outlier_label=outlier_label,
                                                    metric_params=metric_params)
        super().__init__()

    def train(self, xData, yData):
        ## check input ##
        if not isinstance(xData, pd.DataFrame):
            raise ValueError('Invalid xData')

        if not isinstance(yData, pd.DataFrame) and not isinstance(
                yData, Series):
            raise ValueError('Invalid yData')
        ## train the neighbors model ##
        self._xData = xData
        self._yData = yData
        self._model = self._model.fit(self._xData, self._yData)

    def predict(self, xData):
        ## check input ##
        if isinstance(xData, str):
            raise ValueError('Invalid Argument')
        if not isinstance(xData, pd.DataFrame):
            raise ValueError("Invalid Argument")
        ## predict ##
        self._prd = self._model.predict(xData)
        return self._prd
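
A minimal usage sketch, assuming pandas is imported as pd and that IMachineLearning takes no constructor arguments:

X = pd.DataFrame({'f1': [0.0, 1.0, 10.0], 'f2': [0.0, 1.0, 10.0]})
y = pd.DataFrame({'target': [0.0, 0.0, 1.0]})

model = Adaptive_KNN_Model(regression=True, radius=5.0)
model.train(X, y)
print(model.predict(X))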
Example #17
    def clusterFacetSamplesRNN(self, reduceRadius=3):
        """
        cluster the samples of each facet using radius nearest neighbours
        the cluster center and their correspondent normals will be saved
        in self.objsamplepnts_refcls and self.objsamplenrmals_refcls

        :param: reduceRadius: the neighbors that fall inside the reduceradius will be removed
        :return: None

        author: weiwei
        date: 20161130, osaka
        """

        self.objsamplepnts_refcls = np.ndarray(shape=(self.facets.shape[0], ),
                                               dtype=object)
        self.objsamplenrmls_refcls = np.ndarray(shape=(self.facets.shape[0], ),
                                                dtype=object)
        for i, facet in enumerate(self.facets):
            # print "cluster"
            # print i,len(self.facets)
            self.objsamplepnts_refcls[i] = []
            self.objsamplenrmls_refcls[i] = []
            X = self.objsamplepnts_ref[i]
            nX = X.shape[0]
            if nX > 0:
                neigh = RadiusNeighborsClassifier(radius=1.0)
                neigh.fit(X, range(nX))
                neigharrays = neigh.radius_neighbors(X,
                                                     radius=reduceRadius,
                                                     return_distance=False)
                delset = set([])
                for j in range(nX):
                    if j not in delset:
                        self.objsamplepnts_refcls[i].append(np.array(X[j]))
                        self.objsamplenrmls_refcls[i].append(
                            np.array(self.objsamplenrmls_ref[i][j]))
                        # if self.objsamplepnts_refcls[i].size:
                        #     self.objsamplepnts_refcls[i] = np.vstack((self.objsamplepnts_refcls[i], X[j]))
                        #     self.objsamplenrmls_refcls[i] = np.vstack((self.objsamplenrmls_refcls[i],
                        #                                                 self.objsamplenrmls_ref[i][j]))
                        # else:
                        #     self.objsamplepnts_refcls[i] = np.array([])
                        #     self.objsamplenrmls_refcls[i] = np.array([])
                        #     self.objsamplepnts_refcls[i] = np.hstack((self.objsamplepnts_refcls[i], X[j]))
                        #     self.objsamplenrmls_refcls[i] = np.hstack((self.objsamplenrmls_refcls[i],
                        #                                                 self.objsamplenrmls_ref[i][j]))
                        delset.update(neigharrays[j].tolist())
            if self.objsamplepnts_refcls[i]:
                self.objsamplepnts_refcls[i] = np.vstack(
                    self.objsamplepnts_refcls[i])
                self.objsamplenrmls_refcls[i] = np.vstack(
                    self.objsamplenrmls_refcls[i])
            else:
                self.objsamplepnts_refcls[i] = np.empty(shape=(0, 0))
                self.objsamplenrmls_refcls[i] = np.empty(shape=(0, 0))
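
The core of the method is a greedy radius-based downsampling step that is useful on its own; a self-contained sketch of just that step:

import numpy as np
from sklearn.neighbors import RadiusNeighborsClassifier

def downsample_by_radius(X, radius):
    # greedily keep a point, then drop every other point within `radius` of it
    neigh = RadiusNeighborsClassifier(radius=1.0)
    neigh.fit(X, range(len(X)))
    groups = neigh.radius_neighbors(X, radius=radius, return_distance=False)
    kept, dropped = [], set()
    for j in range(len(X)):
        if j not in dropped:
            kept.append(X[j])
            dropped.update(groups[j].tolist())
    return np.vstack(kept)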
def knnClassifier():
    trainData, trainLabel = featureArray(conf['train']['feature_vector'])
    testData, testLabel = featureArray(conf['test']['feature_vector'])

    neigh = KNeighborsClassifier(n_neighbors=1, algorithm='auto', p=2)
    neigh.fit(trainData, trainLabel)
    print(neigh.score(testData, testLabel))

    neighRadius = RadiusNeighborsClassifier(radius=500,
                                            weights='distance',
                                            algorithm='auto',
                                            p=2,
                                            metric='minkowski')
    neighRadius.fit(trainData, trainLabel)
    print(neighRadius.score(testData, testLabel))
Example #19
class r07525032_RadiusNeighbors(classification):
    def trainAlgo(self):
        self.model = RadiusNeighborsClassifier(
            radius=self.param['radius'],
            weights=self.param['weights'],
            algorithm=self.param['algorithm'],
            p=self.param['p'])
        y = np.argmax(self.outputData['Y'], axis=1)
        self.model.fit(self.inputData['X'], y)

    def predictAlgo(self):
        self.result['Y'] = self.model.predict(self.inputData['X'])
        self.result['Y'] = to_categorical(self.result["Y"])
class _RadiusNeighborsClassifierImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Example #21
def radius_neighbors_clustering(X_train, X_test, y_train, y_test, parameters):
    initial_classifier = RadiusNeighborsClassifier(
        n_jobs=-1, radius=parameters["radius"], metric=parameters["distance"])

    cputime_start_train = time.process_time()
    classifier = initial_classifier.fit(X_train, y_train)
    cputime_end_train = time.process_time()

    cputime_start_test = time.process_time()
    y_pred = classifier.predict(X_test)
    cputime_end_test = time.process_time()

    accuracy = classifier.score(X_test, y_test)

    return (accuracy,
            cputime_end_train - cputime_start_train,
            cputime_end_test - cputime_start_test)
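
For example, with a toy dataset and a parameters dict carrying the radius and the metric name (the radius is deliberately generous so every test point has at least one neighbor):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

acc, train_s, test_s = radius_neighbors_clustering(
    X_train, X_test, y_train, y_test,
    parameters={"radius": 50.0, "distance": "euclidean"})
print(acc, train_s, test_s)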
    def start(self):
        """ 01. Initialise the data paths and transformation functions.  """
        self.data_dir = '../data/raw_data'
        self.trans_primitives = ['weekday', 'hour', 'time_since_previous']
        self.agg_primitives = [
            'mean', 'max', 'min', 'std', 'count', 'percent_true', 'last',
            'time_since_last', 'mode'
        ]
        self.ignore_cols = [
            'num_contacts', 'num_referrals', 'num_successful_referrals'
        ]
        self.feature_windows = [10, 30, 60, 90]  #[10,20,30]
        self.max_feature_depth = 2

        # list of estimators to use
        self.estimators = [
            ('cbc', CatBoostClassifier()), ('lgbmc', LGBMClassifier()),
            ('gbc',
             GradientBoostingClassifier(validation_fraction=0.15,
                                        n_iter_no_change=50)),
            ('et', ExtraTreeClassifier()), ('abc', AdaBoostClassifier()),
            ('rfc', RandomForestClassifier()), ('bc', BaggingClassifier()),
            ('etc', ExtraTreesClassifier()), ('gnb', GaussianNB()),
            ('mlpc', MLPClassifier()), ('gpc', GaussianProcessClassifier()),
            ('dtc', DecisionTreeClassifier()),
            ('qda', QuadraticDiscriminantAnalysis()),
            ('lr', LogisticRegression()), ('knn3', KNeighborsClassifier(3)),
            ('knn6', KNeighborsClassifier(6)),
            ('knn12', KNeighborsClassifier(12)), ('nc', NearestCentroid()),
            ('rnc', RadiusNeighborsClassifier()), ('lp', LabelPropagation()),
            ('pac', PassiveAggressiveClassifier()), ('rc', RidgeClassifier()),
            ('sgdc', SGDClassifier()), ('svc', SVC()),
            ('ngbc', NGBClassifier(Dist=Bernoulli))
        ]
        self.next(self.load_raw_data)
Example #25
    def draw(self):
        """
        Draw the estimated floorplan in the current figure
        """
        xy = self.dimred.transform(self._fingerprints)

        x_min, x_max = xy[:, 0].min(), xy[:, 0].max()
        y_min, y_max = xy[:, 1].min(), xy[:, 1].max()
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 1.0),
                             np.arange(y_min, y_max, 1.0))
        clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        clf.fit(xy, self._label)
        label = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

        plt.pcolormesh(xx, yy, label)
        plt.scatter(xy[:, 0], xy[:, 1], c=self._label, vmin=0)
Example #27
def knnClassifier(xTrain, yTrain, xTest, yTest):

    # Create KNeighbor & RadiusNeighbor Classifiers
    knnKNeighbors = KNeighborsClassifier()
    knnRadiusNeighbors = RadiusNeighborsClassifier()

    # Fit data
    knnKNeighbors.fit(xTrain, yTrain)
    knnRadiusNeighbors.fit(xTrain, yTrain)

    # Find matches between predicted & actual values
    matchesKNeighbors = [i for i,j in zip(knnKNeighbors.predict(xTest), yTest) if i == j]
    matchesRadiusNeighbors = [i for i,j in zip(knnRadiusNeighbors.predict(xTest), yTest) if i == j]

    print "Accuracy of KNeighbors: ", (float(len(matchesKNeighbors))/len(yTest)) * 100
    print "Accuracy of RadiusNeighbors: ", (float(len(matchesRadiusNeighbors))/len(yTest)) * 100
 def test_model_knn_iris_classifier_multi_reg2_weight_radius(self):
     iris = datasets.load_iris()
     X = iris.data.astype(numpy.float32)
     y = iris.target.astype(numpy.int64)
     y = numpy.vstack([(y + 1) % 2, y % 2]).T
     model = RadiusNeighborsClassifier(
         algorithm='brute', weights='distance')
     model.fit(X[:13], y[:13])
     onx = to_onnx(model, X[:1],
                   options={id(model): {'optim': 'cdist',
                                        'zipmap': False}},
                   target_opset=TARGET_OPSET)
     dump_data_and_model(
         X.astype(numpy.float32)[:11],
         model, onx,
         basename="SklearnRadiusNeighborsClassifierMReg2-Out0")
Example #29
def train_model_for_prediction(path_to_csv,
                               path_to_json_dir,
                               company,
                               department,
                               classifier_id='random_forest',
                               needs_type='manual',
                               ratio_cleaner_val=None,
                               random_state=None,
                               remove_ratios=False,
                               remove_needs=False):
    """
    Trains the specified model to make predictions to be used in production.
    :param path_to_csv: String. Path to csv file of all the shifts.
    :param path_to_json_dir: String. Path to a directory containing all json schedules.
    :param company: String. Name of the company.
    :param department: String. Name of the department in the company.
    :param classifier_id: String. The classifier to use for training and prediction. 'random_forest', 'k_neighbors', or
     'radius_neighbors'.
    :param needs_type: String. Specifies type of needs to use. 'manual', 'avg', or 'median'.
    :param ratio_cleaner_val: The cutoff point for ratios to be removed from the data. The max, 7.0, removes all ratios;
    the min, 0.0, removes none. Recommended values to experiment with are 2.0, 1.5, and 1.0. Use None to ignore this.
    :param random_state: Int. Seed to remember the split of the data in StratifiedShuffleSplit.
    :param remove_ratios: Bool. Whether or not to remove the ratios from the features.
    :param remove_needs: Bool. Whether or not to remove the needs from the features.
    :return: Tuple of an instance of a trained RandomForestClassifier, KNeighborsClassifier, or
    RadiusNeighborsClassifier, AND the interpretation_keys.
    """
    (prepared_train_features, prepared_train_targets, prepared_test_features,
     prepared_test_targets, interpretation_keys) = prepare_features_targets(
         path_to_csv,
         path_to_json_dir,
         company,
         department,
         needs_type=needs_type,
         ratio_cleaner_val=ratio_cleaner_val,
         random_state=random_state,
         remove_ratios=remove_ratios,
         remove_needs=remove_needs)

    if classifier_id == 'random_forest':
        classifier = RandomForestClassifier(n_estimators=500,
                                            max_leaf_nodes=16,
                                            n_jobs=1)
    elif classifier_id == 'k_neighbors':
        classifier = KNeighborsClassifier()
    elif classifier_id == 'radius_neighbors':
        classifier = RadiusNeighborsClassifier()
    else:
        raise ValueError(
            'Invalid classifier_id specified:', classifier_id + '.',
            'Must be of type \'random_forest\', \'k_neighbors\', or \'radius_neighbors\'.'
        )

    classifier.fit(prepared_train_features, prepared_train_targets)

    # run the test data through it to gauge effectiveness.
    test_predictions = classifier.predict(prepared_test_features)
    print_model_analysis(test_predictions, prepared_test_targets)

    return classifier, interpretation_keys
Example #30
def run_main():
	data = load_files(PARAM.data_path, encoding="utf-8")
	X, y, y_names = clean_docs(data.data, True), data.target, data.target_names
	vectorizer = tfidf_vectorize(X, max_features = PARAM.max_features, min_df = PARAM.min_df, max_df = PARAM.max_df, analyzer = PARAM.analyzer, ngram_range = (1, 2))
	X_train, X_test, y_train, y_test = train_test_split(vectorizer.transform(X).toarray(), y, test_size = 0.2, random_state = 0)
	
	if PARAM.classifier == 'knn':
		classifier = KNeighborsClassifier(n_neighbors = PARAM.n_neighbors, weights = PARAM.weight_func)
	elif PARAM.classifier == 'rnn':
		classifier = RadiusNeighborsClassifier(radius = PARAM.radius)
	elif PARAM.classifier == 'nc':
		classifier = NearestCentroid()
	else:
		raise ValueError("[%s] not supported" % (PARAM.classifier))

	classifier.fit(X_train, y_train)
	y_pred = classifier.predict(X_test)

	print_eval(y_test, y_pred)
	if PARAM.model_path:
		model_map = {}
		model_map['clf_name'] = PARAM.classifier
		model_map['accuracy'] = get_accuracy(y_test, y_pred)
		model_map['vectorizer'] = vectorizer
		model_map['classifier'] = classifier
		save_model_map(model_map, PARAM.model_path)
Example #31
 def __init__(self, method, n_neighbors, weights, radius):
     if method == 'knn_class':
         self.clf = neighbors.KNeighborsClassifier(n_neighbors,
                                                   weights=weights)
     elif method == 'knn_rad':
         self.clf = RadiusNeighborsClassifier(radius=radius)
     elif method == 'knn_cent':
         self.clf = NearestCentroid()
     else:
         raise ValueError('unknown method: %s' % method)
Example #32
 def nncut_proc(distance, dt, dr, type):
     if dt.shape[0] == 0:
         return [dt, dr]
     nbrs = RadiusNeighborsClassifier().fit(
         dt,
         np.zeros_like(dr).reshape(dt.shape[0], ))
     colcnt = dt.shape[1]
     middle = nbrs.radius_neighbors(np.zeros(colcnt).reshape(1, colcnt),
                                    distance,
                                    return_distance=False)
     if type == 'inner':
         dt = dt.drop(dt.index[np.asarray(middle[0])])
         dr = dr.drop(dr.index[np.asarray(middle[0])])
     if type == 'outer':
         dt = dt[dt.index.isin(dt.index[np.asarray(middle[0])])]
         dr = dr[dr.index.isin(dr.index[np.asarray(middle[0])])]
     return [dt, dr]
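
The underlying query is simply radius_neighbors around a reference point; a reduced sketch:

import numpy as np
from sklearn.neighbors import RadiusNeighborsClassifier

X = np.array([[0.5, 0.5], [3.0, 3.0], [0.2, -0.1]])
nbrs = RadiusNeighborsClassifier().fit(X, np.zeros(len(X)))

origin = np.zeros((1, X.shape[1]))
inside = nbrs.radius_neighbors(origin, 1.0, return_distance=False)
print(inside[0])  # indices of the rows within distance 1.0 of the origin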
Example #33
 def set_up_radius(self):
     self.classifier = RadiusNeighborsClassifier(radius=self.radius)
     self.classifier.fit(self.X_train, self.y_train)
     y_pred = self.classifier.predict(self.X_test)
     print("confusion_matrix: \n")
     print(confusion_matrix(self.y_test, y_pred))
     print("classification_report\n")
     return classification_report(self.y_test, y_pred)
Example #34
    def __init__(self, name, **kwargs):
        from sklearn.neighbors import RadiusNeighborsClassifier

        def signal_proba(model, X):
            return RadiusNeighborsClassifier.predict_proba(model, X)[:, 1]

        self._prob_func = signal_proba
        self._range = [0, 1]
        super().__init__(name, RadiusNeighborsClassifier(**kwargs))
Example #35
 def radiusNeighborsClassification(self, X_train, y_train):
     """Method to train a radius nearest neighbor classifier

     Parameters
     ----------
     X_train: Array shape [n_samples, n_features] for training the model with features
     y_train: Array shape [n_samples] with the target of each training sample

     Returns
     ---------
     model: The trained nearest neighbor model."""

     # Initialize the classifier
     model = RadiusNeighborsClassifier(radius=1.0, weights='uniform')
     # Fit the model with training data
     model.fit(X_train, y_train)

     return model
Example #36
def get_classifiers():
    # basic classifiers
    dc = DummyClassifier(random_state=0)
    lr = LogisticRegression()
    gnb = GaussianNB()
    svc = LinearSVC(C=1)
    C0 = {
        "name":
        "Basic",
        "methods": [(dc, "Dummy"), (lr, "Logit"), (gnb, "Naive Bayes"),
                    (svc, "SVC")]
    }

    # decision trees
    dec_tree = DecisionTreeClassifier(random_state=0)
    etc_tree = ExtraTreeClassifier(random_state=0)
    C1 = {
        "name": "Decision Tree",
        "methods": [(dec_tree, "Decision Tree"), (etc_tree, "Extra Tree")]
    }

    # nearest neighbor classifiers
    knn = KNeighborsClassifier(n_neighbors=25, weights="distance")
    rnn = RadiusNeighborsClassifier(radius=20.0, outlier_label=1)
    nc = NearestCentroid()
    C2 = {
        "name": "Nearest Neighbors",
        "methods": [(knn, "KNN"), (rnn, "Radius NN"), (nc, "Nearest Centroid")]
    }

    # ensemble models
    ada = AdaBoostClassifier()
    bg = BaggingClassifier(n_estimators=50, max_features=3)
    etsc = ExtraTreesClassifier(n_estimators=50, criterion="entropy")
    gb = GradientBoostingClassifier(max_depth=5, random_state=0)
    rfc = RandomForestClassifier(n_estimators=100)
    C3 = {
        "name":
        "Ensemble",
        "methods": [(ada, "Ada Boost"), (bg, "Bagging"), (etsc, "Extra Trees"),
                    (gb, "Gradient Boosting"), (rfc, "Random Forest")]
    }

    # discriminant analysis & GPC
    lda = LinearDiscriminantAnalysis()
    qda = QuadraticDiscriminantAnalysis()
    C4 = {
        "name": "Discriminant Analysis",
        "methods": [(lda, "LDA"), (qda, "QDA")]
    }

    # neural net
    nn = MLPClassifier(alpha=0.1, tol=1e-8)
    C5 = {"name": "Complex Architecture", "methods": [(nn, "Neural Network")]}

    CLF = [C0, C1, C2, C3, C4, C5]
    return CLF
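
The grouped structure makes it easy to benchmark everything in one loop; a sketch, assuming X_train, y_train, X_test, y_test already exist:

CLF = get_classifiers()
for group in CLF:
    print(group["name"])
    for clf, label in group["methods"]:
        clf.fit(X_train, y_train)
        print("  %s: %.3f" % (label, clf.score(X_test, y_test)))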
def Radius_Neighbors(input_file,Output,test_size):
    lvltrace.lvltrace("LVLEntree dans radius_kneighbors split_test")
    try:
        ncol=tools.file_col_coma(input_file)
        data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
        X = data[:,1:]
        y = data[:,0]
        n_samples, n_features = X.shape
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
        print X_train.shape, X_test.shape
        clf = RadiusNeighborsClassifier(radius=0.001, weights='uniform', algorithm='auto')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        print "Radius Neighbors accuracy "
        print "classification accuracy:", metrics.accuracy_score(y_test, y_pred)
        print "precision:", metrics.precision_score(y_test, y_pred)
        print "recall:", metrics.recall_score(y_test, y_pred)
        print "f1 score:", metrics.f1_score(y_test, y_pred)
        print "\n"
        results = Output+"Raidus_Neighbors_metrics_test.txt"
        file = open(results, "w")
        file.write("Radius Neighbors estimator accuracy\n")
        file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y_test, y_pred))
        file.write("Precision Score: %f\n"%metrics.precision_score(y_test, y_pred))
        file.write("Recall Score: %f\n"%metrics.recall_score(y_test, y_pred))
        file.write("F1 Score: %f\n"%metrics.f1_score(y_test, y_pred))
        file.write("\n")
        file.write("True Value, Predicted Value, Iteration\n")
        for n in xrange(len(y_test)):
            file.write("%f,%f,%i\n"%(y_test[n],y_pred[n],(n+1)))
        file.close()
        title = "Radius Neighbors %f"%test_size
        save = Output + "Radius_Neighbors_confusion_matrix"+"_%s.png"%test_size
        plot_confusion_matrix(y_test, y_pred,title,save)
    except (ValueError):
        results = Output+"Raidus_Neighbors_metrics_test.txt"
        file = open(results, "w")
        file.write("In configuration.py file:  No neighbors found for test samples, you can try using larger radius, give a label for outliers, consider or removing them from your dataset.")
        file.close()
    lvltrace.lvltrace("LVLSortie dans radius_kneighbors split_test")
def main():
    X_train_all, t_train_all, train_all_ids = create_data_matrix(0, 3086, TRAIN_DIR)
    X_train, X_valid, t_train, t_valid = train_test_split(X_train_all, t_train_all, test_size=0.20, random_state=37)
    X_test_all, t_test_all, test_all_ids = create_data_matrix(0, 3724, TEST_DIR)

    sv = svm.SVC(kernel='poly')
    sv.fit(X_train, t_train)
    print "SVM Score was: %f" % clf.score(X_valid, t_valid)

    rf = RandomForestClassifier(n_estimators=30, min_samples_split=1, random_state=37)
    rf.fit(X_train, t_train)
    print "RandomForest Score was: %f" % (rf.score(X_valid, t_valid))

    lr = LogisticRegression(penalty='l2',solver='newton-cg',max_iter=500)
    lr.fit(X_train, t_train)
    print "LogisticRegression Score was: %f" % (lr.score(X_valid, t_valid))

    clf = GaussianNB()
    clf.fit(X_train, t_train)
    print "GaussianNB Score was: %f" % (clf.score(X_valid, t_valid))

    nn = KNeighborsClassifier(n_neighbors=6, weights='uniform')
    nn.fit(X_train, t_train)
    score = nn.score(X_valid, t_valid)
    print "KNeighbors Score was: %f" % (score)

    rnc = RadiusNeighborsClassifier(radius=6,outlier_label=8, p=2)
    rnc.fit(X_train, t_train)
    print "RadiusNeighbors Score was: %f" % (rnc.score(X_valid, t_valid))

    # Get predictions
    rf = RandomForestClassifier(n_estimators=30, min_samples_split=1)
    rf.fit(X_train_all, t_train_all)
    test_predictions = rf.predict(X_test_all)

    write_to_file("prediction.csv", test_all_ids, test_predictions)
import window_s_p_ft as win
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.model_selection import train_test_split


total_score = 0
stop = 1000
for x in range(stop):
    clf = RadiusNeighborsClassifier(radius=100.0)
    data = win.getStudents()
    data_train, data_test = train_test_split(data, test_size=0.2)
    data_train_labels = [s.spec for s in data_train]
    data_test_labels = [s.spec for s in data_test]
    data_train = [s.grades for s in data_train]
    data_test = [s.grades for s in data_test]
    clf.fit(data_train, data_train_labels)
    total_score += clf.score(data_test, data_test_labels)
total_score = total_score / stop
print('all')
print(total_score)

specs = ['FK', 'FM', 'MN', 'OE']
for sp in specs:
    total_score = 0
    for x in range(stop):
        clf = RadiusNeighborsClassifier(radius=100.0)
        data = win.getStudents()
        data_train, data_test = train_test_split(data, test_size=0.2)
        data_train_labels = [s.spec if s.spec == sp else 'NOT ' + sp for s in data_train]
        data_test_labels = [s.spec if s.spec == sp else 'NOT ' + sp for s in data_test]
        data_train = [s.grades for s in data_train]
        data_test = [s.grades for s in data_test]
        clf.fit(data_train, data_train_labels)
        total_score += clf.score(data_test, data_test_labels)
    total_score = total_score / stop
    print(sp)
    print(total_score)
Example #40
def par(X_tr, y_tr, X_te, r):
    neigh = RadiusNeighborsClassifier(radius=r)
    neigh.fit(X_tr, y_tr)
    y_pred = neigh.predict(X_te)
    return y_pred
def knn_classifier_Radius(X_train, categories, X_test, test_categories):
    from sklearn.neighbors import RadiusNeighborsClassifier
    clf = RadiusNeighborsClassifier(outlier_label=0).fit(X_train, categories)
    y_rknn_predicted = clf.predict(X_test)
    print "\n Here is the classification report for RadiusNeighborsClassifier classifier:"
    print metrics.classification_report(test_categories, y_rknn_predicted)
Example #42
class Model(object):
    """
    Text-classification system built with scikit-learn.
    For reference see: http://scikit-learn.org/stable/

    This Model class is based on Data class. Defines training
    and test data. Build classification model. Provides
    evaluation methods.

    Parameter
    ---------
    data : Data, optional
        Contains a data object with filled data.real_data.

    data_list : array, shape = [data1 object, data2 object, ...]
        Contains data objects with filled data.real_data.

    Attributes
    ----------
    clf : classifier object from sklearn modules.
        Contains a selected classifier object from a sklearn module.
        see reference: http://scikit-learn.org/stable/supervised_learning.html#supervised-learning

    classifier_list : array, shape = [string classifier1 name, ...]
        Contains names of all available classification algorithms.

    __train_data_set : boolean
        Contains a boolean value that describes whether train_data is set.

    train_data : Data
        Contains the data object that is set as training data.

    test_data : Data
        Contains the data object that is set as test data.

    train_targets : numpy array of shape [n_samples]
        Contains the class labels of the training data. A sample is
        a textpair object; its class label is found in textpair.target.

    train_samples : numpy array of shape [n_samples,n_features]
        Contains the feature values of the training data. A sample is
        a textpair object; its feature values are found in the textpair.features
        hash. After calling vectorize(), they are stored in
        textpair.feature_vector.

    test_targets : numpy array of shape [n_samples]
        Contains the class labels of the test data. A sample is
        a textpair object; its class label is found in textpair.target.

    test_samples : numpy array of shape [n_samples,n_features]
        Contains the feature values of the test data. A sample is
        a textpair object; its feature values are found in the textpair.features
        hash. After calling vectorize(), they are stored in
        textpair.feature_vector.
    """

    def __init__(self, data=None, data_list=None):
        self.clf = None

        if data is not None:
            self.data_list = [data]
        elif data_list is not None:
            self.data_list = data_list

        self.classifier_list = ["svm_linear", "svm_poly", "naive_bayes", "decision_tree", "nearest_centroid",
                                "k_neighbors", "radius_neighbors"]

        self.__train_data_set = False

    def set_train_data(self, data_name):
        """Setter for training data

        Walk through data_list and set data object with
        data.name as train_data.

        Parameter
        ---------
        data_name : string
            Contains the name of the data object, that should
            be set as train_data for the model.
        """
        data_in_list = False
        for data in self.data_list:
            if data.name == data_name:
                print data_name + " is in model_data_list"
                self.train_data = data
                self.train_samples, self.train_targets = self.fill_feature_target(data)
                print data_name + " is set as train_data"
                data_in_list = True
        if data_in_list:
            self.__train_data_set = True
        else:
            print data_name + " not in model_data_list "

    def set_test_data(self, data_name):
        """Setter for test data

        Walk through data_list and set data object with
        data.name as test_data.

        Notes
        -----
        Training data has to be set before test data, due to the fact
        that some features need skeletons that have to be built before seeing
        the test data.

        see reference: bag_of_pos.py, bag_of_words.py, tf_idf.py

        Parameter
        ---------
        data_name : string
            Contains the name of the data object, that should
            be set as test_data for the model.
        """
        if self.__train_data_set and self.train_data.name == data_name:
            self.test_data = self.train_data
            print "train_data and test_data from one data_set"
        elif not self.__train_data_set:
            print "please set train_data first"
        else:
            data_in_list = False
            for data in self.data_list:
                if data.name == data_name:
                    print data_name + " is in model_data_list"
                    self.test_data = data
                    self.test_samples, self.test_targets = self.fill_feature_target(data)
                    data_in_list = True
                    print data_name + " is set as test_data"
            if not data_in_list:
                print data_name + " not in model_data_list "

    def fill_feature_target(self, data):
        """ Fill the feature samples and target values.

        The classifier objects from sklearn need a numpy array for
        classification.

        Shape of the data class labels : numpy array of shape [n_samples]
        Shape of the data feature values : numpy array of shape [n_samples,n_features]

        Vectorize() textpair feature values, for building required numpy arrays.

        Note
        ----
        Check __train_data_set first, because there is no need to attach the
        same features for the test data manually in main.py. This will be
        performed automatically in here.

        Parameter
        ---------
        data : Data
            Contains a Data object that data.real_data should be vectorized.
        """
        sample_list = []
        target_list = []

        if self.__train_data_set:
            for feature in self.train_data.features_fit:
                if feature == "bag_of_words" or feature == "bag_of_pos" or feature == "tf_idf":
                    data.bow_model = self.train_data.bow_model

            print self.train_data.features_fit
            data.attach_feature_list(self.train_data.features_fit)

            for textpair in data.real_data.values():
                textpair.vectorize()
                target_list.append(textpair.target)
                sample_list.append(textpair.feature_vector)

            return np.array(sample_list), np.array(target_list)
        else:
            for textpair in data.real_data.values():
                textpair.vectorize()
                target_list.append(textpair.target)
                sample_list.append(textpair.feature_vector)

            return np.array(sample_list), np.array(target_list)

    def set_classifier(self, classifier_name):
        """ Setter for clf

        Building instances of classifier objects with corresponding name.

        Parameter
        ---------
        classifier_name : string
            Contains the corresponding name of the wanted classifier from
            sklearn.
        """
        if classifier_name == "svm_linear":
            self.clf = svm.SVC(kernel="linear", class_weight="auto")
        elif classifier_name == "svm_poly":
            self.clf = svm.SVC(kernel="poly", class_weight="auto")
        elif classifier_name == "naive_bayes":
            self.clf = GaussianNB()
        elif classifier_name == "decision_tree":
            self.clf = tree.DecisionTreeClassifier()
        elif classifier_name == "nearest_centroid":
            self.clf = NearestCentroid()
        elif classifier_name == "k_neighbors":
            self.clf = KNeighborsClassifier(n_neighbors=100)
        elif classifier_name == "radius_neighbors":
            self.clf = RadiusNeighborsClassifier(radius=1.0, outlier_label=1)
        else:
            raise ClassifierNotExistException(classifier_name)

    def train(self, fraction):
        """ Train the model

        Training the classifier with the wanted fraction of the training data.

        Parameter
        ---------
        fraction : int
            Contains a number from 0 to 100. Defines the fraction of the
            training data that will be used for training the classifier.
        """
        if self.clf is None:
            raise NoClassifierException
        elif self.train_targets.size == 0 and self.train_samples.size == 0:
            raise EmptyFeaturesEmptyTargetsException
        else:
            count = int(round((float(len(self.train_targets)) / float(100)) * float(fraction), 0))
            self.clf.fit(self.train_samples[:count], self.train_targets[:count])

    def predict(self, sample):
        """ Predict a given sample.

        Make a prediction for a given sample. Classifier needs a numpy array
        with the feature values of a sample.

        Note
        ----
        Requires a trained(fitted) model.

        Parameters
        ----------
        samples : numpy array of shape [n_samples,n_features]

        Returns
        -------
        self.clf.predict(sample) : int
            Contains the prediction value from the model. It is the predicted
            class label. For a textpair object it can be 0 or 1.
        """
        if self.clf is None:
            raise NoClassifierException
        elif self.test_targets.size == 0 and self.test_samples.size == 0:
            raise EmptyFeaturesEmptyTargetsException
        else:
            return self.clf.predict(sample)

    def evaluate_cross_validation(self, folds):
        """ Evaluation through a cross-validation

        Perform a cross-validation on the set training data
        with measured accuracy.
        It requires a given number of folds.

        Note
        ----
        Cross-validation is performed on the training data, not on the
        test data, so set your data as training data if you want to
        perform a cross-validation.

        Parameter
        ---------
        folds : int
            Contains the number of folds for the cross-validation.

        Returns
        -------
        accuracy_list : array, shape = [float acc score1, float acc score2, ...]
            Contains the accuracy scores of all iterations.

        acc_mean : float
            Contains the accuracy mean of the all iterations.
        """
        if self.clf is None:
            raise NoClassifierException

        elif self.train_targets.size == 0 and self.train_samples.size == 0:
            raise EmptyFeaturesEmptyTargetsException

        elif folds > len(self.train_samples):
            raise FoldSizeToBigException(folds, self.train_samples)

        else:
            kf = KFold(len(self.train_samples), n_folds=folds)
            accuracy_list = []

            for train, test in kf:
                x_train, x_test, y_train, y_test = self.train_samples[train], self.train_samples[test], \
                                                   self.train_targets[train], self.train_targets[test]

                self.clf.fit(x_train, y_train)
                accuracy_list.append(accuracy_score(np.array(y_test), np.array(self.clf.predict(x_test))))

            acc_mean = sum(accuracy_list) / len(accuracy_list)

            return accuracy_list, acc_mean

    def evaluate_classification_report(self, fraction):
        """ A detailed classification report

        For an easy use to measure how well your trained model performs,
        the given method uses your set data objects and gives an accuracy
        score output on the shell.

        Note
        ----
        There are two scenarios :

            1. training data and test data are from the same data object
                (meaning their names are the same) - no normalization
            2. training data and test data are from different data objects
                + normalization

        The first scenario divides the training data into train and test
        portions according to the given fraction. If fraction is 100, the
        model is trained and tested on the same data object. With a
        fraction of 80 it is trained on 80 percent and tested on 20
        percent of the given data object. Normalization is not
        implemented for this scenario!

        The second scenario needs a fraction of 100, so that the whole
        training data is used for training. It works with normalized
        values.

        Parameter
        ---------
        fraction : int
            Contains a number from 0 to 100. Defines the fraction of the
            training data that will be used for training the classifier.
        """
        if self.clf is None:
            raise NoClassifierException

        elif self.train_targets.size == 0 and self.train_samples.size == 0:
            raise EmptyFeaturesEmptyTargetsException

        else:
            # if trained on 100 % fraction, it will be tested on 100 %
            # fraction, than train and test data are the same

            # if count_predict is 0 (with 100% count_train), than
            # self.targets[-count_predict:] == self.targets[:] = True
            if self.test_data.name == self.train_data.name:

                print "train_data and test_data from one data_set"
                count_train = int(round((float(len(self.train_targets)) / float(100)) * float(fraction), 0))
                count_predict = len(self.train_targets) - count_train

                print "count_train:", count_train
                print "count_predict:", count_predict

                # Summarize placed in here, cause data objects are equal and
                # dived in this method. So training and test data are defined
                # in here.
                print "##########train_data summarize##########"
                summarize_textpair(self.train_data.real_data.values()[:count_train])

                print "##########test_data summarize##########"
                summarize_textpair(self.train_data.real_data.values()[-count_predict:])

                # setting train and test data
                train_samples = self.train_samples[:count_train]
                train_targets = self.train_targets[:count_train]
                test_samples = self.train_samples[-count_predict:]
                test_targets = self.train_targets[-count_predict:]

                # Training
                self.clf.fit(train_samples, train_targets)

                # Testing
                test_targets_predicted = self.clf.predict(test_samples)

                # calculating the majority-class baseline
                null = 0
                eins = 0
                for i in test_targets:
                    if i == 0:
                        null += 1
                    else:
                        eins += 1
                baseline = float(max(null, eins)) / (null + eins)

                print("count of 0:", null)
                print("count of 1:", eins)
                print("baseline:", baseline)
                print("-------------------------------")

                # calculating the accuracy score of the predicted samples
                print("accuracy_score:", accuracy_score(test_targets, test_targets_predicted))

            else:
                # normalization
                norma = preprocessing.normalize(self.train_samples)

                count_train = int(round(len(self.train_targets) / 100.0 * fraction))
                print("count_train:", count_train)
                print("count_predict:", len(self.test_targets))

                # setting train and test data

                # without normalization, use this instead:
                # train_samples = self.train_samples[:count_train]
                train_samples = norma[:count_train]
                train_targets = self.train_targets[:count_train]

                # without normalization, use this instead:
                # test_samples = self.test_samples
                test_samples = preprocessing.normalize(self.test_samples)
                test_targets = self.test_targets

                # Training
                self.clf.fit(train_samples, train_targets)

                # Testing
                test_targets_predicted = self.clf.predict(test_samples)

                # calculating the majority-class baseline
                null = 0
                eins = 0
                for i in test_targets:
                    if i == 0:
                        null += 1
                    else:
                        eins += 1
                baseline = float(max(null, eins)) / (null + eins)

                print("count of 0:", null)
                print("count of 1:", eins)
                print("baseline:", baseline)
                print("-------------------------------")

                # calculating the accuracy score of the predicted samples
                print("accuracy_score:", accuracy_score(test_targets, test_targets_predicted))
Exemple #43
__author__ = 'Administrator'
from sklearn.neighbors import RadiusNeighborsClassifier
import src.Utils.FeatureExtractor as FE
import src.Utils.Predict as Pre

if __name__ == "__main__":
    temp = FE.ExtractFeatureFile("../../ins/data/dataset1.feature")
    temp1 = FE.ExtractFeatureFile("../../ins/data/dataset2.feature")
    clf = RadiusNeighborsClassifier(radius=1.0)
    print(clf.fit(temp[0], temp[1]))
    Pre.Predict(clf, temp1)
plt.subplots_adjust(left=0.00, right=1.00, top=.94, bottom=0.00)
plt.title("C: %d, Gamma: %d" %(C,gamma))
#plt.xticks(())
#plt.yticks(())
#plt.axis([-3, 3, -3, 3])


biz['svm_pred'] = (biz.expensive > clf.predict(X)).astype(int)
plt.scatter(x=biz[biz.svm_pred==1].X, y=biz[biz.svm_pred==1].Y, s=20, c='g')
print("Prop of expensive businesses seen as gentrifiers [%.2f]" % (biz['svm_pred'].sum()/biz.expensive.sum()))
print("Prop of all businesses seen as gentrifiers [%.2f]" % (biz['svm_pred'].sum()/len(biz.expensive)))
#biz['gentrifier']=(biz.expensive>biz.svm_pred).astype(int)
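# Sketch of the labelling trick above: a row is flagged when it is
# expensive (1) but the classifier predicted 0, via element-wise `>`:
import pandas as pd
demo = pd.DataFrame({'expensive': [1, 1, 0], 'pred': [0, 1, 0]})
demo['gentrifier'] = (demo.expensive > demo.pred).astype(int)
print(demo)  # only the first row is flagged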
#####################################################################################################################################
################################################################ Nearest Neighbor ###################################################
r = .00025  # a block is about .001 in these units, so this radius covers a fraction of a block
neigh = RadiusNeighborsClassifier(radius=r)  # radius from a QGIS nearest-neighbour analysis
neigh.fit(X, Y)
predictions=neigh.predict(X)
plt.scatter(X.iloc[:,0], X.iloc[:,1], s=30, c=Y, cmap=plt.cm.Paired); plt.title('True labels')
plt.subplots_adjust(left=0, bottom=0, right=1, top=.95, wspace=0, hspace=0)


plt.figure(); 
plt.scatter(X.iloc[:,0], X.iloc[:,1], s=30, c=predictions, cmap=plt.cm.Paired); plt.title('Predicted labels, rad=%.3f' %r)
plt.subplots_adjust(left=0, bottom=0, right=1, top=.95, wspace=0, hspace=0)


biz['rnn_gentrifier'] = (biz.expensive > predictions).astype(int)
plt.scatter(x=biz[biz.rnn_gentrifier==1].X, y=biz[biz.rnn_gentrifier==1].Y, s=20, c='g')
print("Prop of expensive businesses seen as gentrifiers [%.2f]" % (biz.rnn_gentrifier.sum()/biz.expensive.sum()))
print("Prop of all businesses seen as gentrifiers [%.2f]" % (biz.rnn_gentrifier.sum()/len(biz.expensive)))
        if coltag in self._clasifyData:
            try:
                tag = self._clasifyData[coltag]['neigh'].predict([[screenspace_x,screenspace_y]])
                tag = tag[0]
                self._clasifyData[coltag]['data'][tag] = [screenspace_x,screenspace_y]       
                
            except ValueError:
                return MOCAP_ROGE_DATA   
            return tag
        return MOCAP_ROGE_DATA
    
    def updateBoxesForNextFrame(self):
        for clotag,data in self._clasifyData.items():
            centroids = []
            labels = []
            for tag,centroid in data['data'].items():
                centroids.append(centroid)
                labels.append(tag)
            self._clasifyData[clotag]['neigh'].fit(centroids,labels)            
        
        
             
X = [[229.5, 500.5], [127.0, 497.0]]  # e.g. [[0, 0], [1, 1], [2, 2], [3, 3]]
y = [1, 5]  # e.g. [5, 1, 3, 4]
neigh = RadiusNeighborsClassifier(radius=1.0)
neigh.fit(X, y) 
print(neigh.predict([[229.5, 500.5]]))
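# Why the try/except ValueError in the tracking snippet above: with the
# default outlier_label=None, predict() raises ValueError when a query
# point has no training neighbour within the radius; setting
# outlier_label avoids it. Minimal sketch:
from sklearn.neighbors import RadiusNeighborsClassifier

clf_out = RadiusNeighborsClassifier(radius=1.0, outlier_label=-1)
clf_out.fit([[0, 0], [1, 1]], [0, 1])
print(clf_out.predict([[50, 50]]))  # [-1] instead of a ValueError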



def radiusNeighborClassifier():
    maximumValue = 0
    returnParameters = ['0','0']
    for neighbor in range(100, 1001, 100):
        neighAutoRadius = RadiusNeighborsClassifier(radius=neighbor, weights='uniform',
                                                    algorithm='auto', p=2, metric='minkowski')
        neighAutoRadius.fit(trainData, trainLabel)
        neighDistanceRadius = RadiusNeighborsClassifier(radius=neighbor, weights='distance',
                                                        algorithm='auto', p=2, metric='minkowski')
        neighDistanceRadius.fit(trainData, trainLabel)
        scoreAuto = neighAutoRadius.score(validationData, validationLabel)
        scoreDistance = neighDistanceRadius.score(validationData, validationLabel)
        if max(scoreAuto,scoreDistance) > maximumValue:
            maximumValue = max(scoreAuto,scoreDistance)
            returnParameters[0] = str(neighbor)
            returnParameters[1] = 'distance' if scoreDistance>scoreAuto else 'uniform'

    neighTest = RadiusNeighborsClassifier(radius=int(returnParameters[0]), weights=returnParameters[1],
                                          algorithm='auto', p=2, metric='minkowski')
    neighTest.fit(trainData, trainLabel)
    scoreTest = neighTest.score(testData, testLabel)
    guideToGraph['Radius Neighbor'] = scoreTest
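# The manual grid above can also be written with GridSearchCV (a sketch;
# it assumes the same trainData/trainLabel arrays, and uses cross-validation
# instead of the fixed validation split):
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import RadiusNeighborsClassifier

param_grid = {'radius': list(range(100, 1001, 100)),
              'weights': ['uniform', 'distance']}
# outlier_label avoids ValueError for points with no in-radius neighbours
search = GridSearchCV(RadiusNeighborsClassifier(outlier_label=-1),
                      param_grid, scoring='accuracy', cv=3)
search.fit(trainData, trainLabel)
print(search.best_params_, search.best_score_)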
Exemple #47
from sklearn.model_selection import train_test_split
from sklearn.neighbors import RadiusNeighborsClassifier

def radius_knn(data, response, rad):
    X_train, X_test, y_train, y_test = train_test_split(data, response)
    neigh = RadiusNeighborsClassifier(radius=rad)
    d = neigh.fit(X_train, y_train).score(X_test, y_test)
    print('knn radius classifier accuracy: ' + str(d))
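# Example call (a sketch): on tightly clustered toy data virtually every
# test point has a neighbour within the radius, so score() will not fail.
from sklearn.datasets import make_blobs

X_demo, y_demo = make_blobs(n_samples=200, centers=3, cluster_std=0.5, random_state=0)
radius_knn(X_demo, y_demo, rad=3.0)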
Exemple #48
        y_train = labels[100:172, i]
        X_test = sample2
        y_test = labels[272:, i]
    else:
        X_train = training
        y_train = labels[:172, i]
        X_test = sampletest
        y_test = labels[172:, i]

    posterior = np.empty([100, 72, 6])
    box = np.zeros([6, 6])
    for j in range(4, 5):
        for k in range(1, 2):
            accuracy = np.zeros(100)
            for m in range(0, 100):
                rnc = RadiusNeighborsClassifier(radius=j, leaf_size=k)
                rnc.fit(X_train, y_train)
                y_pred = rnc.predict(X_test)

                # count correct predictions and fill the confusion matrix;
                # use `t` so the outer loop index `i` is not clobbered
                n = 0
                for t in range(len(y_pred)):
                    if y_pred[t] == y_test[t]:
                        # print(t, y_pred[t], y_test[t])
                        n = n + 1
                        accuracy[m] = accuracy[m] + 1
                    box[y_test[t] - 1, y_pred[t] - 1] += 1
                # posterior[m] = rnc.predict_proba(X_test)
            print(j, k, np.mean(accuracy) / 0.72, np.std(accuracy) / 0.72)
            # print(30, 20, sum(accuracy[0:8])/8.0, sum(accuracy[8:18])/10.0, sum(accuracy[18:30])/12.0, sum(accuracy[56:72])/16.0, sum(accuracy[30:43])/13.0, sum(accuracy[43:56])/13.0, sum(accuracy)/72.0)
        """
    means = np.empty([72,6])
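# The manual `box` tally above is a confusion matrix; sklearn computes the
# same thing directly (a sketch, assuming integer labels 1..6 as above):
from sklearn.metrics import confusion_matrix

y_true_demo = [1, 2, 2, 3, 6]
y_pred_demo = [1, 2, 3, 3, 6]
print(confusion_matrix(y_true_demo, y_pred_demo, labels=list(range(1, 7))))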