def radiusNeighborClassifier():
    maximumValue = 0
    returnParameters = ['0', '0']
    for neighbor in range(100, 1001, 100):
        neighAutoRadius = RadiusNeighborsClassifier(radius=neighbor,
                                                    weights='uniform',
                                                    algorithm='auto',
                                                    p=2,
                                                    metric='minkowski')
        neighAutoRadius.fit(trainData, trainLabel)
        neighDistanceRadius = RadiusNeighborsClassifier(radius=neighbor,
                                                        weights='distance',
                                                        algorithm='auto',
                                                        p=2,
                                                        metric='minkowski')
        neighDistanceRadius.fit(trainData, trainLabel)
        scoreAuto = neighAutoRadius.score(validationData, validationLabel)
        scoreDistance = neighDistanceRadius.score(validationData,
                                                  validationLabel)
        if max(scoreAuto, scoreDistance) > maximumValue:
            maximumValue = max(scoreAuto, scoreDistance)
            returnParameters[0] = str(neighbor)
            returnParameters[1] = ('distance' if scoreDistance > scoreAuto
                                   else 'uniform')

    neighTest = RadiusNeighborsClassifier(radius=int(returnParameters[0]),
                                          weights=returnParameters[1],
                                          algorithm='auto',
                                          p=2,
                                          metric='minkowski')
    neighTest.fit(trainData, trainLabel)
    scoreTest = neighTest.score(testData, testLabel)
    guideToGraph['Radius Neighbor'] = scoreTest

def radius_neighbors_clustering(X_train, X_test, y_train, y_test, parameters,
                                evaluation_metrics):
    # adjust the incoming parameters before building the classifier; this
    # mainly concerns the distance metric
    modified_parameters = prepare_parameters(parameters)

    if modified_parameters["distance"] != "mahalanobis":
        initial_classifier = RadiusNeighborsClassifier(
            n_jobs=-1,
            radius=modified_parameters["radius"],
            metric=modified_parameters["distance"],
            p=modified_parameters["minkowski_p"])
    else:
        initial_classifier = RadiusNeighborsClassifier(
            n_jobs=-1,
            radius=modified_parameters["radius"],
            metric=modified_parameters["distance"],
            p=modified_parameters["minkowski_p"],
            algorithm="brute",
            # rows of X_train are samples, so estimate the feature covariance
            # with rowvar=False before inverting
            metric_params={"VI": np.linalg.inv(np.cov(X_train, rowvar=False))})

    classifier = initial_classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    evaluation_metrics["accuracy"] = classifier.score(X_test, y_test)

    return evaluation_metrics
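
A minimal standalone sketch of the Mahalanobis branch above, on synthetic data (the variable names and values are illustrative, not from the source): the inverse feature covariance is passed as VI, with brute-force search matching the branch above.

import numpy as np
from sklearn.neighbors import RadiusNeighborsClassifier

rng = np.random.RandomState(0)
X_demo = rng.randn(100, 3)
y_demo = (X_demo[:, 0] > 0).astype(int)

mahalanobis_clf = RadiusNeighborsClassifier(
    radius=5.0,
    metric="mahalanobis",
    algorithm="brute",  # brute force, as in the branch above
    # rows are samples, so rowvar=False when estimating the covariance
    metric_params={"VI": np.linalg.inv(np.cov(X_demo, rowvar=False))})
mahalanobis_clf.fit(X_demo, y_demo)
print(mahalanobis_clf.score(X_demo, y_demo))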
Example #3
def createRadiusNeighborsClassifier(params=None):
    info("Creating Radius Neighbors Classifier", ind=4)
    error("This doesn't work")
    return {"estimator": None, "params": None}

    ## Params
    params = mergeParams(RadiusNeighborsClassifier(), params)
    tuneParams = getRadiusNeighborsClassifierParams()
    grid = tuneParams['grid']

    info("With Parameters", ind=4)
    algorithm = setParam('algorithm', params, grid, force=False)
    info("Param: algorithm = {0}".format(algorithm), ind=6)

    leaf_size = setParam('leaf_size', params, grid, force=False)
    info("Param: leaf_size = {0}".format(leaf_size), ind=6)

    metric = setParam('metric', params, grid, force=False)
    info("Param: metric = {0}".format(metric), ind=6)

    radius = setParam('radius', params, grid, force=False)
    info("Param: radius = {0}".format(radius), ind=6)

    weights = setParam('weights', params, grid, force=False)
    info("Param: weights = {0}".format(weights), ind=6)

    ## Estimator
    reg = RadiusNeighborsClassifier(algorithm=algorithm,
                                    leaf_size=leaf_size,
                                    metric=metric,
                                    radius=radius,
                                    weights=weights)

    return {"estimator": reg, "params": tuneParams}
Example #4
def palabra(directorio):
    # "palabra" means "word": classify formant points from each wav file's
    # spectrogram into characters and assemble them into words
    global X, Y
    words = ''
    neigh = RadiusNeighborsClassifier(radius=0.12)
    neigh.fit(X, Y)
    for filename in os.listdir(directorio):
        word = ' '
        fs, x = wv.read(directorio + '/' + filename)
        n = len(x)
        if ((n / fs) > 0.5):
            spectrum, freqs, t, im = plt.specgram(x[:, 1],
                                                  NFFT=1024,
                                                  Fs=fs,
                                                  sides='onesided')
            for i in range(len(spectrum)):
                point = findForm(
                    np.linspace(0, freqs[len(spectrum[i])] / 1000,
                                len(spectrum[i])), spectrum[i])
                if point[0] != 0 and point[1] != 0:
                    try:
                        print(point)
                        val = neigh.predict([point])
                        char = valores(val[0])
                        print(char)
                        if word[-1] != char:
                            word = word + char
                    except ValueError:
                        # raised when no training point lies within the radius
                        print('No neighbors found for the given radius')
        words = words + ' ' + word
    return words
Example #5
def train_models(dict_of_dicts):
    for midi_key, midi_dict in dict_of_dicts.items():
        X = []
        y = []
        for x in midi_dict:
            for t in midi_dict[x]:  # an empty list contributes nothing
                X.append(t)
                y.append(x)  # the label x is repeated once per sample
        if X:
            rad = get_radius(midi_key)

            neigh = RadiusNeighborsClassifier(radius=rad,
                                              weights='distance',
                                              outlier_label=[7])

            X = np.array(X)
            y = np.array(y)
            neigh.fit(X.reshape(-1, 1), y)
            filename = workspace.model_folder + '/' + str(
                midi_key) + '_rrn_model.sav'
            pickle.dump(neigh, open(filename, 'wb'))
Example #6
def train_model_for_prediction(path_to_csv,
                               path_to_json_dir,
                               company,
                               department,
                               classifier_id='random_forest',
                               needs_type='manual',
                               ratio_cleaner_val=None,
                               random_state=None,
                               remove_ratios=False,
                               remove_needs=False):
    """
    Trains the specified model to make predictions to be used in production.
    :param path_to_csv: String. Path to csv file of all the shifts.
    :param path_to_json_dir: String. Path to a directory containing all json schedules.
    :param company: String. Name of the company.
    :param department: String. Name of the department in the company.
    :param classifier_id: String. The classifier to use for training and prediction. 'random_forest', 'k_neighbors', or
     'radius_neighbors'.
    :param needs_type: String. Specifies type of needs to use. 'manual', 'avg', or 'median'.
    :param ratio_cleaner_val: Float. The cutoff point for ratios to be removed from the data. The max, 7.0, removes all
    ratios; the min, 0.0, removes none. Recommended values to experiment with are 2.0, 1.5, and 1.0. Use None to ignore this.
    :param random_state: Int. Seed to remember the split of the data in StratifiedShuffleSplit.
    :param remove_ratios: Bool. Whether or not to remove the ratios from the features.
    :param remove_needs: Bool. Whether or not to remove the needs from the features.
    :return: Tuple of an instance of a trained RandomForestClassifier, KNeighborsClassifier, or
    RadiusNeighborsClassifier, AND the interpretation_keys.
    """
    (prepared_train_features, prepared_train_targets, prepared_test_features,
     prepared_test_targets, interpretation_keys) = prepare_features_targets(
         path_to_csv,
         path_to_json_dir,
         company,
         department,
         needs_type=needs_type,
         ratio_cleaner_val=ratio_cleaner_val,
         random_state=random_state,
         remove_ratios=remove_ratios,
         remove_needs=remove_needs)

    if classifier_id == 'random_forest':
        classifier = RandomForestClassifier(n_estimators=500,
                                            max_leaf_nodes=16,
                                            n_jobs=1)
    elif classifier_id == 'k_neighbors':
        classifier = KNeighborsClassifier()
    elif classifier_id == 'radius_neighbors':
        classifier = RadiusNeighborsClassifier()
    else:
        raise ValueError(
            'Invalid classifier_id specified:', classifier_id + '.',
            'Must be of type \'random_forest\', \'k_neighbors\', or \'radius_neighbors\'.'
        )

    classifier.fit(prepared_train_features, prepared_train_targets)

    # run the test data through it to gauge effectiveness.
    test_predictions = classifier.predict(prepared_test_features)
    print_model_analysis(test_predictions, prepared_test_targets)

    return classifier, interpretation_keys
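
A hypothetical call exercising the 'radius_neighbors' branch; every path and name below is a placeholder, not a value from the source:

classifier, keys = train_model_for_prediction(
    path_to_csv='data/shifts.csv',      # placeholder path
    path_to_json_dir='data/schedules',  # placeholder path
    company='example_co',               # placeholder name
    department='example_dept',          # placeholder name
    classifier_id='radius_neighbors',
    needs_type='avg',
    ratio_cleaner_val=1.5,
    random_state=42)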
def Radius_Neighbors(input_file,Output):
    lvltrace.lvltrace("LVLEntree dans Radius_Neighbors")
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]
    y = data[:,0]
    n_samples, n_features = X.shape
    # RadiusNeighborsClassifier takes a radius, not n_neighbors (passing
    # n_neighbors raises a TypeError); radius=1.0 is the sklearn default
    clf = RadiusNeighborsClassifier(radius=1.0)
    clf.fit(X, y)
    y_pred = clf.predict(X)
    print "#########################################################################################################\n"
    print "Radius Neighbors Accuracy "
    print "classification accuracy:", metrics.accuracy_score(y, y_pred)
    print "precision:", metrics.precision_score(y, y_pred)
    print "recall:", metrics.recall_score(y, y_pred)
    print "f1 score:", metrics.f1_score(y, y_pred)
    print "\n"
    print "#########################################################################################################\n"
    results = Output+"Raidus_Neighbors_metrics.txt"
    file = open(results, "w")
    file.write("Radius Neighbors estimator accuracy\n")
    file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y, y_pred))
    file.write("Precision Score: %f\n"%metrics.precision_score(y, y_pred))
    file.write("Recall Score: %f\n"%metrics.recall_score(y, y_pred))
    file.write("F1 Score: %f\n"%metrics.f1_score(y, y_pred))
    file.write("\n")
    file.write("True Value, Predicted Value, Iteration\n")
    for n in range(len(y)):
        file.write("%f,%f,%i\n"%(y[n],y_pred[n],(n+1)))
    file.close()
    title = "Radius Neighbors"
    save = Output + "Radius_Neighbors_confusion_matrix.png"
    plot_confusion_matrix(y, y_pred,title,save)
    lvltrace.lvltrace("LVLSortie dans Radius_Neighbors")

def SequentialRadiusNeighborsClassifier(epsilon, X_train, X_test, Y_train):
    # label test points one at a time, closest-to-training first; a point
    # with no training neighbor inside epsilon founds a new class, and each
    # labeled point is appended to the training set before the next pick
    X_train_temp = np.copy(X_train)
    Y_train_temp = np.copy(Y_train)
    Reps = RadiusNeighborsClassifier(radius=epsilon)
    test_size = len(X_test)
    Y_predict = [-1 for x in range(test_size)]
    Y_current = list(set(Y_train))
    test_index = [x for x in range(test_size)]
    for test_time in range(test_size):
        Knn_temp = NearestNeighbors(n_neighbors=1)
        Knn_temp.fit(X_train_temp)
        min_distances = Knn_temp.kneighbors(X_test[test_index])[0]
        min_distances = [np.mean(x) for x in min_distances]
        optimal_indice = min_distances.index(min(min_distances))
        optimal_test = test_index[optimal_indice]
        test_index.remove(optimal_test)
        Reps.fit(X_train_temp, Y_train_temp)
        predict_set = Reps.radius_neighbors(X_test[optimal_test].reshape(
            1, -1))[1]
        predict_set = predict_set[0]
        if predict_set.size > 0:
            y_predict = Reps.predict(X_test[optimal_test].reshape(1, -1))
            y_predict = y_predict[0]
        else:
            y_predict = max(Y_current) + 1
            Y_current.append(y_predict)
        Y_predict[optimal_test] = y_predict
        X_train_temp = np.append(X_train_temp, [X_test[optimal_test]], axis=0)
        Y_train_temp = np.append(Y_train_temp, [y_predict], axis=0)
    return Y_predict
    def start(self):
        """ 01. Initialise the data paths and transformation functions.  """
        self.data_dir = '../data/raw_data'
        self.trans_primitives = ['weekday', 'hour', 'time_since_previous']
        self.agg_primitives = [
            'mean', 'max', 'min', 'std', 'count', 'percent_true', 'last',
            'time_since_last', 'mode'
        ]
        self.ignore_cols = [
            'num_contacts', 'num_referrals', 'num_successful_referrals'
        ]
        self.feature_windows = [10, 30, 60, 90]  #[10,20,30]
        self.max_feature_depth = 2

        # list of estimators to use
        self.estimators = [
            ('cbc', CatBoostClassifier()), ('lgbmc', LGBMClassifier()),
            ('gbc',
             GradientBoostingClassifier(validation_fraction=0.15,
                                        n_iter_no_change=50)),
            ('et', ExtraTreeClassifier()), ('abc', AdaBoostClassifier()),
            ('rfc', RandomForestClassifier()), ('bc', BaggingClassifier()),
            ('etc', ExtraTreesClassifier()), ('gnb', GaussianNB()),
            ('mlpc', MLPClassifier()), ('gpc', GaussianProcessClassifier()),
            ('dtc', DecisionTreeClassifier()),
            ('qda', QuadraticDiscriminantAnalysis()),
            ('lr', LogisticRegression()), ('knn3', KNeighborsClassifier(3)),
            ('knn6', KNeighborsClassifier(6)),
            ('knn12', KNeighborsClassifier(12)), ('nc', NearestCentroid()),
            ('rnc', RadiusNeighborsClassifier()), ('lp', LabelPropagation()),
            ('pac', PassiveAggressiveClassifier()), ('rc', RidgeClassifier()),
            ('sgdc', SGDClassifier()), ('svg', SVC()),
            ('ngbc', NGBClassifier(Dist=Bernoulli))
        ]
        self.next(self.load_raw_data)
Example #10
 def r_neighbors_classifier(self,
                            radius=1.0,
                            weights='uniform',
                            algorithm='auto',
                            leaf_size=30,
                            p=2,
                            metric='minkowski'):
     """
     Classifier implementing a radius-based nearest neighbors vote.
     :param radius: Range of parameter space within which neighbors vote
                    (RadiusNeighborsClassifier takes a radius, not n_neighbors)
     :param weights: Weight function used in prediction, inputs:
                             uniform: All points in each neighborhood are weighted equally.
                             distance: Weight points by the inverse of their distance; closer points have a greater influence.
     :param algorithm: Algorithm used to compute the nearest neighbors, inputs:
                             ball_tree: Fast generalized N-point problems.
                             kd_tree: Euclidean tree in n dimensions.
                             brute: Brute-force search.
                             auto: Attempts to decide the most appropriate algorithm from the data passed to fit.
     :param leaf_size: Leaf size passed to the tree. This can affect computation speed.
     :param p: Parameter for the Minkowski metric.
     :param metric: Distance metric to use for the tree, inputs:
                             euclidean, manhattan, chebyshev, minkowski, seuclidean, mahalanobis
     :return: None; the fitted model is stored on the instance.
     """
     model = RadiusNeighborsClassifier(radius=radius,
                                       weights=weights,
                                       algorithm=algorithm,
                                       leaf_size=leaf_size,
                                       p=p,
                                       metric=metric)
     model.fit(self.__x_train, self.__y_train)
     self.__model = model
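
A standalone sketch contrasting the two weighting schemes described in the docstring, on tiny synthetic data (the names and values are illustrative only):

import numpy as np
from sklearn.neighbors import RadiusNeighborsClassifier

X_toy = np.array([[0.0], [0.5], [1.0], [5.0], [5.5], [6.0]])
y_toy = np.array([0, 0, 0, 1, 1, 1])
for w in ('uniform', 'distance'):
    toy_clf = RadiusNeighborsClassifier(radius=2.0, weights=w).fit(X_toy, y_toy)
    # both query points sit well inside one cluster, so the schemes agree here
    print(w, toy_clf.predict([[0.75], [5.25]]))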
Example #11
def runRNC(X_train, y_train, X_test, R=1.0, weights='uniform', outlier=None):
    # initialize the classifier
    model = RadiusNeighborsClassifier(R, weights=weights, outlier_label=outlier)
    rnc = model.fit(X_train, y_train)
    predictions = rnc.predict(X_test)

    return predictions
Example #12
    def __init__(self, feature_length, num_classes):
        super().__init__(feature_length, num_classes)
        self.num_classes = num_classes

        ###
        # BUILD YOUR MODEL
        # metric='' is not a valid metric name; minkowski is the sklearn default
        self.model = RadiusNeighborsClassifier(weights='distance',
                                               metric='minkowski')
Example #13
def rnn_model(train_input, train_target, test_input, test_target):
    radius = 3.0
    r_neigh = RadiusNeighborsClassifier(radius=radius)
    r_neigh.fit(train_input, train_target)
    print("R-NN (r=%s) accuracy for training set: %s" %
          (radius, r_neigh.score(train_input, train_target)))
    print("R-NN (r=%s) accuracy for testing set: %s" %
          (radius, r_neigh.score(test_input, test_target)))
Example #14
def run_main():
	data = load_files(PARAM.data_path, encoding="utf-8")
	X, y, y_names = clean_docs(data.data, True), data.target, data.target_names
	vectorizer = tfidf_vectorize(X, max_features = PARAM.max_features, min_df = PARAM.min_df, max_df = PARAM.max_df, analyzer = PARAM.analyzer, ngram_range = (1, 2))
	X_train, X_test, y_train, y_test = train_test_split(vectorizer.transform(X).toarray(), y, test_size = 0.2, random_state = 0)
	
	if PARAM.classifier == 'knn':
		classifier = KNeighborsClassifier(n_neighbors = PARAM.n_neighbors, weights = PARAM.weight_func)
	elif PARAM.classifier == 'rnn':
		classifier = RadiusNeighborsClassifier(radius = PARAM.radius)
	elif PARAM.classifier == 'nc':
		classifier = NearestCentroid()
	else:
		raise ValueError("[%s] not supported" % (PARAM.classifier))

	classifier.fit(X_train, y_train)
	y_pred = classifier.predict(X_test)

	print_eval(y_test, y_pred)
	if PARAM.model_path:
		model_map = {}
		model_map['clf_name'] = PARAM.classifier
		model_map['accuracy'] = get_accuracy(y_test, y_pred)
		model_map['vectorizer'] = vectorizer
		model_map['classifier'] = classifier
		save_model_map(model_map, PARAM.model_path)
 def __init__(self,
              regression=True,
              radius=1.0,
              weights='distance',
              algorithm='auto',
              leaf_size=30,
              p=2,
              metric='minkowski',
              outlier_label=None,
              metric_params=None):
     self._regression = regression
     self._radius = radius
     self._weights = weights
     self._algorithm = algorithm
     self._leaf_size = leaf_size
     self._p = p
     self._metric = metric
     self._metric_params = metric_params
     self._outlier_label = outlier_label
     if regression:
         self._model = RadiusNeighborsRegressor(radius=radius,
                                                weights=weights,
                                                algorithm=algorithm,
                                                leaf_size=leaf_size,
                                                p=p,
                                                metric=metric,
                                                metric_params=metric_params)
     else:
         # keyword arguments avoid the positional-order bug that put
         # metric_params into the classifier's outlier_label slot and
         # silently dropped self._outlier_label
         self._model = RadiusNeighborsClassifier(radius=radius,
                                                 weights=weights,
                                                 algorithm=algorithm,
                                                 leaf_size=leaf_size,
                                                 p=p,
                                                 metric=metric,
                                                 outlier_label=outlier_label,
                                                 metric_params=metric_params)
     super().__init__()
def append_without_dublicates(usual, y, knowledge):
    # use a radius classifier as a duplicate detector: new rows that land
    # within the radius of an existing row are predicted 0 (duplicates, and
    # dropped); rows with no neighbor get outlier_label -1 and are kept
    if len(usual['data']) == 0:
        usual['data'] += y
        return

    maxims, averages = get_maxims_and_averages(knowledge)
    usual_to_fit = normalize_fit_input(usual['data'], usual['events'],
                                       usual['fields'], averages, maxims)

    new_data_to_fit = normalize_fit_input(y, usual['events'], usual['fields'],
                                          averages, maxims)

    classifier = RadiusNeighborsClassifier(radius=2,
                                           metric='euclidean',
                                           outlier_label=-1)

    classifier.fit(sparse.csr_matrix(usual_to_fit), [0] * len(usual_to_fit))

    labels = classifier.predict(sparse.csr_matrix(new_data_to_fit))

    for i in range(len(labels) - 1, -1, -1):
        if labels[i] != -1:
            y.pop(i)

    usual['data'] += y
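
A minimal illustration of the outlier_label trick used above, on synthetic arrays: candidates within the radius of the reference set are predicted 0 (duplicates), everything else gets -1 (kept as new data).

import numpy as np
from sklearn.neighbors import RadiusNeighborsClassifier

reference = np.array([[0.0, 0.0], [1.0, 1.0]])
candidates = np.array([[0.1, 0.1], [10.0, 10.0]])
dedup = RadiusNeighborsClassifier(radius=2, metric='euclidean', outlier_label=-1)
dedup.fit(reference, [0] * len(reference))
print(dedup.predict(candidates))  # [ 0 -1]: a near-duplicate, then a new point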
Example #17
    def set_classifier(self, classifier_name):
        """ Setter for clf

        Building instances of classifier objects with corresponding name.

        Parameter
        ---------
        classifier_name : string
            Contains the corresponding name of the wanted classifier from
            sklearn.
        """
        if classifier_name == "svm_linear":
            # class_weight="auto" was removed from sklearn; "balanced" is the
            # replacement
            self.clf = svm.SVC(kernel="linear", class_weight="balanced")
        elif classifier_name == "svm_poly":
            self.clf = svm.SVC(kernel="poly", class_weight="balanced")
        elif classifier_name == "naive_bayes":
            self.clf = GaussianNB()
        elif classifier_name == "decision_tree":
            self.clf = tree.DecisionTreeClassifier()
        elif classifier_name == "nearest_centroid":
            self.clf = NearestCentroid()
        elif classifier_name == "k_neighbors":
            self.clf = KNeighborsClassifier(n_neighbors=100)
        elif classifier_name == "radius_neighbors":
            self.clf = RadiusNeighborsClassifier(radius=1.0, outlier_label=1)
        else:
            raise ClassifierNotExistException(classifier_name)
Example #18
 def set_up_radius(self):
     self.classifier = RadiusNeighborsClassifier(radius=self.radius)
     self.classifier.fit(self.X_train, self.y_train)
     y_pred = self.classifier.predict(self.X_test)
     print("confusion_matrix: \n")
     print(confusion_matrix(self.y_test, y_pred))
     print("classification_report\n")
     return classification_report(self.y_test, y_pred)
Example #19
 def trainAlgo(self):
     self.model = RadiusNeighborsClassifier(
         radius=self.param['radius'],
         weights=self.param['weights'],
         algorithm=self.param['algorithm'],
         p=self.param['p'])
     y = np.argmax(self.outputData['Y'], axis=1)
     self.model.fit(self.inputData['X'], y)
Example #20
 def __init__(self, method, n_neighbors, weights, radius):
     if method == 'knn_class':
         self.clf = neighbors.KNeighborsClassifier(n_neighbors,
                                                   weights=weights)
     elif method == 'knn_rad':
         self.clf = RadiusNeighborsClassifier(radius=radius)
     elif method == 'knn_cent':
         self.clf = NearestCentroid()
     else:
         # fail fast rather than leaving self.clf undefined
         raise ValueError('unknown method: %s' % method)
Example #21
    def __init__(self, name, **kwargs):
        from sklearn.neighbors import RadiusNeighborsClassifier

        def signal_proba(model, X):
            # call predict_proba through the class and keep the probability
            # of the signal class (column 1)
            return RadiusNeighborsClassifier.predict_proba(model, X)[:, 1]

        self._prob_func = signal_proba
        self._range = [0, 1]
        super().__init__(name, RadiusNeighborsClassifier(**kwargs))
Example #22
def train_with_model(fea_c1, fea_c2, model):

    train_filename = "data/BEST&MOST{}/train-{}{}.arff".format(
        fea_c2, fea_c1, fea_c2)
    if not os.path.exists(train_filename):
        print("No such file: {}".format(train_filename))
        return

    if os.path.exists("model/{}{}-{}.pkl".format(fea_c1, fea_c2, model)):
        print("This model had been trained: " +
              "model/{}{}-{}.pkl".format(fea_c1, fea_c2, model))

    else:

        x_train, y_train = load_data(train_filename)
        '''
        PCA operation (disabled):
        print(x_train.shape)
        x_train = pca(x_train)
        print(x_train.shape)
        '''

        if model == 'knn':
            mol = KNeighborsClassifier()
            mol.fit(x_train, y_train)

        elif model == 'rn':
            mol = RadiusNeighborsClassifier(radius=5.0)
            mol.fit(x_train, y_train)

        elif model == 'bnb':
            mol = BernoulliNB()
            mol.fit(x_train, y_train)

        elif model == 'gnb':
            mol = GaussianNB()
            mol.fit(x_train, y_train)

        elif model == 'dtree':
            mol = tree.DecisionTreeClassifier()
            mol.fit(x_train, y_train)

        elif model == 'rdforest':
            mol = RandomForestClassifier(n_estimators=10)
            mol.fit(x_train, y_train)

        elif model == 'lsvc':
            mol = LinearSVC(random_state=0, tol=1e-5)
            mol.fit(x_train, y_train)

        elif model == 'qda':
            mol = QuadraticDiscriminantAnalysis()
            mol.fit(x_train, y_train)

        with open("model/{}{}-{}.pkl".format(fea_c1, fea_c2, model),
                  'wb') as f:
            pickle.dump(mol, f)
Example #23
def get_classifiers():
    # basic classifiers
    dc = DummyClassifier(random_state=0)
    lr = LogisticRegression()
    gnb = GaussianNB()
    svc = LinearSVC(C=1)
    C0 = {
        "name":
        "Basic",
        "methods": [(dc, "Dummy"), (lr, "Logit"), (gnb, "Naive Bayes"),
                    (svc, "SVC")]
    }

    # decision trees
    dec_tree = DecisionTreeClassifier(random_state=0)
    etc_tree = ExtraTreeClassifier(random_state=0)
    C1 = {
        "name": "Decision Tree",
        "methods": [(dec_tree, "Decision Tree"), (etc_tree, "Extra Tree")]
    }

    # nearest-neighbor classifiers
    knn = KNeighborsClassifier(n_neighbors=25, weights="distance")
    rnn = RadiusNeighborsClassifier(radius=20.0, outlier_label=1)
    nc = NearestCentroid()
    C2 = {
        "name": "Nearest Neighbors",
        "methods": [(knn, "KNN"), (rnn, "Radius NN"), (nc, "Nearest Centroid")]
    }

    # ensemble models
    ada = AdaBoostClassifier()
    bg = BaggingClassifier(n_estimators=50, max_features=3)
    etsc = ExtraTreesClassifier(n_estimators=50, criterion="entropy")
    gb = GradientBoostingClassifier(max_depth=5, random_state=0)
    rfc = RandomForestClassifier(n_estimators=100)
    C3 = {
        "name":
        "Ensemble",
        "methods": [(ada, "Ada Boost"), (bg, "Bagging"), (etsc, "Extra Trees"),
                    (gb, "Gradient Boosting"), (rfc, "Random Forest")]
    }

    # discriminant analysis & GPC
    lda = LinearDiscriminantAnalysis()
    qda = QuadraticDiscriminantAnalysis()
    C4 = {
        "name": "Discriminant Analysis",
        "methods": [(lda, "LDA"), (qda, "QDA")]
    }

    # neural net
    nn = MLPClassifier(alpha=0.1, tol=1e-8)
    C5 = {"name": "Complex Architecture", "methods": [(nn, "Neural Network")]}

    CLF = [C0, C1, C2, C3, C4, C5]
    return CLF
Example #24
    def test_requires_probabilistic_classifier(self):
        """
        Assert requires probabilistic classifier
        """
        message = "requires a probabilistic binary classifier"
        assert is_classifier(RadiusNeighborsClassifier)
        assert not is_probabilistic(RadiusNeighborsClassifier)

        with pytest.raises(yb.exceptions.YellowbrickError, match=message):
            DiscriminationThreshold(RadiusNeighborsClassifier())
Example #25
    def train_model(self, X_train, y_train, modelpath):
        model = RadiusNeighborsClassifier(radius=self.radius,
                                          weights=self.weights,
                                          algorithm=self.algorithm,
                                          p=self.power_param,
                                          outlier_label=self.outlier_label)

        model.fit(X_train, y_train)
        self.save_model(model, modelpath)
        return model
Example #26
    def clusterFacetSamplesRNN(self, reduceRadius=3):
        """
        cluster the samples of each facet using radius nearest neighbours
        the cluster centers and their corresponding normals will be saved
        in self.objsamplepnts_refcls and self.objsamplenrmals_refcls

        :param reduceRadius: neighbors that fall inside reduceRadius will be removed
        :return: None

        author: weiwei
        date: 20161130, osaka
        """

        # np.object was removed from newer numpy; plain object is equivalent
        self.objsamplepnts_refcls = np.ndarray(shape=(self.facets.shape[0], ),
                                               dtype=object)
        self.objsamplenrmls_refcls = np.ndarray(shape=(self.facets.shape[0], ),
                                                dtype=object)
        for i, facet in enumerate(self.facets):
            # print "cluster"
            # print i,len(self.facets)
            self.objsamplepnts_refcls[i] = []
            self.objsamplenrmls_refcls[i] = []
            X = self.objsamplepnts_ref[i]
            nX = X.shape[0]
            if nX > 0:
                neigh = RadiusNeighborsClassifier(radius=1.0)
                neigh.fit(X, range(nX))
                neigharrays = neigh.radius_neighbors(X,
                                                     radius=reduceRadius,
                                                     return_distance=False)
                delset = set([])
                for j in range(nX):
                    if j not in delset:
                        self.objsamplepnts_refcls[i].append(np.array(X[j]))
                        self.objsamplenrmls_refcls[i].append(
                            np.array(self.objsamplenrmls_ref[i][j]))
                        # if self.objsamplepnts_refcls[i].size:
                        #     self.objsamplepnts_refcls[i] = np.vstack((self.objsamplepnts_refcls[i], X[j]))
                        #     self.objsamplenrmls_refcls[i] = np.vstack((self.objsamplenrmls_refcls[i],
                        #                                                 self.objsamplenrmls_ref[i][j]))
                        # else:
                        #     self.objsamplepnts_refcls[i] = np.array([])
                        #     self.objsamplenrmls_refcls[i] = np.array([])
                        #     self.objsamplepnts_refcls[i] = np.hstack((self.objsamplepnts_refcls[i], X[j]))
                        #     self.objsamplenrmls_refcls[i] = np.hstack((self.objsamplenrmls_refcls[i],
                        #                                                 self.objsamplenrmls_ref[i][j]))
                        delset.update(neigharrays[j].tolist())
            if self.objsamplepnts_refcls[i]:
                self.objsamplepnts_refcls[i] = np.vstack(
                    self.objsamplepnts_refcls[i])
                self.objsamplenrmls_refcls[i] = np.vstack(
                    self.objsamplenrmls_refcls[i])
            else:
                self.objsamplepnts_refcls[i] = np.empty(shape=(0, 0))
                self.objsamplenrmls_refcls[i] = np.empty(shape=(0, 0))
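
A toy version of the reduction loop above, assuming synthetic 2-D samples; NearestNeighbors provides the same radius_neighbors query without pressing a classifier into service as a neighbor index:

import numpy as np
from sklearn.neighbors import NearestNeighbors

pts = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0], [9.0, 9.0]])
nn = NearestNeighbors().fit(pts)
neigharrays = nn.radius_neighbors(pts, radius=1.0, return_distance=False)
kept, delset = [], set()
for j in range(len(pts)):
    if j not in delset:          # keep the first point of each cluster
        kept.append(pts[j])
        delset.update(neigharrays[j].tolist())  # drop its radius neighbors
print(np.vstack(kept))  # one representative per cluster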
def get_hyperparameters_model():
    param_dist = {}

    clf = RadiusNeighborsClassifier()

    model = {
        'radius_neighbors_classifier': {
            'model': clf,
            'param_distributions': param_dist
        }
    }
    return model
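
param_dist above is left empty; a hypothetical distribution dict for use with sklearn's RandomizedSearchCV (values illustrative, not from the source) might look like:

param_dist = {
    'radius': [0.5, 1.0, 2.0, 5.0],
    'weights': ['uniform', 'distance'],
    'leaf_size': [15, 30, 60],
}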
def get_classifiers():
    enabled_classifier_names = [
        "RandomForest", "GaussianNB", "MLP", "SGD", "LinearSVC", "LINSVC",
        "RBFSVC"
    ]
    # disabled:
    # GradientBoosting - takes too long / not training
    # DecisionTree - long, not great results
    # RadiusNeighbors - didn't work
    params = {
        "GradientBoosting": {
            "verbose": 1
        },
        "RandomForest": {
            "n_estimators": 50
        },
        "GaussianNB": {},
        "RadiusNeighbors": {},
        "MLP": {},
        "SGD": {},
        "LinearSVC": {},
        "LINSVC": {
            "kernel": "linear"
        },
        "RBFSVC": {
            "kernel": "rbf"
        },
        "DecisionTree": {}
    }
    classifiers = {
        "GradientBoosting":
        GradientBoostingClassifier(**params["GradientBoosting"]),
        "RandomForest":
        RandomForestClassifier(**params["RandomForest"]),
        "GaussianNB":
        GaussianNB(**params["GaussianNB"]),
        "RadiusNeighbors":
        RadiusNeighborsClassifier(**params["RadiusNeighbors"]),
        "MLP":
        MLPClassifier(**params["MLP"]),
        "SGD":
        SGDClassifier(**params["SGD"]),
        "LinearSVC":
        LinearSVC(**params["LinearSVC"]),
        "RBFSVC":
        SVC(**params["RBFSVC"]),
        "DecisionTree":
        DecisionTreeClassifier(**params["DecisionTree"])
    }
    enabled_classifiers = [(name, classifier)
                           for name, classifier in classifiers.items()
                           if name in enabled_classifier_names]
    return enabled_classifiers
 def test_model_knn_classifier_binary_class_radius(self):
     model, X = self._fit_model_binary_classification(
         RadiusNeighborsClassifier())
     model_onnx = convert_sklearn(
         model, "KNN classifier binary",
         [("input", FloatTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(
         X.astype(numpy.float32),
         model, model_onnx,
         basename="SklearnRadiusNeighborsClassifierBinary")
Example #30
def radius(X_train, X_test, y_train, y_test, string, valor):
    # build and fit the classifier once; both branches used identical settings
    clf = RadiusNeighborsClassifier(radius=valor,
                                    weights='distance',
                                    n_jobs=-1)
    clf.fit(X_train, y_train.values.ravel())
    if (string == "prob"):
        return clf.predict_proba(X_test)
    #pickles.criarModelo(clf,"Rocchio "+string)
    y_predito = clf.predict(X_test)
    micro = f1_score(y_test, y_predito, average='micro')
    macro = f1_score(y_test, y_predito, average='macro')
    #f1_individual = f1_score(y_test,y_predito,average=None)
    #salvar_dados.salvar(y_test,y_predito,micro, macro, f1_individual," Knn "+string)
    print("Micro F1 score of RadiusKnn ", string, " with radius ", valor,
          ": ", micro)
    print("Macro F1 score of RadiusKnn ", string, " with radius ", valor,
          ": ", macro)