def init_students_classifier_fn(self, **kwargs):
    if not kwargs:
        # Default prediction parameters, taken from the trained academic clusterer
        m = self._academic_clusterer._m
        error = 1.e-10
        maxiter = 100
        clf = lambda data: cmeans_predict(data.T, self._cntr_sf, m, error, maxiter)
    else:
        # Forward caller-supplied cmeans_predict keyword arguments (m, error, maxiter, ...)
        clf = lambda data: cmeans_predict(data.T, self._cntr_sf, **kwargs)
    self._students_clf = clf
Example #2
File: cluster.py Project: GSMSouza/CAP
 def run(self, chromosome: ndarray, samples: ndarray):
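     # cmeans_predict returns (u, u0, d, jm, p, fpc); u[k, i] is sample i's membership in cluster k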
     u, u0, d, jm, p, fpc = cmeans_predict(samples.transpose(),
                                           chromosome,
                                           2,
                                           error=0.005,
                                           maxiter=1000)
     return np.argmax(u, axis=0)
Example #3
 def calculate(self, chromosome: ndarray) -> float:
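     # fpc is the fuzzy partition coefficient in [1/c, 1]; higher means a crisper partition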
     u, u0, d, jm, p, fpc = cmeans_predict(self.samples.transpose(),
                                           chromosome,
                                           2,
                                           error=0.005,
                                           maxiter=1000)
     return fpc
Example #4
 def test_fuzzy_c_means(self):
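     # Both implementations should produce the same hard assignments for the same centers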
     labels_pairwise = FuzzyCMeans().run(self.center, self.samples)
     u, u0, d, jm, p, fpc = cmeans_predict(self.samples.transpose(),
                                           self.center,
                                           2,
                                           error=0.005,
                                           maxiter=1000)
     labels_fuzzy_c_means = np.argmax(u, axis=0)
     equal = labels_fuzzy_c_means == labels_pairwise
     self.assertTrue(equal.all())
Example #5
    def predict(self, test_data: pd.DataFrame) -> np.array:
        """Return predicted cluster assignment."""
        u, u0, d, jm, p, fpc = fuzz.cmeans_predict(test_data.transpose(),
                                                   self.cluster_centers_, 2,
                                                   error=0.005,
                                                   maxiter=self._n_iter)
        # Hard Clustering
        self.labels_ = np.argmax(u, axis=0)
        # Probabilities
        self.probs_ = np.max(u, axis=0)

        return self.labels_
Example #6
    def _test_model(self, model, data):
        """
        Train model with the number of clusters that has better evaluation
        :param model: Trained model
        :param data: Dataframe with test data
        :return: Tested model
        """
        super()._test_model(model, data)
        data_array = data.to_numpy()
        result = fuzz.cmeans_predict(test_data=data_array,
                                     cntr_trained=model[0],
                                     m=2,
                                     error=0.005,
                                     maxiter=1000)
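        # cmeans_predict returns (u, u0, d, jm, p, fpc); result[5] is the fuzzy partition coefficient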

        # Evaluation
        validation = ICValidation(result[5])
        return validation
Example #7
def fuzzy_clustering(train, train_labels, test, test_labels, size, plot,
                     plot_dims):

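    # skfuzzy expects data as (features, samples), hence the .T transposes below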
    # The fuzzifier m must be > 1; use the same value for training and prediction
    m = 2
    [center, u, u0, d, jm, p, fpc] = skf.cmeans(train.T,
                                                c=2,
                                                m=m,
                                                error=.001,
                                                maxiter=100)

    # n_p instead of np so the unpacked variable does not shadow numpy
    [nu, nu0, nd, njm, n_p, nfpc] = skf.cmeans_predict(test.T,
                                                       center,
                                                       m,
                                                       error=0.005,
                                                       maxiter=1000)
    results_train = u.argmax(axis=0)
    results_test = nu.argmax(axis=0)

    if plot:
        plot_results(test, test_labels, results_test, size, "Fuzzy Clustering")

    return results_train, results_test
Example #8
def DecideTypeOfTest(TestFilename, clf, clusterAlg, cntr, features, folder, means, modelNmf, nmf, normalize,
                     outputFolder, outputfile, roundFactor, scaler):
    if '.' in TestFilename:
        if str(TestFilename).lower() != 'na' and TestFilename != "":
            try:
                df_neg = pd.read_excel(TestFilename)
                X_train1 = df_neg.abs()
                # X_train1 =df_neg
                if normalize:
                    X_test21 = normaliz(X_train1)
                else:
                    X_test21 = X_train1
                predicted1 = clf.predict(X_test21)
                testPredictedSinglefile = []
                if 'cmeans' in clusterAlg or 'c-means' in clusterAlg or 'fuzzy' in clusterAlg:
                    # cmeans_predict expects test data shaped (features, samples); transpose rather than reshape
                    data2a = df_neg.values.T
                    u, u0, d, jm, p, fpc = fuzz.cmeans_predict(data2a, cntr, 2, error=0.005, maxiter=1500,
                                                               init=None)
                    Cluster_Values2 = getMaximumCmeans(u)
                    AllCluster_Values2 = getMaximumCmeans(u, True)
                    temp = [int(round(y * roundFactor)) for y in Cluster_Values2]
                    # testPredictedSinglefile = np.argmax(u, axis=0)
                    testPredictedSinglefile = temp
                else:
                    testPredictedSinglefile = means.predict(df_neg)
            except Exception:
                print(
                    "The alternate test " + TestFilename + " does not exist in the provided folder. "
                    "Please make sure the file is in: " + folder + TestFilename)
                sys.exit(-1)
            Fpredicted = open(
                outputFolder + '_Test_' + outputfile + "_predicted_" + datetime.now().strftime(
                    '%Y-%m-%d-%H-%M') + ".tsv", mode='w')
            Fpredicted.write(
                "{:<18}\t {:<15}\t {:<15}\t {:<15} \t{:<17} \t{:<15} \n".format('V1', 'V2', 'V3',
                                                                                'V4', 'V5',
                                                                                'Predicted_Label',
                                                                                ))
            for values, ja in itertools.zip_longest(df_neg.values, predicted1):
                Fpredicted.write(
                    "{:<18}\t {:<15}\t {:<15}\t {:<15} \t{:<17} \t{:<15} \n".format(values[0], values[1],
                                                                                    values[2], values[3],
                                                                                    values[4], ja))
            Fpredicted.write(
                "Accuracy of predicted values with trained clustering model and trained MLPClassifier: "
                + str(accuracy_score(testPredictedSinglefile, predicted1) * 100) + " %" + "\n")
            Fpredicted.close()
    else:
        try:
            testTweetMapper, testFiveDModel, testTotalTweetCount, testTweetsonly = TweetExtractor(
                TestFilename)

            df3 = pd.DataFrame(testFiveDModel, columns=features)
            df3 = df3.abs()
            if 'y' in nmf.lower():
                X_test21a = NMf(df3, modelNmf)
            else:
                X_test21a = df3.values
            if normalize:
                X_test21a = scaler.transform(X_test21a)
                # X_test21a = normaliz(X_test21a)

            predicted12 = clf.predict(X_test21a)
            testPredicted = []
            df2Norma = pd.DataFrame(X_test21a, columns=features)
            if 'cmeans' in clusterAlg or 'c-means' in clusterAlg or 'fuzzy' in clusterAlg:

                # cmeans_predict expects test data shaped (features, samples); transpose rather than reshape
                data2 = df2Norma.values.T

                utest, u0, d, jm, p, fpc = fuzz.cmeans_predict(data2, cntr, 2, error=0.005, maxiter=1500,
                                                               init=None)
                testPredicted = np.argmax(utest, axis=0)
            else:
                testPredicted = means.predict(df2Norma)
            Fpredicted12 = open(
                outputFolder + '_Test_' + outputfile + "_predicted_" + datetime.now().strftime(
                    '%Y-%m-%d-%H-%M') + ".tsv", encoding="utf8", mode='w')
            Fpredicted12.write(
                "{:<20}\t {:<14}\t {:<12}\t {:<250} \t{:<12} \t{:<12} \t{:<12} \t{:<12} \t{:<12}\t{:<15}\n".format(
                    'userName', 'follower count', 'retweetcount', 'tweet',
                    'V1', 'V2', 'V3', 'V4', 'V5', 'predicted label'
                ))
            for y, w, d in itertools.zip_longest(testTweetMapper.values(), testFiveDModel, predicted12):
                Fpredicted12.write(
                    "{:<20}\t {:<14}\t {:<12}\t {:<250} \t{:<12} \t{:<12} \t{:<12} \t{:<12} \t{:<12}\t{:<15}\n".format(
                        y[0], y[2], y[3], y[4], w[0], w[1], w[2],
                        w[3], w[4], d))
            Fpredicted12.write(
                "Accuracy of predicted values with trained clustering model and trained MLPClassifier: "
                + str(accuracy_score(testPredicted, predicted12) * 100) + " %" + "\n")
            print(
                "Accuracy of predicted values with trained clustering model and trained MLPClassifier: "
                + str(accuracy_score(testPredicted, predicted12) * 100) + " %" + "\n")
            Fpredicted12.close()
        except Exception:
            PrintException()
Example #9
    fcm_accuracy = gk_accuracy = kmeans_accuracy = cmeans_accuracy = 0

    for i, y in enumerate(Y):
        x = X[i]
        y_predicted1 = fcm.predict(x)
        fcm_accuracy += y_predicted1 == y

        y_predicted4 = gk.predict(x)
        gk_accuracy += y_predicted4 == y

        y_predicted2 = kmeans.predict([x])[0]
        kmeans_accuracy += y_predicted2 == y

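        # cmeans_predict wants (features, samples): expand_dims + transpose turns one sample
        # into a single-column matrix, and index [0] of the result is the membership matrix u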
        y_predicted3 = cmeans_predict(np.expand_dims(x, 0).transpose(),
                                      cntr,
                                      m,
                                      error,
                                      maxiter=2)[0]
        y_predicted3 = np.argmax(y_predicted3, axis=0)[0]
        cmeans_accuracy += y_predicted3 == y
        # print(f"y: {y}, fcm: {y_predicted1} , kmeans: {y_predicted2}, cmeans:{y_predicted3}")

        # if np.argmax(y_predicted, axis=0) == y:
        #     print(f"equal {i}")
        # accuracy = accuracy + 1
        # else:
        #     print(y_predicted)

    print(f"FCM Accuracy: {fcm_accuracy/len(X)}, "
          f"K-Means Score: {kmeans_accuracy/len(X)}, "
          f"CMeans: {cmeans_accuracy/len(X)}, "
Example #10
def main():
    data = np.loadtxt('../data/data_banknote_authentication.txt',
                      delimiter=',')
    print(data.shape)
    print(data[:10])
    np.random.shuffle(data)
    print(data.shape)
    print(data[:10])

    # split train and test
    sample_size = data.shape[0]
    split_loc = int(0.7 * sample_size)
    training_set = data[:split_loc]
    training_x = training_set[:, :4]
    training_y = training_set[:, -1].astype(int)
    testing_set = data[split_loc:]
    testing_x = testing_set[:, :4]
    testing_y = testing_set[:, -1].astype(int)
    plot_data = []
    # c = 10
    for c in range(2, 100):
        # for c in range(100, 500, 30):
        print("c =", c)
        m = 2
        cntr, Ax, u0, d, jm, p, fpc = fuzz.cluster.cmeans(training_x.T,
                                                          c,
                                                          m,
                                                          error=0.005,
                                                          maxiter=1000)
        cluster_labels = np.argmax(Ax, axis=0)
        cluster_map2_class = {}
        correct_count = 0
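        # Map each fuzzy cluster to the majority class of its training members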
        for i in range(c):
            pickup = training_y[cluster_labels == i]
            class1_count = pickup.sum()
            if class1_count >= len(pickup) / 2:
                cluster_map2_class[i] = 1
                correct_count += class1_count
            else:
                cluster_map2_class[i] = 0
                correct_count += len(pickup) - class1_count

        training_error = 1 - correct_count / len(training_y)
        print(training_error)

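        # Assign test samples to the trained centers, then score them with the cluster-to-class map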
        Ax_testing, _, _, _, _, _ = fuzz.cmeans_predict(testing_x.T,
                                                        cntr,
                                                        m,
                                                        error=0.005,
                                                        maxiter=1000)
        # Ax_testing, _, _, _, _, _ = fuzz.cmeans_predict(training_x.T, cntr, m, error=0.005, maxiter=1000)
        cluster_labels_testing = np.argmax(Ax_testing, axis=0)
        correct_testing = 0
        for l, gt in zip(cluster_labels_testing, testing_y):
            # for l, gt in zip(cluster_labels_testing, training_y):
            if cluster_map2_class[l] == gt:
                correct_testing += 1
        testing_error = 1 - correct_testing / len(testing_y)
        # testing_error = 1 - correct_testing / len(training_y)
        print(testing_error)
        classification_error = 1 - (correct_count + correct_testing) / (
            len(training_y) + len(testing_y))
        plot_data.append(
            [c, training_error, testing_error, classification_error])
    np.savetxt("../data/results_all.txt", np.array(plot_data))
Example #11
from sklearn import tree, svm, neighbors
from sklearn.naive_bayes import GaussianNB
import numpy as np
import skfuzzy as fuzz


# data train [height,weight, shoe size]
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40], [190, 90, 47], [175, 64, 39],[177, 70, 40], [159, 55, 37], [171, 75, 42], [181, 85, 43]]

Y = ['male', 'male', 'female', 'female', 'male', 'male', 'female', 'female', 'female', 'male', 'male']

#classifiers
clf = tree.DecisionTreeClassifier()
clf1 = svm.SVC()
clf2 = neighbors.KNeighborsClassifier()
clf3 = GaussianNB()
#fuzzy c-means is unsupervised and has no fit/predict API: train cluster centers
#on the numeric features; cmeans_predict later assigns new samples to those centers
cntr4, u4, _, _, _, _, _ = fuzz.cmeans(np.array(X, dtype=float).T, c=2, m=2, error=0.0005, maxiter=1000)
#train model
clf = clf.fit(X,Y)
clf1 = clf1.fit(X,Y)
clf2 = clf2.fit(X,Y)
clf3 = clf3.fit(X,Y)
#(no fit step for fuzzy c-means; cntr4 above already holds the trained centers)

_X=[[184,84,44],[198,92,48],[183,83,44],[166,47,36],[170,60,38],[172,64,39],[182,80,42],[180,80,43]]
_Y=['male','male','male','female','female','female','male','male']

#prediction
prediction = clf.predict(_X)
prediction1 = clf1.predict(_X)
prediction2 = clf2.predict(_X)
prediction3 = clf3.predict(_X)
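
#fuzzy c-means prediction (illustrative sketch): memberships of the test samples with
#respect to the trained centers; argmax gives a cluster index per sample, not a gender label
u4_test, _, _, _, _, _ = fuzz.cmeans_predict(np.array(_X, dtype=float).T, cntr4, 2, error=0.0005, maxiter=1000)
prediction4 = np.argmax(u4_test, axis=0)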