コード例 #1
0
def mind_kl(data, dist_table, K, dim, dist_type):
    """Estimate intrinsic dimension by minimising a KL-divergence criterion.

    For each candidate dimension d = 1..dim, draws d random feature columns,
    rebuilds a K-NN distance table on that subspace and compares its `pr`
    statistic against the full-space one; returns the d with the smallest
    divergence.

    Args:
        data: (N, dim) feature matrix.
        dist_table: precomputed K-NN distance table for the full space
            (released as soon as `pr` is extracted, to save memory).
        K: number of neighbours forwarded to `get_pr` / knn.KNN.
        dim: number of feature columns, i.e. the maximum candidate dimension.
        dist_type: distance metric name forwarded to knn.KNN.

    Returns:
        Estimated dimension as an int in 1..dim.
    """
    N = dist_table.shape[0]

    # compute pr for the full feature space
    print('compute pr')
    pr = get_pr(dist_table, K)
    print('pr=%.2f' % np.min(pr))
    # the full table can be large; free it now that pr is extracted
    del dist_table

    # compute pdr for every candidate sub-dimension, tracking the best KLD
    print('compute pdr')
    kld = sys.maxsize
    d = 1
    for i in range(dim):
        # sample (i + 1) distinct feature columns at random
        sample_id = np.random.choice(dim, i + 1, replace=False)
        samples = data[:, sample_id]
        nb = knn.KNN(K, dist_type=dist_type, data=samples)
        dtable = nb.get_dist_multip(False)

        pdr = get_pr(dtable, K)
        cur_kld = math.log(N / (N - 1)) + np.mean(np.log(np.divide(pr, pdr)))
        if cur_kld < kld:
            kld = cur_kld
            d = i + 1
        # BUGFIX: '\%' was an invalid escape sequence (DeprecationWarning,
        # a SyntaxError in future Python); '\\%' prints the same literal
        # backslash separator without the warning.
        print('[%d\\%d]: kld=%.2f' % (i, dim, kld))
    return d
コード例 #2
0
def main():
  """Label-propagation driver: read vertices from file, load a KNN graph,
  then iteratively propagate POS-distribution projections over the graph."""
  vertices ={}
  print("Read vertices from file")
  for line in open(args.vertices_file):
    v = graph_f.Vertex(line)
    vertices[v.name] = v
  print("Number of Vertices: {}".format(len(vertices)))
  
  print("Loading KNN graph")
  # sys.maxsize effectively removes the neighbour-count cap; edges are
  # pruned only by the distance threshold below
  knn_graph = knn.KNN(vertices, sys.maxsize, args.knn_graph_file).GetMatrix(args.knn_distance_threshold)

  print("Loading projections")
  initial_vertex_projections, all_pos = LoadProjections(args.projections, vertices)
  
  # uniform prior over all POS tags, used for vertices with no projection yet
  uniform_pos = {pos:1/len(all_pos) for pos in all_pos}

  current_projections = initial_vertex_projections
  for i in range(args.num_iterations):
    print("Iteration:", i+1)
    new_projections = {}
    for v in vertices.values():
      # seed vertices keep their initial projection unchanged
      if v in initial_vertex_projections:
        new_projections[v] = initial_vertex_projections[v]
        continue
      # similarity weight = 1 - distance (assumes distances lie in [0, 1]
      # -- TODO confirm against the KNN graph builder)
      nn_array = [(nn, 1-dist) for (nn, dist) in knn_graph[v]]
      
      # weighted average of neighbour distributions, smoothed towards the
      # uniform prior with strength args.nu
      nominator = MulScalarByVector(args.nu, uniform_pos)      
      denominator = args.nu
      for nn, weight in nn_array:
        nn_pos_vector = current_projections.get(nn, uniform_pos)
        nominator = AddVector(nominator, MulScalarByVector(weight, nn_pos_vector))
        denominator += weight
      new_projections[v] = MulScalarByVector(1/denominator, nominator)
    current_projections = new_projections
コード例 #3
0
ファイル: examples.py プロジェクト: heshenghuan/python-KNN
def example_knn():
    # An example of how to use knn: build a 2-D KNN over a kd-tree,
    # classify two query points, then pickle and reload the model.
    # NOTE: Python 2 syntax (print statements), matching the source project.
    print "*" * 60
    print "*" * 16, "An Example of knn's Usage", "*" * 17
    print "*" * 60
    # raw (x, y) points, converted to {feature_index: value} dicts below
    data1 = [(3, 5), (2, 3), (5, 4), (9, 6), (4, 7), (8, 1), (7, 2), (8, 8)]
    data = []
    for i in data1:
        data.append({0: i[0], 1: i[1]})
    # binary class label per sample
    label = [1, 1, 1, 0, 1, 0, 1, 0]
    m = knn.KNN(data, label, dimensions=2)
    print "Samples:", m.train_data
    print "\nLabel prb:", m.class_prb
    # print m.decision()
    print "\n\nvisualize the kd-tree: "
    m.visualize_kdtree()
    # distance function used for 3-NN classification below
    f = ds.EuclideanDistance
    print "the label of point", {0: 9, 1: 9}, "is",
    print m.classify(point={0: 9, 1: 9}, k=3, dist=f, prbout=1)
    print "the label of point", {0: 2, 1: 8}, "is",
    print m.classify(point={0: 2, 1: 8}, k=3, dist=f, prbout=1)
    # persist the trained model to disk
    knn.saveknn(m, 'testknn.pkl')

    # Pickle test: reload the model and repeat the inspection steps
    print "*" * 60
    print "Load knn model from file: 'testknn.pkl'"
    n = knn.loadknn('testknn.pkl')
    print "Samples:", n.train_data
    print "\nLabel prb:", n.class_prb
    # print n.decision()
    print "\n\nvisualize the kd-tree: "
    n.visualize_kdtree()
コード例 #4
0
def GradeTest():
    """Evaluate the KNN grade classifier and print its error rate.

    Splits studentsgrade.csv by `rate`: the first `test_data` rows are
    queried one at a time against a training set built from the remaining
    rows (both min-max normalised via nm.Norm).
    """
    # fraction of the rows held out as test queries
    rate = 0.30

    # read the file and get the values
    df = pd.read_csv('studentsgrade.csv')
    numdf = df.values

    # last column is the grade label, everything before it is a feature
    row_num = numdf.shape[0]
    col_num = numdf.shape[1]
    grade = numdf[:, col_num - 1]
    features = numdf[:, :col_num - 1]

    # number of test rows; error counter kept as float for the final ratio
    test_data = int(row_num * rate)
    error_count = 0.0

    # min-max normalise the features (therange/mincols are scaling params)
    norm_features, therange, mincols = nm.Norm(features)

    # training portion: everything after the held-out prefix
    df_test = norm_features[test_data:row_num, :]
    label = grade[test_data:row_num]

    for i in range(test_data):
        classified_result = knn.KNN(df_test, label, norm_features[i, :], 3)
        # BUGFIX: corrected the typo "classifiier" in the user-facing message
        print('The classifier returned {}. The real answer is: {}'.format(
            classified_result, grade[i]))

        if (classified_result != grade[i]):
            error_count += 1.0
    print('Total Error rate is: {}. '.format(error_count / float(test_data)))
コード例 #5
0
ファイル: Experiment_5.py プロジェクト: 504703038/Linux
def integrate(train_data, train_label, test_data, test_label):
    """Ensemble SVM, KNN and a BP network by per-sample majority vote.

    Each classifier predicts a digit label (0-9) for every test sample;
    the label with the most votes wins (np.argmax breaks ties toward the
    lower label). Prints per-stage progress and the ensemble accuracy.
    """
    # SVM
    Svm = svm.SVM(train_data, train_label, test_data, test_label)
    test_result_svm = Svm.classify()
    # print(test_result_svm)
    print("SVM over.")

    # KNN (k-nearest neighbours, k = 7)
    kn = knn.KNN(train_data, train_label, 7)
    test_result_kn = kn.work(test_data)
    # print(test_result_kn)
    print("KNN over.")

    # Neural network (400-25-10 MLP trained with SGD: 30 epochs,
    # mini-batch 10, learning rate 3.0)
    net = network.Network([400, 25, 10], train_data, train_label, test_data,
                          test_label)
    test_result_bp = net.SGD(30, 10, 3.0)
    # print(test_result_bp)
    print("BP over.")

    num = len(test_data)
    err = 0
    for i in range(num):
        # one vote per classifier into a 10-slot ballot
        e = np.zeros((10, 1))
        e[test_result_svm[i]] += 1
        e[test_result_kn[i]] += 1
        e[test_result_bp[i]] += 1
        tmp_class = np.argmax(e)
        # test_label rows appear to be 1-element containers -- TODO confirm
        if tmp_class != test_label[i][0]:
            err += 1
    # print(err)
    print('accuracy:', 1 - 1.0 * err / num)
コード例 #6
0
ファイル: model.py プロジェクト: thayeylolu/Grade-Prediction
def ClassifyGrade(inArr):
    """Classify a single student-feature vector with 3-NN.

    Args:
        inArr: sequence of raw (unnormalised) feature values, one per
            feature column of mlscripts/studentsgrade.csv.

    Returns:
        The grade label predicted by knn.KNN.
    """
    # read the file and get the values
    df = pd.read_csv('mlscripts/studentsgrade.csv')
    numdf = df.values

    # last column is the grade label, everything before it is a feature
    row_num = numdf.shape[0]
    col_num = numdf.shape[1]
    grade = numdf[:, col_num - 1]
    features = numdf[:, :col_num - 1]

    # min-max normalise the training features
    norm_features, therange, mincols = nm.Norm(features)

    # BUGFIX: the query must be scaled element-wise with the same
    # min/range as the training data. The old `mincols[:, None]` broadcast
    # produced a 2-D matrix, and the raw unnormalised inputs were then
    # passed to the classifier anyway, making nm.Norm dead code (compare
    # GradeTest, which classifies normalised queries against
    # norm_features).
    inputs = (inArr - mincols) / therange

    classified_result = knn.KNN(norm_features, grade, inputs, 3)
    #predicted_grade = grade[classified_result - 1]
    return (classified_result)


#asn = ClassifyGrade([3,5,60, 9, 3])
#print(asn)
コード例 #7
0
def cal_5fold(k, dist_type, train_data):
    """Run 5-fold cross-validation for a KNN classifier and append the
    averaged metrics to result_02_5fold.txt.

    Args:
        k: number of neighbours.
        dist_type: 'e'/'E' (Euclidean), 'm'/'M' (Manhattan) or
            'l'/'L' (L-infinity), forwarded to knn.KNN.
        train_data: 2-D array whose first column holds the class label.
    """
    print("Saving...")
    clf_5 = knn.KNN(k, dist_type)

    # classes 0..9 as floats, matching the label column's dtype
    c_arr = np.array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

    acc_test_arr = np.array([])
    pre_test_arr = np.array([])
    re_test_arr = np.array([])
    f1_test_arr = np.array([])

    # BUGFIX: shuffle a copy; np.random.shuffle works in place, and the
    # original mutated the caller's array.
    t_data = train_data.copy()
    np.random.shuffle(t_data)
    # fold size derived from the data instead of the hard-coded 160
    # (identical behaviour for the original 800-row dataset)
    fold = t_data.shape[0] // 5
    for i in range(5):
        train5_data = np.delete(t_data, np.s_[fold * i:fold * (i + 1)], axis=0)
        test5_data = t_data[fold * i:fold * (i + 1)]

        clf_5.train(train5_data)

        # column 0 holds the true labels
        a_arr_test5 = test5_data[:, 0]
        p_arr_test5 = np.array([])

        for row in test5_data:
            p_arr_test5 = np.append(p_arr_test5, clf_5.predict(row))

        acc_test_arr = np.append(acc_test_arr,
                                 cal_accuracy(a_arr_test5, p_arr_test5, c_arr))
        pre_test_arr = np.append(
            pre_test_arr, cal_precision(a_arr_test5, p_arr_test5, c_arr))
        re_test_arr = np.append(re_test_arr,
                                cal_recall(a_arr_test5, p_arr_test5, c_arr))
        f1_test_arr = np.append(f1_test_arr,
                                cal_f1(a_arr_test5, p_arr_test5, c_arr))

    # write the report; `with` guarantees the handle is closed on error too
    with open("result_02_5fold.txt", "a") as f_5:
        f_5.write("============================================\n\n")
        f_5.write("# of K: %d\n" % k)

        if dist_type == 'e' or dist_type == 'E':
            f_5.write("Distance Type: Euclidean\n")
        elif dist_type == 'm' or dist_type == 'M':
            f_5.write("Distance Type: Manhattan\n")
        elif dist_type == 'l' or dist_type == 'L':
            f_5.write("Distance Type: L∞\n")

        f_5.write("\n5-fold Cross Validation Metrics\n")
        f_5.write("Accuracy: %.4lf\n" % np.average(acc_test_arr))
        f_5.write("Precision: %.4lf\n" % np.average(pre_test_arr))
        f_5.write("Recall: %.4lf\n" % np.average(re_test_arr))
        f_5.write("F-1 Score: %.4lf\n" % np.average(f1_test_arr))
        f_5.write("\n============================================\n\n")
    print("Done!")
    del clf_5
    return
コード例 #8
0
def main():
    """Build (or reload) trigram vertices from a corpus, then build or
    reload the KNN graph over them."""
    vertices = collections.defaultdict(Vertex)  # key -trigram tuple
    if args.f or not os.path.exists(args.vertices_file):
        # corpus-wide aggregate vertex used for PMI normalisation
        corpus = Vertex()
        print("Loading tri-grams...")
        for line in open(args.corpus):
            fivegrams = LineToNgrams(line, 5)
            for fivegram in fivegrams:
                # a vertex is keyed by the middle trigram of each 5-gram
                vertices[fivegram[1:-1]].Update(fivegram)
                corpus.Update(fivegram)
        print("Number of Vertices: {}".format(len(vertices)))

        print("Updating PMI...")
        for trigram, vertex in vertices.items():
            vertex.UpdatePMI(corpus)

        print("Normalizing features")
        Normalize(vertices, corpus)

        # cache the vertices so later runs can skip the corpus pass
        print("Write vertices to file")
        with open(args.vertices_file, "w") as f:
            for v in vertices.values():
                f.write(v.dumps())
    else:
        print("Read vertices from file")
        for line in open(args.vertices_file):
            v = Vertex(line)
            vertices[v.name] = v
        print("Number of Vertices: {}".format(len(vertices)))

    ###### DEBUG
    #DebugFindKNN('have to do', 10, vertices)
    #import pdb; pdb.set_trace()
    ###### DEBUG END

    # build the KNN graph from scratch, or reload a previously saved one
    if args.f or not os.path.exists(args.graph_file):
        print("Building KNN graph")
        knn_graph_builder = knn.KNN(vertices, args.k)
        knn_matrix = knn_graph_builder.Run(args.graph_file)
    else:
        print("Loading KNN graph")
        knn_graph_builder = knn.KNN(vertices, args.k, args.graph_file)
コード例 #9
0
ファイル: main_iris.py プロジェクト: notantony/ml2019-hw
def run_test(k, dist_f, kernel_f, x_train, y_train, name):
    """Fit a kernelised KNN on the given training split, evaluate it on
    the global x_test/y_test, print the confusion matrix and an F-score
    summary line, and draw a KDE plot of the prediction margins."""
    confusion = np.zeros((n_classes, n_classes))
    margin_list = []
    model = knn.KNN(x_train, y_train, k=k, dist_f=dist_f, kernel_f=kernel_f)
    for idx, sample in x_test.iterrows():
        guess, margin = model.run(sample, y_test[idx])
        confusion[guess][y_test[idx]] += 1
        margin_list.append(margin)
    print(confusion)
    print("{} : k={}, dist={}, kernel={}, name={}".format(kernels.f_score(confusion)[0], k, dist_f.__name__, kernel_f.__name__, name))
    sns.kdeplot(margin_list)
コード例 #10
0
def TestKNN(train, test, algoType, K):
    """Time one KNN variant on the given train/test split.

    algoType selects "normal", "manhattan" or "minkow"; any other value
    leaves the accuracy at its 0.0 default.

    Returns:
        (accuracy, elapsed seconds) tuple.
    """
    accuracy = 0.0
    started = time.time()
    if algoType == "normal":
        accuracy = algo.KNN(train, test, K)
    elif algoType == "manhattan":
        accuracy = algo.KNNManhattan(train, test, K)
    elif algoType == "minkow":
        accuracy = algo.KNNMinkow(train, test, K)
    elapsed = time.time() - started
    return accuracy, elapsed
コード例 #11
0
 def setUp(self):
     """Build the shared KNN fixture: six labelled 2-D points split
     between a bottom-left and a top-right cluster, queried with k=2 at
     target point (2.0, 3.0)."""
     # [label, (x, y)] training samples
     samples = [
         ["bottom_left", (0.5, 1.0)],
         ["bottom_left", (1.5, 1.0)],
         ["bottom_left", (1.5, 1.5)],
         ["top_right", (9.5, 8.0)],
         ["top_right", (8.5, 7.0)],
         ["top_right", (10.5, 9.0)],
     ]
     k = 2
     target = (2.0, 3.0)
     # stored on the test instance for the test methods to use
     knn = my_knn.KNN(samples, target, k)
     self.knn = knn
コード例 #12
0
def garbage_classifier(training_data_folder, test_data_folder, k):
    """Train a k-NN on ORB descriptors and report cosine-distance accuracy.

    Args:
        training_data_folder: folder of labelled training images.
        test_data_folder: folder of labelled test images.
        k: number of neighbours for knn.KNN.

    Returns:
        Array of predicted labels for the test set.
    """
    training_labels = preprocessor.get_labels(training_data_folder)

    # HU DESCRIPTOR (alternative feature extractor, kept for reference)
    #vectorized_training_data = descriptor.Hu_descriptor(training_data_folder)
    #vectorized_test_data = descriptor.Hu_descriptor(test_data_folder)

    # ORB DESCRIPTOR
    vectorized_training_data = descriptor.ORB_descriptor(training_data_folder)
    vectorized_test_data = descriptor.ORB_descriptor(test_data_folder)

    knn_obj = knn.KNN(k)

    knn_obj.train(vectorized_training_data, training_labels)

    # MANHATTAN
    #predicted_labels = knn_obj.predict_by_Manhattan(vectorized_test_data)

    # EUCLIDEAN
    #predicted_labels = knn_obj.predict_by_Euclidean(vectorized_test_data)

    # COSINE
    predicted_labels = knn_obj.predict_by_Cosine(vectorized_test_data)

    # HAMMING
    #predicted_labels = knn_obj.predict_by_Hamming(vectorized_test_data)

    # side-by-side [predicted | actual] label columns
    display = np.hstack(
        (predicted_labels, preprocessor.get_labels(test_data_folder)))

    # GENERALISED: count matches over however many test rows there are
    # (the original hard-coded 417, its dataset's size), and do it in one
    # vectorised pass instead of a Python loop.
    total = display.shape[0]
    count = int(np.sum(display[:, 0] == display[:, 1]))
    print("Accuracy of prediction:")
    print(count / total)

    return predicted_labels
コード例 #13
0
def main():
    """Estimate dataset dimensionality with the Dimest estimator over a
    sweep of neighbourhood sizes K, appending results to knn_dim.txt."""
    epsilon = 0.01
    max_iter = 200
    # mu = [0, 0, 0]
    # cov = [[1, 0, 0], [0, 100, 0], [0, 0, 100]]
    # data = np.random.multivariate_normal(mu, cov, 1000)

    args = config.parse_args()

    # load features from .npy / .mat / .npz ('.mat' and '.npz' store them
    # under the 'feat' key)
    if args.data_filename.endswith('npy'):
        data = np.load(args.data_filename)
    if args.data_filename.endswith('mat'):
        data = scipy.io.loadmat(args.data_filename)
        data = data['feat']
    if args.data_filename.endswith('npz'):
        data = np.load(args.data_filename)
        data = data['feat']
    nrof_image = data.shape[0]
    dim = data.shape[1]

    # compute and sort the distance matrix if requested
    if args.if_dist_table:
        obj = knn.KNN(128, args.dist_table_filename, args.data_filename,
                      args.dist_type, args.if_norm)
        obj.get_dist_multip()
    # BUGFIX: the estimation loop below needs dist_table in BOTH branches;
    # previously it was only loaded in the else-branch, so a fresh
    # computation run crashed with NameError. get_dist_multip() is presumed
    # to persist the table to args.dist_table_filename -- confirm against
    # knn.KNN.
    dist_table = np.load(args.dist_table_filename)

    # estimate the dimension for each neighbourhood size
    K_array = [4, 7, 9, 15, 21, 30, 70, 90, 128]
    for K in K_array:
        print('compute dimension')
        dim_est = Dimest(data, K, epsilon, max_iter, dist_table[:,1,0:K])
        i,d0,d2 = dim_est.get_dim()
        print('iteration {}: dim0 = {}; dim = {}'.format(i,d0,d2))
        respath = os.path.join(args.resfolder, 'knn_dim.txt')
        with open(respath, 'a') as f:
            f.write('K=%d: iteration=%d; dim0=%.2f, dim=%.2f;\n' % (K, i, d0, d2))
コード例 #14
0
ファイル: main.py プロジェクト: jedrekf/msi_knn
def test(train_data, test_data, k, metric):
    """Evaluate a KNN classifier on labelled 2-D points.

    Each test instance is (x, y, class); the classifier predicts from the
    coordinates and the prediction is checked against the stored class.
    Prints "k;accuracy" and returns (all predictions, accuracy).
    """
    classifier = knn.KNN(train_data, k, metric)

    predictions = []
    hits = 0
    misses = 0
    for instance in test_data:
        expected = instance[2]
        predicted = classifier.compute_class((instance[0], instance[1]))
        predictions.append(predicted)
        if expected == predicted:
            hits += 1
        else:
            misses += 1

    accuracy = hits / (hits + misses)
    print("{};{}".format(k,
                         accuracy))

    return predictions, accuracy
コード例 #15
0
def main():
    """Estimate dataset dimensionality with the `idea` estimator over a
    sweep of neighbourhood sizes K, appending results to idea_dim.txt."""
    args = config.parse_args()

    # load data: .npy / .mat / .npz ('.mat' and '.npz' store the features
    # under the 'feat' key)
    if args.data_filename.endswith('npy'):
        data = np.load(args.data_filename)
    if args.data_filename.endswith('mat'):
        data = scipy.io.loadmat(args.data_filename)
        data = data['feat']
    if args.data_filename.endswith('npz'):
        data = np.load(args.data_filename)
        data = data['feat']
    nrof_image = data.shape[0]
    dim = data.shape[1]
    # load data

    # compute and sort the distance matrix if requested
    if args.if_dist_table:
        obj = knn.KNN(128, args.dist_table_filename, args.data_filename,
                      args.dist_type, args.if_norm)
        obj.get_dist_multip()
    # BUGFIX: dist_table is needed below in BOTH branches; previously it
    # was only loaded when if_dist_table was false, so a fresh computation
    # run crashed with NameError. get_dist_multip() is presumed to persist
    # the table to args.dist_table_filename -- confirm against knn.KNN.
    dist_table = np.load(args.dist_table_filename)

    # compute dimension for each neighbourhood size
    # K_array = [4]
    K_array = [4, 7, 9, 15, 21, 30, 70, 90, 128]
    for K in K_array:
        d = idea(dist_table, K)
        # d = mind_kl(data, dist_table, K, dim, 'Arclength')
        print('K={}: dim = {}'.format(K, d))
        respath = os.path.join(args.resfolder, 'idea_dim.txt')
        with open(respath, 'a') as f:
            f.write('K=%d: dim=%.2f;\n' % (K, d))
コード例 #16
0
ファイル: qz.py プロジェクト: jvtanner/gene_expression
def q1():
    """Plot an ROC curve for 3-NN over a sweep of false-negative weights.

    Loads the expression dataset, computes (sensitivity, specificity) for
    each weight in FN via KNN.calc_metrics, and scatters sensitivity
    against 1 - specificity.
    """
    KNN = knn.KNN()
    KNN.load_data("GSE25628_filtered_expression.txt", "GSE25628_samples.txt")

    k = 3
    # weights to sweep; each yields one (x, y) point on the ROC curve
    FN = [.05, .1, .25, .5, .75, .9, 1]

    # (removed the original's unused `vals` accumulator)
    xs = []
    ys = []
    for fn in FN:
        (s, sp) = KNN.calc_metrics(k, fn)
        xs.append(1 - sp)
        ys.append(s)

    # print(xs, ys)

    plt.scatter(xs, ys)
    plt.title("ROC curve")
    plt.xlabel("1 - Specificity")
    plt.ylabel("Sensitivity")
    plt.show()
コード例 #17
0
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import knn

#9: learning curve for a fixed K=5 KNN on the digits data
curve = run.learningCurveSet(knn.KNN({'isKNN':True,'K':5}),data.DigitData)
# BUGFIX: corrected the plot-title typo "DIgitsData" -> "DigitsData"
run.plotCurve('K-Nearest Neighbor on 5-NN; DigitsData',curve)

#11: sweep the neighbour count K from 1 to 10
curve = run.hyperparamCurveSet(knn.KNN({'isKNN':True}), 'K', [1,2,3,4,5,6,7,8,9,10],data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData',curve)

#12: sweep epsilon over 1, 1.5, ..., 19.5. A comprehension replaces the
# original manual while-loop; 0.5 steps are exact in binary floating
# point, and the leading literal 1 keeps the first element an int exactly
# as before.
arr = [1] + [1 + 0.5 * i for i in range(1, 38)]

curve = run.hyperparamCurveSet(knn.KNN({'isKNN':False}), 'eps', arr ,data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData',curve)
コード例 #18
0
def get_distTable(args):
    """Compute and sort the pairwise distance table for the configured
    dataset, delegating persistence to knn.KNN (constructed with 128
    neighbours, the table/data filenames, distance type and norm flag)."""
    builder = knn.KNN(128, args.dist_table_filename, args.data_filename,
                      args.dist_type, args.if_norm)
    builder.get_dist_multip()
コード例 #19
0
ファイル: Classify.py プロジェクト: lcharlesb/cs228
    X[:, :, 2, :] = allZCoordinates - meanValue
    return X


# reduce then centre each of the four data splits before classification
train3 = ReduceData(train3)
train4 = ReduceData(train4)
test3 = ReduceData(test3)
test4 = ReduceData(test4)

train3 = CenterData(train3)
train4 = CenterData(train4)
test3 = CenterData(test3)
test4 = CenterData(test4)

# merge the per-class splits into flat (X, y) matrices
trainX, trainy = ReshapeData(train3, train4)
testX, testy = ReshapeData(test3, test4)

# NOTE(review): this rebinds the name `knn` from the imported module to a
# classifier instance, shadowing the module for the rest of the script.
knn = knn.KNN()
knn.Use_K_Of(15)
knn.Fit(trainX, trainy)

# score on the first 2000 test rows -- assumes the test set has at least
# 2000 rows; TODO confirm
correctPredictions = 0
for row in range(0, 2000):
    actualClass = testy[row]
    prediction = knn.Predict(testX[row])
    if (actualClass == prediction):
        correctPredictions = correctPredictions + 1

print(correctPredictions)
print((correctPredictions / 2000) * 100)
コード例 #20
0
    # resize every image in each animal-class folder into a "resized" subdir
    animals = ["cats", "dogs", "panda"]
    for anm in animals:
        os.chdir(rootdir)
        chgdir = os.getcwd() + os.sep + anm
        os.chdir(chgdir)
        dir = os.getcwd()
        output_dir = dir + os.sep + "resized"
        resizeImage(dir, output_dir=output_dir)

    # 80/20 train/test split, then carve 12.5% of train off as validation
    (trainX_a, testX, trainY_a, testY) = train_test_split(img_dataset,
                                                          img_labels,
                                                          test_size=.20,
                                                          random_state=0)
    (trainX, valX, trainY, valY) = train_test_split(trainX_a,
                                                    trainY_a,
                                                    test_size=.125,
                                                    random_state=0)

    max_k = 2
    print("running test vs. validation")
    Ypred_val = []
    for i in range(1, max_k + 1):
        #load class knn to find best value for k
        knn_val = knn.KNN(i)

        # NOTE(review): this trains on the *validation* split and predicts
        # the *test* split, which looks inverted relative to the messages --
        # confirm intent against the rest of the file.
        #load training data into model
        knn_val.train(npy.asarray(valX), npy.asarray(valY))

        #get the prediction for validation
        Ypred_val.append(knn_val.predict(npy.asarray(testX)))
        print("ypred val", Ypred_val)
コード例 #21
0
N_TRAIN = 175  # rows used for training; the rest are held out for fitness eval
K = 1          # neighbours for the KNN classifier

# Read data from file
with open(FILE, "r") as data_csv:
    data = csv.reader(data_csv)
    trainset = list()
    trainlabels = list()
    # materialise all rows while the file is still open;
    # list(data) replaces the redundant [row for row in data]
    rows = list(data)

random.shuffle(rows)
for row in rows:
    # column 0 is the label; the remaining columns are float features
    trainlabels.append(float(row[0]))
    trainset.append([float(e) for e in row[1:]])

classifier = knn.KNN(K)
classifier.train(trainset[:N_TRAIN], trainlabels[:N_TRAIN])


def evalClassifier(individual):
    """GA fitness function: (held-out accuracy, fraction of features used).

    `individual` is a 0/1 feature mask supplied by the GA; accuracy is
    measured on the rows beyond N_TRAIN using the module-level classifier.
    """
    predictions = classifier.predict(trainset[N_TRAIN:], individual)
    expected = trainlabels[N_TRAIN:]
    hits = sum(p == t for p, t in zip(predictions, expected))
    accuracy = hits / float(len(expected))
    feature_ratio = sum(individual) / float(classifier.ndim)
    return accuracy, feature_ratio


# DEAP setup: maximise accuracy (weight +1.0) while minimising the number
# of selected features (weight -1.0)
creator.create("FitnessMulti", base.Fitness, weights=(1.0, -1.0))
creator.create("Individual", list, fitness=creator.FitnessMulti)

toolbox = base.Toolbox()
# Attribute generator: each gene is a 0/1 feature-selection flag
toolbox.register("attr_bool", random.randint, 0, 1)
コード例 #22
0
ファイル: main.py プロジェクト: notantony/ml2019-hw
]

# %%
# Flip to True to rerun the expensive leave-one-out hyperparameter sweep.
LEARN = False

if LEARN:
    # sweep neighbour counts with leave-one-out CV over the training set
    ks = [1, 2, 4, 7, 11, 16, 22, 29, 37, 46, 56, 79, 106, 121, 151, 199]
    for k in ks:
        for dist_f in dist_fs:
            for kernel_f in kernel_fs:
                result = np.zeros((n_classes, n_classes))
                margins = []
                for i, sample in x_train.iterrows():
                    # leave sample i out, fit on the rest
                    model = knn.KNN(x_train.drop(i),
                                    y_train.drop(i),
                                    k=k,
                                    dist_f=dist_f,
                                    kernel_f=kernel_f)
                    predict_y, margin = model.run(sample, y_train[i])
                    margins.append(margin)
                    result[predict_y][y_train[i]] += 1
                print("{:6} : k={:3}, dist={:8}, kernel={:8}".format(
                    kernels.f_score(result)[0], k, dist_f.__name__,
                    kernel_f.__name__))

    # sweep kernel bandwidths d with the same leave-one-out protocol
    ds = [0.01, 0.1, 0.5, 1.0, 2.0, 4.0, 6.0, 8.0, 12.0]
    for d in ds:
        for dist_f in dist_fs:
            for kernel_f in kernel_fs:
                result = np.zeros((n_classes, n_classes))
                margins = []
コード例 #23
0
# carve a validation set (12.5%) off the training portion
(trainX, valX, trainY, valY) = train_test_split(trainX_a,
                                                trainY_a,
                                                test_size=.125,
                                                random_state=0)
"""
initialize class knn.KNN with k=1
Find the best k-value
this is a for loop to go k=1:max_k

"""

# one prediction array per candidate k, indexed by k - 1
Ypred_val = []
for i in range(1, max_k + 1):
    print("running validation vs. test for k =", i)
    #load class knn to find best value for k=i
    knn_val = knn.KNN(i)

    #load validation-set as training data into model
    knn_val.train(np.asarray(trainX), np.asarray(trainY))

    #get the prediction
    Ypred_val.append(knn_val.predict(np.asarray(valX)))
"""
This will evaluate the different k values for l1 and l2 to determine the most accurate value for k
http://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html
"""

# row i-1 will hold the (l1, l2) scores for k = i
best_k = np.zeros((max_k, 2), dtype=float)
report_val_l1 = []
report_val_l2 = []
"""
コード例 #24
0
    def keyPressEvent(self, event):
        """Keyboard dispatcher for the digit-drawing widget.

        L: load a saved model; S: save the model; T: fit a new model;
        W: build the (X, y) training arrays; E: show sample counts;
        Q: clear the canvas; Space: capture the canvas as training data;
        F: capture the canvas and recognise the drawn digits.
        """
        k = event.key()

        # load model
        if k == Qt.Key_L:
            self.knn_clf = knn.KNN(None, None)
            self.knn_clf.clf = ModelUtils.load_model('./model/model1.m')
            print("加载模型成功")
            return

        # save model
        if k == Qt.Key_S:
            if self.knn_clf.clf == None:
                print("模型为None,不能保存")
                return
            ModelUtils.save_model(self.knn_clf.clf, './model/model1.m')
            print("已保存模型")
            return


        # fit a fresh classifier on the collected data (PCA disabled)
        if k == Qt.Key_T:
            self.knn_clf = knn.KNN(self.Data.X, self.Data.y, is_pca = False)
            self.knn_clf.fitWithoutPca()

            # self.knn_clf = knn.KNN()
            # self.knn_clf.fitWithoutPca(self.Data.X, self.Data.y)
            return

        # convert collected data into a training set and show X/y shapes
        if k == Qt.Key_W:
            self.Data.get_X_y()
            print(self.Data.X.shape)
            print(self.Data.y.shape)
            return

        # show how many samples have been collected so far
        if k == Qt.Key_E:
            print(len(self.Data.sub_imgs))
            print(len(self.Data.target))
            return

        # clear the drawing canvas
        if k == Qt.Key_Q:
            self.clearScreen()
            self.update()
            return

        # capture the canvas as training data (only before a model exists)
        if k == Qt.Key_Space:
            if self.knn_clf != None:
                print("模型已经存在,不需要构造")
                return
            # window position plus hard-coded offsets to the drawing area
            # -- presumably compensating for the frame/title bar; confirm
            x = self.pos().x()
            y = self.pos().y()
            x = x + 10
            y = y + 50

            h, w = self.height() - 20, self.width()
            screen = QApplication.primaryScreen()

            # grab the drawing area from the screen and persist it
            pix = screen.grabWindow(0, x, y, w, h)
            pix.save("draw.jpg")
            # numbers = GetNumber.read_img()
            self.Data.getNumber()
            # self.show_number(numbers)
            return

        # recognise the digits currently drawn on the canvas
        if k == Qt.Key_F:
            x = self.pos().x()
            y = self.pos().y()
            x = x + 10
            y = y + 50

            h, w = self.height() - 20, self.width()
            screen = QApplication.primaryScreen()

            pix = screen.grabWindow(0, x, y, w, h)
            pix.save("draw.jpg")
            numbers = GetNumber.read_img(self.knn_clf.clf)
            self.show_number(numbers)
コード例 #25
0
# coding=utf-8
# add the Bus directory to the import path
import sys
sys.path.append('../Bus/')

# import the helper modules that live under Bus
import knn
import naiveBayes as nb
import svm

# exercise the Bus helpers
# knn: overall score, plus a 2-D plot over two feature columns
print(knn.KNN())
print(knn._2dDraw(['education', 'spouse_occupation'], 1300))

# naive Bayes: each helper appears to return a 2-element result
# (indexed [0] and [1]) -- TODO confirm what each element is
print(nb._GaussianNB()[0])
print(nb._GaussianNB()[1])
print(nb._MultinomialNB()[0])
print(nb._MultinomialNB()[1])
print(nb._BernoulliNB()[0])
print(nb._BernoulliNB()[1])

# svm: plain SVM and LinearSVC variants
print(svm._SVM()[0])
print(svm._SVM()[1])
print(svm._LinearSVC()[0])
print(svm._LinearSVC()[1])
コード例 #26
0
ファイル: test.py プロジェクト: yytdfc/PyLearn
import numpy as np
import knn
import navie_bayes as nb
import decision_tree as dt
import random_forrest as rf
import boosting as bt
print('Loading data.txt...')
# whitespace-separated matrix: every column but the last is a feature,
# the last column is the class label
data = np.loadtxt('data.txt')
trainin = data[:, :-1]
trainout = data[:, -1]
# each classifier below is trained on the full set and then evaluated
# with 10-fold cross-validation via its own .test() helper
print('classify with KNN')
knn1 = knn.KNN(k=3)
knn1.train(trainin, trainout)
knn1.test(cross_fold=10)
print('\n')
print('classify with Navie Bayes')
nb1 = nb.NB()
nb1.train(trainin, trainout)
nb1.test(cross_fold=10)
print('\n')
print('classify with Decision Tree')
dt1 = dt.DT(N=5)
dt1.train(trainin, trainout)
dt1.test(cross_fold=10)
print('\n')
print('classify with Random Forrest')
rf1 = rf.RF(N=5, NTree=5)
rf1.train(trainin, trainout)
rf1.test(cross_fold=10)
print('\n')
print('classify with Boosting')
コード例 #27
0
def readCommand(argv):
    """Processes the command used to run from the command line.

    Parses classifier/dataset options, validates them, selects the feature
    extractor and image printer for the dataset, instantiates the requested
    classifier and returns (args, options), where args holds 'classifier',
    'featureFunction' and 'printImage'.

    Raises:
        Exception: if unrecognised positional arguments are present.
        SystemExit: via sys.exit(2) on any invalid option combination.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)


    parser.add_option('-r', '--run',  help=default('automatically runs training and test cycle for 5 times'),
                      default= False, action='store_true')

    parser.add_option('-c', '--classifier', help=default('The type of classifier'),
                      choices=['mostFrequent', 'naiveBayes', 'perceptron', 'knn'],
                      default='mostFrequent')
    parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces'], default='digits')
    parser.add_option('-t', '--training', help=default('The ratio of the training set to use'), default=1.0,
                      type="float")
    parser.add_option('-f', '--features', help=default('Whether to use enhanced features'), default=False,
                      action="store_true")
    parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'), default=False,
                      action="store_true")
    parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"), default=0, type="int")
    parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"), default=1, type="int")
    parser.add_option('-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option('-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False,
                      action="store_true")
    parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))

    # choose the feature extractor and image printer for the dataset
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    # digits are a 10-class problem, faces a binary one
    if options.data == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)" % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)" % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    # instantiate the requested classifier
    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            # BUGFIX: this was a bare `print` followed by an unused string
            # literal (a Python-2 leftover), so the message was never
            # actually printed.
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations)

    elif options.classifier == "knn":
        classifier = knn.KNN(legalLabels)

    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)

        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
コード例 #28
0
# deskew the raw digit images, then extract hole-count and pixel features
digits_train, digits_test = utils.get_deskew_imgs(
    digits_train), utils.get_deskew_imgs(digits_test)
holes_train, holes_test = utils.get_hole_features(
    digits_train), utils.get_hole_features(digits_test)
pix_train, pix_test = utils.get_pix_features(
    digits_train), utils.get_pix_features(digits_test)
# final feature matrix = [pixel features | hole features]
X_train, X_test = np.hstack([pix_train,
                             holes_train]), np.hstack([pix_test, holes_test])

# normalise with statistics fitted on the training set only
mean_normalizer = utils.normalization(X_train)
X_train = mean_normalizer.transform(X_train)
X_test = mean_normalizer.transform(X_test)

# grid-search PCA dimensionality and k via 10-fold cross-validation,
# tracking the best (n_component, k) pair
mx_score = 0
best = (-1, -1)
clf = knn.KNN(mode='weighted')
for n_component in range(3, 61, 3):
    for k in range(1, 11):
        _pca = pca.PCA(X_train)
        X_train_reduced = _pca.transform(X_train, n_component)
        X_test_reduced = _pca.transform(X_test, n_component)

        start_time = timeit.default_timer()
        validation_scores = []
        kf = KFold(n_splits=10)
        for t_idx, v_idx in kf.split(X_train_reduced):
            X_train_T, X_train_V = X_train_reduced[t_idx], X_train_reduced[
                v_idx]
            y_train_T, y_train_V = y_train[t_idx], y_train[v_idx]
            clf.fit(X_train_T, y_train_T)
            validation_score = clf.score(X_train_V, y_train_V, k)
コード例 #29
0
#This script will show as an example of the use of a KNN and SVM learners
#Created by Elijah Flinders

#svm setup and training
print("*********************************************************************")
print("Creating and testing SVM on it's own dataset. Support Vector Machine")
print("*********************************************************************")
# NOTE(review): rebinding `svm` to the instance shadows the imported
# module for the remainder of the script.
svm = svm.SVM(10000, 0.000001)
svm.fit()
print("Finished running the SVM!\n")

#KNN setup and prediction
print("************************************************")
print("Creating and testing KNN. K-th Nearest Neighbor")
print("************************************************")
knnTester = knn.KNN()

# load the Iris data set and convert the feature columns to floats
dataset = knnTester.loadCsvListKnn('iris.csv')
for i in range(len(dataset[0]) - 1):
    knnTester.colToFloat(dataset, i)

# convert the label (last) column to ints
knnTester.colToInt(dataset, len(dataset[0]) - 1)

# define number of model neighbors and one sample record per species
neighbors = 5
testSetosa = [4.5, 2.3, 1.3, 0.3]
testVersicolor = [7.0, 3.2, 4.7, 1.4]
testVirginica = [6.3, 3.3, 6.0, 2.5]
# try to predict labels for each type
コード例 #30
0
import knn
import util

# Build a default KNN model and score it on the iris data with k=5.
model = knn.KNN()

iris_rows = util.openFile("data/iris.csv")

print(model.knnRun(iris_rows, 5))