예제 #1
0
def KNNclassifier(training, test, tLabels, k, d, *args):
    '''Label each test sample by majority vote among its k nearest neighbors.

    The training set is distributed as a Spark RDD (through the module-level
    ``sc``) and, for every test sample, the distance to each training sample
    is computed and collected back to the driver.

    Parameters:
        training: sequence of training samples.
        test: iterable of samples to be labeled.
        tLabels: labels of the training samples; ``tLabels[i]`` is taken to
            belong to ``training[i]``.
        k: number of nearest neighbors that take part in the vote. When k
            exceeds the number of training samples, all of them vote.
        d: distance function requested by the caller (see note below).
        *args: extra arguments intended for the distance function.

    Returns:
        A list with one predicted label per element of ``test``, in order.

    NOTE(review): ``d`` and ``*args`` are accepted but not used; every
    distance is computed with ``euclidean_distance``. Confirm what callers
    pass for ``d`` before wiring it in.
    '''
    trainingRDD = sc.parallelize(training)

    labels = []
    for sample in test:
        dist = trainingRDD.map(
            lambda x: euclidean_distance(x, sample)).collect()

        # Rank the training indices by distance instead of repeatedly
        # removing the minimum from ``dist``: removing an element shifts the
        # positions of everything after it, so the looked-up index would no
        # longer line up with ``tLabels``. The sort is stable, so ties are
        # still resolved in favor of the earliest training sample.
        nearest = sorted(range(len(dist)), key=dist.__getitem__)[:k]
        k_labels = [tLabels[idx] for idx in nearest]

        labels.append(stat.mode(k_labels))

    return labels
예제 #2
0
def bisect(lines):
    """Append a new line in the middle of the widest gap between neighbors.

    The gap between two consecutive entries is the difference of their first
    elements. The entry ``[m, m]`` is appended to ``lines``, where ``m`` is
    the start of the widest gap plus half of its width (integer division).
    When several gaps share the maximum width the first one is used.

    Parameters:
        lines: list of indexable entries whose first element is a position.
            The list is modified in place.

    Returns:
        The same ``lines`` list. With fewer than two entries there is no gap
        to split and the list is returned unchanged.
    """
    if len(lines) < 2:
        # No pair of consecutive lines, hence nothing to bisect.
        return lines

    gaps = [cur[0] - prev[0] for prev, cur in zip(lines, lines[1:])]
    widest = max(gaps)
    # gaps[i] lies between lines[i] and lines[i + 1], so lines[i] is its start.
    start = lines[gaps.index(widest)][0]
    midpoint = start + widest // 2

    lines.append([midpoint, midpoint])
    return lines
예제 #3
0
def threesect(lines, threshold=45):
    """Insert two extra lines so that the widest gap is split three ways.

    The gap between two consecutive entries is the difference of their first
    elements. If the widest gap is at least ``threshold`` it is cut into
    thirds by appending ``[c, c]`` entries at one third and two thirds of the
    gap. Otherwise ``bisect`` is applied twice.

    Parameters:
        lines: list of indexable entries whose first element is a position.
            The list is modified in place.
        threshold: minimum gap width that is split into thirds. The default
            of 45 corresponds to the width of three characters plus a small
            margin.

    Returns:
        The ``lines`` list with the added entries. With fewer than two
        entries there is no gap to split and the list is returned unchanged.
    """
    if len(lines) < 2:
        # No pair of consecutive lines, hence nothing to split.
        return lines

    gaps = [cur[0] - prev[0] for prev, cur in zip(lines, lines[1:])]
    widest = max(gaps)

    if widest >= threshold:
        # gaps[i] lies between lines[i] and lines[i + 1].
        start = lines[gaps.index(widest)][0]
        # Use a third of the gap: stepping by half of it would place the
        # second cut on the far edge of the gap rather than inside it.
        third = widest // 3
        for step in (1, 2):
            cut = start + step * third
            lines.append([cut, cut])
    else:
        lines = bisect(lines)
        lines = bisect(lines)

    return lines
 def filtering(self, image):
     """Run one tracking update for `image` and return the tracked result.

     Steps, in order:
       1. Call modeling(), calcLikelihood(image), x_means(...) and
          normalize().
       2. Call resampling() and pick, for every cluster i, the x / y / w / h
          columns (columns 0-3 of self.x_kmeans[i][0]) at the indices in
          self.sample[i].
       3. Accumulate the weighted sum of each column with self.weights[i]
          into self.bx / self.by / self.bw / self.bh and store the four
          values per cluster in self.px.
       4. Assign ids: via hungarian() when there are several clusters after
          the first frame, via the nearest previously stored box when there
          is a single cluster, or simply 0..cluster-1 otherwise.
       5. Pass the ids through frameout() and return its result.

     NOTE(review): self.sample, self.weights and self.x_kmeans are presumably
     populated by the helper calls above -- confirm against their definitions.

     Returns:
         The (average, id) pair returned by self.frameout(id).
     """
     # Call each processing step
     self.modeling()
     intensity, intensity_all = self.calcLikelihood(image)
     self.x_means(intensity, intensity_all)
     self.normalize()
     # Prepare the per-cluster containers
     self.X = [''] * self.cluster
     self.Y = [''] * self.cluster
     self.W = [''] * self.cluster
     self.H = [''] * self.cluster
     self.bx = np.zeros((self.cluster, 1))
     self.by = np.zeros((self.cluster, 1))
     self.bw = np.zeros((self.cluster, 1))
     self.bh = np.zeros((self.cluster, 1))
     self.px = [''] * self.cluster
     # Resampling
     self.resampling()
     for i in range(self.cluster):
         # Columns 0..3 are used as x, y, w, h; keep only the resampled rows
         self.X[i] = self.x_kmeans[i][0][:, 0][self.sample[i]]
         self.Y[i] = self.x_kmeans[i][0][:, 1][self.sample[i]]
         self.W[i] = self.x_kmeans[i][0][:, 2][self.sample[i]]
         self.H[i] = self.x_kmeans[i][0][:, 3][self.sample[i]]
     # Target estimation: weighted sum of every component per cluster
     for i in range(self.cluster):
         for j in range(len(self.X[i])):
             self.bx[i][0] += float(self.X[i][j]) * float(
                 self.weights[i][j])
         for j in range(len(self.Y[i])):
             self.by[i][0] += float(self.Y[i][j]) * float(
                 self.weights[i][j])
         for j in range(len(self.W[i])):
             self.bw[i][0] += float(self.W[i][j]) * float(
                 self.weights[i][j])
         for j in range(len(self.H[i])):
             self.bh[i][0] += float(self.H[i][j]) * float(
                 self.weights[i][j])
         self.px[i] = [
             self.bx[i][0], self.by[i][0], self.bw[i][0], self.bh[i][0]
         ]  # x, y, w, h of each cluster's bounding box
     # Hungarian method
     if self.frame_count > 1 and self.cluster > 1:
         id = self.hungarian()  # call the Hungarian-method function
         self.pre_px = np.array([x[:] for x in self.px
                                 ])  # copy self.px -> self.pre_px
         self.pre_id = id[:]  # copy id -> self.pre_id
         self.flag_count = 0
     # When there is only one cluster
     elif self.frame_count > 1 and self.cluster == 1:
         self.flag_count += 1
         p1 = np.array(self.px)
         p2 = self.pre_px
         print "p1:{}".format(len(p1))
         print "p2:{}".format(len(p2))
         distance = []
         # Compare Euclidean distances and take the id of the previous box
         # with the smallest distance
         for px in p2:
             distance.append(np.linalg.norm(p1[0] - px))
         id = [self.pre_id[distance.index(min(distance))]]
         # Once flag_count exceeds 5, judge that only one tracked target is
         # left and save the id
         if self.flag_count > 5:
             self.pre_px = np.array([x[:] for x in self.px])
             self.pre_id = id[:]
     # First frame
     else:
         id = range(self.cluster)
     # Frame-out handling
     average, id = self.frameout(id)
     return average, id
예제 #5
0
    # NOTE(review): this is the tail of a larger routine; `zero`, `one` ..
    # `seven` are presumably built the same way before this point -- confirm.

    # Reference spectrogram for the digit 8.
    wav_file = '/number/eight.wav'
    print(str(8) + ' making spectrogram arr')
    eight = graph_spectrogram(wav_file)

    # Reference spectrogram for the digit 9.
    wav_file = '/number/nine.wav'
    print(str(9) + ' making spectrogram arr')
    nine = graph_spectrogram(wav_file)

    # The value read from standard input is handed to graph_spectrogram in the
    # same way as the reference file paths above.
    userInput = input()
    print('user input making spectrogram arr')
    example = graph_spectrogram(userInput)

    # Each reference variable is overwritten with the result of comparing it
    # against the user's sample.
    one = find_similarity(example, one)
    two = find_similarity(example, two)
    three = find_similarity(example, three)
    four = find_similarity(example, four)
    five = find_similarity(example, five)
    six = find_similarity(example, six)
    seven = find_similarity(example, seven)
    eight = find_similarity(example, eight)
    nine = find_similarity(example, nine)
    zero = find_similarity(example, zero)

    print('\n minimal gap distance arr')
    # Ordered zero..nine so that a position in the list equals the digit.
    distance = [zero, one, two, three, four, five, six, seven, eight, nine]

    print(distance)

    # Position of the smallest value, i.e. the digit with the smallest result.
    print(distance.index(min(distance)))
예제 #6
0
# train_data = train_data[0:10,:]
# One label slot per row of train_data; reused for every cluster count below.
labels = np.zeros(train_data.shape[0])
# print labels.shape

# For every requested number of clusters: pick random rows as the initial
# centroids, alternate assignment and update steps `iteration` times, then
# save the centroids to '<cluster>_centroids.npz' and call vis.visualize.
for cluster in number_of_clusters:
    directory_path = './' + str(cluster) + '_centroids_images/'
    # NOTE(review): the range starts at 1, so row 0 can never be drawn as an
    # initial centroid -- confirm that this is intended.
    centroid_index = random.sample(range(1, train_data.shape[0]), cluster)
    centroid = train_data[centroid_index]
    # print centroid[0].shape

    for iterations in range(0, iteration):
        print("Iteration " + str(iterations))
        # Assignment step: label every sample with its closest centroid
        # (the first one wins on ties).
        for idx, data in enumerate(train_data):
            dist = [np.linalg.norm(data - center) for center in centroid]
            labels[idx] = dist.index(min(dist))
        print(labels)
        # Update step: move every centroid to the mean of its members.
        for cluster_number in range(0, cluster):
            index = [
                idx for idx, label in enumerate(labels)
                if label == cluster_number
            ]
            print('Updating cluster ' + str(cluster_number))
            if not index:
                # A cluster without members has no mean: np.mean over an
                # empty selection yields NaN, which would corrupt the
                # centroid and every later distance. Keep the old centroid.
                continue
            temp_data = train_data[index]
            centroid[cluster_number] = np.mean(temp_data, axis=0)
    print(centroid)
    np.savez(str(cluster) + '_centroids.npz', centroids=centroid)
    vis.visualize(cluster)
# Announce the start of the matching phase.
print("")
print(
    "-------------------------------------------ici commence la correspondance-------------------------------------------"
)
# Walk through `fichiers`, skipping directories, and classify the first
# regular file found: its group is the label of the closest centre.
for file in fichiers:
    chemin = "./" + dir_features + "/" + file
    if os.path.isdir(chemin):
        continue
    # NOTE(review): pickle.load can execute arbitrary code stored in the
    # file; only use it on feature files produced by a trusted source.
    with open(chemin, "rb") as fic:
        data = pickle.load(fic)
    # Distance from the stored features (third item of the pickle) to every
    # centre.
    dist = [DistanceHu(data[2], c) for c in centres]
    minima = min(dist)
    kindice = dist.index(minima)
    groupe = labels[kindice]
    if groupe == part:
        print("voici son fichier de correspondance:", file,
              "et son groupe est ", groupe)
    # The loop stops after the first non-directory entry, matched or not.
    break

#while i <nbr_img-1:
#print("voici le probleme:",all_files[i])
#		histB=histogramme(rep, all_files[i])
#		img2 = cv2.imread('images/' + all_files[i], 0)
#fonction de calcul da la distance d'histogramme
#		img2_couleur = cv2.imread('images/' + all_files[i], 1)
#       matrix_co=coocurrence(img2_couleur)
#		i=i+1