def assign_pooling(data):
    """Assign one image to a cluster group using max-pooled centre distances.

    ``data`` is a two-item sequence: ``data[0]`` unpacks into
    ``(image_name, feature_matrix)`` and ``data[1]`` holds the cluster
    centres that are handed to ``KMeansModel``.

    Every feature vector is routed to the cluster the model predicts for it,
    and each cluster keeps the largest Euclidean distance observed between
    its centre and a vector routed to it.  The function returns
    ``[image_name, group]`` where ``group`` is the 1-based position of the
    first cluster holding the smallest pooled distance.

    Clusters that never receive a vector keep their initial value of zero
    and still take part in that comparison.
    """
    image_name, descriptors = data[0]
    centres = data[1]
    descriptors = np.array(descriptors)

    predictor = KMeansModel(centres)
    pooled = np.zeros(len(centres))

    for vec in descriptors:
        idx = predictor.predict(vec)
        gap = distance.euclidean(centres[idx], vec)
        # Max pooling: only overwrite when the new distance is strictly larger.
        if gap > pooled[idx]:
            pooled[idx] = gap

    scores = pooled.tolist()
    lowest = min(scores)
    # Groups are numbered from 1, hence the offset on the list position.
    group = 1 + scores.index(lowest)
    return [image_name, group]
def assign_pooling(row, clusterCenters, pooling):
    """Assign one image to a cluster group from pooled centre distances.

    Args:
        row: Mapping with a ``'fileName'`` entry (the image name) and a
            ``'features'`` entry (the image's feature vectors).
        clusterCenters: Object whose ``.value`` attribute holds the cluster
            centres (presumably a Spark broadcast variable -- confirm
            against the caller).
        pooling: ``"max"`` keeps, per cluster, the largest distance between
            the centre and any vector predicted into it; ``"sum"`` adds
            those distances up.

    Returns:
        A one-element list ``[(image_name, group)]`` where ``group`` is the
        1-based position of the first cluster with the smallest pooled value.

    Raises:
        ValueError: If ``pooling`` is neither ``"max"`` nor ``"sum"``.
    """
    # Reject unknown modes before doing any work: previously they matched
    # neither branch, left every pooled value at 0 and silently put every
    # image in group 1.
    if pooling not in ("max", "sum"):
        raise ValueError(
            "unsupported pooling %r; expected 'max' or 'sum'" % (pooling,)
        )

    image_name = row['fileName']
    feature_matrix = np.array(row['features'])
    centers = clusterCenters.value
    model = KMeansModel(centers)
    bow = np.zeros(len(centers))
    use_max = pooling == "max"

    for x in feature_matrix:
        k = model.predict(x)
        dist = distance.euclidean(centers[k], x)
        if use_max:
            bow[k] = max(bow[k], dist)
        else:
            bow[k] = bow[k] + dist

    # NOTE(review): clusters that received no vector keep a pooled value of 0
    # and therefore compete in (and can win) the minimum below -- confirm
    # that this is the intended selection rule.
    clusters = bow.tolist()
    group = clusters.index(min(clusters)) + 1
    return [(image_name, group)]
def assign_pooling(data):
    """Assign one image to a cluster group from pooled centre distances.

    Args:
        data: Three-item sequence ``(row, clusterCenters, pooling)``.
            ``row`` is a mapping with ``'fileName'`` (the image name) and
            ``'features'`` (the image's feature vectors); ``clusterCenters``
            holds the centres handed to ``KMeansModel``; ``pooling`` is
            ``"max"`` (keep the largest centre-to-vector distance per
            cluster) or ``"sum"`` (add those distances up).

    Returns:
        ``[image_name, group]`` where ``group`` is the 1-based position of
        the first cluster with the smallest pooled value.

    Raises:
        ValueError: If ``pooling`` is neither ``"max"`` nor ``"sum"``.
    """
    row = data[0]
    clusterCenters = data[1]
    pooling = data[2]

    # Reject unknown modes before doing any work: previously they matched
    # neither branch, left every pooled value at 0 and silently put every
    # image in group 1.
    if pooling not in ("max", "sum"):
        raise ValueError(
            "unsupported pooling %r; expected 'max' or 'sum'" % (pooling,)
        )

    image_name = row['fileName']
    feature_matrix = np.array(row['features'])
    model = KMeansModel(clusterCenters)
    bow = np.zeros(len(clusterCenters))
    use_max = pooling == "max"

    for x in feature_matrix:
        k = model.predict(x)
        dist = distance.euclidean(clusterCenters[k], x)
        if use_max:
            bow[k] = max(bow[k], dist)
        else:
            bow[k] = bow[k] + dist

    # NOTE(review): clusters that received no vector keep a pooled value of 0
    # and therefore compete in (and can win) the minimum below -- confirm
    # that this is the intended selection rule.
    clusters = bow.tolist()
    group = clusters.index(min(clusters)) + 1
    return [image_name, group]
# Select the entry of `perm` whose total distance is smallest; `ref` is then
# indexed by the predicted cluster and compared with each row's target.
minIndex, minValue = min(enumerate(totalDist), key=operator.itemgetter(1))
ref = perm[minIndex]

correct = 0
incorrect = 0
with open('/home/ronald/data.csv', 'r') as f:
    csvReader = csv.DictReader(f)
    # Take the feature columns from the header so the vector always follows
    # the file's column order; iterating the row mapping itself gives no
    # such guarantee on interpreters where rows are plain unordered dicts.
    feature_names = [
        name for name in (csvReader.fieldnames or []) if name != 'target'
    ]
    for row in csvReader:
        data = [row[name] for name in feature_names]
        if ref[model.predict(Vectors.dense(data))] == int(row['target']):
            correct += 1
        else:
            incorrect += 1

# float() forces true division even where `/` on two ints truncates.
# Still raises ZeroDivisionError when the CSV contains no data rows.
print(str(float(correct) / (incorrect + correct) * 100) + '%')

# Raw lines of the saved model file, trailing newlines included.
with open('/home/ronald/kmeansModel', 'r') as f:
    centers = f.readlines()