def build_tv(t_set, V_SIZE):
    """Split a validation set off the training set.

    Randomly reserves V_SIZE identity labels; every image carrying one of
    those labels goes to the validation pool, the rest stay for training.
    Validation identities that do not have at least two images in BOTH
    cameras (camId 1 and 2) are dropped, since they cannot form usable
    query/gallery pairs.

    Parameters:
        t_set: iterable of image objects with .label and .camId attributes.
        V_SIZE: number of identity labels to reserve for validation.

    Returns:
        (nt_set, vq_set, vg_set): reduced training set, plus the validation
        query and gallery sets produced by build_qg.
    """
    # set() gives O(1) membership tests in the split loop below.
    v_labels = set(np.random.choice(np.unique(toLabelArray(t_set)),
                                    size=V_SIZE, replace=False))
    v_set = []
    nt_set = []
    for t_img in t_set:
        if t_img.label in v_labels:
            v_set.append(copy.deepcopy(t_img))
        else:
            nt_set.append(copy.deepcopy(t_img))
    # Count images per (label, camera), capped at 2 — we only need to know
    # whether an identity has at least two images in a given camera.
    tracker = {}
    for v_img in v_set:
        key = (v_img.label, v_img.camId)
        tracker[key] = min(tracker.get(key, 0) + 1, 2)
    # Keep only identities with >= 2 images in both cameras; dict.get
    # replaces the original per-item try/except KeyError control flow.
    nv_set = [v_img for v_img in v_set
              if tracker.get((v_img.label, 1), 0) == 2
              and tracker.get((v_img.label, 2), 0) == 2]
    vq_set, vg_set = build_qg(nv_set)
    return nt_set, vq_set, vg_set
def train(clf, t_set, q_set, g_set):
    """Fit clf on the training features, then project all three sets.

    The transformed feature vectors are written back in place onto each
    image object's .feature attribute; nothing meaningful is returned.
    """
    t_features = clf.fit_transform(toFeatureArray(t_set), toLabelArray(t_set))
    q_features = clf.transform(toFeatureArray(q_set))
    g_features = clf.transform(toFeatureArray(g_set))
    # Write each projected row back onto its corresponding image object.
    for images, features in ((t_set, t_features),
                             (q_set, q_features),
                             (g_set, g_features)):
        for img, feat in zip(images, features):
            img.feature = feat
    return None
def linAssign(k_labels, g_set):
    """Return the k-means cluster labels reassigned to ground-truth labels.

    Solves a minimum-cost assignment between the k cluster ids and the k
    unique ground-truth labels, where pairing cluster c with label g costs
    minus the number of images that fall in both.

    Parameters:
        k_labels: cluster id per gallery image (assumed ids 0..k-1).
        g_set: gallery image objects carrying the true .label.

    Returns:
        ass_mtx: array of length k mapping cluster id -> original label.
    """
    # Modern replacement for sklearn.utils.linear_assignment_, which was
    # deprecated in scikit-learn 0.21 and removed in 0.23.
    from scipy.optimize import linear_sum_assignment

    g_labels = toLabelArray(g_set)
    k = max(k_labels) + 1
    # Maps row index in the cost matrix back to the original label value.
    g_label_translator = np.unique(g_labels)
    # cost_mtx[g][c] = -(# images with true label g assigned to cluster c)
    cost_mtx = np.zeros((k, k))
    for k_lbl, g_lbl in zip(k_labels, g_labels):
        g_idx = np.where(g_label_translator == g_lbl)[0][0]
        cost_mtx[g_idx][k_lbl] -= 1
    rows, cols = linear_sum_assignment(cost_mtx)
    ass_mtx = np.zeros(k)
    for g_idx, c_idx in zip(rows, cols):
        ass_mtx[c_idx] = g_label_translator[g_idx]
    return ass_mtx
def build_mlp_test(q_set, g_set):
    """Build MLP verification test pairs from the query and gallery sets.

    Restricts to 50 randomly chosen identities, then for every (query,
    gallery) pair among those identities emits the concatenated feature
    vector with label 0 for a genuine cross-camera match and 1 for a
    non-match. Same-identity same-camera pairs are skipped entirely.

    Returns:
        X_set: (n_pairs, 2*d) array of concatenated feature pairs.
        y_set: list of 0/1 labels (0 = same identity, different camera).
        qg_index: (n_pairs, 2) float array of (query index, gallery index).
    """
    feat_dim = len(q_set[0].feature)
    # Accumulate rows in plain lists and stack once at the end; the
    # original grew the arrays with np.vstack per pair, which is O(n^2).
    X_rows = []
    y_set = []
    qg_rows = []
    ran_labels = set(np.random.choice(np.unique(toLabelArray(q_set)),
                                      size=50, replace=False))
    for i, q_img in enumerate(q_set):
        if q_img.label not in ran_labels:
            continue
        for j, g_img in enumerate(g_set):
            if j % 100 == 0:
                print('Building MLP testing data... q:{:5d} g:{:5d}'.format(i, j))
            if g_img.label not in ran_labels:
                continue
            if g_img.label == q_img.label:
                if g_img.camId == q_img.camId:
                    continue  # same identity, same camera: not a useful pair
                y_val = 0  # genuine cross-camera match
            else:
                y_val = 1  # impostor pair
            X_rows.append(np.concatenate((q_img.feature, g_img.feature)))
            y_set.append(y_val)
            qg_rows.append((i, j))
    if not X_rows:
        # Match the original's empty-result shapes: (0, 2*d) and (0, 2).
        return np.zeros((0, 2 * feat_dim)), y_set, np.zeros((0, 2))
    return np.vstack(X_rows), y_set, np.asarray(qg_rows, dtype=float)
# Fragment of the main evaluation script: the opening `if train_method == ...`
# branch lies outside this chunk, so only the trailing branches are visible.
elif train_method == 'mlp':
    # Build pairwise verification data and train the MLP on it.
    X_train, y_train = build_mlp_data(t_set)
    X_test, y_test, qg_index = build_mlp_test(q_set, g_set)
    train_mlp(mlp, X_train, y_train, X_test, y_test, qg_index, q_set, g_set, k_nn_val)
    lap('Train with MLP', tr)
else:
    lap('Skip training', tr)
# ------------------------------------------------------------------------------
# NN
print('[---NN]------------------------------------------------------K-NN & mAP')
if train_method == 'mlp':
    # Restrict evaluation to 50 random identities.
    # NOTE(review): these labels are drawn independently of the 50 drawn
    # inside build_mlp_test — the subsets may differ unless the RNG is
    # seeded upstream; confirm this is intended.
    ran_labels = np.random.choice(np.unique(toLabelArray(q_set)), size=50, replace=False)
    q_set = [q_img for q_img in q_set if q_img.label in ran_labels]
    g_set = [g_img for g_img in g_set if g_img.label in ran_labels]
# Rank the whole gallery for every query under distance metric f_dist.
nn_g_set = allNN(q_set, g_set, f_dist)
lap('Calculate all pair-wise distances for NN', tr)
# ------------------------------------------------------------------------------
# K-NN
for k in k_nn_val:
    # Success = at least one correct match among the k nearest neighbours.
    knn_set = kNN(nn_g_set, k)
    success_array = successArray(q_set, knn_set)
    success_rate = np.count_nonzero(success_array) / len(q_set)
    print ('[-Main] With {:2d}-NN, success rate is [{:.2%}]'.format(k, success_rate))
# ------------------------------------------------------------------------------
# mAP
def kmean(g_set):
    """Cluster the gallery features with k-means; return centers and labels.

    k is set to the number of distinct ground-truth identities in g_set,
    so each cluster nominally corresponds to one identity.

    Returns:
        (centers, labels): the cluster centers wrapped as image objects
        labelled 1..k, and the per-image cluster assignment from KMeans.
    """
    k = len(np.unique(toLabelArray(g_set)))
    cl = KMeans(n_clusters=k)
    cl.fit(toFeatureArray(g_set))
    # Label the centers 1..k instead of the hard-coded 1..700, so the
    # function still works when the gallery holds a different number of
    # identities (identical output when k == 700).
    return toImageArray(cl.cluster_centers_, list(range(1, k + 1))), cl.labels_