def classify_embedding(): import classification_based class_labels = unpickle('cifar-10-batches-py/batches.meta')['label_names'] # print class_labels Y_GO_train = np.array(Y_train) X_GO_train = np.array(X_train) #removed_indices = np.where(Y_8_train!=8) #Y_8_train = Y_8_train[removed_indices] #X_8_train = X_8_train[removed_indices] #removed_indices = np.where(Y_8_train!=9) #Y_8_train = Y_8_train[removed_indices] #X_8_train = X_8_train[removed_indices] enc = OneHotEncoder(sparse=False) Y_GO_train = enc.fit_transform(np.array(Y_GO_train).reshape(-1, 1)) Y_GO_validation = np.array(Y_validation) X_GO_validation = np.array(X_validation) #removed_indices = np.where(Y_8_validation!=8) #Y_8_validation = Y_8_validation[removed_indices] #X_8_validation = X_8_validation[removed_indices] #removed_indices = np.where(Y_8_validation!=9) #Y_8_validation = Y_8_validation[removed_indices] #X_8_validation = X_8_validation[removed_indices] Y_GO_validation = enc.fit_transform( np.array(Y_GO_validation).reshape(-1, 1)) classification_based.train(X_GO_train, Y_GO_train, X_GO_validation, Y_GO_validation) #Y_2_validation = np.array(Y_validation) #X_2_validation = np.array(X_validation) #indices = np.where(Y_2_validation>=8) #Y_2_validation = Y_2_validation[indices] #X_2_validation = X_2_validation[indices] validaiton_probab = classification_based.predict_probabilites( X_GO_validation) print validaiton_probab.shape print validaiton_probab[0] print Y_GO_validation[0] weights = [] for i in class_labels: weights.append(embeddings[i]) weights = np.array(weights, dtype=np.float32) validaiton_embeddings = np.dot(validaiton_probab, weights) print validaiton_embeddings.shape targets_embeddings = [] targets_embeddings_words = [] for i in class_labels: targets_embeddings.append(embeddings[i]) targets_embeddings_words.append(i) for k in embeddings: if k not in class_labels: targets_embeddings.append(embeddings[k]) targets_embeddings_words.append(k) targets_embeddings = np.array(targets_embeddings, dtype=np.float32) from sklearn.neighbors import KNeighborsClassifier from sklearn.metrics import accuracy_score from scipy.spatial.distance import cosine Y_pred_validation = [] for i in validaiton_embeddings: cos = [] for j in targets_embeddings: val = 1 - cosine(i, j) cos.append(val) #vec = np.zeros(len(targets_embeddings)) #vec[np.argmax(cos)] = 1 Y_pred_validation.append(np.argmax(cos)) # neigh = KNeighborsClassifier(n_neighbors=1) # neigh.fit(targets_embeddings, [0,1,2,3,4,5,6,7,8,9]) # Y_pred_validation = neigh.predict(validaiton_embeddings) # print Y_2_validation # print Y_pred_validation print accuracy_score(np.argmax(Y_GO_validation, axis=1), Y_pred_validation)
def classify_embedding(): import classification_based class_labels = unpickle('cifar-10-batches-py/batches.meta')['label_names'] # print class_labels Y_8_train = np.array(Y_train) X_8_train = np.array(X_train) removed_indices = np.where(Y_8_train!=8) Y_8_train = Y_8_train[removed_indices] X_8_train = X_8_train[removed_indices] removed_indices = np.where(Y_8_train!=9) Y_8_train = Y_8_train[removed_indices] X_8_train = X_8_train[removed_indices] enc = OneHotEncoder(sparse=False) Y_8_train = enc.fit_transform(np.array(Y_8_train).reshape(-1,1)) Y_8_validation = np.array(Y_validation) X_8_validation = np.array(X_validation) removed_indices = np.where(Y_8_validation!=8) Y_8_validation = Y_8_validation[removed_indices] X_8_validation = X_8_validation[removed_indices] removed_indices = np.where(Y_8_validation!=9) Y_8_validation = Y_8_validation[removed_indices] X_8_validation = X_8_validation[removed_indices] Y_8_validation = enc.fit_transform(np.array(Y_8_validation).reshape(-1,1)) classification_based.train(X_8_train, Y_8_train, X_8_validation, Y_8_validation) Y_2_validation = np.array(Y_validation) X_2_validation = np.array(X_validation) indices = np.where(Y_2_validation>=8) Y_2_validation = Y_2_validation[indices] X_2_validation = X_2_validation[indices] validaiton_probab = classification_based.predict_probabilites(X_2_validation) weights = [] for i in class_labels[:-2]: weights.append(embeddings[i]) weights = np.array(weights, dtype=np.float32) validaiton_embeddings = np.dot(validaiton_probab, weights) targets_embeddings = [] for i in class_labels: targets_embeddings.append(embeddings[i]) targets_embeddings = np.array(targets_embeddings, dtype=np.float32) from sklearn.neighbors import KNeighborsClassifier from sklearn.metrics import accuracy_score from scipy.spatial.distance import cosine Y_pred_validation = [] for i in validaiton_embeddings: cos = [] for j in targets_embeddings: val = cosine(i,j) cos.append(val) Y_pred_validation.append(np.argmax(cos)) # neigh = KNeighborsClassifier(n_neighbors=1) # neigh.fit(targets_embeddings, [0,1,2,3,4,5,6,7,8,9]) # Y_pred_validation = neigh.predict(validaiton_embeddings) # print Y_2_validation # print Y_pred_validation print accuracy_score(Y_2_validation, Y_pred_validation) for i,j in zip(Y_2_validation, Y_pred_validation): print (i,j)