def eval_c(feature_selector, n):
    """Score a feature selector on the test fold with a 1-NN classifier.

    The training and validation folds are merged and handed to
    ``feature_selector`` (the merged data is passed as both its training and
    its validation set). The returned column indices are then used to fit a
    1-nearest-neighbour classifier (Euclidean distance) on the merged folds
    and to evaluate it on the test fold.

    Relies on ``folds``, ``k_train``, ``k_val`` and ``k_test`` from the
    enclosing scope.

    Args:
        feature_selector: callable ``(x_train, y_train, x_val, y_val, n)``
            returning the column indices to keep.
        n: forwarded unchanged as the selector's last argument.

    Returns:
        The value of ``utils.accuracy`` for the test fold.
    """
    fit_folds = [folds[k_train], folds[k_val]]

    # Let the selector see all attributes of the merged train+val data.
    full_x = np.vstack([part['x'] for part in fit_folds])
    full_y = np.hstack([part['y'] for part in fit_folds])
    selected = feature_selector(full_x, full_y, full_x, full_y, n)

    # Rebuild the fitting set from the folds, restricted to the chosen columns.
    reduced_x = np.vstack([part['x'][:, selected] for part in fit_folds])
    reduced_y = np.hstack([part['y'] for part in fit_folds])

    held_out = folds[k_test]
    pred = utils.kNN(reduced_x, reduced_y, held_out['x'][:, selected], 1,
                     utils.euclidean_distance)
    return utils.accuracy(held_out['y'], pred['labels'])
def SBS(x_train, y_train, x_val, y_val, n):
    """Sequential Backward Selection of attributes, scored by 1-NN accuracy.

    Starts from every column of ``x_train``. While more than ``n`` attributes
    remain, each remaining attribute is tentatively removed, a 1-NN classifier
    (Euclidean distance) is evaluated on the validation data, and the attribute
    whose removal gives the highest accuracy is dropped (ties keep the lowest
    index). The current attribute set is reported through
    ``print_attributes`` before every elimination round and once at the end.

    Args:
        x_train: 2-D array of training samples (rows) by attributes (columns).
        y_train: training labels, passed through to ``utils.kNN``.
        x_val: 2-D validation array with the same columns as ``x_train``.
        y_val: validation labels, passed through to ``utils.accuracy``.
        n: number of attributes to keep.

    Returns:
        Sorted list of the surviving column indices.
    """
    remaining = set(np.arange(0, x_train.shape[1]))
    while len(remaining) > n:
        print_attributes(remaining, 'Attributes')
        worst_attr = None
        best_acc = -1
        for j in sorted(remaining):
            # One sorted index list shared by both matrices: train and
            # validation columns are guaranteed to line up, independent of
            # set iteration order.
            kept = sorted(remaining - {j})
            pred = utils.kNN(x_train[:, kept], y_train, x_val[:, kept], 1,
                             utils.euclidean_distance)
            acc = utils.accuracy(y_val, pred['labels'])
            if acc > best_acc:
                best_acc = acc
                worst_attr = j
        if worst_attr is None:
            # Nothing could be selected (no attributes left because n < 0, or
            # no accuracy compared greater than -1). Stop instead of looping
            # forever on an unchanged set.
            break
        remaining.discard(worst_attr)
    print_attributes(remaining, 'Selected Attributes')
    return sorted(remaining)
def iterateVelTask(curr_task_id):
    '''
    Label every frame of task curr_task_id with a 20-NN vote against all earlier tasks.

    The velocity features of tasks 0 .. curr_task_id-1 (from getVelFeatures) form the
    labeled set and those of task curr_task_id are the frames to label. When the
    module-level `select` is not the empty string, the matching rows of
    receiver.feat_array (delimited by `starts`) are appended column-wise to both sets.

    Reads the module-level names select, receiver, starts and task.

    Inputs:
        curr_task_id - index of the task to label; must be at least 1
    Outputs:
        list of int labels, one per frame of the task
    Raises:
        ValueError - if curr_task_id < 1 (there would be no labeled data; the
                     previous code failed later with an unbound `old`)
    '''
    if curr_task_id < 1:
        raise ValueError('iterateVelTask needs at least one earlier task as labeled data '
                         '(got curr_task_id=%s)' % (curr_task_id,))

    # Gather every earlier block first and stack once, rather than re-copying
    # the growing array on each iteration.
    old = np.vstack([getVelFeatures(item) for item in np.arange(curr_task_id)])
    consider = getVelFeatures(curr_task_id)

    #reshape old and consider to include traditional features as well as velocity features
    if select != '':
        print('traditional features added')
        old = np.hstack((old, receiver.feat_array[starts[0]:starts[curr_task_id], :]))
        consider = np.hstack(
            (consider, receiver.feat_array[starts[curr_task_id]:starts[curr_task_id + 1], :]))

    # Single pre-formatted argument: same output as the old print statement,
    # and valid syntax on both Python 2 and Python 3.
    print('old/consider, %s %s' % (old.shape, consider.shape))

    curr_labels = []
    for frame in consider:
        knn_label, count_info = utils.kNN(frame, old, task.labels, k=20)
        curr_labels.append(knn_label)
    return [int(x) for x in curr_labels]
def exercicio3():
    """Run exercise 3: NN and Rocchio on the 'nebulosa' data, raw vs. cleaned.

    Part (a) evaluates both classifiers on the data after only
    ``handle_incomplete`` has been applied. Part (b) additionally removes
    outliers, one redundant attribute, duplicate samples and ambiguous samples
    from the training set, ignores the first two columns, and evaluates again.
    Each part prints both accuracies and saves/shows a seaborn pair plot of
    the training data under ``constants.OUTPUT_DIR``.
    """

    def report_accuracies(x_fit, y_fit, x_eval, y_eval):
        # 1-NN first, then Rocchio, both with Euclidean distance.
        nn_pred = utils.kNN(x_fit, y_fit, x_eval, k=1,
                            distance=utils.euclidean_distance)
        nn_acc = utils.accuracy(y_eval, nn_pred['labels'])
        print('\tAccuracy (NN): {:.3f}'.format(nn_acc))
        rocchio_pred = utils.rocchio(x_fit, y_fit, x_eval,
                                     distance=utils.euclidean_distance)
        rocchio_acc = utils.accuracy(y_eval, rocchio_pred)
        print('\tAccuracy (Rocchio): {:.3f}'.format(rocchio_acc))

    def save_pairplot(values, out_name):
        # Pair plot of every column against every other one, saved then shown.
        sns.pairplot(pd.DataFrame(values), markers='+',
                     plot_kws={'s': 50, 'edgecolor': 'b', 'linewidth': 1})
        target = os.path.join(constants.OUTPUT_DIR, out_name)
        plt.savefig(target, bbox_inches='tight')
        plt.show()

    utils.print_header(3)

    train_path = os.path.join(constants.DATA_DIR,
                              constants.FILENAME_NEBULOSA_TRAIN_DATABASE)
    train_data = load_nebulosa(train_path)
    test_path = os.path.join(constants.DATA_DIR,
                             constants.FILENAME_NEBULOSA_TEST_DATABASE)
    test_data = load_nebulosa(test_path)

    train_data = handle_incomplete(train_data)
    # The test set is completed with the (already completed) training set as
    # second argument.
    test_data = handle_incomplete(test_data, train_data)

    print('a)')
    # Last column holds the label, everything before it is an attribute.
    report_accuracies(train_data[:, :-1], train_data[:, -1],
                      test_data[:, :-1], test_data[:, -1])
    save_pairplot(train_data, 'exercicio3-a.pdf')

    print('b)')
    train_data = remove_outliers(train_data)
    print('\tLast two attributes are redundant. Remove one.')
    # train_data = clip(train_data)
    train_data = remove_redundant_attribute(train_data)
    test_data = remove_redundant_attribute(test_data)
    train_data = remove_duplicate_samples(train_data)
    train_data = disambiguate_samples(train_data, utils.euclidean_distance)
    print('\tNb of samples: {}'.format(train_data.shape[0]))

    # From here on the first two columns are left out as well.
    report_accuracies(train_data[:, 2:-1], train_data[:, -1],
                      test_data[:, 2:-1], test_data[:, -1])
    save_pairplot(train_data[:, 2:], 'exercicio3-b.pdf')
def iterateTask(curr_task_id, receiver, starts, task, testvalue):
    '''
    Label every frame of task curr_task_id from kNN votes reweighted by a MixedRayleigh model.

    For use after main.begin has been called to get receiver, giver, starts, task, curr_labels.

    Frames receiver.feat_array[starts[curr_task_id]:starts[curr_task_id+1]] are labeled one at
    a time against the rows receiver.feat_array[starts[0]:starts[curr_task_id]]. The first
    frame always gets task.path[0]. For each later frame the 20-NN counts are multiplied by
    mixed.proportionate(frames since the last position change), the majority of the last
    three labels receives a bonus ((count + 3) * testvalue), and the highest-scoring state
    becomes the label. Progress information is printed for every frame.

    Inputs:
        curr_task_id - index into starts of the task to label
        receiver     - object exposing feat_array
        starts       - offsets into receiver.feat_array used to slice out the tasks
        task         - object exposing path, times and labels
        testvalue    - multiplier applied to the boosted count
    Outputs:
        list of int labels, one per frame considered
    '''
    def emit(*items):
        # Writes what the Python 2 print statement would write (items separated
        # by a single space) while also being valid syntax on Python 3.
        print(' '.join([str(item) for item in items]))

    def updateCounts(count_info, proportions):
        # Scale, in place, the count of every candidate state by its proportion.
        for ind, state in enumerate(count_info[0]):
            count_info[1][ind] = count_info[1][ind] * proportions[state]
        return count_info

    def updatePosition(mixed, base_state, new_state, count_new, curr_position,
                       position_threshold=4):
        '''
        Track how long the chosen state has differed from base_state.

        If base_state == new_state the counter resets to 0 and nothing else changes.
        Otherwise the counter is incremented; once it exceeds position_threshold the
        position advances to the first occurrence of new_state at or after
        curr_position, a new MixedRayleigh is built there and base_state becomes
        new_state.
        '''
        if base_state == new_state:
            return base_state, 0, mixed, curr_position
        count_new += 1
        if count_new > position_threshold:
            offset = np.argmax(np.array(task.path[curr_position:]) == new_state)
            emit('curr/next = ', curr_position, '/', offset, 'new: ', new_state)
            curr_position += offset
            emit(curr_position, 'llllll')
            # NOTE(review): argmax is 0 when new_state does not occur in the rest
            # of the path, so the position then stays put; only position 0 falls
            # back to the last position, as the original comment describes.
            if curr_position == 0:
                curr_position = len(task.path) - 1  # default to the last position
            mixed = rayleigh.MixedRayleigh(task, curr_position)  # update the mixedRayleigh
            base_state = new_state
        return base_state, count_new, mixed, curr_position

    def guessFromPast(curr_labels, past_length=3):
        '''
        gets majority vote from past maximum 'past_length' number of labels
        (or all existing labels) in curr_labels; -1 when there are no labels yet
        '''
        numlabels = len(curr_labels)
        if numlabels == 0:
            return -1
        consider = curr_labels[-past_length:] if numlabels > past_length else curr_labels
        best, aux = utils.majorityVote(consider)
        return best

    def taskPercentComplete(task, curr_position, differential):
        # Expected time still needed = rest of the current state + all later states;
        # the return value is 100 minus that share of the total task time.
        curr_state_time_remaining = max(task.times[curr_position] - differential, 0)
        if curr_position == len(task.path) - 1:
            future_states_times = 0
        else:
            future_states_times = np.sum(task.times[(curr_position + 1):])
        total_task_time = np.sum(task.times)
        percent_remaining = (100 * (curr_state_time_remaining + future_states_times)
                             / float(total_task_time))
        return 100 - percent_remaining

    kNNnumber = 20
    curr_labels = []
    # Labeled rows (all earlier tasks) are sliced once; frames to label follow them.
    old = receiver.feat_array[starts[0]:starts[curr_task_id]]
    consider = receiver.feat_array[starts[curr_task_id]:starts[curr_task_id + 1]]
    curr_position = 0
    mixed = rayleigh.MixedRayleigh(task, position=curr_position)
    k = 0  # frame index at which the Rayleigh position last changed

    for i, frame in enumerate(consider):
        if i == 0:
            # The first frame is taken to be the first state of the path.
            curr_labels.append(task.path[0])
            base_state = task.path[0]
            count_new = 0
            continue

        knn_label, count_info = utils.kNN(frame, old, task.labels, k=kNNnumber)
        emit('mixed position: ', mixed.position)
        proportions = mixed.proportionate(i - k)
        #choose label by adding in proportions to consideration
        count_info_updated = updateCounts(count_info, proportions)

        #incorporate smoothing by further weighting the past few states
        expectedfrompast = guessFromPast(curr_labels)
        if expectedfrompast != -1:
            matches = np.array(count_info_updated[0]) == expectedfrompast
            # Boost only when the expected state really is a candidate: argmax of
            # an all-False mask is 0 and would otherwise reward an unrelated state.
            if np.any(matches):
                boost_ind = np.argmax(matches)
                count_info_updated[1][boost_ind] = (
                    (count_info_updated[1][boost_ind] + 3) * testvalue)

        knn_label_updated = count_info_updated[0][np.argmax(count_info_updated[1])]
        curr_labels.append(knn_label_updated)

        #if the chosen state changes and stays changed for several frames, the
        #mixedRayleigh is moved to the new position
        base_state, count_new, mixed, new_curr_position = updatePosition(
            mixed, base_state, knn_label_updated, count_new, curr_position,
            position_threshold=4)
        if curr_position != new_curr_position:
            k = i
            curr_position = new_curr_position
        percent_complete = taskPercentComplete(task, curr_position, i - k)

        emit('\n\nframe number: ', i, i - k)
        emit(count_info_updated)
        emit('percent complete: ', percent_complete)

    return [int(x) for x in curr_labels]
def onlineUpdate(kNN_number=20, complete=False):
    '''
    Ingest one new frame of data and update the running state label and percent complete.

    Adds the contents of `tmpfile` to the module-level `data` object, classifies the newest
    feature row with kNN against `labeled_data`, reweights the counts with the current
    MixedRayleigh proportions and the majority of the last three labels, appends the chosen
    state to curr_task_labels and updates the tracking globals declared below. Every
    intermediate result is printed.

    Also reads the module-level names data, tmpfile, labeled_data, task and last_task_end.

    Inputs:
        kNN_number - k passed to utils.kNN
        complete   - when True, 100.0 is reported instead of a computed percentage
    Outputs:
        information      - str(label) + tab + str(percent_complete) + tab + str(task_id);
                           task_id is always 0
        percent_complete - the reported percentage
    '''
    # define used global variables
    global base_state
    global count_new
    global mixed
    global curr_mixed_position
    global last_state_change_frame
    global curr_position
    global curr_task_labels

    def emit(*items):
        # Writes what the Python 2 print statement would write (items separated
        # by a single space) while also being valid syntax on Python 3.
        print(' '.join([str(item) for item in items]))

    def updateCounts(count_info, proportions):
        # Scale, in place, the count of every candidate state by its proportion.
        for ind, state in enumerate(count_info[0]):
            count_info[1][ind] = float(count_info[1][ind]) * float(proportions[state])
        return count_info

    def updatePosition(mixed, base_state, new_state, count_new, curr_position,
                       position_threshold=4):
        '''
        Track how long the chosen state has differed from base_state.

        If base_state == new_state the counter resets to 0. Otherwise it is incremented;
        once it exceeds position_threshold the position advances to the first occurrence
        of new_state at or after curr_position, a new MixedRayleigh is built there and
        base_state becomes new_state.
        '''
        emit('Current rayleigh position: ', curr_position)
        if base_state == new_state:
            count_new = 0
        else:
            count_new += 1
            if count_new > position_threshold:
                offset = np.argmax(np.array(task.path[curr_position:]) == new_state)
                emit('curr/next = ', curr_position, '/', offset, 'new: ', new_state)
                curr_position += offset
                emit(curr_position, 'llllll')
                # NOTE(review): argmax is 0 when new_state does not occur in the
                # rest of the path, so the position then stays put; only position
                # 0 falls back to the last position.
                if curr_position == 0:
                    curr_position = len(task.path) - 1  # default to the last position
                mixed = rayleigh.MixedRayleigh(task, curr_position)  # update the mixedRayleigh
                base_state = new_state
        emit('New rayleigh position: ', curr_position, 'base state new: ', base_state)
        emit(task.path, task.times)
        return base_state, count_new, mixed, curr_position

    def guessFromPast(curr_labels, past_length=3):
        '''
        gets majority vote from past maximum 'past_length' number of labels
        (or all existing labels) in curr_labels; -1 when there are no labels yet
        '''
        numlabels = len(curr_labels)
        if numlabels == 0:
            return -1
        consider = curr_labels[-past_length:] if numlabels > past_length else curr_labels
        emit('Guess from past function consider: ', consider)
        best, aux = utils.majorityVote(consider)
        return best

    def taskPercentRemaining(task, curr_position, differential):
        # Despite the name this returns the percent COMPLETE: 100 minus the share
        # of total task time still expected (rest of this state + later states).
        curr_state_time_remaining = max(task.times[curr_position] - differential, 0)
        emit('task.times[curr_position]-differential: ', task.times[curr_position], '-',
             differential, '=', task.times[curr_position] - differential)
        emit('Current state time remaining: ', curr_state_time_remaining)
        if curr_position == len(task.path) - 1:
            future_states_times = 0
        else:
            future_states_times = np.sum(task.times[(curr_position + 1):])
        emit('Expected future state time remaining: ', future_states_times)
        total_task_time = np.sum(task.times)
        percent_remaining = (100 * (curr_state_time_remaining + future_states_times)
                             / float(total_task_time))
        return 100 - percent_remaining

    # add the new line of data and get features
    data.addData(tmpfile)
    data.getFeatures()

    # separate labeled data from new data
    new_data = data.feat_array[data.num_vectors - 1, :]
    emit('New data considered: ', new_data)

    # frames since last task ended
    frames_in_curr_task = data.num_vectors - last_task_end

    # get init guess at knn_label and probability proportions based on current
    # position within the task
    knn_label, count_info = utils.kNN(new_data, labeled_data, task.labels, k=kNN_number)
    emit('Initial Knn: ', knn_label, count_info)
    proportions = mixed.proportionate(frames_in_curr_task - last_state_change_frame)
    emit('MixedRayleigh proportions: ', proportions)

    # incorporate proportions and past few labels
    count_info_updated = updateCounts(count_info, proportions)
    emit('After proportions considerations: ', count_info_updated)
    expectedfrompast = guessFromPast(curr_task_labels)
    emit('Guess from past: ', expectedfrompast)

    # if labels have been added, use expectedfrompast to weight the most likely candidate
    if expectedfrompast != -1:
        matches = np.array(count_info_updated[0]) == expectedfrompast
        # Boost only when the expected state really is a candidate: argmax of an
        # all-False mask is 0 and would otherwise reward an unrelated state.
        if np.any(matches):
            boost_ind = np.argmax(matches)
            count_info_updated[1][boost_ind] = (count_info_updated[1][boost_ind] + 3) * 1.1

    # determine new label based on weighted kNN
    emit('After guess from past applied: ', count_info_updated)
    knn_label_updated = count_info_updated[0][np.argmax(count_info_updated[1])]
    emit('New knn label chosen: ', knn_label_updated)
    curr_task_labels.append(knn_label_updated)

    # update mixedRayleigh distribution for the new frame/possibly new base state;
    # the first three results go straight back into the globals
    base_state, count_new, mixed, new_curr_mixed_position = updatePosition(
        mixed, base_state, knn_label_updated, count_new, curr_mixed_position,
        position_threshold=4)
    if curr_mixed_position != new_curr_mixed_position:
        last_state_change_frame = frames_in_curr_task
        curr_mixed_position = new_curr_mixed_position

    # get the percent complete
    if complete:
        percent_complete = 100.0
    else:
        frames_since_state_change = frames_in_curr_task - last_state_change_frame
        emit('Frames since state change: ', frames_since_state_change)
        percent_complete = taskPercentRemaining(task, curr_mixed_position,
                                                frames_since_state_change)

    # define information string
    task_id = 0  # not implemented to determine which task is happening, so task 0 for now
    information = str(curr_task_labels[-1]) + '\t' + str(percent_complete) + '\t' + str(task_id)
    print('information: ' + information)
    return information, percent_complete
def getCurrentLabel(self, new_data, data_object, curr_frame_count, mixed, kNN_number=20,
                    complete_threshold=80.0):
    '''
    Purpose:
    Takes a new row of data as input and uses kNN, mixedRayleigh proportions, and recent
    labels to append a new state to the curr_labels of the online task. Also, updates the
    mixedRayleigh position and the task's percent complete.

    Inputs:
    new_data - feature row for the newest frame; indexed with self.feature_inds
    data_object - object whose all_features rows self.data_inds are the labeled data
    curr_frame_count - frame counter; differences to self.last_state_change_frame are used
    mixed - mixedRayleigh object; proportionate() and updateSelf() are called on it
    kNN_number - k passed to utils.kNN
    complete_threshold - not used by this method

    Outputs:
    percent_complete - value also stored in self.percent_complete
    mixed - the mixedRayleigh object that was passed in (updated in place)
    '''
    def updateCounts(count_info, proportions):
        # Scale, in place, the count of every candidate state by its proportion.
        for ind, state in enumerate(count_info[0]):
            count_info[1][ind] = float(count_info[1][ind]) * float(proportions[state])
        return count_info

    def guessFromPast(curr_labels, past_length=3):
        '''
        gets majority vote from past maximum 'past_length' number of labels
        (or all existing labels) in curr_labels; -1 when there are no labels yet
        '''
        numlabels = len(curr_labels)
        if numlabels == 0:
            return -1
        consider = curr_labels[-past_length:] if numlabels > past_length else curr_labels
        best, aux = utils.majorityVote(consider)
        return best

    def updatePosition(mixed, path, base_state, new_state, count_new, curr_position,
                       position_threshold=2):
        '''
        Track how long the chosen state has differed from base_state.

        If base_state == new_state the counter resets to 0. Otherwise it is incremented;
        once it exceeds position_threshold the position moves forward to the first
        occurrence of new_state at or after curr_position, or one step back when
        new_state is the previous state of the path, or stays where it is. mixed is then
        updated to that position and base_state becomes new_state.
        '''
        if base_state == new_state:
            return base_state, 0, mixed, curr_position
        count_new += 1
        if count_new > position_threshold:
            # Default to "stay": the previous code left the amount unassigned on one
            # of the not-found branches and failed with an unbound local there.
            curr_position_update_amount = 0
            if new_state in path[curr_position:]:
                curr_position_update_amount = np.argmax(
                    np.array(path[curr_position:]) == new_state)
            elif curr_position > 0 and new_state == path[curr_position - 1]:
                # new state does not exist in the future: shift back to the past
                curr_position_update_amount = -1
            curr_position += curr_position_update_amount
            mixed.updateSelf(curr_position)  # update the mixedRayleigh
            base_state = new_state
        return base_state, count_new, mixed, curr_position

    def taskPercentRemaining(path, times, curr_position, differential):
        # Despite the name this returns the percent COMPLETE: 100 minus the share
        # of total task time still expected (rest of this state + later states).
        curr_state_time_remaining = max(times[curr_position] - differential, 0)
        if curr_position == len(path) - 1:
            future_states_times = 0
        else:
            future_states_times = np.sum(times[(curr_position + 1):])
        total_task_time = np.sum(np.array(times))
        percent_remaining = (100 * (curr_state_time_remaining + future_states_times)
                             / float(total_task_time))
        return 100 - percent_remaining

    #define base state on the first frame of the task
    if len(self.curr_labels) == 0:
        self.base_state = self.path[0]
        self.count_new = 0
        self.curr_mixed_position = 0
        self.last_state_change_frame = 0
        self.percent_complete = 0.0
        self.frames_since_state_change = 0
        mixed.updateSelf(self.curr_mixed_position)

    #get labeled data with correct rows from the data object
    all_labeled_data = data_object.all_features[self.data_inds, :]
    labels = self.labels

    #select a random subset of the labeled data
    #[(min(max_labeled_data_count,len(all_labeled_data))) points] to keep speed costs
    max_labeled_data_count = self.max_labeled_data_count
    if len(all_labeled_data) > max_labeled_data_count:
        labeled_data_selection_inds = np.random.permutation(
            len(all_labeled_data))[0:max_labeled_data_count]
        # Subsample the labels with the same indices so row i of the data still
        # goes with label i. Only done when labels are one-per-row (assumed to be
        # the kNN contract - TODO confirm against utils.kNN); container type kept.
        if len(self.labels) == len(all_labeled_data):
            if isinstance(self.labels, np.ndarray):
                labels = self.labels[labeled_data_selection_inds]
            else:
                labels = [self.labels[ind] for ind in labeled_data_selection_inds]
        all_labeled_data = all_labeled_data[labeled_data_selection_inds, :]
    #keep only the task-specific features
    labeled_data = all_labeled_data[:, self.feature_inds]

    #pick out the newest data
    curr_data = new_data[self.feature_inds]

    #get initial kNN count
    knn_label, count_info = utils.kNN(curr_data, labeled_data, labels, k=kNN_number)
    proportions = mixed.proportionate(curr_frame_count - self.last_state_change_frame)

    # incorporate proportions and past few labels
    if proportions == -1:
        # no proportions available: keep the plain kNN label
        knn_label_updated = knn_label
    else:
        count_info_updated = updateCounts(count_info, proportions)
        expectedfrompast = guessFromPast(self.curr_labels)
        # if labels have been added, use expectedfrompast to weight the most likely candidate
        if expectedfrompast != -1:
            matches = np.array(count_info_updated[0]) == expectedfrompast
            # Boost only when the expected state really is a candidate: argmax of
            # an all-False mask is 0 and would otherwise reward an unrelated state.
            if np.any(matches):
                boost_ind = np.argmax(matches)
                #this addition and multiplication gives a bit of a chance to low scoring values
                count_info_updated[1][boost_ind] = (count_info_updated[1][boost_ind] + 1) * 1.1
        # determine new label based on weighted kNN
        knn_label_updated = count_info_updated[0][np.argmax(count_info_updated[1])]

    self.curr_labels.append(knn_label_updated)

    # update mixedRayleigh distribution for the new frame/possibly new base state
    new_base_state, new_count_new, new_mixed, new_curr_mixed_position = updatePosition(
        mixed, self.path, self.base_state, knn_label_updated, self.count_new,
        self.curr_mixed_position, position_threshold=1)
    self.base_state = new_base_state
    self.count_new = new_count_new

    if self.curr_mixed_position < new_curr_mixed_position:
        # moved forward: assumes the actual state started just a few frames before
        # this change was detected
        self.last_state_change_frame = curr_frame_count - 3
        self.curr_mixed_position = new_curr_mixed_position
    elif self.curr_mixed_position > new_curr_mixed_position:
        # moved back: assumes the previous position was a fluke and credits its
        # frames to the position being returned to
        self.last_state_change_frame = (self.last_state_change_frame
                                        - self.frames_since_state_change)
        self.curr_mixed_position = new_curr_mixed_position

    # get the percent complete
    self.frames_since_state_change = curr_frame_count - self.last_state_change_frame
    self.percent_complete = taskPercentRemaining(
        self.path, self.times, self.curr_mixed_position, self.frames_since_state_change)
    return self.percent_complete, new_mixed
def exercicio9():
    """Run exercise 9: 3-fold grid search of kNN on the 'car' data.

    The samples are split, in order, into three contiguous folds. For each
    rotation one fold is used for validation, the next for training and the
    remaining one for testing. Every combination of distance function
    (Manhattan, Euclidean, cosine similarity) and k in 1..10 is scored on the
    validation fold; the best one is then evaluated on the test fold. The
    validation curves are plotted and saved per fold, and the accuracy,
    macro-precision, macro-recall and confusion matrix averaged over the three
    test evaluations are printed.

    Ends by calling ``exit()``, so it does not return to the caller.
    """
    utils.print_header(9)
    n_folds = 3
    x, y, _ = load_car(os.path.join(constants.DATA_DIR,
                                    constants.FILENAME_CAR_DATABASE),
                       standardization=True)
    n_samples = x.shape[0]
    n_labels = np.unique(y).shape[0]
    print('Nb of samples: {}'.format(n_samples))

    # Fold sizes differ by at most one; the first (n_samples % n_folds) folds
    # take the extra sample. The builtin int replaces the np.int alias, which
    # newer NumPy releases no longer provide.
    indices = np.arange(n_samples)
    fold_sizes = (n_samples // n_folds) * np.ones(n_folds, dtype=int)
    fold_sizes[:n_samples % n_folds] += 1
    current = 0
    folds = []
    for fold_size in fold_sizes:
        start, stop = current, current + fold_size
        folds.append({
            'x': x[indices[start:stop]],
            'y': y[indices[start:stop]],
        })
        current = stop

    # grid search
    distances = [
        utils.manhattan_distance, utils.euclidean_distance,
        utils.cosine_similarity
    ]
    k_values = np.arange(1, 11)
    best_results = []
    for i in range(n_folds):
        # Fold roles depend only on the rotation index.
        k_val, k_train, k_test = i, (i + 1) % n_folds, (i + 2) % n_folds
        grid = -1 * np.ones((len(distances), k_values.shape[0]))
        print('Fold {}'.format(i + 1))
        for d, distance in enumerate(distances):
            print('\tDistance: {}'.format(distance.__name__))
            for k, k_value in enumerate(k_values):
                pred = utils.kNN(folds[k_train]['x'], folds[k_train]['y'],
                                 folds[k_val]['x'], k_value, distance)
                acc = utils.accuracy(folds[k_val]['y'], pred['labels'])
                grid[d, k] = acc
                print('\t\tk: {}\tacc: {:.3f}'.format(k + 1, acc))

        # Best validation cell (first one on ties) -> evaluate on the test fold.
        best_d, best_k = np.unravel_index(grid.argmax(), grid.shape)
        pred = utils.kNN(folds[k_train]['x'], folds[k_train]['y'],
                         folds[k_test]['x'], k_values[best_k],
                         distances[best_d])
        best_combination = {
            'k': k_values[best_k],
            'd': best_d,
            'distance': distances[best_d].__name__,
            'acc': utils.accuracy(folds[k_test]['y'], pred['labels']),
            'confusion_matrix': utils.confusion_matrix(
                folds[k_test]['y'], pred['labels'], n_labels),
        }
        best_results.append(best_combination)
        print('\tBest config (fold {}): distance={}, k={}'.format(
            i + 1, best_combination['distance'], best_combination['k']))

        for d, distance in enumerate(distances):
            plt.plot(k_values, grid[d, :], label=distance.__name__)
        plt.xlim([k_values[0], k_values[-1]])
        plt.ylim([80, 100])
        plt.legend()
        plot_fname = os.path.join(constants.OUTPUT_DIR,
                                  'exercicio9-fold-{}.pdf'.format(i + 1))
        plt.savefig(plot_fname, bbox_inches='tight')
        plt.show()

    confusion_matrices = [result['confusion_matrix'] for result in best_results]
    print('avg. accuracy: {:.3f}%'.format(
        utils.mean(np.array(
            [utils.accuracy_from_cm(cm) for cm in confusion_matrices]))))
    print('avg. macro-precision: {:.3f}%'.format(
        utils.mean(np.array(
            [utils.precision_from_cm(cm) for cm in confusion_matrices]))))
    print('avg. macro-recall: {:.3f}%'.format(
        utils.mean(np.array(
            [utils.recall_from_cm(cm) for cm in confusion_matrices]))))
    cm_avg = np.sum(
        [utils.normalize_confusion_matrix(cm) for cm in confusion_matrices], 0)
    print('avg. confusion matrix:\n{}'.format(100. * cm_avg / n_folds))
    # NOTE(review): exit() is the interactive helper installed by the site
    # module; kept because callers may rely on the process ending here.
    exit()
def exercicio8():
    """Run exercise 8: 1-NN decision regions for Euclidean vs. cosine.

    Three labelled 2-D points are used as training data. A grid with step 0.5
    is classified with 1-NN once per distance function; each result is drawn
    as a coloured region map together with the training points and the query
    point (190, 130), saved under ``constants.OUTPUT_DIR`` and shown. Finally
    the class predicted for the query point is printed for both distances.
    """
    utils.print_header(8)

    x = np.array([[50, 50], [60, 150], [160, 40]])
    y = np.array([[0], [1], [2]])
    x_test = np.array([190, 130])

    # create a grid to plot the 'voronoi' diagram
    step = 0.5
    x_min, x_max = 0, 200
    y_min, y_max = 0, 200
    xx, yy = np.meshgrid(np.arange(x_min - 1, x_max + 1, step),
                         np.arange(y_min - 1, y_max + 1, step))

    def classify_grid(distance):
        # Label every grid node with its nearest training point's class and
        # fold the flat result back into the grid's shape.
        nodes = np.c_[xx.ravel(), yy.ravel()]
        result = utils.kNN(x, y, nodes, k=1, distance=distance)
        return result['labels'].reshape(xx.shape)

    def display_plot(voronoi, fname, title):
        markers = np.array(['s', 'D', '^'])
        marker_colors = ['blue', 'gray', 'red']
        cmap = colors.ListedColormap(['lightblue', 'lightgray', 'lightcoral'])
        bounds = (xx.min(), xx.max(), yy.min(), yy.max())
        plt.imshow(voronoi, interpolation='nearest', extent=bounds, cmap=cmap,
                   aspect='auto', origin='lower')
        # One scatter call per class so each gets its own colour and marker.
        for i in range(y.shape[0]):
            rows = np.where(y == i)[0]
            plt.scatter(x[rows, 0], x[rows, 1], c=marker_colors[i],
                        marker=markers[i], lw=0, s=100)
        plt.scatter(x_test[0], x_test[1], c=['green'], marker='o', lw=0,
                    s=100)
        plt.xlim(x_min, x_max)
        plt.ylim(y_min, y_max)
        plt.title(title)
        fig_fname = os.path.join(constants.OUTPUT_DIR, fname)
        plt.savefig(fig_fname, bbox_inches='tight')
        plt.show()
        return fig_fname

    print('a) a plot using the \'Euclidean Distance\' will be displayed...')
    euclidean_regions = classify_grid(utils.euclidean_distance)
    plot_fname = display_plot(euclidean_regions, 'exercicio8-a.pdf',
                              'Euclidean Distance')
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('b) a plot using the \'Cosine Similarity\' will be displayed...')
    cosine_regions = classify_grid(utils.cosine_similarity)
    plot_fname = display_plot(cosine_regions, 'exercicio8-b.pdf',
                              'Cosine Similarity')
    print('\tThis plot was saved: {}'.format(plot_fname))

    print('c)')
    test_euclidean = utils.kNN(x, y, [x_test], k=1,
                               distance=utils.euclidean_distance)
    test_cosine = utils.kNN(x, y, [x_test], k=1,
                            distance=utils.cosine_similarity)
    euclidean_class = test_euclidean['labels'].squeeze()
    cosine_class = test_cosine['labels'].squeeze()
    print('\tUsing Euclidean Distance: Class {}'.format(euclidean_class))
    print('\tUsing Cosine Similarity: Class {}'.format(cosine_class))