Python kNNの例、utils.kNN Pythonの例

コード例 #1

0

ファイルを表示

 def eval_c(feature_selector, n):
     """Score a feature selector on the test fold.

     The train and validation folds are pooled, ``feature_selector`` is asked
     for ``n`` attributes (the pooled data is passed as both of its data
     arguments), and ``utils.kNN`` is then run on the selected columns with
     ``1`` as its fourth argument (presumably k -- confirm against
     ``utils.kNN``) and the Euclidean distance.

     Reads ``folds``, ``k_train``, ``k_val`` and ``k_test`` from the
     enclosing scope.

     Returns whatever ``utils.accuracy`` returns for the test-fold labels.
     """
     train_fold, val_fold = folds[k_train], folds[k_val]
     pooled_x = np.vstack([train_fold['x'], val_fold['x']])
     pooled_y = np.hstack([train_fold['y'], val_fold['y']])
     chosen = feature_selector(pooled_x, pooled_y, pooled_x, pooled_y, n)

     # Rebuild both arrays after selection so the classifier never sees the
     # objects that were handed to the selector.
     reduced_x = np.vstack([train_fold['x'][:, chosen], val_fold['x'][:, chosen]])
     fresh_y = np.hstack([train_fold['y'], val_fold['y']])

     test_fold = folds[k_test]
     outcome = utils.kNN(reduced_x, fresh_y, test_fold['x'][:, chosen], 1, utils.euclidean_distance)
     return utils.accuracy(test_fold['y'], outcome['labels'])

コード例 #2

0

ファイルを表示

def SBS(x_train, y_train, x_val, y_val, n):
    """Sequential backward selection of ``n`` attributes.

    Starts from every column of ``x_train`` and, while more than ``n``
    remain, drops the attribute whose removal yields the highest validation
    accuracy (ties keep the lowest column index because the comparison is
    strict). Each candidate subset is scored with ``utils.kNN`` using the
    Euclidean distance.

    Returns the surviving column indices as a sorted list.
    """
    n_attrs = x_train.shape[1]
    remaining = set(np.arange(0, n_attrs))
    while len(remaining) > n:
        print_attributes(remaining, 'Attributes')
        drop_candidate, top_acc = -1, -1
        for attr in range(n_attrs):
            if attr in remaining:
                kept = list(remaining - {attr})
                outcome = utils.kNN(x_train[:, kept], y_train, x_val[:, kept], 1, utils.euclidean_distance)
                score = utils.accuracy(y_val, outcome['labels'])
                if score > top_acc:
                    top_acc, drop_candidate = score, attr
        remaining.discard(drop_candidate)
    print_attributes(remaining, 'Selected Attributes')
    return sorted(remaining)

コード例 #3

0

ファイルを表示

ファイル: test.py プロジェクト: jvahala/lucid-robotics

	def iterateVelTask(curr_task_id): 
		'''
		Label every frame of task `curr_task_id` with utils.kNN (k=20), using the
		velocity features of tasks 0..curr_task_id-1 as the labeled reference set.

		Reads select, receiver, starts, task and getVelFeatures from the enclosing scope.
		Returns the per-frame labels converted to int.

		NOTE(review): when curr_task_id is 0 the stacking loop never runs, so `old`
		is unbound at the first use below.
		'''
		curr_labels = []
		# stack the velocity features of all earlier tasks row-wise into `old`
		for item in np.arange(curr_task_id): 
			if item == 0: 
				old = getVelFeatures(item)
			else:
				addold = getVelFeatures(item)
				old = np.vstack((old,addold))
		consider = getVelFeatures(curr_task_id)

		#reshape old and consider to include traditional features as well as velocity features
		if select!='': 
			print 'traditional features added'
			# append the matching rows of receiver.feat_array as extra columns
			old = np.hstack((old,receiver.feat_array[starts[0]:starts[curr_task_id],:]))
			consider = np.hstack((consider,receiver.feat_array[starts[curr_task_id]:starts[curr_task_id+1],:]))
		print 'old/consider,', old.shape, consider.shape
		# i only counts frames for the disabled debug prints below
		i = 0
		for frame in consider:
			[knn_label,count_info] = utils.kNN(frame,old,task.labels, k=20) 
			curr_labels.append(knn_label)
			#print '\n\nframe number: ', i
			#print count_info
			i += 1
		return [int(x) for x in curr_labels]

コード例 #4

0

ファイルを表示

ファイル: exercicio3.py プロジェクト: rodrigoberriel/aprendizado-de-maquina-2017-1

def exercicio3():
    """Run exercise 3 on the 'nebulosa' train/test databases.

    Part a) fills incomplete samples, then reports nearest-neighbour and
    Rocchio accuracy on all attribute columns and saves a pair plot of the
    training data. Part b) cleans the training data further (outliers,
    redundant attribute, duplicates, ambiguous samples), repeats the
    evaluation on columns ``2:-1`` and saves a second pair plot.
    """

    def load(filename):
        # Both databases live in the same data directory.
        return load_nebulosa(os.path.join(constants.DATA_DIR, filename))

    def report_accuracies(x_tr, y_tr, x_te, y_te):
        # 1-NN first, then Rocchio, both with the Euclidean distance.
        nn_pred = utils.kNN(x_tr,
                            y_tr,
                            x_te,
                            k=1,
                            distance=utils.euclidean_distance)
        nn_acc = utils.accuracy(y_te, nn_pred['labels'])
        print('\tAccuracy (NN): {:.3f}'.format(nn_acc))
        rocchio_pred = utils.rocchio(x_tr,
                                     y_tr,
                                     x_te,
                                     distance=utils.euclidean_distance)
        rocchio_acc = utils.accuracy(y_te, rocchio_pred)
        print('\tAccuracy (Rocchio): {:.3f}'.format(rocchio_acc))

    def save_pairplot(samples, pdf_name):
        # Draw, write the PDF into the output directory, then display.
        sns.pairplot(pd.DataFrame(samples),
                     markers='+',
                     plot_kws=dict(s=50, edgecolor='b', linewidth=1))
        target = os.path.join(constants.OUTPUT_DIR, pdf_name)
        plt.savefig(target, bbox_inches='tight')
        plt.show()

    utils.print_header(3)
    train_data = load(constants.FILENAME_NEBULOSA_TRAIN_DATABASE)
    test_data = load(constants.FILENAME_NEBULOSA_TEST_DATABASE)

    train_data = handle_incomplete(train_data)
    test_data = handle_incomplete(test_data, train_data)

    print('a)')
    # The last column holds the label; everything before it is an attribute.
    report_accuracies(train_data[:, :-1], train_data[:, -1],
                      test_data[:, :-1], test_data[:, -1])
    save_pairplot(train_data, 'exercicio3-a.pdf')

    print('b)')
    train_data = remove_outliers(train_data)
    print('\tLast two attributes are redundant. Remove one.')
    train_data = remove_redundant_attribute(train_data)
    test_data = remove_redundant_attribute(test_data)
    train_data = remove_duplicate_samples(train_data)
    train_data = disambiguate_samples(train_data, utils.euclidean_distance)
    print('\tNb of samples: {}'.format(train_data.shape[0]))

    # Part b) ignores the first two columns as well as the label column.
    report_accuracies(train_data[:, 2:-1], train_data[:, -1],
                      test_data[:, 2:-1], test_data[:, -1])
    save_pairplot(train_data[:, 2:], 'exercicio3-b.pdf')

コード例 #5

0

ファイルを表示

ファイル: main.py プロジェクト: jvahala/lucid-robotics

def iterateTask(curr_task_id,receiver,starts,task,testvalue): 
	'''
	Label every frame of task `curr_task_id`, for use after main.begin has been called
	to get receiver,giver,starts,task,curr_labels.

	Each frame gets a kNN vote (k=20) against the rows of receiver.feat_array that belong
	to the earlier tasks; the vote counts are scaled by mixed.proportionate(...), the
	majority label of the last few frames is boosted (+3, then * testvalue), and the label
	with the largest weighted count wins. The first frame is always labeled task.path[0].

	Returns the chosen labels converted to int.
	'''
	def updateCounts(count_info,proportions): 
		# scale each count in count_info[1] by the proportion of its state (count_info[0]);
		# count_info is modified in place and also returned
		#print 'PREupdate: ', count_info[0],count_info[1], '* ', proportions
		for ind,state in enumerate(count_info[0]): 
			count_info[1][ind] = count_info[1][ind] * proportions[state]
		#print 'POSTupdate: ', count_info[0],count_info[1]
		return count_info

	def updatePosition(mixed,base_state,new_state,count_new,curr_position,position_threshold = 4): 
		'''
		pseudocode: 
		if base_state == new_state, return count_new = 0, mixed unaltered
		else count_new++, if count_new > position_threshold, increment position and recreate mixed, count_new = 0, base_state = new_state, else return 
		return count_new 

		NOTE(review): the code below does not reset count_new to 0 after a position change,
		although the pseudocode says it should.
		Returns (base_state, count_new, mixed, curr_position).
		'''
		if base_state == new_state: 
			count_new = 0 
		else: 
			count_new += 1
			if count_new > position_threshold: 
				print 'curr/next = ', curr_position, '/',np.argmax(np.array(task.path[curr_position:])==new_state), 'new: ', new_state
				# offset of the first occurrence of new_state at or after curr_position;
				# np.argmax yields 0 when there is no match, leaving the position unchanged
				curr_position += np.argmax(np.array(task.path[curr_position:])==new_state)
				print curr_position, 'llllll'
				if curr_position == 0: 
					curr_position = len(task.path)-1		#if there is not corresponding position, then default to the last position
				mixed = rayleigh.MixedRayleigh(task, curr_position)		#update the mixedRayleigh
				#k = i-position_threshold
				base_state = new_state
		return base_state, count_new, mixed, curr_position

	def guessFromPast(curr_labels,past_length=3):
		'''
		gets majority vote from past maximum 'past_length' number of labels (or all existing labels) in curr_labels 
		returns -1 when curr_labels is empty
		'''
		numlabels = len(curr_labels)
		if numlabels>0:
			if numlabels > past_length:
				consider = curr_labels[-past_length:]
			else: 
				consider = curr_labels 
			best,aux = utils.majorityVote(consider)
			return best
		else: 
			return -1

	def taskPercentRemaining(task,curr_position,differential): 
		# NOTE(review): the two local names below are swapped relative to what they hold.
		# `percent_complete` is the share of task time still remaining, and the returned
		# `percent_remaining` (100 minus that) is therefore the completed percentage.
		curr_state_time_remaining = max(task.times[curr_position]-differential, 0)
		if curr_position == len(task.path)-1: 
			future_states_times = 0
		else: 
			future_states_times = np.sum(task.times[(curr_position+1):])
		total_task_time = np.sum(task.times)
		percent_complete = 100*(curr_state_time_remaining+future_states_times)/float(total_task_time)
		percent_remaining = 100 - percent_complete
		return percent_remaining

	curr_labels = []
	# old is assigned but not used afterwards; the kNN call below slices feat_array again
	old = receiver.feat_array[starts[0]:starts[curr_task_id]]
	consider = receiver.feat_array[starts[curr_task_id]:starts[curr_task_id+1]]
	curr_position = 0
	mixed = rayleigh.MixedRayleigh(task, position=curr_position)
	# i = index of the current frame; k = frame index at which the position last changed
	i = 0
	k = 0
	for frame in consider:
		# no-op branch (has no effect)
		if i == 100: 
			pass
		# the first frame is labeled with the first state of the path without running kNN
		if i == 0: 
			curr_labels.append(task.path[0])
			base_state = task.path[0]
			count_new = 0
			i += 1
			continue
		#need to check if mixedRayleigh needs to be updated to the new position, so if the initial state value changes and stays like that for n iterations, then update the position
		kNNnumber = 20
		[knn_label,count_info] = utils.kNN(frame,receiver.feat_array[starts[0]:starts[curr_task_id]],task.labels, k=kNNnumber) 
		print 'mixed position: ', mixed.position
		proportions = mixed.proportionate(i-k)
		#choose label by adding in proportions to consideration
		count_info_updated = updateCounts(count_info,proportions)	
		#incorporate smoothing by further weighting the past few states
		expectedfrompast = guessFromPast(curr_labels)
		# the bare string below is a disabled debug snippet, not executed code
		'''if i == 5: 
			print '1: ', np.argmax(np.array(count_info_updated[0])==expectedfrompast)
			print '2: ', expectedfrompast
			print '3; ', count_info_updated 
			print '4: ', count_info_updated[1][np.argmax(np.array(count_info_updated[0])==expectedfrompast)]
			break '''
		if expectedfrompast != -1: 
			# boost the count of the recent-majority label: add 3, then scale by testvalue
			# (np.argmax falls back to index 0 when that label is absent from count_info_updated[0])
			x = count_info_updated[1][np.argmax(np.array(count_info_updated[0])==expectedfrompast)]
			x += 3
			x *= testvalue
			count_info_updated[1][np.argmax(np.array(count_info_updated[0])==expectedfrompast)] = x
		# the final label is the state with the largest weighted count
		knn_label_updated = count_info_updated[0][np.argmax(count_info_updated[1])]
		curr_labels.append(knn_label_updated)

		new_base_state, new_count_new, new_mixed, new_curr_position= updatePosition(mixed, base_state,knn_label_updated,count_new,curr_position,position_threshold=4)

		base_state = new_base_state
		count_new = new_count_new
		mixed = new_mixed
		# remember the frame at which the position changed so later offsets are relative to it
		if curr_position != new_curr_position:
			k = i
			curr_position = new_curr_position

		percent_complete = taskPercentRemaining(task, curr_position, i-k)


		'''uncomment the following two lines to print the kNN count info associated with each frame'''
		print '\n\nframe number: ', i, i-k
		print count_info_updated
		print 'percent complete: ', percent_complete

		i += 1
	return [int(x) for x in curr_labels]

コード例 #6

0

ファイルを表示

ファイル: pyvrep.py プロジェクト: jvahala/lucid-robotics

	def onlineUpdate(kNN_number=20,complete=False): 
		'''
		Process the newest line of data: add it to `data`, label it with a kNN vote weighted
		by the MixedRayleigh proportions and the recent labels, update the tracked position,
		and report progress.

		Inputs: 
		kNN_number - k passed to utils.kNN
		complete - when True the reported percent complete is forced to 100.0

		Outputs: 
		(information, percent_complete), where information is the tab-separated string
		"<latest label>\t<percent complete>\t<task id>"

		Besides the globals declared below, the body reads data, tmpfile, last_task_end,
		labeled_data and task from an outer scope.
		'''
		# define used global variables
		global base_state
		global count_new 
		global mixed 
		global curr_mixed_position 
		global last_state_change_frame
		global curr_position 
		global curr_task_labels
		#global task 
		#global data 

		def updateCounts(count_info,proportions): 
			# scale each count in count_info[1] by the proportion of its state (count_info[0]);
			# count_info is modified in place and also returned
			#print 'PREupdate: ', count_info[0],count_info[1], '* ', proportions
			for ind,state in enumerate(count_info[0]): 
				count_info[1][ind] = float(count_info[1][ind]) * float(proportions[state])
			#print 'POSTupdate: ', count_info[0],count_info[1]
			return count_info

		def updatePosition(mixed,base_state,new_state,count_new,curr_position,position_threshold = 4): 
			'''
			pseudocode: 
			if base_state == new_state, return count_new = 0, mixed unaltered
			else count_new++, if count_new > position_threshold, increment position and recreate mixed, count_new = 0, base_state = new_state, else return 
			return count_new 

			NOTE(review): the code below does not reset count_new to 0 after a position change,
			although the pseudocode says it should.
			Returns (base_state, count_new, mixed, curr_position).
			'''
			print 'Current rayleigh position: ', curr_position
			if base_state == new_state: 
				count_new = 0 
			else: 
				count_new += 1
				if count_new > position_threshold: 
					print 'curr/next = ', curr_position, '/',np.argmax(np.array(task.path[curr_position:])==new_state), 'new: ', new_state
					# offset of the first occurrence of new_state at or after curr_position;
					# np.argmax yields 0 when there is no match, leaving the position unchanged
					curr_position += np.argmax(np.array(task.path[curr_position:])==new_state)
					print curr_position, 'llllll'
					if curr_position == 0: 
						curr_position = len(task.path)-1		#if there is not corresponding position, then default to the last position
					mixed = rayleigh.MixedRayleigh(task, curr_position)		#update the mixedRayleigh
					base_state = new_state
			print 'New rayleigh position: ', curr_position, 'base state new: ', base_state
			print task.path, task.times
			return base_state, count_new, mixed, curr_position

		def guessFromPast(curr_labels,past_length=3):
			'''
			gets majority vote from past maximum 'past_length' number of labels (or all existing labels) in curr_labels 
			returns -1 when curr_labels is empty
			'''
			numlabels = len(curr_labels)
			if numlabels>0:
				if numlabels > past_length:
					consider = curr_labels[-past_length:]
				else: 
					consider = curr_labels 
				print 'Guess from past function consider: ', consider
				best,aux = utils.majorityVote(consider)
				return best
			else: 
				return -1

		def taskPercentRemaining(task,curr_position,differential): 
			# despite the function name, the value returned is the percent COMPLETE:
			# 100 minus the share of task time still remaining (rest of the current state
			# plus all later states)
			curr_state_time_remaining = max(task.times[curr_position]-differential, 0)
			print 'task.times[curr_position]-differential: ', task.times[curr_position], '-', differential,'=', task.times[curr_position]-differential
			print 'Current state time remaining: ', curr_state_time_remaining
			if curr_position == len(task.path)-1: 
				future_states_times = 0
			else: 
				future_states_times = np.sum(task.times[(curr_position+1):])
			print 'Expected future state time remaining: ', future_states_times
			total_task_time = np.sum(task.times)
			percent_remaining = 100*(curr_state_time_remaining+future_states_times)/float(total_task_time)
			percent_complete = 100 - percent_remaining
			return percent_complete
			

		# add the new line of data and get features
		data.addData(tmpfile)
		data.getFeatures()
		# separate labeled data from new data
		new_data = data.feat_array[data.num_vectors-1,:]
		print 'New data considered: ', new_data

		# some local variables for determining current frame and expected frame along the state-transition-path
		frames_in_curr_task = data.num_vectors - last_task_end		#frames since last task ended
		
		# get init guess at knn_label and probability proportions based on current position within the task
		[knn_label,count_info] = utils.kNN(new_data,labeled_data,task.labels,k=kNN_number) 
		print 'Initial Knn: ', knn_label, count_info 
		proportions = mixed.proportionate(frames_in_curr_task-last_state_change_frame)
		print 'MixedRayleigh proportions: ', proportions

		# incorporate proportions and past few labels
		count_info_updated = updateCounts(count_info,proportions)
		print 'After proportions considerations: ', count_info_updated	
		expectedfrompast = guessFromPast(curr_task_labels)
		print 'Guess from past: ', expectedfrompast

		# if labels have been added, used expectedfrompast to weight the most likely candidate
		if expectedfrompast != -1: 
			# boost the count of the recent-majority label: add 3, then scale by 1.1
			# (np.argmax falls back to index 0 when that label is absent from count_info_updated[0])
			x = count_info_updated[1][np.argmax(np.array(count_info_updated[0])==expectedfrompast)]
			x += 3
			x *= 1.1
			count_info_updated[1][np.argmax(np.array(count_info_updated[0])==expectedfrompast)] = x

		# determine new label based on weighted kNN 
		print 'After guess from past applied: ', count_info_updated
		knn_label_updated = count_info_updated[0][np.argmax(count_info_updated[1])]
		print 'New knn label chosen: ', knn_label_updated
		curr_task_labels.append(knn_label_updated)

		# update mixedRayleigh distribution for the new frame/possibly new base state 
		new_base_state,new_count_new,new_mixed,new_curr_mixed_position = updatePosition(mixed,base_state,knn_label_updated,count_new,curr_mixed_position,position_threshold=4)

		# update global variables

		base_state = new_base_state
		count_new = new_count_new
		mixed = new_mixed
		# remember the frame at which the position changed so later offsets are relative to it
		if curr_mixed_position != new_curr_mixed_position:
			last_state_change_frame = frames_in_curr_task
			curr_mixed_position = new_curr_mixed_position

		# get the percent complete; a caller-supplied complete=True overrides the estimate
		if complete: 
			percent_complete = 100.0
		else: 
			frames_since_state_change = frames_in_curr_task-last_state_change_frame
			print 'Frames since state change: ', frames_since_state_change
			percent_complete = taskPercentRemaining(task,curr_mixed_position,frames_since_state_change)

		# define information string
		task_id = 0		#not implemented to determine which task is happening, so just making it task 0 for now....
		information = str(curr_task_labels[-1])+'\t'+str(percent_complete)+'\t'+str(task_id)
		print 'information: '+information
		return information, percent_complete

コード例 #7

0

ファイルを表示

ファイル: process.py プロジェクト: jvahala/lucid-robotics

	def getCurrentLabel(self,new_data,data_object,curr_frame_count,mixed,kNN_number=20,complete_threshold=80.0): 
		'''
		Purpose: 
		Takes a new row of data as input and uses kNN, mixedRayleigh proportions, and recent labels to append a new state to self.curr_labels of the online task. Also updates the mixedRayleigh position and self.percent_complete. 

		Inputs: 
		new_data - feature row of the newest frame; indexed with self.feature_inds
		data_object - object whose all_features array holds the labeled rows (selected with self.data_inds)
		curr_frame_count - index of the current frame, used to measure time since the last state change
		mixed - object providing proportionate(frames) and updateSelf(position) (presumably a MixedRayleigh - confirm)
		kNN_number - k passed to utils.kNN
		complete_threshold - not used by this method

		Outputs: 
		(percent_complete, mixed) - the updated self.percent_complete and the (in-place updated) mixed object

		'''
		def updateCounts(count_info,proportions): 
			# scale each count in count_info[1] by the proportion of its state (count_info[0]);
			# count_info is modified in place and also returned
			for ind,state in enumerate(count_info[0]): 
				count_info[1][ind] = float(count_info[1][ind]) * float(proportions[state])
			return count_info
		def guessFromPast(curr_labels,past_length=3):
			'''
			gets majority vote from past maximum 'past_length' number of labels (or all existing labels) in curr_labels 
			returns -1 when curr_labels is empty
			'''
			numlabels = len(curr_labels)
			if numlabels>0:
				if numlabels > past_length:
					consider = curr_labels[-past_length:]
				else: 
					consider = curr_labels 
				best,aux = utils.majorityVote(consider)
				return best
			else: 
				return -1
		def updatePosition(mixed,path,base_state,new_state,count_new,curr_position,position_threshold = 2): 
			'''
			if base_state == new_state: reset count_new to 0 and leave everything else alone
			else: count_new++, and once count_new > position_threshold move curr_position to the
			new state (forward to its next occurrence, or one step back if it equals the previous
			state), update mixed to that position and set base_state = new_state
			returns (base_state, count_new, mixed, curr_position)
			'''
			if base_state == new_state: 
				count_new = 0 
			else: 
				count_new += 1
				if count_new > position_threshold: 
					# default: stay where we are. Previously this was left unbound when the new state
					# was not ahead in the path and curr_position was 0, raising UnboundLocalError.
					curr_position_update_amount = 0
					# if the count for the new state is acceptable, then if the new state does not exist in the future, shift back to the past and change the necessary time step
					if new_state in path[curr_position:]: 
						curr_position_update_amount = np.argmax(np.array(path[curr_position:])==new_state)
					elif curr_position > 0 and new_state == path[curr_position-1]:
						curr_position_update_amount = -1

					curr_position += curr_position_update_amount

					mixed.updateSelf(curr_position)		#update the mixedRayleigh
					base_state = new_state
			return base_state, count_new, mixed, curr_position
		def taskPercentRemaining(path,times,curr_position,differential): 
			# despite the function name, the value returned is the percent COMPLETE:
			# 100 minus the share of task time still remaining
			curr_state_time_remaining = max(times[curr_position]-differential, 0)
			if curr_position == len(path)-1: 
				future_states_times = 0
			else: 
				future_states_times = np.sum(times[(curr_position+1):])		#the last state in pick and place tasks is typically very short and very quickly completed, so it should be included as a given in the percent complete 
			total_task_time = np.sum(np.array(times))		#see details for 'future_state_times'
			percent_remaining = 100*(curr_state_time_remaining+future_states_times)/float(total_task_time)
			percent_complete = 100 - percent_remaining
			return percent_complete

		#define base state (first frame of the task: reset all tracking state)
		if len(self.curr_labels) == 0:
			self.base_state = self.path[0]
			self.count_new = 0 
			self.curr_mixed_position = 0
			self.last_state_change_frame = 0
			self.percent_complete = 0.0
			self.frames_since_state_change = 0
			mixed.updateSelf(self.curr_mixed_position)

		#get labeled data with correct rows from the data object and correct task-specific features
		all_labeled_data = data_object.all_features[self.data_inds,:]	

		#select a random subset of the labeled data [(min(max_labeled_data_count,len(all_labeled_data))) points] to keep speed costs
		max_labeled_data_count = self.max_labeled_data_count
		if len(all_labeled_data) <= max_labeled_data_count: 
			labeled_data = all_labeled_data[:,self.feature_inds]
		else: 
			labeled_data_selection_inds = np.random.permutation(len(all_labeled_data))[0:max_labeled_data_count]
			labeled_data = all_labeled_data[labeled_data_selection_inds,:]
			labeled_data = labeled_data[:,self.feature_inds]

		#pick out the newest data
		curr_data = new_data[self.feature_inds]

		#get initial kNN count
		[knn_label,count_info] = utils.kNN(curr_data,labeled_data,self.labels,k=kNN_number) 
		proportions = mixed.proportionate(curr_frame_count-self.last_state_change_frame)

		# incorporate proportions and past few labels
		# NOTE(review): -1 is treated as "no proportions available"; confirm proportionate() returns a scalar -1 in that case
		if proportions == -1: 
			knn_label_updated = knn_label 
		else: 
			count_info_updated = updateCounts(count_info,proportions)
			expectedfrompast = guessFromPast(self.curr_labels)

			# if labels have been added, used expectedfrompast to weight the most likely candidate
			if expectedfrompast != -1: 
				# np.argmax falls back to index 0 when the label is absent from count_info_updated[0]
				boost_ind = np.argmax(np.array(count_info_updated[0])==expectedfrompast)
				x = count_info_updated[1][boost_ind]
				x += 1		#this addition and multiplication gives a bit of a chance to low scoring values
				x *= 1.1
				count_info_updated[1][boost_ind] = x

			# determine new label based on weighted kNN 
			knn_label_updated = count_info_updated[0][np.argmax(count_info_updated[1])]
		self.curr_labels.append(knn_label_updated)

		# update mixedRayleigh distribution for the new frame/possibly new base state 
		new_base_state,new_count_new,new_mixed,new_curr_mixed_position = updatePosition(mixed,self.path,self.base_state,knn_label_updated,self.count_new,self.curr_mixed_position,position_threshold=1)

		# update tracked state
		self.base_state = new_base_state
		self.count_new = new_count_new
		# if the mixed position has changed to a more advanced state, update to that state naturally, else update to the new state taking into account how many frames have passed in the already completed states
		if self.curr_mixed_position < new_curr_mixed_position:
			self.last_state_change_frame = curr_frame_count - 3 	#assumes the actual state started just a few frames prior to this new changed mixed rayleigh implementation
			self.curr_mixed_position = new_curr_mixed_position
		elif self.curr_mixed_position > new_curr_mixed_position: 
			self.last_state_change_frame = self.last_state_change_frame-self.frames_since_state_change 	#assumes the previous mixed_position was a fluke and that those states should have actually been given to this new previous mixed_position
			self.curr_mixed_position = new_curr_mixed_position

		# get the percent complete from the time spent in the current state
		self.frames_since_state_change = curr_frame_count-self.last_state_change_frame
		self.percent_complete = taskPercentRemaining(self.path,self.times,self.curr_mixed_position,self.frames_since_state_change)

		return self.percent_complete, new_mixed

コード例 #8

0

ファイルを表示

def exercicio9():
    """Run exercise 9: kNN grid search over distance and k on the car database.

    The samples are split, in order, into three folds. For each rotation
    ``i`` fold ``i`` is the validation fold, fold ``i + 1`` the training fold
    and fold ``i + 2`` the test fold (indices wrap around). Every combination
    of distance function and ``k`` in 1..10 is scored on the validation fold;
    the best one is re-evaluated on the test fold, and its validation curve
    is plotted and saved. Averages of the test-fold metrics are printed at
    the end, after which the interpreter exits via ``exit()``.
    """
    utils.print_header(9)
    n_folds = 3
    x, y, mapping = load_car(os.path.join(constants.DATA_DIR,
                                          constants.FILENAME_CAR_DATABASE),
                             standardization=True)
    n_samples = x.shape[0]
    n_labels = np.unique(y).shape[0]
    print('Nb of samples: {}'.format(n_samples))

    indices = np.arange(n_samples)
    # ``np.int`` was removed in NumPy 1.24 (AttributeError); the builtin
    # ``int`` is the dtype it used to alias.
    fold_sizes = (n_samples // n_folds) * np.ones(n_folds, dtype=int)
    # Spread the remainder over the first folds so sizes differ by at most 1.
    fold_sizes[:n_samples % n_folds] += 1
    current = 0
    folds = []
    for fold_size in fold_sizes:
        start, stop = current, current + fold_size
        folds.append({
            'x': x[indices[start:stop]],
            'y': y[indices[start:stop]],
        })
        current = stop

    # grid search
    distances = [
        utils.manhattan_distance, utils.euclidean_distance,
        utils.cosine_similarity
    ]
    k_values = np.arange(1, 11)
    best_results = []
    for i in range(n_folds):
        # The fold roles only depend on the rotation, not on the grid cell.
        k_val, k_train, k_test = i, (i + 1) % n_folds, (i + 2) % n_folds
        grid = -1 * np.ones((len(distances), k_values.shape[0]))
        print('Fold {}'.format(i + 1))
        for d_idx, distance in enumerate(distances):
            print('\tDistance: {}'.format(distance.__name__))
            for k_idx, k_value in enumerate(k_values):
                pred = utils.kNN(folds[k_train]['x'], folds[k_train]['y'],
                                 folds[k_val]['x'], k_value, distance)
                acc = utils.accuracy(folds[k_val]['y'], pred['labels'])
                grid[d_idx, k_idx] = acc
                print('\t\tk: {}\tacc: {:.3f}'.format(k_value, acc))

        # First best cell in row-major order wins ties.
        best_d, best_k = np.unravel_index(grid.argmax(), grid.shape)
        pred = utils.kNN(folds[k_train]['x'], folds[k_train]['y'],
                         folds[k_test]['x'], k_values[best_k],
                         distances[best_d])
        best_combination = {
            'k': k_values[best_k],
            'd': best_d,
            'distance': distances[best_d].__name__,
            'acc': utils.accuracy(folds[k_test]['y'], pred['labels']),
            'confusion_matrix': utils.confusion_matrix(folds[k_test]['y'],
                                                       pred['labels'],
                                                       n_labels),
        }
        best_results.append(best_combination)
        print('\tBest config (fold {}): distance={}, k={}'.format(
            i + 1, best_combination['distance'], best_combination['k']))

        for d_idx, distance in enumerate(distances):
            plt.plot(k_values, grid[d_idx, :], label=distance.__name__)
        plt.xlim([k_values[0], k_values[-1]])
        # NOTE(review): the fixed 80..100 range assumes utils.accuracy
        # reports a percentage -- confirm.
        plt.ylim([80, 100])
        plt.legend()
        plot_fname = os.path.join(constants.OUTPUT_DIR,
                                  'exercicio9-fold-{}.pdf'.format(i + 1))
        plt.savefig(plot_fname, bbox_inches='tight')
        plt.show()

    def mean_over_folds(metric_from_cm):
        # Average one confusion-matrix metric over the per-fold best results.
        return utils.mean(
            np.array([
                metric_from_cm(result['confusion_matrix'])
                for result in best_results
            ]))

    print('avg. accuracy: {:.3f}%'.format(
        mean_over_folds(utils.accuracy_from_cm)))
    print('avg. macro-precision: {:.3f}%'.format(
        mean_over_folds(utils.precision_from_cm)))
    print('avg. macro-recall: {:.3f}%'.format(
        mean_over_folds(utils.recall_from_cm)))
    cm_avg = np.sum([
        utils.normalize_confusion_matrix(result['confusion_matrix'])
        for result in best_results
    ], 0)
    print('avg. confusion matrix:\n{}'.format(100. * cm_avg / n_folds))
    exit()

コード例 #9

0

ファイルを表示

ファイル: exercicio8.py プロジェクト: rodrigoberriel/aprendizado-de-maquina-2017-1

def exercicio8():
    """Run exercise 8: 1-NN decision regions for three labelled 2-D points.

    Classifies every point of a 0.5-spaced grid with ``utils.kNN`` (k=1),
    once with the Euclidean distance and once with the cosine similarity,
    and saves/shows the resulting region plots. Finally prints the class
    each metric assigns to the single test point (190, 130).
    """
    utils.print_header(8)
    x = np.array([[50, 50], [60, 150], [160, 40]])
    y = np.array([[0], [1], [2]])
    x_test = np.array([190, 130])

    # create a grid to plot the 'voronoi' diagram
    step = 0.5
    x_min, x_max = 0, 200
    y_min, y_max = 0, 200
    xx, yy = np.meshgrid(np.arange(x_min - 1, x_max + 1, step),
                         np.arange(y_min - 1, y_max + 1, step))
    # One (x, y) row per grid cell, in the order reshape(xx.shape) expects.
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    def classify(points, metric):
        # 1-nearest-neighbour against the three labelled points.
        return utils.kNN(x, y, points, k=1, distance=metric)

    def display_plot(voronoi, fname, title):
        markers = np.array(['s', 'D', '^'])
        marker_colors = ['blue', 'gray', 'red']
        cmap = colors.ListedColormap(['lightblue', 'lightgray', 'lightcoral'])
        plt.imshow(voronoi,
                   interpolation='nearest',
                   extent=(xx.min(), xx.max(), yy.min(), yy.max()),
                   cmap=cmap,
                   aspect='auto',
                   origin='lower')
        # Draw the training points of each class with their own marker.
        for label in range(y.shape[0]):
            rows = np.where(y == label)[0]
            plt.scatter(x[rows, 0],
                        x[rows, 1],
                        c=marker_colors[label],
                        marker=markers[label],
                        lw=0,
                        s=100)
        plt.scatter(x_test[0], x_test[1], c=['green'], marker='o', lw=0, s=100)
        plt.xlim(x_min, x_max)
        plt.ylim(y_min, y_max)
        plt.title(title)
        fig_fname = os.path.join(constants.OUTPUT_DIR, fname)
        plt.savefig(fig_fname, bbox_inches='tight')
        plt.show()
        return fig_fname

    print('a) a plot using the \'Euclidean Distance\' will be displayed...')
    knn_euclidean = classify(grid_points, utils.euclidean_distance)
    saved_to = display_plot(knn_euclidean['labels'].reshape(xx.shape),
                            'exercicio8-a.pdf', 'Euclidean Distance')
    print('\tThis plot was saved: {}'.format(saved_to))

    print('b) a plot using the \'Cosine Similarity\' will be displayed...')
    knn_cosine = classify(grid_points, utils.cosine_similarity)
    saved_to = display_plot(knn_cosine['labels'].reshape(xx.shape),
                            'exercicio8-b.pdf', 'Cosine Similarity')
    print('\tThis plot was saved: {}'.format(saved_to))

    print('c)')
    test_euclidean = classify([x_test], utils.euclidean_distance)
    test_cosine = classify([x_test], utils.cosine_similarity)
    print('\tUsing Euclidean Distance: Class {}'.format(
        test_euclidean['labels'].squeeze()))
    print('\tUsing Cosine Similarity: Class {}'.format(
        test_cosine['labels'].squeeze()))