Esempio n. 1
0
	def test_word_crossover_word_pairs(self):
		"""Check the groups produced by wordCrossoverSelection on nine sentences.

		Three sentences contain 'the' four times, three contain 'big' four
		times and three contain no repeated word; every returned group must
		equal one of those three clusters.
		"""
		groupedPredictor = GroupedPredictions()
		sentences = [
			'the the the the big cat',
			'the big big big big dog',
			'big big big big apples are green',
			'people like the the the the cars',
			'people like big big big big boats',
			'the the the the apples are red',
			'the big mouse',
			'people like cat',
			'tomatoes are red',
		]
		# Each token list is simply the sentence split on whitespace.
		testData = {
			'word1': [
				{'sent': sentence, 'tokens': sentence.split()}
				for sentence in sentences
			]
		}
		correctGroups = [
			{
				'the the the the big cat',
				'the the the the apples are red',
				'people like the the the the cars',
			},
			{'people like cat', 'the big mouse', 'tomatoes are red'},
			{
				'people like big big big big boats',
				'the big big big big dog',
				'big big big big apples are green',
			},
		]
		# NOTE(review): the meaning of the trailing True flag is not visible
		# from this test -- confirm against wordCrossoverSelection.
		results = groupedPredictor.wordCrossoverSelection(testData, 3, True)
		predictedGroups = results['word1']
		for predicted in predictedGroups:
			self.assertTrue(set(predicted) in correctGroups)
Esempio n. 2
0
	def test_grouped_caluculate_accuracy_4_by_4(self):
		"""calculateAccuracy on four groups of four: 1 only if all groups match.

		The expected value is 1 when every predicted group is correct and 0
		when only some (or none) of the groups are correct.
		"""
		groupedPredictor = GroupedPredictions()
		# Labels a1..a4, b1..b4, c1..c4, d1..d4; each sentence is its own token.
		labels = [letter + str(index) for letter in 'abcd' for index in range(1, 5)]
		testData = {
			'word1': [{'sent': label, 'tokens': [label]} for label in labels]
		}

		cases = [
			# all groups correct
			([['d2', 'd1', 'd4', 'd3'], ['a4', 'a2', 'a3', 'a1'],
				['b1', 'b4', 'b2', 'b3'], ['c2', 'c1', 'c4', 'c3']], 1),
			# 2 groups correct
			([['d2', 'd1', 'd4', 'd3'], ['a4', 'b2', 'a3', 'a1'],
				['b1', 'b4', 'a2', 'b3'], ['c2', 'c1', 'c4', 'c3']], 0),
			# 0 groups correct
			([['d2', 'd1', 'd4', 'a3'], ['a4', 'a2', 'b3', 'a1'],
				['b1', 'b4', 'b2', 'c3'], ['c2', 'c1', 'c4', 'd3']], 0),
		]
		for predictedGroups, expected in cases:
			results = {'word1': predictedGroups}
			accuracy = groupedPredictor.calculateAccuracy(results, testData)
			self.assertEqual(accuracy, expected)
Esempio n. 3
0
	def test_creation_of_all_possible_groups_of_3_by_3_and_4_by_4(self):
		# Checks how many groupings createAllGroupsOfSize3 returns for nine items.
		groupedPredictor = GroupedPredictions()

		# NOTE: ('a') is just the string 'a' -- the parentheses do not make tuples.
		group3 = [('a'),('b'),('c'),('d'),('e'),('f'),('g'),('h'),('i')]
		allGroups = groupedPredictor.createAllGroupsOfSize3(group3)
		# 280 == 9! / ((3!)**3 * 3!), i.e. the number of ways to split nine items
		# into three unordered groups of three -- presumably what the method
		# enumerates; confirm against its implementation.
		self.assertEqual(len(allGroups), 280)

		# Takes a long time to run
		"""
Esempio n. 4
0
	def test_calculate_group_score(self):
		"""calculateGroupScore must equal the sum of the 3x3 similarity block.

		The expected score adds every simValues[i][j] for i, j in 0..2,
		including the diagonal entries, in row-major order.
		"""
		groupedPredictor = GroupedPredictions()

		group = ['a', 'b', 'c']
		letterToIDMap = {letter: index for index, letter in enumerate(group)}
		simValues = self.get_sim_values()

		# Accumulate explicitly (row by row, left to right) so the floating
		# point addition order matches a hand-written chain of additions.
		groupScoreManual = 0
		for row in range(3):
			for column in range(3):
				groupScoreManual = groupScoreManual + simValues[row][column]

		groupScore = groupedPredictor.calculateGroupScore(
			group, simValues, letterToIDMap)
		self.assertEqual(groupScore, groupScoreManual)
def runGroupedTest(data, method, model, accuracyMeasure):
    """
    Run a grouped evaluation prediction on the given data and return the
    accuracy using the selected accuracy measure.

    Both selector arguments are validated before any work is done, so a
    misspelt option fails immediately with a clear message instead of
    raising UnboundLocalError later or silently returning None.

    Args:
        data: The data to perform the prediction on.
        method: The prediction method to use; one of 'random',
            'wordCrossover' or 'word2vec'.
        model: A trained word2vec model if method is 'word2vec' else None.
        accuracyMeasure: How the accuracy is measured; either 'total' or
            'pairs'.

    Returns:
        The accuracy of the selected prediction method on the given data
        under the selected accuracy measure.

    Raises:
        ValueError: If method or accuracyMeasure is not a supported value.
    """
    validMethods = ('random', 'wordCrossover', 'word2vec')
    validMeasures = ('total', 'pairs')
    if method not in validMethods:
        raise ValueError("Unknown method %r; expected one of: %s"
                         % (method, ', '.join(validMethods)))
    if accuracyMeasure not in validMeasures:
        raise ValueError("Unknown accuracyMeasure %r; expected one of: %s"
                         % (accuracyMeasure, ', '.join(validMeasures)))

    dataTest = GroupedPredictions()
    groupTestData = ds.createGroupedTestData(data)
    # sl.saveGroupedData('oxfordGroupedTest', groupTestData)
    if method == 'random':
        selections = dataTest.randomSelection(groupTestData, 3)
    elif method == 'wordCrossover':
        selections = dataTest.wordCrossoverSelection(groupTestData, 3)
    else:  # 'word2vec' -- guaranteed by the validation above
        selections = dataTest.word2VecSimilaritySelection(
            groupTestData, 3, model)

    if accuracyMeasure == 'total':
        return dataTest.calculateAccuracy(selections, groupTestData)
    # Only 'pairs' remains after validation.
    return dataTest.calculateAccuracyPairs(selections, groupTestData)
Esempio n. 6
0
	def test_grouped_calculate_accuracy_3_by_3(self):
		"""calculateAccuracy on three groups of three: 1 only if all match.

		The expected accuracy is 1 when every predicted group is correct and
		0 when only one group, or no group, is correct. Every predicted
		grouping uses each of the nine examples exactly once.
		"""
		groupedPredictor = GroupedPredictions()
		testData = {'word1':['a1','a2','a3','b1','b2','b3','c1','c2','c3']}
		testData = self.formatTestData(testData)

		# all groups correct
		results = {'word1':[['b3','b1','b2'],['c2','c1','c3'],['a1','a3','a2']]}
		accuracy = groupedPredictor.calculateAccuracy(results, testData)
		self.assertEqual(accuracy, 1)

		# 1 group correct
		results = {'word1':[['b3','b1','b2'],['c2','c1','a3'],['a1','c3','a2']]}
		accuracy = groupedPredictor.calculateAccuracy(results, testData)
		self.assertEqual(accuracy, 0)

		# 0 groups correct
		# The last group previously repeated 'b3' and omitted 'b1', so the
		# prediction was not a valid partition of the nine examples.
		results = {'word1':[['b3','a3','c2'],['c3','a1','a2'],['c1','b1','b2']]}
		accuracy = groupedPredictor.calculateAccuracy(results, testData)
		self.assertEqual(accuracy, 0)
Esempio n. 7
0
	def test_group_by_similarity_brute_force(self):
		"""groupBySimilarityBF must recover the same three groups twice.

		The grouping is checked once with get_sim_values() and flag False,
		and once with get_sim_values_inverse() and flag True.
		"""
		groupedPredictor = GroupedPredictions()
		examples = [{'sent': letter, 'tokens': [letter]} for letter in 'abcdefghi']
		correctGroupings = [{'a', 'b', 'd'}, {'c', 'e', 'h'}, {'f', 'g', 'i'}]

		def checkGrouping(simValues, flag):
			# NOTE(review): the flag presumably tells the grouper how to read
			# the values (similarity vs. inverse) -- confirm in the method.
			predicted = groupedPredictor.groupBySimilarityBF(
				examples, simValues, 3, flag)
			for members in predicted:
				self.assertTrue(set(members) in correctGroupings)

		checkGrouping(self.get_sim_values(), False)
		checkGrouping(self.get_sim_values_inverse(), True)
Esempio n. 8
0
	def test_grouped_random_selection_4_by_4(self):
		"""randomSelection must split 16 examples into four disjoint groups of 4.

		Every returned key must exist in the input, every group must have
		groupSize items, no item may appear twice, and together the groups
		must cover exactly the input letters.
		"""
		groupedPredictor = GroupedPredictions()
		groupSize = 4
		letters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p']
		# The key is 'tokens' (not 'token'), matching the example format used
		# by the other grouped-prediction tests.
		examples = [{'sent':letter, 'tokens':word_tokenize(letter)} for letter in letters]
		testData = {'word1':examples}
		predictions = groupedPredictor.randomSelection(testData, groupSize)
		# Created before the loop so the final assertions fail cleanly, rather
		# than with a NameError, if no predictions are returned.
		used = []
		for key in predictions:
			self.assertIn(key, testData)
			results = predictions[key]
			self.assertEqual(len(results), 4)
			for group in results:
				self.assertEqual(len(group), groupSize)
				for item in group:
					self.assertNotIn(item, used)
					used.append(item)
		self.assertEqual(len(examples), len(used))
		self.assertEqual(set(letters), set(used))
Esempio n. 9
0
	def test_grouped_random_selection_3_by_3(self):
		"""randomSelection must give three disjoint groups of 3 for each word.

		Two words with nine single-letter examples each are used; for every
		returned key the groups are checked for count, size and the absence
		of repeated items.
		"""
		groupedPredictor = GroupedPredictions()
		groupSize = 3
		rawExamples = {
			'word1': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'],
			'word2': ['j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's'],
		}
		# Wrap every raw string in the {'sent', 'tokens'} example format.
		testData = {
			word: [{'sent': text, 'tokens': word_tokenize(text)} for text in texts]
			for word, texts in rawExamples.items()
		}
		predictions = groupedPredictor.randomSelection(testData, groupSize)
		for word, groups in predictions.items():
			self.assertTrue(word in testData)
			self.assertEqual(len(groups), 3)
			seen = []
			for members in groups:
				self.assertEqual(len(members), 3)
				for member in members:
					self.assertTrue(member not in seen)
					seen.append(member)
Esempio n. 10
0
	def test_grouped_calculate_accuracy_pairs_4_by_4(self):
		"""calculateAccuracyPairs on four groups of four (24 pairs in total).

		Each case lists a predicted grouping together with the number of
		correctly co-grouped pairs it contains; the expected accuracy is that
		count divided by 24.
		"""
		groupedPredictor = GroupedPredictions()
		labels = [letter + str(index) for letter in 'abcd' for index in range(1, 5)]
		testData = self.formatTestData({'word1': labels})

		totalPairs = float(24)
		cases = [
			# all pairs correct
			(24, [['d2', 'd1', 'd4', 'd3'], ['a4', 'a2', 'a3', 'a1'],
				['b1', 'b4', 'b2', 'b3'], ['c2', 'c1', 'c4', 'c3']]),
			# 12 pairs correct
			(12, [['d2', 'd1', 'd4', 'd3'], ['a4', 'c2', 'a3', 'a1'],
				['c3', 'b4', 'c4', 'b3'], ['b2', 'b1', 'c1', 'a2']]),
			# 6 pairs correct
			(6, [['b3', 'c1', 'b2', 'b4'], ['c3', 'a2', 'c2', 'd1'],
				['b1', 'a3', 'a1', 'd3'], ['d4', 'a4', 'c4', 'd2']]),
			# 0 pairs correct
			(0, [['a1', 'b1', 'c1', 'd1'], ['a2', 'b2', 'c2', 'd2'],
				['a3', 'b3', 'c3', 'd3'], ['a4', 'b4', 'c4', 'd4']]),
		]
		for correctPairs, predictedGroups in cases:
			results = {'word1': predictedGroups}
			accuracy = groupedPredictor.calculateAccuracyPairs(results, testData)
			self.assertEqual(accuracy, correctPairs / totalPairs)
Esempio n. 11
0
def runGroupedTest(data, method, model, accuracyMeasure):
	"""
	Run a grouped evaluation prediction on the given data and return the
	accuracy using the selected accuracy measure.

	Both selector arguments are validated before any work is done, so a
	misspelt option fails immediately with a clear message instead of
	raising UnboundLocalError later or silently returning None.

	Args:
	data: The data to perform the prediction on.
	method: The prediction method to use; one of 'random', 'wordCrossover'
	or 'word2vec'.
	model: A trained word2vec model if method is 'word2vec' else None.
	accuracyMeasure: How the accuracy is measured; either 'total' or
	'pairs'.

	Returns:
	The accuracy of the selected prediction method on the given data under
	the selected accuracy measure.

	Raises:
	ValueError: If method or accuracyMeasure is not a supported value.

	"""
	validMethods = ('random', 'wordCrossover', 'word2vec')
	validMeasures = ('total', 'pairs')
	if method not in validMethods:
		raise ValueError("Unknown method %r; expected one of: %s"
			% (method, ', '.join(validMethods)))
	if accuracyMeasure not in validMeasures:
		raise ValueError("Unknown accuracyMeasure %r; expected one of: %s"
			% (accuracyMeasure, ', '.join(validMeasures)))

	dataTest = GroupedPredictions()
	groupTestData = ds.createGroupedTestData(data)
	#sl.saveGroupedData('oxfordGroupedTest', groupTestData)
	if method == 'random':
		selections = dataTest.randomSelection(groupTestData, 3)
	elif method == 'wordCrossover':
		selections = dataTest.wordCrossoverSelection(groupTestData, 3)
	else:  # 'word2vec' -- guaranteed by the validation above
		selections = dataTest.word2VecSimilaritySelection(groupTestData, 3, model)

	if accuracyMeasure == 'total':
		return dataTest.calculateAccuracy(selections, groupTestData)
	# Only 'pairs' remains after validation.
	return dataTest.calculateAccuracyPairs(selections, groupTestData)
Esempio n. 12
0
	def test_grouped_word2vec_selection_3_by_3(self):
		"""word2VecSimilaritySelection must return three groups of 3 per word.

		A Word2Vec model is loaded from self.brownFilePath. Across both input
		words no item may be repeated, and the returned items together must
		be exactly the eighteen input words.
		"""
		model = Word2Vec.load(self.brownFilePath)
		groupedPredictor = GroupedPredictions()
		wordLists = [
			('word1', ['cat', 'dog', 'horse', 'apple', 'orange', 'lemon',
				'England', 'France', 'Spain']),
			('word2', ['boy', 'girl', 'man', 'bus', 'car', 'boat',
				'pencil', 'pen', 'rubber']),
		]
		allExamples = wordLists[0][1] + wordLists[1][1]
		testData = {}
		for key, words in wordLists:
			testData[key] = [
				{'sent': word, 'tokens': word_tokenize(word)} for word in words
			]
		groupSize = 3
		predictions = groupedPredictor.word2VecSimilaritySelection(
			testData, groupSize, model)
		# Shared across both words: an item may not be reused anywhere.
		seen = []
		for key, groups in predictions.items():
			self.assertTrue(key in testData)
			self.assertEqual(len(groups), 3)
			for members in groups:
				self.assertEqual(len(members), 3)
				for member in members:
					self.assertTrue(member not in seen)
					seen.append(member)
		self.assertEqual(len(allExamples), len(seen))
		self.assertEqual(set(allExamples), set(seen))
Esempio n. 13
0
	def test_grouped_calculate_accuracy_pairs_3_by_3(self):
		"""calculateAccuracyPairs on three groups of three (9 pairs in total).

		Each case lists a predicted grouping together with the number of
		correctly co-grouped pairs it contains; the expected accuracy is that
		count divided by 9.
		"""
		groupedPredictor = GroupedPredictions()
		labels = [letter + str(index) for letter in 'abc' for index in range(1, 4)]
		testData = self.formatTestData({'word1': labels})

		totalPairs = float(9)
		cases = [
			# all pairs correct
			(9, [['b3', 'b1', 'b2'], ['c2', 'c1', 'c3'], ['a1', 'a3', 'a2']]),
			# 5 pairs correct
			(5, [['b3', 'c1', 'c2'], ['a3', 'a2', 'a1'], ['b1', 'c3', 'b2']]),
			# 3 pairs correct
			(3, [['b3', 'c1', 'b2'], ['c3', 'a2', 'c2'], ['b1', 'a3', 'a1']]),
			# 0 pairs correct
			(0, [['a2', 'c1', 'b3'], ['b2', 'a3', 'c2'], ['b1', 'c3', 'a1']]),
		]
		for correctPairs, predictedGroups in cases:
			results = {'word1': predictedGroups}
			accuracy = groupedPredictor.calculateAccuracyPairs(results, testData)
			self.assertEqual(accuracy, correctPairs / totalPairs)