def testRemoveNoise(self):
    # Exercises cp.remove_noise on an empty (None) input and on the dataset
    # read for self.testUUID.
    data = cp.read_data(uuid=self.testUUID)

    # Test to make sure the code doesn't break on an empty dataset: both the
    # filtered data and the bins must come back empty.
    new_data, bins = cp.remove_noise(None, self.RADIUS)
    self.assertTrue(len(new_data) == len(bins) == 0)

    # Test to make sure some or no data was filtered out, but that nothing
    # was added after filtering. This has to run on the dataset that was
    # read above; with None as input the checks below pass trivially.
    new_data, bins = cp.remove_noise(data, self.RADIUS)
    self.assertTrue(len(new_data) <= len(data))

    # Make sure there are not more bins than data; that wouldn't make sense.
    self.assertTrue(len(bins) <= len(data))
def testClusterToTourModel(self):
    # Negative case: building a tour model from nothing must not crash and
    # must give back something falsy.
    empty_model = cp.cluster_to_tour_model(None, None)
    self.assertTrue(not empty_model)

    # Positive case: read with size=100, remove noise, cluster with one
    # cluster per bin, then check that cp.main() does not return more
    # entries than the number of clusters found.
    sample = cp.read_data(size=100)
    sample, bins = cp.remove_noise(sample, 300)
    bin_count = len(bins)
    n, labels, sample = cp.cluster(sample, bin_count)
    tour_dict = cp.main()
    self.assertTrue(len(tour_dict) <= n)
def testClusterToTourModel(self):
    # Empty input: cluster_to_tour_model must survive (None, None) and
    # return a falsy result.
    empty_result = cp.cluster_to_tour_model(None, None)
    self.assertFalse(empty_result)

    # Real dataset: run the read / remove_noise / cluster stages for the
    # test user to learn the cluster count, then make sure cp.main() for
    # the same user yields at most that many entries.
    trips = cp.read_data(uuid=self.testUUID)
    trips, bins = cp.remove_noise(trips, self.RADIUS)
    bin_count = len(bins)
    n, labels, trips = cp.cluster(trips, bin_count)
    tour_dict = cp.main(uuid=self.testUUID)
    self.assertTrue(len(tour_dict) <= n)
def testCluster(self):
    sample = cp.read_data(size=100)

    # Clustering an empty list must not crash and must report zero
    # clusters, zero labels and zero data points.
    num_clusters, labels, clustered = cp.cluster([], 10)
    self.assertTrue(len(clustered) == num_clusters == len(labels) == 0)

    # Clustering the unfiltered sample with a requested minimum of 10:
    # either no clusters at all, or between 10 and 15 of them.
    num_clusters, labels, clustered = cp.cluster(sample, 10)
    self.assertTrue(num_clusters == 0 or 10 <= num_clusters <= 15)
    # One label per returned data point, and the data comes back unchanged.
    self.assertTrue(len(labels) == len(clustered))
    self.assertTrue(cmp(clustered, sample) == 0)

    # Same check after noise removal, this time asking for 20 clusters.
    sample, bins = cp.remove_noise(sample, 200)
    num_clusters, labels, clustered = cp.cluster(sample, 20)
    self.assertTrue(num_clusters == 0 or 20 <= num_clusters <= 30)
def testClusterToTourModelNew(self):
    # Negative case (old=False code path): an empty input must not crash
    # and must produce a falsy tour model.
    empty_model = cp.cluster_to_tour_model(None, None, old=False)
    self.assertTrue(not empty_model)

    # Positive case: run each stage with old=False to get the cluster
    # count, then compare it with what cp.main() returns for "test1".
    user_name = "test1"
    sample = cp.read_data(uuid=self.testUUID, size=100, old=False)
    sample, bins = cp.remove_noise(sample, 300, old=False)
    bin_count = len(bins)
    n, labels, sample = cp.cluster(sample, bin_count, old=False)
    tour_dict = cp.main(uuid=user_name, old=False)

    # Parenthesised single expression: prints the same text under the
    # Python 2 print statement.
    print('n = %s | len(tour_dict) = %s' % (n, len(tour_dict)))
    self.assertTrue(len(tour_dict) <= n)
def testClusterNew(self):
    # Data used by the positive cases below (old=False code path).
    sample = cp.read_data(uuid=self.testUUID, size=100, old=False)

    # Clustering an empty list must not crash and must report zero
    # clusters, zero labels and zero data points.
    num_clusters, labels, clustered = cp.cluster([], 10, old=False)
    self.assertTrue(len(clustered) == num_clusters == len(labels) == 0)

    # Unfiltered sample, requested minimum of 10: either no clusters at
    # all, or between 10 and 15 of them.
    num_clusters, labels, clustered = cp.cluster(sample, 10, old=False)
    self.assertTrue(num_clusters == 0 or 10 <= num_clusters <= 15)
    # One label per returned data point, and the data comes back unchanged.
    self.assertTrue(len(labels) == len(clustered))
    self.assertTrue(cmp(clustered, sample) == 0)

    # Same check after noise removal, this time asking for 20 clusters.
    sample, bins = cp.remove_noise(sample, 200, old=False)
    num_clusters, labels, clustered = cp.cluster(sample, 20, old=False)
    self.assertTrue(num_clusters == 0 or 20 <= num_clusters <= 30)
def generate_route_clusters(user):
    """Cluster the data read for ``user`` and store the groups by trip id.

    Reads the user's data, removes noise, clusters with one cluster per
    bin, and builds a dict that maps the ``trip_id`` of the first entry
    carrying each label to the list of ``trip_id`` values of every entry
    with that label. The dict is handed to ``update_user_routeClusters``.

    Raises ValueError (from ``list.index``) if a label in
    ``range(num_clusters)`` is not present in ``labels``.
    """
    print("In profile, generating route clusters for %s" % user)
    trips = cp.read_data(uuid=user)
    trips, bins = cp.remove_noise(trips, 300)
    num_clusters, labels, trips = cp.cluster(trips, len(bins))

    clusters = {}
    for cluster_id in range(num_clusters):
        # The first entry with this label provides the key for the cluster.
        key_trip_id = trips[labels.index(cluster_id)].trip_id
        clusters[key_trip_id] = [
            trips[pos].trip_id
            for pos, label in enumerate(labels)
            if label == cluster_id
        ]

    update_user_routeClusters(user, clusters)
def main(colors):
    """Score the binning and the clustering against ``colors``, then map.

    ``colors`` is reformatted with ``get_colors`` and later updated to
    reflect deleted bins. ``evaluate`` is called twice: first on the labels
    produced by evaluating the bins, then on the labels returned by
    clustering. Finally the clusters are mapped by ground truth.
    """
    data = cp.read_data()
    colors = get_colors(data, colors)  # put colors in the right format
    data, bins = cp.remove_noise(data, .5, 300)

    # --- Binning evaluation -------------------------------------------
    # A similarity object is created only to label the bins that were
    # already computed above, so they are assigned rather than recomputed.
    binning = similarity.similarity(data, .5, 300)
    binning.bins = bins
    binning.evaluate_bins()

    colors = update_colors(bins, colors)  # account for deleted bins
    bin_labels = binning.labels
    evaluate(numpy.array(colors), numpy.array(bin_labels))

    # --- Clustering evaluation ----------------------------------------
    _, cluster_labels, data = cp.cluster(data, len(bins))
    evaluate(numpy.array(colors), numpy.array(cluster_labels))

    # Set map_individuals=True to map individual clusters as well.
    map_clusters_by_groundtruth(
        data, cluster_labels, colors, map_individuals=False
    )
def testCluster(self):
    trips = cp.read_data(uuid=self.testUUID)

    # An empty dataset must not crash the program: zero clusters, zero
    # labels and zero data points are expected back.
    num_clusters, labels, clustered = cp.cluster([], 10)
    self.assertTrue(len(clustered) == num_clusters == len(labels) == 0)

    # Clustering with noise still present: one label per data point, and
    # the data handed back equals the data passed in.
    num_clusters, labels, clustered = cp.cluster(trips, 10)
    self.assertEqual(len(labels), len(clustered))
    self.assertEqual(cmp(clustered, trips), 0)

    # Clustering after noise removal: either no clusters, or a count
    # between the number of bins and ten more than that.
    trips, bins = cp.remove_noise(trips, self.RADIUS)
    num_clusters, labels, clustered = cp.cluster(trips, len(bins))
    self.assertTrue(
        num_clusters == 0 or len(bins) <= num_clusters <= len(bins) + 10
    )
def main(colors):
    """Evaluate bins and clusters against ``colors`` and map the clusters.

    Steps, in order: read the data, convert ``colors`` with ``get_colors``,
    remove noise, label the resulting bins through a similarity object,
    update ``colors`` for deleted bins, evaluate the bin labels, cluster,
    evaluate the cluster labels, and map the clusters by ground truth.
    """
    # Load the data and bring the colors into the right format.
    points = cp.read_data()
    colors = get_colors(points, colors)

    # Remove noise from the data; this also yields the bins.
    points, bins = cp.remove_noise(points, .5, 300)

    # Evaluate the binning: reuse the bins calculated above instead of
    # letting the similarity object compute its own, then evaluate them
    # to create the labels.
    scorer = similarity.similarity(points, .5, 300)
    scorer.bins = bins
    scorer.evaluate_bins()

    # Update the colors to reflect deleted bins, then evaluate the bins.
    colors = update_colors(bins, colors)
    labels = scorer.labels
    evaluate(numpy.array(colors), numpy.array(labels))

    # Cluster with one cluster per bin and evaluate the clustering.
    num_bins = len(bins)
    unused_count, labels, points = cp.cluster(points, num_bins)
    evaluate(numpy.array(colors), numpy.array(labels))

    # Map clusters; make the last parameter True to map individual
    # clusters.
    map_clusters_by_groundtruth(points, labels, colors, map_individuals=False)
def testRemoveNoise(self):
    sample = cp.read_data(size=100)

    # Removing noise from an empty (None) dataset must not crash and must
    # return no data and no bins.
    filtered, bins = cp.remove_noise(None, 200)
    self.assertTrue(len(filtered) == len(bins) == 0)

    # Filtering the sample may drop entries but must never add any.
    filtered, bins = cp.remove_noise(sample, 100)
    self.assertTrue(len(filtered) <= len(sample))
def testRemoveNoiseNew(self):
    # Data for the positive case below (old=False code path).
    sample = cp.read_data(uuid=self.testUUID, size=100, old=False)

    # Removing noise from an empty (None) dataset must not crash and must
    # return no data and no bins.
    filtered, bins = cp.remove_noise(None, 200, old=False)
    self.assertTrue(len(filtered) == len(bins) == 0)

    # Filtering the sample may drop entries but must never add any.
    filtered, bins = cp.remove_noise(sample, 100, old=False)
    self.assertTrue(len(filtered) <= len(sample))