def testReadDataNew(self):
    # Exercises cp.read_data(old=False) against the database returned by
    # edb.get_trip_new_db(): a size-limited read, an unknown uuid, a single
    # known uuid, and the per-uuid totals.
    trip_db = edb.get_trip_new_db()

    # NOTE(review): entries are probed with self.testUUID as the *key* of each
    # document -- confirm that this is the intended lookup.
    known_uuids = set(
        entry[self.testUUID] for entry in trip_db.find() if self.testUUID in entry
    )
    if not known_uuids:
        # Guarantee at least one value so the pop() below cannot fail.
        known_uuids.add(None)
    self.uuids = known_uuids

    sample = cp.read_data(size=10, old=False)
    print('there are ' + str(len(sample)))
    # An empty result is accepted because the database may hold no trips.
    self.assertTrue(len(sample) in (10, 0))

    unknown = cp.read_data(uuid='baduuid', old=False)
    self.assertTrue(not unknown)

    picked = self.uuids.pop()
    per_user = cp.read_data(uuid=picked, old=False)
    self.assertTrue(len(per_user) <= trip_db.find({'user_id': picked}).count())
    self.uuids.add(picked)

    total = 0
    current = picked
    for current in self.uuids:
        total += len(cp.read_data(uuid=current, old=False))

    # NOTE(review): this re-reads only the last uuid visited by the loop, so
    # the equality with the running total holds only when that uuid accounts
    # for every trip counted -- confirm whether an unfiltered read was meant.
    data = cp.read_data(uuid=current, old=False)
    self.assertTrue(0 <= len(data) <= trip_db.find().count())
    print("len(data) = %s" % len(data))
    self.assertTrue(len(data) == total)
def testReadData(self):
    # Exercises cp.read_data against the database returned by
    # edb.get_fake_trips_db(): a size-limited read, an unknown uuid, a single
    # known uuid, and an unfiltered read compared with the per-uuid totals.
    trip_db = edb.get_fake_trips_db()

    user_ids = set(entry['user_id'] for entry in trip_db.find())
    if not user_ids:
        # Guarantee at least one value so the pop() below cannot fail.
        user_ids.add(None)
    self.uuids = user_ids

    sample = cp.read_data(size=10)
    print('there are ' + str(len(sample)))
    # An empty result is accepted because the database may hold no trips.
    self.assertTrue(len(sample) in (10, 0))

    unknown = cp.read_data(uuid='baduuid')
    self.assertTrue(not unknown)

    picked = self.uuids.pop()
    per_user = cp.read_data(uuid=picked)
    self.assertTrue(len(per_user) <= trip_db.find({'user_id': picked}).count())
    self.uuids.add(picked)

    total = 0
    for user_id in self.uuids:
        total += len(cp.read_data(uuid=user_id))

    everything = cp.read_data()
    self.assertTrue(0 <= len(everything) <= trip_db.find().count())
    # The unfiltered read must contain exactly as many trips as the
    # per-uuid reads combined.
    self.assertTrue(len(everything) == total)

    # Every trip read back must carry truthy start/end locations and a start time.
    for trip in everything:
        self.assertTrue(
            trip.trip_start_location and trip.trip_end_location and trip.start_time
        )
def testReadData(self):
    # Reading with the test uuid must return more than five entries...
    own_trips = cp.read_data(uuid=self.testUUID)
    self.assertTrue(len(own_trips) > 5)

    # ...while an unknown uuid must return nothing, which shows that trips
    # are matched to the uuid they were requested for.
    foreign_trips = cp.read_data(uuid="FakeUUID")
    self.assertEqual(len(foreign_trips), 0)
def __init__(self, *args, **kwargs):
    """Initialise the test case and preload trips with cp.read_data(size=100).

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. The loaded trips are stored on ``self.data`` and
    their count is printed.
    """
    super(SimilarityTests, self).__init__(*args, **kwargs)
    loaded = cp.read_data(size=100)
    self.data = loaded
    # A fallback that called tg.create_fake_trips() when nothing was loaded
    # is disabled, so self.data may be empty here.
    print('there are ' + str(len(loaded)))
def testClusterToTourModel(self):
    # Negative case: building a tour model from nothing must not crash and
    # must produce a falsy result.
    empty_model = cp.cluster_to_tour_model(None, None)
    self.assertTrue(not empty_model)

    # Positive case: run the read -> remove_noise -> cluster steps by hand to
    # obtain the cluster count, then check that cp.main() does not produce
    # more tour-model entries than there are clusters.
    trips = cp.read_data(size=100)
    trips, bins = cp.remove_noise(trips, 300)
    cluster_count, _labels, _trips = cp.cluster(trips, len(bins))
    tour_dict = cp.main()
    self.assertTrue(len(tour_dict) <= cluster_count)
def setUp(self):
    """Prepare per-test fixtures: a fresh uuid, the trip data and a time series.

    Sets ``self.testUUID`` to a new ``uuid.uuid4()``, ``self.data`` to the
    result of ``cp.read_data()`` and ``self.ts`` to the time series obtained
    from ``esta.TimeSeries.get_time_series`` for that uuid.
    """
    test_uuid = uuid.uuid4()
    self.testUUID = test_uuid

    trips = cp.read_data()
    self.data = trips
    # A fallback that called tg.create_fake_trips() when nothing was loaded
    # is disabled, so the count logged below may be zero.
    trip_count = len(trips)
    logging.info("Found %s trips" % trip_count)

    self.ts = esta.TimeSeries.get_time_series(test_uuid)
def _setup(self):
    """Load the trip data, then create a fresh test uuid and its time series.

    Stores the result of ``cp.read_data()`` on ``self.data`` and prints its
    length, sets ``self.testUUID`` to a new ``uuid.uuid4()`` and ``self.ts``
    to ``esta.TimeSeries.get_time_series`` for that uuid.
    """
    trips = cp.read_data()
    self.data = trips
    # A fallback that called tg.create_fake_trips() when nothing was loaded
    # is disabled, so the printed count may be zero.
    print('there are ' + str(len(trips)))

    test_uuid = uuid.uuid4()
    self.testUUID = test_uuid
    self.ts = esta.TimeSeries.get_time_series(test_uuid)
def __init__(self, *args, **kwargs):
    """Initialise the test case, preload trips and cluster them.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. Trips are read with ``cp.read_data(size=100)`` and
    stored on ``self.data``; they are then clustered through
    ``feat.featurization(...).cluster`` with one fifth of the trip count as
    both the minimum and maximum number of clusters, and the result is
    stored on ``self.labels``.
    """
    super(RepresentativesTests, self).__init__(*args, **kwargs)
    self.data = cp.read_data(size=100)
    # A fallback that called tg.create_fake_trips() when nothing was loaded
    # is disabled, so self.data may be empty here.
    print('there are ' + str(len(self.data)))

    # Floor division keeps the cluster count an integer whatever division
    # semantics are in effect (a plain "/" yields a float under true division).
    n = len(self.data) // 5
    self.labels = feat.featurization(self.data).cluster(min_clusters=n, max_clusters=n)
def __init__(self, *args, **kwargs):
    """Set up the test case with preloaded trips and their cluster labels.

    Arguments are passed straight through to the parent constructor.
    ``self.data`` receives the result of ``cp.read_data(size=100)`` and
    ``self.labels`` the result of clustering that data via
    ``feat.featurization(...).cluster``, with the cluster count fixed
    (minimum == maximum) at one fifth of the number of trips.
    """
    super(RepresentativesTests, self).__init__(*args, **kwargs)
    trips = cp.read_data(size=100)
    self.data = trips
    # The fallback that generated fake trips (tg.create_fake_trips) when no
    # data was found is disabled; the trip count printed below may be zero.
    print('there are ' + str(len(trips)))

    # Use explicit floor division so the cluster count stays an integer even
    # when "/" performs true division.
    cluster_count = len(trips) // 5
    self.labels = feat.featurization(trips).cluster(
        min_clusters=cluster_count, max_clusters=cluster_count
    )
def testClusterToTourModelNew(self):
    # Negative case: building a tour model from nothing (old=False) must not
    # crash and must produce a falsy result.
    empty_model = cp.cluster_to_tour_model(None, None, old=False)
    self.assertTrue(not empty_model)

    user_name = "test1"

    # Positive case: run read -> remove_noise -> cluster by hand to obtain
    # the cluster count for the test uuid.
    trips = cp.read_data(uuid=self.testUUID, size=100, old=False)
    trips, bins = cp.remove_noise(trips, 300, old=False)
    cluster_count, _labels, _trips = cp.cluster(trips, len(bins), old=False)

    # NOTE(review): the tour model is built for the literal user "test1"
    # while the clustering above used self.testUUID -- confirm that comparing
    # the two is intended.
    tour_dict = cp.main(uuid=user_name, old=False)
    print('n = %s | len(tour_dict) = %s' % (cluster_count, len(tour_dict)))
    self.assertTrue(len(tour_dict) <= cluster_count)
def testClusterNew(self):
    # Data for the positive cases further down.
    trips = cp.read_data(uuid=self.testUUID, size=100, old=False)

    # Empty input must not crash and must yield zero clusters, no labels
    # and no data.
    n_clusters, labels, clustered = cp.cluster([], 10, old=False)
    self.assertTrue(len(clustered) == n_clusters == len(labels) == 0)

    # Clustering the raw data: either nothing was clustered or the cluster
    # count lies within [10, 15]; there is one label per returned item and
    # the returned data compares equal to the input.
    n_clusters, labels, clustered = cp.cluster(trips, 10, old=False)
    self.assertTrue(n_clusters == 0 or 10 <= n_clusters <= 15)
    self.assertTrue(len(labels) == len(clustered))
    self.assertTrue(cmp(clustered, trips) == 0)

    # Clustering after noise removal: zero clusters or a count within [20, 30].
    trips, _bins = cp.remove_noise(trips, 200, old=False)
    n_clusters, labels, clustered = cp.cluster(trips, 20, old=False)
    self.assertTrue(n_clusters == 0 or 20 <= n_clusters <= 30)
def testCluster(self):
    # Data for the positive cases further down.
    trips = cp.read_data(size=100)

    # Empty input must not crash and must yield zero clusters, no labels
    # and no data.
    n_clusters, labels, clustered = cp.cluster([], 10)
    self.assertTrue(len(clustered) == n_clusters == len(labels) == 0)

    # Clustering the raw data: either nothing was clustered or the cluster
    # count lies within [10, 15]; there is one label per returned item and
    # the returned data compares equal to the input.
    n_clusters, labels, clustered = cp.cluster(trips, 10)
    self.assertTrue(n_clusters == 0 or 10 <= n_clusters <= 15)
    self.assertTrue(len(labels) == len(clustered))
    self.assertTrue(cmp(clustered, trips) == 0)

    # Clustering after noise removal: zero clusters or a count within [20, 30].
    trips, _bins = cp.remove_noise(trips, 200)
    n_clusters, labels, clustered = cp.cluster(trips, 20)
    self.assertTrue(n_clusters == 0 or 20 <= n_clusters <= 30)
def testClusterToTourModel(self):
    # An empty dataset must not crash and must give a falsy tour model.
    empty_model = cp.cluster_to_tour_model(None, None)
    self.assertFalse(empty_model)

    # With the data of the test uuid: run read -> remove_noise -> cluster by
    # hand to obtain the cluster count, then check that cp.main() for the
    # same uuid does not return more entries than there are clusters.
    trips = cp.read_data(uuid=self.testUUID)
    trips, bins = cp.remove_noise(trips, self.RADIUS)
    cluster_count, _labels, _trips = cp.cluster(trips, len(bins))
    tour_dict = cp.main(uuid=self.testUUID)
    self.assertTrue(len(tour_dict) <= cluster_count)
def testRemoveNoise(self):
    # Checks cp.remove_noise on an empty dataset and on the trips of the
    # test uuid.
    data = cp.read_data(uuid=self.testUUID)

    # An empty dataset must not break the code and must yield no data and
    # no bins.
    new_data, bins = cp.remove_noise(None, self.RADIUS)
    self.assertTrue(len(new_data) == len(bins) == 0)

    # Filtering the real data may drop trips but must never add any. The
    # data itself has to be passed here: filtering None again would make the
    # two assertions below trivially true.
    new_data, bins = cp.remove_noise(data, self.RADIUS)
    self.assertTrue(len(new_data) <= len(data))

    # There cannot be more bins than there are trips.
    self.assertTrue(len(bins) <= len(data))
def generate_route_clusters(user):
    """Cluster a user's trips and store the resulting route clusters.

    The trips are read with ``cp.read_data(uuid=user)``, filtered by
    ``cp.remove_noise`` (second argument 300) and clustered by
    ``cp.cluster`` using the number of bins as the cluster count. The
    result is a dict with one entry per non-empty cluster: the key is the
    ``trip_id`` of the cluster's first member (lowest index in ``labels``)
    and the value is the list of ``trip_id`` values of all members in index
    order. That dict is passed to ``update_user_routeClusters``.

    Labels outside ``range(num_clusters)`` are ignored. A cluster number
    that has no members is skipped instead of raising ``ValueError``.
    """
    print("In profile, generating route clusters for %s" % user)
    data = cp.read_data(uuid=user)
    data, bins = cp.remove_noise(data, 300)
    num_clusters, labels, data = cp.cluster(data, len(bins))

    # Single pass over the labels: collect the trip ids of every label
    # rather than rescanning all labels once per cluster.
    members_by_label = {}
    for label, trip in zip(labels, data):
        members_by_label.setdefault(label, []).append(trip.trip_id)

    clusters = {}
    for label in range(num_clusters):
        members = members_by_label.get(label)
        if members:
            # The first member's trip id identifies the cluster.
            clusters[members[0]] = members

    update_user_routeClusters(user, clusters)
def main(colors):
    """Evaluate binning and clustering of all trips against the given colors.

    ``colors`` is first converted by ``get_colors`` using the trip data and
    later adjusted by ``update_colors`` for the bins that remain after noise
    removal. ``evaluate`` is called twice with the colors: once with the
    labels produced by the similarity binning and once with the labels
    returned by ``cp.cluster``. Finally the clusters are mapped through
    ``map_clusters_by_groundtruth``. Nothing is returned.
    """
    trips = cp.read_data()
    colors = get_colors(trips, colors)  # bring colors into the expected format
    trips, bins = cp.remove_noise(trips, .5, 300)

    # --- evaluate the binning ---
    # The bins were already computed by remove_noise, so they are assigned
    # directly to a similarity object before its evaluate_bins() is called.
    sim = similarity.similarity(trips, .5, 300)
    sim.bins = bins
    sim.evaluate_bins()
    colors = update_colors(bins, colors)  # reflect the bins that were deleted
    bin_labels = sim.labels
    evaluate(numpy.array(colors), numpy.array(bin_labels))

    # --- evaluate the clustering ---
    _cluster_count, cluster_labels, trips = cp.cluster(trips, len(bins))
    evaluate(numpy.array(colors), numpy.array(cluster_labels))

    # Pass map_individuals=True instead to map individual clusters.
    map_clusters_by_groundtruth(trips, cluster_labels, colors, map_individuals=False)
def testCluster(self):
    # Data of the test uuid for the positive cases further down.
    trips = cp.read_data(uuid=self.testUUID)

    # An empty dataset must not crash the program and must yield zero
    # clusters, no labels and no data.
    n_clusters, labels, clustered = cp.cluster([], 10)
    self.assertTrue(len(clustered) == n_clusters == len(labels) == 0)

    # Clustering the unfiltered (noisy) data: one label per returned item
    # and the returned data compares equal to the input.
    n_clusters, labels, clustered = cp.cluster(trips, 10)
    self.assertEqual(len(labels), len(clustered))
    self.assertEqual(cmp(clustered, trips), 0)

    # Clustering after noise removal: either nothing was clustered or the
    # cluster count lies between the bin count and the bin count plus ten.
    trips, bins = cp.remove_noise(trips, self.RADIUS)
    bin_count = len(bins)
    n_clusters, labels, clustered = cp.cluster(trips, bin_count)
    self.assertTrue(n_clusters == 0 or bin_count <= n_clusters <= bin_count + 10)
def main(colors):
    """Score the binning and the clustering of all trips, then map the clusters.

    The incoming ``colors`` are reformatted by ``get_colors`` and, once
    noise has been removed from the trips, updated by ``update_colors``.
    ``evaluate`` is then run on the colors against two label sets: the
    labels of a similarity object that was given the precomputed bins, and
    the labels returned by ``cp.cluster``. The clustered data is finally
    handed to ``map_clusters_by_groundtruth``. Returns ``None``.
    """
    data = cp.read_data()
    colors = get_colors(data, colors)
    data, bins = cp.remove_noise(data, .5, 300)

    # Binning evaluation: reuse the bins computed above rather than letting
    # the similarity object derive them again, then let it label them.
    binning = similarity.similarity(data, .5, 300)
    binning.bins = bins
    binning.evaluate_bins()

    # Update the colors so they reflect the bins that were deleted.
    colors = update_colors(bins, colors)
    labels_from_bins = binning.labels
    evaluate(numpy.array(colors), numpy.array(labels_from_bins))

    # Clustering evaluation, using the bin count as the number of clusters.
    clustering = cp.cluster(data, len(bins))
    _clusters, labels_from_clusters, data = clustering
    evaluate(numpy.array(colors), numpy.array(labels_from_clusters))

    # Set map_individuals to True to map individual clusters.
    map_clusters_by_groundtruth(
        data,
        labels_from_clusters,
        colors,
        map_individuals=False,
    )
def testRemoveNoiseNew(self):
    # Data of the test uuid for the positive case below.
    trips = cp.read_data(uuid=self.testUUID, size=100, old=False)

    # Negative case: an empty dataset must not crash and must yield no data
    # and no bins.
    filtered, bins = cp.remove_noise(None, 200, old=False)
    self.assertTrue(len(filtered) == len(bins) == 0)

    # Positive case: filtering may drop trips but must never add any.
    filtered, bins = cp.remove_noise(trips, 100, old=False)
    self.assertTrue(len(filtered) <= len(trips))
def __init__(self, *args, **kwargs):
    """Initialise the test case and preload trips with cp.read_data(size=100).

    Arguments are forwarded unchanged to the parent constructor; the loaded
    trips are stored on ``self.data`` and their count is printed.
    """
    super(FeaturizationTests, self).__init__(*args, **kwargs)
    loaded = cp.read_data(size=100)
    self.data = loaded
    print('there are ' + str(len(loaded)))
def setUp(self):
    """Prepare per-test fixtures: trip data, a fresh uuid and its time series.

    Stores the result of ``cp.read_data()`` on ``self.data``, a new
    ``uuid.uuid4()`` on ``self.testUUID`` and the time series returned by
    ``esta.TimeSeries.get_time_series`` for that uuid on ``self.ts``, then
    prints how many trips were loaded.
    """
    trips = cp.read_data()
    self.data = trips

    test_uuid = uuid.uuid4()
    self.testUUID = test_uuid
    self.ts = esta.TimeSeries.get_time_series(test_uuid)

    print('there are ' + str(len(trips)))
def testRemoveNoise(self):
    # Data for the positive case below.
    trips = cp.read_data(size=100)

    # Negative case: an empty dataset must not crash and must yield no data
    # and no bins.
    filtered, bins = cp.remove_noise(None, 200)
    self.assertTrue(len(filtered) == len(bins) == 0)

    # Positive case: filtering may drop trips but must never add any.
    filtered, bins = cp.remove_noise(trips, 100)
    self.assertTrue(len(filtered) <= len(trips))