def testReadDataNew(self):
        # Exercises cp.read_data(old=False) against edb.get_trip_new_db():
        # a size-limited read, an unknown uuid, every collected uuid, and an
        # unfiltered read whose length must match the per-uuid total.
        db = edb.get_trip_new_db()

        # NOTE(review): this collects t[self.testUUID], i.e. it uses the test
        # UUID as a document key, while the count query further down filters on
        # 'user_id' -- confirm which one is intended.
        uuids = set()
        for t in db.find():
            if self.testUUID in t:
                uuids.add(t[self.testUUID])
        if not uuids:
            uuids.add(None)
        self.uuids = uuids

        data = cp.read_data(size=10, old=False)
        print('there are ' + str(len(data)))
        # len(data) == 0 if the test is run on an empty database
        self.assertTrue(len(data) == 10 or len(data) == 0)

        data = cp.read_data(uuid='baduuid', old=False)
        self.assertTrue(not data)

        uuid = self.uuids.pop()
        data = cp.read_data(uuid=uuid, old=False)
        self.assertTrue(len(data) <= db.find({'user_id': uuid}).count())
        self.uuids.add(uuid)

        # `total` rather than `sum`, so the builtin is not shadowed.
        total = 0
        for uuid in self.uuids:
            total += len(cp.read_data(uuid=uuid, old=False))

        # Read without a uuid filter. The previous code reused the loop
        # variable here, so only the last uuid's data was compared with the
        # total accumulated over all uuids.
        data = cp.read_data(old=False)
        self.assertTrue(0 <= len(data) <= db.find().count())
        print("len(data) = %s" % len(data))
        self.assertEqual(len(data), total)
    def testReadData(self):
        # Checks cp.read_data() against edb.get_fake_trips_db(): a size-limited
        # read, an unknown uuid, a single known uuid, and the unfiltered read
        # compared with the per-uuid total.
        db = edb.get_fake_trips_db()
        known_users = {trip['user_id'] for trip in db.find()}
        if not known_users:
            known_users.add(None)
        self.uuids = known_users

        sample = cp.read_data(size=10)
        print('there are ' + str(len(sample)))
        # A length of 0 is accepted for runs against an empty database
        self.assertTrue(len(sample) in (10, 0))

        self.assertTrue(not cp.read_data(uuid='baduuid'))

        one_user = self.uuids.pop()
        user_trips = cp.read_data(uuid=one_user)
        self.assertTrue(
            len(user_trips) <= db.find({'user_id': one_user}).count())
        self.uuids.add(one_user)

        per_user_total = sum(
            len(cp.read_data(uuid=user)) for user in self.uuids)

        everything = cp.read_data()
        self.assertTrue(0 <= len(everything) <= db.find().count())
        self.assertTrue(len(everything) == per_user_total)
        for trip in everything:
            self.assertTrue(trip.trip_start_location
                            and trip.trip_end_location
                            and trip.start_time)
    def testReadData(self):
        # Reads trips for self.testUUID and for a uuid that should not exist.
        own_trips = cp.read_data(uuid=self.testUUID)

        # Sanity check: more than five trips must come back for the test uuid
        self.assertTrue(5 < len(own_trips))

        # No trips may be mapped to an unknown uuid
        unknown_trips = cp.read_data(uuid="FakeUUID")
        self.assertEqual(len(unknown_trips), 0)
    def testReadData(self):
        # Verifies that cp.read_data returns data for self.testUUID and
        # nothing for a made-up uuid.
        trips_for_user = cp.read_data(uuid=self.testUUID)

        # Something must have been read: more than five entries expected
        trip_count = len(trips_for_user)
        self.assertTrue(trip_count > 5)

        # Trips must be tied to the right uuid, so a fake one yields none
        trips_for_stranger = cp.read_data(uuid="FakeUUID")
        self.assertEqual(len(trips_for_stranger), 0)
 def __init__(self, *args, **kwargs):
     """Initialise the test case and store cp.read_data(size=100) in self.data."""
     super(SimilarityTests, self).__init__(*args, **kwargs)
     self.data = cp.read_data(size=100)
     # NOTE: a fallback that called tg.create_fake_trips() and re-read the
     # data when nothing was found used to live here; it is disabled.
     trip_count = len(self.data)
     print('there are ' + str(trip_count))
# Example #6
# 0
 def __init__(self, *args, **kwargs):
     """Set up the test case with the result of cp.read_data(size=100).

     The data is kept in self.data and its length is printed.
     """
     super(SimilarityTests, self).__init__(*args, **kwargs)
     loaded = cp.read_data(size=100)
     self.data = loaded
     # NOTE: generating fake trips (tg.create_fake_trips) when the read comes
     # back empty is intentionally switched off.
     print('there are ' + str(len(loaded)))
 def testClusterToTourModel(self):
     # Negative case: with no input the call must return something falsy
     # instead of crashing.
     empty_model = cp.cluster_to_tour_model(None, None)
     self.assertTrue(not empty_model)

     # Positive case: run read -> remove_noise -> cluster by hand, then check
     # that cp.main() yields no more entries than the cluster count n.
     trips = cp.read_data(size=100)
     trips, bins = cp.remove_noise(trips, 300)
     n, _labels, trips = cp.cluster(trips, len(bins))
     tour_dict = cp.main()
     self.assertTrue(len(tour_dict) <= n)
# Example #8
# 0
 def setUp(self):
     """Create a fresh test UUID, read the data and fetch the UUID's time series.

     Sets self.testUUID, self.data and self.ts, and logs the number of
     entries that were read.
     """
     test_uuid = uuid.uuid4()
     self.testUUID = test_uuid
     self.data = cp.read_data()
     # NOTE: the fallback that created fake trips (tg.create_fake_trips) for
     # an empty read is disabled.
     trip_count = len(self.data)
     logging.info("Found %s trips" % trip_count)
     self.ts = esta.TimeSeries.get_time_series(test_uuid)
# Example #9
# 0
def _setup(self):
    """Populate self.data, self.testUUID and self.ts on the given object.

    The data comes from cp.read_data(); the time series is looked up for a
    newly generated random UUID.
    """
    self.data = cp.read_data()
    # NOTE: creating fake trips (tg.create_fake_trips) when no data is found
    # is disabled.
    trip_count = len(self.data)
    print('there are ' + str(trip_count))
    test_uuid = uuid.uuid4()
    self.testUUID = test_uuid
    self.ts = esta.TimeSeries.get_time_series(test_uuid)
# Example #10
# 0
def _setup(self):
    """Read the data and attach a random test UUID with its time series.

    Assigns self.data, self.testUUID and self.ts, and prints how many
    entries cp.read_data() returned.
    """
    loaded = cp.read_data()
    self.data = loaded
    # NOTE: the fake-trip fallback (tg.create_fake_trips) for an empty read
    # is switched off.
    print('there are ' + str(len(loaded)))
    self.testUUID = uuid.uuid4()
    time_series = esta.TimeSeries.get_time_series(self.testUUID)
    self.ts = time_series
 def setUp(self):
     """Prepare self.testUUID, self.data and self.ts before each test.

     A random UUID is generated, cp.read_data() is stored, the number of
     entries is logged, and the UUID's time series is fetched.
     """
     self.testUUID = uuid.uuid4()
     loaded = cp.read_data()
     self.data = loaded
     # NOTE: generating fake trips (tg.create_fake_trips) for an empty read
     # is disabled.
     logging.info("Found %s trips" % len(loaded))
     time_series = esta.TimeSeries.get_time_series(self.testUUID)
     self.ts = time_series
 def __init__(self, *args, **kwargs):
     """Initialise the test case with a data sample and its cluster labels.

     Stores cp.read_data(size=100) in self.data, then calls
     feat.featurization(self.data).cluster with min_clusters and
     max_clusters both set to len(self.data) // 5 and keeps the result in
     self.labels.
     """
     super(RepresentativesTests, self).__init__(*args, **kwargs)
     self.data = cp.read_data(size=100)
     # NOTE: the fallback that created fake trips (tg.create_fake_trips) for
     # an empty read is disabled.
     print('there are ' + str(len(self.data)))
     # Floor division keeps the cluster count an integer even where "/"
     # performs true division (Python 3, or `from __future__ import division`).
     n = len(self.data) // 5
     self.labels = feat.featurization(self.data).cluster(min_clusters=n, max_clusters=n)
 def __init__(self, *args, **kwargs):
     """Load a data sample and cluster it for the tests.

     self.data holds cp.read_data(size=100); self.labels holds the result of
     feat.featurization(self.data).cluster(...) where both min_clusters and
     max_clusters are one fifth of the sample length, rounded down.
     """
     super(RepresentativesTests, self).__init__(*args, **kwargs)
     self.data = cp.read_data(size=100)
     # NOTE: generating fake trips (tg.create_fake_trips) when nothing is read
     # is switched off.
     print('there are ' + str(len(self.data)))
     # Use "//" so the cluster count cannot become a float under true
     # division (Python 3, or `from __future__ import division`).
     n = len(self.data) // 5
     self.labels = feat.featurization(self.data).cluster(min_clusters=n,
                                                         max_clusters=n)
 def testClusterToTourModelNew(self):
     # Negative case: with no input the call must return something falsy
     # instead of crashing.
     empty_model = cp.cluster_to_tour_model(None, None, old=False)
     self.assertTrue(not empty_model)

     # Positive case: run read -> remove_noise -> cluster by hand to get the
     # cluster count n, then compare it with the size of cp.main()'s result.
     user_name = "test1"
     trips = cp.read_data(uuid=self.testUUID, size=100, old=False)
     trips, bins = cp.remove_noise(trips, 300, old=False)
     n, _labels, trips = cp.cluster(trips, len(bins), old=False)
     # NOTE(review): cp.main is called for "test1" while n was computed from
     # self.testUUID's trips -- confirm that this mismatch is intended.
     tour_dict = cp.main(uuid=user_name, old=False)
     print('n = %s | len(tour_dict) = %s' % (n, len(tour_dict)))
     self.assertTrue(len(tour_dict) <= n)
 def testClusterNew(self):
     # Runs cp.cluster(old=False) on an empty list, on the trips read for
     # self.testUUID, and on those trips after cp.remove_noise.
     trips = cp.read_data(uuid=self.testUUID, size=100, old=False)

     # Empty dataset: must not crash, and all three results must be empty/zero
     clusters, labels, newdata = cp.cluster([], 10, old=False)
     empty_sizes = (len(newdata), clusters, len(labels))
     self.assertTrue(all(size == 0 for size in empty_sizes))

     # Unfiltered trips: either no clusters or between 10 and 15 of them,
     # one label per returned trip, and the returned trips equal the input.
     clusters, labels, newdata = cp.cluster(trips, 10, old=False)
     self.assertTrue(clusters == 0 or 10 <= clusters <= 15)
     self.assertTrue(len(labels) == len(newdata))
     # cmp() is a Python 2 builtin; it returns 0 when both compare equal
     self.assertTrue(cmp(newdata, trips) == 0)

     # Noise-filtered trips: either no clusters or between 20 and 30
     trips, _bins = cp.remove_noise(trips, 200, old=False)
     clusters, labels, newdata = cp.cluster(trips, 20, old=False)
     self.assertTrue(clusters == 0 or 20 <= clusters <= 30)
 def testCluster(self):
     # Runs cp.cluster on an empty list, on cp.read_data(size=100), and on
     # that data after cp.remove_noise.
     sample = cp.read_data(size=100)

     # Empty dataset: the call must not crash and must return nothing
     clusters, labels, newdata = cp.cluster([], 10)
     self.assertTrue(
         all(size == 0 for size in (len(newdata), clusters, len(labels))))

     # Unfiltered sample
     clusters, labels, newdata = cp.cluster(sample, 10)
     self.assertTrue(clusters == 0 or 10 <= clusters <= 15)
     self.assertTrue(len(labels) == len(newdata))
     # cmp() is a Python 2 builtin; 0 means the two compare equal
     self.assertTrue(cmp(newdata, sample) == 0)

     # Sample after noise removal
     sample, _bins = cp.remove_noise(sample, 200)
     clusters, labels, newdata = cp.cluster(sample, 20)
     self.assertTrue(clusters == 0 or 20 <= clusters <= 30)
    def testClusterToTourModel(self):
        # Empty input must produce a falsy result rather than a crash
        empty_model = cp.cluster_to_tour_model(None, None)
        self.assertFalse(empty_model)

        # With the trips read for self.testUUID: cp.main must not return more
        # entries than the cluster count n obtained by clustering by hand.
        trips = cp.read_data(uuid=self.testUUID)
        trips, bins = cp.remove_noise(trips, self.RADIUS)
        n, _labels, trips = cp.cluster(trips, len(bins))
        tour_dict = cp.main(uuid=self.testUUID)
        self.assertTrue(len(tour_dict) <= n)
    def testClusterToTourModel(self):
        # Case 1: no data at all -- the call has to survive and return
        # something falsy.
        self.assertFalse(cp.cluster_to_tour_model(None, None))

        # Case 2: data read for self.testUUID -- cluster it by hand to get
        # the cluster count, then check the size of cp.main's result.
        user_trips = cp.read_data(uuid=self.testUUID)
        user_trips, bins = cp.remove_noise(user_trips, self.RADIUS)
        bin_count = len(bins)
        n, _labels, user_trips = cp.cluster(user_trips, bin_count)
        tour_dict = cp.main(uuid=self.testUUID)
        self.assertTrue(len(tour_dict) <= n)
    def testRemoveNoise(self):
        # Checks cp.remove_noise on an empty dataset and on the trips read
        # for self.testUUID.
        data = cp.read_data(uuid=self.testUUID)

        # Test to make sure the code doesn't break on an empty dataset
        new_data, bins = cp.remove_noise(None, self.RADIUS)
        self.assertTrue(len(new_data) == len(bins) == 0)

        # Test to make sure some or no data was filtered out, but that nothing
        # was added after filtering. This call previously passed None again,
        # so the two assertions below never looked at filtered data.
        new_data, bins = cp.remove_noise(data, self.RADIUS)
        self.assertLessEqual(len(new_data), len(data))

        # Make sure there are not more bins than data; that wouldn't make sense
        self.assertLessEqual(len(bins), len(data))
    def testRemoveNoise(self):
        # Checks cp.remove_noise on an empty dataset and on the trips read
        # for self.testUUID.
        data = cp.read_data(uuid=self.testUUID)

        # Empty dataset: the call must not break and must return nothing
        new_data, bins = cp.remove_noise(None, self.RADIUS)
        self.assertTrue(len(new_data) == len(bins) == 0)

        # Actual data: filtering may drop entries but must never add any.
        # The original passed None here a second time, which left the checks
        # below comparing the empty result against len(data).
        new_data, bins = cp.remove_noise(data, self.RADIUS)
        self.assertLessEqual(len(new_data), len(data))

        # There cannot sensibly be more bins than data points
        self.assertLessEqual(len(bins), len(data))
# Example #21
# 0
def generate_route_clusters(user):
    """Cluster a user's trips and store the resulting route clusters.

    Reads the trips with cp.read_data(uuid=user), passes them through
    cp.remove_noise(data, 300) and cp.cluster, and builds a dict that maps
    the trip_id of the first trip carrying each cluster label to the list of
    trip_ids of all trips with that label. The dict is handed to
    update_user_routeClusters(user, clusters).

    Only labels in range(num_clusters) are considered. A label in that range
    that no trip carries is skipped (the earlier per-cluster
    labels.index() lookup raised ValueError in that case).
    """
    print("In profile, generating route clusters for %s" % user)
    data = cp.read_data(uuid=user)
    data, bins = cp.remove_noise(data, 300)
    num_clusters, labels, data = cp.cluster(data, len(bins))

    # One pass over (label, trip) pairs instead of rescanning all labels for
    # every cluster.
    valid_labels = set(range(num_clusters))
    representative = {}  # cluster label -> trip_id of its first trip
    clusters = {}
    for label, trip in zip(labels, data):
        if label not in valid_labels:
            continue
        if label not in representative:
            representative[label] = trip.trip_id
            clusters[trip.trip_id] = []
        clusters[representative[label]].append(trip.trip_id)
    update_user_routeClusters(user, clusters)
# Example #22
# 0
def generate_route_clusters(user):
    """Compute route clusters for `user` and save them.

    The user's trips (cp.read_data) are noise-filtered (cp.remove_noise with
    300) and clustered (cp.cluster). Each cluster label in
    range(num_clusters) is represented by the trip_id of the first trip that
    carries it; the result maps that trip_id to the trip_ids of every trip
    with the same label and is passed to update_user_routeClusters.

    A label in range(num_clusters) without any trip is left out of the
    result; previously labels.index() raised ValueError for it.
    """
    print("In profile, generating route clusters for %s" % user)
    data = cp.read_data(uuid=user)
    data, bins = cp.remove_noise(data, 300)
    num_clusters, labels, data = cp.cluster(data, len(bins))

    # Group in a single sweep; the old code walked the full label list once
    # per cluster.
    wanted = set(range(num_clusters))
    first_trip_of = {}  # cluster label -> trip_id used as the dict key
    clusters = {}
    for label, trip in zip(labels, data):
        if label not in wanted:
            continue
        if label not in first_trip_of:
            first_trip_of[label] = trip.trip_id
            clusters[trip.trip_id] = []
        clusters[first_trip_of[label]].append(trip.trip_id)
    update_user_routeClusters(user, clusters)
def main(colors):
    """Evaluate binning and clustering of the data against the given colors.

    Steps, in order: read the data, reformat `colors` with get_colors, remove
    noise, evaluate the bins through a similarity object, update the colors
    for the remaining bins, evaluate the bin labels, cluster, evaluate the
    cluster labels, and finally map the clusters.
    """
    trips = cp.read_data()
    colors = get_colors(trips, colors)  # make colors the right format
    trips, bins = cp.remove_noise(trips, .5, 300)

    # Evaluate the binning: build a similarity object with the same
    # arguments, hand it the bins computed above, and let it create labels.
    sim = similarity.similarity(trips, .5, 300)
    sim.bins = bins
    sim.evaluate_bins()

    colors = update_colors(bins, colors)  # reflect deleted bins
    bin_labels = sim.labels
    evaluate(numpy.array(colors), numpy.array(bin_labels))

    _num_clusters, cluster_labels, trips = cp.cluster(trips, len(bins))
    evaluate(numpy.array(colors), numpy.array(cluster_labels))

    # Pass map_individuals=True to map individual clusters as well
    map_clusters_by_groundtruth(trips, cluster_labels, colors, map_individuals=False)
    def testCluster(self):
        # Runs cp.cluster on an empty list, on the trips read for
        # self.testUUID, and on those trips after cp.remove_noise.
        trips = cp.read_data(uuid=self.testUUID)

        # Empty dataset: must not crash, and every result must be empty/zero
        clusters, labels, new_data = cp.cluster([], 10)
        empty_sizes = (len(new_data), clusters, len(labels))
        self.assertTrue(all(size == 0 for size in empty_sizes))

        # Clustering with noise: one label per returned trip, and the
        # returned trips compare equal to the input (cmp is Python 2 only)
        clusters, labels, new_data = cp.cluster(trips, 10)
        self.assertEqual(len(labels), len(new_data))
        self.assertEqual(cmp(new_data, trips), 0)

        # Clustering without noise: zero clusters, or at most ten more than
        # the number of bins
        trips, bins = cp.remove_noise(trips, self.RADIUS)
        bin_count = len(bins)
        clusters, labels, new_data = cp.cluster(trips, bin_count)
        self.assertTrue(clusters == 0 or bin_count <= clusters <= bin_count + 10)
    def testCluster(self):
        # Three scenarios for cp.cluster: an empty list, the unfiltered trips
        # of self.testUUID, and the same trips after noise removal.
        user_trips = cp.read_data(uuid=self.testUUID)

        # Scenario 1: empty dataset must not crash the program
        clusters, labels, new_data = cp.cluster([], 10)
        self.assertTrue(
            all(size == 0 for size in (len(new_data), clusters, len(labels))))

        # Scenario 2: clustering with noise still present
        clusters, labels, new_data = cp.cluster(user_trips, 10)
        self.assertEqual(len(labels), len(new_data))
        # cmp() is a Python 2 builtin; 0 means both sequences compare equal
        self.assertEqual(cmp(new_data, user_trips), 0)

        # Scenario 3: clustering after the noise has been removed
        user_trips, bins = cp.remove_noise(user_trips, self.RADIUS)
        lower = len(bins)
        clusters, labels, new_data = cp.cluster(user_trips, lower)
        self.assertTrue(clusters == 0 or lower <= clusters <= lower + 10)
# Example #26
# 0
def main(colors):
    """Run the bin and cluster evaluation for the supplied colors.

    Reads the data, converts `colors` with get_colors, removes noise,
    evaluates the bins via a similarity object, updates the colors with
    update_colors, calls evaluate on the bin labels and then on the cluster
    labels from cp.cluster, and maps the clusters at the end.
    """
    data = cp.read_data()
    colors = get_colors(data, colors)  # bring colors into the right format
    data, bins = cp.remove_noise(data, .5, 300)

    # Bin evaluation: the similarity object receives the bins computed above
    # and creates the labels from them.
    sim = similarity.similarity(data, .5, 300)
    sim.bins = bins
    sim.evaluate_bins()

    colors = update_colors(bins, colors)  # account for deleted bins
    labels_from_bins = sim.labels
    evaluate(numpy.array(colors), numpy.array(labels_from_bins))

    bin_count = len(bins)
    _clusters, labels_from_clusters, data = cp.cluster(data, bin_count)
    evaluate(numpy.array(colors), numpy.array(labels_from_clusters))

    # Set map_individuals to True to map individual clusters too
    map_clusters_by_groundtruth(
        data, labels_from_clusters, colors, map_individuals=False)
 def testRemoveNoiseNew(self):
     # Checks cp.remove_noise(old=False) on an empty dataset and on the trips
     # read for self.testUUID.
     trips = cp.read_data(uuid=self.testUUID, size=100, old=False)

     # Negative case: an empty dataset must not crash and must yield nothing
     filtered, bins = cp.remove_noise(None, 200, old=False)
     self.assertTrue(all(size == 0 for size in (len(filtered), len(bins))))

     # Positive case: filtering must never return more than it was given
     filtered, bins = cp.remove_noise(trips, 100, old=False)
     self.assertTrue(len(filtered) <= len(trips))
 def __init__(self, *args, **kwargs):
     """Initialise the test case and keep cp.read_data(size=100) in self.data."""
     super(FeaturizationTests, self).__init__(*args, **kwargs)
     loaded = cp.read_data(size=100)
     self.data = loaded
     print('there are ' + str(len(loaded)))
 def setUp(self):
     """Read the data, create a random test UUID and fetch its time series.

     Sets self.data, self.testUUID and self.ts, then prints the data length.
     """
     self.data = cp.read_data()
     test_uuid = uuid.uuid4()
     self.testUUID = test_uuid
     self.ts = esta.TimeSeries.get_time_series(test_uuid)
     trip_count = len(self.data)
     print('there are ' + str(trip_count))
 def testRemoveNoise(self):
     # Checks cp.remove_noise on an empty dataset and on cp.read_data(size=100).
     sample = cp.read_data(size=100)

     # Empty dataset: the call must not crash and both results must be empty
     kept, bins = cp.remove_noise(None, 200)
     self.assertTrue(all(size == 0 for size in (len(kept), len(bins))))

     # Sample data: the filtered result cannot be larger than the input
     kept, bins = cp.remove_noise(sample, 100)
     self.assertTrue(len(kept) <= len(sample))
 def setUp(self):
     """Prepare self.data, self.testUUID and self.ts before each test.

     The data comes from cp.read_data(); the time series is looked up for a
     freshly generated random UUID. The data length is printed last.
     """
     loaded = cp.read_data()
     self.data = loaded
     self.testUUID = uuid.uuid4()
     time_series = esta.TimeSeries.get_time_series(self.testUUID)
     self.ts = time_series
     print('there are ' + str(len(loaded)))