def testBinData(self):
    """Check bin_data() on empty input, on self.data, and on two synthetic trip groups."""
    # With no input data the bins attribute must be falsy.
    self.assertTrue(not similarity.similarity([], 300).bins)

    sim = similarity.similarity(self.data, 300)
    sim.bin_data()

    # The bin sizes must add up to the number of data points.
    binned_total = sum(len(members) for members in sim.bins)
    self.assertTrue(binned_total == len(sim.data))

    # The distinct members across all bins must also match the data size.
    distinct_members = set(
        member for members in sim.bins for member in members)
    self.assertTrue(len(distinct_members) == len(sim.data))

    # Each bin must be at least as large as the one that follows it.
    for idx in range(1, len(sim.bins)):
        self.assertTrue(len(sim.bins[idx - 1]) >= len(sim.bins[idx]))

    # Two groups of ten identical trips each are expected to give two bins.
    data = []
    now = datetime.datetime.now()
    endpoint_groups = (((47, -122), (47, -123)), ((41, -74), (42, -74)))
    for start_args, end_args in endpoint_groups:
        start = Coordinate(*start_args)
        end = Coordinate(*end_args)
        for _ in range(10):
            data.append(Trip(None, None, None, None, now, now, start, end))

    sim = similarity.similarity(data, 300)
    sim.bin_data()
    self.assertTrue(len(sim.bins) == 2)
def testBinData(self):
    """Check bin_data() on empty input, on self.data, and on two synthetic entry groups."""
    # With no input data the bins attribute must be falsy.
    self.assertTrue(not similarity.similarity([], 300).bins)

    sim = similarity.similarity(self.data, 300)
    sim.bin_data()

    # The bin sizes must add up to the number of data points.
    binned_total = sum(len(members) for members in sim.bins)
    self.assertTrue(binned_total == len(sim.data))

    # The distinct members across all bins must also match the data size.
    distinct_members = set(
        member for members in sim.bins for member in members)
    self.assertTrue(len(distinct_members) == len(sim.data))

    # Each bin must be at least as large as the one that follows it.
    for idx in range(1, len(sim.bins)):
        self.assertTrue(len(sim.bins[idx - 1]) >= len(sim.bins[idx]))

    # Two groups of ten identical trip entries are expected to give two bins.
    data = []
    now = time.time()
    endpoint_groups = (([-122, 47], [-123, 47]), ([-74, 41], [-74, 42]))
    for start, end in endpoint_groups:
        for _ in range(10):
            data.append(etatc._createTripEntry(self, now, now, start, end))

    sim = similarity.similarity(data, 300)
    sim.bin_data()
    self.assertTrue(len(sim.bins) == 2)
def testBinData(self):
    """Check bin_data() on empty input, on self.data, and on two synthetic trip groups."""
    # With no input data the bins attribute must be falsy.
    self.assertTrue(not similarity.similarity([], 300).bins)

    sim = similarity.similarity(self.data, 300)
    sim.bin_data()

    # The bin sizes must add up to the number of data points.
    binned_total = sum(len(members) for members in sim.bins)
    self.assertTrue(binned_total == len(sim.data))

    # The distinct members across all bins must also match the data size.
    distinct_members = set(
        member for members in sim.bins for member in members)
    self.assertTrue(len(distinct_members) == len(sim.data))

    # Each bin must be at least as large as the one that follows it.
    for idx in range(1, len(sim.bins)):
        self.assertTrue(len(sim.bins[idx - 1]) >= len(sim.bins[idx]))

    # Two groups of ten identical trips each are expected to give two bins.
    data = []
    now = datetime.datetime.now()
    endpoint_groups = (((47, -122), (47, -123)), ((41, -74), (42, -74)))
    for start_args, end_args in endpoint_groups:
        start = Coordinate(*start_args)
        end = Coordinate(*end_args)
        for _ in range(10):
            data.append(Trip(None, None, None, None, now, now, start, end))

    sim = similarity.similarity(data, 300)
    sim.bin_data()
    self.assertTrue(len(sim.bins) == 2)
def testBinData(self):
    """Check bin_data() on empty input, on self.data, and on two synthetic entry groups."""
    # With no input data the bins attribute must be falsy.
    self.assertTrue(not similarity.similarity([], 300).bins)

    sim = similarity.similarity(self.data, 300)
    sim.bin_data()

    # The bin sizes must add up to the number of data points.
    binned_total = sum(len(members) for members in sim.bins)
    self.assertTrue(binned_total == len(sim.data))

    # The distinct members across all bins must also match the data size.
    distinct_members = set(
        member for members in sim.bins for member in members)
    self.assertTrue(len(distinct_members) == len(sim.data))

    # Each bin must be at least as large as the one that follows it.
    for idx in range(1, len(sim.bins)):
        self.assertTrue(len(sim.bins[idx - 1]) >= len(sim.bins[idx]))

    # Two groups of ten identical trip entries are expected to give two bins.
    data = []
    now = time.time()
    endpoint_groups = (([-122, 47], [-123, 47]), ([-74, 41], [-74, 42]))
    for start, end in endpoint_groups:
        for _ in range(10):
            data.append(etatc._createTripEntry(self, now, now, start, end))

    sim = similarity.similarity(data, 300)
    sim.bin_data()
    self.assertTrue(len(sim.bins) == 2)
def testEvaluateBins(self):
    """evaluate_bins() must be falsy without data and truthy for non-empty binned data."""
    empty_result = similarity.similarity([], 300).evaluate_bins()
    self.assertTrue(not empty_result)

    sim = similarity.similarity(self.data, 300)
    sim.bin_data()
    binned_result = sim.evaluate_bins()
    # Nothing to check when the similarity object ended up holding no data.
    if not sim.data:
        return
    self.assertTrue(binned_result)
def testGraph(self):
    """Smoke-test bin_data() and delete_bins() on empty and real data.

    No assertions are made; the test only fails if one of the calls raises.
    """
    # Clear any histogram image left behind in the working directory.
    if os.path.isfile('./histogram.png'):
        os.remove('./histogram.png')
    sim = similarity.similarity([], 300)
    sim.bin_data()
    # NOTE(review): the graph() calls are disabled; presumably they used to
    # write ./histogram.png -- confirm before re-enabling.
    # sim.graph()
    sim = similarity.similarity(self.data, 300)
    # sim.graph()
    sim.bin_data()
    # sim.graph()
    sim.delete_bins()
def testMatch(self):
    """Every ordered pair of members sharing a bin must satisfy distance_helper()."""
    sim = similarity.similarity(self.data, 300)
    sim.bin_data()
    for members in sim.bins:
        for first in members:
            for second in members:
                within_radius = sim.distance_helper(first, second)
                self.assertTrue(within_radius)
def testDeleteBins(self):
    """After delete_bins() the number of remaining bins must equal sim.num."""
    sim = similarity.similarity(self.data, 300)
    sim.bin_data()
    sim.delete_bins()
    remaining_bins = len(sim.bins)
    # With no data there is nothing to compare against.
    if not sim.data:
        return
    self.assertTrue(remaining_bins == sim.num)
def testMatch(self):
    """Every ordered pair of members sharing a bin must satisfy distance_helper()."""
    sim = similarity.similarity(self.data, 300)
    sim.bin_data()
    # Lazily enumerate all (first, second) pairs drawn from the same bin.
    same_bin_pairs = (
        (first, second)
        for members in sim.bins
        for first in members
        for second in members
    )
    for first, second in same_bin_pairs:
        self.assertTrue(sim.distance_helper(first, second))
def testInit(self):
    """Exercise the similarity constructor with a bad radius, no data, and two trips."""
    # A non-numeric radius may raise ValueError; any other exception fails.
    # NOTE(review): this also passes when nothing is raised -- confirm intended.
    try:
        similarity.similarity([], 'b')
    except ValueError:
        self.assertTrue(True)
    except Exception:
        self.assertTrue(False)

    empty_sim = similarity.similarity([], 100)
    self.assertTrue(len(empty_sim.data) == 0)

    now = datetime.datetime.now()
    origin = Coordinate(47, -122)
    destination = Coordinate(47, -123)
    same_point_trip = Trip(None, None, None, None, now, now, origin, origin)
    moving_trip = Trip(None, None, None, None, now, now, origin, destination)

    # Adding the trip that starts and ends at the same point must not change
    # .data compared with passing only the other trip.
    with_both = similarity.similarity([same_point_trip, moving_trip], 100)
    moving_only = similarity.similarity([moving_trip], 100)
    self.assertTrue(with_both.data == moving_only.data)
def testInit(self):
    """Exercise the similarity constructor with a bad radius, no data, and two trips."""
    # A non-numeric radius may raise ValueError; any other exception fails.
    # NOTE(review): this also passes when nothing is raised -- confirm intended.
    try:
        similarity.similarity([], 'b')
    except ValueError:
        self.assertTrue(True)
    except Exception:
        self.assertTrue(False)

    empty_sim = similarity.similarity([], 100)
    self.assertTrue(len(empty_sim.data) == 0)

    now = datetime.datetime.now()
    origin = Coordinate(47, -122)
    destination = Coordinate(47, -123)
    same_point_trip = Trip(None, None, None, None, now, now, origin, origin)
    moving_trip = Trip(None, None, None, None, now, now, origin, destination)

    # Adding the trip that starts and ends at the same point must not change
    # .data compared with passing only the other trip.
    with_both = similarity.similarity([same_point_trip, moving_trip], 100)
    moving_only = similarity.similarity([moving_trip], 100)
    self.assertTrue(with_both.data == moving_only.data)
def remove_noise(data, radius):
    """Bin ``data`` by similarity and filter the bins with delete_bins().

    Args:
        data: the items to bin; a falsy value short-circuits the function.
        radius: passed straight through to ``similarity.similarity``.

    Returns:
        A ``(newdata, bins)`` pair read from the similarity object after
        filtering, or ``([], [])`` when ``data`` is falsy.
    """
    # Imported locally so the block does not depend on a module-level import.
    import logging

    if not data:
        return [], []
    sim = similarity.similarity(data, radius)
    sim.bin_data()
    # Report through logging rather than a Python 2 ``print`` statement: the
    # statement form is a syntax error on Python 3 and library code should
    # not write to stdout.
    logging.debug('number of bins before filtering: %d', len(sim.bins))
    sim.delete_bins()
    logging.debug('number of bins after filtering: %d', len(sim.bins))
    return sim.newdata, sim.bins
def remove_noise(data, radius, old=True):
    """Bin ``data`` by similarity and filter the bins with delete_bins().

    ``radius`` and ``old`` are passed straight through to
    ``similarity.similarity``. Returns the ``(newdata, bins)`` pair read from
    the similarity object, or ``([], [])`` when ``data`` is falsy.
    """
    if not data:
        return [], []

    binner = similarity.similarity(data, radius, old)
    binner.bin_data()
    bins_before = len(binner.bins)
    logging.debug('number of bins before filtering: %d', bins_before)

    binner.delete_bins()
    bins_after = len(binner.bins)
    logging.debug('number of bins after filtering: %d', bins_after)

    return binner.newdata, binner.bins
def remove_noise(data, radius):
    """Bin ``data`` by similarity and filter the bins with delete_bins().

    ``radius`` is passed straight through to ``similarity.similarity``.
    Returns the ``(newdata, bins)`` pair read from the similarity object, or
    ``([], [])`` when ``data`` is falsy.
    """
    if not data:
        return [], []

    binner = similarity.similarity(data, radius)
    binner.bin_data()
    bins_before = len(binner.bins)
    logging.debug('number of bins before filtering: %d', bins_before)

    binner.delete_bins()
    bins_after = len(binner.bins)
    logging.debug('number of bins after filtering: %d', bins_after)

    return binner.newdata, binner.bins
def testDistance(self):
    """distance() must be truthy for the first end point and falsy for the second."""
    # NOTE(review): presumably Coordinate takes (lat, lon); if so, these
    # values look swapped (-122.26 would be a latitude) -- verify.
    start = Coordinate(-122.259447, 37.875174)
    near_end = Coordinate(-122.259279, 37.875479)
    far_end = Coordinate(-122.252287, 37.869569)

    now = datetime.datetime.now()
    # The trips are constructed but not used by the assertions below.
    Trip(None, None, None, None, now, now, start, near_end)
    Trip(None, None, None, None, now, now, start, far_end)

    sim = similarity.similarity(self.data, 300)
    near_ok = sim.distance(start.lat, start.lon, near_end.lat, near_end.lon)
    self.assertTrue(near_ok)
    far_ok = sim.distance(start.lat, start.lon, far_end.lat, far_end.lon)
    self.assertTrue(not far_ok)
def testDistance(self):
    """distance() must be truthy for the first end point and falsy for the second."""
    start = [-122.259447, 37.875174]
    near_end = [-122.259279, 37.875479]
    far_end = [-122.252287, 37.869569]

    now = time.time()
    # The trip entries are created but not used by the assertions below.
    etatc._createTripEntry(self, now, now, start, near_end)
    etatc._createTripEntry(self, now, now, start, far_end)

    sim = similarity.similarity(self.data, 300)
    # distance() receives index 1 before index 0 of each point.
    near_ok = sim.distance(start[1], start[0], near_end[1], near_end[0])
    self.assertTrue(near_ok)
    far_ok = sim.distance(start[1], start[0], far_end[1], far_end[0])
    self.assertTrue(not far_ok)
def testElbowDistance(self):
    """delete_bins() on six hand-made bins must leave sim.num equal to 2."""
    now = time.time()
    entry = etatc._createTripEntry(self, now, now, [-122, 47], [-123, 47])
    # Eleven references to the same entry, matching indices 0..10 used below.
    sim = similarity.similarity([entry] * 11, 300)
    # Replace the bins with fixed ones of sizes 4, 3, 1, 1, 1, 1.
    sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
    sim.delete_bins()
    self.assertTrue(sim.num == 2)
def testElbowDistance(self):
    """delete_bins() on six hand-made bins must leave sim.num equal to 2."""
    now = time.time()
    entry = etatc._createTripEntry(self, now, now, [-122, 47], [-123, 47])
    # Eleven references to the same entry, matching indices 0..10 used below.
    sim = similarity.similarity([entry] * 11, 300)
    # Replace the bins with fixed ones of sizes 4, 3, 1, 1, 1, 1.
    sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
    sim.delete_bins()
    self.assertTrue(sim.num == 2)
def testDistance(self):
    """distance() must be truthy for the first end point and falsy for the second."""
    # NOTE(review): presumably Coordinate takes (lat, lon); if so, these
    # values look swapped (-122.26 would be a latitude) -- verify.
    start = Coordinate(-122.259447, 37.875174)
    near_end = Coordinate(-122.259279, 37.875479)
    far_end = Coordinate(-122.252287, 37.869569)

    now = datetime.datetime.now()
    # The trips are constructed but not used by the assertions below.
    Trip(None, None, None, None, now, now, start, near_end)
    Trip(None, None, None, None, now, now, start, far_end)

    sim = similarity.similarity(self.data, 300)
    near_ok = sim.distance(start.lat, start.lon, near_end.lat, near_end.lon)
    self.assertTrue(near_ok)
    far_ok = sim.distance(start.lat, start.lon, far_end.lat, far_end.lon)
    self.assertTrue(not far_ok)
def testElbowDistance(self):
    """delete_bins() on six hand-made bins must leave sim.num equal to 2."""
    origin = Coordinate(47, -122)
    destination = Coordinate(47, -123)
    now = datetime.datetime.now()
    trip = Trip(None, None, None, None, now, now, origin, destination)
    # Eleven references to the same trip, matching indices 0..10 used below.
    sim = similarity.similarity([trip] * 11, 300)
    # Replace the bins with fixed ones of sizes 4, 3, 1, 1, 1, 1.
    sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
    sim.delete_bins()
    self.assertTrue(sim.num == 2)
def testElbowDistance(self):
    """delete_bins() on six hand-made bins must leave sim.num equal to 2."""
    origin = Coordinate(47, -122)
    destination = Coordinate(47, -123)
    now = datetime.datetime.now()
    trip = Trip(None, None, None, None, now, now, origin, destination)
    # Eleven references to the same trip, matching indices 0..10 used below.
    sim = similarity.similarity([trip] * 11, 300)
    # Replace the bins with fixed ones of sizes 4, 3, 1, 1, 1, 1.
    sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
    sim.delete_bins()
    self.assertTrue(sim.num == 2)
def testInit(self):
    """Exercise the similarity constructor with a bad radius, no data, and two entries."""
    # A non-numeric radius may raise ValueError; any other exception fails.
    # NOTE(review): this also passes when nothing is raised -- confirm intended.
    try:
        similarity.similarity([], 'b')
    except ValueError:
        self.assertTrue(True)
    except Exception:
        self.assertTrue(False)

    logging.debug("STARTING init test")
    empty_sim = similarity.similarity([], 100)
    self.assertTrue(len(empty_sim.data) == 0)

    now = time.time()
    origin = [-122, 47]
    destination = [-123, 47]
    same_point_entry = etatc._createTripEntry(self, now, now, origin, origin)
    moving_entry = etatc._createTripEntry(self, now, now, origin, destination)

    # Adding the entry that starts and ends at the same point must not change
    # .data compared with passing only the other entry.
    with_both = similarity.similarity([same_point_entry, moving_entry], 100)
    logging.debug("sim.data = %s" % with_both.data)
    moving_only = similarity.similarity([moving_entry], 100)
    logging.debug("simmy.data = %s" % moving_only.data)
    self.assertTrue(with_both.data == moving_only.data)
def testInit(self):
    """Exercise the similarity constructor with a bad radius, no data, and two entries."""
    # A non-numeric radius may raise ValueError; any other exception fails.
    # NOTE(review): this also passes when nothing is raised -- confirm intended.
    try:
        similarity.similarity([], 'b')
    except ValueError:
        self.assertTrue(True)
    except Exception:
        self.assertTrue(False)

    logging.debug("STARTING init test")
    empty_sim = similarity.similarity([], 100)
    self.assertTrue(len(empty_sim.data) == 0)

    now = time.time()
    origin = [-122, 47]
    destination = [-123, 47]
    same_point_entry = etatc._createTripEntry(self, now, now, origin, origin)
    moving_entry = etatc._createTripEntry(self, now, now, origin, destination)

    # Adding the entry that starts and ends at the same point must not change
    # .data compared with passing only the other entry.
    with_both = similarity.similarity([same_point_entry, moving_entry], 100)
    logging.debug("sim.data = %s" % with_both.data)
    moving_only = similarity.similarity([moving_entry], 100)
    logging.debug("simmy.data = %s" % moving_only.data)
    self.assertTrue(with_both.data == moving_only.data)
def main(colors):
    """Read the data, filter noise, then evaluate the bins and the clustering.

    ``colors`` is reformatted with get_colors() and updated with
    update_colors() before being compared against the bin labels and then
    the cluster labels.
    """
    # Get the data and put the colors into the right format.
    data = cp.read_data()
    colors = get_colors(data, colors)

    # Remove noise from the data.
    data, bins = cp.remove_noise(data, .5, 300)

    # Evaluate the binning: build a similarity object, reuse the bins that
    # were calculated above, and evaluate them to create the labels.
    sim = similarity.similarity(data, .5, 300)
    sim.bins = bins
    sim.evaluate_bins()

    # Update the colors to reflect deleted bins, then evaluate the bins.
    colors = update_colors(bins, colors)
    bin_labels = sim.labels
    evaluate(numpy.array(colors), numpy.array(bin_labels))

    # Cluster, then evaluate the clustering.
    _clusters, cluster_labels, data = cp.cluster(data, len(bins))
    evaluate(numpy.array(colors), numpy.array(cluster_labels))

    # Map clusters; make the last parameter true to map individual clusters.
    map_clusters_by_groundtruth(
        data, cluster_labels, colors, map_individuals=False)
def main(colors):
    """Read the data, filter noise, then evaluate the bins and the clustering.

    ``colors`` is reformatted with get_colors() and updated with
    update_colors() before being compared against the bin labels and then
    the cluster labels.
    """
    # Get the data and put the colors into the right format.
    data = cp.read_data()
    colors = get_colors(data, colors)

    # Remove noise from the data.
    data, bins = cp.remove_noise(data, .5, 300)

    # Evaluate the binning: build a similarity object, reuse the bins that
    # were calculated above, and evaluate them to create the labels.
    sim = similarity.similarity(data, .5, 300)
    sim.bins = bins
    sim.evaluate_bins()

    # Update the colors to reflect deleted bins, then evaluate the bins.
    colors = update_colors(bins, colors)
    bin_labels = sim.labels
    evaluate(numpy.array(colors), numpy.array(bin_labels))

    # Cluster, then evaluate the clustering.
    _clusters, cluster_labels, data = cp.cluster(data, len(bins))
    evaluate(numpy.array(colors), numpy.array(cluster_labels))

    # Map clusters; make the last parameter true to map individual clusters.
    map_clusters_by_groundtruth(
        data, cluster_labels, colors, map_individuals=False)