def testBinData(self):
        """Check that bin_data() partitions the data into size-ordered bins.

        Verifies that an empty similarity object has no bins, that binning
        the fixture data assigns every element to exactly one bin, that the
        bins come out in non-increasing size order, and that two groups of
        ten identical trips each end up in exactly two bins.
        """
        empty_sim = similarity.similarity([], 300)
        self.assertFalse(empty_sim.bins)

        sim = similarity.similarity(self.data, 300)
        sim.bin_data()

        # The bin sizes must add up to the number of data points ...
        total_binned = sum(len(members) for members in sim.bins)
        self.assertEqual(total_binned, len(sim.data))

        # ... and no member may appear in more than one bin.
        distinct_members = set(
            member for members in sim.bins for member in members)
        self.assertEqual(len(distinct_members), len(sim.data))

        # Each bin must be at least as large as the one that follows it.
        for current, following in zip(sim.bins, sim.bins[1:]):
            self.assertGreaterEqual(len(current), len(following))

        # Ten identical trips for each of two different start/end pairs.
        now = datetime.datetime.now()
        start = Coordinate(47, -122)
        end = Coordinate(47, -123)
        data = [Trip(None, None, None, None, now, now, start, end)
                for _ in range(10)]
        start = Coordinate(41, -74)
        end = Coordinate(42, -74)
        data.extend(Trip(None, None, None, None, now, now, start, end)
                    for _ in range(10))

        sim = similarity.similarity(data, 300)
        sim.bin_data()
        self.assertEqual(len(sim.bins), 2)
    def testBinData(self):
        """Check that bin_data() partitions the data into size-ordered bins.

        Verifies that an empty similarity object has no bins, that binning
        the fixture data assigns every element to exactly one bin, that the
        bins come out in non-increasing size order, and that two groups of
        ten identical trip entries each end up in exactly two bins.
        """
        empty_sim = similarity.similarity([], 300)
        self.assertFalse(empty_sim.bins)

        sim = similarity.similarity(self.data, 300)
        sim.bin_data()

        # The bin sizes must add up to the number of data points ...
        total_binned = sum(len(members) for members in sim.bins)
        self.assertEqual(total_binned, len(sim.data))

        # ... and no member may appear in more than one bin.
        distinct_members = set(
            member for members in sim.bins for member in members)
        self.assertEqual(len(distinct_members), len(sim.data))

        # Each bin must be at least as large as the one that follows it.
        for current, following in zip(sim.bins, sim.bins[1:]):
            self.assertGreaterEqual(len(current), len(following))

        # Ten identical trip entries for each of two different start/end
        # pairs (two-element lists; presumably [lon, lat] - TODO confirm).
        now = time.time()
        start = [-122, 47]
        end = [-123, 47]
        data = [etatc._createTripEntry(self, now, now, start, end)
                for _ in range(10)]
        start = [-74, 41]
        end = [-74, 42]
        data.extend(etatc._createTripEntry(self, now, now, start, end)
                    for _ in range(10))

        sim = similarity.similarity(data, 300)
        sim.bin_data()
        self.assertEqual(len(sim.bins), 2)
Example #3
0
    def testBinData(self):
        """Exercise bin_data() on empty, fixture, and hand-built trip data.

        Asserts: no bins for empty input; every fixture element lands in
        exactly one bin; bin sizes never increase from one bin to the next;
        two sets of ten identical trips produce exactly two bins.
        """
        self.assertFalse(similarity.similarity([], 300).bins)

        sim = similarity.similarity(self.data, 300)
        sim.bin_data()

        # Coverage: summed bin sizes equal the data size.
        binned_count = sum(len(group) for group in sim.bins)
        self.assertEqual(binned_count, len(sim.data))

        # Uniqueness: flattening the bins yields no duplicates.
        seen = set()
        for group in sim.bins:
            seen.update(group)
        self.assertEqual(len(seen), len(sim.data))

        # Ordering: bins are sorted from largest to smallest.
        for bigger, smaller in zip(sim.bins, sim.bins[1:]):
            self.assertGreaterEqual(len(bigger), len(smaller))

        now = datetime.datetime.now()
        data = []
        # (start, end) coordinate pairs; ten identical trips are built for each.
        endpoint_pairs = [
            (Coordinate(47, -122), Coordinate(47, -123)),
            (Coordinate(41, -74), Coordinate(42, -74)),
        ]
        for start, end in endpoint_pairs:
            data.extend(Trip(None, None, None, None, now, now, start, end)
                        for _ in range(10))

        sim = similarity.similarity(data, 300)
        sim.bin_data()
        self.assertEqual(len(sim.bins), 2)
Example #4
0
    def testBinData(self):
        """Exercise bin_data() on empty, fixture, and hand-built trip entries.

        Asserts: no bins for empty input; every fixture element lands in
        exactly one bin; bin sizes never increase from one bin to the next;
        two sets of ten identical trip entries produce exactly two bins.
        """
        self.assertFalse(similarity.similarity([], 300).bins)

        sim = similarity.similarity(self.data, 300)
        sim.bin_data()

        # Coverage: summed bin sizes equal the data size.
        binned_count = sum(len(group) for group in sim.bins)
        self.assertEqual(binned_count, len(sim.data))

        # Uniqueness: flattening the bins yields no duplicates.
        seen = set()
        for group in sim.bins:
            seen.update(group)
        self.assertEqual(len(seen), len(sim.data))

        # Ordering: bins are sorted from largest to smallest.
        for bigger, smaller in zip(sim.bins, sim.bins[1:]):
            self.assertGreaterEqual(len(bigger), len(smaller))

        now = time.time()
        data = []
        # (start, end) pairs; ten identical entries are created for each, in
        # the same order as before because _createTripEntry may have side
        # effects (its implementation is not visible here).
        endpoint_pairs = [
            ([-122, 47], [-123, 47]),
            ([-74, 41], [-74, 42]),
        ]
        for start, end in endpoint_pairs:
            for _ in range(10):
                data.append(
                    etatc._createTripEntry(self, now, now, start, end))

        sim = similarity.similarity(data, 300)
        sim.bin_data()
        self.assertEqual(len(sim.bins), 2)
Example #5
0
 def testEvaluateBins(self):
     """evaluate_bins() is falsy without data and truthy once real data is binned."""
     empty_result = similarity.similarity([], 300).evaluate_bins()
     self.assertTrue(not empty_result)

     populated = similarity.similarity(self.data, 300)
     populated.bin_data()
     evaluation = populated.evaluate_bins()
     # Nothing to verify when the fixture supplied no data.
     if not populated.data:
         return
     self.assertTrue(evaluation)
 def testEvaluateBins(self):
     """Check evaluate_bins() on an empty object and on the binned fixture data."""
     # Without any data (and without binning) the result must be falsy.
     no_data_sim = similarity.similarity([], 300)
     self.assertTrue(not no_data_sim.evaluate_bins())

     # With fixture data, bin first, then expect a truthy evaluation.
     fixture_sim = similarity.similarity(self.data, 300)
     fixture_sim.bin_data()
     outcome = fixture_sim.evaluate_bins()
     if fixture_sim.data:
         self.assertTrue(outcome)
 def testGraph(self):
     """Smoke-test binning and bin deletion around the (disabled) graph step.

     Any stale ./histogram.png is removed first. The sim.graph() calls
     themselves are commented out, so only bin_data() and delete_bins()
     are exercised and nothing is asserted.
     """
     histogram_path = './histogram.png'
     if os.path.isfile(histogram_path):
         os.remove(histogram_path)

     # Binning an empty data set.
     similarity.similarity([], 300).bin_data()
     # sim.graph()

     populated = similarity.similarity(self.data, 300)
     # sim.graph()
     populated.bin_data()
     # sim.graph()
     populated.delete_bins()
Example #8
0
 def testGraph(self):
     """Run bin_data()/delete_bins() with graphing disabled; no assertions.

     Deletes a leftover ./histogram.png before starting. All sim.graph()
     calls are commented out in this test.
     """
     stale_plot = './histogram.png'
     if os.path.isfile(stale_plot):
         os.remove(stale_plot)

     empty_sim = similarity.similarity([], 300)
     empty_sim.bin_data()
     # empty_sim.graph()  (disabled)

     fixture_sim = similarity.similarity(self.data, 300)
     # fixture_sim.graph()  (disabled)
     fixture_sim.bin_data()
     # fixture_sim.graph()  (disabled)
     fixture_sim.delete_bins()
Example #9
0
 def testMatch(self):
     """Every ordered pair of members sharing a bin must pass distance_helper()."""
     sim = similarity.similarity(self.data, 300)
     sim.bin_data()
     # Lazily enumerate all ordered pairs within each bin, including each
     # member paired with itself.
     same_bin_pairs = (
         (first, second)
         for members in sim.bins
         for first in members
         for second in members
     )
     for first, second in same_bin_pairs:
         self.assertTrue(sim.distance_helper(first, second))
Example #10
0
 def testDeleteBins(self):
     """After delete_bins(), the number of remaining bins must equal sim.num."""
     sim = similarity.similarity(self.data, 300)
     sim.bin_data()
     sim.delete_bins()
     remaining = len(sim.bins)
     # The check only applies when the fixture provided data.
     if sim.data:
         # assertEqual reports both values on failure, unlike assertTrue(a == b).
         self.assertEqual(remaining, sim.num)
 def testMatch(self):
     """Members placed in the same bin must all satisfy distance_helper() pairwise."""
     sim = similarity.similarity(self.data, 300)
     sim.bin_data()
     for members in sim.bins:
         # Compare each member against every member of its bin (itself included).
         for left in members:
             for right in members:
                 matched = sim.distance_helper(left, right)
                 self.assertTrue(matched)
 def testDeleteBins(self):
     """delete_bins() must leave exactly sim.num bins when there is data."""
     sim = similarity.similarity(self.data, 300)
     sim.bin_data()
     sim.delete_bins()
     bins_left = len(sim.bins)
     # With an empty fixture there is nothing to compare.
     if sim.data:
         # assertEqual shows the mismatching counts if the check fails.
         self.assertEqual(bins_left, sim.num)
    def testInit(self):
        """Check constructor behaviour for a bad radius, empty data, and real trips.

        A non-numeric radius may raise ValueError, but no other exception
        type. Empty input yields empty data. Constructing from [t1, t2] must
        give the same data as constructing from [t2] alone, where t1 has
        identical start and end points (presumably such trips are dropped by
        the constructor - TODO confirm).
        """
        # NOTE(review): as before, the test also passes when no exception is
        # raised at all; switch to assertRaises(ValueError) if raising is
        # mandatory.
        try:
            similarity.similarity([], 'b')
        except ValueError:
            pass
        except Exception as exc:
            self.fail("expected ValueError for non-numeric radius, got %r"
                      % (exc,))

        sim = similarity.similarity([], 100)
        self.assertEqual(len(sim.data), 0)

        now = datetime.datetime.now()
        start = Coordinate(47, -122)
        end = Coordinate(47, -123)
        t1 = Trip(None, None, None, None, now, now, start, start)
        t2 = Trip(None, None, None, None, now, now, start, end)
        sim = similarity.similarity([t1, t2], 100)
        simmy = similarity.similarity([t2], 100)
        self.assertEqual(sim.data, simmy.data)
Example #14
0
    def testInit(self):
        """Validate similarity construction with bad, empty, and trip input.

        Tolerates ValueError (and only ValueError) for a string radius,
        expects empty data for empty input, and expects a trip whose start
        equals its end (t1) not to change the resulting data compared with
        passing t2 alone.
        """
        # NOTE(review): a constructor that raises nothing still passes here,
        # exactly as in the original; confirm whether that is intended.
        try:
            similarity.similarity([], 'b')
        except ValueError:
            pass
        except Exception as exc:
            self.fail("expected ValueError for non-numeric radius, got %r"
                      % (exc,))

        empty_sim = similarity.similarity([], 100)
        self.assertEqual(len(empty_sim.data), 0)

        now = datetime.datetime.now()
        start = Coordinate(47, -122)
        end = Coordinate(47, -123)
        t1 = Trip(None, None, None, None, now, now, start, start)
        t2 = Trip(None, None, None, None, now, now, start, end)
        sim = similarity.similarity([t1, t2], 100)
        simmy = similarity.similarity([t2], 100)
        self.assertEqual(sim.data, simmy.data)
def remove_noise(data, radius):
    """Bin the data and drop the bins that delete_bins() filters out.

    Args:
        data: the items to bin; a falsy value short-circuits.
        radius: forwarded to similarity.similarity as its second argument.

    Returns:
        A (sim.newdata, sim.bins) pair after filtering, or ([], []) when
        data is empty or None.
    """
    if not data:
        return [], []
    sim = similarity.similarity(data, radius)
    sim.bin_data()
    # A parenthesised single-argument print is valid in Python 2 and 3 and
    # emits the same text in both.
    print('number of bins before filtering: ' + str(len(sim.bins)))
    sim.delete_bins()
    print('number of bins after filtering: ' + str(len(sim.bins)))
    return sim.newdata, sim.bins
def remove_noise(data, radius, old=True):
    """Bin the data and drop the bins that delete_bins() filters out.

    Args:
        data: the items to bin; a falsy value short-circuits.
        radius: forwarded to similarity.similarity as its second argument.
        old: forwarded to similarity.similarity as its third argument
            (its meaning is not visible here).

    Returns:
        A (sim.newdata, sim.bins) pair after filtering, or ([], []) when
        data is empty or None.
    """
    if not data:
        return [], []
    sim = similarity.similarity(data, radius, old)
    sim.bin_data()
    # Pass the count as a logging argument so the message is only formatted
    # when DEBUG output is actually enabled.
    logging.debug('number of bins before filtering: %d', len(sim.bins))
    sim.delete_bins()
    logging.debug('number of bins after filtering: %d', len(sim.bins))
    return sim.newdata, sim.bins
Example #17
0
def remove_noise(data, radius):
    """Bin the data and drop the bins that delete_bins() filters out.

    Args:
        data: the items to bin; a falsy value short-circuits.
        radius: forwarded to similarity.similarity as its second argument.

    Returns:
        A (sim.newdata, sim.bins) pair after filtering, or ([], []) when
        data is empty or None.
    """
    if not data:
        return [], []
    sim = similarity.similarity(data, radius)
    sim.bin_data()
    # Logging arguments defer string formatting until a handler needs it.
    logging.debug('number of bins before filtering: %d', len(sim.bins))
    sim.delete_bins()
    logging.debug('number of bins after filtering: %d', len(sim.bins))
    return sim.newdata, sim.bins
 def testDistance(self):
     """sim.distance() is truthy for the nearby end point and falsy for the far one."""
     # NOTE(review): the first argument of each Coordinate (-122.x) looks
     # like a longitude; confirm this matches Coordinate's argument order.
     origin = Coordinate(-122.259447, 37.875174)
     near = Coordinate(-122.259279, 37.875479)
     far = Coordinate(-122.252287, 37.869569)
     timestamp = datetime.datetime.now()
     # These trips are constructed but not used by the assertions below.
     near_trip = Trip(None, None, None, None, timestamp, timestamp, origin, near)
     far_trip = Trip(None, None, None, None, timestamp, timestamp, origin, far)
     sim = similarity.similarity(self.data, 300)

     def close_to_origin(dest):
         return sim.distance(origin.lat, origin.lon, dest.lat, dest.lon)

     self.assertTrue(close_to_origin(near))
     self.assertTrue(not close_to_origin(far))
Example #19
0
 def testDistance(self):
     """sim.distance() is truthy for the nearby end point and falsy for the far one."""
     origin = [-122.259447, 37.875174]
     near = [-122.259279, 37.875479]
     far = [-122.252287, 37.869569]
     timestamp = time.time()
     # The created entries are not used by the assertions below; the calls
     # are kept because _createTripEntry may have side effects.
     near_entry = etatc._createTripEntry(self, timestamp, timestamp, origin, near)
     far_entry = etatc._createTripEntry(self, timestamp, timestamp, origin, far)
     sim = similarity.similarity(self.data, 300)

     def close_to_origin(dest):
         # Points are passed with index 1 first, then index 0.
         return sim.distance(origin[1], origin[0], dest[1], dest[0])

     self.assertTrue(close_to_origin(near))
     self.assertTrue(not close_to_origin(far))
 def testDistance(self):
     """Check sim.distance() against one close and one distant end point."""
     begin = [-122.259447, 37.875174]
     close_end = [-122.259279, 37.875479]
     distant_end = [-122.252287, 37.869569]
     ts = time.time()
     # Entries are created (possibly with side effects) but never read.
     close_trip = etatc._createTripEntry(self, ts, ts, begin, close_end)
     distant_trip = etatc._createTripEntry(self, ts, ts, begin, distant_end)
     sim = similarity.similarity(self.data, 300)

     # Each point is passed as (element 1, element 0).
     close_result = sim.distance(begin[1], begin[0],
                                 close_end[1], close_end[0])
     self.assertTrue(close_result)
     distant_result = sim.distance(begin[1], begin[0],
                                   distant_end[1], distant_end[0])
     self.assertTrue(not distant_result)
 def testElbowDistance(self):
     """delete_bins() must set sim.num to 2 for bins of sizes 4, 3, 1, 1, 1, 1."""
     start = [-122, 47]
     end = [-123, 47]
     now = time.time()
     trip = etatc._createTripEntry(self, now, now, start, end)
     # Eleven references to the same trip entry.
     data = [trip] * 11
     sim = similarity.similarity(data, 300)
     # Assign hand-built bins directly instead of calling bin_data().
     sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
     sim.delete_bins()
     # assertEqual reports the actual value of sim.num on failure.
     self.assertEqual(sim.num, 2)
Example #22
0
 def testElbowDistance(self):
     """With hand-built bins of sizes 4, 3, 1, 1, 1, 1, delete_bins() yields num == 2."""
     start = [-122, 47]
     end = [-123, 47]
     now = time.time()
     entry = etatc._createTripEntry(self, now, now, start, end)
     # The same entry repeated eleven times.
     data = [entry] * 11
     # Bins are set by hand, so bin_data() is deliberately not called.
     bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
     sim = similarity.similarity(data, 300)
     sim.bins = bins
     sim.delete_bins()
     # Use assertEqual so a failure shows what sim.num actually was.
     self.assertEqual(sim.num, 2)
Example #23
0
 def testDistance(self):
     """Check sim.distance() against one close and one distant end point."""
     # NOTE(review): the Coordinate arguments look like (longitude, latitude);
     # confirm that this is the order Coordinate expects.
     begin = Coordinate(-122.259447, 37.875174)
     close_end = Coordinate(-122.259279, 37.875479)
     distant_end = Coordinate(-122.252287, 37.869569)
     ts = datetime.datetime.now()
     # Built but not referenced by the assertions below.
     close_trip = Trip(None, None, None, None, ts, ts, begin, close_end)
     distant_trip = Trip(None, None, None, None, ts, ts, begin, distant_end)
     sim = similarity.similarity(self.data, 300)

     close_result = sim.distance(begin.lat, begin.lon,
                                 close_end.lat, close_end.lon)
     self.assertTrue(close_result)
     distant_result = sim.distance(begin.lat, begin.lon,
                                   distant_end.lat, distant_end.lon)
     self.assertTrue(not distant_result)
Example #24
0
 def testElbowDistance(self):
     """delete_bins() must set sim.num to 2 for bins of sizes 4, 3, 1, 1, 1, 1."""
     start = Coordinate(47, -122)
     end = Coordinate(47, -123)
     now = datetime.datetime.now()
     trip = Trip(None, None, None, None, now, now, start, end)
     # Eleven references to the same trip.
     data = [trip] * 11
     sim = similarity.similarity(data, 300)
     # Assign hand-built bins directly instead of calling bin_data().
     sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
     sim.delete_bins()
     # assertEqual reports the actual value of sim.num on failure.
     self.assertEqual(sim.num, 2)
 def testElbowDistance(self):
     """With hand-built bins of sizes 4, 3, 1, 1, 1, 1, delete_bins() yields num == 2."""
     start = Coordinate(47, -122)
     end = Coordinate(47, -123)
     now = datetime.datetime.now()
     single_trip = Trip(None, None, None, None, now, now, start, end)
     # The same trip repeated eleven times.
     data = [single_trip] * 11
     # Bins are set by hand, so bin_data() is deliberately not called.
     bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
     sim = similarity.similarity(data, 300)
     sim.bins = bins
     sim.delete_bins()
     # Use assertEqual so a failure shows what sim.num actually was.
     self.assertEqual(sim.num, 2)
    def testInit(self):
        """Check constructor behaviour for a bad radius, empty data, and real trips.

        A non-numeric radius may raise ValueError, but no other exception
        type. Empty input yields empty data. Constructing from [t1, t2] must
        give the same data as constructing from [t2] alone, where t1 has
        identical start and end points (presumably such trips are dropped by
        the constructor - TODO confirm).
        """
        # NOTE(review): as before, the test also passes when no exception is
        # raised at all; switch to assertRaises(ValueError) if raising is
        # mandatory.
        try:
            similarity.similarity([], 'b')
        except ValueError:
            pass
        except Exception as exc:
            self.fail("expected ValueError for non-numeric radius, got %r"
                      % (exc,))

        logging.debug("STARTING init test")
        sim = similarity.similarity([], 100)
        self.assertEqual(len(sim.data), 0)

        now = time.time()
        start = [-122, 47]
        end = [-123, 47]
        t1 = etatc._createTripEntry(self, now, now, start, start)
        t2 = etatc._createTripEntry(self, now, now, start, end)
        sim = similarity.similarity([t1, t2], 100)
        # Logging arguments defer formatting until DEBUG output is enabled.
        logging.debug("sim.data = %s", sim.data)
        simmy = similarity.similarity([t2], 100)
        logging.debug("simmy.data = %s", simmy.data)
        self.assertEqual(sim.data, simmy.data)
Example #27
0
    def testInit(self):
        """Validate similarity construction with bad, empty, and trip input.

        Tolerates ValueError (and only ValueError) for a string radius,
        expects empty data for empty input, and expects a trip entry whose
        start equals its end (t1) not to change the resulting data compared
        with passing t2 alone.
        """
        # NOTE(review): a constructor that raises nothing still passes here,
        # exactly as in the original; confirm whether that is intended.
        try:
            similarity.similarity([], 'b')
        except ValueError:
            pass
        except Exception as exc:
            self.fail("expected ValueError for non-numeric radius, got %r"
                      % (exc,))

        logging.debug("STARTING init test")
        empty_sim = similarity.similarity([], 100)
        self.assertEqual(len(empty_sim.data), 0)

        now = time.time()
        start = [-122, 47]
        end = [-123, 47]
        t1 = etatc._createTripEntry(self, now, now, start, start)
        t2 = etatc._createTripEntry(self, now, now, start, end)
        sim = similarity.similarity([t1, t2], 100)
        # Let logging do the %-formatting lazily.
        logging.debug("sim.data = %s", sim.data)
        simmy = similarity.similarity([t2], 100)
        logging.debug("simmy.data = %s", simmy.data)
        self.assertEqual(sim.data, simmy.data)
def main(colors):
    """Run the binning and clustering evaluation pipeline, then map the clusters.

    Reads data with cp.read_data(), filters it with cp.remove_noise(),
    evaluates the resulting bins and then the clusters from cp.cluster()
    against the colors via evaluate(), and finally calls
    map_clusters_by_groundtruth(). Nothing is returned.

    Args:
        colors: passed to get_colors() together with the data; presumably
            the ground-truth labelling - TODO confirm against get_colors().

    NOTE(review): the meaning of the literal arguments .5 and 300 passed to
    cp.remove_noise() and similarity.similarity() is not visible here.
    """
    data = cp.read_data() # load the data
    colors = get_colors(data, colors) # put colors into the required format
    data, bins = cp.remove_noise(data, .5, 300) # remove noise from the data
    ###### the next few lines are to evaluate the binning
    sim = similarity.similarity(data, .5, 300) # create a similarity object
    sim.bins = bins # reuse the bins computed above instead of re-binning
    sim.evaluate_bins() # evaluate them to create the labels
    ######
    colors = update_colors(bins, colors) # update the colors to reflect deleted bins
    labels = sim.labels # labels produced by evaluate_bins()
    evaluate(numpy.array(colors), numpy.array(labels)) # evaluate the bins
    clusters, labels, data = cp.cluster(data, len(bins)) # cluster, using the bin count
    evaluate(numpy.array(colors), numpy.array(labels)) # evaluate the clustering
    map_clusters_by_groundtruth(data, labels, colors, map_individuals=False) # map clusters; set map_individuals=True to map individual clusters
Example #29
0
def main(colors):
    """Run the binning and clustering evaluation pipeline, then map the clusters.

    Reads data with cp.read_data(), filters it with cp.remove_noise(),
    evaluates the resulting bins and then the clusters from cp.cluster()
    against the colors via evaluate(), and finally calls
    map_clusters_by_groundtruth(). Nothing is returned.

    Args:
        colors: passed to get_colors() together with the data; presumably
            the ground-truth labelling - TODO confirm against get_colors().

    NOTE(review): the meaning of the literal arguments .5 and 300 passed to
    cp.remove_noise() and similarity.similarity() is not visible here.
    """
    data = cp.read_data()  # load the data
    colors = get_colors(data, colors)  # put colors into the required format
    data, bins = cp.remove_noise(data, .5, 300)  # remove noise from the data
    ###### the next few lines are to evaluate the binning
    sim = similarity.similarity(data, .5, 300)  # create a similarity object
    sim.bins = bins  # reuse the bins computed above instead of re-binning
    sim.evaluate_bins()  # evaluate them to create the labels
    ######
    colors = update_colors(bins,
                           colors)  # update the colors to reflect deleted bins
    labels = sim.labels  # labels produced by evaluate_bins()
    evaluate(numpy.array(colors), numpy.array(labels))  # evaluate the bins
    clusters, labels, data = cp.cluster(data, len(bins))  # cluster, using the bin count
    evaluate(numpy.array(colors), numpy.array(labels))  # evaluate the clustering
    map_clusters_by_groundtruth(
        data, labels, colors, map_individuals=False
    )  # map clusters; set map_individuals=True to map individual clusters