def testBinData(self):
    """Check that bin_data() splits the trips into disjoint, size-ordered bins.

    Verifies, for the fixture data, that every trip lands in exactly one bin
    and that the bins come out in non-increasing order of size. Then checks
    that two groups of ten identical trips produce exactly two bins.
    """
    # With no trips there is nothing to bin.
    sim = similarity.similarity([], 300)
    self.assertFalse(sim.bins)

    sim = similarity.similarity(self.data, 300)
    sim.bin_data()

    # The bin sizes must add up to the number of trips kept by the object...
    binned_total = sum(len(members) for members in sim.bins)
    self.assertEqual(binned_total, len(sim.data))

    # ...and no member may show up in more than one bin.
    distinct_members = set()
    for members in sim.bins:
        distinct_members.update(members)
    self.assertEqual(len(distinct_members), len(sim.data))

    # Bins are expected largest first.
    for current, following in zip(sim.bins, sim.bins[1:]):
        self.assertGreaterEqual(len(current), len(following))

    # Two distinct start/end pairs, ten trips each, must give two bins.
    now = datetime.datetime.now()
    data = []
    endpoints = [
        (Coordinate(47, -122), Coordinate(47, -123)),
        (Coordinate(41, -74), Coordinate(42, -74)),
    ]
    for start, end in endpoints:
        for _ in range(10):
            data.append(Trip(None, None, None, None, now, now, start, end))
    sim = similarity.similarity(data, 300)
    sim.bin_data()
    self.assertEqual(len(sim.bins), 2)
def testDistance(self):
    """Check similarity.distance() against one near and one far end point.

    With 300 passed to the constructor, distance() is expected to be truthy
    for ``near_end`` and falsy for ``far_end``.
    """
    # NOTE(review): the first Coordinate argument is read back as ``.lat``
    # below, yet -122.x is outside the valid latitude range -- confirm the
    # intended argument order for these fixtures.
    start = Coordinate(-122.259447, 37.875174)
    near_end = Coordinate(-122.259279, 37.875479)
    far_end = Coordinate(-122.252287, 37.869569)

    sim = similarity.similarity(self.data, 300)
    self.assertTrue(
        sim.distance(start.lat, start.lon, near_end.lat, near_end.lon))
    self.assertFalse(
        sim.distance(start.lat, start.lon, far_end.lat, far_end.lon))
def testLocations(self):
    """Check the bins and averaged locations produced by locations().

    First, on the fixture data: every pair of entries sharing a bin must be
    less than 300 apart according to ``repy.distance``, every cluster's
    start and end must appear in exactly one bin, and there must be one
    entry in ``locs`` per bin. Then, on three hand-made trips, the exact bin
    contents and the rounded ``locs`` coordinates are checked.
    """
    def location_of(reps, entry):
        # Resolve a ('start' | 'end', cluster index) bin entry to the
        # matching end point of that cluster's representative trip.
        kind, index = entry[0], entry[1]
        if kind == 'start':
            return reps[index].trip_start_location
        if kind == 'end':
            return reps[index].trip_end_location
        self.fail("unexpected bin entry %r" % (entry,))

    repy = rep.representatives(self.data, self.labels)
    repy.list_clusters()
    repy.get_reps()
    repy.locations()

    total = 0
    for members in repy.bins:
        for position, entry in enumerate(members):
            here = location_of(repy.reps, entry)
            # Compare against every entry that precedes this one in the bin.
            for earlier in members[:position]:
                there = location_of(repy.reps, earlier)
                self.assertLess(
                    repy.distance(here.lat, here.lon, there.lat, there.lon),
                    300)
        total += len(members)
    # One start and one end per cluster, all of them binned.
    self.assertEqual(total, 2 * repy.num_clusters)
    for i in range(repy.num_clusters):
        self.assertEqual(
            sum(members.count(('start', i)) for members in repy.bins), 1)
        self.assertEqual(
            sum(members.count(('end', i)) for members in repy.bins), 1)
    self.assertEqual(len(repy.locs), len(repy.bins))

    tripa = Trip(None, None, None, None, None, None,
                 Coordinate(1, 2), Coordinate(30, 40))
    tripb = Trip(None, None, None, None, None, None,
                 Coordinate(1.0000002, 2.0000002),
                 Coordinate(55.0000002, 85.0000002))
    tripc = Trip(None, None, None, None, None, None,
                 Coordinate(30.0000002, 40.0000002), Coordinate(55, 85))
    data = [tripa, tripb, tripc]
    labels = [0, 1, 2]
    repy = rep.representatives(data, labels)
    repy.list_clusters()
    repy.get_reps()
    repy.locations()

    self.assertEqual(repy.bins[0], [('start', 0), ('start', 1)])
    self.assertEqual(repy.bins[1], [('end', 0), ('start', 2)])
    self.assertEqual(repy.bins[2], [('end', 1), ('end', 2)])

    # Each location is expected halfway between the two points in its bin.
    expected_locs = [
        (1.0000001, 2.0000001),
        (30.0000001, 40.0000001),
        (55.0000001, 85.0000001),
    ]
    for i, (lat, lon) in enumerate(expected_locs):
        self.assertEqual(round(repy.locs[i].lat, 7), lat)
        self.assertEqual(round(repy.locs[i].lon, 7), lon)
def get_reps(self):
    """Build one representative trip per cluster and store them in ``self.reps``.

    ``self.reps`` is always reset to an empty list first. When ``self.data``
    is falsy nothing else happens. Otherwise, for every cluster in
    ``self.clusters``, the start latitude/longitude and end
    latitude/longitude of its members are averaged and wrapped in a ``Trip``
    whose other fields are ``None``.
    """
    self.reps = []
    if not self.data:
        return

    for cluster in self.clusters:
        start_lats, start_lons, end_lats, end_lons = [], [], [], []
        for member in cluster:
            if self.is_old:
                origin = member.trip_start_location
                destination = member.trip_end_location
                start_lats.append(origin.lat)
                start_lons.append(origin.lon)
                end_lats.append(destination.lat)
                end_lons.append(destination.lon)
            else:
                # The new-style data is geojson, i.e. (lon, lat). Read the
                # indices in flipped order so the rows stay (lat, lon),
                # consistent with the old-style branch above.
                origin = member.data.start_loc["coordinates"]
                destination = member.data.end_loc["coordinates"]
                start_lats.append(origin[1])
                start_lons.append(origin[0])
                end_lats.append(destination[1])
                end_lons.append(destination[0])

        # One mean per row: start lat, start lon, end lat, end lon.
        centers = numpy.mean(
            [start_lats, start_lons, end_lats, end_lons], axis=1)
        mean_start = Coordinate(centers[0], centers[1])
        mean_end = Coordinate(centers[2], centers[3])
        self.reps.append(
            Trip(None, None, None, None, None, None, mean_start, mean_end))
def testInit(self):
    """Check construction of similarity objects.

    Covers a non-numeric second argument, an empty data list, and the
    expectation that a trip whose start and end are the same coordinate
    does not change the resulting ``data``.
    """
    # A ValueError is accepted here; any other exception fails the test.
    try:
        similarity.similarity([], 'b')
    except ValueError:
        self.assertTrue(True)
    except Exception:
        self.assertTrue(False)

    empty = similarity.similarity([], 100)
    self.assertTrue(len(empty.data) == 0)

    stamp = datetime.datetime.now()
    origin = Coordinate(47, -122)
    elsewhere = Coordinate(47, -123)
    # Starts and ends at the same coordinate.
    stationary = Trip(None, None, None, None, stamp, stamp, origin, origin)
    moving = Trip(None, None, None, None, stamp, stamp, origin, elsewhere)

    with_stationary = similarity.similarity([stationary, moving], 100)
    moving_only = similarity.similarity([moving], 100)
    self.assertTrue(with_stationary.data == moving_only.data)
def testElbowDistance(self):
    """Check that delete_bins() sets ``num`` to 2 for a hand-made bin layout."""
    origin = Coordinate(47, -122)
    destination = Coordinate(47, -123)
    stamp = datetime.datetime.now()
    trip = Trip(None, None, None, None, stamp, stamp, origin, destination)

    # Eleven references to the same trip, matching the indices 0..10 below.
    sim = similarity.similarity([trip] * 11, 300)

    # Two larger bins followed by four single-element bins.
    sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
    sim.delete_bins()
    self.assertTrue(sim.num == 2)
def testMatch(self):
    """Check representatives.match() against three hand-made bins.

    Each of the three trips gets its own label, so each one is its own
    cluster representative.
    """
    forward = Trip(None, None, None, None, None, None,
                   Coordinate(1, 2), Coordinate(3, 4))
    backward = Trip(None, None, None, None, None, None,
                    Coordinate(3, 4), Coordinate(1, 2))
    detour = Trip(None, None, None, None, None, None,
                  Coordinate(1, 2), Coordinate(9, 10))

    repy = rep.representatives([forward, backward, detour], [0, 1, 2])
    repy.list_clusters()
    repy.get_reps()

    # The end of trip 1 is (1, 2), the same as both starts in this bin.
    both_starts = [('start', 0), ('start', 2)]
    self.assertTrue(repy.match('end', 1, both_starts))

    # The end of trip 0 is (3, 4), so this bin holds a different point.
    mixed = [('start', 0), ('end', 0)]
    self.assertTrue(not repy.match('end', 1, mixed))

    # The end of trip 2 is (9, 10), which equals neither start in this bin.
    other_starts = [('start', 0), ('start', 1)]
    self.assertTrue(not repy.match('end', 2, other_starts))
def testReps(self):
    """Check that get_reps() yields one averaged trip per cluster.

    On the fixture data only the count is checked. On three hand-made trips
    sharing one label, the representative's coordinates must equal the mean
    of the members' coordinates.
    """
    repy = rep.representatives(self.data, self.labels)
    repy.list_clusters()
    repy.get_reps()
    self.assertEqual(len(repy.reps), len(repy.clusters))

    tripa = Trip(None, None, None, None, None, None,
                 Coordinate(1, 2), Coordinate(3, 4))
    tripb = Trip(None, None, None, None, None, None,
                 Coordinate(9, 10), Coordinate(5, 8))
    tripc = Trip(None, None, None, None, None, None,
                 Coordinate(5, 6), Coordinate(4, 6))
    data = [tripa, tripb, tripc]
    labels = [0, 0, 0]
    repy = rep.representatives(data, labels)
    repy.list_clusters()
    repy.get_reps()

    representative = repy.reps[0]
    # Start: lat (1 + 9 + 5) / 3 = 5, lon (2 + 10 + 6) / 3 = 6.
    self.assertEqual(representative.trip_start_location.lat, 5)
    self.assertEqual(representative.trip_start_location.lon, 6)
    # End: lat (3 + 5 + 4) / 3 = 4, lon (4 + 8 + 6) / 3 = 6.
    self.assertEqual(representative.trip_end_location.lat, 4)
    self.assertEqual(representative.trip_end_location.lon, 6)
def testCluster(self):
    """Exercise featurization.cluster() with several methods and bounds.

    Covers the default call, an explicit 'kmeans' call, an unknown method
    name, out-of-range and inverted cluster bounds, and finally a data set
    made of two groups of identical trips that must yield two labels.
    """
    def trips_between(start, end, count):
        # Build ``count`` trips sharing the same end points; the timestamp
        # is taken afresh for every trip.
        trips = []
        for _ in range(count):
            stamp = datetime.datetime.now()
            trips.append(
                Trip(None, None, None, None, stamp, stamp, start, end))
        return trips

    feat = featurization.featurization(self.data)

    # One label per point, and the reported cluster count must equal the
    # number of distinct labels.
    feat.cluster(min_clusters=2, max_clusters=10)
    self.assertTrue(len(feat.labels) == len(feat.points))
    self.assertTrue(feat.clusters == len(set(feat.labels)))

    kmeans_result = feat.cluster(
        name='kmeans', min_clusters=5, max_clusters=20)
    self.assertTrue(len(feat.labels) == len(feat.points))
    self.assertTrue(feat.clusters == len(set(feat.labels)))

    # defaults to kmeans with invalid clustering method
    unknown_result = feat.cluster(
        name='nonname', min_clusters=5, max_clusters=20)
    self.assertTrue(kmeans_result == unknown_result)

    # Asking for more clusters than there are trips; only has to return.
    feat.cluster(min_clusters=len(self.data) + 1)

    # A lower bound of 0 is expected to give the same result as 2.
    from_zero = feat.cluster(min_clusters=0, max_clusters=20)
    from_two = feat.cluster(min_clusters=2, max_clusters=20)
    self.assertTrue(from_zero == from_two)

    # Inverted bounds: a ValueError is accepted, anything else fails.
    try:
        feat.cluster(min_clusters=10, max_clusters=2)
    except ValueError:
        self.assertTrue(True)
    except Exception:
        self.assertTrue(False)

    data = (
        trips_between(Coordinate(47, -122), Coordinate(47, -123), 10)
        + trips_between(Coordinate(41, -74), Coordinate(42, -74), 10)
    )
    feat = featurization.featurization(data)
    feat.cluster()
    self.assertTrue(len(set(feat.labels)) == 2)
def testCalculatePoints(self):
    """Check how featurization builds ``points`` from different inputs.

    Covers empty and ``None`` input, a trip with no fields set, and the
    fixture data, for which every trip must yield a point free of ``None``.
    """
    # Both "no data" spellings must leave ``data`` falsy.
    for nothing in ([], None):
        feat = featurization.featurization(nothing)
        self.assertTrue(not feat.data)

    # A trip with every field unset: an AttributeError is accepted, any
    # other exception fails the test.
    blank_trip = Trip(None, None, None, None, None, None, None, None)
    try:
        feat = featurization.featurization([blank_trip])
    except AttributeError:
        self.assertTrue(True)
    except Exception:
        self.assertTrue(False)

    feat = featurization.featurization(self.data)
    self.assertTrue(len(feat.points) == len(feat.data))
    for point in feat.points:
        self.assertTrue(None not in point)
def turn_into_trip(self, _id, user_id, trip_id, is_fake=False, itinerary=0):
    """Convert the routing response from ``self.get_json()`` into a trip.

    Every leg of the selected itinerary becomes one ``Section``; the overall
    start/end locations come from the plan, and the overall start/end times
    and the fare come from the selected itinerary.

    Args:
        _id: identifier passed through to the returned trip object.
        user_id: passed to every ``Section`` and to the returned trip.
        trip_id: passed to every ``Section`` and to the returned trip.
        is_fake: when true, return a plain ``Trip`` instead of an
            ``Alternative_Trip``.
        itinerary: index into ``plan.itineraries`` of the itinerary to
            convert.

    Returns:
        A ``Trip`` when ``is_fake`` is true, otherwise an
        ``Alternative_Trip`` that also carries the cost and the list of
        modes used by the legs.

    Raises:
        PathNotFoundException: if the response has no ``"plan"`` entry.
    """
    our_json = self.get_json()
    if "plan" not in our_json:
        print("While querying alternatives from %s to %s" % (self.start_point, self.end_point))
        print("query URL is %s" % self.make_url())
        print("Response %s does not have a plan " % our_json)
        raise PathNotFoundException(our_json['debugOutput'])

    plan = our_json["plan"]
    # Legs, times and fare must all come from the same itinerary.
    chosen = plan["itineraries"][itinerary]

    sections = []
    mode_list = set()
    for leg in chosen['legs']:
        start_time = otp_time_to_ours(leg["startTime"])
        end_time = otp_time_to_ours(leg["endTime"])
        distance = float(leg['distance'])
        start_loc = Coordinate(float(leg["from"]["lat"]), float(leg["from"]["lon"]))
        end_loc = Coordinate(float(leg["to"]["lat"]), float(leg["to"]["lon"]))
        mode = leg["mode"]
        mode_list.add(mode)
        # Sections built here have no stored id, so a random one is used.
        fake_id = random.random()
        points = [Coordinate(step["lat"], step['lon']) for step in leg['steps']]
        section = Section(str(fake_id), user_id, trip_id, distance, "move",
                          start_time, end_time, start_loc, end_loc,
                          mode, mode, points)
        sections.append(section)
    print("len(sections) = %s" % len(sections))

    final_start_loc = Coordinate(float(plan["from"]["lat"]), float(plan["from"]["lon"]))
    final_end_loc = Coordinate(float(plan["to"]["lat"]), float(plan["to"]["lon"]))
    final_start_time = otp_time_to_ours(chosen["startTime"])
    final_end_time = otp_time_to_ours(chosen["endTime"])

    cost = 0
    if "RAIL" in mode_list or "SUBWAY" in mode_list:
        try:
            # The response quotes the fare in cents; convert to whole units.
            cost = float(chosen['fare']['fare']['regular']['cents']) / 100.0
        except (KeyError, TypeError, ValueError):
            # No usable fare in the response: fall back to zero cost.
            cost = 0
    elif "CAR" in mode_list:
        # TODO calculate car cost
        cost = 0

    mode_list = list(mode_list)
    if is_fake:
        return Trip(_id, user_id, trip_id, sections, final_start_time,
                    final_end_time, final_start_loc, final_end_loc)
    return Alternative_Trip(_id, user_id, trip_id, sections, final_start_time,
                            final_end_time, final_start_loc, final_end_loc,
                            0, cost, mode_list)