def plot_instances_for_gps_error_model():
    """Load the smoothing ground-truth map, split its sections into those
    that needed smoothing and those that did not, print speed statistics
    for both groups, and generate cluster-comparison plots for other
    candidate sections (manually and automatically selected)."""
    gt_map = json.load(open(
        "/Users/shankari/cluster_ground_truth/smoothing/caltrain/smoothing_removed_points"))
    needsSmoothing = []
    fineWithoutSmoothing = []
    for sid, rp_list in gt_map.items():
        curr_section = get_section_db().find_one({"_id": sid})
        if curr_section is None:
            print("Unable to find section %s in the database" % sid)
        elif len(rp_list) > 0:
            # Ground truth recorded removed points -> smoothing was needed.
            needsSmoothing.append(curr_section)
        else:
            fineWithoutSmoothing.append(curr_section)

    def _report(header, sections):
        # Print id, avg speed, 1st max speed and 90/95/99th speed percentiles.
        print("-" * 20, header, '-' * 20)
        for s in sections:
            if s is not None:
                print(s["_id"], fc.calAvgSpeed(s), fc.getIthMaxSpeed(s, 1),
                      np.percentile(fc.calSpeeds(s), [90, 95, 99]))

    _report("Needs smoothing", needsSmoothing)
    _report("Fine without smoothing", fineWithoutSmoothing)

    other_manual_candidates = find_other_sections_manual(needsSmoothing,
                                                         fineWithoutSmoothing)
    other_auto_candidates = find_other_sections_auto(needsSmoothing,
                                                     fineWithoutSmoothing)
    print(other_auto_candidates)
    gsfgtc.generate_cluster_comparison(other_manual_candidates, "/tmp/other_manual")
    gsfgtc.generate_cluster_comparison(other_auto_candidates, "/tmp/other_auto")
def find_other_sections_manual(needsSmoothing, findWithoutSmoothing):
    """Return sections from the database whose average speed strictly
    exceeds every average speed seen before it (a running-max filter).

    NOTE(review): `query` is not defined in this function and is not
    visible anywhere in this file, so calling this as-is raises
    NameError — presumably it should select sections other than the
    ground-truthed ones; confirm and define it. Both parameters are
    currently unused (and `findWithoutSmoothing` looks like a typo for
    `fineWithoutSmoothing`).
    """
    section_list = []
    maxSpeed_list = []
    for section in edb.get_section_db().find(query):
        avg_speed = fc.calAvgSpeed(section)
        # Reuse the already-computed avg_speed; the original recomputed
        # fc.calAvgSpeed(section) in the condition.
        if len(maxSpeed_list) == 0 or avg_speed > max(maxSpeed_list):
            maxSpeed_list.append(avg_speed)
            section_list.append(section)
    return section_list
def find_other_sections_manual(needsSmoothing, findWithoutSmoothing):
    """Return sections from the database whose average speed strictly
    exceeds every average speed seen before it (a running-max filter).

    NOTE(review): this is a byte-for-byte duplicate of an earlier
    definition of the same name in this file — at import time the later
    definition silently wins; one of the two should be deleted.
    NOTE(review): `query` is undefined here and not visible in this
    file, so calling this as-is raises NameError; confirm its intended
    value. Both parameters are currently unused.
    """
    section_list = []
    maxSpeed_list = []
    for section in edb.get_section_db().find(query):
        avg_speed = fc.calAvgSpeed(section)
        # Reuse the already-computed avg_speed; the original recomputed
        # fc.calAvgSpeed(section) in the condition.
        if len(maxSpeed_list) == 0 or avg_speed > max(maxSpeed_list):
            maxSpeed_list.append(avg_speed)
            section_list.append(section)
    return section_list
def plot_instances_for_gps_error_model():
    """Load the smoothing ground-truth map, split sections into those
    needing smoothing and those that are fine, print speed statistics
    for both groups, and generate cluster-comparison plots for other
    candidate sections.

    NOTE(review): this duplicates an earlier definition of the same name
    in this file; at import time the later definition silently wins —
    one of the two should be removed.
    """
    # Map of section id -> list of points removed by smoothing (ground truth).
    smoothing_ground_truth_map = json.load(
        open(
            "/Users/shankari/cluster_ground_truth/smoothing/caltrain/smoothing_removed_points"
        ))
    needsSmoothing = []
    fineWithoutSmoothing = []
    for (sid, rp_list) in smoothing_ground_truth_map.items():
        sectionJSON = get_section_db().find_one({"_id": sid})
        if sectionJSON is None:
            print("Unable to find section %s in the database" % sid)
        else:
            # A non-empty removed-point list means smoothing changed this section.
            if len(rp_list) > 0:
                needsSmoothing.append(sectionJSON)
            else:
                fineWithoutSmoothing.append(sectionJSON)
    # For each group, print id, avg speed, 1st max speed, and the
    # 90/95/99th percentiles of the point-to-point speeds.
    print("-" * 20, "Needs smoothing", '-' * 20)
    for section in needsSmoothing:
        if section is not None:
            print(section["_id"], fc.calAvgSpeed(section),
                  fc.getIthMaxSpeed(section, 1),
                  np.percentile(fc.calSpeeds(section), [90, 95, 99]))
    print("-" * 20, "Fine without smoothing", '-' * 20)
    for section in fineWithoutSmoothing:
        if section is not None:
            print(section["_id"], fc.calAvgSpeed(section),
                  fc.getIthMaxSpeed(section, 1),
                  np.percentile(fc.calSpeeds(section), [90, 95, 99]))
    other_manual_candidates = find_other_sections_manual(
        needsSmoothing, fineWithoutSmoothing)
    other_auto_candidates = find_other_sections_auto(needsSmoothing,
                                                     fineWithoutSmoothing)
    print(other_auto_candidates)
    gsfgtc.generate_cluster_comparison(other_manual_candidates,
                                       "/tmp/other_manual")
    gsfgtc.generate_cluster_comparison(other_auto_candidates,
                                       "/tmp/other_auto")
def get_feature_row(section):
    """Build the 5-element feature vector for one section:
    [avg speed, 1st max speed, 90th, 95th, 99th speed percentile]."""
    features = np.zeros(5)
    features[0] = fc.calAvgSpeed(section)
    features[1] = fc.getIthMaxSpeed(section, 1)
    # Slice-assign the three percentiles in one shot.
    features[2:5] = np.percentile(fc.calSpeeds(section), [90, 95, 99])
    return features
def updateFeatureMatrixRowWithSection(self, featureMatrix, i, section):
    """Fill row `i` of `featureMatrix` with the features of `section`.

    Columns: 0 distance, 1 duration (s), 2 mode (0 if non-numeric),
    3 section id, 4 avg speed, 5-7 mean/std/max of speeds, 8 max accel,
    9 commute flag, 10-12 HCR/SR/VCR, 13-16 start/end lon-lat,
    17-18 start/end hour, 19-21 bus/train/air cluster coverage
    (only when the corresponding cluster attribute exists on self).
    Mutates `featureMatrix` in place; returns None.
    """
    featureMatrix[i, 0] = section['distance']
    featureMatrix[i, 1] = (section['section_end_datetime'] -
                           section['section_start_datetime']).total_seconds()
    # Deal with unknown modes like "airplane": assigning a non-numeric
    # mode into the float matrix raises ValueError, which we map to 0.
    try:
        featureMatrix[i, 2] = section['mode']
    except ValueError:
        featureMatrix[i, 2] = 0
    featureMatrix[i, 3] = section['section_id']
    featureMatrix[i, 4] = easf.calAvgSpeed(section)
    speeds = easf.calSpeeds(section)
    # BUGFIX: was `speeds != None` — if calSpeeds returns a numpy array,
    # `!=` broadcasts elementwise and the `if` raises "truth value is
    # ambiguous". Identity comparison is the correct None check.
    if speeds is not None and len(speeds) > 0:
        featureMatrix[i, 5] = np.mean(speeds)
        featureMatrix[i, 6] = np.std(speeds)
        featureMatrix[i, 7] = np.max(speeds)
    else:
        # They will remain zero
        pass
    accels = easf.calAccels(section)
    # BUGFIX: same `!= None` -> `is not None` fix as above.
    if accels is not None and len(accels) > 0:
        featureMatrix[i, 8] = np.max(accels)
    else:
        # They will remain zero
        pass
    featureMatrix[i, 9] = ('commute' in section) and \
        (section['commute'] == 'to' or section['commute'] == 'from')
    featureMatrix[i, 10] = easf.calHCR(section)
    featureMatrix[i, 11] = easf.calSR(section)
    featureMatrix[i, 12] = easf.calVCR(section)
    if 'section_start_point' in section and \
            section['section_start_point'] is not None:
        startCoords = section['section_start_point']['coordinates']
        featureMatrix[i, 13] = startCoords[0]
        featureMatrix[i, 14] = startCoords[1]
    if 'section_end_point' in section and \
            section['section_end_point'] is not None:
        endCoords = section['section_end_point']['coordinates']
        featureMatrix[i, 15] = endCoords[0]
        featureMatrix[i, 16] = endCoords[1]
    featureMatrix[i, 17] = section['section_start_datetime'].time().hour
    featureMatrix[i, 18] = section['section_end_datetime'].time().hour
    if (hasattr(self, "bus_cluster")):
        featureMatrix[i, 19] = easf.mode_start_end_coverage(
            section, self.bus_cluster, 105)
    if (hasattr(self, "train_cluster")):
        featureMatrix[i, 20] = easf.mode_start_end_coverage(
            section, self.train_cluster, 600)
    if (hasattr(self, "air_cluster")):
        featureMatrix[i, 21] = easf.mode_start_end_coverage(
            section, self.air_cluster, 600)
    # Replace NaN and inf by zeros so that it doesn't crash later
    featureMatrix[i] = np.nan_to_num(featureMatrix[i])