import json

import numpy as np

# Note: the remaining helpers used below (get_section_db, fc.*, gsfgtc.*, edb.*)
# come from the surrounding e-mission analysis code; their import lines are not
# part of this listing.


def plot_instances_for_gps_error_model():
    with open("/Users/shankari/cluster_ground_truth/smoothing/caltrain/smoothing_removed_points") as fp:
        smoothing_ground_truth_map = json.load(fp)
    needsSmoothing = []
    fineWithoutSmoothing = []

    for (sid, rp_list) in smoothing_ground_truth_map.items():
        sectionJSON = get_section_db().find_one({"_id": sid})
        if sectionJSON is None:
            print("Unable to find section %s in the database" % sid)
        else:
            if len(rp_list) > 0:
                needsSmoothing.append(sectionJSON)
            else:
                fineWithoutSmoothing.append(sectionJSON)

    print("-" * 20, "Needs smoothing", '-' * 20)

    for section in needsSmoothing:
        if section is not None:
            print(section["_id"], fc.calAvgSpeed(section), fc.getIthMaxSpeed(section, 1), np.percentile(fc.calSpeeds(section), [90, 95, 99]))

    print("-" * 20, "Fine without smoothing", '-' * 20)

    for section in fineWithoutSmoothing:
        if section is not None:
            print(section["_id"], fc.calAvgSpeed(section), fc.getIthMaxSpeed(section, 1), np.percentile(fc.calSpeeds(section), [90, 95, 99]))

    other_manual_candidates = find_other_sections_manual(needsSmoothing, fineWithoutSmoothing)
    other_auto_candidates = find_other_sections_auto(needsSmoothing, fineWithoutSmoothing)

    print(other_auto_candidates)

    gsfgtc.generate_cluster_comparison(other_manual_candidates, "/tmp/other_manual")
    gsfgtc.generate_cluster_comparison(other_auto_candidates, "/tmp/other_auto")
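
# Optional refactor sketch (not in the original listing): the two print loops
# above are identical, so they could share a small helper along these lines.
# The name _print_section_stats is hypothetical.
def _print_section_stats(label, sections):
    print("-" * 20, label, "-" * 20)
    for section in sections:
        if section is not None:
            print(section["_id"], fc.calAvgSpeed(section),
                  fc.getIthMaxSpeed(section, 1),
                  np.percentile(fc.calSpeeds(section), [90, 95, 99]))
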

def find_other_sections_manual(needsSmoothing, fineWithoutSmoothing):
    # NOTE: `query` is not defined in this listing; it is presumably a MongoDB
    # filter that excludes the sections which already have smoothing ground truth.
    section_list = []
    maxSpeed_list = []

    for section in edb.get_section_db().find(query):
        avg_speed = fc.calAvgSpeed(section)
        # keep a section only if its average speed exceeds every one seen so far
        if len(maxSpeed_list) == 0 or avg_speed > max(maxSpeed_list):
            maxSpeed_list.append(avg_speed)
            section_list.append(section)

    return section_list
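
# `find_other_sections_auto` is called in plot_instances_for_gps_error_model but
# is not part of this listing. A minimal sketch of one possible implementation,
# assuming it picks sections outside the two ground-truth lists whose
# 99th-percentile speed exceeds a threshold; the threshold value is an assumption.
def find_other_sections_auto(needsSmoothing, fineWithoutSmoothing,
                             speed_threshold=50):
    known_ids = set(s["_id"] for s in needsSmoothing + fineWithoutSmoothing)
    section_list = []
    for section in edb.get_section_db().find():
        if section["_id"] in known_ids:
            continue  # already has ground truth
        speeds = fc.calSpeeds(section)
        if (speeds is not None and len(speeds) > 0
                and np.percentile(speeds, 99) > speed_threshold):
            section_list.append(section)
    return section_list
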

def get_feature_row(section):
    # Five speed-based features per section: average speed, top speed, and the
    # 90th/95th/99th percentile speeds.
    ret_arr = np.zeros(5)
    ret_arr[0] = fc.calAvgSpeed(section)
    ret_arr[1] = fc.getIthMaxSpeed(section, 1)
    percentiles = np.percentile(fc.calSpeeds(section), [90, 95, 99])
    ret_arr[2] = percentiles[0]
    ret_arr[3] = percentiles[1]
    ret_arr[4] = percentiles[2]
    return ret_arr
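
# Illustrative use of get_feature_row (not in the original): stack the feature
# vectors of the two ground-truth lists into a matrix X and a label vector y,
# e.g. as input to a classifier for the GPS error model. The function name is
# hypothetical.
def build_smoothing_dataset(needsSmoothing, fineWithoutSmoothing):
    sections = needsSmoothing + fineWithoutSmoothing
    X = np.vstack([get_feature_row(s) for s in sections])
    y = np.array([1] * len(needsSmoothing) + [0] * len(fineWithoutSmoothing))
    return X, y
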

  def updateFeatureMatrixRowWithSection(self, featureMatrix, i, section):
    """Fill row i of featureMatrix with the features of one section: distance,
    duration, mode, section id, speed and acceleration statistics, commute flag,
    heading/stop/velocity change rates, start/end coordinates, start/end hours,
    and (when the corresponding clusters exist) bus/train/air stop coverage."""
    featureMatrix[i, 0] = section['distance']
    featureMatrix[i, 1] = (section['section_end_datetime'] - section['section_start_datetime']).total_seconds()

    # Deal with unknown modes like "airplane"
    try:
      featureMatrix[i, 2] = section['mode']
    except ValueError:
      featureMatrix[i, 2] = 0

    featureMatrix[i, 3] = section['section_id']
    featureMatrix[i, 4] = easf.calAvgSpeed(section)
    speeds = easf.calSpeeds(section)
    if speeds is not None and len(speeds) > 0:
        featureMatrix[i, 5] = np.mean(speeds)
        featureMatrix[i, 6] = np.std(speeds)
        featureMatrix[i, 7] = np.max(speeds)
    else:
        # They will remain zero
        pass
    accels = easf.calAccels(section)
    if accels is not None and len(accels) > 0:
        featureMatrix[i, 8] = np.max(accels)
    else:
        # They will remain zero
        pass
    featureMatrix[i, 9] = ('commute' in section) and (section['commute'] == 'to' or section['commute'] == 'from')
    featureMatrix[i, 10] = easf.calHCR(section)
    featureMatrix[i, 11] = easf.calSR(section)
    featureMatrix[i, 12] = easf.calVCR(section)
    if 'section_start_point' in section and section['section_start_point'] is not None:
        startCoords = section['section_start_point']['coordinates']
        featureMatrix[i, 13] = startCoords[0]
        featureMatrix[i, 14] = startCoords[1]
    
    if 'section_end_point' in section and section['section_end_point'] is not None:
        endCoords = section['section_end_point']['coordinates']
        featureMatrix[i, 15] = endCoords[0]
        featureMatrix[i, 16] = endCoords[1]
    
    featureMatrix[i, 17] = section['section_start_datetime'].time().hour
    featureMatrix[i, 18] = section['section_end_datetime'].time().hour
   
    if (hasattr(self, "bus_cluster")): 
        featureMatrix[i, 19] = easf.mode_start_end_coverage(section, self.bus_cluster,105)
    if (hasattr(self, "train_cluster")): 
        featureMatrix[i, 20] = easf.mode_start_end_coverage(section, self.train_cluster,600)
    if (hasattr(self, "air_cluster")): 
        featureMatrix[i, 21] = easf.mode_start_end_coverage(section, self.air_cluster,600)

    # Replace NaN and inf by zeros so that it doesn't crash later
    featureMatrix[i] = np.nan_to_num(featureMatrix[i])
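
  # Illustrative caller (not part of the original class): the method above fills
  # columns 0-21, so each row needs at least 22 columns. `self.sections` (the
  # list of section documents) is an assumed attribute.
  def buildFeatureMatrix(self):
    featureMatrix = np.zeros((len(self.sections), 22))
    for (i, section) in enumerate(self.sections):
        self.updateFeatureMatrixRowWithSection(featureMatrix, i, section)
    return featureMatrix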