# Example no. 1
  def runPipeline(self):
    """Run the full mode-inference training pipeline end to end.

    Loads confirmed sections from the live and backup section
    collections, builds the feature matrix and result vector, cleans the
    data, selects features, trains the model and serializes it.
    """
    query_with_ground_truth = ModeInferencePipelineMovesFormat.getSectionQueryWithGroundTruth({'$ne': ''})
    self.confirmedSections = self.loadTrainingDataStep(query_with_ground_truth)
    logging.debug("confirmedSections.count() = %s" % (self.confirmedSections.count()))
    logging.info("initial loadTrainingDataStep DONE")

    logging.debug("finished loading current training set, now loading from backup!")
    backup_collection = safmt.AbstractCollection(edb.pm_address, "Backup_database", "Stage_Sections", None)
    self.backupConfirmedSections = self.loadTrainingDataStep(query_with_ground_truth, backup_collection)
    logging.info("loadTrainingDataStep DONE")

    self.bus_cluster, self.train_cluster = self.generateBusAndTrainStopStep()
    logging.info("generateBusAndTrainStopStep DONE")
    self.featureMatrix, self.resultVector = self.generateFeatureMatrixAndResultVectorStep()
    logging.info("generateFeatureMatrixAndResultVectorStep DONE")
    self.cleanedFeatureMatrix, self.cleanedResultVector = self.cleanDataStep()
    logging.info("cleanDataStep DONE")
    self.selFeatureIndices = self.selectFeatureIndicesStep()
    logging.info("selectFeatureIndicesStep DONE")
    # Keep only the selected feature columns
    self.selFeatureMatrix = self.cleanedFeatureMatrix[:, self.selFeatureIndices]
    self.model = self.buildModelStep()
    logging.info("buildModelStep DONE")
    # Serialize the model
    self.saveModelStep()
    logging.info("saveModelStep DONE")
def get_mode_share_by_count(list_idx):
    """Return the per-mode share of confirmed sections for the given ids.

    Looks each section up in the live Stage_Sections collection first and
    falls back to the backup collection when it is missing.

    :param list_idx: list of section '_id' values
    :return: dict mapping mode_id -> fraction of sections with that
        confirmed mode; all zeros when no sections were counted
    """
    Sections = get_section_db()
    BackupSections = safmt.AbstractCollection(edb.pm_address,
                                              "Backup_database",
                                              "Stage_Sections", None)
    AllModeList = getAllModes()

    mode_counts = {mode['mode_id']: 0 for mode in AllModeList}
    for _id in list_idx:
        section = Sections.find_one({'_id': _id})
        if section is None:
            # NOTE(review): backup lookup uses key 'id', not '_id' --
            # looks like the backup schema differs; confirm this is
            # intentional.
            section = BackupSections.find_one({'id': _id})
        mode_id = section['confirmed_mode']
        # Modes not present in AllModeList are still counted here so
        # they contribute to the denominator below (matches the old
        # try/except KeyError behavior).
        mode_counts[mode_id] = mode_counts.get(mode_id, 0) + 1

    # Hoisted: the total was previously recomputed for every mode.
    total = sum(mode_counts.values())
    if total == 0:
        return {mode['mode_id']: 0 for mode in AllModeList}
    return {mode['mode_id']: mode_counts[mode['mode_id']] / total
            for mode in AllModeList}
def get_client_stats_db_backup():
    """Return a handle to the Stage_client_stats collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_client_stats", None)
def get_uuid_db():
    """Return a handle to the Stage_uuids collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_uuids", None)
def get_groundClusters_db():
    """Return a handle to the Stage_groundCluster collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_groundCluster", None)
def get_prediction_db():
    """Return a handle to the Stage_Predictions collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_Predictions", None)
def get_trip_db():
    """Return a handle to the Stage_Trips collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_Trips", None)
def get_utility_model_db():
    """Return a handle to the Stage_utility_models collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_utility_models", None)
def get_mode_db():
    """Return a handle to the Stage_Modes collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_Modes", None)
def get_fake_sections_db():
    """Return a handle to the Stage_fake_sections collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_fake_sections", None)
def get_common_place_db():
    """Return a handle to the Stage_common_place collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_common_place", None)
def get_push_token_mapping_db():
    """Return a handle to the Stage_push_token_mapping collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_push_token_mapping", None)
def get_pipeline_state_db():
    """Return a handle to the Stage_pipeline_state collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_pipeline_state", None)
def get_timeseries_error_db():
    """Return a handle to the Stage_timeseries_error collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_timeseries_error", None)
def get_perturbed_trips_db():
    """Return a handle to the Stage_alternative_trips collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_alternative_trips", None)
def mode_cluster(mode, eps, sam):
    """Cluster the start/end points of confirmed sections for a mode.

    Collects section start and end coordinates from the live and backup
    section collections, converts them to UTM (so eps is in meters),
    runs DBSCAN, and returns the mean location of each cluster.

    :param mode: confirmed mode id to select sections by
    :param eps: DBSCAN neighborhood radius (UTM units)
    :param sam: DBSCAN min_samples
    :return: (k, 2) array of cluster centers in (lng, lat) order, or an
        empty array when no points were found
    """
    Sections = get_section_db()
    BackupSections = safmt.AbstractCollection(edb.pm_address,
                                              "Backup_database",
                                              "Stage_Sections", None)
    mode_change_pnts = []
    query = {"$and": [{'type': 'move'},
                      {'confirmed_mode': mode}]}
    logging.debug("Trying to find cluster locations for %s trips" %
                  (Sections.find(query).count()))

    def _collect_points(collection):
        # Gather start and end coordinates; skip sections with missing
        # or malformed points instead of aborting the whole run.
        # (Was a bare except; narrowed to the lookups that can fail.)
        for section in collection.find(query).sort("section_start_datetime",
                                                   1):
            try:
                mode_change_pnts.append(
                    section['section_start_point']['coordinates'])
                mode_change_pnts.append(
                    section['section_end_point']['coordinates'])
            except (KeyError, TypeError):
                logging.warning("Found trip %s with missing start and/or end points" %
                                (section['_id']))

    _collect_points(Sections)
    _collect_points(BackupSections)

    if len(mode_change_pnts) == 0:
        logging.debug("No points found in cluster input, nothing to fit..")
        return np.zeros(0)

    np_points = np.array(mode_change_pnts)

    # Convert to UTM. Build plain lists and convert once at the end --
    # np.append inside the loop reallocates the array every iteration
    # (quadratic).
    utm_x = []
    utm_y = []
    for row in mode_change_pnts:
        # GEOJSON order is lng, lat
        try:
            utm_loc = utm.from_latlon(row[1], row[0])
            utm_x.append(utm_loc[0])
            utm_y.append(utm_loc[1])
        except utm.error.OutOfRangeError:
            # Probably stored as (lat, lng) instead; retry swapped.
            logging.warning(
                "Found OutOfRangeError while converting=%s, swapping" % row)
            utm_loc = utm.from_latlon(row[0], row[1])
            utm_x.append(utm_loc[1])
            utm_y.append(utm_loc[0])

    utm_location = np.column_stack((utm_x, utm_y))
    db = DBSCAN(eps=eps, min_samples=sam)
    db_fit = db.fit(utm_location)
    db_labels = db_fit.labels_
    # DBSCAN labels noise as -1; drop noise points before averaging
    new_db_labels = db_labels[db_labels != -1]
    new_location = np_points[db_labels != -1]

    label_unique = np.unique(new_db_labels)
    cluster_center = np.zeros((len(label_unique), 2))
    for label in label_unique:
        # Cluster center = mean of member points, in original lng/lat space
        sub_location = new_location[new_db_labels == label]
        cluster_center[int(label)] = np.mean(sub_location, axis=0)
    return cluster_center
def get_habitica_db():
    """Return a handle to the Stage_user_habitica_access collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Stage_user_habitica_access", None)
def get_test_db():
    """Return a handle to the Test_Trips collection."""
    return safmt.AbstractCollection(
        pm_address, database_name, "Test_Trips", None)