def runPipeline(self):
    """Run the full mode-inference training pipeline end to end.

    Steps: load confirmed training sections (current + backup databases),
    build bus/train stop clusters, assemble and clean the feature matrix,
    select features, train the model, and finally serialize it.
    All intermediate results are stored on ``self`` for the later steps.
    """
    # Select only sections whose ground-truth mode is confirmed (non-empty).
    confirmedQuery = ModeInferencePipelineMovesFormat.getSectionQueryWithGroundTruth({'$ne': ''})

    self.confirmedSections = self.loadTrainingDataStep(confirmedQuery)
    logging.debug("confirmedSections.count() = %s" % (self.confirmedSections.count()))
    logging.info("initial loadTrainingDataStep DONE")

    # Augment the training set with sections archived in the backup database.
    logging.debug("finished loading current training set, now loading from backup!")
    backupCollection = safmt.AbstractCollection(edb.pm_address, "Backup_database",
                                                "Stage_Sections", None)
    self.backupConfirmedSections = self.loadTrainingDataStep(confirmedQuery, backupCollection)
    logging.info("loadTrainingDataStep DONE")

    (self.bus_cluster, self.train_cluster) = self.generateBusAndTrainStopStep()
    logging.info("generateBusAndTrainStopStep DONE")

    (self.featureMatrix, self.resultVector) = self.generateFeatureMatrixAndResultVectorStep()
    logging.info("generateFeatureMatrixAndResultVectorStep DONE")

    (self.cleanedFeatureMatrix, self.cleanedResultVector) = self.cleanDataStep()
    logging.info("cleanDataStep DONE")

    self.selFeatureIndices = self.selectFeatureIndicesStep()
    logging.info("selectFeatureIndicesStep DONE")
    # Keep only the selected feature columns for model training.
    self.selFeatureMatrix = self.cleanedFeatureMatrix[:,self.selFeatureIndices]

    self.model = self.buildModelStep()
    logging.info("buildModelStep DONE")

    # Serialize the model
    self.saveModelStep()
    logging.info("saveModelStep DONE")
def get_mode_share_by_count(list_idx):
    """Return the fractional share of each known transportation mode among
    the sections identified by ``list_idx``.

    :param list_idx: list of section ``_id`` values to tally
    :return: dict mapping each canonical ``mode_id`` (from ``getAllModes()``)
             to its fraction of the total confirmed-mode count; all zeros
             when no sections carried a countable mode
    """
    Sections = get_section_db()
    BackupSections = safmt.AbstractCollection(edb.pm_address, "Backup_database",
                                              "Stage_Sections", None)
    AllModeList = getAllModes()

    # Seed counters for every canonical mode so absent modes report 0.
    MODE = {mode['mode_id']: 0 for mode in AllModeList}
    for _id in list_idx:
        section = Sections.find_one({'_id': _id})
        if section is None:
            # NOTE(review): the backup lookup uses 'id' while the primary uses
            # '_id' — confirm the backup collection really keys sections this
            # way before changing it.
            section = BackupSections.find_one({'id': _id})
        mode_id = section['confirmed_mode']
        # A section may carry a mode id outside the canonical list; count it
        # so the denominator stays correct, even though it is not reported.
        try:
            MODE[mode_id] += 1
        except KeyError:
            MODE[mode_id] = 1

    # Hoisted: the original recomputed this sum once per mode in the loop.
    total = sum(MODE.values())
    if total == 0:
        return {mode['mode_id']: 0 for mode in AllModeList}
    return {mode['mode_id']: MODE[mode['mode_id']] / total for mode in AllModeList}
def get_client_stats_db_backup():
    """Return a handle to the Stage_client_stats collection."""
    client_stats_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_client_stats", None)
    return client_stats_coll
def get_uuid_db():
    """Return a handle to the Stage_uuids collection."""
    uuid_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_uuids", None)
    return uuid_coll
def get_groundClusters_db():
    """Return a handle to the Stage_groundCluster collection."""
    ground_cluster_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_groundCluster", None)
    return ground_cluster_coll
def get_prediction_db():
    """Return a handle to the Stage_Predictions collection."""
    prediction_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_Predictions", None)
    return prediction_coll
def get_trip_db():
    """Return a handle to the Stage_Trips collection."""
    trip_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_Trips", None)
    return trip_coll
def get_utility_model_db():
    """Return a handle to the Stage_utility_models collection."""
    utility_model_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_utility_models", None)
    return utility_model_coll
def get_mode_db():
    """Return a handle to the Stage_Modes collection."""
    mode_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_Modes", None)
    return mode_coll
def get_fake_sections_db():
    """Return a handle to the Stage_fake_sections collection."""
    fake_sections_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_fake_sections", None)
    return fake_sections_coll
def get_common_place_db():
    """Return a handle to the Stage_common_place collection."""
    common_place_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_common_place", None)
    return common_place_coll
def get_push_token_mapping_db():
    """Return a handle to the Stage_push_token_mapping collection."""
    push_token_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_push_token_mapping", None)
    return push_token_coll
def get_pipeline_state_db():
    """Return a handle to the Stage_pipeline_state collection."""
    pipeline_state_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_pipeline_state", None)
    return pipeline_state_coll
def get_timeseries_error_db():
    """Return a handle to the Stage_timeseries_error collection."""
    timeseries_error_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_timeseries_error", None)
    return timeseries_error_coll
def get_perturbed_trips_db():
    """Return a handle to the Stage_alternative_trips collection."""
    alternative_trips_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_alternative_trips", None)
    return alternative_trips_coll
def mode_cluster(mode, eps, sam):
    """Cluster the start/end points of all sections confirmed as ``mode``.

    Points are gathered from both the live and the backup sections
    collections, converted to UTM so that ``eps`` is in meters, clustered
    with DBSCAN, and the centers are averaged back in the original
    GEOJSON (lng, lat) coordinates.

    :param mode: confirmed mode id to select sections for
    :param eps: DBSCAN neighborhood radius (meters, since we cluster in UTM)
    :param sam: DBSCAN ``min_samples``
    :return: (n_clusters, 2) array of cluster centers in (lng, lat) order,
             or an empty array when no points were found
    """
    Sections = get_section_db()
    BackupSections = safmt.AbstractCollection(edb.pm_address, "Backup_database",
                                              "Stage_Sections", None)

    query = {"$and": [{'type': 'move'},
                      {'confirmed_mode': mode}]}
    logging.debug("Trying to find cluster locations for %s trips" % (Sections.find(query).count()))

    # Collect GEOJSON (lng, lat) start/end coordinates from both collections.
    # (The original duplicated this loop body for each collection.)
    mode_change_pnts = []
    for collection in (Sections, BackupSections):
        for section in collection.find(query).sort("section_start_datetime", 1):
            try:
                mode_change_pnts.append(section['section_start_point']['coordinates'])
                mode_change_pnts.append(section['section_end_point']['coordinates'])
            except (KeyError, TypeError):
                # Was a bare except; narrowed to missing or None point fields.
                logging.warning("Found trip %s with missing start and/or end points" % (section['_id']))

    if len(mode_change_pnts) == 0:
        logging.debug("No points found in cluster input, nothing to fit..")
        return np.zeros(0)

    np_points = np.array(mode_change_pnts)

    # Convert to UTM so eps is metric.  Accumulate into plain lists instead
    # of np.append in a loop, which copies the whole array each iteration.
    utm_x = []
    utm_y = []
    for row in mode_change_pnts:
        # GEOJSON order is lng, lat
        try:
            utm_loc = utm.from_latlon(row[1], row[0])
            utm_x.append(utm_loc[0])
            utm_y.append(utm_loc[1])
        except utm.error.OutOfRangeError:
            # Coordinates stored in the wrong order; retry swapped.
            logging.warning("Found OutOfRangeError while converting=%s, swapping" % row)
            utm_loc = utm.from_latlon(row[0], row[1])
            utm_x.append(utm_loc[1])
            utm_y.append(utm_loc[0])
    utm_location = np.column_stack((utm_x, utm_y))

    db = DBSCAN(eps=eps, min_samples=sam)
    db_fit = db.fit(utm_location)
    db_labels = db_fit.labels_

    # Drop DBSCAN noise points (label -1) and average each cluster's
    # member points in the original lng/lat space.
    core_mask = db_labels != -1
    new_db_labels = db_labels[core_mask]
    new_location = np_points[core_mask]
    label_unique = np.unique(new_db_labels)
    cluster_center = np.zeros((len(label_unique), 2))
    for label in label_unique:
        sub_location = new_location[new_db_labels == label]
        cluster_center[int(label)] = np.mean(sub_location, axis=0)
    return cluster_center
def get_habitica_db():
    """Return a handle to the Stage_user_habitica_access collection."""
    habitica_coll = safmt.AbstractCollection(pm_address, database_name, "Stage_user_habitica_access", None)
    return habitica_coll
def get_test_db():
    """Return a handle to the Test_Trips collection."""
    test_trips_coll = safmt.AbstractCollection(pm_address, database_name, "Test_Trips", None)
    return test_trips_coll