def deletePatterns(req: deletePatterns_payload):
    cf.logmessage("deletePatterns api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    patternsSQL = cf.quoteNcomma(req.patterns)
    d1 = f"delete from pattern_stops where space_id={space_id} and pattern_id in ({patternsSQL})"
    d2 = f"delete from patterns where space_id={space_id} and id in ({patternsSQL})"
    d1Count = dbconnect.execSQL(d1)
    d2Count = dbconnect.execSQL(d2)

    # also delete the trips and timings under the pattern
    d3 = f"""delete from stop_times where space_id={space_id}
        and trip_id in (select id from trips where pattern_id in ({patternsSQL}))"""
    d4 = f"delete from trips where space_id={space_id} and pattern_id in ({patternsSQL})"
    d3Count = dbconnect.execSQL(d3)
    d4Count = dbconnect.execSQL(d4)

    returnD = {
        'message': "success",
        "pattern_stops_deleted": d1Count,
        "patterns_deleted": d2Count,
        "trips_deleted": d4Count,
        "stop_times_deleted": d3Count
    }
    cf.logmessage(returnD)
    return returnD

def addPattern(req: addPattern_payload):
    cf.logmessage("addPattern api call")
    space_id = int(os.environ.get('SPACE_ID', 1))

    # check if already existing
    s1 = f"""select name from patterns
        where space_id = {space_id}
        and route_id='{req.route_id}'
        order by sequence"""
    existingPatterns = dbconnect.makeQuery(s1, output='column')
    if req.name in existingPatterns:
        raise HTTPException(status_code=400, detail="Pattern already exists")

    returnD = {"message": "success"}
    pid1 = cf.makeUID()
    i1 = f"""insert into patterns (space_id, id, route_id, name, sequence, created_on)
        values ({space_id}, '{pid1}', '{req.route_id}', '{req.name}',
        {len(existingPatterns)+1}, CURRENT_TIMESTAMP)
    """
    pCount = dbconnect.execSQL(i1)
    if not pCount:
        raise HTTPException(status_code=400, detail="Error could not create Pattern")
    returnD['id'] = pid1
    return returnD

def OLD_addTable(df, tablename, lowerCaseColumns=False):
    ps_connection = threaded_postgreSQL_pool.getconn()
    table_cols = getColumnsList(tablename, ps_connection)
    if lowerCaseColumns:
        df.columns = [x.lower() for x in df.columns]  # make lowercase

    # ensure only those values go to the DB table that have columns there
    sending_cols = [x for x in table_cols if x in df.columns]
    discarded_cols = set(df.columns.to_list()) - set(table_cols)
    if len(discarded_cols):
        cf.logmessage("Dropping {} cols from uploaded data as they're not in the DB: {}".format(
            len(discarded_cols), ', '.join(discarded_cols)))

    CHUNKSIZE = int(os.environ.get('CHUNKSIZE', 1))
    try:
        df[sending_cols].to_sql(
            name=tablename,
            con=ps_connection,
            chunksize=CHUNKSIZE,
            if_exists='append',
            index=False
        )
        threaded_postgreSQL_pool.putconn(ps_connection)  # return threaded connection back to pool
    except IntegrityError as e:
        cf.logmessage(e)
        threaded_postgreSQL_pool.putconn(ps_connection)  # return threaded connection back to pool
        return False
    except:
        threaded_postgreSQL_pool.putconn(ps_connection)  # return threaded connection back to pool
        raise
    return True

def addTable(df, table):
    """
    From https://naysan.ca/2020/05/09/pandas-to-postgresql-using-psycopg2-bulk-insert-performance-benchmark/
    Using psycopg2.extras.execute_values() to insert the dataframe
    """
    # Create a list of tuples from the dataframe values
    tuples = [tuple(x) for x in df.to_numpy()]
    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))
    # SQL query to execute
    query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
    ps_connection = threaded_postgreSQL_pool.getconn()
    cursor = ps_connection.cursor()
    cf.logmessage(f"Adding {len(df)} rows to {table}")
    try:
        extras.execute_values(cursor, query, tuples)
        ps_connection.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        cf.logmessage("Error: %s" % error)
        ps_connection.rollback()
        cursor.close()
        threaded_postgreSQL_pool.putconn(ps_connection)  # return threaded connection back to pool
        return False
    cursor.close()
    threaded_postgreSQL_pool.putconn(ps_connection)  # return threaded connection back to pool
    return True

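# A minimal usage sketch for addTable(), assuming a stops_master table whose columns
# include space_id, id and name. The DataFrame's column names must exactly match the
# table's columns, since they are interpolated into the INSERT statement. The ids and
# names below are hypothetical, for illustration only.
def _example_addTable():
    demo = pd.DataFrame([
        {'space_id': 1, 'id': 'aBc123', 'name': 'Central Depot'},
        {'space_id': 1, 'id': 'dEf456', 'name': 'Market Square'},
    ])
    ok = addTable(demo, 'stops_master')  # True on success, False on any DB error
    cf.logmessage(f"addTable succeeded: {ok}")
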
def loadRoutesList(req: loadRoutesList_payload):
    cf.logmessage("loadRoutesList api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    s1 = f"""select id, name, depot, description from routes
        where space_id = {space_id}
        order by depot, name"""
    df = dbconnect.makeQuery(s1, output='df', keepCols=True)
    df.rename(columns={'name': 'text'}, inplace=True)
    returnD = {'message': "success"}

    # group by depot, in the select2 grouped-data format:
    # https://select2.org/data-sources/formats#grouped-data
    # TO DO: also group by route groups etc
    returnD['routes'] = []
    for depot in df['depot'].unique():
        row = {}
        row['text'] = depot if len(depot) else "MISC"
        df2 = df[df['depot'] == depot]
        row['children'] = df2.to_dict(orient='records')
        returnD['routes'].append(row)

    returnD['depots'] = df['depot'].unique().tolist()
    return returnD

def loadStops(req: loadStops_payload):
    cf.logmessage("loadStops api call")
    if len(req.data):
        cols = ','.join(req.data)
    else:
        cols = ','.join([
            'id', 'name', 'description', 'latitude', 'longitude',
            'stop_group_id', 'created_on', 'created_by', 'last_updated', 'modified_by'
        ])
    space_id = int(os.environ.get('SPACE_ID', 1))
    s1 = f"select {cols} from stops_master where space_id = {space_id}"
    df = dbconnect.makeQuery(s1, output='df', fillna=False)
    returnD = {'message': "success"}
    if len(df):
        returnD['stops'] = df.to_dict(orient='records')
        if req.indexed:
            returnD['indexed'] = df.set_index('id', drop=False).to_dict(orient='index')
    else:
        returnD['stops'] = []
        if req.indexed:
            returnD['indexed'] = {}
    return returnD

def execSQL(s1):
    cf.logmessage(' '.join(s1.split()))
    ps_connection = threaded_postgreSQL_pool.getconn()
    ps_cursor = ps_connection.cursor()
    ps_cursor.execute(s1)
    ps_connection.commit()
    affected = ps_cursor.rowcount
    ps_cursor.close()
    threaded_postgreSQL_pool.putconn(ps_connection)
    return affected

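# The queries in this module are assembled with f-strings, which risks SQL injection
# and breaks on values containing quotes. A minimal sketch of a parameterized
# alternative, assuming the same pool: `params` is passed through to psycopg2, which
# handles quoting and escaping. Illustrative only; the existing callers above and
# below still build their SQL as plain strings.
def execSQL_params(s1, params=None):
    cf.logmessage(' '.join(s1.split()))
    ps_connection = threaded_postgreSQL_pool.getconn()
    try:
        ps_cursor = ps_connection.cursor()
        # e.g. execSQL_params("delete from stops_master where id = %s", (stop_id,))
        ps_cursor.execute(s1, params)
        ps_connection.commit()
        affected = ps_cursor.rowcount
        ps_cursor.close()
    finally:
        threaded_postgreSQL_pool.putconn(ps_connection)
    return affected
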
def updateStops(req: updateStops_payload):
    """
    Update stops
    """
    cf.logmessage("updateStops api call")
    # convert request body to json array, from https://stackoverflow.com/a/60845064/4355695
    requestArr = [t.__dict__ for t in req.data]
    df1 = pd.DataFrame(requestArr)
    df1['space_id'] = int(os.environ.get('SPACE_ID', 1))
    timestamp = cf.getTime()
    df1['last_updated'] = timestamp
    df1['modified_by'] = ''  # will bring in username later

    # to do: validation etc
    updated = []
    not_updated = []
    for row in df1.to_dict(orient='records'):
        if not row.get('stop_id'):
            cf.logmessage("No stop_id:", row)
            continue
        uterms = []
        if row.get('name'):
            uterms.append(f"name='{row['name']}'")
            uterms.append(f"zap='{cf.zapper(row['name'])}'")
        if row.get('latitude'):
            uterms.append(f"latitude={row['latitude']}")
        if row.get('longitude'):
            uterms.append(f"longitude={row['longitude']}")
        if row.get('description'):
            uterms.append(f"description='{row['description']}'")
        if row.get('group_id'):
            uterms.append(f"group_id='{row['group_id']}'")
        u1 = f"""update stops_master
            set {', '.join(uterms)}
            where id='{row['stop_id']}'
        """
        uCount = dbconnect.execSQL(u1)
        if not uCount:
            not_updated.append(row)
        else:
            updated.append(row)

    returnD = {
        'message': "success",
        "num_updated": 0, "num_not_updated": 0,
        "updated": [], "not_updated": []
    }
    if len(updated):
        returnD['num_updated'] = len(updated)
        returnD['updated'] = [x['name'] for x in updated]
    if len(not_updated):
        returnD['num_not_updated'] = len(not_updated)
        returnD['not_updated'] = [x['name'] for x in not_updated]
    return returnD

def loadconfig(req: loadConfig_payload):
    cf.logmessage("loadConfig api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    s1 = f"""select config_key, config_value from config
        where space_id = {space_id}"""
    df = dbconnect.makeQuery(s1, output='df')
    returnD = {'message': "success"}
    if len(df):
        returnD['config'] = df.to_dict(orient='records')
    else:
        returnD['config'] = []
    return returnD

def loadpatterns(req: loadpatterns_payload):
    cf.logmessage("loadpatterns api call")
    route_id = req.route_id
    space_id = int(os.environ.get('SPACE_ID', 1))
    s1 = f"""select * from patterns
        where space_id = {space_id}
        and route_id = '{route_id}'"""
    df = dbconnect.makeQuery(s1, output='df')
    returnD = {'message': "success"}
    if len(df):
        returnD['patterns'] = df.to_dict(orient='records')
    else:
        returnD['patterns'] = []
    return returnD

def saveConfig(req: saveConfig_payload):
    cf.logmessage("saveConfig api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    # convert request body to json array, from https://stackoverflow.com/a/60845064/4355695
    requestArr = [t.__dict__ for t in req.data]
    dfnew = pd.DataFrame(requestArr)
    if not len(dfnew):
        raise HTTPException(status_code=400, detail="Nothing to save")

    # fetch existing configs
    s1 = f"""select config_key, config_value, id from config
        where space_id = {space_id}"""
    dfold = dbconnect.makeQuery(s1, output='df', fillna=True, keepCols=True).set_index('config_key')
    oldConfigs = dfold.to_dict(orient='index')

    returnD = {'message': "success", "updates": 0, "new": 0}
    for N, row in dfnew.iterrows():
        if row['key'] in oldConfigs.keys():
            if row['value'] != oldConfigs[row['key']]['config_value']:
                u1 = f"""update config
                    set config_value = '{row['value']}',
                    last_updated = CURRENT_TIMESTAMP
                    where id = '{oldConfigs[row['key']]['id']}'
                """
                u1Count = dbconnect.execSQL(u1)
                returnD['updates'] += u1Count
            else:
                cf.logmessage(f"{row['key']} has same value {row['value']} so not replacing.")
        else:
            newId = cf.makeUID()
            i1 = f"""insert into config (id, space_id, config_key, config_value, created_on, last_updated)
                values ('{newId}', {space_id}, '{row['key']}', '{row['value']}', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
            """
            i1Count = dbconnect.execSQL(i1)
            returnD['new'] += i1Count
    return returnD

def loadPattern(req: loadPattern_payload):
    cf.logmessage("loadPattern api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    returnD = {"message": "success"}
    # note: currently the frontend isn't using the lat-longs here; can make it use them later.
    s1 = f"""select t1.*, t2.name, t2.latitude, t2.longitude from pattern_stops as t1
        left join stops_master as t2
        on t1.stop_id = t2.id
        where t1.space_id = {space_id}
        and t1.pattern_id = '{req.pattern_id}'
        order by t1.stop_sequence
    """
    df1 = dbconnect.makeQuery(s1, output='df', keepCols=True)
    returnD['pattern_id'] = req.pattern_id
    returnD['pattern_stops'] = df1.to_dict(orient='records')
    return returnD

def updatePatternsOrder(req: updatePatternsOrder_payload):
    cf.logmessage("updatePatternsOrder api call")
    returnD = {'message': "success"}

    # 2 passes of updates, to avoid breaking the route + sequence constraint.
    # First, set the new sequences as negative numbers, which we know for sure aren't being taken up.
    for N, pid in enumerate(req.sequence):
        newSeq = -(N + 1)
        u1 = f"update patterns set sequence={newSeq} where id='{pid}'"
        uCount = dbconnect.execSQL(u1)

    # After, populate the actual new sequence numbers. All positive nums were cleared so are available.
    for N, pid in enumerate(req.sequence):
        newSeq = N + 1
        u2 = f"update patterns set sequence={newSeq} where id='{pid}'"
        uCount = dbconnect.execSQL(u2)

    return returnD

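# Worked example of why the two passes above are needed, assuming a unique constraint
# on (route_id, sequence): swapping patterns p1 (seq 1) and p2 (seq 2) by writing
# sequence=2 to p1 directly would collide with p2's existing row. Hypothetical ids;
# this just prints the statements each pass would run.
def _example_reorder_passes():
    sequence = ['p2', 'p1']  # desired new order
    for N, pid in enumerate(sequence):
        print(f"update patterns set sequence={-(N + 1)} where id='{pid}'")  # pass 1: negatives, never occupied
    for N, pid in enumerate(sequence):
        print(f"update patterns set sequence={N + 1} where id='{pid}'")     # pass 2: positives, now all free
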
async def deleteTrip(req: deleteTrip_payload):
    cf.logmessage("deleteTrip api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    pattern_id = req.pattern_id
    trip_id = req.trip_id

    # check if it's there in the trips table and stop_times table
    s1 = f"""select count(*) from trips
        where space_id = {space_id}
        and pattern_id = '{pattern_id}'
        and id = '{trip_id}'
    """
    c1 = dbconnect.makeQuery(s1, output='oneValue')
    s2 = f"""select count(*) from stop_times
        where space_id = {space_id}
        and trip_id = '{trip_id}'
    """
    c2 = dbconnect.makeQuery(s2, output='oneValue')

    returnD = {"message": "success", "trips_deleted": 0, "stop_times_deleted": 0}
    if c1:
        d1 = f"""delete from trips
            where space_id = {space_id}
            and pattern_id = '{pattern_id}'
            and id = '{trip_id}'
        """
        d1Count = dbconnect.execSQL(d1)
        returnD['trips_deleted'] = d1Count
    if c2:
        d2 = f"""delete from stop_times
            where space_id = {space_id}
            and trip_id = '{trip_id}'
        """
        d2Count = dbconnect.execSQL(d2)
        returnD['stop_times_deleted'] = d2Count
    return returnD

def diagnoseStops(req: diagnoseStops_payload):
    '''
    Diagnose stops for deleting:
    fetch each stop's patterns, routes
    '''
    cf.logmessage("diagnoseStops api call")
    idsListSQL = cf.quoteNcomma(req.idsList)
    s1 = f"""select t1.stop_id, t1.pattern_id, t2.route_id, t2.name as pattern_name,
        t3.depot, t3.name, t3.description
        from pattern_stops as t1
        left join patterns as t2
        on t1.pattern_id = t2.id
        left join routes as t3
        on t2.route_id = t3.id
        where t1.stop_id in ({idsListSQL})
    """
    df1 = dbconnect.makeQuery(s1, output='df', fillna=False)
    returnD = {"message": "success"}
    if not len(df1):
        returnD['patternCount'] = 0
        return returnD

    returnD['patterns'] = [x for x in df1['pattern_id'].unique() if x is not None]
    returnD['patternCount'] = len(returnD['patterns'])
    returnD['routes'] = [x for x in df1['route_id'].unique() if x is not None]
    returnD['routeCount'] = len(returnD['routes'])
    return returnD

def addRoute(req: addRoute_payload):
    cf.logmessage("addRoute api call")
    # turn request body into a dict
    reqD = req.__dict__
    cf.logmessage(reqD)

    # to do: validation
    if reqD.get('description'):
        descriptionHolder = f"'{reqD['description']}'"
    else:
        descriptionHolder = "NULL"
    if reqD.get('depot'):
        depotHolder = f"'{reqD['depot']}'"
    else:
        depotHolder = "NULL"

    space_id = int(os.environ.get('SPACE_ID', 1))
    returnD = {"message": "success"}

    if not reqD.get('route_id'):
        # create new route flow
        route_id = cf.makeUID()
        i1 = f"""insert into routes (space_id, id, name, created_on, description, depot)
            values ({space_id}, '{route_id}', '{reqD['name']}', CURRENT_TIMESTAMP,
            {descriptionHolder}, {depotHolder})
        """
        iCount = dbconnect.execSQL(i1)
        if not iCount:
            raise HTTPException(status_code=400, detail="Could not create route")
        returnD["id"] = route_id

        # also create 2 basic patterns for the route: UP and DOWN
        pid1 = cf.makeUID()
        pid2 = cf.makeUID()
        i2 = f"""insert into patterns (space_id, id, route_id, name, sequence, created_on) values
            ({space_id}, '{pid1}', '{route_id}', 'UP', 1, CURRENT_TIMESTAMP),
            ({space_id}, '{pid2}', '{route_id}', 'DOWN', 2, CURRENT_TIMESTAMP)
        """
        iCount2 = dbconnect.execSQL(i2)
        if not iCount2:
            cf.logmessage("Warning: Could not create patterns")
        else:
            returnD["patterns"] = [pid1, pid2]
    else:
        # update existing route flow
        u1 = f"""update routes
            set name = '{reqD['name']}',
            depot = {depotHolder},
            description = {descriptionHolder},
            last_updated = CURRENT_TIMESTAMP
            where id='{reqD['route_id']}'
        """
        uCount = dbconnect.execSQL(u1)
        if not uCount:
            raise HTTPException(status_code=400, detail="Could not update route")
        returnD["updated"] = uCount
    return returnD

def loadRouteDetails(req: loadRouteDetails_payload):
    cf.logmessage("loadRouteDetails api call")
    route_id = req.route_id
    returnD = {"message": "success"}
    space_id = int(os.environ.get('SPACE_ID', 1))

    s1 = f"""select * from routes
        where space_id = {space_id}
        and id='{route_id}'"""
    returnD['route'] = dbconnect.makeQuery(s1, output='oneJson')
    if not returnD['route'].get('name'):
        raise HTTPException(status_code=400, detail="Could not find route for given id")

    s2 = f"""select * from patterns
        where space_id = {space_id}
        and route_id='{route_id}'
        order by sequence"""
    returnD['patterns'] = dbconnect.makeQuery(s2, output='list')
    if not len(returnD['patterns']):
        return returnD

    # Note: the pattern_stops used to be bulk-fetched here too; removed, since each
    # pattern is now loaded individually in a separate api call.
    return returnD

async def addTrip(req: addTrip_payload):
    cf.logmessage("addTrip api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    pattern_id = req.pattern_id
    start_time = req.start_time

    trip_id = cf.makeUID()
    i1 = f"""insert into trips (space_id, id, pattern_id, start_time, name) values
        ({space_id}, '{trip_id}', '{pattern_id}', '{start_time}', '{trip_id}_{start_time}')
    """
    s1 = f"""select stop_sequence, stop_id from pattern_stops
        where space_id = {space_id}
        and pattern_id = '{pattern_id}'
        order by stop_sequence
    """
    df1 = dbconnect.makeQuery(s1, output='df')
    df2 = df1[['stop_sequence']].copy()
    df2['space_id'] = space_id
    df2['trip_id'] = trip_id
    df2['id'] = cf.assignUID(df2, length=7)
    df2['arrival_time'] = None
    df2.at[0, 'arrival_time'] = start_time
    # to do: populate the remaining arrival times also, taking a default speed
    # and calculating lat-long distance / routed distance

    status1 = dbconnect.execSQL(i1)
    status2 = dbconnect.addTable(df2, 'stop_times')

    returnD = {"message": "success"}
    returnD['trip_id'] = trip_id
    returnD['added_stop_times'] = len(df2)
    return returnD

def deleteStops(req: deleteStops_payload):
    """
    Delete stops
    """
    cf.logmessage("deleteStops api call")
    idsList = req.idsList
    space_id = int(os.environ.get('SPACE_ID', 1))
    # dependencyStatus, details = checkStopDependency(idsList, space_id)
    # if not dependencyStatus:
    idsListSQL = cf.quoteNcomma(idsList)
    d1 = f"delete from stops_master where id in ({idsListSQL})"
    dCount = dbconnect.execSQL(d1)
    returnD = {"message": "success", "deleted": dCount, "confirmation_required": False}
    if dCount:
        return returnD
    else:
        raise HTTPException(status_code=400, detail="Nothing to delete")

def suggestMatches(req: suggestMatches_payload):
    cf.logmessage("suggestMatches api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    stop_name_zap = cf.zapper(req.name)
    s1 = f"""select id, zap, name, latitude, longitude from stops_master
        where space_id = {space_id}
        and latitude between {req.minLat} and {req.maxLat}
        and longitude between {req.minLon} and {req.maxLon}
    """
    dfMapped = dbconnect.makeQuery(s1, output='df')
    if req.orig_id:
        # remove the original stop from the matches
        dfMapped = dfMapped[dfMapped['id'] != req.orig_id].copy()
    cf.logmessage(f"Got {len(dfMapped)} locations within the lat-long bounds")

    # filter 1: get name matches
    if not req.fuzzy:
        # direct match; wrapped in () to allow a multi-line chain here
        filter1 = (dfMapped[dfMapped['zap'] == stop_name_zap]
                   .drop_duplicates(subset=['latitude', 'longitude'])
                   .head(req.maxRows)
                   .reset_index(drop=True))
    else:
        # fuzzy match, scored with Jaro-Winkler
        dfMapped['score'] = dfMapped['zap'].apply(lambda x: jf.jaro_winkler(stop_name_zap, x))
        # below accuracy=0.8, it's observed to be normally too much of a mismatch, so better to limit it.
        filter1 = (dfMapped[dfMapped['score'] >= req.accuracy]
                   .sort_values('score', ascending=False)
                   .drop_duplicates(subset=['latitude', 'longitude'])
                   .head(req.maxRows)
                   .reset_index(drop=True))

    # skipping the ranking, source and databank parts from orig payanam for now
    cf.logmessage(f"{req.name}: {len(filter1)} matches found")
    del filter1['zap']
    returnD = {'message': "success"}
    returnD['hits'] = len(filter1)
    if len(filter1):
        returnD['data'] = filter1.to_dict(orient='records')
    return returnD

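# Quick illustration of the scoring used above: jellyfish's Jaro-Winkler similarity
# returns 1.0 for identical strings and decays with edits, weighting matching prefixes
# higher. Scores shown in comments are approximate; req.accuracy is the cutoff, with
# ~0.8 noted above as a practical floor. Sample strings are hypothetical.
def _example_jaro_winkler():
    print(jf.jaro_winkler("secretariat", "secretariat"))  # 1.0  -- exact match
    print(jf.jaro_winkler("secretariat", "secretriat"))   # high (~0.97) -- small typo
    print(jf.jaro_winkler("secretariat", "bus station"))  # low -- unrelated names
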
def addStops(req: addStops_payload):
    """
    Add stops
    """
    cf.logmessage("addStops api call")
    # convert request body to json array, from https://stackoverflow.com/a/60845064/4355695
    requestArr = [t.__dict__ for t in req.data]
    df1 = pd.DataFrame(requestArr)

    # to do: validation: remove the bad ones
    # remove duplicates
    df1 = df1.drop_duplicates('name').copy()
    df1['space_id'] = int(os.environ.get('SPACE_ID', 1))
    df1['id'] = cf.assignUID(df1)
    timestamp = cf.getTime()
    df1['created_on'] = timestamp
    df1['created_by'] = ''  # will bring in username later

    not_added = []
    added = []
    for row in df1.to_dict(orient='records'):
        if not row.get('name'):
            cf.logmessage("No name:", row)
            continue
        icols = ['space_id', 'id', 'name', 'created_on', 'created_by', 'zap']
        ivals = [f"{row['space_id']}", f"'{row['id']}'", f"'{row['name']}'",
                 "CURRENT_TIMESTAMP", f"'{row['created_by']}'", f"'{cf.zapper(row['name'])}'"]
        if row.get('latitude'):
            icols.append('latitude')
            ivals.append(f"{row['latitude']}")
        if row.get('longitude'):
            icols.append('longitude')
            ivals.append(f"{row['longitude']}")
        if row.get('description'):
            icols.append('description')
            ivals.append(f"'{row['description']}'")
        if row.get('group_id'):
            icols.append('group_id')
            ivals.append(f"'{row['group_id']}'")
        i1 = f"""insert into stops_master ({','.join(icols)})
            values ({','.join(ivals)})"""
        iCount = dbconnect.execSQL(i1)
        if not iCount:
            not_added.append(row)
        else:
            added.append(row)

    returnD = {
        'message': "success",
        "num_added": 0, "num_not_added": 0,
        "added": [], "not_added": []
    }
    if len(added):
        returnD['num_added'] = len(added)
        returnD['added'] = [{"stop_id": x['id'], "name": x['name']} for x in added]
    if len(not_added):
        returnD['num_not_added'] = len(not_added)
        returnD['not_added'] = [x['id'] for x in not_added]
    return returnD

def deleteStopsConfirm(req: deleteStopsConfirm_payload):
    cf.logmessage("deleteStopsConfirm api call")
    idsListSQL = cf.quoteNcomma(req.idsList)
    space_id = int(os.environ.get('SPACE_ID', 1))
    returnD = {"message": "success"}

    # find the patterns holding these stops
    s1 = f"""select distinct pattern_id from pattern_stops
        where space_id = {space_id}
        and stop_id in ({idsListSQL})"""
    patternsList = dbconnect.makeQuery(s1, output='column')

    if len(patternsList):
        # find which routes are affected
        patternsListSQL = cf.quoteNcomma(patternsList)
        s4 = f"""select distinct route_id from patterns
            where space_id = {space_id}
            and id in ({patternsListSQL})"""
        routesList = dbconnect.makeQuery(s4, output='column')
        returnD['routeCount'] = len(routesList)

        # delete the stops' entries from pattern_stops
        d1 = f"""delete from pattern_stops
            where space_id = {space_id}
            and stop_id in ({idsListSQL})"""
        pattern_deleted = dbconnect.execSQL(d1)
        returnD['patternCount'] = pattern_deleted

        # now, re-sequence all the patterns this stop was in
        for pN, pattern_id in enumerate(patternsList):
            s2 = f"""select id, stop_id, stop_sequence from pattern_stops
                where space_id = {space_id}
                and pattern_id='{pattern_id}'
                order by stop_sequence
            """
            pattern_stops = dbconnect.makeQuery(s2, output='list', fillna=False)
            counter = 0
            for row in pattern_stops:
                if row['stop_id'] in req.idsList:
                    continue
                counter += 1
                if row['stop_sequence'] == counter:
                    # sequence is as-is, do nothing
                    continue
                else:
                    u1 = f"""update pattern_stops
                        set stop_sequence={counter}
                        where id='{row['id']}'
                    """
                    ustatus = dbconnect.execSQL(u1)
            if counter > 0:
                cf.logmessage(f"Changed pattern {pattern_id}, stops count changed to {counter}")
    # else: the stops aren't in any patterns, proceed straight to deleting from stops_master

    # now delete from stops_master
    d3 = f"delete from stops_master where id in ({idsListSQL})"
    dcount = dbconnect.execSQL(d3)
    returnD['stopCount'] = dcount
    return returnD

def updateTimingsForPattern(pattern_id, pattern_length):
    # if a pattern's length has changed, bring its trips' stop_times entries in sync
    space_id = int(os.environ.get('SPACE_ID', 1))
    totalAdded = totalRemoved = 0

    # find all trips for the pattern, along with their existing stop_times entries
    s1 = f"""select t1.id as trip_id, t2.id, t2.stop_sequence from trips as t1
        left join stop_times as t2
        on t1.id = t2.trip_id
        where t1.space_id = {space_id}
        and t1.pattern_id = '{pattern_id}'
        and t2.space_id = {space_id}
        order by t2.trip_id, t2.stop_sequence
    """
    df_exist_all = dbconnect.makeQuery(s1, output='df', keepCols=True)
    if not len(df_exist_all):
        return 0, 0, 0
    tripsList = df_exist_all['trip_id'].unique().tolist()

    all_delIds = []
    all_df_new = []
    for trip_id in tripsList:
        # get the trip's existing stop_times entries
        cf.logmessage(f"trip_id: {trip_id}")
        df_exist = df_exist_all[df_exist_all['trip_id'] == trip_id].copy().reset_index(drop=True)

        if len(df_exist) == pattern_length:
            # no change needed!
            continue
        elif len(df_exist) > pattern_length:
            # delete flow: drop the entries past the pattern's new length
            delIds = df_exist[pattern_length:]['id'].tolist()
            if len(delIds):
                all_delIds += delIds
        else:
            # add flow: blank entries for the newly added stops
            newSeq = list(range(len(df_exist) + 1, pattern_length + 1))
            df_new = pd.DataFrame({'stop_sequence': newSeq})
            df_new['id'] = cf.assignUID(df_new, length=7)
            df_new['space_id'] = space_id
            df_new['trip_id'] = trip_id
            all_df_new.append(df_new)

    # delete at once
    if len(all_delIds):
        delIdsSQL = cf.quoteNcomma(all_delIds)
        cf.logmessage(f"ids to delete: {all_delIds}")
        d1 = f"""delete from stop_times
            where id in ({delIdsSQL})
        """
        totalRemoved = dbconnect.execSQL(d1)

    # add at once
    if len(all_df_new):
        add_df = pd.concat(all_df_new, sort=False, ignore_index=True)
        # addTable returns True/False, so count the added rows ourselves
        if dbconnect.addTable(add_df, 'stop_times'):
            totalAdded = len(add_df)

    return len(tripsList), totalAdded, totalRemoved

async def saveTimings(req: Request):
    cf.logmessage("saveTimings api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    returnD = {'message': "success"}

    # making the api take a custom json array,
    # from https://stackoverflow.com/a/64379772/4355695 (that and other answers)
    reqD = await req.json()
    if (not len(reqD.get('data', []))) or (not isinstance(reqD.get('data', []), list)):
        raise HTTPException(status_code=400, detail="No data")

    df1 = pd.DataFrame(reqD['data']).fillna('')
    if 'stop_id' in df1.columns:
        del df1['stop_id']
    if 'name' in df1.columns:
        del df1['name']

    # unpivot: one column per trip -> one row per (trip, stop)
    keepcols = ['stop_sequence']
    df2 = pd.melt(df1, id_vars=keepcols, var_name='trip_id',
                  value_name='arrival_time').sort_values(
                      ['trip_id', 'stop_sequence']).reset_index(drop=True)
    df2['id'] = cf.assignUID(df2, length=7)
    df2['space_id'] = space_id

    # TO DO: time validation
    for N in range(len(df2)):
        if df2.at[N, 'arrival_time'] == '':
            df2.at[N, 'arrival_time'] = None

    tripsList = df2['trip_id'].unique().tolist()
    if not len(tripsList):
        raise HTTPException(status_code=400, detail="No tripIds in data")
    trip_idSQL = cf.quoteNcomma(tripsList)

    # fully delete the existing stop_times for this pattern and replace with the new
    d1 = f"""delete from stop_times
        where space_id = {space_id}
        and trip_id in ({trip_idSQL})"""
    dCount1 = dbconnect.execSQL(d1)
    returnD['old_count'] = dCount1

    iStatus1 = dbconnect.addTable(df2, 'stop_times')
    if not iStatus1:
        returnD['new_count'] = 0
        raise HTTPException(status_code=400, detail="Failed to add stop_times data in DB")

    # update trips data: get all the start times
    start_times_lookup = df2[df2['stop_sequence'] == 1][['trip_id', 'arrival_time']]\
        .copy().set_index('trip_id').to_dict(orient='index')
    # to do later: get all end times also. Or, decide to drop that and don't bother.

    returnD['trips_updated'] = 0
    for trip_id in tripsList:
        uList = []
        uList.append("last_updated = CURRENT_TIMESTAMP")
        uList.append("modified_by = 'admin'")
        if start_times_lookup.get(trip_id, False):
            start_time = start_times_lookup[trip_id]['arrival_time']
            uList.append(f"start_time = '{start_time}'")
            uList.append(f"name = '{trip_id}_{start_time}'")
        else:
            uList.append("start_time = NULL")
            uList.append(f"name = '{trip_id}'")
        u1 = f"""update trips
            set {', '.join(uList)}
            where space_id = {space_id}
            and id = '{trip_id}'
        """
        uCount = dbconnect.execSQL(u1)
        returnD['trips_updated'] += uCount

    returnD['new_count'] = len(df2)
    return returnD

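# Sketch of the reshape saveTimings performs: the frontend sends a wide table with one
# column per trip, and pd.melt() converts it to the long stop_times layout (one row per
# trip per stop). Hypothetical trip ids, for illustration only; loadTimings below does
# the inverse with pd.pivot().
def _example_melt():
    wide = pd.DataFrame({
        'stop_sequence': [1, 2],
        'trip01': ['08:00', '08:10'],
        'trip02': ['09:00', '09:12'],
    })
    long_df = pd.melt(wide, id_vars=['stop_sequence'],
                      var_name='trip_id', value_name='arrival_time')
    print(long_df)  # 4 rows: columns stop_sequence, trip_id, arrival_time
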
def autoMapPattern(req: autoMapPattern_payload):
    cf.logmessage("autoMapPattern api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    returnD = {"message": "success"}

    # fetch all the unmapped stops of the pattern
    s1 = f"""select t1.id, t1.stop_sequence, t1.stop_id, t2.name, t2.zap
        from pattern_stops as t1
        left join stops_master as t2
        on t1.stop_id = t2.id
        where t1.pattern_id = '{req.pattern_id}'
        and t2.latitude is NULL
        and t1.space_id = {space_id}
        order by t1.stop_sequence
    """
    pStopsdf = dbconnect.makeQuery(s1, output='df')
    returnD['unmapped_stops'] = pStopsdf.to_dict(orient='records')
    if not len(pStopsdf):
        returnD['noneed'] = True
        return returnD
    else:
        returnD['noneed'] = False

    # fetch the mapped stops within the given bounds
    s2 = f"""select id as stop_id, zap, name, latitude, longitude from stops_master
        where space_id = {space_id}
        and latitude between {req.minLat} and {req.maxLat}
        and longitude between {req.minLon} and {req.maxLon}
    """
    dfMapped = dbconnect.makeQuery(s2, output='df')
    returnD['scanned'] = len(dfMapped)
    if not len(dfMapped):
        cf.logmessage("No stops mapped within given co-ords")
        return returnD
    else:
        cf.logmessage(f"Got {len(dfMapped)} locations within the lat-long bounds")

    matchCollector = []
    suggestionCollector = []
    nomatches = []
    for sN, srow in pStopsdf.iterrows():
        stop_name_zap = srow['zap']
        # filter 1: get name matches
        if not req.fuzzy:
            # direct match
            filter1 = (dfMapped[dfMapped['zap'] == stop_name_zap]
                       .drop_duplicates(subset=['latitude', 'longitude'])
                       .head(req.maxRows)
                       .reset_index(drop=True))
        else:
            # fuzzy match, scored with Jaro-Winkler
            dfMapped['score'] = dfMapped['zap'].apply(lambda x: jf.jaro_winkler(stop_name_zap, x))
            filter1 = (dfMapped[dfMapped['score'] >= req.accuracy]
                       .sort_values('score', ascending=False)
                       .drop_duplicates(subset=['latitude', 'longitude'])
                       .head(req.maxRows)
                       .reset_index(drop=True))

        # take the first stop_id match in the filter
        if len(filter1):
            matchCollector.append({
                'id': srow['id'],
                'old_stop_id': srow['stop_id'],
                'new_stop_id': filter1['stop_id'].tolist()[0]
            })
            if not req.autoMap:
                suggestionCollector.append({
                    'id': srow['id'],
                    'matches': filter1.to_dict(orient='records')
                })
        else:
            cf.logmessage(f"{srow['name']}: no match found")
            nomatches.append({'id': srow['id'], 'name': srow['name']})

    returnD['automapped_count'] = 0
    returnD['matchCollector'] = matchCollector
    returnD['suggestionCollector'] = suggestionCollector
    returnD['nomatches'] = nomatches

    if req.autoMap:
        # take the matchCollector and apply it
        for mrow in matchCollector:
            u1 = f"""update pattern_stops
                set stop_id = '{mrow['new_stop_id']}'
                where id='{mrow['id']}'
            """
            u1Count = dbconnect.execSQL(u1)
            returnD['automapped_count'] += u1Count
    return returnD

dbcreds = {
    'host': os.environ.get('DB_SERVER', ''),
    'port': os.environ.get('DB_PORT', ''),
    'dbname': os.environ.get('DB_DBNAME', ''),
    'user': os.environ.get('DB_USER', ''),
    'password': os.environ.get('DB_PW', '')
}
assert len(dbcreds['password']) > 2, "Invalid DB connection password"

threaded_postgreSQL_pool = psycopg2.pool.ThreadedConnectionPool(
    5, 20,
    user=dbcreds['user'],
    password=dbcreds['password'],
    host=dbcreds['host'],
    port=dbcreds['port'],
    database=dbcreds['dbname'])
assert threaded_postgreSQL_pool, "Could not create DB connection"
cf.logmessage("DB Connected")

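# Minimal sketch of the getconn()/putconn() pattern every helper below follows:
# connections must always be returned to the pool, even on error, or the pool
# (capped at 20 here) eventually starves. Illustrative only.
def _example_pool_usage():
    conn = threaded_postgreSQL_pool.getconn()
    try:
        cur = conn.cursor()
        cur.execute("select 1")
        print(cur.fetchone())
        cur.close()
    finally:
        threaded_postgreSQL_pool.putconn(conn)  # return connection back to pool
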
def makeQuery(s1, output='df', lowerCaseColumns=False, keepCols=False,
              fillna=True, engine=None, noprint=False):
    '''
    output choices:
    oneValue : ex: select count(*) from table1 (output will be only one value)
    oneRow : ex: select * from table1 where id='A' (output will be only one row)
    df : ex: select * from users (output will be a table)
    list : json array, like df.to_dict(orient='records')
    column : first column in output as a list. ex: select username from users
    oneJson : first row, as dict
    '''
    if not isinstance(s1, str):
        cf.logmessage("query needs to be a string")
        return False
    if ';' in s1:
        cf.logmessage("; not allowed")
        return False

    if not noprint:
        # keeping the auth check and some other queries out of the logs
        skipPrint = ['where token=', '.STArea()', 'STGeomFromText']
        if not any([(x in s1) for x in skipPrint]):
            cf.logmessage(f"Query: {' '.join(s1.split())}")
        else:
            cf.logmessage(f"Query: {' '.join(s1.split())[:20]}")

    ps_connection = threaded_postgreSQL_pool.getconn()
    result = None  # default return value

    if output in ('oneValue', 'oneRow'):
        ps_cursor = ps_connection.cursor()
        ps_cursor.execute(s1)
        row = ps_cursor.fetchone()
        if output == 'oneValue':
            result = row[0]
        else:
            result = row
        ps_cursor.close()

    elif output in ('df', 'list', 'oneJson', 'column'):
        # coerce_float=False : need to ensure mobile numbers etc aren't mangled
        if fillna:
            df = pd.read_sql_query(s1, con=ps_connection, coerce_float=False).fillna('')
        else:
            df = pd.read_sql_query(s1, con=ps_connection, coerce_float=False)

        # make all column headers lowercase,
        # from https://stackoverflow.com/questions/19726029/how-can-i-make-pandas-dataframe-column-headers-all-lowercase
        if lowerCaseColumns:
            df.columns = [x.lower() for x in df.columns]

        if output == 'df':
            result = df
            if (not len(df)) and (not keepCols):
                result = []
        elif output == 'oneJson':
            if not len(df):
                result = {}
            else:
                result = df.to_dict(orient='records')[0]
        elif not len(df):
            result = []
        elif output == 'column':
            result = df.iloc[:, 0].tolist()  # .iloc[:,0] -> first column
        elif output == 'list':
            result = df.to_dict(orient='records')
        else:
            # default - df
            result = df
    else:
        cf.logmessage('invalid output type')

    threaded_postgreSQL_pool.putconn(ps_connection)  # return threaded connection back to pool
    return result

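# Usage sketch for makeQuery()'s output modes, against the stops_master table used
# throughout this codebase. Illustrative only; assumes the table exists.
def _example_makeQuery():
    n = makeQuery("select count(*) from stops_master", output='oneValue')  # single scalar
    names = makeQuery("select name from stops_master", output='column')    # first column, as a list
    rows = makeQuery("select * from stops_master", output='list')          # list of dicts
    first = makeQuery("select * from stops_master", output='oneJson')      # first row, as a dict
    df = makeQuery("select * from stops_master", output='df')              # DataFrame ([] when empty, unless keepCols)
    return n, names, rows, first, df
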
def loadTimings(req: loadTimings_payload):
    cf.logmessage("loadTimings api call")
    space_id = int(os.environ.get('SPACE_ID', 1))
    pattern_id = req.pattern_id
    returnD = {'message': "success", "stops": [], "trips": [], "num_trips": 0}

    # stops
    s1 = f"""select t1.stop_sequence, t1.stop_id, t2.name from pattern_stops as t1
        left join stops_master as t2
        on t1.stop_id = t2.id
        where t1.space_id={space_id}
        and t1.pattern_id = '{pattern_id}'
        order by t1.stop_sequence
    """
    df1 = dbconnect.makeQuery(s1, output='df', keepCols=True, fillna=True)
    returnD['stops'] = df1.to_dict(orient='records')

    # trips
    s2 = f"""select * from trips
        where space_id={space_id}
        and pattern_id = '{pattern_id}'
        order by start_time
    """
    df2 = dbconnect.makeQuery(s2, output='df', keepCols=True, fillna=True)
    df2['start_time'] = df2['start_time'].apply(lambda x: str(x)[:5])
    returnD['num_trips'] = len(df2)
    returnD['trips'] = df2.to_dict(orient='records')

    # timings
    if len(df2):
        trip_idSQL = cf.quoteNcomma(df2['id'].tolist())
        s3 = f"""select trip_id, stop_sequence, arrival_time from stop_times
            where space_id={space_id}
            and trip_id in ({trip_idSQL})
            order by trip_id, stop_sequence
        """
        df3 = dbconnect.makeQuery(s3, output='df', keepCols=True, fillna=True)
        df3['arrival_time'] = df3['arrival_time'].apply(lambda x: str(x)[:5])
    else:
        df3 = pd.DataFrame(columns=['trip_id', 'stop_sequence', 'arrival_time'])

    # pivot by trip_id: one column per trip
    df4 = df3.pivot(index='stop_sequence', columns='trip_id',
                    values='arrival_time').fillna('').reset_index()

    # merge in stop ids, names
    df5 = pd.merge(df1, df4, on='stop_sequence', how='left')

    # sort by start timings
    allCols = list(df5.columns)
    tripCols = [x for x in allCols if x not in ('stop_sequence', 'stop_id', 'name')]
    newCols = ['stop_sequence', 'stop_id', 'name'] + sorted(tripCols)
    returnD['stop_times'] = df5[newCols].to_dict(orient='records')
    # TO DO: calc stop times offset from first trip or so

    cf.logmessage(f"Got {len(df2)} trips, {len(df3)} timings total")
    return returnD

def editPattern(req: editPattern_payload):
    cf.logmessage("editPattern api call")
    space_id = int(os.environ.get('SPACE_ID', 1))

    # find if existing
    s1 = f"""select * from patterns
        where space_id = {space_id}
        and id='{req.pattern_id}'
    """
    existingPattern = dbconnect.makeQuery(s1, output='oneJson')
    if not len(existingPattern):
        raise HTTPException(status_code=400, detail="Pattern not found")

    s2 = f"""select * from pattern_stops
        where space_id = {space_id}
        and pattern_id='{req.pattern_id}'
        order by stop_sequence
    """
    existingPatternStops = dbconnect.makeQuery(s2, output='df')

    # to do: validation of stop ids

    # delete the existing pattern sequence if any
    if len(existingPatternStops):
        d1 = f"""delete from pattern_stops
            where pattern_id='{req.pattern_id}'
        """
        dCount = dbconnect.execSQL(d1)
        if not dCount:
            raise HTTPException(status_code=400, detail="Could not remove existing sequence")
    else:
        cf.logmessage("This pattern didn't have stops earlier.")

    # insert the new sequence
    df = pd.DataFrame({'stop_id': req.stops})
    df['id'] = cf.assignUID(df)
    df['stop_sequence'] = list(range(1, len(df) + 1))
    df['space_id'] = space_id
    df['pattern_id'] = req.pattern_id
    status1 = dbconnect.addTable(df, table='pattern_stops')
    if not status1:
        raise HTTPException(status_code=400, detail="Could not add sequence")

    # also update the pattern's own entry
    u1 = f"""update patterns
        set last_updated=CURRENT_TIMESTAMP
        where id='{req.pattern_id}'
    """
    uCount = dbconnect.execSQL(u1)
    if not uCount:
        cf.logmessage("Warning: could not update the pattern's entry in patterns table, continuing")

    returnD = {
        "message": "success",
        "oldCount": len(existingPatternStops),
        "newCount": len(df)
    }

    # update the timings entries if the length of the pattern has changed
    if len(existingPatternStops) != len(df):
        returnD['numTrips'], returnD['timings_added'], returnD['timings_removed'] = \
            updateTimingsForPattern(req.pattern_id, len(df))

    cf.logmessage(returnD)
    return returnD