# Shared imports for the functions below. Project-local helpers
# (get_routes, get_weather_on_station, get_busstop_latlon, the `twitter`
# Twython client, searchPointersFileName, addAllToDB, getSingleCandidateMentions,
# openClassificationGUI, and friends) are assumed to be defined or imported
# elsewhere in the project.
import datetime
import math
import os
import pickle
import random
import time
from datetime import date, timedelta

from twython import TwythonRateLimitError

import db


def get_max_temperatur_per_route():
    # Yesterday as YYYYMMDD, matching the integer date key in the fact table.
    yesterday = int(str(date.today() - timedelta(days=1)).replace('-', ''))
    # For each route, report the highest temperature recorded yesterday and
    # the stop where it was measured. The date key is passed as a bind
    # parameter instead of being interpolated into the SQL string.
    query = '''
        select mt.route_long_name, max(t.buss_stopp_name), max(t.temperatur)
        from (select r.route_long_name, max(f.temperatur) max_temperatur
              from datamart_star.facttable_1 f
              join datamart_star.buss_stopp bs on f.buss_stop_id = bs.bkey
              join datamart_star.route_busstop rb using(bkey)
              join datamart_star.route r using(route_id)
              where f.kdate = %s
              group by r.route_long_name
              order by max_temperatur desc) as mt
        join (select bs.buss_stopp_name, r.route_long_name, f.temperatur
              from datamart_star.facttable_1 f
              join datamart_star.buss_stopp bs on f.buss_stop_id = bs.bkey
              join datamart_star.route_busstop rb using(bkey)
              join datamart_star.route r using(route_id)
              order by temperatur desc) as t
          on t.route_long_name = mt.route_long_name
        where mt.max_temperatur = t.temperatur
        group by mt.route_long_name
        order by max(t.temperatur) desc;'''
    conn, cur = db.connect()
    cur.execute(query, (yesterday,))
    for line in cur.fetchall():
        print(line)
    db.close(conn, cur)

def insert_routes():
    conn, cur = db.connect()
    cur.execute('''CREATE TABLE IF NOT EXISTS frostentur.routes(
                       route_id int,
                       route_short_name text,
                       route_long_name text,
                       route_type text);''')
    insert_query = '''insert into frostentur.routes(route_id, route_short_name, route_long_name, route_type)
                      values (%s, %s, %s, %s)'''
    routes = get_routes()
    arg_list = []
    # Flush in batches of 100 rows; assumes the DataFrame has a default
    # integer (Range) index.
    for index, arg in routes.iterrows():
        arg_list.append((str(arg['route_id']), str(arg['route_short_name']),
                         str(arg['route_long_name']), str(arg['route_type'])))
        if index % 100 == 0:
            cur.executemany(insert_query, arg_list)
            print(f'inserted: {index}')
            arg_list = []
            conn.commit()
    # Insert whatever is left after the last full batch.
    cur.executemany(insert_query, arg_list)
    conn.commit()
    db.close(conn, cur)
    return 1

def update_daily():
    conn, cur = db.connect()
    yesterday = str(date.today() - timedelta(days=1))
    valid_oslo_stations = ['SN18701', 'SN18315', 'SN18240', 'SN18210', 'SN18270',
                           'SN18500', 'SN18020', 'SN17980', 'SN18269', 'SN18815',
                           'SN76914', 'SN18690', 'SN18410', 'SN18700', 'SN18920',
                           'SN18420', 'SN18950', 'SN18980']
    # Rebuild the staging table from scratch with yesterday's observations.
    cur.execute('''CREATE TABLE IF NOT EXISTS frostentur.DailyTemperatures
                   (sourceId text, timek int, datek int, value float, unit text)''')
    cur.execute('TRUNCATE TABLE frostentur.DailyTemperatures;')
    insert_query = '''insert into frostentur.DailyTemperatures(sourceId, timek, datek, value, unit)
                      values (%s, %s, %s, %s, %s)'''
    for station in valid_oslo_stations:
        # Columns returned: ['sourceId', 'time', 'date', 'value', 'unit']
        result = get_weather_on_station(station, yesterday)
        if result is None:
            continue
        row_list = [(str(row['sourceId']), str(row['time']), str(row['date']),
                     str(row['value']), str(row['unit']))
                    for _, row in result.iterrows()]
        cur.executemany(insert_query, row_list)
    conn.commit()
    # Load the staged rows into the star-schema fact table, mapping each
    # weather station to its closest bus stop.
    cur.execute('''insert into datamart_star.facttable_1
                       (buss_stop_id, sourceId, temperatur, kdate, ktime)
                   select bs.bkey, d.sourceid, d.value, d.datek, d.timek
                   from frostentur.DailyTemperatures d
                   join datamart_star.closest_buss_weather_station cbws using(sourceId)
                   join datamart_star.buss_stopp bs
                     on bs.buss_stopp_name = cbws.stop_name;''')
    conn.commit()
    db.close(conn, cur)

def insert_weather_stations_latlon():
    # Stations that report temperature/precipitation and wind, respectively.
    temp_prec_ids = ['SN18210', 'SN18815', 'SN18315', 'SN18700']
    temp_prec = get_weather_station_lat_lon_from_list(temp_prec_ids)
    wind_ids = ['SN18210', 'SN18700', 'SN76914']
    wind = get_weather_station_lat_lon_from_list(wind_ids)

    conn, cur = db.connect()
    arg_list = [(str(arg['id']), str(arg['name']), str(arg['lat']), str(arg['lon']))
                for _, arg in temp_prec.iterrows()]
    cur.executemany('''insert into weather.prec_temp_stations(id_, name_, lat_, lon_)
                       values (%s, %s, %s, %s)''', arg_list)

    arg_list = [(str(arg['id']), str(arg['name']), str(arg['lat']), str(arg['lon']))
                for _, arg in wind.iterrows()]
    cur.executemany('''insert into weather.wind_stations(id_, name_, lat_, lon_)
                       values (%s, %s, %s, %s) ON CONFLICT DO NOTHING''', arg_list)
    conn.commit()
    db.close(conn, cur)
    return 1

def init_database():
    # Run the schema bootstrap scripts in order, then commit so the DDL
    # is not rolled back when the connection closes.
    conn, cur = db.connect()
    with open('dw_init/step1.sql', 'r') as sql_file:
        cur.execute(sql_file.read())
    with open('dw_init/step2.sql', 'r') as sql_file:
        cur.execute(sql_file.read())
    conn.commit()
    db.close(conn, cur)

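# The loaders above all rely on a small project-local `db` helper whose
# connect() hands back a (connection, cursor) pair and whose close() releases
# both. The real helper is not part of this listing; the following is a
# minimal sketch of what it might look like, assuming psycopg2 and Postgres.
# The DSN values are placeholders, not the project's actual credentials.
#
# --- db.py (hypothetical sketch) ---
# import psycopg2
#
#
# def connect():
#     """Open a Postgres connection and return (connection, cursor)."""
#     conn = psycopg2.connect(host='localhost', dbname='frostentur',
#                             user='frostentur', password='...')  # placeholder DSN
#     return conn, conn.cursor()
#
#
# def close(conn, cur):
#     """Release the cursor, then the connection."""
#     cur.close()
#     conn.close()
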
def classifyTweets(size=100):
    # Note: the tweet-classification side uses a db module whose connect()
    # returns a bare connection rather than a (conn, cur) pair.
    conn = db.connect()
    tweets = db.getRandomUnprocessedTweets(conn=conn, size=size)  # rows of [id, text]
    conn.close()
    # Keep only tweets that mention exactly one candidate.
    tweetsToProcess = getSingleCandidateMentions(tweets)
    # Hand the filtered tweets to the manual-classification GUI.
    openClassificationGUI(tweetsToProcess)

def insert_buss_weather_station_relation():
    conn, cur = db.connect()
    cur.execute('''create table if not exists fe_star.buss_weather_station_relation
                   (bkey text, sourceid text);''')
    # Pair each bus stop with its nearest weather station by straight-line
    # (Pythagorean) distance.
    relations = pythagoras_weather_buss()
    in_ = relations[['bkey', 'sourceid']].values.tolist()
    cur.executemany('''insert into fe_star.buss_weather_station_relation (bkey, sourceid)
                       values (%s, %s);''', in_)
    conn.commit()
    print('inserted!')
    db.close(conn, cur)

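# pythagoras_weather_buss() is defined elsewhere in the project; the sketch
# below is only an illustration of the nearest-station pairing it is named
# for, assuming two pandas DataFrames: bus stops with 'bkey', 'latitude',
# 'longitude' columns and stations with 'id', 'lat', 'lon' columns (these
# column names are assumptions). Plain Pythagorean distance on raw lat/lon
# ignores the Earth's curvature, which is a tolerable approximation at the
# scale of a single city like Oslo.
import pandas as pd


def pythagoras_weather_buss_sketch(stops: pd.DataFrame,
                                   stations: pd.DataFrame) -> pd.DataFrame:
    """For each bus stop, pick the weather station with the smallest
    squared (lat, lon) distance."""
    rows = []
    for _, stop in stops.iterrows():
        dist2 = ((stations['lat'] - stop['latitude']) ** 2
                 + (stations['lon'] - stop['longitude']) ** 2)
        nearest = stations.loc[dist2.idxmin()]
        rows.append({'bkey': stop['bkey'], 'sourceid': nearest['id']})
    return pd.DataFrame(rows)
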
def insert_precipitation_by_day(dates='2020-09-09/2020-10-10'):
    df_ = get_precipitation_by_day(dates)
    conn, cur = db.connect()
    arg_list = [(str(arg['id']), str(arg['date']), str(arg['precipitation_mm']))
                for _, arg in df_.iterrows()]
    cur.executemany('''insert into weather.precipitation(id_, date_, precipitation_mm)
                       values (%s, %s, %s) ON CONFLICT DO NOTHING''', arg_list)
    conn.commit()
    db.close(conn, cur)

def insert_temperatur_by_hour(dates='2020-10-09/2020-10-10'):
    df_ = get_temperatur_by_hour(dates)
    conn, cur = db.connect()
    arg_list = [(str(arg['id']), str(arg['hour']), str(arg['date']),
                 str(arg['air_temperatur_celsius']))
                for _, arg in df_.iterrows()]
    cur.executemany('''insert into weather.temperature(id_, hour_, date_, air_temperatur_celsius)
                       values (%s, %s, %s, %s) ON CONFLICT DO NOTHING''', arg_list)
    conn.commit()
    db.close(conn, cur)

def insert_busstop_latlon():
    stations = get_busstop_latlon()
    conn, cur = db.connect()
    arg_list = [(str(arg['stop_id']), str(arg['stop_name']),
                 str(arg['stop_lat']), str(arg['stop_lon']))
                for _, arg in stations.iterrows()]
    cur.executemany('''insert into frostentur.busstoplatlon(stop_id, stop_name, lat, lon)
                       values (%s, %s, %s, %s)''', arg_list)
    conn.commit()
    db.close(conn, cur)
    return 1

def insert_weather_station_latlon():
    stations = get_weather_station_latlon()
    conn, cur = db.connect()
    arg_list = [(str(arg['id']), str(arg['name']), str(arg['lat']), str(arg['lon']))
                for _, arg in stations.iterrows()]
    cur.executemany('''insert into frostentur.weatherStationlatlon(sourceId, name, lat, lon)
                       values (%s, %s, %s, %s)''', arg_list)
    conn.commit()
    db.close(conn, cur)
    return 1

def insert_buss_stopp():
    # Renamed from insert_busstop_latlon, which redefined (and silently
    # clobbered) the frostentur loader of the same name above. This loader
    # builds the fe_star.buss_stopp dimension, keyed by stop name + stop id.
    stations = get_busstop_latlon()
    conn, cur = db.connect()
    arg_list = [(str(arg['stop_name']) + str(arg['stop_id']), str(arg['stop_id']),
                 str(arg['stop_name']), str(arg['stop_lat']), str(arg['stop_lon']))
                for _, arg in stations.iterrows()]
    cur.execute('''CREATE TABLE IF NOT EXISTS fe_star.buss_stopp(
                       bkey text,
                       buss_stopp_id int,
                       buss_stopp_name text,
                       latitude float,
                       longitude float);''')
    cur.executemany('''insert into fe_star.buss_stopp(bkey, buss_stopp_id, buss_stopp_name, latitude, longitude)
                       values (%s, %s, %s, %s, %s)''', arg_list)
    conn.commit()
    db.close(conn, cur)
    return 1

def exportTweets():
    # Split labelled tweets into a 2/3 training and 1/3 test set on disk,
    # one folder per sentiment class.
    trainingTweetsPath = os.getcwd() + "/tweets/train"
    testTweetsPath = os.getcwd() + "/tweets/test"
    conn = db.connect()
    cursor = conn.cursor()
    query = "SELECT processedText, tweet FROM processedTweets WHERE sentiment="
    for sentiment in ["positive", "negative", "neutral"]:
        # Make the per-sentiment folders.
        sentimentTrainingPath = "%s/%s" % (trainingTweetsPath, sentiment)
        sentimentTestPath = "%s/%s" % (testTweetsPath, sentiment)
        os.makedirs(sentimentTrainingPath, exist_ok=True)
        os.makedirs(sentimentTestPath, exist_ok=True)
        cursor.execute("%s'%s'" % (query, sentiment))
        tweets = cursor.fetchall()
        # Randomly pick two thirds of the tweets for the training set.
        trainingSize = int(len(tweets) * 2 / 3)
        trainingIndices = set(random.sample(range(len(tweets)), trainingSize))
        for index in range(len(tweets)):
            filename = tweets[index][1] + ".txt"
            folder = sentimentTrainingPath if index in trainingIndices else sentimentTestPath
            with open("%s/%s" % (folder, filename), "w", encoding="utf-8") as tweetFile:
                tweetFile.write(tweets[index][0])
    cursor.close()
    conn.close()

def insert_busstop_route():
    stations = get_busstop_route()
    conn, cur = db.connect()
    print(len(stations))
    insert_query = '''insert into datamart_star.route_busstop(route_id, bkey)
                      values (%s, %s)'''
    arg_list = []
    # Flush in batches of 100 rows; assumes a default integer index.
    for index, arg in stations.iterrows():
        arg_list.append((str(arg['route_id']), str(arg['bkey'])))
        if index % 100 == 0:
            cur.executemany(insert_query, arg_list)
            print(f'inserted: {index}')
            arg_list = []
            conn.commit()
    # Insert the remainder after the last full batch.
    cur.executemany(insert_query, arg_list)
    conn.commit()
    db.close(conn, cur)
    return 1

def updateQuery(searchQuery, minTweetID_=None):
    # Backward-crawl the Twitter search API for searchQuery, persisting
    # progress pointers to disk so that an interrupted run can resume.
    # TODO: maybe turn searchPointers into a global variable for easier saving
    # on error; also create a separate function for updating it.
    searchPointers = {}
    try:
        # Reload searchPointers to make sure it is up to date.
        with open(searchPointersFileName, "rb") as pointersFile:
            searchPointers = pickle.load(pointersFile)
    except Exception:
        # The searchPointers file doesn't exist yet.
        searchPointers = {}
    if searchQuery not in searchPointers:
        searchPointers[searchQuery] = {"latestTweet": None, "oldestTweet": None,
                                       "finishedLastRun": True}
    if "finishedLastRun" not in searchPointers[searchQuery]:
        searchPointers[searchQuery]["finishedLastRun"] = True

    conn = db.connect()
    cursor = conn.cursor()
    querySize = 100
    # maxTweetID is the oldest tweet of the *current* update cycle, not of the
    # whole history of querying; it becomes the max_id ceiling for the next page.
    maxTweetID = None
    # minTweetID is the floor: the update runs until this tweet is encountered.
    minTweetID = minTweetID_ if minTweetID_ else searchPointers[searchQuery]["latestTweet"]
    firstRun = True
    if not searchPointers[searchQuery]["finishedLastRun"]:
        # The last run wasn't finished: resume paging below its oldest tweet.
        minTweetID = None
        maxTweetID = searchPointers[searchQuery]["oldestTweet"]

    while True:
        tweets = []
        try:
            searchResults = twitter.search(q=searchQuery, max_id=maxTweetID,
                                           since_id=minTweetID, count=querySize,
                                           result_type="recent")
            tweetCount = len(searchResults['statuses'])
            if tweetCount > 1:
                if firstRun:
                    if searchPointers[searchQuery]["finishedLastRun"]:
                        searchPointers[searchQuery]["latestTweet"] = \
                            searchResults['statuses'][0]['id_str']
                    firstRun = False
                for tweet in searchResults['statuses']:
                    tweets.append(tweet)
                addAllToDB(conn, tweets, False)
                print("Added %d tweets to Database by backward search. Query: %s"
                      % (tweetCount, searchQuery))
                maxTweetID = searchResults['statuses'][-1]['id_str']
                searchPointers[searchQuery]["oldestTweet"] = maxTweetID
                print("Oldest tweet ID: %s" % maxTweetID)
                searchPointers[searchQuery]["finishedLastRun"] = False
                with open(searchPointersFileName, "wb") as pointersFile:
                    pickle.dump(searchPointers, pointersFile)
            else:
                # End of search results or search error (e.g. rate limit).
                # Only mark the run finished on an empty result, to make sure
                # no tweet in the middle is left out; the next run then starts
                # again from the top.
                searchPointers[searchQuery]['finishedLastRun'] = True
                with open(searchPointersFileName, "wb") as pointersFile:
                    pickle.dump(searchPointers, pointersFile)
                print("Search Pointer file saved.")
                print("Backward search result returned zero")
                break
        except TwythonRateLimitError as e:
            print("[ERROR] TwythonRateLimitError on Search Query")
            print(e)
            # Sleep until the time the API says we may retry.
            retryIn = (datetime.datetime.fromtimestamp(float(e.retry_after))
                       - datetime.datetime.now())
            retryIn = float(retryIn.total_seconds())
            print("Time to sleep: %d" % math.ceil(retryIn))
            print("Time at start of sleep:")
            print(datetime.datetime.now())
            print("End of sleep:")
            print(datetime.datetime.fromtimestamp(float(e.retry_after)))
            if retryIn > 0:
                time.sleep(math.ceil(retryIn))
        except Exception as e:
            print(e)
            print("Restarting loop, ignoring error")

    cursor.close()
    conn.commit()
    conn.close()
    with open(searchPointersFileName, "wb") as pointersFile:
        pickle.dump(searchPointers, pointersFile)
    print("Search Pointer file saved. Safe to Exit.")
    print("END OF EXECUTION")
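
# A hypothetical driver illustrating how the warehouse loaders above could be
# sequenced: the bootstrap and dimension loads run once, after which
# update_daily() can be invoked from a daily scheduled job. This ordering is
# an assumption for illustration, not part of the original module.
if __name__ == '__main__':
    init_database()                  # create schemas/tables (one-time)
    insert_busstop_latlon()          # load bus-stop coordinates (one-time)
    insert_weather_station_latlon()  # load weather-station coordinates (one-time)
    insert_routes()                  # load routes (one-time)
    insert_busstop_route()           # link stops to routes (one-time)
    update_daily()                   # load yesterday's temperatures (daily)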