def get_max_temperatur_per_route():
    """Print, per route, the bus stop(s) that recorded yesterday's maximum
    temperature, ordered by that temperature descending.

    Reads datamart_star.facttable_1 joined to the bus-stop and route
    dimensions; output goes to stdout only (no return value).
    """
    # kdate is stored as a yyyymmdd integer key; build yesterday's key once.
    yesterday = int((date.today() - timedelta(days=1)).strftime('%Y%m%d'))
    # The date is passed as a bound parameter instead of being f-string
    # spliced into the SQL text (safer and lets the driver handle typing).
    query = '''select mt.route_long_name, max(t.buss_stopp_name), max(t.temperatur)
    from
		(select r.route_long_name , max(f.temperatur) max_temperatur
		from datamart_star.facttable_1 f 
		join datamart_star.buss_stopp bs on f.buss_stop_id = bs.bkey
		join datamart_star.route_busstop rb using(bkey)
		join datamart_star.route r using(route_id)
        where f.kdate = %s
		group by r.route_long_name order by max_temperatur desc) as mt
	join
		(select bs.buss_stopp_name, r.route_long_name, f.temperatur
		from datamart_star.facttable_1 f 
		join datamart_star.buss_stopp bs on f.buss_stop_id = bs.bkey
		join datamart_star.route_busstop rb using(bkey)
		join datamart_star.route r using(route_id)
		order by temperatur desc) as t
	on t.route_long_name = mt.route_long_name
	where mt.max_temperatur = t.temperatur
	group by mt.route_long_name order by max(t.temperatur) desc;'''
    conn, cur = db.connect()
    cur.execute(query, (yesterday,))

    for line in cur.fetchall():
        print(line)

    db.close(conn, cur)
def insert_routes():
    """Create frostentur.routes if needed and bulk-insert the routes
    returned by get_routes(), committing in batches of 100 rows.

    Returns 1 on completion (kept for backward compatibility).
    """
    create = '''CREATE TABLE IF NOT EXISTS frostentur.routes(route_id int,
	route_short_name text,
	route_long_name text,
	route_type text);'''
    insert = '''insert into frostentur.routes(route_id, route_short_name, route_long_name, route_type) 
                        values (%s, %s, %s, %s)'''

    stations = get_routes()
    conn, cur = db.connect()
    cur.execute(create)

    arg_list = []
    inserted = 0
    for _, arg in stations.iterrows():
        arg_list.append((str(arg['route_id']), str(arg['route_short_name']),
                         str(arg['route_long_name']), str(arg['route_type'])))
        # Flush by batch size rather than the DataFrame index: iterrows()
        # indices are not guaranteed to be consecutive integers, so the
        # original `index % 100 == 0` test could skip or repeat flushes.
        if len(arg_list) >= 100:
            cur.executemany(insert, arg_list)
            inserted += len(arg_list)
            print(f'inserted: {inserted}')
            arg_list = []
            conn.commit()

    # Flush whatever remains after the last full batch.
    if arg_list:
        cur.executemany(insert, arg_list)

    conn.commit()
    db.close(conn, cur)
    return 1
def update_daily():
    """Load yesterday's temperature readings for a fixed list of Oslo
    stations into the frostentur.DailyTemperatures staging table, then
    append the joined rows to the star-schema fact table.
    """
    conn, cur = db.connect()
    yesterday = str(date.today() - timedelta(days=1))
    valid_oslo_stations = ['SN18701','SN18315','SN18240','SN18210','SN18270','SN18500','SN18020','SN17980','SN18269','SN18815','SN76914','SN18690','SN18410','SN18700','SN18920','SN18420','SN18950','SN18980']
    query = '''CREATE TABLE IF NOT EXISTS frostentur.DailyTemperatures (sourceId text, timek int, datek int, value float, unit text)'''
    cur.execute(query)
    # The staging table is rebuilt from scratch on every run.
    query = '''TRUNCATE TABLE frostentur.DailyTemperatures;'''
    cur.execute(query)
    query = '''insert into frostentur.DailyTemperatures(sourceId, timek, datek, value, unit) 
                         values (%s, %s, %s, %s, %s)'''

    for station in valid_oslo_stations:
        # Helper returns a DataFrame with columns
        # ['sourceId', 'time', 'date', 'value', 'unit'], or None on failure;
        # failed stations are simply skipped (best-effort load).
        result = get_weather_on_station(station, yesterday)
        if result is None:
            continue
        row_list = [(str(row['sourceId']), str(row['time']), str(row['date']),
                     str(row['value']), str(row['unit']))
                    for _, row in result.iterrows()]
        cur.executemany(query, row_list)
        conn.commit()

    query = '''insert into datamart_star.facttable_1 (buss_stop_id, sourceId, temperatur, kdate, ktime)
                select bs.bkey, d.sourceid ,d.value ,d.datek, d.timek from frostentur.DailyTemperatures d 
                join datamart_star.closest_buss_weather_station cbws using(sourceId)
                join datamart_star.buss_stopp bs on bs.buss_stopp_name = cbws.stop_name ;'''
    cur.execute(query)
    # BUG FIX: the fact-table insert above was never committed before close,
    # so it could be rolled back when the connection was torn down.
    conn.commit()
    db.close(conn, cur)
# Example #4
def insert_weather_stations_latlon():
    """Insert lat/lon metadata for the temperature/precipitation stations
    and the wind stations into their weather-schema tables.

    Returns 1 on completion (kept for backward compatibility).
    """
    def _rows(df):
        # (id, name, lat, lon) tuples, stringified for the DB driver.
        return [(str(r['id']), str(r['name']), str(r['lat']), str(r['lon']))
                for _, r in df.iterrows()]

    temp_prec_ids = ['SN18210', 'SN18815', 'SN18315', 'SN18700']
    temp_prec = get_weather_station_lat_lon_from_list(temp_prec_ids)

    wind_ids = ['SN18210', 'SN18700', 'SN76914']
    wind = get_weather_station_lat_lon_from_list(wind_ids)

    conn, cur = db.connect()

    # ON CONFLICT DO NOTHING added for consistency with the wind insert
    # below, so re-running the loader does not abort on duplicate stations.
    query = '''insert into weather.prec_temp_stations(id_, name_, lat_, lon_)
                        values (%s, %s, %s, %s) ON CONFLICT DO NOTHING'''
    cur.executemany(query, _rows(temp_prec))

    query = '''insert into weather.wind_stations(id_, name_, lat_, lon_)
                        values (%s, %s, %s, %s) ON CONFLICT DO NOTHING'''
    cur.executemany(query, _rows(wind))

    conn.commit()
    db.close(conn, cur)
    return 1
# Example #5
def init_database():
    """Bootstrap the data warehouse by running the two init SQL scripts
    in order."""
    conn, cur = db.connect()
    for script_path in ('dw_init/step1.sql', 'dw_init/step2.sql'):
        with open(script_path, 'r') as sql_file:
            cur.execute(sql_file.read())
    db.close(conn, cur)
def classifyTweets(size=100):
	"""Fetch a random batch of unprocessed tweets and open the manual
	classification GUI for those mentioning a single candidate.

	size -- number of tweets to pull from the database (default 100)
	"""
	connection = db.connect()
	batch = db.getRandomUnprocessedTweets(conn=connection, size=size)
	connection.close()

	# Tweet structure: [id, text]; keep only single-candidate mentions.
	candidates = getSingleCandidateMentions(batch)

	# Hand the filtered tweets to the classification GUI.
	openClassificationGUI(candidates)
# Example #7
def insert_buss_weather_station_relation():
    """Create (if needed) and populate the bus-stop -> weather-station
    relation table from the pythagoras_weather_buss() distance mapping.
    """
    conn, cur = db.connect()
    query = '''create table if not exists fe_star.buss_weather_station_relation (bkey text, sourceid text);'''
    cur.execute(query)

    relations = pythagoras_weather_buss()
    in_ = relations[['bkey', 'sourceid']].values.tolist()
    query = '''insert into fe_star.buss_weather_station_relation (bkey, sourceid) values (%s, %s);'''
    cur.executemany(query, in_)
    # BUG FIX: commit the rows before closing (they were never committed),
    # and announce success only after the insert has actually run.
    conn.commit()
    print('inserted!')

    db.close(conn, cur)
# Example #8
def insert_precipitation_by_day(dates='2020-09-09/2020-10-10'):
    """Insert daily precipitation rows for the given date range.

    dates -- 'YYYY-MM-DD/YYYY-MM-DD' range string passed to the API helper.
    """
    df_ = get_precipitation_by_day(dates)
    conn, cur = db.connect()
    arg_list = [(str(arg['id']), str(arg['date']), str(arg['precipitation_mm']))
                for _, arg in df_.iterrows()]

    query = '''insert into weather.precipitation(id_,  date_, precipitation_mm)
                        values (%s, %s, %s) ON CONFLICT DO NOTHING'''
    cur.executemany(query, arg_list)
    # BUG FIX: the inserts were never committed before close.
    conn.commit()
    db.close(conn, cur)
# Example #9
def insert_temperatur_by_hour(dates='2020-10-09/2020-10-10'):
    """Insert hourly air-temperature rows for the given date range.

    dates -- 'YYYY-MM-DD/YYYY-MM-DD' range string passed to the API helper.
    """
    df_ = get_temperatur_by_hour(dates)
    conn, cur = db.connect()
    arg_list = [(str(arg['id']), str(arg['hour']), str(arg['date']),
                 str(arg['air_temperatur_celsius']))
                for _, arg in df_.iterrows()]

    query = '''insert into weather.temperature(id_, hour_, date_, air_temperatur_celsius)
                        values (%s, %s, %s, %s) ON CONFLICT DO NOTHING'''
    cur.executemany(query, arg_list)
    # BUG FIX: the inserts were never committed before close.
    conn.commit()
    db.close(conn, cur)
def insert_busstop_latlon():
    """Load every bus stop's id, name and coordinates into
    frostentur.busstoplatlon.

    Returns 1 on completion.
    """
    stop_frame = get_busstop_latlon()
    conn, cur = db.connect()

    # One (id, name, lat, lon) tuple per stop, stringified for the driver.
    rows = [
        (str(stop['stop_id']), str(stop['stop_name']),
         str(stop['stop_lat']), str(stop['stop_lon']))
        for _, stop in stop_frame.iterrows()
    ]

    query = '''insert into frostentur.busstoplatlon(stop_id, stop_name, lat, lon) 
                        values (%s, %s, %s, %s)'''
    cur.executemany(query, rows)

    conn.commit()
    db.close(conn, cur)
    return 1
def insert_weather_station_latlon():
    """Load every weather station's id, name and coordinates into
    frostentur.weatherStationlatlon.

    Returns 1 on completion.
    """
    station_frame = get_weather_station_latlon()
    conn, cur = db.connect()

    # One (id, name, lat, lon) tuple per station, stringified for the driver.
    rows = [
        (str(station['id']), str(station['name']),
         str(station['lat']), str(station['lon']))
        for _, station in station_frame.iterrows()
    ]

    query = '''insert into frostentur.weatherStationlatlon(sourceId, name, lat, lon) 
                        values (%s, %s, %s, %s)'''
    cur.executemany(query, rows)

    conn.commit()
    db.close(conn, cur)
    return 1
# Example #12
def insert_busstop_latlon():
    """Create fe_star.buss_stopp if needed and load every bus stop with a
    synthetic business key (stop name concatenated with stop id).

    NOTE(review): this redefines the earlier insert_busstop_latlon in this
    module; the last definition wins at import time.

    Returns 1 on completion.
    """
    stops = get_busstop_latlon()
    conn, cur = db.connect()

    rows = []
    for _, stop in stops.iterrows():
        # bkey = name + id, used as the business key in the star schema.
        bkey = str(str(stop['stop_name']) + str(stop['stop_id']))
        rows.append((bkey, str(stop['stop_id']), str(stop['stop_name']),
                     str(stop['stop_lat']), str(stop['stop_lon'])))

    cur.execute('''
    CREATE TABLE IF NOT EXISTS fe_star.buss_stopp(bkey text,buss_stopp_id int, buss_stopp_name text, latitude float, longitude float);''')

    cur.executemany('''insert into fe_star.buss_stopp(bkey, buss_stopp_id, buss_stopp_name, latitude, longitude) 
                        values (%s, %s, %s, %s, %s)''', rows)

    conn.commit()
    db.close(conn, cur)
    return 1
def exportTweets():
	"""Export classified tweets into per-sentiment train/test folders.

	For each sentiment, fetches (processedText, tweet_id) rows, splits them
	2:1 into training and test sets at random, and writes one UTF-8 encoded
	file per tweet named "<tweet_id>.txt".
	"""
	trainingTweetsPath = os.getcwd() + "/tweets/train"
	testTweetsPath = os.getcwd() + "/tweets/test"

	conn = db.connect()
	query = "SELECT processedText, tweet from processedTweets WHERE sentiment="
	cursor = conn.cursor()

	for sentiment in ["positive", "negative", "neutral"]:
		# Make per-sentiment folders for both splits.
		sentimentTrainingPath = "%s/%s" % (trainingTweetsPath, sentiment)
		sentimentTestPath = "%s/%s" % (testTweetsPath, sentiment)
		if not os.path.exists(sentimentTrainingPath):
			os.makedirs(sentimentTrainingPath)
		if not os.path.exists(sentimentTestPath):
			os.makedirs(sentimentTestPath)

		finalQuery = "%s\'%s\'" % (query, sentiment)
		cursor.execute(finalQuery)
		tweets = cursor.fetchall()

		# 2/3 of the tweets go to the training set.  A set gives O(1)
		# membership tests below (the original list made the loop O(n^2)).
		trainingSize = int(len(tweets) * float(2) / 3)
		trainingIndices = set(random.sample(range(0, len(tweets)), trainingSize))

		for index in range(0, len(tweets)):
			filename = tweets[index][1] + ".txt"
			if index in trainingIndices:
				path = "%s/%s" % (sentimentTrainingPath, filename)
			else:
				path = "%s/%s" % (sentimentTestPath, filename)
			# BUG FIX: 'with' guarantees the handle is closed even if the
			# write raises (the original leaked the handle on error).
			with open(path, "w") as tweetFile:
				tweetFile.write(tweets[index][0].encode('utf-8'))

	cursor.close()
	conn.close()
def insert_busstop_route():
    """Bulk-insert the route <-> bus-stop mapping into
    datamart_star.route_busstop, committing in batches of 100 rows.

    Returns 1 on completion (kept for backward compatibility).
    """
    query = '''insert into datamart_star.route_busstop(route_id, bkey) 
                        values (%s, %s)'''

    stations = get_busstop_route()
    conn, cur = db.connect()
    print(len(stations))

    arg_list = []
    inserted = 0
    for _, arg in stations.iterrows():
        arg_list.append((str(arg['route_id']), str(arg['bkey'])))
        # Flush by batch size rather than the DataFrame index: iterrows()
        # indices are not guaranteed to be consecutive integers, so the
        # original `index % 100 == 0` test could skip or repeat flushes.
        if len(arg_list) >= 100:
            cur.executemany(query, arg_list)
            inserted += len(arg_list)
            print(f'inserted: {inserted}')
            arg_list = []
            conn.commit()

    # Flush whatever remains after the last full batch.
    if arg_list:
        cur.executemany(query, arg_list)

    conn.commit()
    db.close(conn, cur)
    return 1
def updateQuery(searchQuery, minTweetID_=None):
	"""Backfill the tweet database for *searchQuery* from the Twitter search API.

	Pages backwards (newest -> oldest) through search results, storing each
	page via addAllToDB and persisting resume pointers (latest/oldest tweet
	ids plus a finished flag) to *searchPointersFileName* with pickle after
	every page.  Loops until a page comes back empty.

	searchQuery -- query string handed to twitter.search()
	minTweetID_ -- optional explicit floor tweet id; when omitted, the
	               stored "latestTweet" pointer is used as the floor

	NOTE(review): this function uses Python 2 ``print`` statements below,
	unlike the Python 3 f-string code elsewhere in this file.
	"""
	searchPointers = {}
	try:
		pointersFile = open(searchPointersFileName, "r")
		searchPointers = pickle.load(pointersFile)	# reload searchPointers to make sure it is up-to-date
		pointersFile.close()						# maybe turn searchPointer into a global variable? For easier saving on error also create separate f'n for updating
	except Exception as e:
		#searchPointers file doesn't exist yet
		pointerFileExists = False
		searchPointers = {}
	# Ensure this query has a pointer record with all expected keys.
	if searchQuery not in searchPointers.keys():
		searchPointers[searchQuery] = {"latestTweet":None, "oldestTweet":None, "finishedLastRun":True}
	if "finishedLastRun" not in searchPointers[searchQuery].keys():
		searchPointers[searchQuery]["finishedLastRun"] = True
	tweets = []
	users = []
	conn = db.connect()
	cursor = conn.cursor()
	querySize = 100
	maxTweetID = None	# Not actually the oldest tweet! oldest tweet in current update cycle
	# latestTweetID = db.getLatestTweet(conn)	# Will run update until this tweet is encountered
	minTweetID = None
	if minTweetID_:
		minTweetID = minTweetID_
	else:
		minTweetID = searchPointers[searchQuery]["latestTweet"]
	firstRun = True
	if not searchPointers[searchQuery]["finishedLastRun"]:
		# Previous run was interrupted: resume paging downward from the
		# stored oldest tweet instead of opening a fresh window.
		minTweetID = None
		maxTweetID = searchPointers[searchQuery]["oldestTweet"]
		#Last Run wasn't finished
		#Make oldestTweet = max_id
	while True:
		tweets = []
		try:
			# print("maxTweetID: %s" % maxTweetID)
			# maxTweetID is the OLDEST TWEET returned by search query NOT OF WHOLE HISTORY OF QUERYING. It becomes the new MAX for the next query hence the name.
			# minTweetID is the floor. When using searchPointer file, latestTweet becomes minTweetID
			searchResults = twitter.search(q=searchQuery, max_id=maxTweetID, since_id=minTweetID, count=querySize, result_type="recent")
			tweetCount = len(searchResults['statuses'])
			if tweetCount > 1:
				if firstRun:
					# First page of a fresh run: remember the newest tweet id
					# so the next run can start above it.
					if searchPointers[searchQuery]["finishedLastRun"]:
						searchPointers[searchQuery]["latestTweet"] = searchResults['statuses'][0]['id_str']
					firstRun = False
				for tweet in searchResults['statuses']:
					tweets.append(tweet)
				addAllToDB(conn, tweets, False)
				print("Added %d tweets to Database by backward search. Query: %s" % (tweetCount, searchQuery))
				maxTweetID = searchResults['statuses'][-1]['id_str']
				searchPointers[searchQuery]["oldestTweet"] = maxTweetID
				print("Oldest tweet ID: %s" % (maxTweetID))
				searchPointers[searchQuery]["finishedLastRun"] = False
				# Persist pointers after every page so an interrupt loses at
				# most one page of progress.
				pointersFile = open(searchPointersFileName, "w+")
				pickle.dump(searchPointers, pointersFile)
				pointersFile.close()
				# print("Current max Tweet: %s" % maxTweetID)

			else:
				# Empty page: mark the run finished and stop paging.
				searchPointers[searchQuery]['finishedLastRun'] = True
				pointersFile = open(searchPointersFileName, "w+")
				pickle.dump(searchPointers, pointersFile)
				pointersFile.close()
				#end of search results OR search error (e.g. rate limit)
				print("Search Pointer file saved.")
				#ONLY SAVES AT SEARCH RESULT == 0 TO ASSURE NO TWEET IN THE MIDDLE IS LEFT OUT

				print("Backward search result returned zero")
				## then start again from top
				break
		except TwythonRateLimitError as e:
			# Rate limited: sleep until the time given by retry_after, then
			# retry the same page on the next loop iteration.
			print("[ERROR] TwythonRateLimitError on Search Query")
			print(e)
			retryIn = datetime.datetime.fromtimestamp(float(e.retry_after)) - datetime.datetime.now()
			retryIn = float(retryIn.total_seconds())
			print("Time to sleep: %d" % math.ceil(retryIn))
			#raw_input("Press enter to continue to sleep")
			print("Time at start of sleep:")
			print datetime.datetime.now()
			print("End of sleep:")
			print(datetime.datetime.fromtimestamp(float(e.retry_after)))
			retryIn = datetime.datetime.fromtimestamp(float(e.retry_after)) - datetime.datetime.now()
			retryIn = float(retryIn.total_seconds())
			# # Pointer File Saving
			# pointersFile = open(searchPointersFileName, "w+")
			# pickle.dump(searchPointers, pointersFile)
			# pointersFile.close()
			# print("Search Pointer file saved. Safe to Exit.")
			if retryIn > 0:
				time.sleep(math.ceil(retryIn))
			pass
		except Exception as e:
			# NOTE(review): broad catch-and-continue hides real errors;
			# kept as-is to preserve existing behavior.
			print e
			print "Restarting loop, ignoring error"
		# except Exception as e:

		# 	print("ERROR: Error while populating database.") # File saved as %s\n" % filename)
		# 	print(e)
	# Final cleanup: commit, close the connection, and save pointers once more.
	cursor.close()
	conn.commit()
	conn.close()
	pointersFile = open(searchPointersFileName, "w+")
	pickle.dump(searchPointers, pointersFile)
	pointersFile.close()
	print("Search Pointer file saved. Safe to Exit.")
	print("END OF EXECUTION")