def addDataToTable(self):
    """
    Load US station metadata from S3 into the ``stations`` table.

    Downloads ``ghcnd-stations.txt`` from the climate bucket and inserts one
    row for every line that

    * starts with ``US``,
    * has a state code other than ``AK`` or ``HI``, and
    * has an elevation greater than zero (NOTE(review): presumably this also
      drops a negative "missing elevation" sentinel -- confirm against the
      data set's documentation).

    All inserts are committed together once the whole file has been
    processed. The cursor and the connection are closed even if an insert
    fails, in which case nothing is committed.
    """
    insert_station = '''
        INSERT INTO stations (station_name, latitude, longitude,
                              elevation, state, name)
        VALUES(%s,%s,%s,%s,%s,%s);
    '''

    # Fetch the file before opening the database connection so a failed
    # download never leaves a connection behind.
    s3_client = boto3.client('s3')
    bucket = AwsLogins().s3BucketClimate
    s3_object = s3_client.get_object(Bucket=bucket, Key="ghcnd-stations.txt")
    lines = s3_object['Body'].read().decode('utf-8').splitlines()

    connection = dataBaseConnect().connectToDataBase()
    try:
        cursor = connection.cursor()
        try:
            for line in lines:
                if not line.startswith('US'):
                    continue

                # The first 41 characters hold, whitespace separated:
                # id, latitude, longitude, elevation, state.
                fields = line[0:41].split()
                if len(fields) != 5:
                    # A blank state column shifts the fields; such a row
                    # cannot be classified by state, so skip it instead of
                    # failing with an IndexError.
                    continue
                station_id, latitude, longitude, elevation, state = fields

                if state in ('AK', 'HI') or not float(elevation) > 0:
                    continue

                # The name column starts at index 41, directly after the
                # 41-character metadata prefix; slicing from 42 would drop
                # the first letter of every station name.
                station_name = line[41:71]

                cursor.execute(
                    insert_station,
                    (station_id, float(latitude), float(longitude),
                     float(elevation), state, station_name),
                )
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
    print("Done!")
def addDataToTable(self):
    """
    Load state elevations from a CSV file in S3 into ``states_elevation``.

    Downloads ``states_elevation_csv/lower48elevation.csv``, splits it on
    whitespace into records, and inserts the first two comma-separated
    values of each record as ``(state, elevation)``. The elevation is
    converted with ``float``.

    All inserts are committed together. The cursor and the connection are
    closed even if a record is malformed or an insert fails, in which case
    nothing is committed.
    """
    insert_elevation = '''
        INSERT INTO states_elevation(state, elevation)
        VALUES(%s,%s)
    '''

    # Fetch the file before opening the database connection so a failed
    # download never leaves a connection behind.
    s3_client = boto3.client('s3')
    bucket = AwsLogins().s3Bucket
    s3_object = s3_client.get_object(
        Bucket=bucket, Key="states_elevation_csv/lower48elevation.csv")
    records = s3_object['Body'].read().decode('utf-8').split()

    connection = dataBaseConnect().connectToDataBase()
    try:
        cursor = connection.cursor()
        try:
            for record in records:
                columns = record.split(',')
                cursor.execute(insert_elevation,
                               (columns[0], float(columns[1])))
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
    print("Done!")
def makeDataframeFromQuery(self, query):
    """
    Run ``query`` against the database and return the result as a pandas
    DataFrame.

    A fresh connection is opened for the call and is closed afterwards,
    including when ``pd.read_sql`` raises.
    """
    connection = dataBaseConnect().connectToDataBase()
    try:
        return pd.read_sql(query, connection)
    finally:
        connection.close()
def checkIfTableExist(self, tableName):
    """
    Drop ``tableName`` if it exists so a fresh version can be written.

    Despite its name this function does not report whether the table
    exists; it issues ``DROP TABLE IF EXISTS`` and commits. The cursor and
    the connection are closed even if the statement fails.

    NOTE(review): ``tableName`` is concatenated into the SQL text, so it
    must come from trusted code, never from user input.
    """
    drop_statement = "DROP TABLE IF EXISTS " + tableName

    connection = dataBaseConnect().connectToDataBase()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(drop_statement)
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
def updateDataBaseAnnually(self, tableName, columnName, slope, intercept):
    """
    Fill ``columnName`` of ``tableName`` from a linear regression on elevation.

    Every row is updated to ``elevation * slope + intercept``. ``slope`` and
    ``intercept`` are passed as query parameters. The cursor and the
    connection are closed even if the update fails.

    NOTE(review): ``tableName`` and ``columnName`` are concatenated into the
    SQL text, so they must come from trusted code, never from user input.
    """
    update_statement = ("update " + tableName + " set " + columnName +
                        " = elevation * %s + %s ;")

    connection = dataBaseConnect().connectToDataBase()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(update_statement, (slope, intercept))
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
def createTable(self):
    """
    (Re)create the ``states_elevation`` table.

    Any existing ``states_elevation`` table is dropped first, so previously
    stored rows are lost. The confirmation message is printed only after
    the change has been committed. The cursor and the connection are closed
    even if the statement fails.
    """
    create_statement = '''
        DROP TABLE IF EXISTS states_elevation;
        CREATE TABLE states_elevation (
            id serial PRIMARY KEY,
            state VARCHAR(5) NOT NULL,
            elevation real NOT NULL
        );
    '''

    connection = dataBaseConnect().connectToDataBase()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(create_statement)
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
    print("state elevation table is created")
def createStationsTable(self):
    """
    (Re)create the ``stations`` table used for climate stations.

    Any existing ``stations`` table is dropped first, so previously stored
    rows are lost. The confirmation message is printed only after the
    change has been committed. The cursor and the connection are closed
    even if the statement fails.
    """
    create_statement = '''
        DROP TABLE IF EXISTS stations;
        CREATE TABLE stations (
            id serial PRIMARY KEY,
            station_name VARCHAR(20) UNIQUE NOT NULL,
            latitude real NOT NULL,
            longitude real NOT NULL,
            elevation real NOT NULL,
            state VARCHAR(20) NOT NULL,
            name VARCHAR(100) NOT NULL
        );
    '''

    connection = dataBaseConnect().connectToDataBase()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(create_statement)
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()
    print("station table created")
def makeDataframeFromQuery(query):
    """
    Run ``query`` against the database and return the result as a pandas
    DataFrame.

    A fresh connection is opened for the call and is closed afterwards,
    including when ``pd.read_sql`` raises.
    """
    connection = dataBaseConnect().connectToDataBase()
    try:
        return pd.read_sql(query, connection)
    finally:
        connection.close()