def insertAMTCruiseTraj():
    """Export one trajectory CSV per AMT cruise and bulk-insert each into tblCruise_Trajectory.

    Reads the master AMT file, splits it by cruise name, resolves each cruise's
    DB ID, cleans the rows, writes a sorted CSV, and bcp-inserts it.
    Side effects: changes the working directory, writes files under the export
    dir, and inserts into the database.
    """
    server = 'Rainier'
    tableName = 'tblCruise_Trajectory'
    usecols = ['Cruise_name', 'time', 'lat', 'lon']
    rawFilePath = cfgv.rep_AMT_cruises_raw + 'amt/'
    rawFileName = 'master_AMT.csv'
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    os.chdir(rawFilePath)
    df = pd.read_csv(path, sep=',', usecols=usecols)
    for Cruise_name in df['Cruise_name'].unique():
        export_path = '%s%s.csv' % (exportBase, Cruise_name)
        print(Cruise_name)
        # BUG FIX: .copy() so the column assignments below operate on a real
        # frame, not a view of `df` (SettingWithCopyWarning / silent no-op).
        cruise_df = df[df['Cruise_name'] == Cruise_name].copy()
        # Lookup key is built from the name's 3-char prefix + 2-char suffix
        # (e.g. 'AMT' + '01') — presumably matching tblCruise naming; verify.
        Cruise_ID = iF.findID_CRUISE(Cruise_name[0:3] + Cruise_name[-2:])
        cruise_df['Cruise_ID'] = Cruise_ID
        cruise_df = ip.removeMissings(['time', 'lat', 'lon'], cruise_df)
        # BUG FIX: convertYYYYMMDD was called twice; once is sufficient.
        cruise_df = ip.convertYYYYMMDD(cruise_df)
        cruise_df = ip.NaNtoNone(cruise_df)
        cruise_df = ip.colDatatypes(cruise_df)
        cruise_df = ip.removeDuplicates(cruise_df)
        cruise_df = cruise_df[['Cruise_ID', 'time', 'lat', 'lon']]
        cruise_df.to_csv(export_path, index=False)
        # sortByTimeLatLon rewrites export_path in sorted order.
        ip.sortByTimeLatLon(cruise_df, export_path, 'time', 'lat', 'lon')
        # BUG FIX: previously printed Cruise_name + export_path (a bogus path).
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName, server)
def insertSeaFlowCruiseTraj():
    """Build a trajectory CSV from every SeaFlow .sfl file and bcp each into tblCruise_Trajectory.

    Side effects: changes the working directory, writes CSVs under the export
    dir, and inserts into the database.
    """
    server = 'Rainier'
    tableName = 'tblCruise_Trajectory'
    rawFilePath = cfgv.rep_allSeaFlowCruises_raw
    os.chdir(rawFilePath)
    sfl_cruise_list = glob.glob('*.sfl*')
    usecols_sfl = ['DATE', 'LAT', 'LON']
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    for cruise in sfl_cruise_list:
        # Cruise name is the filename minus its last 8 characters
        # (presumably the '.sfl'-style extension) — confirm against the files.
        cruise_name = cruise[:-8]
        prefix = cruise_name + '_traj'
        rawFileName = cruise
        path = rawFilePath + rawFileName
        export_path = f'{exportBase}{prefix}.csv'
        print(cruise)
        Cruise_ID = iF.findID_CRUISE(cruise_name)
        df = pd.read_csv(cruise, sep='\t', usecols=usecols_sfl)
        df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%dT%H:%M:%S')
        df['Cruise_ID'] = Cruise_ID
        df.rename(columns={'DATE': 'time', 'LAT': 'lat', 'LON': 'lon'},
                  inplace=True)
        df = df[['Cruise_ID', 'time', 'lat', 'lon']]
        df = ip.removeMissings(['time', 'lat', 'lon'], df)
        df = ip.NaNtoNone(df)
        df = ip.colDatatypes(df)
        df = ip.convertYYYYMMDD(df)
        df = ip.removeDuplicates(df)
        df.to_csv(export_path, index=False)
        # sortByTimeLatLon rewrites export_path in sorted order.
        ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName, server)
def makeWOA_climatology(rawFilePath, tableName):
    """Assemble one WOA climatology CSV per month and bcp each into *tableName*.

    Relies on module-level names `month_list`, `merge_WOA_variables`, and
    `exportBase`. `rawFilePath` is kept for interface compatibility but is not
    referenced in this body.
    """
    for month in month_list:  # e.g. '1' == January
        print('Month: ' + month)
        df = merge_WOA_variables(month)
        df = ip.removeColumn(['I_gp', 'C_gp'], df)
        df = ip.removeMissings(['lat', 'lon', 'depth'], df)
        df = ip.NaNtoNone(df)
        df = ip.addIDcol(df)
        df.sort_values(['lat', 'lon', 'depth'],
                       ascending=[True, True, True], inplace=True)
        # Single export-path expression instead of rebuilding it three times.
        export_path = exportBase + tableName + '_' + month + '.csv'
        df.to_csv(export_path, index=False)
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName)
def insertAMTCruises():
    """Copy the AMT cruise metadata CSV to the export dir and bulk-insert it into tblCruise."""
    server = 'Rainier'
    cruise = 'AMT_cruises'
    tableName = 'tblCruise'
    rawFileName = 'tblCruise_AMT.csv'
    rawFilePath = cfgv.rep_AMT_cruises_raw + 'amt/'
    path = rawFilePath + rawFileName
    prefix = tableName + '_' + cruise
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = f'{exportBase}{prefix}.csv'
    cruises_df = pd.read_csv(path, sep=',')
    cruises_df.to_csv(export_path, index=False)
    iF.toSQLbcp(export_path, tableName, server)
def insertSeaFlowCruiseSalinity():
    """Export salinity rows from every SeaFlow .sfl file and bcp each into tblCruise_Salinity.

    Rows with non-numeric salinity are dropped; depth is fixed at 5.0 m
    (surface intake). Cruises with no usable salinity values are skipped.
    Side effects: changes the working directory, writes CSVs under the export
    dir, and inserts into the database.
    """
    server = 'Rainier'
    tableName = 'tblCruise_Salinity'
    rawFilePath = cfgv.rep_allSeaFlowCruises_raw
    os.chdir(rawFilePath)
    sfl_cruise_list = glob.glob('*.sfl*')
    usecols_sfl = ['DATE', 'LAT', 'LON', 'SALINITY']
    for cruise in sfl_cruise_list:
        # BUG FIX: suffix was '_temp' (copy-paste from the temperature
        # exporter), which made salinity exports collide with temperature
        # export filenames.
        prefix = cruise[:-8] + '_sal'
        rawFileName = cruise
        path = rawFilePath + rawFileName
        exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
        export_path = '%s%s.csv' % (exportBase, prefix)
        print(cruise)
        Cruise_ID = iF.findID_CRUISE(cruise[:-8])
        df = pd.read_csv(cruise, sep='\t', usecols=usecols_sfl)
        df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%dT%H:%M:%S')
        df['DEPTH'] = 5.0
        df['Cruise_ID'] = Cruise_ID
        df.rename(columns={
            'DATE': 'time',
            'LAT': 'lat',
            'LON': 'lon',
            'DEPTH': 'depth',
            'SALINITY': 'salinity'
        }, inplace=True)
        df = df[['Cruise_ID', 'time', 'lat', 'lon', 'depth', 'salinity']]
        df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
        # Keep only rows whose salinity parses as a number.
        df = df[pd.to_numeric(df['salinity'], errors='coerce').notnull()]
        df = ip.NaNtoNone(df)
        df = ip.colDatatypes(df)
        df = ip.convertYYYYMMDD(df)
        df = ip.removeDuplicates(df)
        print(df.head())
        if df.empty:
            print(cruise + ' had no salinity values. Not inserted into database')
        else:
            df.to_csv(export_path, index=False)
            # sortByTimeLatLon rewrites export_path in sorted order.
            ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
            print('export path: ', export_path)
            iF.toSQLbcp(export_path, tableName, server)
def insertAMTCruiseTemperature():
    """Export per-cruise AMT temperature CSVs and bulk-insert each into tblCruise_Temperature.

    Drops rows whose temp_flag is one of 'N', 'S', 'M', 'L' (bad-data flags —
    presumed QC codes; verify against the AMT flag legend) and rows with no
    temperature value. Cruises left empty after cleaning are skipped.
    Side effects: changes the working directory, writes CSVs under the export
    dir, and inserts into the database.
    """
    server = 'Rainier'
    tableName = 'tblCruise_Temperature'
    usecols = ['Cruise_name', 'time', 'lat', 'lon', 'temp', 'temp_flag']
    rawFilePath = cfgv.rep_AMT_cruises_raw + 'amt/'
    rawFileName = 'master_AMT.csv'
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    os.chdir(rawFilePath)
    df = pd.read_csv(path, sep=',', usecols=usecols)
    for Cruise_name in df['Cruise_name'].unique():
        export_path = '%s%s%s.csv' % (exportBase, Cruise_name, tableName)
        print(Cruise_name)
        # BUG FIX: .copy() so the column assignments below operate on a real
        # frame, not a view of `df` (SettingWithCopyWarning / silent no-op).
        cruise_df = df[df['Cruise_name'] == Cruise_name].copy()
        # Lookup key: 3-char prefix + 2-char suffix of the cruise name.
        Cruise_ID = iF.findID_CRUISE(Cruise_name[0:3] + Cruise_name[-2:])
        cruise_df['Cruise_ID'] = Cruise_ID
        # Equivalent to chained != comparisons: drop flagged rows
        # (rows with NaN flags are kept in both formulations).
        cruise_df = cruise_df[~cruise_df['temp_flag'].isin(['N', 'S', 'M', 'L'])]
        cruise_df = ip.removeMissings(['time', 'lat', 'lon'], cruise_df)
        # BUG FIX: convertYYYYMMDD was called twice; once is sufficient.
        cruise_df = ip.convertYYYYMMDD(cruise_df)
        cruise_df = ip.colDatatypes(cruise_df)
        cruise_df = ip.removeDuplicates(cruise_df)
        cruise_df = ip.renameCol(cruise_df, 'temp', 'temperature')
        cruise_df = cruise_df[[
            'Cruise_ID', 'time', 'lat', 'lon', 'temperature'
        ]]
        cruise_df = cruise_df.dropna(subset=['temperature'])
        cruise_df = ip.NaNtoNone(cruise_df)
        if cruise_df.empty:
            print(Cruise_name + ' had no temperature values. Not inserted into database')
        else:
            cruise_df.to_csv(export_path, index=False)
            # sortByTimeLatLon rewrites export_path in sorted order.
            ip.sortByTimeLatLon(cruise_df, export_path, 'time', 'lat', 'lon')
            print('export path: ', export_path)
            iF.toSQLbcp(export_path, tableName, server)
def insertSeaFlowCruises():
    """Reshape SeaFlow cruise metadata into the tblCruise column order and bcp it in.

    Columns absent from the raw file are padded with NULLs; the CSV is written
    header-less, as expected by the bcp loader.
    """
    server = 'Rainier'
    tableName = 'tblCruise'
    rawFileName = 'tblCruise_seaflow.csv'
    rawFilePath = cfgv.rep_allSeaFlowCruises_raw
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_csv(path, sep=',')
    # Pad the columns the raw file does not provide.
    for blank_col in ('Start_Time', 'End_Time', 'Lat_Min', 'Lat_Max',
                      'Lon_Min', 'Lon_Max', 'Chief_Name', 'Chief_Email'):
        df[blank_col] = None
    df = df[[
        'ID', 'Name', 'Nickname', 'Ship_Name', 'Start_Time', 'End_Time',
        'Lat_Min', 'Lat_Max', 'Lon_Min', 'Lon_Max', 'Chief_Name',
        'Chief_Email', 'Keywords'
    ]]
    df.to_csv(export_path, index=False, header=False)
    print('export path: ', export_path)
    iF.toSQLbcp(export_path, tableName, server)
import io
import numpy as np
import glob
import xarray as xr
import os.path

############################
########### OPTS ###########
tableName = 'tblDarwin_Ecosystem'
rawFilePath = '/media/nrhagen/Drobo/OpediaVault/model/darwin_Ecosystem/rep/'
netcdf_list = glob.glob(rawFilePath + '*.nc')
exportBase = cfgv.opedia_proj + 'db/dbInsert/export_temp/'
prefix = tableName
export_path = '%s%s.csv' % (exportBase, prefix)
############################
############################

# Insert every processed CSV, in sorted order, skipping any that already have
# a '<name>._BCP.txt' marker file recording a previous successful insert.
processed_csv_list = glob.glob(rawFilePath + '*.csv*')
sorted_csvlist = np.sort(processed_csv_list).tolist()
for sorted_csv in sorted_csvlist:
    # BUG FIX: the existence check used to look for the marker next to the
    # raw CSV (under rawFilePath) while the marker was written under
    # exportBase — so the marker was never found and every CSV was
    # re-inserted on every run. Check and write the same path.
    # ([:-3] keeps the trailing '.' for compatibility with markers already
    # written by the old code.)
    marker_path = exportBase + os.path.basename(sorted_csv)[:-3] + '_BCP.txt'
    if os.path.isfile(marker_path):
        print(sorted_csv[:-4] + ' already inserted into db. Passing')
        continue
    print('Inserting ' + sorted_csv[:-4] + ' into db')
    iF.toSQLbcp(sorted_csv, tableName)
    # 'with' closes the (empty) marker file even if interrupted;
    # also avoids shadowing the builtin name 'file'.
    with open(marker_path, 'w'):
        pass
############################
########### OPTS ###########
tableName = 'tblMesoscope_km1709'
rawFilePath = cfgv.rep_km1709_MESOSCOPE_raw
rawFileName = 'mesoscope_cmap.xlsx'
############################
############################


def makeMesoscope_km1709(rawFilePath, rawFileName, tableName):
    """Clean the MESOSCOPE km1709 workbook, write a sorted export CSV, and return its path.

    Reads the 'data' sheet, drops rows missing time/lat/lon/depth, normalizes
    dtypes, parses 'time' as YYYY-MM-DD, adds a NULL 'ID' column, and
    de-duplicates before exporting.
    """
    path = rawFilePath + rawFileName
    prefix = tableName
    df = pd.read_excel(path, 'data')
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.colDatatypes(df)
    df['time'] = pd.to_datetime(df['time'], format='%Y-%m-%d')
    df['ID'] = None
    df = ip.removeDuplicates(df)
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df.to_csv(export_path, index=False)
    # sortByTimeLatLonDepth rewrites export_path in sorted order.
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    # BUG FIX: a second df.to_csv(export_path) here used to overwrite the
    # file sortByTimeLatLonDepth had just sorted with the unsorted frame.
    print('export path: ', export_path)
    return export_path


export_path = makeMesoscope_km1709(rawFilePath, rawFileName, tableName)
iF.toSQLbcp(export_path, tableName)
'Cloud_Optical_Thickness_Ice_Uncertainty_in_Mean', 'Cloud_Optical_Thickness_Ice_Uncertainty_in_MeanLog10', 'Cloud_Optical_Thickness_Ice_MeanLog10', 'Cloud_Optical_Thickness_Total_MeanLog10', 'Optical_Thickness_vs_Cloud_Top_Pressure', 'Cloud_Particle_Size_Liquid_Mean', 'Cloud_Particle_Size_Liquid_Uncertainty_in_Mean', 'Cloud_Particle_Size_Ice_Mean', 'Cloud_Particle_Size_Ice_Uncertainty_in_Mean', 'Cloud_Fraction_Retrieval_Liquid_Mean', 'Cloud_Retrieval_Liquid_Pixel_Counts', 'Cloud_Fraction_Retrieval_Ice_Mean', 'Cloud_Retrieval_Ice_Pixel_Counts', 'Cloud_Fraction_Retrieval_Total_Mean', 'Cloud_Retrieval_Total_Pixel_Counts', 'Liquid_Path_Mean', 'Liquid_Path_Uncertainty_in_Mean', 'Ice_Path_Mean', 'Ice_Path_Uncertainty_in_Mean', 'Cloud_Fraction_Retrieval_High_Mean', 'Cloud_Fraction_Retrieval_Mid_Mean', 'Cloud_Fraction_Retrieval_Low_Mean', 'Cloud_Fraction_Mask_High_Mean', 'Cloud_Fraction_Mask_Mid_Mean', 'Cloud_Fraction_Mask_Low_Mean' ] keepCols = ['AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean'] ############################ ############################ iF.cleanStrucHDF_modis(rawFilePath, tableName, removeCols, opt_select_cols=keepCols) iF.toSQLbcp(rawFileName, tableName, removeCols)