def insertAMTCruiseTraj():
    """Export one trajectory CSV per AMT cruise and hand each to ``iF.toSQLbcp``.

    Reads the ``Cruise_name``, ``time``, ``lat`` and ``lon`` columns of
    ``master_AMT.csv`` (under ``cfgv.rep_AMT_cruises_raw + 'amt/'``), and for
    every distinct cruise name:

    * looks up the cruise ID with ``iF.findID_CRUISE`` using the first three
      and last two characters of the cruise name,
    * runs the rows through the ``ip`` preparation helpers,
    * writes ``<export dir>/<Cruise_name>.csv`` and passes that file, the
      table name ``tblCruise_Trajectory`` and the server name ``Rainier`` to
      ``iF.toSQLbcp``.

    Side effects: changes the process working directory to the raw data
    folder, writes CSV files, prints progress to stdout.
    """
    server = 'Rainier'
    tableName = 'tblCruise_Trajectory'
    usecols = ['Cruise_name', 'time', 'lat', 'lon']
    rawFilePath = cfgv.rep_AMT_cruises_raw + 'amt/'
    rawFileName = 'master_AMT.csv'
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'

    # Retained from the original: callers may rely on the cwd change.
    os.chdir(rawFilePath)
    df = pd.read_csv(rawFilePath + rawFileName, sep=',', usecols=usecols)

    for Cruise_name in df['Cruise_name'].unique():
        export_path = '%s%s.csv' % (exportBase, Cruise_name)
        print(Cruise_name)

        # Explicit copy so the column assignment below targets an independent
        # frame instead of a slice of ``df`` (avoids SettingWithCopyWarning).
        cruise_df = df[df['Cruise_name'] == Cruise_name].copy()

        # Lookup key: first three + last two characters of the cruise name.
        Cruise_ID = iF.findID_CRUISE(Cruise_name[0:3] + Cruise_name[-2:])
        cruise_df['Cruise_ID'] = Cruise_ID

        cruise_df = ip.removeMissings(['time', 'lat', 'lon'], cruise_df)
        cruise_df = ip.convertYYYYMMDD(cruise_df)
        cruise_df = ip.NaNtoNone(cruise_df)
        cruise_df = ip.colDatatypes(cruise_df)
        # NOTE(review): convertYYYYMMDD is applied a second time here, exactly
        # as in the original; confirm whether the repeat is required after
        # colDatatypes before removing it.
        cruise_df = ip.convertYYYYMMDD(cruise_df)
        cruise_df = ip.removeDuplicates(cruise_df)
        cruise_df = cruise_df[['Cruise_ID', 'time', 'lat', 'lon']]

        cruise_df.to_csv(export_path, index=False)
        ip.sortByTimeLatLon(cruise_df, export_path, 'time', 'lat', 'lon')
        # Report the real export path (the original prepended the cruise name
        # to the path, printing a location that does not exist).
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName, server)
def insertSeaFlowCruiseTraj():
    """Export a trajectory CSV for every SeaFlow ``.sfl`` file and load it.

    Changes the working directory to ``cfgv.rep_allSeaFlowCruises_raw``,
    then for each file matching ``*.sfl*``:

    * derives the cruise tag by dropping the last eight characters of the
      file name and looks up its ID with ``iF.findID_CRUISE``,
    * reads the tab-separated ``DATE``/``LAT``/``LON`` columns, parses
      ``DATE`` and renames the columns to ``time``/``lat``/``lon``,
    * runs the ``ip`` preparation helpers, writes
      ``<export dir>/<tag>_traj.csv`` and passes it to ``iF.toSQLbcp`` for
      table ``tblCruise_Trajectory`` on server ``Rainier``.
    """
    server = 'Rainier'
    tableName = 'tblCruise_Trajectory'
    rawFilePath = cfgv.rep_allSeaFlowCruises_raw
    os.chdir(rawFilePath)

    # Raw SFL column name -> name used in the exported table.
    column_map = {'DATE': 'time', 'LAT': 'lat', 'LON': 'lon'}

    for sfl_file in glob.glob('*.sfl*'):
        cruise_tag = sfl_file[:-8]
        export_path = '%s%s.csv' % (
            cfgv.opedia_proj + 'db/dbInsert/export/',
            cruise_tag + '_traj',
        )
        print(sfl_file)
        Cruise_ID = iF.findID_CRUISE(cruise_tag)

        track = pd.read_csv(sfl_file, sep='\t', usecols=list(column_map))
        track['DATE'] = pd.to_datetime(
            track['DATE'], format='%Y-%m-%dT%H:%M:%S')
        track['Cruise_ID'] = Cruise_ID
        track = track.rename(columns=column_map)
        track = track[['Cruise_ID', 'time', 'lat', 'lon']]

        track = ip.removeMissings(['time', 'lat', 'lon'], track)
        # Single-argument preparation helpers, applied in their original order.
        for prepare in (ip.NaNtoNone, ip.colDatatypes,
                        ip.convertYYYYMMDD, ip.removeDuplicates):
            track = prepare(track)

        track.to_csv(export_path, index=False)
        ip.sortByTimeLatLon(track, export_path, 'time', 'lat', 'lon')
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName, server)
def makeGLODAP(rawFilePath, rawFileName, tableName):
    """Build the GLODAP export CSV and return its path together with the frame.

    Parameters
    ----------
    rawFilePath : str
        Directory prefix of the raw files (concatenated directly with the
        file names, so it must end with a path separator).
    rawFileName : str
        Name of the comma-separated GLODAP data file.
    tableName : str
        Used as the base name of the exported CSV.

    Returns
    -------
    tuple
        ``(export_path, df)``: the path of the written CSV and the prepared
        DataFrame.

    Notes
    -----
    ``usecols``, ``rawFileName_expocodes`` and ``usecols_rearange`` are not
    defined in this function; they must exist at module level.
    The merge with the expocode table uses pandas' default inner join, so
    rows whose ``cruise`` value has no entry in the expocode file are dropped.
    """
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, tableName)

    df = pd.read_csv(path, sep=',', usecols=usecols)

    # Cast to int first to drop any fractional part (e.g. 2001.0 -> 2001),
    # then to str, as the original did column by column.
    for part in ('year', 'month', 'day', 'hour', 'minute'):
        df[part] = df[part].astype('int').astype('str')
    df['second'] = '0'

    # Assemble the timestamp from its component columns. No ``format`` is
    # passed: pandas ignores it when the input is a DataFrame of components.
    df['time'] = pd.to_datetime(
        df[['year', 'month', 'day', 'hour', 'minute', 'second']])

    # (old name, new name) pairs. ``ip.renameCol`` is called for its effect
    # on ``df`` and its return value is ignored, exactly as before. The
    # original repeated the latitude rename six more times; once is enough.
    renames = (
        ('latitude', 'lat'),
        ('longitude', 'lon'),
        ('theta', 'theta_potential_temperature'),
        ('sigma0', 'sigma0_potential_density'),
        ('sigma1', 'sigma1_potential_density_ref_1000_dbar'),
        ('sigma2', 'sigma2_potential_density_ref_2000_dbar'),
        ('sigma3', 'sigma3_potential_density_ref_3000_dbar'),
        ('sigma4', 'sigma4_potential_density_ref_4000_dbar'),
        ('gamma', 'gamma_neutral_density'),
        ('TAlk', 'TAlk_total_alkalinity'),
        ('phts25p0', 'phts25p0_pH_25C_0dbar'),
        ('phtsinsitutp', 'phtsinsitutp_pH_insitu'),
    )
    for old_name, new_name in renames:
        ip.renameCol(df, old_name, new_name)

    # Attach the expocode of each cruise, then drop the duplicate join key.
    expocodes = pd.read_csv(rawFilePath + rawFileName_expocodes,
                            sep='\t', names=['cruise_ID', 'expocode'])
    df = pd.merge(df, expocodes, left_on='cruise', right_on='cruise_ID')
    # Keyword form: the positional ``axis`` argument was removed in pandas 2.0.
    df = df.drop(columns='cruise_ID')
    ip.renameCol(df, 'expocode', 'cruise_expocode')

    df = ip.arrangeColumns(usecols_rearange, df)
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)

    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path, df
def insertAMTCruiseTemperature():
    """Export one temperature CSV per AMT cruise and hand each to ``iF.toSQLbcp``.

    Reads ``master_AMT.csv`` (under ``cfgv.rep_AMT_cruises_raw + 'amt/'``)
    and, for every distinct cruise name:

    * looks up the cruise ID with ``iF.findID_CRUISE`` using the first three
      and last two characters of the cruise name,
    * discards rows whose ``temp_flag`` is ``'N'``, ``'S'``, ``'M'`` or
      ``'L'``,
    * runs the ``ip`` preparation helpers, renames ``temp`` to
      ``temperature`` and drops rows without a temperature,
    * if any rows remain, writes
      ``<export dir>/<Cruise_name>tblCruise_Temperature.csv`` and passes it to
      ``iF.toSQLbcp``; otherwise prints a notice and skips the cruise.

    Side effects: changes the process working directory to the raw data
    folder, writes CSV files, prints progress to stdout.
    """
    server = 'Rainier'
    tableName = 'tblCruise_Temperature'
    usecols = ['Cruise_name', 'time', 'lat', 'lon', 'temp', 'temp_flag']
    rawFilePath = cfgv.rep_AMT_cruises_raw + 'amt/'
    rawFileName = 'master_AMT.csv'
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    # Rows carrying any of these flag values are excluded from the export.
    rejected_flags = ['N', 'S', 'M', 'L']

    # Retained from the original: callers may rely on the cwd change.
    os.chdir(rawFilePath)
    df = pd.read_csv(rawFilePath + rawFileName, sep=',', usecols=usecols)

    for Cruise_name in df['Cruise_name'].unique():
        export_path = '%s%s%s.csv' % (exportBase, Cruise_name, tableName)
        print(Cruise_name)

        # Explicit copy so the column assignment below targets an independent
        # frame instead of a slice of ``df`` (avoids SettingWithCopyWarning).
        cruise_df = df[df['Cruise_name'] == Cruise_name].copy()

        # Lookup key: first three + last two characters of the cruise name.
        Cruise_ID = iF.findID_CRUISE(Cruise_name[0:3] + Cruise_name[-2:])
        cruise_df['Cruise_ID'] = Cruise_ID

        # Same result as four chained ``!=`` tests; rows with a missing flag
        # are kept in both forms.
        cruise_df = cruise_df[~cruise_df['temp_flag'].isin(rejected_flags)]

        cruise_df = ip.removeMissings(['time', 'lat', 'lon'], cruise_df)
        cruise_df = ip.convertYYYYMMDD(cruise_df)
        cruise_df = ip.colDatatypes(cruise_df)
        # NOTE(review): convertYYYYMMDD is applied a second time here, exactly
        # as in the original; confirm whether the repeat is required.
        cruise_df = ip.convertYYYYMMDD(cruise_df)
        cruise_df = ip.removeDuplicates(cruise_df)
        # NOTE(review): relies on ip.renameCol returning the frame; other
        # callers in this file ignore its return value, so confirm.
        cruise_df = ip.renameCol(cruise_df, 'temp', 'temperature')
        cruise_df = cruise_df[
            ['Cruise_ID', 'time', 'lat', 'lon', 'temperature']]
        cruise_df = cruise_df.dropna(subset=['temperature'])
        cruise_df = ip.NaNtoNone(cruise_df)

        if cruise_df.empty:
            print(Cruise_name
                  + ' had no temperature values. Not inserted into database')
        else:
            cruise_df.to_csv(export_path, index=False)
            ip.sortByTimeLatLon(cruise_df, export_path, 'time', 'lat', 'lon')
            print('export path: ', export_path)
            iF.toSQLbcp(export_path, tableName, server)
def makeSeaFlow(rawFilePath, rawFileName, tableName):
    """Build the SeaFlow export CSV and return its path.

    Reads the comma-separated file ``rawFilePath + rawFileName``, keeps the
    rows whose ``flag`` equals 0 and derives, for each of the populations
    ``prochloro``, ``synecho``, ``croco``, ``picoeuk`` and ``unknown``:

    * ``<pop>_abundance``: ``abundance`` where ``pop`` matches and
      ``quantile`` equals 50, NaN elsewhere,
    * ``<pop>_diameter``: ``diam_mid`` where ``pop`` matches, NaN elsewhere,
    * ``<pop>_carbon_content``: ``Qc_mid`` where ``pop`` matches, NaN
      elsewhere,
    * ``<pop>_biomass``: abundance times carbon content.

    ``total_*`` columns are computed the same way over every row whose
    ``pop`` is not ``'beads'``.

    Parameters
    ----------
    rawFilePath : str
        Directory prefix of the raw file (concatenated directly with the
        file name, so it must end with a path separator).
    rawFileName : str
        Name of the raw SeaFlow CSV.
    tableName : str
        Used as the base name of the exported CSV.

    Returns
    -------
    str
        Path of the written CSV.
    """
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, tableName)

    df = pd.read_csv(path, sep=',')
    # Explicit copy: new columns are added below, and assigning onto a
    # filtered slice triggers SettingWithCopyWarning.
    df = df[df['flag'] == 0].copy()

    is_q50 = df['quantile'] == 50

    def add_group(label, in_group, with_diameter):
        # Adds the <label>_* columns for the rows selected by ``in_group``.
        df[label + '_abundance'] = np.where(
            in_group & is_q50, df['abundance'], np.nan)
        if with_diameter:
            df[label + '_diameter'] = np.where(
                in_group, df['diam_mid'], np.nan)
        df[label + '_carbon_content'] = np.where(
            in_group, df['Qc_mid'], np.nan)
        # The float cast was applied only to prochloro in the original; it is
        # now applied uniformly (a no-op when the inputs are already numeric).
        df[label + '_biomass'] = (
            df[label + '_abundance'].astype(float)
            * df[label + '_carbon_content'].astype(float))

    for population in ('prochloro', 'synecho', 'croco', 'picoeuk', 'unknown'):
        add_group(population, df['pop'] == population, with_diameter=True)
    # Totals cover everything except the 'beads' rows; no diameter column.
    add_group('total', df['pop'] != 'beads', with_diameter=False)

    # NOTE(review): 'total_abundance' and 'total_carbon_content' are computed
    # but not listed here, same as the original; confirm that is intended.
    df = ip.arrangeColumns(
        ['time', 'lat', 'lon', 'depth',
         'prochloro_abundance', 'prochloro_diameter',
         'prochloro_carbon_content', 'prochloro_biomass',
         'synecho_abundance', 'synecho_diameter',
         'synecho_carbon_content', 'synecho_biomass',
         'croco_abundance', 'croco_diameter',
         'croco_carbon_content', 'croco_biomass',
         'picoeuk_abundance', 'picoeuk_diameter',
         'picoeuk_carbon_content', 'picoeuk_biomass',
         'unknown_abundance', 'unknown_diameter',
         'unknown_carbon_content', 'unknown_biomass',
         'total_biomass', 'par'],
        df)
    df = ip.removeMissings(['time', 'lat', 'lon'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)

    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
    print('export path: ', export_path)
    return export_path
def makeHOT224(rawFilePath, rawFileName, tableName):
    """Build the HOT224 export CSV from an Excel workbook and return its path.

    Reads the sheet named ``data`` from ``rawFilePath + rawFileName``, runs
    the ``ip`` preparation helpers on it and writes
    ``<export dir>/<tableName>.csv``.

    Parameters
    ----------
    rawFilePath : str
        Directory prefix of the workbook (concatenated directly with the
        file name, so it must end with a path separator).
    rawFileName : str
        Name of the Excel workbook.
    tableName : str
        Used as the base name of the exported CSV.

    Returns
    -------
    str
        Path of the written CSV.
    """
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, tableName)

    # ``sep`` is a CSV option: it has no meaning for Excel input and current
    # pandas rejects it as an unexpected keyword, so it is no longer passed.
    df = pd.read_excel(path, sheet_name='data')

    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)

    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
def insertSeaFlowCruiseSalinity():
    """Export a salinity CSV for every SeaFlow ``.sfl`` file and load it.

    Changes the working directory to ``cfgv.rep_allSeaFlowCruises_raw``,
    then for each file matching ``*.sfl*``:

    * derives the cruise tag by dropping the last eight characters of the
      file name and looks up its ID with ``iF.findID_CRUISE``,
    * reads the tab-separated ``DATE``/``LAT``/``LON``/``SALINITY`` columns,
      parses ``DATE`` and adds a constant ``depth`` of 5.0,
    * drops rows whose salinity cannot be parsed as a number,
    * if any rows remain, writes ``<export dir>/<tag>_salinity.csv`` and
      passes it to ``iF.toSQLbcp`` for table ``tblCruise_Salinity`` on server
      ``Rainier``; otherwise prints a notice and skips the file.
    """
    server = 'Rainier'
    tableName = 'tblCruise_Salinity'
    rawFilePath = cfgv.rep_allSeaFlowCruises_raw
    # Loop-invariant, so computed once.
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'

    os.chdir(rawFilePath)
    sfl_cruise_list = glob.glob('*.sfl*')
    usecols_sfl = ['DATE', 'LAT', 'LON', 'SALINITY']

    for cruise in sfl_cruise_list:
        cruise_tag = cruise[:-8]
        # The original suffix was '_temp', which mislabels salinity exports
        # and could clash with a temperature export of the same cruise.
        export_path = '%s%s.csv' % (exportBase, cruise_tag + '_salinity')
        print(cruise)
        Cruise_ID = iF.findID_CRUISE(cruise_tag)

        df = pd.read_csv(cruise, sep='\t', usecols=usecols_sfl)
        df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%dT%H:%M:%S')
        df['DEPTH'] = 5.0
        df['Cruise_ID'] = Cruise_ID
        df = df.rename(columns={
            'DATE': 'time',
            'LAT': 'lat',
            'LON': 'lon',
            'DEPTH': 'depth',
            'SALINITY': 'salinity',
        })
        df = df[['Cruise_ID', 'time', 'lat', 'lon', 'depth', 'salinity']]

        df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
        # Keep only rows whose salinity parses as a number.
        df = df[pd.to_numeric(df['salinity'], errors='coerce').notnull()]
        df = ip.NaNtoNone(df)
        df = ip.colDatatypes(df)
        df = ip.convertYYYYMMDD(df)
        df = ip.removeDuplicates(df)
        print(df.head())

        if df.empty:
            print(cruise + ' had no salinity values. Not inserted into database')
        else:
            df.to_csv(export_path, index=False)
            ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
            print('export path: ', export_path)
            iF.toSQLbcp(export_path, tableName, server)