import glob
import os
import sys

import numpy as np
import pandas as pd
import xarray as xr

# cfgv (project config), ip (ingestion helpers), and nc (netCDF helpers) are
# project-local modules; their exact import lines are not shown in this file
# and are assumed to sit alongside the imports above.


def makeGLODAP(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_csv(path, sep=',', usecols=usecols)
    # cast to int first to drop the trailing '.0', then to str
    df['year'] = df['year'].astype('int').astype('str')
    df['month'] = df['month'].astype('int').astype('str')
    df['day'] = df['day'].astype('int').astype('str')
    df['hour'] = df['hour'].astype('int').astype('str')
    df['minute'] = df['minute'].astype('int').astype('str')
    df['second'] = '0'
    # construct datetime by assembling the component columns
    # (pd.to_datetime ignores `format` when given a DataFrame, so it is omitted)
    df['time'] = pd.to_datetime(
        df[['year', 'month', 'day', 'hour', 'minute', 'second']])
    ip.renameCol(df, 'latitude', 'lat')
    ip.renameCol(df, 'longitude', 'lon')
    # rename variables to self-describing column names
    ip.renameCol(df, 'theta', 'theta_potential_temperature')
    ip.renameCol(df, 'sigma0', 'sigma0_potential_density')
    ip.renameCol(df, 'sigma1', 'sigma1_potential_density_ref_1000_dbar')
    ip.renameCol(df, 'sigma2', 'sigma2_potential_density_ref_2000_dbar')
    ip.renameCol(df, 'sigma3', 'sigma3_potential_density_ref_3000_dbar')
    ip.renameCol(df, 'sigma4', 'sigma4_potential_density_ref_4000_dbar')
    ip.renameCol(df, 'gamma', 'gamma_neutral_density')
    ip.renameCol(df, 'TAlk', 'TAlk_total_alkalinity')
    ip.renameCol(df, 'phts25p0', 'phts25p0_pH_25C_0dbar')
    ip.renameCol(df, 'phtsinsitutp', 'phtsinsitutp_pH_insitu')
    # import the cruise-to-expocode ID file and join on cruise ID
    expocodes = pd.read_csv(rawFilePath + rawFileName_expocodes,
                            sep='\t',
                            names=['cruise_ID', 'expocode'])
    df = pd.merge(df, expocodes, left_on='cruise', right_on='cruise_ID')
    df = df.drop('cruise_ID', axis=1)
    ip.renameCol(df, 'expocode', 'cruise_expocode')
    df = ip.arrangeColumns(usecols_rearange, df)
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path, df
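
# Usage sketch for makeGLODAP (illustrative only: the raw file name below is
# an assumption, not a path shipped with this repo; `usecols`,
# `usecols_rearange`, and `rawFileName_expocodes` must be defined at module
# scope before calling):
#
#   export_path, glodap_df = makeGLODAP(
#       '/data/raw/glodap/', 'GLODAPv2_Merged_Master_File.csv', 'tblGLODAP')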
def makeSeaFlow(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_csv(path, sep=',')
    df = df[df['flag'] == 0]  # keep only un-flagged (good) observations
    # derive per-population columns; abundance is taken from the median
    # (50th-quantile) gate only
    populations = ['prochloro', 'synecho', 'croco', 'picoeuk', 'unknown']
    for pop in populations:
        df[pop + '_abundance'] = np.where(
            (df['pop'] == pop) & (df['quantile'] == 50), df['abundance'],
            np.nan)
        df[pop + '_diameter'] = np.where(df['pop'] == pop, df['diam_mid'],
                                         np.nan)
        df[pop + '_carbon_content'] = np.where(df['pop'] == pop, df['Qc_mid'],
                                               np.nan)
        # cast to float so the biomass product is safe even if the columns
        # were read as objects
        df[pop + '_biomass'] = (df[pop + '_abundance'].astype(float) *
                                df[pop + '_carbon_content'].astype(float))
    # totals exclude the calibration beads
    df['total_abundance'] = np.where(
        (df['pop'] != 'beads') & (df['quantile'] == 50), df['abundance'],
        np.nan)
    df['total_carbon_content'] = np.where(df['pop'] != 'beads', df['Qc_mid'],
                                          np.nan)
    df['total_biomass'] = df['total_abundance'] * df['total_carbon_content']
    columns = ['time', 'lat', 'lon', 'depth']
    for pop in populations:
        columns += [pop + '_abundance', pop + '_diameter',
                    pop + '_carbon_content', pop + '_biomass']
    columns += ['total_biomass', 'par']
    df = ip.arrangeColumns(columns, df)
    df = ip.removeMissings(['time', 'lat', 'lon'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
    print('export path: ', export_path)
    return export_path
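
# Usage sketch for makeSeaFlow (illustrative; the raw file name is an
# assumption):
#
#   export_path = makeSeaFlow('/data/raw/seaflow/', 'SeaFlow_stats.csv',
#                             'tblSeaFlow')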
def makeBulkWind(itnum, nrt, hour):
    if itnum < 2012320:
        print('Error: Wind data is only available after 2012320.')
        return
    if nrt:
        path = cfgv.nrt_wind_raw + cfgv.nrt_wind_prefix + '%7.7d_%2.2dh.nc' % (
            itnum, hour)
    else:
        path = 'unknown'
        #path = cfgv.rep_wind_raw + cfgv.rep_wind_prefix + '%7.7d_%2.2dh.nc' % (itnum, hour)
    prefix = 'wind_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['land_ice_mask', 'sampling_length'], df)
    #df = ip.removeMissings(['wind_stress'], df)   # remove land
    # arrange the columns in the order of the underlying table
    df = ip.arrangeColumns([
        'wind_speed_rms', 'eastward_wind_rms', 'wind_stress', 'eastward_wind',
        'surface_downward_eastward_stress', 'wind_speed',
        'surface_downward_northward_stress', 'northward_wind',
        'northward_wind_rms'
    ], df)
    df['hour'] = hour
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d_%2.2dh.csv' % (exportBase, prefix, itnum, hour)
    df.to_csv(export_path)
    #ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    # drop the depth column (surface-only dataset)
    df = pd.read_csv(export_path)
    df = ip.removeColumn(['depth'], df)
    df.to_csv(export_path, index=False)
    return export_path
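
# Usage sketch for makeBulkWind: itnum is a 7-digit YYYYDDD day-of-year
# integer and hour the synoptic hour, e.g. day 100 of 2016 at 06h,
# near-real-time:
#
#   export_path = makeBulkWind(2016100, nrt=True, hour=6)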
def makeBulkALT(itnum, nrt):
    if itnum < 1993001:
        print('Error: Altimetry data is only available after 1993.')
        return
    if nrt:
        path = cfgv.nrt_alt_raw + cfgv.nrt_alt_prefix + '%7.7d.nc' % itnum
    else:
        path = cfgv.rep_alt_raw + cfgv.rep_alt_prefix + '%7.7d.nc' % itnum
    prefix = 'alt_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['err'], df)
    #df = ip.removeMissings(['sla'], df)   # remove land
    # arrange the columns in the order of the underlying table
    df = ip.arrangeColumns(['vgosa', 'vgos', 'sla', 'adt', 'ugosa', 'ugos'],
                           df)
    # df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    return export_path
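
# Usage sketch for makeBulkALT; unlike the wind and Pisces makers, the
# reprocessed (nrt=False) path is configured here:
#
#   export_path = makeBulkALT(2016100, nrt=False)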
def makeBulkPisces(itnum, nrt):
    if itnum < 2011365:
        print('Error: Mercator-Pisces data is only available from 2011365 '
              '(end of 2011) onward.')
        return
    if nrt:
        path = cfgv.nrt_mercator_pisces_raw + \
            cfgv.nrt_mercator_pisces_prefix + '%7.7d.nc' % itnum
    else:
        path = 'unknown'
        #path = cfgv.rep_mercator_pisces_raw + cfgv.rep_mercator_pisces_prefix + '%7.7d.nc' % itnum
    prefix = 'pisces_'
    df = nc.ncToDF(path)
    # arrange the columns in the order of the underlying table
    df = ip.arrangeColumns(
        ['Fe', 'PP', 'Si', 'NO3', 'CHL', 'PHYC', 'PO4', 'O2'], df)
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    #ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    # sortByDepthLatLon_AddClim sorts the dataframe and adds "month" and
    # "year" columns to it
    ip.sortByDepthLatLon_AddClim(df, export_path, 'longitude', 'latitude',
                                 'depth')
    return export_path
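
# Usage sketch for makeBulkPisces (near-real-time only, since the
# reprocessed path above is still 'unknown'):
#
#   export_path = makeBulkPisces(2016100, nrt=True)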
# --- Darwin 3-day model ingestion (run as a script: python <thisfile> <path-to-nc>) ---
rawFilePath = '/media/nrhagen/Drobo/OpediaVault/model/darwin_3day/'
netcdf_list = glob.glob(rawFilePath + '*.nc')
exportBase = cfgv.opedia_proj + 'db/dbInsert/export_temp/'
# NOTE: `tableName` is not defined at module scope and `export_path` is never
# used below (the output name is built from the input basename), so these two
# legacy lines are commented out:
# prefix = tableName
# export_path = '%s%s.csv' % (exportBase, prefix)
############################
path = sys.argv[1]
# check whether a '_DONE.txt' catalog file exists before reprocessing
if os.path.isfile(exportBase + os.path.basename(path)[:-3] + '_DONE.txt'):
    sys.exit(0)
else:
    xdf = xr.open_dataset(path)
    df = xdf.to_dataframe()
    df.reset_index(inplace=True)   # convert netCDF dims to columns
    df = ip.renameCol(df, 'lat_c', 'lat')
    df = ip.renameCol(df, 'lon_c', 'lon')
    df = ip.renameCol(df, 'dep_c', 'depth')
    df = ip.convertcolDatatype(df, ['FeT', 'PO4', 'DIN', 'SiO2', 'O2'])
    # df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.arrangeColumns(
        ['time', 'lat', 'lon', 'depth', 'FeT', 'PO4', 'DIN', 'SiO2', 'O2'],
        df)
    df = ip.NaNtoNone(df)
    df = ip.addIDcol(df)
    df = ip.colDatatypes(df)
    df.sort_values(['time', 'lat', 'lon', 'depth'],
                   ascending=[True, True, True, True],
                   inplace=True)
    df.to_csv(exportBase + os.path.basename(path)[:-3] + '.csv',
              mode='a',
              chunksize=1000000,
              index=False)
    # write an empty '_DONE.txt' catalog file recording that this input was processed
    with open(exportBase + os.path.basename(path)[:-3] + '_DONE.txt', 'w'):
        pass
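
# The script above processes one file passed via sys.argv; a minimal driver
# over the whole netcdf_list could look like the sketch below (hypothetical:
# assumes this file is named process_darwin.py and `import subprocess`):
#
#   for f in sorted(netcdf_list):
#       subprocess.run([sys.executable, 'process_darwin.py', f], check=True)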