import pandas as pd

# cfgv (config / data paths), nc (netCDF-to-DataFrame reader), ip (DataFrame
# prep helpers), and iF (bulk SQL insert) are project-local modules assumed to
# be imported at the top of this file.


def makeBulkCHL(itnum, nrt):
    if itnum < 1998001:
        print('Error: CHL data is only available after 1998.')
        return
    if nrt:
        path = cfgv.nrt_chl_raw + cfgv.nrt_chl_prefix + '%7.7d.nc' % itnum
    else:
        path = cfgv.rep_chl_raw + cfgv.rep_chl_prefix + '%7.7d.nc' % itnum
    prefix = 'chl_oi_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['CHL_error'], df)
    #df = ip.removeMissings(['CHL'], df)   # remove land
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    ## seems like the remap below is only needed for the NRT product
    ## (longitude range in NRT products: 0 - 360)
    if nrt:
        ip.mapTo180180(export_path, 'lon')   # only use if necessary
    ###df = downsampleCHL(export_path)
    ip.sortByLatLon(df, export_path, 'lon', 'lat')
    return export_path
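# A minimal sketch (assumption: the itnum stamps used by the makeBulk*
# functions encode year and day-of-year as a 7-digit YYYYDDD integer, matching
# the '%7.7d' file-name formatting above). dateToItnum is a hypothetical
# helper, not part of this module:
from datetime import date


def dateToItnum(d):
    # e.g. dateToItnum(date(1998, 1, 1)) == 1998001 (the CHL start stamp above)
    #      dateToItnum(date(1981, 9, 1)) == 1981244 (the SST start stamp below)
    return d.year * 1000 + d.timetuple().tm_yday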
def makeBulkSST(itnum, nrt):
    if itnum < 1981244:
        print('Error: SST data is only available after 1981244.')
        return
    if nrt:
        path = cfgv.nrt_sst_raw + cfgv.nrt_sst_prefix + '%7.7d.nc' % itnum
    else:
        path = 'unknown'
        #path = cfgv.rep_sst_raw + cfgv.rep_sst_prefix + '%7.7d.nc' % itnum
    prefix = 'sst_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(
        ['analysis_error', 'mask', 'sea_ice_fraction', 'lat_bnds', 'lon_bnds'],
        df)
    if 'time_bnds' in df.columns:
        df = ip.removeColumn(['time_bnds'], df)
    #df = ip.removeMissings(['analysed_sst'], df)   # remove land
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    #ip.mapTo180180(export_path, 'lon')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'lon', 'lat')
    ## 1. only keep records with nv=1 (remove nv=0 records)
    ## 2. drop the nv column
    ## 3. unit conversion: Kelvin to Celsius
    ## 4. fix time stamp: one or more of the SST files have an incorrect timestamp
    df = pd.read_csv(export_path)
    df = df[df['nv'] == 1]
    df = ip.removeColumn(['nv'], df)
    df['analysed_sst'] = df['analysed_sst'] - 273.15
    if itnum in [2016097]:
        # account for a small bug in the time variable of this SST netCDF file
        df['time'] = pd.DatetimeIndex(df['time']) + pd.DateOffset(1)
    df.to_csv(export_path, index=False)
    ####################################################
    return export_path
def makeGlobal_PicoPhytoPlankton(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    # note: read_excel takes no 'sep' argument; usecols is a module-level list
    df = pd.read_excel(path, sheet_name='data', usecols=usecols)
    df['year'] = df['year'].astype('str')
    df['month'] = df['month'].astype('str').apply(lambda x: x.zfill(2))
    df['day'] = df['day'].astype('str').apply(lambda x: x.zfill(2))
    print(len(df))
    df = df[(df['day'] != '-9') & (df['day'] != '-1')]
    df['year'] = df['year'].replace('10', '2010')
    df['year'] = df['year'].replace('11', '2011')
    df['year'] = df['year'].replace('6', '2006')
    # df = df[(df['year'] != '10') & (df['year'] != '11') & (df['year'] != '6')]
    # assemble the timestamp from the year/month/day columns ('format' not needed)
    df['time'] = pd.to_datetime(df[['year', 'month', 'day']])
    ip.renameCol(df, 'Lat', 'lat')
    ip.renameCol(df, 'Long', 'lon')
    ip.renameCol(df, 'Depth', 'depth')
    ip.renameCol(df, 'PromL', 'prochlorococcus_abundance')
    ip.renameCol(df, 'SynmL', 'synechococcus_abundance')
    ip.renameCol(df, 'PEukmL', 'picoeukaryote_abundance')
    ip.renameCol(df, 'pico_abund', 'picophytoplankton_abundance')
    ip.renameCol(df, 'picophyto [ug C/L]', 'picophytoplankton_biomass')
    df = ip.removeColumn(['year', 'day', 'month'], df)
    df = ip.reorderCol(df, [
        'time', 'lat', 'lon', 'depth', 'prochlorococcus_abundance',
        'synechococcus_abundance', 'picoeukaryote_abundance',
        'picophytoplankton_abundance', 'picophytoplankton_biomass'
    ])
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
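# Assumption (sketch): makeGlobal_PicoPhytoPlankton expects a module-level
# 'usecols' list naming the Excel columns to read; a guess consistent with
# the column names referenced above:
usecols = ['year', 'month', 'day', 'Lat', 'Long', 'Depth', 'PromL', 'SynmL',
           'PEukmL', 'pico_abund', 'picophyto [ug C/L]']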
def makeWOA_climatology(rawFilePath, tableName):
    # month_list and exportBase are assumed to be module-level globals
    for month in month_list:   # e.g. '01' = Jan
        print('Month: ' + month)
        df = merge_WOA_variables(month)
        df = ip.removeColumn(['I_gp', 'C_gp'], df)
        df = ip.removeMissings(['lat', 'lon', 'depth'], df)
        df = ip.NaNtoNone(df)
        df = ip.addIDcol(df)
        df.sort_values(['lat', 'lon', 'depth'],
                       ascending=[True, True, True],
                       inplace=True)
        export_path = exportBase + tableName + '_' + month + '.csv'
        df.to_csv(export_path, index=False)
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName)
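# Sketches for the module-level names referenced above but not defined in this
# section (assumptions): zero-padded month strings, and the same export
# directory used by the other functions in this file.
month_list = ['%2.2d' % m for m in range(1, 13)]   # '01' (Jan) ... '12' (Dec)
exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'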
def makeBulkWind(itnum, nrt, hour):
    if itnum < 2012320:
        print('Error: Wind data is only available after 2012320.')
        return
    if nrt:
        path = cfgv.nrt_wind_raw + cfgv.nrt_wind_prefix + '%7.7d_%2.2dh.nc' % (
            itnum, hour)
    else:
        path = 'unknown'
        #path = cfgv.rep_wind_raw + cfgv.rep_wind_prefix + '%7.7d_%2.2dh.nc' % (itnum, hour)
    prefix = 'wind_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['land_ice_mask', 'sampling_length'], df)
    #df = ip.removeMissings(['wind_stress'], df)   # remove land
    ## arrange the columns: make sure the column order is consistent with the underlying table
    df = ip.arrangeColumns([
        'wind_speed_rms', 'eastward_wind_rms', 'wind_stress', 'eastward_wind',
        'surface_downward_eastward_stress', 'wind_speed',
        'surface_downward_northward_stress', 'northward_wind',
        'northward_wind_rms'
    ], df)
    df['hour'] = hour
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d_%2.2dh.csv' % (exportBase, prefix, itnum, hour)
    df.to_csv(export_path)
    #ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    ########### drop the depth column ###########
    df = pd.read_csv(export_path)
    df = ip.removeColumn(['depth'], df)
    df.to_csv(export_path, index=False)
    #############################################
    return export_path
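# Example usage (a sketch; the 6-hourly sampling windows are an assumption
# based on the hour suffix in the wind file names):
#   for hour in [0, 6, 12, 18]:
#       makeBulkWind(2012320, nrt=True, hour=hour)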
def makeBulkSeaFlow():
    path = cfgv.seaflow_raw + 'SeaFlow_colocal.csv'
    prefix = 'seaflow'
    df = pd.read_csv(path)
    df = ip.removeColumn(['file'], df)
    # remove rows with missing lat/lon/abundance
    df = ip.removeMissings(['lat', 'lon', 'abundance'], df)
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df.to_csv(export_path, index=False)
    #ip.mapTo180180(export_path, 'lon')   # only use if necessary
    ip.sortByDepthLatLon(df, export_path, 'lon', 'lat', 'cruise')
    return export_path
# NOTE: makeBulkALT is redefined below; in Python the later definition wins,
# so this first version (no column arrangement, keeps the ID column) is dead code.
def makeBulkALT(itnum, nrt):
    if itnum < 1993001:
        print('Error: Altimetry data is only available after 1993.')
        return
    if nrt:
        path = cfgv.nrt_alt_raw + cfgv.nrt_alt_prefix + '%7.7d.nc' % itnum
    else:
        path = cfgv.rep_alt_raw + cfgv.rep_alt_prefix + '%7.7d.nc' % itnum
    prefix = 'alt_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['err'], df)
    #df = ip.removeMissings(['sla'], df)   # remove land
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    return export_path
def makeBulkALT(itnum, nrt):
    if itnum < 1993001:
        print('Error: Altimetry data is only available after 1993.')
        return
    if nrt:
        path = cfgv.nrt_alt_raw + cfgv.nrt_alt_prefix + '%7.7d.nc' % itnum
    else:
        path = cfgv.rep_alt_raw + cfgv.rep_alt_prefix + '%7.7d.nc' % itnum
    prefix = 'alt_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['err'], df)
    #df = ip.removeMissings(['sla'], df)   # remove land
    ## arrange the columns: make sure the column order is consistent with the underlying table
    df = ip.arrangeColumns(['vgosa', 'vgos', 'sla', 'adt', 'ugosa', 'ugos'], df)
    # df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    return export_path
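# A minimal sketch of the 0..360 -> -180..180 longitude remap that
# ip.mapTo180180 is assumed to perform (the real helper rewrites the exported
# CSV in place; this shows only the per-value arithmetic):
def mapTo180180Value(lon):
    # e.g. 200.0 -> -160.0; values already in [-180, 180] pass through
    return lon - 360.0 if lon > 180.0 else lon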