import pandas as pd

# cfgv (config / data paths), nc (netCDF-to-DataFrame reader), ip (DataFrame
# prep helpers), and iF (bulk SQL insert) are project-local modules assumed to
# be imported at the top of this file.


def makeBulkCHL(itnum, nrt):
    if itnum < 1998001:
        print('Error: CHL data is only available after 1998.')
        return
    if nrt:
        path = cfgv.nrt_chl_raw + cfgv.nrt_chl_prefix + '%7.7d.nc' % itnum
    else:
        path = cfgv.rep_chl_raw + cfgv.rep_chl_prefix + '%7.7d.nc' % itnum
    prefix = 'chl_oi_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['CHL_error'], df)
    #df = ip.removeMissings(['CHL'], df)   # remove land
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    ## seems like the remap below is only needed for the NRT product
    ## (longitude range in NRT products: 0 - 360)
    if nrt:
        ip.mapTo180180(export_path, 'lon')   # only use if necessary
    ###df = downsampleCHL(export_path)
    ip.sortByLatLon(df, export_path, 'lon', 'lat')
    return export_path
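# A minimal sketch (assumption: the itnum stamps used by the makeBulk*
# functions encode year and day-of-year as a 7-digit YYYYDDD integer, matching
# the '%7.7d' file-name formatting above). dateToItnum is a hypothetical
# helper, not part of this module:
from datetime import date


def dateToItnum(d):
    # e.g. dateToItnum(date(1998, 1, 1)) == 1998001 (the CHL start stamp above)
    #      dateToItnum(date(1981, 9, 1)) == 1981244 (the SST start stamp below)
    return d.year * 1000 + d.timetuple().tm_yday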
def makeBulkSST(itnum, nrt):
    if itnum < 1981244:
        print('Error: SST data is only available after 1981244.')
        return
    if nrt:
        path = cfgv.nrt_sst_raw + cfgv.nrt_sst_prefix + '%7.7d.nc' % itnum
    else:
        path = 'unknown'
        #path = cfgv.rep_sst_raw + cfgv.rep_sst_prefix + '%7.7d.nc' % itnum
    prefix = 'sst_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(
        ['analysis_error', 'mask', 'sea_ice_fraction', 'lat_bnds', 'lon_bnds'],
        df)
    if 'time_bnds' in df.columns:
        df = ip.removeColumn(['time_bnds'], df)
    #df = ip.removeMissings(['analysed_sst'], df)   # remove land
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    #ip.mapTo180180(export_path, 'lon')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'lon', 'lat')
    ## 1. only keep records with nv=1 (remove nv=0 records)
    ## 2. drop the nv column
    ## 3. unit conversion: Kelvin to Celsius
    ## 4. fix time stamp: one or more of the SST files have an incorrect timestamp
    df = pd.read_csv(export_path)
    df = df[df['nv'] == 1]
    df = ip.removeColumn(['nv'], df)
    df['analysed_sst'] = df['analysed_sst'] - 273.15
    if itnum in [2016097]:
        # account for a small bug in the time variable of this SST netCDF file
        df['time'] = pd.DatetimeIndex(df['time']) + pd.DateOffset(1)
    df.to_csv(export_path, index=False)
    ####################################################
    return export_path
def makeGlobal_PicoPhytoPlankton(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    # note: read_excel takes no 'sep' argument; usecols is a module-level list
    df = pd.read_excel(path, sheet_name='data', usecols=usecols)
    df['year'] = df['year'].astype('str')
    df['month'] = df['month'].astype('str').apply(lambda x: x.zfill(2))
    df['day'] = df['day'].astype('str').apply(lambda x: x.zfill(2))
    print(len(df))
    df = df[(df['day'] != '-9') & (df['day'] != '-1')]
    df['year'] = df['year'].replace('10', '2010')
    df['year'] = df['year'].replace('11', '2011')
    df['year'] = df['year'].replace('6', '2006')
    # df = df[(df['year'] != '10') & (df['year'] != '11') & (df['year'] != '6')]
    # assemble the timestamp from the year/month/day columns ('format' not needed)
    df['time'] = pd.to_datetime(df[['year', 'month', 'day']])
    ip.renameCol(df, 'Lat', 'lat')
    ip.renameCol(df, 'Long', 'lon')
    ip.renameCol(df, 'Depth', 'depth')
    ip.renameCol(df, 'PromL', 'prochlorococcus_abundance')
    ip.renameCol(df, 'SynmL', 'synechococcus_abundance')
    ip.renameCol(df, 'PEukmL', 'picoeukaryote_abundance')
    ip.renameCol(df, 'pico_abund', 'picophytoplankton_abundance')
    ip.renameCol(df, 'picophyto [ug C/L]', 'picophytoplankton_biomass')
    df = ip.removeColumn(['year', 'day', 'month'], df)
    df = ip.reorderCol(df, [
        'time', 'lat', 'lon', 'depth', 'prochlorococcus_abundance',
        'synechococcus_abundance', 'picoeukaryote_abundance',
        'picophytoplankton_abundance', 'picophytoplankton_biomass'
    ])
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path
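# Assumption (sketch): makeGlobal_PicoPhytoPlankton expects a module-level
# 'usecols' list naming the Excel columns to read; a guess consistent with
# the column names referenced above:
usecols = ['year', 'month', 'day', 'Lat', 'Long', 'Depth', 'PromL', 'SynmL',
           'PEukmL', 'pico_abund', 'picophyto [ug C/L]']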
def makeWOA_climatology(rawFilePath, tableName):
    # month_list and exportBase are assumed to be module-level globals
    for month in month_list:   # e.g. '01' = Jan
        print('Month: ' + month)
        df = merge_WOA_variables(month)
        df = ip.removeColumn(['I_gp', 'C_gp'], df)
        df = ip.removeMissings(['lat', 'lon', 'depth'], df)
        df = ip.NaNtoNone(df)
        df = ip.addIDcol(df)
        df.sort_values(['lat', 'lon', 'depth'],
                       ascending=[True, True, True],
                       inplace=True)
        export_path = exportBase + tableName + '_' + month + '.csv'
        df.to_csv(export_path, index=False)
        print('export path: ', export_path)
        iF.toSQLbcp(export_path, tableName)
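# Sketches for the module-level names referenced above but not defined in this
# section (assumptions): zero-padded month strings, and the same export
# directory used by the other functions in this file.
month_list = ['%2.2d' % m for m in range(1, 13)]   # '01' (Jan) ... '12' (Dec)
exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'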
def makeBulkWind(itnum, nrt, hour):
    if itnum < 2012320:
        print('Error: Wind data is only available after 2012320.')
        return
    if nrt:
        path = cfgv.nrt_wind_raw + cfgv.nrt_wind_prefix + '%7.7d_%2.2dh.nc' % (
            itnum, hour)
    else:
        path = 'unknown'
        #path = cfgv.rep_wind_raw + cfgv.rep_wind_prefix + '%7.7d_%2.2dh.nc' % (itnum, hour)
    prefix = 'wind_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['land_ice_mask', 'sampling_length'], df)
    #df = ip.removeMissings(['wind_stress'], df)   # remove land
    ## arrange the columns: make sure the column order is consistent with the underlying table
    df = ip.arrangeColumns([
        'wind_speed_rms', 'eastward_wind_rms', 'wind_stress', 'eastward_wind',
        'surface_downward_eastward_stress', 'wind_speed',
        'surface_downward_northward_stress', 'northward_wind',
        'northward_wind_rms'
    ], df)
    df['hour'] = hour
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d_%2.2dh.csv' % (exportBase, prefix, itnum, hour)
    df.to_csv(export_path)
    #ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    ########### drop the depth column ###########
    df = pd.read_csv(export_path)
    df = ip.removeColumn(['depth'], df)
    df.to_csv(export_path, index=False)
    #############################################
    return export_path
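# Example usage (a sketch; the 6-hourly sampling windows are an assumption
# based on the hour suffix in the wind file names):
#   for hour in [0, 6, 12, 18]:
#       makeBulkWind(2012320, nrt=True, hour=hour)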
def makeBulkSeaFlow():
    path = cfgv.seaflow_raw + 'SeaFlow_colocal.csv'
    prefix = 'seaflow'
    df = pd.read_csv(path)
    df = ip.removeColumn(['file'], df)
    # remove rows with missing lat/lon/abundance
    df = ip.removeMissings(['lat', 'lon', 'abundance'], df)
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df.to_csv(export_path, index=False)
    #ip.mapTo180180(export_path, 'lon')   # only use if necessary
    ip.sortByDepthLatLon(df, export_path, 'lon', 'lat', 'cruise')
    return export_path
# NOTE: makeBulkALT is redefined below; in Python the later definition wins,
# so this first version (no column arrangement, keeps the ID column) is dead code.
def makeBulkALT(itnum, nrt):
    if itnum < 1993001:
        print('Error: Altimetry data is only available after 1993.')
        return
    if nrt:
        path = cfgv.nrt_alt_raw + cfgv.nrt_alt_prefix + '%7.7d.nc' % itnum
    else:
        path = cfgv.rep_alt_raw + cfgv.rep_alt_prefix + '%7.7d.nc' % itnum
    prefix = 'alt_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['err'], df)
    #df = ip.removeMissings(['sla'], df)   # remove land
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    return export_path
def makeBulkALT(itnum, nrt):
    if itnum < 1993001:
        print('Error: Altimetry data is only available after 1993.')
        return
    if nrt:
        path = cfgv.nrt_alt_raw + cfgv.nrt_alt_prefix + '%7.7d.nc' % itnum
    else:
        path = cfgv.rep_alt_raw + cfgv.rep_alt_prefix + '%7.7d.nc' % itnum
    prefix = 'alt_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['err'], df)
    #df = ip.removeMissings(['sla'], df)   # remove land
    ## arrange the columns: make sure the column order is consistent with the underlying table
    df = ip.arrangeColumns(['vgosa', 'vgos', 'sla', 'adt', 'ugosa', 'ugos'], df)
    # df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    return export_path
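# A minimal sketch of the 0..360 -> -180..180 longitude remap that
# ip.mapTo180180 is assumed to perform (the real helper rewrites the exported
# CSV in place; this shows only the per-value arithmetic):
def mapTo180180Value(lon):
    # e.g. 200.0 -> -160.0; values already in [-180, 180] pass through
    return lon - 360.0 if lon > 180.0 else lon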