import glob
import os
import sys

import numpy as np
import pandas as pd
import xarray as xr

# cfgv (project config), ip (ingestion helpers), and nc (netCDF helpers) are
# project-local modules; their exact import lines are not shown in this file
# and are assumed to sit alongside the imports above.


def makeGLODAP(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_csv(path, sep=',', usecols=usecols)
    # cast to int first to drop the trailing '.0', then to str
    df['year'] = df['year'].astype('int').astype('str')
    df['month'] = df['month'].astype('int').astype('str')
    df['day'] = df['day'].astype('int').astype('str')
    df['hour'] = df['hour'].astype('int').astype('str')
    df['minute'] = df['minute'].astype('int').astype('str')
    df['second'] = '0'
    # construct datetime by assembling the component columns
    # (pd.to_datetime ignores `format` when given a DataFrame, so it is omitted)
    df['time'] = pd.to_datetime(
        df[['year', 'month', 'day', 'hour', 'minute', 'second']])
    ip.renameCol(df, 'latitude', 'lat')
    ip.renameCol(df, 'longitude', 'lon')
    # rename variables to self-describing column names
    ip.renameCol(df, 'theta', 'theta_potential_temperature')
    ip.renameCol(df, 'sigma0', 'sigma0_potential_density')
    ip.renameCol(df, 'sigma1', 'sigma1_potential_density_ref_1000_dbar')
    ip.renameCol(df, 'sigma2', 'sigma2_potential_density_ref_2000_dbar')
    ip.renameCol(df, 'sigma3', 'sigma3_potential_density_ref_3000_dbar')
    ip.renameCol(df, 'sigma4', 'sigma4_potential_density_ref_4000_dbar')
    ip.renameCol(df, 'gamma', 'gamma_neutral_density')
    ip.renameCol(df, 'TAlk', 'TAlk_total_alkalinity')
    ip.renameCol(df, 'phts25p0', 'phts25p0_pH_25C_0dbar')
    ip.renameCol(df, 'phtsinsitutp', 'phtsinsitutp_pH_insitu')
    # import the cruise-to-expocode ID file and join on cruise ID
    expocodes = pd.read_csv(rawFilePath + rawFileName_expocodes,
                            sep='\t',
                            names=['cruise_ID', 'expocode'])
    df = pd.merge(df, expocodes, left_on='cruise', right_on='cruise_ID')
    df = df.drop('cruise_ID', axis=1)
    ip.renameCol(df, 'expocode', 'cruise_expocode')
    df = ip.arrangeColumns(usecols_rearange, df)
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path, df
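
# Usage sketch for makeGLODAP (illustrative only: the raw file name below is
# an assumption, not a path shipped with this repo; `usecols`,
# `usecols_rearange`, and `rawFileName_expocodes` must be defined at module
# scope before calling):
#
#   export_path, glodap_df = makeGLODAP(
#       '/data/raw/glodap/', 'GLODAPv2_Merged_Master_File.csv', 'tblGLODAP')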
def makeSeaFlow(rawFilePath, rawFileName, tableName):
    path = rawFilePath + rawFileName
    prefix = tableName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df = pd.read_csv(path, sep=',')
    df = df[df['flag'] == 0]  # keep only un-flagged (good) observations
    # derive per-population columns; abundance is taken from the median
    # (50th-quantile) gate only
    populations = ['prochloro', 'synecho', 'croco', 'picoeuk', 'unknown']
    for pop in populations:
        df[pop + '_abundance'] = np.where(
            (df['pop'] == pop) & (df['quantile'] == 50), df['abundance'],
            np.nan)
        df[pop + '_diameter'] = np.where(df['pop'] == pop, df['diam_mid'],
                                         np.nan)
        df[pop + '_carbon_content'] = np.where(df['pop'] == pop, df['Qc_mid'],
                                               np.nan)
        # cast to float so the biomass product is safe even if the columns
        # were read as objects
        df[pop + '_biomass'] = (df[pop + '_abundance'].astype(float) *
                                df[pop + '_carbon_content'].astype(float))
    # totals exclude the calibration beads
    df['total_abundance'] = np.where(
        (df['pop'] != 'beads') & (df['quantile'] == 50), df['abundance'],
        np.nan)
    df['total_carbon_content'] = np.where(df['pop'] != 'beads', df['Qc_mid'],
                                          np.nan)
    df['total_biomass'] = df['total_abundance'] * df['total_carbon_content']
    columns = ['time', 'lat', 'lon', 'depth']
    for pop in populations:
        columns += [pop + '_abundance', pop + '_diameter',
                    pop + '_carbon_content', pop + '_biomass']
    columns += ['total_biomass', 'par']
    df = ip.arrangeColumns(columns, df)
    df = ip.removeMissings(['time', 'lat', 'lon'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
    print('export path: ', export_path)
    return export_path
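
# Usage sketch for makeSeaFlow (illustrative; the raw file name is an
# assumption):
#
#   export_path = makeSeaFlow('/data/raw/seaflow/', 'SeaFlow_stats.csv',
#                             'tblSeaFlow')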
def makeBulkWind(itnum, nrt, hour):
    if itnum < 2012320:
        print('Error: Wind data is only available after 2012320.')
        return
    if nrt:
        path = cfgv.nrt_wind_raw + cfgv.nrt_wind_prefix + '%7.7d_%2.2dh.nc' % (
            itnum, hour)
    else:
        path = 'unknown'
        #path = cfgv.rep_wind_raw + cfgv.rep_wind_prefix + '%7.7d_%2.2dh.nc' % (itnum, hour)
    prefix = 'wind_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['land_ice_mask', 'sampling_length'], df)
    #df = ip.removeMissings(['wind_stress'], df)   # remove land
    # arrange the columns in the order of the underlying table
    df = ip.arrangeColumns([
        'wind_speed_rms', 'eastward_wind_rms', 'wind_stress', 'eastward_wind',
        'surface_downward_eastward_stress', 'wind_speed',
        'surface_downward_northward_stress', 'northward_wind',
        'northward_wind_rms'
    ], df)
    df['hour'] = hour
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d_%2.2dh.csv' % (exportBase, prefix, itnum, hour)
    df.to_csv(export_path)
    #ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    # drop the depth column (surface-only dataset)
    df = pd.read_csv(export_path)
    df = ip.removeColumn(['depth'], df)
    df.to_csv(export_path, index=False)
    return export_path
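
# Usage sketch for makeBulkWind: itnum is a 7-digit YYYYDDD day-of-year
# integer and hour the synoptic hour, e.g. day 100 of 2016 at 06h,
# near-real-time:
#
#   export_path = makeBulkWind(2016100, nrt=True, hour=6)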
def makeBulkALT(itnum, nrt):
    if itnum < 1993001:
        print('Error: Altimetry data is only available after 1993.')
        return
    if nrt:
        path = cfgv.nrt_alt_raw + cfgv.nrt_alt_prefix + '%7.7d.nc' % itnum
    else:
        path = cfgv.rep_alt_raw + cfgv.rep_alt_prefix + '%7.7d.nc' % itnum
    prefix = 'alt_'
    df = nc.ncToDF(path)
    df = ip.removeColumn(['err'], df)
    #df = ip.removeMissings(['sla'], df)   # remove land
    # arrange the columns in the order of the underlying table
    df = ip.arrangeColumns(['vgosa', 'vgos', 'sla', 'adt', 'ugosa', 'ugos'],
                           df)
    # df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    ip.sortByLatLon(df, export_path, 'longitude', 'latitude')
    return export_path
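
# Usage sketch for makeBulkALT; unlike the wind and Pisces makers, the
# reprocessed (nrt=False) path is configured here:
#
#   export_path = makeBulkALT(2016100, nrt=False)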
def makeBulkPisces(itnum, nrt):
    if itnum < 2011365:
        print('Error: Mercator-Pisces data is only available from 2011365 '
              '(end of 2011) onward.')
        return
    if nrt:
        path = cfgv.nrt_mercator_pisces_raw + \
            cfgv.nrt_mercator_pisces_prefix + '%7.7d.nc' % itnum
    else:
        path = 'unknown'
        #path = cfgv.rep_mercator_pisces_raw + cfgv.rep_mercator_pisces_prefix + '%7.7d.nc' % itnum
    prefix = 'pisces_'
    df = nc.ncToDF(path)
    # arrange the columns in the order of the underlying table
    df = ip.arrangeColumns(
        ['Fe', 'PP', 'Si', 'NO3', 'CHL', 'PHYC', 'PO4', 'O2'], df)
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s%d.csv' % (exportBase, prefix, itnum)
    df.to_csv(export_path)
    #ip.mapTo180180(export_path, 'longitude')   # only use if necessary
    # sortByDepthLatLon_AddClim sorts the dataframe and adds "month" and
    # "year" columns to it
    ip.sortByDepthLatLon_AddClim(df, export_path, 'longitude', 'latitude',
                                 'depth')
    return export_path
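
# Usage sketch for makeBulkPisces (near-real-time only, since the
# reprocessed path above is still 'unknown'):
#
#   export_path = makeBulkPisces(2016100, nrt=True)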
# --- Darwin 3-day model ingestion (run as a script: python <thisfile> <path-to-nc>) ---
rawFilePath = '/media/nrhagen/Drobo/OpediaVault/model/darwin_3day/'
netcdf_list = glob.glob(rawFilePath + '*.nc')
exportBase = cfgv.opedia_proj + 'db/dbInsert/export_temp/'
# NOTE: `tableName` is not defined at module scope and `export_path` is never
# used below (the output name is built from the input basename), so these two
# legacy lines are commented out:
# prefix = tableName
# export_path = '%s%s.csv' % (exportBase, prefix)
############################
path = sys.argv[1]
# check whether a '_DONE.txt' catalog file exists before reprocessing
if os.path.isfile(exportBase + os.path.basename(path)[:-3] + '_DONE.txt'):
    sys.exit(0)
else:
    xdf = xr.open_dataset(path)
    df = xdf.to_dataframe()
    df.reset_index(inplace=True)   # convert netCDF dims to columns
    df = ip.renameCol(df, 'lat_c', 'lat')
    df = ip.renameCol(df, 'lon_c', 'lon')
    df = ip.renameCol(df, 'dep_c', 'depth')
    df = ip.convertcolDatatype(df, ['FeT', 'PO4', 'DIN', 'SiO2', 'O2'])
    # df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.arrangeColumns(
        ['time', 'lat', 'lon', 'depth', 'FeT', 'PO4', 'DIN', 'SiO2', 'O2'],
        df)
    df = ip.NaNtoNone(df)
    df = ip.addIDcol(df)
    df = ip.colDatatypes(df)
    df.sort_values(['time', 'lat', 'lon', 'depth'],
                   ascending=[True, True, True, True],
                   inplace=True)
    df.to_csv(exportBase + os.path.basename(path)[:-3] + '.csv',
              mode='a',
              chunksize=1000000,
              index=False)
    # write an empty '_DONE.txt' catalog file recording that this input was processed
    with open(exportBase + os.path.basename(path)[:-3] + '_DONE.txt', 'w'):
        pass
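
# The script above processes one file passed via sys.argv; a minimal driver
# over the whole netcdf_list could look like the sketch below (hypothetical:
# assumes this file is named process_darwin.py and `import subprocess`):
#
#   for f in sorted(netcdf_list):
#       subprocess.run([sys.executable, 'process_darwin.py', f], check=True)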