Python removeMissings Exemples, insertPrep.removeMissings Python Exemples

Exemple #1

0

Afficher le fichier

def makeBulkCobalamin():
    """Export the KM1314 particulate-cobalamin workbook as a CSV file.

    Reads the 'data' sheet of the raw workbook, passes it through
    ``ip.removeMissings`` for the 'time', 'lat', 'lon' and 'depth' columns,
    adds an 'ID' column set to None and writes the result to the project's
    export directory.

    Returns:
        The path of the CSV file that was written.
    """
    prefix = 'KM1314_ParticulateCobalamins_2018_06_12_vPublished'
    path = cfgv.rep_KM1314_cobalamin_raw + prefix + '.xlsx'

    # Hand pandas the path instead of an open() handle so that no file
    # object is left unclosed, and use the `sheet_name` keyword (the older
    # `sheetname` spelling is no longer accepted by pandas).
    df = pd.read_excel(path, sheet_name='data')
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df['ID'] = None
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, prefix)
    df.to_csv(export_path, index=False)
    #ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    return export_path

Exemple #2

0

Afficher le fichier

def makeWOA_climatology(rawFilePath, tableName):
    """Build, export and bulk-load one climatology CSV per month.

    For every entry of the module-level ``month_list`` the merged WOA
    variables are cleaned, sorted by lat/lon/depth, written to
    ``exportBase + tableName + '_' + month + '.csv'`` and handed to
    ``iF.toSQLbcp`` together with ``tableName``.

    Args:
        rawFilePath: Not used by this function.
        tableName: Table name; also used as the CSV file-name stem.
    """
    sort_keys = ['lat', 'lon', 'depth']
    for month in month_list:  # ie 1 = jan
        print('Month: ' + month)
        monthly = merge_WOA_variables(month)
        monthly = ip.removeColumn(['I_gp', 'C_gp'], monthly)
        monthly = ip.removeMissings(sort_keys, monthly)
        monthly = ip.NaNtoNone(monthly)
        monthly = ip.addIDcol(monthly)
        monthly.sort_values(sort_keys, ascending=True, inplace=True)

        # Build the target path once and reuse it for export, log and load.
        csv_path = exportBase + tableName + '_' + month + '.csv'
        monthly.to_csv(csv_path, index=False)
        print('export path: ', csv_path)
        iF.toSQLbcp(csv_path, tableName)

Exemple #3

0

Afficher le fichier

def makeBulkSeaFlow():
    """Export the colocalized SeaFlow CSV into the project's export folder.

    Drops the 'file' column, removes rows with missing lat/lon/abundance,
    adds an 'ID' column set to None, writes the CSV and then calls
    ``ip.sortByDepthLatLon`` on the exported path.

    Returns:
        The path of the exported CSV file.
    """
    source_csv = cfgv.seaflow_raw + 'SeaFlow_colocal.csv'
    seaflow = pd.read_csv(source_csv)
    seaflow = ip.removeColumn(['file'], seaflow)
    # remove rows with missing lat/lon/abundance
    seaflow = ip.removeMissings(['lat', 'lon', 'abundance'], seaflow)
    seaflow['ID'] = None

    export_dir = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (export_dir, 'seaflow')
    seaflow.to_csv(export_path, index=False)
    #ip.mapTo180180(export_path, 'lon')   # only use if necessary
    ip.sortByDepthLatLon(seaflow, export_path, 'lon', 'lat', 'cruise')
    return export_path

Exemple #4

0

Afficher le fichier

def makeHL2A_diel_metagenomics(rawFilePath, rawFileName, tableName):
    """Convert the raw workbook's 'data' sheet into an export CSV.

    The sheet is cleaned through the ``ip`` helpers (missing-row removal,
    NaN-to-None, datatype handling, ID column, de-duplication), written to
    the export directory and then passed to ``ip.sortByTimeLatLonDepth``.

    Args:
        rawFilePath: Directory prefix of the raw workbook.
        rawFileName: File name of the raw workbook.
        tableName: Used as the stem of the exported CSV file name.

    Returns:
        The path of the exported CSV file.
    """
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, tableName)
    # `sep` is a CSV-only option; read_excel does not accept it and raises
    # TypeError on current pandas, so it is not passed here.
    # NOTE(review): `usecols` is not defined in this function; it must be
    # available at module level.
    df = pd.read_excel(path, sheet_name='data', usecols=usecols)
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path

Exemple #5

0

Afficher le fichier

Fichier : insertBottle_Chisholm.py Projet : mdashkezari/opedia

def makeBottle_Chisholm(rawFilePath, rawFileName, tableName):
    """Export the 'data' sheet of a bottle workbook as a CSV file.

    Args:
        rawFilePath: Directory prefix of the raw workbook.
        rawFileName: File name of the raw workbook.
        tableName: Used as the stem of the exported CSV file name.

    Returns:
        The path of the exported CSV file.
    """
    workbook = rawFilePath + rawFileName
    bottle = pd.read_excel(workbook, 'data')

    bottle = ip.removeMissings(['time', 'lat', 'lon', 'depth'], bottle)
    bottle = ip.colDatatypes(bottle)
    bottle['ID'] = None

    export_dir = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (export_dir, tableName)
    # NOTE(review): the sort helper is invoked before the CSV is written;
    # if the helper writes to export_path, the to_csv call below would
    # overwrite its output -- confirm the intended order.
    ip.sortByDepthLatLon(bottle, export_path, 'lon', 'lat', 'depth')
    bottle.to_csv(export_path, index=False)
    print('export path: ', export_path)
    return export_path

Exemple #6

0

Afficher le fichier

def makeSeaFlow(rawFilePath, rawFileName, tableName):
    """Pivot per-population SeaFlow rows into wide columns and export a CSV.

    Only rows with ``flag == 0`` are kept. For each population
    (prochloro, synecho, croco, picoeuk, unknown) four columns are derived:

    * ``<pop>_abundance``: 'abundance' where pop matches and quantile is 50
    * ``<pop>_diameter``: 'diam_mid' where pop matches
    * ``<pop>_carbon_content``: 'Qc_mid' where pop matches
    * ``<pop>_biomass``: abundance * carbon_content

    Rows that do not match get NaN. Matching ``total_*`` columns are
    computed over every population except 'beads'.

    Args:
        rawFilePath: Directory prefix of the raw CSV.
        rawFileName: File name of the raw CSV.
        tableName: Used as the stem of the exported CSV file name.

    Returns:
        The path of the exported CSV file.
    """
    populations = ('prochloro', 'synecho', 'croco', 'picoeuk', 'unknown')
    suffixes = ('_abundance', '_diameter', '_carbon_content', '_biomass')

    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, tableName)
    df = pd.read_csv(path, sep=',')
    # Work on an explicit copy: assigning new columns to a boolean-filtered
    # frame otherwise triggers pandas' SettingWithCopyWarning.
    df = df[df['flag'] == 0].copy()

    is_q50 = df['quantile'] == 50

    for pop in populations:
        is_pop = df['pop'] == pop
        df[pop + '_abundance'] = np.where(is_pop & is_q50, df['abundance'], np.nan)
        df[pop + '_diameter'] = np.where(is_pop, df['diam_mid'], np.nan)
        df[pop + '_carbon_content'] = np.where(is_pop, df['Qc_mid'], np.nan)
        # Cast both factors to float for every population (previously done
        # for prochloro only) so the product is computed the same way.
        df[pop + '_biomass'] = (df[pop + '_abundance'].astype(float)
                                * df[pop + '_carbon_content'].astype(float))

    not_beads = df['pop'] != 'beads'
    df['total_abundance'] = np.where(not_beads & is_q50, df['abundance'], np.nan)
    df['total_carbon_content'] = np.where(not_beads, df['Qc_mid'], np.nan)
    df['total_biomass'] = (df['total_abundance'].astype(float)
                           * df['total_carbon_content'].astype(float))

    # Same column order as before: coordinates, the four derived columns of
    # each population, then total_biomass and par.
    # NOTE(review): total_abundance and total_carbon_content are not in this
    # list -- confirm that leaving them out is intended.
    columns = ['time', 'lat', 'lon', 'depth']
    for pop in populations:
        columns.extend(pop + suffix for suffix in suffixes)
    columns.extend(['total_biomass', 'par'])
    df = ip.arrangeColumns(columns, df)

    df = ip.removeMissings(['time', 'lat', 'lon'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLon(df, export_path, 'time', 'lat', 'lon')
    print('export path: ', export_path)
    return export_path

Exemple #7

0

Afficher le fichier

def makeBulkBottle_HOT(station):
    """Load and clean the HOT bottle CSV for one station.

    Args:
        station: Station identifier, interpolated into 'bottle_%s.csv' and
            passed to ``stationLoc``.

    Returns:
        The cleaned DataFrame, with constant 'lat' and 'lon' columns taken
        from ``stationLoc(station)``.
    """
    missing_marker = -9
    raw_csv = cfgv.rep_hot_raw + 'bottle_%s.csv' % station
    bottle = pd.read_csv(raw_csv)
    # Discard the column whose header is a single blank, when present.
    if ' ' in bottle.columns:
        bottle = bottle.drop(' ', axis=1)
    bottle = bottle.drop(bottle.index[[0]])   ## remove the units row
    # Strip blanks from the remaining column names.
    bottle.columns = bottle.columns.str.replace(' ', '')
    bottle = bottle.apply(pd.to_numeric)
    # Turn the -9 marker into an empty string before the missing-row filter.
    bottle = bottle.replace(missing_marker, '')
    bottle = ip.removeMissings(['date'], bottle)
    formatDate_Columns(bottle, ['date'])
    lat, lon = stationLoc(station)
    bottle['lat'] = lat
    bottle['lon'] = lon
    return bottle

Exemple #8

0

Afficher le fichier

def makeSingleCellGenomes_Chisholm(rawFilePath, rawFileName, tableName):
    """Export the 'data' sheet of a single-cell-genomes workbook as CSV.

    Args:
        rawFilePath: Directory prefix of the raw workbook.
        rawFileName: File name of the raw workbook.
        tableName: Used as the stem of the exported CSV file name.

    Returns:
        The path of the exported CSV file.
    """
    export_path = '%s%s.csv' % (cfgv.opedia_proj + 'db/dbInsert/export/',
                                tableName)
    genomes = pd.read_excel(rawFilePath + rawFileName, 'data')
    genomes = ip.removeMissings(['time', 'lat', 'lon', 'depth'], genomes)
    # Remaining clean-up helpers, applied in their original order.
    for cleanup in (ip.NaNtoNone, ip.colDatatypes, ip.convertYYYYMMDD,
                    ip.addIDcol, ip.removeDuplicates):
        genomes = cleanup(genomes)
    genomes.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(genomes, export_path,
                             'time', 'lat', 'lon', 'depth')
    # NOTE(review): the frame is written a second time after the sort
    # helper ran; confirm this does not overwrite the helper's output.
    genomes.to_csv(export_path, index=False)
    print('export path: ', export_path)
    return export_path

Exemple #9

0

Afficher le fichier

def makeFlombaum(rawFilePath, rawFileName, tableName):
    """Convert the raw workbook's 'data' sheet into an export CSV.

    After cleaning through the ``ip`` helpers, 'lon' is replaced by its
    absolute value, the CSV is written, and the exported file is passed to
    ``ip.mapTo180180`` and ``ip.sortByTimeLatLonDepth``.

    Args:
        rawFilePath: Directory prefix of the raw workbook.
        rawFileName: File name of the raw workbook.
        tableName: Used as the stem of the exported CSV file name.

    Returns:
        The path of the exported CSV file.
    """
    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, tableName)
    # `sep` is a CSV-only option; read_excel does not accept it and raises
    # TypeError on current pandas, so it is not passed here.
    df = pd.read_excel(path, sheet_name='data')
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.convertYYYYMMDD(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    # NOTE(review): abs() discards the sign of every longitude before the
    # -180..180 mapping below -- confirm this matches the raw data's
    # longitude convention.
    df['lon'] = df['lon'].abs()
    df.to_csv(export_path, index=False)
    ip.mapTo180180(export_path, 'lon')
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path

Exemple #10

0

Afficher le fichier

def makeGlobal_PicoPhytoPlankton(rawFilePath, rawFileName, tableName):
    """Convert the global picophytoplankton workbook into an export CSV.

    Builds a 'time' column from the 'year', 'month' and 'day' columns
    (rows whose day is '-9' or '-1' are dropped; the short years '10',
    '11' and '6' are expanded to 2010, 2011 and 2006), renames the raw
    columns, then cleans, exports and sorts the table.

    Args:
        rawFilePath: Directory prefix of the raw workbook.
        rawFileName: File name of the raw workbook.
        tableName: Used as the stem of the exported CSV file name.

    Returns:
        The path of the exported CSV file.
    """
    # Raw column name -> exported column name, applied in this order.
    renames = (
        ('Lat', 'lat'),
        ('Long', 'lon'),
        ('Depth', 'depth'),
        ('PromL', 'prochlorococcus_abundance'),
        ('SynmL', 'synechococcus_abundance'),
        ('PEukmL', 'picoeukaryote_abundance'),
        ('pico_abund', 'picophytoplankton_abundance'),
        ('picophyto [ug C/L]', 'picophytoplankton_biomass'),
    )

    path = rawFilePath + rawFileName
    exportBase = cfgv.opedia_proj + 'db/dbInsert/export/'
    export_path = '%s%s.csv' % (exportBase, tableName)
    # `sep` is a CSV-only option; read_excel does not accept it and raises
    # TypeError on current pandas, so it is not passed here.
    # NOTE(review): `usecols` is not defined in this function; it must be
    # available at module level.
    df = pd.read_excel(path, sheet_name='data', usecols=usecols)
    df['year'] = df['year'].astype('str')
    df['month'] = df['month'].astype('str').apply(lambda x: x.zfill(2))
    df['day'] = df['day'].astype('str').apply(lambda x: x.zfill(2))
    print(len(df))
    # Copy after filtering so the column assignments below do not write to
    # a slice of the original frame (SettingWithCopyWarning).
    df = df[(df['day'] != '-9') & (df['day'] != '-1')].copy()

    # Short-form years are expanded rather than dropped.
    df['year'] = df['year'].replace({'10': '2010', '11': '2011', '6': '2006'})
    df['time'] = pd.to_datetime(df[['year', 'month', 'day']], format='%Y%m%d')
    for old_name, new_name in renames:
        ip.renameCol(df, old_name, new_name)
    # Keep the returned frame, as the other removeColumn call sites do.
    df = ip.removeColumn(['year', 'day', 'month'], df)
    df = ip.reorderCol(df, [
        'time', 'lat', 'lon', 'depth', 'prochlorococcus_abundance',
        'synechococcus_abundance', 'picoeukaryote_abundance',
        'picophytoplankton_abundance', 'picophytoplankton_biomass'
    ])
    df = ip.removeMissings(['time', 'lat', 'lon', 'depth'], df)
    df = ip.NaNtoNone(df)
    df = ip.colDatatypes(df)
    df = ip.addIDcol(df)
    df = ip.removeDuplicates(df)
    df.to_csv(export_path, index=False)
    ip.sortByTimeLatLonDepth(df, export_path, 'time', 'lat', 'lon', 'depth')
    print('export path: ', export_path)
    return export_path