Python StringIO.fillna Examples

Programming Language: Python

Namespace/Package Name: io

Class/Type: StringIO

Method/Function: fillna

Examples at hotexamples.com: 2

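Python StringIO.fillna - 2 examples found. These are the top-rated real-world Python examples matched to io.StringIO.fillna, extracted from open source projects. Note that io.StringIO itself has no fillna method: in the examples below, fillna is called on pandas DataFrames, while StringIO is only used to read downloaded text line by line. You can rate examples to help us improve the quality of examples.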

Frequently Used Methods

Show Hide

StringIO(30)

close(30)

readlines(30)

readline(30)

read(30)

name(30)

getvalue(30)

flush(30)

__init__(30)

expandtabs(18)

decode(15)

fileno(13)

isatty(10)

buffer(6)

__next__(5)

content_type(5)

find(4)

append(3)

filename(3)

encode(3)

getValue(3)

id(2)

getheaders(2)

next(2)

get(2)

isspace(2)

find_clades(2)

iterrows(2)

fillna(2)

get_value(2)

mod_date(2)

buf(2)

encoding(2)

drop(2)

disposition(2)

release_conn(2)

create_date(2)

endswith(2)

read_date(2)

readable(2)

really_close(1)

mtime(1)

__exit__(1)

_complain_ifclosed(1)

old_write(1)

readinto(1)

original_close(1)

readLine(1)

pop(1)

query(1)

Example #1

Show file

File: CPC_CLIMATE.py Project: usbr/PyForecast

# Seconds to wait on any single HTTP request before giving up.
_REQUEST_TIMEOUT = 60

_NINO_SST_COLUMN = 'Nino3.4 SST | Indice | degC'
_NINO_ANOM_COLUMN = 'Nino3.4 ANOM | Indice | degC'


def _fetch_lines(url):
    """Download url and return its UTF-8 decoded body as a list of lines."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return StringIO(response.content.decode('utf-8')).readlines()


def _clip_to_range(df, startDate, endDate):
    """Keep only the rows whose index lies within [startDate, endDate]."""
    return df[(df.index >= startDate) & (df.index <= endDate)]


def _on_daily_range(df, startDate, endDate):
    """Column-join df onto an empty frame indexed by every day in the range."""
    calendar = pd.DataFrame(index=pd.date_range(startDate, endDate))
    return pd.concat([calendar, df], axis=1)


def _fill_through_month_end(df):
    """Extend a daily frame to the end of its final month and forward-fill it.

    The number of days comes from the timestamp itself, so a final month of
    February is handled correctly in leap years.
    """
    lastDate = df.index[-1]
    monthEnd = lastDate.replace(day=lastDate.days_in_month)
    df = df.reindex(pd.date_range(df.index[0], monthEnd, freq='D'))
    return df.ffill()


def _load_nino34(startDate, endDate):
    """Return daily Nino3.4 SST and anomaly columns over the requested range.

    Weekly values are used wherever they exist; monthly values (stamped on
    the 15th of each month) cover the period before the first weekly record.
    """
    urlMonth = 'http://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices'
    urlWeek = 'http://www.cpc.ncep.noaa.gov/data/indices/wksst8110.for'
    columns = [_NINO_SST_COLUMN, _NINO_ANOM_COLUMN]

    # Monthly file: one header line, then whitespace-separated fields.
    timestamps, ssts, anoms = [], [], []
    for line in _fetch_lines(urlMonth)[1:]:
        values = line.split()
        if not values:
            continue  # tolerate blank lines
        stamp = str(values[0]) + str(values[1]).zfill(2) + '15'
        timestamps.append(pd.to_datetime(stamp, format='%Y%m%d'))
        ssts.append(float(values[8]))
        anoms.append(float(values[9]))
    dfMonth = pd.DataFrame(np.array([ssts, anoms]).T,
                           index=timestamps,
                           columns=columns)

    # Weekly file: four header lines, then fields separated by five spaces.
    # The fourth field carries the SST in its first four characters and the
    # anomaly in the remainder.
    timestamps, ssts, anoms = [], [], []
    for line in _fetch_lines(urlWeek)[4:]:
        if not line.strip():
            continue  # tolerate blank lines
        values = line.split('     ')
        timestamps.append(pd.to_datetime(values[0]))
        ssts.append(float(values[3][:4]))
        anoms.append(float(values[3][4:]))
    dfWeek = pd.DataFrame(np.array([ssts, anoms]).T,
                          index=timestamps,
                          columns=columns)

    # Merge the 2 datasets, keeping all the weekly data and cutting some monthly
    dfMonth = dfMonth[dfMonth.index < dfWeek.index[0]]
    dfCombined = pd.concat([dfMonth, dfWeek]).resample('D').mean().ffill()
    dfCombined = dfCombined[dfCombined.index >= startDate]

    df = _on_daily_range(dfCombined, startDate, endDate)
    return _clip_to_range(df, startDate, endDate)


def _load_pna_daily(startDate, endDate):
    """Return the daily PNA index ('PNA' column) over the requested range."""
    url = "ftp://ftp.cpc.ncep.noaa.gov/cwlinks/norm.daily.pna.index.b500101.current.ascii"

    def parseDay(text):
        # Some day fields contain '*'; only their first two characters are used.
        return int(text[:2]) if '*' in text else int(text)

    readOptions = dict(names=['year', 'month', 'day', 'PNA'],
                       sep=r'\s+',
                       converters={
                           "year": int,
                           "month": int,
                           "day": parseDay
                       })
    try:
        # pandas >= 1.3 spelling; `error_bad_lines` was removed in pandas 2.0.
        dataDaily = pd.read_csv(url, on_bad_lines='skip', **readOptions)
    except TypeError:
        # Older pandas does not know `on_bad_lines`; the unexpected keyword is
        # rejected when the call is made, so fall back to the legacy flag.
        dataDaily = pd.read_csv(url, error_bad_lines=False, **readOptions)

    dataDaily.index = pd.to_datetime(dataDaily[['year', 'month', 'day']])
    dataDaily = dataDaily[['PNA']].ffill()
    dataDaily = _clip_to_range(dataDaily, startDate, endDate)
    return _on_daily_range(dataDaily, startDate, endDate)


def _load_year_by_month_table(url, footerMarker, missingValue, startDate,
                              endDate):
    """Return a daily 'value' column from a year-by-month text table.

    The file is read as one 'year' column plus twelve month columns. The
    row whose first field equals footerMarker, the row before it, and
    everything after are discarded. Cells equal to missingValue become NaN.
    """
    monthColumns = [str(month) for month in range(1, 13)]
    table = pd.read_csv(url,
                        skiprows=1,
                        names=['year'] + monthColumns,
                        sep=r'\s+')
    lastRow = table.index[table['year'] == footerMarker].tolist()[0] - 1
    table = table[table.index < lastRow]
    table = table.melt(id_vars=['year'], var_name='month')

    # Build zero-padded 'YYYY-MM' strings so they match the format exactly.
    dates = [
        str(year) + '-' + str(month).zfill(2)
        for year, month in zip(table['year'], table['month'])
    ]
    table.index = pd.DatetimeIndex(pd.to_datetime(dates, format='%Y-%m'))
    table = table.sort_index()

    values = pd.to_numeric(table['value']).replace(missingValue, np.nan)
    df = _fill_through_month_end(values.to_frame().asfreq('D'))
    return _clip_to_range(df, startDate, endDate)


def _load_pdo(startDate, endDate):
    """Return the daily PDO index ('PDO' column) from the NCDC JSON feed."""
    url = "https://www.ncdc.noaa.gov/teleconnections/pdo/data.json"
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()['data']

    dates = [pd.to_datetime(key, format='%Y%m') for key in data]
    values = [float(val) for val in data.values()]
    # -99.99 is mapped to NaN so it gets forward-filled instead of kept.
    values = [np.nan if x == -99.99 else x for x in values]

    df = pd.DataFrame(values, index=dates, columns=['PDO']).asfreq('D')
    df = _fill_through_month_end(df)
    return _clip_to_range(df, startDate, endDate)


def _load_ao_monthly(startDate, endDate):
    """Return the monthly AO index ('AOI | Indice'), forward-filled to daily."""
    url = "https://www.cpc.ncep.noaa.gov/products/precip/CWlink/daily_ao_index/monthly.ao.index.b50.current.ascii"
    dataMonth = pd.read_csv(url,
                            names=['year', 'month', 'AOI | Indice'],
                            sep=r'\s+')
    # Each monthly value is stamped on the first day of its month.
    datetimes = pd.to_datetime(dataMonth[['year', 'month']].assign(day=1))
    dataMonth = dataMonth[['AOI | Indice']].set_index(
        pd.DatetimeIndex(datetimes))
    dataMonth = dataMonth.resample('D').mean().ffill()
    dataMonth = _clip_to_range(dataMonth, startDate, endDate)
    return _on_daily_range(dataMonth, startDate, endDate)


def dataLoader(stationDict, startDate, endDate):
    """Download a climate index and return it as a daily DataFrame.

    The dataset is selected by ``int(stationDict['ID'])``:

    1 - Nino3.4 sea surface temperature ('Nino3.4 SST | Indice | degC')
    2 - Nino3.4 anomaly ('Nino3.4 ANOM | Indice | degC')
    3 - daily PNA index ('PNA')
    4 - AMO index ('value')
    5 - PDO index ('PDO')
    6 - Arctic Oscillation index ('AOI | Indice')
    7 - Southern Oscillation Index ('value')
    8 - Multivariate ENSO Index ('value')

    Args:
        stationDict: mapping with an 'ID' entry that ``int()`` accepts.
        startDate: first day to return; passed to ``pd.date_range`` and
            compared against the datetime index.
        endDate: last day to return (inclusive); used the same way.

    Returns:
        A DataFrame indexed by date with the single column named above.
        IDs 1, 2, 3 and 6 are indexed by every day in
        [startDate, endDate], with NaN where no data exists. IDs 4, 5, 7
        and 8 contain only the days covered by the source data within
        that range. Any other ID yields an empty DataFrame.

    Raises:
        requests.HTTPError: when an HTTP download made through ``requests``
            returns an error status.
    """
    # Figure out which indice we are downloading
    stationNum = int(stationDict['ID'])

    if stationNum in (1, 2):
        df = _load_nino34(startDate, endDate)
        # ID 1 keeps the SST column, ID 2 keeps the anomaly column.
        unwanted = _NINO_ANOM_COLUMN if stationNum == 1 else _NINO_SST_COLUMN
        return df.drop(columns=[unwanted])

    if stationNum == 3:
        return _load_pna_daily(startDate, endDate)

    if stationNum == 4:
        # AMO Index
        return _load_year_by_month_table(
            'https://www.esrl.noaa.gov/psd/data/correlation/amon.us.long.data',
            'AMO', -99.990, startDate, endDate)

    if stationNum == 5:
        return _load_pdo(startDate, endDate)

    if stationNum == 6:
        return _load_ao_monthly(startDate, endDate)

    if stationNum == 7:
        # Southern Oscillation Index (SOI)
        return _load_year_by_month_table(
            "https://www.esrl.noaa.gov/psd/data/correlation/soi.data",
            'SOI', -99.990, startDate, endDate)

    if stationNum == 8:
        # Multivariate ENSO Index (MEI)
        return _load_year_by_month_table(
            "https://psl.noaa.gov/enso/mei/data/meiv2.data",
            'Multivariate', -999.00, startDate, endDate)

    return pd.DataFrame()

Example #2

Show file

def dataLoader(stationDict, startDate, endDate):
    """
    This dataloader loads climate indices published by NOAA (the URLs used
    below point at CPC, ESRL/PSD and NCDC hosts). The datasets
    are climate indices that are useful for long-range precipitation forecasting.
    The "DatasetExternalID" option specifies which dataset should be downloaded. Valid
    parameters are:
    'nino3.4' - Nino 3.4 Sea Surface Temperature Anomaly (aka ENSO)
    'pna' - Pacific North American Index
    'amo' - Atlantic Multidecadal Oscillation
    'pdo' - Pacific Decadal Oscillation
    DEFAULT OPTIONS
    DatasetExternalID: nino3.4

    Args:
        stationDict: mapping with a 'DatasetExternalID' entry.
        startDate: first day to return; passed to ``pd.date_range`` and
            compared against the datetime index.
        endDate: last day to return (inclusive); used the same way.

    Returns:
        A single-column DataFrame indexed by date. 'nino3.4' and 'pna' are
        indexed by every day in [startDate, endDate], with NaN where no
        data exists. 'amo' and 'pdo' contain only the days covered by the
        source data within that range. Any other identifier yields an
        empty DataFrame.

    Raises:
        requests.HTTPError: when a download made through ``requests``
            returns an error status.
    """
    # Seconds to wait on any single HTTP request before giving up.
    requestTimeout = 60

    # Figure out which indice we are downloading
    datasetID = stationDict['DatasetExternalID']

    # Grab the ENSO Nino3.4 anomaly data
    if datasetID == 'nino3.4':
        anomColumn = 'Nino3.4 ANOM | Indice | degC'

        # We'll get as much weekly data as we can, then backfill with monthly data
        urlMonth = 'http://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices'
        urlWeek = 'http://www.cpc.ncep.noaa.gov/data/indices/wksst8110.for'

        responseMonth = requests.get(urlMonth, timeout=requestTimeout)
        responseWeek = requests.get(urlWeek, timeout=requestTimeout)
        # Fail loudly on an HTTP error instead of parsing an error page.
        responseMonth.raise_for_status()
        responseWeek.raise_for_status()

        # Monthly file: one header line, then whitespace-separated fields.
        # Each month is stamped on its 15th day.
        linesMonth = StringIO(responseMonth.content.decode('utf-8')).readlines()
        timestamps = []
        anoms = []
        for line in linesMonth[1:]:
            values = line.split()
            if not values:
                continue  # tolerate blank lines
            stamp = str(values[0]) + str(values[1]).zfill(2) + '15'
            timestamps.append(pd.to_datetime(stamp, format='%Y%m%d'))
            anoms.append(float(values[9]))
        dfMonth = pd.DataFrame(np.array(anoms),
                               index=timestamps,
                               columns=[anomColumn])

        # Weekly file: four header lines, then fields separated by five
        # spaces; the anomaly is the tail of the fourth field.
        linesWeek = StringIO(responseWeek.content.decode('utf-8')).readlines()
        timestamps = []
        anoms = []
        for line in linesWeek[4:]:
            if not line.strip():
                continue  # tolerate blank lines
            values = line.split('     ')
            timestamps.append(pd.to_datetime(values[0]))
            anoms.append(float(values[3][4:]))
        dfWeek = pd.DataFrame(np.array(anoms),
                              index=timestamps,
                              columns=[anomColumn])

        # Merge the 2 datasets, keeping all the weekly data and cutting some monthly
        dfMonth = dfMonth[dfMonth.index < dfWeek.index[0]]
        dfCombined = pd.concat([dfMonth, dfWeek]).resample('D').mean().ffill()
        dfCombined = dfCombined[dfCombined.index >= startDate]

        df = pd.DataFrame(index=pd.date_range(startDate, endDate))
        df = pd.concat([df, dfCombined], axis=1)
        return df[(df.index >= startDate) & (df.index <= endDate)]

    # Otherwise, we'll grab the PNA dataset
    if datasetID == 'pna':
        url = "http://www.cpc.ncep.noaa.gov/products/precip/CWlink/pna/norm.pna.monthly.b5001.current.ascii"
        dataMonth = pd.read_csv(url,
                                names=['year', 'month', 'PNA | Indice'],
                                sep=r'\s+')
        # Each monthly value is stamped on the first day of its month.
        datetimes = pd.to_datetime(dataMonth[['year', 'month']].assign(day=1))
        dataMonth = dataMonth[['PNA | Indice']].set_index(
            pd.DatetimeIndex(datetimes))
        dataMonth = dataMonth.resample('D').mean().ffill()
        # Clip on both sides: without the upper bound the outer join below
        # would return rows later than endDate.
        dataMonth = dataMonth[(dataMonth.index >= startDate)
                              & (dataMonth.index <= endDate)]
        df = pd.DataFrame(index=pd.date_range(startDate, endDate))
        return pd.concat([df, dataMonth], axis=1)

    if datasetID == 'amo':
        # AMO Index: a year-by-month text table followed by footer lines.
        url = 'https://www.esrl.noaa.gov/psd/data/correlation/amon.us.long.data'
        monthColumns = [str(month) for month in range(1, 13)]
        df = pd.read_csv(url,
                         skiprows=1,
                         names=['year'] + monthColumns,
                         sep=r'\s+')
        # Drop the footer: the row whose first field is 'AMO', the row just
        # before it, and everything after.
        lastRow = df.index[df['year'] == 'AMO'].tolist()[0] - 1
        df = df[df.index < lastRow]
        df = df.melt(id_vars=['year'], var_name='month')
        # Build zero-padded 'YYYY-MM' strings so they match the format exactly.
        dates = [
            str(year) + '-' + str(month).zfill(2)
            for year, month in zip(df['year'], df['month'])
        ]
        df.index = pd.DatetimeIndex(pd.to_datetime(dates, format='%Y-%m'))
        df = df.sort_index()
        # -99.990 is mapped to NaN so it gets forward-filled instead of kept.
        values = pd.to_numeric(df['value']).replace(-99.990, np.nan)
        df = values.to_frame().asfreq('D').ffill()
        return df[(df.index >= startDate) & (df.index <= endDate)]

    if datasetID == 'pdo':
        # Pacific Decadal Oscillation (PDO)
        url = "https://www.ncdc.noaa.gov/teleconnections/pdo/data.json"
        response = requests.get(url, timeout=requestTimeout)
        response.raise_for_status()
        data = response.json()['data']
        dates = [pd.to_datetime(key, format='%Y%m') for key in data]
        values = [float(val) for val in data.values()]
        # -99.99 is mapped to NaN so it gets forward-filled instead of kept.
        values = [np.nan if x == -99.99 else x for x in values]
        df = pd.DataFrame(values, index=dates, columns=['PDO'])
        df = df.asfreq('D').ffill()
        return df[(df.index >= startDate) & (df.index <= endDate)]

    return pd.DataFrame()