Example #1
0
def rotate(i_df,
           site,
           date):
    """Rotate XYZ to HDZ for selected sites, append
    to existing DataFrame and return.

    Parameters
    ----------
    i_df : DataFrame
        IMAGE magnetometer data loaded with image.load()
    site : List[str]
        List of sites to rotate
    date : str or datetime-like
        Date to load declination for

    Returns
    -------
    i_df : DataFrame
        DataFrame with HD magnetic field coordinates
        and station cgm coordinates, lshell and
        declination
    """
    dt = pd.to_datetime(date)

    for stn in site:
        stn_dat = utils.load_station_coor(param=stn, year=dt.year)
        # if the station metadata can't be found skip this
        # station but keep processing the remaining ones
        # (previously this returned early, silently dropping
        # every station after the missing one)
        if stn_dat is None:
            continue
        dec = float(stn_dat['declination'])
        # hoist the trig factors; they are constant per station
        cos_d = np.cos(np.deg2rad(dec))
        sin_d = np.sin(np.deg2rad(dec))

        # some of the IMAGE magnetometers
        # have negative Z values. Z should
        # always be positive. These mags are
        # likely measuring variations which
        # we can't calculate H and D for.
        if any(i_df[stn+'_Z'] < 0):
            i_df[stn+'_H'] = np.nan
            i_df[stn+'_D'] = np.nan
        else:
            x = i_df[stn+'_X'].astype(float)
            y = i_df[stn+'_Y'].astype(float)
            # standard XY -> HD rotation through the declination
            i_df[stn+'_H'] = x * cos_d + y * sin_d
            i_df[stn+'_D'] = y * cos_d - x * sin_d

        # fill in station coordinate info
        # this would be better as metadata
        # but not possible in pandas
        i_df[stn+'_declination'] = dec
        i_df[stn+'_cgmlat'] = float(stn_dat['cgm_latitude'])
        i_df[stn+'_cgmlon'] = float(stn_dat['cgm_longitude'])
        i_df[stn+'_lshell'] = float(stn_dat['lshell'])
        i_df[stn+'_mlt'] = float(stn_dat['mlt_midnight'])

    return i_df
Example #2
0
def rotate(i_df,
           site,
           date):
    """Rotate XYZ to HDZ for selected sites, append
    to existing DataFrame and return.

    Parameters
    ----------
    i_df : DataFrame
        CARISMA magnetometer data loaded with image.load()
    site : List[str]
        List of sites to rotate
    date : str or datetime-like
        Date to load declination for

    Returns
    -------
    i_df : DataFrame
        DataFrame with HD magnetic field coordinates
        and station cgm coordinates, lshell and
        declination
    """
    dt = pd.to_datetime(date)
    # get a list of column names
    c_name = list(i_df.columns.values)

    for stn in site:
        stn = stn.upper()
        # skip stations that are not present in the frame
        if stn+'_X' not in c_name:
            continue

        stn_dat = utils.load_station_coor(param=stn, year=dt.year)
        # if the station metadata can't be found skip this
        # station but keep processing the remaining ones
        # (previously this returned early, silently dropping
        # every station after the missing one)
        if stn_dat is None:
            continue
        dec = float(stn_dat['declination'])
        # hoist the trig factors; they are constant per station
        cos_d = np.cos(np.deg2rad(dec))
        sin_d = np.sin(np.deg2rad(dec))

        x = i_df[stn+'_X'].astype(float)
        y = i_df[stn+'_Y'].astype(float)
        # standard XY -> HD rotation through the declination
        i_df[stn+'_H'] = x * cos_d + y * sin_d
        i_df[stn+'_D'] = y * cos_d - x * sin_d

        # fill in station coordinate info
        # this would be better as metadata
        # but not possible in pandas
        i_df[stn+'_declination'] = dec
        i_df[stn+'_cgmlat'] = float(stn_dat['cgm_latitude'])
        i_df[stn+'_cgmlon'] = float(stn_dat['cgm_longitude'])
        i_df[stn+'_lshell'] = float(stn_dat['lshell'])
        i_df[stn+'_mlt'] = float(stn_dat['mlt_midnight'])

    return i_df
Example #3
0
def get_spec(site,
             sdate,
             edate=None,
             ndays=1,
             psd_dir=local_dir,
             fmin=0.80,
             fmax=15.01,
             add_omni: bool = False,
             omni_lags=[0],
             verbose: bool = False):
    """Returns a Pandas DataFrame containing the PSD spectrum, station name,
    and magnetic local time (MLT) of the station for a set of dates.

    Will loop through multiple stations if provided.

    Parameters
    ----------
    site : str
        String or array of strings with the station names
        which the spectra will be loaded
    sdate : str or datetime-like
        Initial day to be loaded
    edate : str or datetime-like
        Final day to be loaded
    ndays : int, optional
        Number of days to load if edate is not defined, by default 1
    psd_dir : str, optional
        Directory of PSD files, by default local_dir
    fmin : float, optional
        Minimum frequency (mHz) to load, by default 0.80
    fmax : float, optional
        Maximum frequency (mHz) to load, by default 15.01
    add_omni : bool, optional
        Add hourly omni data to data frame, by default False
    omni_lags : list, optional
        Add lagged omni data; list is a list of hourly lags to include,
        by default [0], only include the current hours omni data.
        This is attached to each spectra or row in the data frame
        and so the returned DataFrame can get large quite quickly if
        looking at large lag times.
    verbose : bool, optional
        Print some simple messages, by default False

    Returns
    -------
    Pandas DataFrame
        DataFrame containing the spectrum, station, MLT, frequency resolution
    """
    if add_omni:
        # accept scalar or ndarray lag specifications
        if type(omni_lags) is int:
            omni_lags = [omni_lags]
        elif type(omni_lags) is np.ndarray:
            omni_lags = omni_lags.tolist()

        # pad the omni load window by the largest lag
        max_l = int(max(omni_lags) + 1)
        o_dat = omni.load(
            pd.to_datetime(sdate) - timedelta(hours=max_l),
            pd.to_datetime(edate) + timedelta(hours=max_l))

        print('Adding omni data with lags:{0}'.format(omni_lags))

    if type(site) is str:
        site = [site.upper()]

    # get a list of days to loop over
    if edate is not None:
        d_arr = pd.Series(pd.date_range(start=sdate, end=edate, freq='D'))
    else:
        d_arr = pd.Series(pd.date_range(start=sdate, periods=ndays, freq='D'))

    # convert fmin and fmax to Hz
    fmin = fmin / 1000.
    fmax = fmax / 1000.

    df_r = pd.DataFrame()

    for stn in site:
        yr = -1
        for di, dt in d_arr.iteritems():
            # hourly time axis for this day's 24 spectra
            df_t = pd.Series(pd.date_range(dt, freq='H', periods=24))

            fn = '{0:04d}{1:02d}{2:02d}{3}_psd.txt.gz'.format(
                dt.year, dt.month, dt.day, stn.upper())

            fn = os.path.join(psd_dir, '{0:04d}'.format(dt.year),
                              '{0:02d}'.format(dt.month),
                              '{0:02d}'.format(dt.day), fn)

            # read in file, skip the day
            # if the file is not found
            try:
                df_in = pd.read_csv(fn, compression='gzip', header=0)
            except FileNotFoundError:
                if verbose:
                    print("File not found {0}".format(fn))
                continue

            if verbose:
                print("File loaded {0}".format(fn))
            # drop unnecessary columns
            df_in = df_in[(df_in.freq >= fmin) & (df_in.freq <= fmax)]
            freq = df_in['freq'].copy() * 1000.
            df_in = df_in.drop(
                labels=['freq', 'station', 'decl', 'L', 'cgm_lat', 'cgm_lon'],
                axis=1)
            # get frequency resolution
            f_res = freq.iloc[1] - freq.iloc[0]
            # transpose array and drop new index column
            # convert from nT^2/Hz to nT^2/mHz
            df_in = df_in.transpose().reset_index()
            df_in = df_in.drop(labels=['index'], axis=1)
            df_in = df_in.div(1000.)
            # add time to the array
            df_in['t'] = df_t
            df_in['stn'] = stn.upper()
            df_in['f res mHz'] = f_res
            df_in = df_in.set_index('t')
            # add MLT to the array; only reload station
            # coordinates when the year changes
            if dt.year != yr:
                if verbose:
                    print(stn, dt.year)
                stn_c = utils.load_station_coor(param=stn, year=dt.year)
            mlt = np.arange(0, 24)
            mlt = (mlt + 24 - float(stn_c['mlt_midnight'])) % 24.
            df_in['mlt'] = mlt

            df_r = df_r.append(df_in)
            yr = dt.year

    # join omni data onto the accumulated frame once; the
    # original joined onto the per-day df_in and discarded
    # the result, so omni columns never reached the output
    if add_omni and not df_r.empty:
        df_r = df_r.join(o_dat, how='left')

    return df_r
Example #4
0
def sum_psd(site,
            sdate,
            edate=None,
            ndays=1,
            psd_dir=local_dir,
            fmin=0.80,
            fmax=15.01,
            add_omni: bool = False,
            omni_lags=[0],
            verbose: bool = False):
    """Returns a Pandas DataFrame containing the integrated and summed PSD.

    Will loop through multiple stations if provided.

    Parameters
    ----------
    site : str
        String or array of strings with the station names
        which the spectra will be loaded
    sdate : str or datetime-like
        Initial day to be loaded
    edate : str or datetime-like
        Final day to be loaded
    ndays : int, optional
        Number of days to load if edate is not defined, by default 1
    psd_dir : str, optional
        Directory of PSD files, by default local_dir
    fmin : float, optional
        Minimum frequency (mHz) to load, by default 0.80
    fmax : float, optional
        Maximum frequency (mHz) to load, by default 15.01
    add_omni : bool, optional
        Add hourly omni data to data frame, by default False
    omni_lags : list, optional
        Add lagged omni data; list is a list of hourly lags to include,
        by default [0], only include the current hours omni data.
        This is attached to each spectra or row in the data frame
        and so the returned DataFrame can get large quite quickly if
        looking at large lag times.
    verbose : bool, optional
        Print some simple messages, by default False

    Returns
    -------
    Pandas DataFrame
        Data frame with integrated and summed PSD between fmin and fmax,
        MLT, L-shell, station.
    """
    if add_omni:
        # accept scalar or ndarray lag specifications
        if type(omni_lags) is int:
            omni_lags = [omni_lags]
        elif type(omni_lags) is np.ndarray:
            omni_lags = omni_lags.tolist()

        # pad the omni load window by the largest lag
        max_l = int(max(omni_lags) + 1)
        o_dat = omni.load(
            pd.to_datetime(sdate) - timedelta(hours=max_l),
            pd.to_datetime(edate) + timedelta(hours=max_l))

        print('Adding omni data with lags:{0}'.format(omni_lags))

    if type(site) is str:
        site = [site.upper()]

    # get a list of days to loop over
    if edate is not None:
        d_arr = pd.Series(pd.date_range(start=sdate, end=edate, freq='D'))
    else:
        d_arr = pd.Series(pd.date_range(start=sdate, periods=ndays, freq='D'))

    df_r = pd.DataFrame()

    for stn in site:
        df_psd = pd.DataFrame()
        yr = -1
        for di, dt in d_arr.iteritems():
            # hourly array for time axis
            df_t = pd.Series(pd.date_range(dt, freq='H', periods=24))

            fn = '{0:04d}{1:02d}{2:02d}{3}_psd.txt.gz'.format(
                dt.year, dt.month, dt.day, stn.upper())

            fn = os.path.join(psd_dir, '{0:04d}'.format(dt.year),
                              '{0:02d}'.format(dt.month),
                              '{0:02d}'.format(dt.day), fn)

            # read in file, create an empty
            # (NaN-filled) day if file is not found
            try:
                df_in = pd.read_csv(fn, compression='gzip', header=0)
            except FileNotFoundError:
                if verbose:
                    print("File not found: {0}".format(fn))
                sp = pd.DataFrame({'t': df_t})
                sp['ipsd'] = np.nan
                sp['spsd'] = np.nan
                sp['mlt'] = np.nan
                sp['lshell'] = np.nan

                df_psd = df_psd.append(sp, ignore_index=True, sort=True)
                continue

            if verbose:
                print("File loaded: {0}".format(fn))
            # read in station coordinates; only reload
            # when the year changes
            if dt.year != yr:
                if verbose:
                    print(stn, dt.year)
                stn_c = utils.load_station_coor(param=stn, year=dt.year)

            # convert frequency to mHz
            df_in['freq'] = df_in['freq'] * 1000.

            # find frequency range
            gf = df_in['freq'].between(fmin, fmax)
            gd = df_in[gf]

            # sum power and convert from
            # nT^2/Hz to nT^2/mHz; min_count forces
            # NaN when any hourly bin is missing
            sx = gd.sum(axis=0, min_count=gd.shape[0])['H00':'H23']
            sx = sx / 1000.
            # integrate power
            ix = sx / (gd['freq'].max() - gd['freq'].min())

            # fill data frame
            mlt = np.arange(0, 24)
            mlt = (mlt + 24 - float(stn_c['mlt_midnight'])) % 24.
            sp = pd.DataFrame({
                't': df_t,
                'ipsd': ix.values,
                'spsd': sx.values,
                'lshell': float(stn_c['lshell']),
                'mlt': mlt
            })

            df_psd = df_psd.append(sp, ignore_index=True, sort=True)
            yr = dt.year

        if df_psd.empty:
            continue

        df_psd['ipsd'] = pd.to_numeric(df_psd['ipsd'])
        df_psd['spsd'] = pd.to_numeric(df_psd['spsd'])
        df_psd['stn'] = stn.upper()
        df_psd = df_psd.sort_values(by=['t']).reset_index(drop=True)
        df_psd = df_psd.set_index('t')
        df_psd = df_psd.dropna(subset=['ipsd'])

        # add omni data and lagged
        # omni if needed to each station
        if add_omni:
            for lags in omni_lags:
                if lags == 0:
                    o_l = o_dat
                else:
                    # shift and rename on a copy; the original
                    # mutated o_dat in place, so the shifted index
                    # and renamed columns leaked into later lags
                    # and later stations, corrupting their joins
                    o_l = o_dat.copy()
                    o_l.index = o_l.index + timedelta(hours=lags)
                    o_l.index.name = 't'
                    o_l.columns = o_dat.columns + '_{0}'.format(lags)
                df_psd = df_psd.join(o_l, how='left', sort=True)

        # accumulate per-station frames into the result
        if df_r.empty:
            df_r = df_psd.copy()
        else:
            df_r = df_r.append(df_psd)

    return df_r