Example #1
def load(cosmicFiles, tag=None, sat_id=None, altitude_bin=None):
    """
    cosmic data load routine, called by pysat
    """
    num = len(cosmicFiles)
    # make sure there are files to read
    if num != 0:
        # call separate load_files routine, segmented for possible
        # multiprocessor load; not included here, and only about a 20% benefit
        output = pysat.DataFrame(
            load_files(cosmicFiles,
                       tag=tag,
                       sat_id=sat_id,
                       altitude_bin=altitude_bin))
        output.index = pysat.utils.create_datetime_index(
            year=output.year,
            month=output.month,
            day=output.day,
            uts=output.hour * 3600. + output.minute * 60. + output.second)
        # make sure UTS strictly increasing
        output.sort_index(inplace=True)
        # use the first available file to pick out meta information
        profile_meta = pysat.Meta()
        meta = pysat.Meta()
        ind = 0
        repeat = True
        while repeat:
            try:
                data = netcdf_file(cosmicFiles[ind], mode='r', mmap=False)
                keys = data.variables.keys()
                for key in keys:
                    profile_meta[key] = {
                        'units': data.variables[key].units,
                        'long_name': data.variables[key].long_name
                    }
                # ncattrsList = data.ncattrs()
                ncattrsList = data._attributes.keys()
                for d in ncattrsList:
                    meta[d] = {'units': '', 'long_name': d}
                repeat = False
            except RuntimeError:
                # file was empty, try the next one by incrementing ind
                ind += 1
        meta['profiles'] = profile_meta
        return output, meta
    else:
        # no data
        return pysat.DataFrame(None), pysat.Meta()
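
As a point of reference, a minimal hedged sketch of how pysat drives a module-level load routine like this one. The platform/name/tag strings are illustrative and assume the module is registered with pysat:

import pysat

# hypothetical instrument strings; pysat locates the files for the
# requested day and passes them to this module's load() as cosmicFiles
inst = pysat.Instrument(platform='cosmic2013', name='gps', tag='ionprf')
inst.load(2014, 1)  # year, day of year
print(inst.data['profiles'].iloc[0].head())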
Example #2
def load(fnames, tag=None, sat_id=None):
    # create an artificial satellite data set
    parts = fnames[0].split('/')
    yr = int('20' + parts[-1][0:2])
    month = int(parts[-3])
    day = int(parts[-2])
    date = pysat.datetime(yr, month, day)
    num = 86400  # one sample per second; int(tag) override unused here
    uts = np.arange(num)
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    time_delta = date - pysat.datetime(2009, 1, 1)
    uts_root = np.mod(time_delta.total_seconds(), 5820)
    mlt = np.mod(uts_root + np.arange(num), 5820) * (24. / 5820.)
    data['mlt'] = mlt

    # do slt, 20 second offset from mlt
    uts_root = np.mod(time_delta.total_seconds() + 20, 5820)
    data['slt'] = np.mod(uts_root + np.arange(num), 5820) * (24. / 5820.)

    index = pds.date_range(date,
                           date +
                           pds.DateOffset(hours=23, minutes=59, seconds=59),
                           freq='S')
    data.index = index
    data.index.name = 'time'
    return data, meta.copy()
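
The MLT construction above is plain modular arithmetic; a standalone sketch of the same idea (5820 s per orbit, i.e. a 97 minute period and ~14.84 orbits per day, with the orbit phase scaled to a 0-24 hour ramp):

import numpy as np

seconds_per_orbit = 5820
uts = np.arange(86400)       # seconds of day
uts_root = 1234.0            # seconds into the orbit at 00:00 UT (example)
mlt = np.mod(uts_root + uts, seconds_per_orbit) * (24. / seconds_per_orbit)
assert 0.0 <= mlt.min() and mlt.max() < 24.0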
Example #3
def custom1(inst):
    out = pysat.DataFrame({'doubleMLT': inst.data.mlt * 2,
                           'tripleMLT': inst.data.mlt * 3},
                          index=inst.index)
    return {'data': out,
            'long_name': ['doubleMLTlong', 'tripleMLTlong'],
            'units': ['hours1', 'hours2']}
Example #4
def custom1(inst):
    out = pysat.DataFrame(
        {
            'doubleMLT': inst.data.mlt * 2,
            'tripleMLT': inst.data.mlt * 3
        },
        index=inst.index)
    return out
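
A hedged sketch of attaching 'add'-style custom functions like the two above, using the pysat 2.x API (both the dict return of Example #3 and the DataFrame return of Example #4 were accepted; pysat 3.x replaced this with custom_attach and modify-in-place functions):

import pysat

# 'add' tells pysat 2.x to merge the returned data into inst.data
# after every load
inst = pysat.Instrument('pysat', 'testing')
inst.custom.add(custom1, 'add')
inst.load(2009, 1)
print(inst['doubleMLT'][0:5])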
Example #5
def load(fnames,
         tag=None,
         sat_id=None,
         fake_daily_files_from_monthly=False,
         flatten_twod=True):
    """Load NASA CDAWeb CDF files

    Parameters
    ------------
    fnames : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)
    fake_daily_files_from_monthly : bool
        Some CDAWeb instrument data files are stored by month, interfering
        with pysat's functionality of loading by day. When True, this flag
        parses the daily dates appended to monthly filenames by the
        list_files routine and uses them to return data one day at a time.

    Returns
    ---------
    data : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units
        
    """

    import pysatCDF

    if len(fnames) <= 0:
        return pysat.DataFrame(None), None
    else:
        # going to use pysatCDF to load the CDF and format
        # data and metadata for pysat using some assumptions.
        # Depending upon your needs the resulting pandas DataFrame may
        # need modification
        # currently only loads one file, which handles more situations via pysat
        # than you may initially think

        if fake_daily_files_from_monthly:
            # parse out date from filename
            fname = fnames[0][0:-11]
            date = pysat.datetime.strptime(fnames[0][-10:], '%Y-%m-%d')
            with pysatCDF.CDF(fname) as cdf:
                # convert data to pysat format
                data, meta = cdf.to_pysat(flatten_twod=flatten_twod)
                # select data from monthly
                data = data.loc[date:date + pds.DateOffset(days=1) -
                                pds.DateOffset(microseconds=1), :]
                return data, meta
        else:
            # basic data return
            with pysatCDF.CDF(fnames[0]) as cdf:
                return cdf.to_pysat(flatten_twod=flatten_twod)
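
The fake_daily_files_from_monthly branch assumes list_files appended an 11-character '_YYYY-MM-DD' suffix to each monthly filename; a quick standalone check of that slicing (the path below is hypothetical):

import datetime as dt

fname = '/data/cdaweb_monthly.cdf_2009-01-15'
monthly_path = fname[0:-11]                            # real file on disk
date = dt.datetime.strptime(fname[-10:], '%Y-%m-%d')   # day to extract
print(monthly_path, date.date())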
Example #6
def load_files(files, tag=None, sat_id=None, altitude_bin=None):
    '''Loads a list of COSMIC data files, supplied by user.
    
    Returns a list of dicts, a dict for each file.
    '''

    output = [None] * len(files)
    drop_idx = []
    for (i, file) in enumerate(files):
        try:
            #data = netCDF4.Dataset(file)
            data = netcdf_file(file, mode='r', mmap=False)
            # build up dictionary with all ncattrs
            new = {}
            # get list of file attributes
            #ncattrsList = data.ncattrs()
            ncattrsList = data._attributes.keys()
            for d in ncattrsList:
                new[d] = data._attributes[d]  #data.getncattr(d)
            # load all of the variables in the netCDF
            loadedVars = {}
            keys = data.variables.keys()
            for key in keys:
                if data.variables[key][:].dtype.byteorder != '=':
                    loadedVars[key] = data.variables[key][:].byteswap(
                    ).newbyteorder()
                else:
                    loadedVars[key] = data.variables[key][:]

            new['profiles'] = pysat.DataFrame(loadedVars)

            output[i] = new
            data.close()
        except RuntimeError:
            # some of the files have zero bytes, which causes a read error
            # this stores the index of these zero byte files so I can drop
            # the Nones the gappy file leaves behind
            drop_idx.append(i)

    # drop anything that came from the zero byte files
    drop_idx.reverse()
    for i in drop_idx:
        del output[i]

    if tag == 'ionprf':
        if altitude_bin is not None:
            for out in output:
                out['profiles'].index = (
                    out['profiles']['MSL_alt'] /
                    altitude_bin).round().values * altitude_bin
                out['profiles'] = out['profiles'].groupby(
                    out['profiles'].index.values).mean()
        else:
            for out in output:
                out['profiles'].index = out['profiles']['MSL_alt']

    return output
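
The altitude binning at the end is round-to-bin followed by a groupby mean; a standalone sketch of the same operation on a toy profile:

import pandas as pds

# round each altitude to the nearest multiple of altitude_bin, then
# average all samples that share a bin
altitude_bin = 5
profile = pds.DataFrame({'MSL_alt': [101.2, 102.9, 106.1, 108.7],
                         'dens': [4.0, 3.0, 2.0, 1.0]})
profile.index = (profile['MSL_alt'] / altitude_bin).round().values \
    * altitude_bin
binned = profile.groupby(profile.index.values).mean()
print(binned)   # rows at 100, 105, 110 km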
Example #7
def load(fnames, tag='survey', sat_id=''):
    """ Load DEMETER IAP data

    Parameters
    ----------
    fnames : (list)
        List of file names
    tag : (string)
        Denotes type of file to load.  Accepted types are 'survey'; 'burst'
        will be added in the future.  (default='survey')
    sat_id : (string or NoneType)
        Specifies the satellite ID for a constellation.  Not used.
        (default='')

    Returns
    -------
    data : (pds.DataFrame)
        DataFrame of DEMETER satellite data
    meta : (pysat.Meta)
        Metadata object

    """

    if len(fnames) == 0:
        print('need list of filenames')
        return pysat.DataFrame(None), None

    # Load the desired data and cast as a DataFrame
    data = list()
    for fname in fnames:
        fdata, fmeta = demeter_methods.load_binary_file(
            fname, load_experiment_data)
        data.extend(fdata)

    data = np.vstack(data)
    data = pysat.DataFrame(data, index=data[:, 3], columns=fmeta['data names'])

    # Assign metadata
    if len(data.columns) > 0:
        meta = demeter_methods.set_metadata(name, fmeta)
    else:
        meta = pysat.Meta(None)

    return data, meta
Example #8
def load_orig(fnames, tag=None):
    import pydarn
    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)
    elif len(fnames) == 1:
        b = pydarn.sdio.sdDataOpen(pysat.datetime(1980, 1, 1),
                                   src='local',
                                   eTime=pysat.datetime(2050, 1, 1),
                                   fileName=fnames[0])

        data_list = pydarn.sdio.sdDataReadAll(b)
        sys.stdout.flush()
        in_dict = []
        for info in data_list:
            arr = np.arange(len(info.stid))
            drift_frame = pds.DataFrame(
                info.vector.__dict__,
                #index=[info.vector.mlon, info.vector.mlat])
                index=info.vector.index)
            drift_frame.index.name = 'index'
            drift_frame.sort_index(inplace=True)
            #drift_frame.index.names=['mlon', 'mlat']
            for i in arr:
                nvec = info.nvec[i]
                in_frame = drift_frame.iloc[0:nvec]
                drift_frame = drift_frame.iloc[nvec:]
                in_dict.append({
                    'stid': info.stid[i],
                    'channel': info.channel[i],
                    'noisemean': info.noisemean[i],
                    'noisesd': info.noisesd[i],
                    'gsct': info.gsct[i],
                    'nvec': info.nvec[i],
                    'pmax': info.pmax[i],
                    'vector': in_frame,
                    'start_time': info.sTime,
                    'end_time': info.eTime,
                    'vemax': info.vemax[i],
                    'vemin': info.vemin[i],
                    'pmin': info.pmin[i],
                    'programid': info.programid[i],
                    'wmax': info.wmax[i],
                    'wmin': info.wmin[i],
                    'freq': info.freq[i]
                })
        output = pds.DataFrame(in_dict)
        output.index = output.start_time
        output.drop('start_time', axis=1, inplace=True)
        return output, pysat.Meta()
    else:
        raise ValueError('Only one filename currently supported.')
Example #9
def load(fnames, tag=None, sat_id=None):
    import pysatCDF
    
    if len(fnames) <= 0:
        return pysat.DataFrame(None), None
    else:
        # pull out date appended to filename
        fname = fnames[0][0:-11]
        date = pysat.datetime.strptime(fnames[0][-10:], '%Y-%m-%d')
        with pysatCDF.CDF(fname) as cdf:
            data, meta = cdf.to_pysat()
            # pick out data for date
            data = data.loc[date:date + pds.DateOffset(days=1)
                            - pds.DateOffset(microseconds=1)]
            return data, meta
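
A sketch of why the one-day slice above works: the upper bound sits one microsecond before the next midnight, so the endpoint-inclusive .loc slice cannot pick up the first sample of the following day.

import pandas as pds

idx = pds.date_range('2009-01-01', '2009-01-31 23:00:00', freq='H')
frame = pds.DataFrame({'x': range(len(idx))}, index=idx)
date = pds.Timestamp('2009-01-10')
day = frame.loc[date:date + pds.DateOffset(days=1)
                - pds.DateOffset(microseconds=1)]
print(day.index.min(), day.index.max())   # both fall on 2009-01-10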
Example #10
def load(fnames, tag=None, inst_id=None):
    """Load CHAIN GPS Files
    Parameters
    ----------
    fnames : (list or array-like)
        series of filenames to be loaded
    tag : (string or NoneType)
        Denotes type of file to load.
        (default=None)
    inst_id : (string or NoneType)
        Specifies the satellite ID for a constellation.  Not used.
        (default=None)
    """

    if not fnames:
        warnings.warn('No filenames provided; returning empty data')
        return pysat.DataFrame(None), pysat.Meta(None)
    elif len(fnames) == 1:
        meta = pysat.Meta()
        signal_meta = pysat.Meta()

        # load the rinex
        data = gr.load(fnames[0])
        # get the metadata from the xarray.Dataset
        xr_attrs = data.attrs
        # format the metadata
        #        for at in xr_attrs:
        #            print(type(at))
        #            meta[at] = {'units': '', 'long_name': at}
        #        keys = data.data_vars.keys()
        #        for key in keys:
        #            # find the xarray equivalents of units and long_name
        #            print(key)
        #            signal_meta[key] = {meta.labels.units: var_units[key][0],
        #                                meta.labels.name: var_units[key][1]}
        #        # format the data
        #        meta['signals'] = signal_meta
        #        output = data.variables
        return data, meta
    else:
        raise ValueError('Only one filename currently supported')
Example #11
def load(fnames, tag=None, sat_id=None):
    # create an artificial satellite data set
    parts = fnames[0].split('/')
    yr = int('20' + parts[-1][0:2])
    month = int(parts[-3])
    day = int(parts[-2])
    date = pysat.datetime(yr, month, day)
    num = 864  # one sample per 100 s; int(tag) override unused here
    uts = np.arange(num)
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    time_delta = date - pysat.datetime(2009, 1, 1)
    uts_root = np.mod(time_delta.total_seconds(), 5820)
    mlt = np.mod(uts_root + np.arange(num), 5820) * (24. / 5820.)
    data['mlt'] = mlt

    # do slt, 20 second offset from mlt
    uts_root = np.mod(time_delta.total_seconds() + 20, 5820)
    data['slt'] = np.mod(uts_root + np.arange(num), 5820) * (24. / 5820.)

    index = pds.date_range(date,
                           date +
                           pds.DateOffset(hours=23, minutes=59, seconds=59),
                           freq='100S')
    data.index = index
    data.index.name = 'epoch'

    profiles = []
    frame = pds.DataFrame(
        {
            'density': data.loc[data.index[0:50], 'mlt'].values.copy(),
            'dummy_str': ['test'] * 50,
            'dummy_ustr': [u'test'] * 50
        },
        index=data.index[0:50],
        columns=['density', 'dummy_str', 'dummy_ustr'])
    for time in data.index:
        profiles.append(frame)
    data['profiles'] = pds.Series(profiles, index=data.index)

    return data, meta.copy()
Example #12
def load_files(files, tag=None, sat_id=None):
    '''Loads a list of COSMIC data files, supplied by user.
    
    Returns a list of dicts, a dict for each file.
    '''

    output = [None] * len(files)
    drop_idx = []
    for (i, file) in enumerate(files):
        try:
            data = netCDF4.Dataset(file)
            # build up dictionary with all ncattrs
            new = {}
            # get list of file attributes
            ncattrsList = data.ncattrs()
            for d in ncattrsList:
                new[d] = data.getncattr(d)
            # load all of the variables in the netCDF
            loadedVars = {}
            keys = data.variables.keys()
            for key in keys:
                loadedVars[key] = data.variables[key][:]
            new['profiles'] = pysat.DataFrame(loadedVars)
            if tag == 'ionprf':
                new['profiles'].index = new['profiles']['MSL_alt']
            output[i] = new
            data.close()
        except RuntimeError:
            # some of the S4 files have zero bytes, which causes a read error
            # this stores the index of these zero byte files so I can drop
            # the Nones the gappy file leaves behind
            drop_idx.append(i)

    # drop anything that came from the zero byte files
    drop_idx.reverse()
    for i in drop_idx:
        del output[i]
    return output
Example #13
def load(fnames,
         tag=None,
         sat_id=None,
         sim_multi_file_right=False,
         sim_multi_file_left=False,
         root_date=None,
         file_date_range=None,
         malformed_index=False,
         mangle_file_dates=False):
    """ Loads the test files

    Parameters
    ----------
    fnames : list
        List of filenames
    tag : str or NoneType
        Instrument tag (accepts '')
    sat_id : str or NoneType
        Instrument satellite ID (accepts '' or a number (e.g., '10'), which
        specifies the number of data points to include in the test instrument)
    sim_multi_file_right : boolean
        Adjusts date range to be 12 hours in the future or twelve hours beyond
        root_date (default=False)
    sim_multi_file_left : boolean
        Adjusts date range to be 12 hours in the past or twelve hours before
        root_date (default=False)
    root_date : NoneType
        Optional central date, uses _test_dates if not specified.
        (default=None)
    file_date_range : pds.date_range or NoneType
        Range of dates for files, or None if this optional argument is not
        used. Shift actually performed by the init function.
        (default=None)
    malformed_index : boolean
        If True, time index will be non-unique and non-monotonic.
        (default=False)
    mangle_file_dates : bool
        If True, the loaded file list time index is shifted by 5 minutes.
        This shift is actually performed by the init function.
        (default=False)

    Returns
    -------
    data : (pds.DataFrame)
        Testing data
    meta : (pysat.Meta)
        Metadata

    """

    # create an artificial satellite data set
    iperiod = mm_test.define_period()
    drange = mm_test.define_range()
    uts, index, date = mm_test.generate_times(fnames, sat_id, freq='1S')

    # Specify the date tag locally and determine the desired date range
    pds_offset = pds.DateOffset(hours=12)
    if sim_multi_file_right:
        root_date = root_date or _test_dates[''][''] + pds_offset
    elif sim_multi_file_left:
        root_date = root_date or _test_dates[''][''] - pds_offset
    else:
        root_date = root_date or _test_dates['']['']

    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit default
    # to 1 Jan 2009, 00:00 UT. 14.84 orbits per day
    time_delta = date - root_date
    data['mlt'] = mm_test.generate_fake_data(time_delta.total_seconds(),
                                             uts,
                                             period=iperiod['lt'],
                                             data_range=drange['lt'])

    # do slt, 20 second offset from mlt
    data['slt'] = mm_test.generate_fake_data(time_delta.total_seconds() + 20,
                                             uts,
                                             period=iperiod['lt'],
                                             data_range=drange['lt'])

    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra time
    # to go around full longitude
    data['longitude'] = mm_test.generate_fake_data(time_delta.total_seconds(),
                                                   uts,
                                                   period=iperiod['lon'],
                                                   data_range=drange['lon'])

    # create latitude area for testing polar orbits
    angle = mm_test.generate_fake_data(time_delta.total_seconds(),
                                       uts,
                                       period=iperiod['angle'],
                                       data_range=drange['angle'])
    data['latitude'] = 90.0 * np.cos(angle)

    # fake orbit number
    fake_delta = date - (_test_dates[''][''] - pds.DateOffset(years=1))
    data['orbit_num'] = mm_test.generate_fake_data(fake_delta.total_seconds(),
                                                   uts,
                                                   period=iperiod['lt'],
                                                   cyclic=False)

    # create some fake data to support testing of averaging routines
    mlt_int = data['mlt'].astype(int)
    long_int = (data['longitude'] / 15.0).astype(int)
    if tag == 'ascend':
        data['dummy1'] = [i for i in range(len(data['mlt']))]
    elif tag == 'descend':
        data['dummy1'] = [-i for i in range(len(data['mlt']))]
    elif tag == 'plus10':
        data['dummy1'] = [i + 10 for i in range(len(data['mlt']))]
    elif tag == 'fives':
        data['dummy1'] = [5 for i in range(len(data['mlt']))]
    elif tag == 'mlt_offset':
        data['dummy1'] = mlt_int + 5
    else:
        data['dummy1'] = mlt_int
    data['dummy2'] = long_int
    data['dummy3'] = mlt_int + long_int * 1000.0
    data['dummy4'] = uts
    data['string_dummy'] = ['test'] * len(data)
    data['unicode_dummy'] = [u'test'] * len(data)
    data['int8_dummy'] = np.ones(len(data), dtype=np.int8)
    data['int16_dummy'] = np.ones(len(data), dtype=np.int16)
    data['int32_dummy'] = np.ones(len(data), dtype=np.int32)
    data['int64_dummy'] = np.ones(len(data), dtype=np.int64)

    if malformed_index:
        index = index.tolist()
        # nonmonotonic
        index[0:3], index[3:6] = index[3:6], index[0:3]
        # non unique
        index[6:9] = [index[6]] * 3

    data.index = index
    data.index.name = 'Epoch'
    return data, meta.copy()
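
A standalone sketch of the malformed_index block above: swapping two three-element runs breaks monotonicity, and repeating index[6] breaks uniqueness, which is useful for exercising index-validation code paths.

import pandas as pds

index = list(pds.date_range('2009-01-01', periods=12, freq='S'))
index[0:3], index[3:6] = index[3:6], index[0:3]   # nonmonotonic
index[6:9] = [index[6]] * 3                       # non unique
idx = pds.DatetimeIndex(index)
print(idx.is_monotonic_increasing, idx.is_unique)  # False False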
Example #14
def load(fnames, tag=None, sat_id=None):
    """Load CHAMP STAR files

    Parameters
    ------------
    fnames : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)

    Returns
    ---------
    data : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units
    """
    import re
    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)

    if isinstance(fnames, str):
        fnames = [fnames]

    # Define the CHAMP STAR data types by column
    champ_labels = {
        'Two-digit Year (years)': "year",
        'Day of the Year (days)': "doy",
        'Second of the Day (GPS time,sec)': "sod",
        'Center Latitude of 3-degree Bin (deg)': "bin_lat",
        'Satellite Geodetic Latitude (deg)': "sat_glat",
        'Satellite Longitude (deg)': "sat_lon",
        'Satellite Height (km)': "sat_h",
        'Satellite Local Time (hours)': "sat_lt",
        'Satellite Quasi-Dipole Latitude (deg)': "sat_qdlat",
        'Satellite Magnetic Longitude (deg)': "sat_mlon",
        'Satellite Magnetic Local Time (hours)': "sat_mlt",
        'Neutral Density (kg/m^3)': "ndens",
        'Neutral Density Normalized to 400km using NRLMSISe00': "ndens400",
        'Neutral Density Normalized to 410km using NRLMSISe00': "ndens410",
        'NRLMSISe00 Neutral Density at Satellite Height': "nrlmsis_ndens",
        'Uncertainty in Neutral Density (kg/m^3)': "ndens_err",
        'Number of Data Points in Current Averaging Bin': "npnts",
        ' '.join(('Number of Points in Current Averaging Bin that',
                  'Required Interpolation')): "npnts_interp",
        ' '.join(('Average Coefficient of Drag Used in Current',
                  'Averaging Bin')): "avg_drag_coeff",
    }

    champ_dtypes = {
        'year': np.int32,
        'doy': np.int32,
        'sod': float,
        'bin_lat': float,
        'sat_glat': float,
        'sat_lon': float,
        'sat_h': float,
        'sat_lt': float,
        'sat_qdlat': float,
        'sat_mlon': float,
        'sat_mlt': float,
        'ndens': float,
        'ndens400': float,
        'ndens410': float,
        'nrlmsis_ndens': float,
        'ndens_err': float,
        'npnts': int,
        'npnts_interp': float,
        'avg_drag_coeff': float,
    }

    champ_units = {
        'year': "2-digit years",
        'doy': "day of year",
        'sod': "seconds of day",
        'bin_lat': "degrees",
        'sat_glat': "degrees",
        'sat_lon': "degrees",
        'sat_h': "km",
        'sat_lt': "hours",
        'sat_qdlat': "degrees",
        'sat_mlon': "degrees",
        'sat_mlt': "hours",
        'ndens': "km m^{-3}",
        'ndens400': "km m^{-3}",
        'ndens410': "km m^{-3}",
        'nrlmsis_ndens': "km m^{-3}",
        'ndens_err': "km m^{-3}",
        'npnts': "number",
        'npnts_interp': "number",
        'avg_drag_coeff': "unitless",
    }

    # Define the routine needed to create datetime object from the
    # CHAMP time (YY DDD SSSSS)
    def parse_champdate(y, d, s):
        '''parse CHAMP date string (YY DDD SSSSS) into a datetime object
        '''
        import datetime as dt

        t = dt.datetime.strptime("{:02d} {:03d}".format(int(y), int(d)),
                                 "%y %j")
        fsec = float(s)
        isec = np.floor(fsec)
        microsec = (fsec - isec) * 1.0e6
        t += dt.timedelta(seconds=isec, microseconds=microsec)
        return (t)

    # The header is formatted differently from the rest of the file, read it in
    # first to obtain the necessary meta data
    with open(fnames[0], "r") as f:
        hdata = re.split(";|\n", f.readline())
    try:
        hdata.pop(hdata.index(''))
    except ValueError:
        # no empty header entry to remove
        pass

    # If there are files, read in the data
    data = pds.read_csv(fnames[0],
                        delim_whitespace=True,
                        skiprows=2,
                        header=None,
                        names=[champ_labels[h] for h in hdata],
                        keep_date_col=True,
                        index_col='datetime',
                        parse_dates={'datetime': [0, 1, 2]},
                        date_parser=parse_champdate)

    # Initialize the meta data
    meta = pysat.Meta()

    # Because the native dtype declaration interfered with datetime indexing,
    # define the data types here.  Also set the meta data
    for h in hdata:
        col = champ_labels[h]
        data[col] = data[col].astype(champ_dtypes[col])
        meta[col] = {"units": champ_units[col], "long_name": h}

    # Return data frame and metadata object
    return data, meta
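
For a quick check of the YY DDD SSSSS.SS convention, the nested parse_champdate helper above, lifted to module scope:

import datetime as dt
import numpy as np

def parse_champdate(y, d, s):
    # same logic as the nested helper in load()
    t = dt.datetime.strptime("{:02d} {:03d}".format(int(y), int(d)), "%y %j")
    fsec = float(s)
    isec = np.floor(fsec)
    t += dt.timedelta(seconds=isec, microseconds=(fsec - isec) * 1.0e6)
    return t

print(parse_champdate('02', '123', '3661.25'))
# 2002-05-03 01:01:01.250000 (day 123 of 2002, plus 3661.25 s)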
Example #15
def load(fnames, tag=None, sat_id=None, malformed_index=False):
    """ Loads the test files

    Parameters
    ----------
    fnames : (list)
        List of filenames
    tag : (str or NoneType)
        Instrument tag (accepts '' or a number (e.g., '10'), which specifies
        the number of times to include in the test instrument)
    sat_id : (str or NoneType)
        Instrument satellite ID (accepts '')
    malformed_index : bool
        If True, the time index will be non-unique and non-monotonic.
        (default=False)

    Returns
    -------
    data : (pds.DataFrame)
        Testing data
    meta : (pysat.Meta)
        Metadata

    """

    # create an artificial satellite data set
    parts = os.path.split(fnames[0])[-1].split('-')
    yr = int(parts[0])
    month = int(parts[1])
    day = int(parts[2][0:2])
    date = pysat.datetime(yr, month, day)
    # scalar divisor below used to reduce the number of time samples
    # covered by the simulation per day. The higher the number the lower
    # the number of samples (86400/scalar)
    scalar = 100
    num = 86400 // scalar  # integer count of samples in the day
    # basic time signal in UTS
    uts = np.arange(num) * scalar
    num_array = np.arange(num) * scalar
    # seed DataFrame with UT array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    # figure out how far in time from the root start
    # use that info to create a signal that is continuous from that start
    # going to presume there are 5820 seconds per orbit (97 minute period)
    time_delta = date - pysat.datetime(2009, 1, 1)
    # mlt runs 0-24 each orbit.
    data['mlt'] = test.generate_fake_data(time_delta.total_seconds(),
                                          np.arange(num) * scalar,
                                          period=5820,
                                          data_range=[0.0, 24.0])
    # do slt, 20 second offset from mlt
    data['slt'] = test.generate_fake_data(time_delta.total_seconds() + 20,
                                          np.arange(num) * scalar,
                                          period=5820,
                                          data_range=[0.0, 24.0])
    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra time
    # to go around full longitude
    data['longitude'] = test.generate_fake_data(time_delta.total_seconds(),
                                                num_array,
                                                period=6240,
                                                data_range=[0.0, 360.0])
    # create latitude signal for testing polar orbits
    angle = test.generate_fake_data(time_delta.total_seconds(),
                                    num_array,
                                    period=5820,
                                    data_range=[0.0, 2.0 * np.pi])
    data['latitude'] = 90.0 * np.cos(angle)

    # create real UTC time signal
    index = pds.date_range(date,
                           date +
                           pds.DateOffset(hours=23, minutes=59, seconds=59),
                           freq=str(scalar) + 'S')
    if malformed_index:
        index = index[0:num].tolist()
        # nonmonotonic
        index[0:3], index[3:6] = index[3:6], index[0:3]
        # non unique
        index[6:9] = [index[6]] * 3

    data.index = index
    data.index.name = 'epoch'
    # higher rate time signal (for scalar >= 2)
    # this time signal used for 2D profiles associated with each time in main
    # DataFrame
    high_rate_template = pds.date_range(
        date, date + pds.DateOffset(hours=0, minutes=1, seconds=39), freq='2S')

    # create a few simulated profiles
    # DataFrame at each time with mixed variables
    profiles = []
    # DataFrame at each time with numeric variables only
    alt_profiles = []
    # Series at each time, numeric data only
    series_profiles = []
    # frame indexed by date times
    frame = pds.DataFrame(
        {
            'density': data.loc[data.index[0:50], 'mlt'].values.copy(),
            'dummy_str': ['test'] * 50,
            'dummy_ustr': [u'test'] * 50
        },
        index=data.index[0:50],
        columns=['density', 'dummy_str', 'dummy_ustr'])
    # frame indexed by float
    dd = np.arange(50) * 1.2
    ff = np.arange(50) / 50.
    ii = np.arange(50) * 0.5
    frame_alt = pds.DataFrame({
        'density': dd,
        'fraction': ff
    },
                              index=ii,
                              columns=['density', 'fraction'])
    # series version of storage
    series_alt = pds.Series(dd, index=ii, name='series_profiles')

    for time in data.index:
        frame.index = high_rate_template + (time - data.index[0])
        profiles.append(frame)
        alt_profiles.append(frame_alt)
        series_profiles.append(series_alt)
    # store multiple data types into main frame
    data['profiles'] = pds.Series(profiles, index=data.index)
    data['alt_profiles'] = pds.Series(alt_profiles, index=data.index)
    data['series_profiles'] = pds.Series(series_profiles, index=data.index)
    return data, meta.copy()
Example #16
def load(fnames, tag=None, sat_id=None, malformed_index=False):
    """ Loads the test files

    Parameters
    ----------
    fnames : list
        List of filenames
    tag : str or NoneType
        Instrument tag (accepts '')
    sat_id : str or NoneType
        Instrument satellite ID (accepts '' or a number (e.g., '10'), which
        specifies the number of data points to include in the test instrument)
    malformed_index : bool
        If True, the time index will be non-unique and non-monotonic.
        (default=False)

    Returns
    -------
    data : pds.DataFrame
        Testing data
    meta : pysat.Meta
        Metadata

    """

    # create an artificial satellite data set
    iperiod = mm_test.define_period()
    drange = mm_test.define_range()
    # Using 100s frequency for compatibility with seasonal analysis unit tests
    uts, index, date = mm_test.generate_times(fnames, sat_id, freq='100S')
    # seed DataFrame with UT array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    # figure out how far in time from the root start
    # use that info to create a signal that is continuous from that start
    # going to presume there are 5820 seconds per orbit (97 minute period)
    time_delta = date - pysat.datetime(2009, 1, 1)
    # mlt runs 0-24 each orbit.
    data['mlt'] = mm_test.generate_fake_data(time_delta.total_seconds(),
                                             uts,
                                             period=iperiod['lt'],
                                             data_range=drange['lt'])
    # do slt, 20 second offset from mlt
    data['slt'] = mm_test.generate_fake_data(time_delta.total_seconds() + 20,
                                             uts,
                                             period=iperiod['lt'],
                                             data_range=drange['lt'])
    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra time
    # to go around full longitude
    data['longitude'] = mm_test.generate_fake_data(time_delta.total_seconds(),
                                                   uts,
                                                   period=iperiod['lon'],
                                                   data_range=drange['lon'])
    # create latitude signal for testing polar orbits
    angle = mm_test.generate_fake_data(time_delta.total_seconds(),
                                       uts,
                                       period=iperiod['angle'],
                                       data_range=drange['angle'])
    data['latitude'] = 90.0 * np.cos(angle)

    if malformed_index:
        index = index.tolist()
        # nonmonotonic
        index[0:3], index[3:6] = index[3:6], index[0:3]
        # non unique
        index[6:9] = [index[6]] * 3

    data.index = index
    data.index.name = 'epoch'
    # higher rate time signal (for scalar >= 2)
    # this time signal used for 2D profiles associated with each time in main
    # DataFrame
    high_rate_template = pds.date_range(
        date, date + pds.DateOffset(hours=0, minutes=1, seconds=39), freq='2S')

    # create a few simulated profiles
    # DataFrame at each time with mixed variables
    profiles = []
    # DataFrame at each time with numeric variables only
    alt_profiles = []
    # Series at each time, numeric data only
    series_profiles = []
    # frame indexed by date times
    frame = pds.DataFrame(
        {
            'density': data.loc[data.index[0:50], 'mlt'].values.copy(),
            'dummy_str': ['test'] * 50,
            'dummy_ustr': [u'test'] * 50
        },
        index=data.index[0:50],
        columns=['density', 'dummy_str', 'dummy_ustr'])
    # frame indexed by float
    dd = np.arange(50) * 1.2
    ff = np.arange(50) / 50.
    ii = np.arange(50) * 0.5
    frame_alt = pds.DataFrame({
        'density': dd,
        'fraction': ff
    },
                              index=ii,
                              columns=['density', 'fraction'])
    # series version of storage
    series_alt = pds.Series(dd, index=ii, name='series_profiles')

    for time in data.index:
        frame.index = high_rate_template + (time - data.index[0])
        profiles.append(frame)
        alt_profiles.append(frame_alt)
        series_profiles.append(series_alt)
    # store multiple data types into main frame
    data['profiles'] = pds.Series(profiles, index=data.index)
    data['alt_profiles'] = pds.Series(alt_profiles, index=data.index)
    data['series_profiles'] = pds.Series(series_profiles, index=data.index)
    return data, meta.copy()
Example #17
def load_files(files, tag=None, sat_id=None, altitude_bin=None):
    """Load COSMIC data files directly from a given list.

    May be directly called by user, but in general is called by load.  This is
    separate from the main load function for future support of multiprocessor
    loading.

    Parameters
    ----------
    files : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)
    altitude_bin : integer
        Number of kilometers to bin altitude profiles by when loading.
        Currently only supported for tag='ionprf'.

    Returns
    -------
    output : (list of dicts, one per file)
        Object containing satellite data

    """
    output = [None] * len(files)
    drop_idx = []
    for (i, file) in enumerate(files):
        try:
            data = netCDF4.Dataset(file)
            # build up dictionary with all ncattrs
            new = {}
            # get list of file attributes
            ncattrsList = data.ncattrs()
            for d in ncattrsList:
                new[d] = data.getncattr(d)
            # load all of the variables in the netCDF
            loadedVars = {}
            keys = data.variables.keys()
            for key in keys:
                if data.variables[key][:].dtype.byteorder != '=':
                    loadedVars[key] = \
                        data.variables[key][:].byteswap().newbyteorder()
                else:
                    loadedVars[key] = data.variables[key][:]

            new['profiles'] = pysat.DataFrame(loadedVars)

            output[i] = new
            data.close()
        except RuntimeError:
            # some of the files have zero bytes, which causes a read error
            # this stores the index of these zero byte files so I can drop
            # the Nones the gappy file leaves behind
            drop_idx.append(i)

    # drop anything that came from the zero byte files
    drop_idx.reverse()
    for i in drop_idx:
        del output[i]

    if tag == 'ionprf':
        if altitude_bin is not None:
            for out in output:
                rval = (out['profiles']['MSL_alt'] /
                        altitude_bin).round().values
                out['profiles'].index = rval * altitude_bin
                out['profiles'] = \
                    out['profiles'].groupby(out['profiles'].index.values).mean()
        else:
            for out in output:
                out['profiles'].index = out['profiles']['MSL_alt']

    return output
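
A standalone sketch of the byte-order normalization above: arrays read from netCDF files can arrive big-endian, and swapping them to native order keeps pandas and numpy operations consistent regardless of file endianness. This uses ndarray.newbyteorder as the example does, which applies to numpy < 2.0; newer numpy uses arr.view(arr.dtype.newbyteorder()).

import numpy as np

arr = np.arange(4, dtype='>f8')          # big-endian float64
if arr.dtype.byteorder != '=':
    arr = arr.byteswap().newbyteorder()
print(arr.dtype.byteorder)               # '=' on little-endian platforms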
Example #18
def load(fnames,
         tag=None,
         sat_id=None,
         sim_multi_file_right=False,
         sim_multi_file_left=False,
         root_date=None):
    # create an artificial satellite data set
    parts = fnames[0].split('/')
    yr = int('20' + parts[-1][0:2])
    month = int(parts[-3])
    day = int(parts[-2])

    date = pysat.datetime(yr, month, day)
    if sim_multi_file_right:
        root_date = root_date or pysat.datetime(2009, 1, 1, 12)
        data_date = date + pds.DateOffset(hours=12)
    elif sim_multi_file_left:
        root_date = root_date or pysat.datetime(2008, 12, 31, 12)
        data_date = date - pds.DateOffset(hours=12)
    else:
        root_date = root_date or pysat.datetime(2009, 1, 1)
        data_date = date
    num = 86400 if tag == '' else int(tag)
    num_array = np.arange(num)
    uts = num_array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    time_delta = date - root_date
    uts_root = np.mod(time_delta.total_seconds(), 5820)
    mlt = np.mod(uts_root + num_array, 5820) * (24. / 5820.)
    data['mlt'] = mlt

    # fake orbit number
    fake_delta = date - pysat.datetime(2008, 1, 1)
    fake_uts_root = fake_delta.total_seconds()

    data['orbit_num'] = ((fake_uts_root + num_array) / 5820.).astype(int)

    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra time
    # to go around full longitude
    long_uts_root = np.mod(time_delta.total_seconds(), 6240)
    longitude = np.mod(long_uts_root + num_array, 6240) * (360. / 6240.)
    data['longitude'] = longitude

    # create latitude area for testing polar orbits
    latitude = 90. * np.cos(
        np.mod(uts_root + num_array, 5820) * (2. * np.pi / 5820.))
    data['latitude'] = latitude

    # do slt, 20 second offset from mlt
    uts_root = np.mod(time_delta.total_seconds() + 20, 5820)
    data['slt'] = np.mod(uts_root + num_array, 5820) * (24. / 5820.)

    # create some fake data to support testing of averaging routines
    dummy1 = []
    for i in range(len(data['mlt'])):
        dummy1.append(i)
    long_int = (data['longitude'] / 15.).astype(int)
    data['dummy1'] = dummy1
    data['string_dummy'] = ['test'] * len(data)
    data['unicode_dummy'] = [u'test'] * len(data)
    data['int8_dummy'] = np.array([1] * len(data), dtype=np.int8)
    data['int16_dummy'] = np.array([1] * len(data), dtype=np.int16)
    data['int32_dummy'] = np.array([1] * len(data), dtype=np.int32)
    data['int64_dummy'] = np.array([1] * len(data), dtype=np.int64)
    # print (data['string_dummy'])

    index = pds.date_range(data_date,
                           data_date + pds.DateOffset(seconds=num - 1),
                           freq='S')
    data.index = index[0:num]
    data.index.name = 'time'
    return data, meta.copy()
Example #19
def load(fnames, tag=None, sat_id=None, altitude_bin=None):
    """Load COSMIC GPS files.

    Parameters
    ----------
    fnames : pandas.Series
        Series of filenames
    tag : str or NoneType
        tag or None (default=None)
    sat_id : str or NoneType
        satellite id or None (default=None)
    altitude_bin : integer
        Number of kilometers to bin altitude profiles by when loading.
        Currently only supported for tag='ionprf'. (default=None)

    Returns
    -------
    output : pandas.DataFrame
        Object containing satellite data
    meta : pysat.Meta
        Object containing metadata such as column names and units

    """

    # input check
    if altitude_bin is not None:
        if tag != 'ionprf':
            estr = 'altitude_bin keyword only supported for "tag=ionprf"'
            raise ValueError(estr)

    num = len(fnames)
    # make sure there are files to read
    if num != 0:
        # call separate load_files routine, segmented for possible
        # multiprocessor load, not included and only benefits about 20%
        output = pysat.DataFrame(load_files(fnames, tag=tag, sat_id=sat_id,
                                            altitude_bin=altitude_bin))
        utsec = output.hour * 3600. + output.minute * 60. + output.second
        # make times unique by adding a unique amount of time less than a second
        # FIXME: need to switch to xarray so unique time stamps not needed
        if tag != 'scnlv1':
            # add 1E-5 seconds to time based upon occulting_sat_id
            # additional 1E-6 seconds added based upon cosmic ID
            # get cosmic satellite ID
            c_id = np.array([snip[3] for snip in output.fileStamp]).astype(int)
            # time offset
            utsec += output.occulting_sat_id*1.e-5 + c_id*1.e-6
        else:
            # construct time out of three different parameters
            # duration must be less than 10,000
            # prn_id is allowed two characters
            # antenna_id gets one
            # prn_id and antenna_id are not sufficient for a unique time
            utsec += output.prn_id*1.e-2 + output.duration.astype(int)*1.E-6
            utsec += output.antenna_id*1.E-7
        # move to Index
        output.index = \
            pysat.utils.time.create_datetime_index(year=output.year,
                                                   month=output.month,
                                                   day=output.day,
                                                   uts=utsec)
        if not output.index.is_unique:
            raise ValueError('Datetimes returned by load_files not unique.')
        # make sure UTS strictly increasing
        output.sort_index(inplace=True)
        # use the first available file to pick out meta information
        profile_meta = pysat.Meta()
        meta = pysat.Meta()
        ind = 0
        repeat = True
        while repeat:
            try:
                data = netCDF4.Dataset(fnames[ind])
                ncattrsList = data.ncattrs()
                for d in ncattrsList:
                    meta[d] = {'units': '', 'long_name': d}
                keys = data.variables.keys()
                for key in keys:
                    if 'units' in data.variables[key].ncattrs():
                        profile_meta[key] = {'units': data.variables[key].units,
                                             'long_name':
                                             data.variables[key].long_name}
                repeat = False
            except RuntimeError:
                # file was empty, try the next one by incrementing ind
                ind += 1

        meta['profiles'] = profile_meta
        return output, meta
    else:
        # no data
        return pysat.DataFrame(None), pysat.Meta()
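
A sketch of the uniqueness trick above: profiles sharing the same UT second are separated by sub-second increments built from per-profile IDs, keeping the datetime index unique and sortable.

import numpy as np

utsec = np.array([3600.0, 3600.0, 3600.0])
occulting_sat_id = np.array([12, 7, 12])
cosmic_id = np.array([1, 1, 2])         # from fileStamp, e.g. 'C1...'
utsec = utsec + occulting_sat_id * 1.e-5 + cosmic_id * 1.e-6
print(np.unique(utsec).size == utsec.size)  # True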
Example #20
def load(fnames, tag=None, sat_id=None):
    import davitpy
    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)
    elif len(fnames) == 1:

        myPtr = davitpy.pydarn.sdio.sdDataPtr(sTime=pysat.datetime(1980, 1, 1),
                                              fileType='grdex',
                                              eTime=pysat.datetime(2250, 1, 1),
                                              hemi=tag,
                                              fileName=fnames[0])
        myPtr.open()

        in_list = []
        in_dict = {
            'stid': [],
            'channel': [],
            'noisemean': [],
            'noisesd': [],
            'gsct': [],
            'nvec': [],
            'pmax': [],
            'start_time': [],
            'end_time': [],
            'vemax': [],
            'vemin': [],
            'pmin': [],
            'programid': [],
            'wmax': [],
            'wmin': [],
            'freq': []
        }

        while True:
            info = myPtr.readRec()
            if info is None:
                myPtr.close()
                break

            drift_frame = pds.DataFrame.from_records(info.vector.__dict__,
                                                     nrows=len(info.pmax),
                                                     index=info.vector.index)
            drift_frame['partial'] = 1
            drift_frame.drop('index', axis=1, inplace=True)
            drift_frame.index.name = 'index'
            sum_vec = 0
            for nvec in info.nvec:
                in_list.append(drift_frame.iloc[sum_vec:sum_vec + nvec])
                sum_vec += nvec

            in_dict['stid'].extend(info.stid)
            in_dict['channel'].extend(info.channel)
            in_dict['noisemean'].extend(info.noisemean)
            in_dict['noisesd'].extend(info.noisesd)
            in_dict['gsct'].extend(info.gsct)
            in_dict['nvec'].extend(info.nvec)
            in_dict['pmax'].extend(info.pmax)
            in_dict['start_time'].extend([info.sTime] * len(info.pmax))
            in_dict['end_time'].extend([info.eTime] * len(info.pmax))
            in_dict['vemax'].extend(info.vemax)
            in_dict['vemin'].extend(info.vemin)
            in_dict['pmin'].extend(info.pmin)
            in_dict['programid'].extend(info.programid)
            in_dict['wmax'].extend(info.wmax)
            in_dict['wmin'].extend(info.wmin)
            in_dict['freq'].extend(info.freq)

        output = pds.DataFrame(in_dict)
        output['vector'] = in_list
        output.index = output.start_time
        output.drop('start_time', axis=1, inplace=True)

        return output, pysat.Meta()
    else:
        raise ValueError('Only one filename currently supported.')
Example #21
def load(fnames,
         tag=None,
         sat_id=None,
         sim_multi_file_right=False,
         sim_multi_file_left=False,
         root_date=None,
         file_date_range=None,
         malformed_index=False,
         **kwargs):
    """ Loads the test files

    Parameters
    ----------
    fnames : (list)
        List of filenames
    tag : (str or NoneType)
        Instrument tag (accepts '' or a number (e.g., '10'), which specifies
        the number of times to include in the test instrument)
    sat_id : (str or NoneType)
        Instrument satellite ID (accepts '')
    sim_multi_file_right : (boolean)
        Adjusts date range to be 12 hours in the future or twelve hours beyond
        root_date (default=False)
    sim_multi_file_left : (boolean)
        Adjusts date range to be 12 hours in the past or twelve hours before
        root_date (default=False)
    root_date : (NoneType)
        Optional central date, uses test_dates if not specified.
        (default=None)
    file_date_range : (pds.date_range or NoneType)
        Range of dates for files, or None if this optional argument is not
        used
        (default=None)
    malformed_index : bool (default=False)
        If True, time index for simulation will be non-unique and non-monotonic.
    **kwargs : Additional keywords
        Additional keyword arguments supplied at pysat.Instrument instantiation
        are passed here

    Returns
    -------
    data : (pds.DataFrame)
        Testing data
    meta : (pysat.Meta)
        Metadata

    """

    # create an artificial satellite data set
    parts = os.path.split(fnames[0])[-1].split('-')
    yr = int(parts[0])
    month = int(parts[1])
    day = int(parts[2][0:2])

    # Specify the date tag locally and determine the desired date range
    date = pysat.datetime(yr, month, day)
    pds_offset = pds.DateOffset(hours=12)
    if sim_multi_file_right:
        root_date = root_date or test_dates[''][''] + pds_offset
        data_date = date + pds_offset
    elif sim_multi_file_left:
        root_date = root_date or test_dates[''][''] - pds_offset
        data_date = date - pds_offset
    else:
        root_date = root_date or test_dates['']['']
        data_date = date

    # The sat_id can be used to specify the number of indexes to load for
    # any of the testing objects
    num = 86400 if sat_id == '' else int(sat_id)
    num_array = np.arange(num)
    uts = num_array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit default
    # to 1 Jan 2009, 00:00 UT. 14.84 orbits per day
    time_delta = date - root_date
    data['mlt'] = test.generate_fake_data(time_delta.total_seconds(),
                                          num_array,
                                          period=5820,
                                          data_range=[0.0, 24.0])

    # do slt, 20 second offset from mlt
    data['slt'] = test.generate_fake_data(time_delta.total_seconds() + 20,
                                          num_array,
                                          period=5820,
                                          data_range=[0.0, 24.0])

    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra time
    # to go around full longitude
    data['longitude'] = test.generate_fake_data(time_delta.total_seconds(),
                                                num_array,
                                                period=6240,
                                                data_range=[0.0, 360.0])

    # create latitude area for testing polar orbits
    angle = test.generate_fake_data(time_delta.total_seconds(),
                                    num_array,
                                    period=5820,
                                    data_range=[0.0, 2.0 * np.pi])
    data['latitude'] = 90.0 * np.cos(angle)

    # fake orbit number
    fake_delta = date - (test_dates[''][''] - pds.DateOffset(years=1))
    data['orbit_num'] = test.generate_fake_data(fake_delta.total_seconds(),
                                                num_array,
                                                period=5820,
                                                cyclic=False)

    # create some fake data to support testing of averaging routines
    mlt_int = data['mlt'].astype(int)
    long_int = (data['longitude'] / 15.0).astype(int)
    if tag == 'ascend':
        data['dummy1'] = [i for i in range(len(data['mlt']))]
    elif tag == 'descend':
        data['dummy1'] = [-i for i in range(len(data['mlt']))]
    elif tag == 'plus10':
        data['dummy1'] = [i + 10 for i in range(len(data['mlt']))]
    elif tag == 'fives':
        data['dummy1'] = [5 for i in range(len(data['mlt']))]
    elif tag == 'mlt_offset':
        data['dummy1'] = mlt_int + 5
    else:
        data['dummy1'] = mlt_int
    data['dummy2'] = long_int
    data['dummy3'] = mlt_int + long_int * 1000.0
    data['dummy4'] = num_array
    data['string_dummy'] = ['test'] * len(data)
    data['unicode_dummy'] = [u'test'] * len(data)
    data['int8_dummy'] = np.ones(len(data), dtype=np.int8)
    data['int16_dummy'] = np.ones(len(data), dtype=np.int16)
    data['int32_dummy'] = np.ones(len(data), dtype=np.int32)
    data['int64_dummy'] = np.ones(len(data), dtype=np.int64)

    index = pds.date_range(data_date,
                           data_date + pds.DateOffset(seconds=num - 1),
                           freq='S')
    if malformed_index:
        index = index[0:num].tolist()
        # nonmonotonic
        index[0:3], index[3:6] = index[3:6], index[0:3]
        # non unique
        index[6:9] = [index[6]] * 3

    data.index = index[0:num]
    data.index.name = 'Epoch'
    return data, meta.copy()
Example #22
def load(fnames,
         tag=None,
         sat_id=None,
         obs_long=0.,
         obs_lat=0.,
         obs_alt=0.,
         TLE1=None,
         TLE2=None):
    """		          
    Returns data and metadata in the format required by pysat. Finds position		
    of satellite in both ECI and ECEF co-ordinates.
    
    Routine is directly called by pysat and not the user.		
    		
    Parameters		
    ----------		
    fnames : list-like collection		
        File name that contains date in its name. 		
    tag : string		
        Identifies a particular subset of satellite data		
    sat_id : string		
        Satellite ID			
    obs_long: float		
        Longitude of the observer on the Earth's surface		
    obs_lat: float		
        Latitude of the observer on the Earth's surface			
    obs_alt: float		
        Altitude of the observer on the Earth's surface		
    TLE1 : string
        First string for Two Line Element. Must be in TLE format	          
    TLE2 : string
        Second string for Two Line Element. Must be in TLE format	          
        
    Example
    -------
      inst = pysat.Instrument('pysat', 'sgp4', 
              TLE1='1 25544U 98067A   18135.61844383  .00002728  00000-0  48567-4 0  9998',
              TLE2='2 25544  51.6402 181.0633 0004018  88.8954  22.2246 15.54059185113452')
      inst.load(2018, 1)
      
    """

    import sgp4
    # wgs72 is the most commonly used gravity model in satellite tracking community
    from sgp4.earth_gravity import wgs72
    from sgp4.io import twoline2rv
    import ephem
    import pysatMagVect

    # TLEs (Two Line Elements for ISS)
    # format of TLEs is fixed and available from wikipedia...
    # lines encode list of orbital elements of an Earth-orbiting object
    # for a given point in time
    line1 = (
        '1 25544U 98067A   18135.61844383  .00002728  00000-0  48567-4 0  9998'
    )
    line2 = (
        '2 25544  51.6402 181.0633 0004018  88.8954  22.2246 15.54059185113452'
    )
    # use ISS defaults if not provided by user
    if TLE1 is not None:
        line1 = TLE1
    if TLE2 is not None:
        line2 = TLE2

    # create satellite from TLEs, assuming a gravity model
    # according to the module webpage, wgs72 is common
    satellite = twoline2rv(line1, line2, wgs72)

    # grab date from filename
    parts = os.path.split(fnames[0])[-1].split('-')
    yr = int(parts[0])
    month = int(parts[1])
    day = int(parts[2][0:2])
    date = pysat.datetime(yr, month, day)

    # create timing at 1 Hz (for 1 day)
    times = pds.date_range(start=date,
                           end=date + pds.DateOffset(seconds=86399),
                           freq='1S')
    # reduce requirements if on testing server
    # TODO Remove this when testing resources are higher
    on_travis = os.environ.get('ONTRAVIS') == 'True'
    if on_travis:
        times = times[0:100]

    # create list to hold satellite position, velocity
    position = []
    velocity = []
    for time in times:
        # orbit propagator - computes x,y,z position and velocity
        pos, vel = satellite.propagate(time.year, time.month, time.day,
                                       time.hour, time.minute, time.second)
        position.extend(pos)
        velocity.extend(vel)

    # put data into DataFrame
    data = pysat.DataFrame(
        {
            'position_eci_x': position[::3],
            'position_eci_y': position[1::3],
            'position_eci_z': position[2::3],
            'velocity_eci_x': velocity[::3],
            'velocity_eci_y': velocity[1::3],
            'velocity_eci_z': velocity[2::3]
        },
        index=times)
    data.index.name = 'Epoch'

    # add position and velocity in ECEF
    # a GEI/ECEF translation could be called here; instead, since it is
    # available, use an orbit predictor from another package (ephem) that
    # also supports ground station calculations, then convert its geodetic
    # output to ECEF

    # the observer's (ground station) position on the Earth surface
    site = ephem.Observer()
    site.lon = str(obs_long)
    site.lat = str(obs_lat)
    site.elevation = obs_alt

    # The first parameter in readtle() is the satellite name
    sat = ephem.readtle('pysat', line1, line2)
    output_params = []
    for time in times:
        lp = {}
        site.date = time
        sat.compute(site)
        # parameters relative to the ground station
        lp['obs_sat_az_angle'] = ephem.degrees(sat.az)
        lp['obs_sat_el_angle'] = ephem.degrees(sat.alt)
        # total distance away
        lp['obs_sat_slant_range'] = sat.range
        # satellite location
        # sub latitude point
        lp['glat'] = np.degrees(sat.sublat)
        # sublongitude point
        lp['glong'] = np.degrees(sat.sublong)
        # satellite elevation, reported in m, stored here as km
        lp['alt'] = sat.elevation / 1000.
        # get ECEF position of satellite
        lp['x'], lp['y'], lp['z'] = pysatMagVect.geodetic_to_ecef(
            lp['glat'], lp['glong'], lp['alt'])
        output_params.append(lp)
    output = pds.DataFrame(output_params, index=times)
    # modify input object to include calculated parameters
    data[['glong', 'glat', 'alt']] = output[['glong', 'glat', 'alt']]
    data[['position_ecef_x', 'position_ecef_y',
          'position_ecef_z']] = output[['x', 'y', 'z']]
    data['obs_sat_az_angle'] = output['obs_sat_az_angle']
    data['obs_sat_el_angle'] = output['obs_sat_el_angle']
    data['obs_sat_slant_range'] = output['obs_sat_slant_range']
    return data, meta.copy()
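A hedged sketch exercising the ground-station parameters above; the coordinates are illustrative and the instrument name follows the docstring's Example.

import pysat
inst = pysat.Instrument('pysat', 'sgp4',
                        obs_long=-80.6, obs_lat=28.4, obs_alt=0.)
inst.load(2018, 1)
# azimuth/elevation of the satellite as seen from the station
print(inst.data[['obs_sat_az_angle', 'obs_sat_el_angle']].head())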
Example #23
    def to_pysat(self, flatten_twod=True, units_label='UNITS',
                 name_label='long_name', fill_label='FILLVAL',
                 plot_label='FieldNam', min_label='ValidMin',
                 max_label='ValidMax', notes_label='Var_Notes',
                 desc_label='CatDesc', axis_label='LablAxis'):
        """
        Exports loaded CDF data into data, meta for pysat module
        
        Notes
        -----
        The *_labels should be set to the values in the file, if present.
        Note that once the meta object returned from this function is attached
        to a pysat.Instrument object then the *_labels on the Instrument
        are assigned to the newly attached Meta object.
        
        The pysat Meta object will use data with labels that match the patterns
        in *_labels even if the case does not match.

        Parameters
        ----------
        flatten_twod : bool (True)
            If True, then two dimensional data is flattened across 
            columns. Name mangling is used to group data, first column
            is 'name', last column is 'name_end'. In between numbers are 
            appended 'name_1', 'name_2', etc. All data for a given 2D array
            may be accessed via, data.ix[:,'item':'item_end']
            If False, then 2D data is stored as a series of DataFrames, 
            indexed by Epoch. data.ix[0, 'item']
        units_label : str
            Identifier within metadata for units. Defults to CDAWab standard.
        name_label : str
            Identifier within metadata for variable name. Defults to 'long_name',
            not normally present within CDAWeb files. If not, will use values
            from the variable name in the file.
        fill_label : str
            Identifier within metadata for Fill Values. Defults to CDAWab standard.
        plot_label : str
            Identifier within metadata for variable name used when plotting.
            Defults to CDAWab standard.
        min_label : str
            Identifier within metadata for minimim variable value. 
            Defults to CDAWab standard.
        max_label : str
            Identifier within metadata for maximum variable value.
            Defults to CDAWab standard.
        notes_label : str
            Identifier within metadata for notes. Defults to CDAWab standard.
        desc_label : str
            Identifier within metadata for a variable description.
            Defults to CDAWab standard.
        axis_label : str
            Identifier within metadata for axis name used when plotting. 
            Defults to CDAWab standard.
            
                             
        Returns
        -------
        pandas.DataFrame, pysat.Meta
            Data and Metadata suitable for attachment to a pysat.Instrument
            object.
        
        """

        import pysat
        import pandas

        # copy data
        cdata = self.data.copy()
        #
        # create pysat.Meta object using data above
        # and utilizing the attribute labels provided by the user
        meta = pysat.Meta(pysat.DataFrame.from_dict(self.meta, orient='index'),
                          units_label=units_label, name_label=name_label,
                          fill_label=fill_label, plot_label=plot_label,
                          min_label=min_label, max_label=max_label,
                          notes_label=notes_label, desc_label=desc_label,
                          axis_label=axis_label)
                          
        # account for different possible cases for Epoch, epoch, EPOCH, epOch
        lower_names = [name.lower() for name in meta.keys()] 
        for name, true_name in zip(lower_names, meta.keys()):
            if name == 'epoch':
                meta.data.rename(index={true_name: 'Epoch'}, inplace=True)
                epoch = cdata.pop(true_name)
                cdata['Epoch'] = epoch

        # ready to format data, iterate over all of the data names
        # and put into a pandas DataFrame
        two_d_data = []
        drop_list = []
        for name in cdata.keys():
            temp = np.shape(cdata[name])
            # treat 2 dimensional data differently
            if len(temp) == 2:
                if not flatten_twod:
                    # put 2D data into a Frame at each time
                    # remove data from dict when adding to the DataFrame
                    frame = pysat.DataFrame(cdata[name].flatten(), columns=[name])
                    drop_list.append(name)

                    step = temp[0]
                    new_list = []
                    new_index = np.arange(step)
                    for i in np.arange(len(epoch)):
                        new_list.append(frame.iloc[i*step:(i+1)*step, :])
                        new_list[-1].index = new_index
                    new_frame = pandas.Series(new_list, index=epoch, name=name)
                    two_d_data.append(new_frame)

                else:
                    # flatten 2D into series of 1D columns
                    new_names = [name + '_{i}'.format(i=i) for i in np.arange(temp[0] - 2)]
                    new_names.append(name + '_end')
                    new_names.insert(0, name)
                    # remove data from dict when adding to the DataFrame
                    drop_list.append(name)
                    frame = pysat.DataFrame(cdata[name].T,
                                            index=epoch,
                                            columns=new_names)
                    two_d_data.append(frame)
        for name in drop_list:
            _ = cdata.pop(name)
        # all of the data left over is 1D, add as Series
        data = pysat.DataFrame(cdata, index=epoch)
        two_d_data.append(data)
        data = pandas.concat(two_d_data, axis=1)
        data.drop('Epoch', axis=1, inplace=True)
        return data, meta
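A usage sketch, assuming this method is exposed on the pysatCDF.CDF loader as shown in Example #28 below; the file and variable names are hypothetical.

import pysatCDF
with pysatCDF.CDF('sample.cdf') as cdf:
    data, meta = cdf.to_pysat(flatten_twod=False)
# 2D variables are now Series of per-epoch DataFrames
# profile = data.loc[data.index[0], 'item']  # 'item' is hypothetical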
Example #24
def load(fnames, tag=None, sat_id=None):
    """Load COSMIC GPS files.

    Parameters
    ----------
    fnames : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)

    Returns
    -------
    output : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units

    """

    num = len(fnames)
    # make sure there are files to read
    if num != 0:
        # call separate load_files routine, segmented for possible
        # multiprocessor load; not included and only benefits about 20%
        output = pysat.DataFrame(load_files(fnames, tag=tag, sat_id=sat_id))
        utsec = output.hour * 3600. + output.minute * 60. + output.second
        output.index = \
            pysat.utils.time.create_datetime_index(year=output.year,
                                                   month=output.month,
                                                   day=output.day,
                                                   uts=utsec)
        # make sure UTS strictly increasing
        output.sort_index(inplace=True)
        # use the first available file to pick out meta information
        profile_meta = pysat.Meta()
        meta = pysat.Meta()
        ind = 0
        repeat = True
        while repeat:
            try:
                data = netCDF4.Dataset(fnames[ind])
                ncattrsList = data.ncattrs()
                for d in ncattrsList:
                    meta[d] = {'units': '', 'long_name': d}
                keys = data.variables.keys()
                for key in keys:
                    profile_meta[key] = {
                        'units': data.variables[key].units,
                        'long_name': data.variables[key].long_name
                    }
                repeat = False
            except RuntimeError:
                # file was empty, try the next one by incrementing ind
                ind += 1
        meta['profiles'] = profile_meta
        return output, meta
    else:
        # no data
        return pysat.DataFrame(None), pysat.Meta()
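Minimal call sketch for this loader; the file name is illustrative. Zero-byte files raise RuntimeError and are skipped when building metadata.

output, meta = load(['ionPrf_C001.2009.001.nc'], tag='ionprf')
# index was sorted above, so it is strictly increasing
print(output.index.is_monotonic_increasing)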
Example #25
def load(fnames, tag=None, sat_id=None):
    import pydarn
    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)
    elif len(fnames) == 1:

        myPtr = pydarn.sdio.sdDataPtr(sTime=pysat.datetime(1980, 1, 1),
                                      eTime=pysat.datetime(2250, 1, 1),
                                      hemi=tag)
        myPtr.fType, myPtr.dType = 'grdex', 'dmap'
        myPtr.ptr = open(fnames[0], 'r')

        in_list = []
        in_dict = {
            'stid': [],
            'channel': [],
            'noisemean': [],
            'noisesd': [],
            'gsct': [],
            'nvec': [],
            'pmax': [],
            'start_time': [],
            'end_time': [],
            'vemax': [],
            'vemin': [],
            'pmin': [],
            'programid': [],
            'wmax': [],
            'wmin': [],
            'freq': []
        }

        while True:
            info = pydarn.dmapio.readDmapRec(myPtr.ptr)
            info = pydarn.sdio.sdDataTypes.gridData(dataDict=info)
            if info.channel is None:
                break

            drift_frame = pds.DataFrame.from_records(info.vector.__dict__,
                                                     nrows=len(info.pmax),
                                                     index=info.vector.index)
            drift_frame['partial'] = 1
            drift_frame.drop('index', axis=1, inplace=True)
            drift_frame.index.name = 'index'
            sum_vec = 0
            for nvec in info.nvec:
                in_list.append(drift_frame.iloc[sum_vec:sum_vec + nvec])
                sum_vec += nvec

            in_dict['stid'].extend(info.stid)
            in_dict['channel'].extend(info.channel)
            in_dict['noisemean'].extend(info.noisemean)
            in_dict['noisesd'].extend(info.noisesd)
            in_dict['gsct'].extend(info.gsct)
            in_dict['nvec'].extend(info.nvec)
            in_dict['pmax'].extend(info.pmax)
            in_dict['start_time'].extend([info.sTime] * len(info.pmax))
            in_dict['end_time'].extend([info.eTime] * len(info.pmax))
            in_dict['vemax'].extend(info.vemax)
            in_dict['vemin'].extend(info.vemin)
            in_dict['pmin'].extend(info.pmin)
            in_dict['programid'].extend(info.programid)
            in_dict['wmax'].extend(info.wmax)
            in_dict['wmin'].extend(info.wmin)
            in_dict['freq'].extend(info.freq)

        output = pds.DataFrame(in_dict)
        output['vector'] = in_list
        output.index = output.start_time
        output.drop('start_time', axis=1, inplace=True)
        myPtr.ptr.close()
        return output, pysat.Meta()
    else:
        raise ValueError('Only one filename currently supported.')
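Hedged usage sketch: a single grdex file, with the file name and hemisphere tag illustrative.

output, meta = load(['north.grdex'], tag='north')
print(output[['stid', 'nvec', 'freq']].head())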
Example #26
def load(fnames, tag='', sat_id=None):
    """ Load the SuperMAG files

    Parameters
    -----------
    fnames : (list)
        List of filenames
    tag : (str)
        Denotes type of file to load.  Accepted types are 'indices', 'all',
        'stations', and '' (for just magnetometer measurements). (default='')
    sat_id : (str or NoneType)
        Satellite ID for constellations, not used. (default=None)

    Returns
    --------
    data : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units

    """

    # Ensure that there are files to load
    if len(fnames) <= 0:
        return pysat.DataFrame(None), pysat.Meta(None)

    # Ensure that the files are in a list
    if isinstance(fnames, str):
        fnames = [fnames]

    # Initialise the output data
    data = pds.DataFrame()
    baseline = list()

    # Cycle through the files
    for fname in fnames:
        fname = fname[:-11]  # Remove date index from end of filename
        file_type = path.splitext(fname)[1].lower()

        # Open and load the files for each file type
        # initialise temp so it is defined even when no branch loads data
        temp = pds.DataFrame()
        if file_type == ".csv":
            if tag != "indices":
                temp = load_csv_data(fname, tag)
        else:
            temp, bline = load_ascii_data(fname, tag)

            if bline is not None:
                baseline.append(bline)

        # Save the loaded data in the output data structure
        if len(temp.columns) > 0:
            data = pds.concat([data, temp], sort=True, axis=0)
        del temp

    # If data was loaded, update the meta data
    if len(data.columns) > 0:
        meta = pysat.Meta()
        for cc in data.columns:
            meta[cc] = update_smag_metadata(cc)

        meta.info = {'baseline': format_baseline_list(baseline)}
    else:
        meta = pysat.Meta(None)

    return data, meta
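Call sketch, assuming file names carry the 11-character date index that the companion list_files routine appends (name and suffix are illustrative).

data, meta = load(['supermag_magnetometer.csv_2013-01-01'], tag='')
print(data.columns)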
Example #27
def load_files(files, tag=None, sat_id=None, altitude_bin=None):
    """Load COSMIC data files directly from a given list.

    May be directly called by user, but in general is called by load.  This is
    separate from the main load function for future support of multiprocessor
    loading.

    Parameters
    ----------
    files : pandas.Series
        Series of filenames
    tag : str or NoneType
        tag or None (default=None)
    sat_id : str or NoneType
        satellite id or None (default=None)
    altitude_bin : integer
        Number of kilometers to bin altitude profiles by when loading.
        Currently only supported for tag='ionprf'. (default=None)

    Returns
    -------
    output : list of dicts
        Object containing satellite data, one dict per file

    """

    output = [None] * len(files)
    drop_idx = []
    main_dict = {}
    main_dict_len = {}

    safe_keys = []
    for (i, fname) in enumerate(files):
        try:
            data = netCDF4.Dataset(fname)
            # build up dictionary with all file ncattrs
            new = {}
            # get list of file attributes
            ncattrsList = data.ncattrs()
            # these include information about where the profile was observed
            for d in ncattrsList:
                new[d] = data.getncattr(d)

            if i == 0:
                keys = data.variables.keys()
                for key in keys:
                    safe_keys.append(key)
                    main_dict[key] = []
                    main_dict_len[key] = []

            # load all of the variables in the netCDF
            for key in safe_keys:
                # grab data
                t_list = data.variables[key][:]
                # reverse byte order if needed
                if t_list.dtype.byteorder != '=':
                    main_dict[key].append(t_list.byteswap().newbyteorder())
                else:
                    main_dict[key].append(t_list)
                # store lengths
                main_dict_len[key].append(len(main_dict[key][-1]))

            output[i] = new
            data.close()
        except RuntimeError:
            # some of the files have zero bytes, which causes a read error
            # store the index of these zero-byte files so the Nones they
            # leave behind in output can be dropped below
            drop_idx.append(i)

    # drop anything that came from the zero byte files
    drop_idx.reverse()
    for i in drop_idx:
        del output[i]

    # combine different sub lists in main_dict into one
    for key in safe_keys:
        main_dict[key] = np.hstack(main_dict[key])
        main_dict_len[key] = np.cumsum(main_dict_len[key])

    if tag == 'atmprf':
        # this file has three groups of variable lengths
        # each goes into its own DataFrame
        # two are processed here, last is processed like other
        # file types
        # see code just after this if block for more
        # general explanation on lines just below
        p_keys = ['OL_vec2', 'OL_vec1', 'OL_vec3', 'OL_vec4']
        p_dict = {}
        # get indices needed to parse data
        plengths = main_dict_len['OL_vec1']
        max_p_length = np.max(plengths)
        plengths, plengths2 = _process_lengths(plengths)
        # collect data
        for key in p_keys:
            p_dict[key] = main_dict.pop(key)
            _ = main_dict_len.pop(key)
        psub_frame = pysat.DataFrame(p_dict)

        # change in variables in this file type
        # depending upon the processing applied at UCAR
        if 'ies' in main_dict.keys():
            q_keys = ['OL_ipar', 'OL_par', 'ies', 'hes', 'wes']
        else:
            q_keys = ['OL_ipar', 'OL_par']
        q_dict = {}
        # get indices needed to parse data
        qlengths = main_dict_len['OL_par']
        max_q_length = np.max(qlengths)
        qlengths, qlengths2 = _process_lengths(qlengths)
        # collect data
        for key in q_keys:
            q_dict[key] = main_dict.pop(key)
            _ = main_dict_len.pop(key)
        qsub_frame = pysat.DataFrame(q_dict)

        max_length = np.max([max_p_length, max_q_length])
        length_arr = np.arange(max_length)
        # small sub DataFrames
        for i in np.arange(len(output)):
            output[i]['OL_vecs'] = psub_frame.iloc[plengths[i]:plengths[i+1], :]
            output[i]['OL_vecs'].index = \
                length_arr[:plengths2[i+1]-plengths2[i]]
            output[i]['OL_pars'] = qsub_frame.iloc[qlengths[i]:qlengths[i+1], :]
            output[i]['OL_pars'].index = \
                length_arr[:qlengths2[i+1]-qlengths2[i]]

    # create a single data frame with all bits, then
    # break into smaller frames using views
    main_frame = pysat.DataFrame(main_dict)
    # get indices needed to parse data
    lengths = main_dict_len[list(main_dict.keys())[0]]
    # get largest length and create numpy array with it
    # used to speed up reindexing below
    max_length = np.max(lengths)
    length_arr = np.arange(max_length)
    # process lengths for ease of parsing
    lengths, lengths2 = _process_lengths(lengths)
    # break main profile data into each individual profile
    for i in np.arange(len(output)):
        output[i]['profiles'] = main_frame.iloc[lengths[i]:lengths[i+1], :]
        output[i]['profiles'].index = length_arr[:lengths2[i+1]-lengths2[i]]

    if tag == 'ionprf':
        if altitude_bin is not None:
            for out in output:
                rval = (out['profiles']['MSL_alt']/altitude_bin).round().values
                out['profiles'].index = rval * altitude_bin
                out['profiles'] = \
                    out['profiles'].groupby(out['profiles'].index.values).mean()
        else:
            for out in output:
                out['profiles'].index = out['profiles']['MSL_alt']

    return output
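A hedged sketch of the altitude-binning path; the file name is illustrative.

profiles = load_files(['ionPrf_example.nc'], tag='ionprf', altitude_bin=5)
# each profile is now indexed by 5 km altitude bins, averaged within a bin
print(profiles[0]['profiles'].index[:3])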
Example #28
def load(fnames,
         tag=None,
         sat_id=None,
         fake_daily_files_from_monthly=False,
         flatten_twod=True):
    """Load NASA CDAWeb CDF files.

    This routine is intended to be used by pysat instrument modules supporting
    a particular NASA CDAWeb dataset.

    Parameters
    ------------
    fnames : (pandas.Series)
        Series of filenames
    tag : (str or NoneType)
        tag or None (default=None)
    sat_id : (str or NoneType)
        satellite id or None (default=None)
    fake_daily_files_from_monthly : bool
        Some CDAWeb instrument data files are stored by month, interfering
        with pysat's functionality of loading by day. When True, this flag
        parses the daily dates that the list_files routine appended
        internally to the monthly files. These dates are used here to
        provide data by day.
    flatten_twod : bool
        Flattens 2D data into different columns of the root DataFrame
        rather than producing a Series of DataFrames

    Returns
    ---------
    data : (pandas.DataFrame)
        Object containing satellite data
    meta : (pysat.Meta)
        Object containing metadata such as column names and units

    Examples
    --------
    ::

        # within the new instrument module, at the top level define
        # a new variable named load, and set it equal to this load method
        # code below taken from cnofs_ivm.py.

        # support load routine
        # use the default CDAWeb method
        load = cdw.load


    """

    import pysatCDF

    if len(fnames) <= 0:
        return pysat.DataFrame(None), None
    else:
        # going to use pysatCDF to load the CDF and format
        # data and metadata for pysat using some assumptions.
        # Depending upon your needs the resulting pandas DataFrame may
        # need modification
        # currently only loads one file, which handles more situations via
        # pysat than you may initially think

        if fake_daily_files_from_monthly:
            # parse out date from filename
            fname = fnames[0][0:-11]
            date = pysat.datetime.strptime(fnames[0][-10:], '%Y-%m-%d')
            with pysatCDF.CDF(fname) as cdf:
                # convert data to pysat format
                data, meta = cdf.to_pysat(flatten_twod=flatten_twod)
                # select data from monthly
                data = data.loc[date:date + pds.DateOffset(days=1) -
                                pds.DateOffset(microseconds=1), :]
                return data, meta
        else:
            # basic data return
            with pysatCDF.CDF(fnames[0]) as cdf:
                return cdf.to_pysat(flatten_twod=flatten_twod)
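Instrument modules that need the monthly-file workaround typically bind the flag when re-exporting this routine; a sketch using functools.partial (the import path is an assumption and varies by pysat version).

import functools
from pysat.instruments.methods import nasa_cdaweb as cdw
load = functools.partial(cdw.load, fake_daily_files_from_monthly=True)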
Example #29
def load(fnames, tag=None, sat_id=None):
    # create an artifical satellite data set
    parts = os.path.split(fnames[0])[-1].split('-')
    yr = int(parts[0])
    month = int(parts[1])
    day = int(parts[2][0:2])
    date = pysat.datetime(yr, month, day)
    # scalar divisor below used to reduce the number of time samples
    # covered by the simulation per day. The higher the number the lower
    # the number of samples (86400/scalar)
    scalar = 100
    num = 86400 // scalar  # floor division keeps num an integer in Python 3
    # basic time signal in UTS
    uts = np.arange(num) * scalar
    num_array = np.arange(num) * scalar
    # seed DataFrame with UT array
    data = pysat.DataFrame(uts, columns=['uts'])

    # need to create simple orbits here. Have start of first orbit
    # at 2009,1, 0 UT. 14.84 orbits per day
    # figure out how far in time from the root start
    # use that info to create a signal that is continuous from that start
    # going to presume there are 5820 seconds per orbit (97 minute period)
    time_delta = date - pysat.datetime(2009, 1, 1)
    # root start
    uts_root = np.mod(time_delta.total_seconds(), 5820)
    # mlt runs 0-24 each orbit.
    mlt = np.mod(uts_root + np.arange(num) * scalar, 5820) * (24. / 5820.)
    data['mlt'] = mlt
    # do slt, 20 second offset from mlt
    uts_root = np.mod(time_delta.total_seconds() + 20, 5820)
    data['slt'] = np.mod(uts_root + np.arange(num) * scalar,
                         5820) * (24. / 5820.)

    # create a fake longitude, resets every 6240 seconds
    # sat moves at 360/5820 deg/s, Earth rotates at 360/86400, takes extra time
    # to go around full longitude
    long_uts_root = np.mod(time_delta.total_seconds(), 6240)
    longitude = np.mod(long_uts_root + num_array, 6240) * (360. / 6240.)
    data['longitude'] = longitude

    # create latitude signal for testing polar orbits
    latitude = 90. * np.cos(
        np.mod(uts_root + num_array, 5820) * (2. * np.pi / 5820.))
    data['latitude'] = latitude

    # create real UTC time signal
    index = pds.date_range(date,
                           date +
                           pds.DateOffset(hours=23, minutes=59, seconds=59),
                           freq=str(scalar) + 'S')
    data.index = index
    data.index.name = 'epoch'
    # higher rate time signal (for scalar >= 2)
    # this time signal used for 2D profiles associated with each time in main
    # DataFrame
    high_rate_template = pds.date_range(
        date, date + pds.DateOffset(hours=0, minutes=1, seconds=39), freq='2S')

    # create a few simulated profiles
    # DataFrame at each time with mixed variables
    profiles = []
    # DataFrame at each time with numeric variables only
    alt_profiles = []
    # Series at each time, numeric data only
    series_profiles = []
    # frame indexed by date times
    frame = pds.DataFrame(
        {
            'density': data['mlt'].iloc[0:50].values.copy(),
            'dummy_str': ['test'] * 50,
            'dummy_ustr': [u'test'] * 50
        },
        index=data.index[0:50],
        columns=['density', 'dummy_str', 'dummy_ustr'])
    # frame indexed by float
    dd = np.arange(50) * 1.2
    ff = np.arange(50) / 50.
    ii = np.arange(50) * 0.5
    frame_alt = pds.DataFrame({'density': dd, 'fraction': ff},
                              index=ii,
                              columns=['density', 'fraction'])
    # series version of storage
    series_alt = pds.Series(dd, index=ii, name='series_profiles')

    for time in data.index:
        frame.index = high_rate_template + (time - data.index[0])
        profiles.append(frame)
        alt_profiles.append(frame_alt)
        series_profiles.append(series_alt)
    # store multiple data types into main frame
    data['profiles'] = pds.Series(profiles, index=data.index)
    data['alt_profiles'] = pds.Series(alt_profiles, index=data.index)
    data['series_profiles'] = pds.Series(series_profiles, index=data.index)
    return data, meta.copy()
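Access sketch for the simulated 2D structures above, assuming this module is registered as pysat's 2D test instrument (the name 'testing2d' is an assumption).

import pysat
inst = pysat.Instrument('pysat', 'testing2d')
inst.load(2009, 1)
print(inst.data['profiles'].iloc[0])         # per-time DataFrame, mixed types
print(inst.data['alt_profiles'].iloc[0])     # float-indexed numeric DataFrame
print(inst.data['series_profiles'].iloc[0])  # plain numeric Series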
Example #30
    def to_pysat(self, flatten_twod=True):
        """
        Exports loaded CDF data into data,meta for pysat module

        Parameters
        ----------
        flatten_twod : bool (True)
            If True, then two dimensional data is flattened across 
            columns. Name mangling is used to group data, first column
            is 'name', last column is 'name_end'. In between numbers are 
            appended 'name_1', 'name_2', etc. All data for a given 2D array
            may be accessed via, data.ix[:,'item':'item_end']
            If False, then 2D data is stored as a series of DataFrames, 
            indexed by Epoch. data.ix[0, 'item']
                             
        Returns
        -------
        data, meta
        
        """

        import pysat
        import pandas

        # copy data
        cdata = self.data.copy()

        meta = pysat.Meta(pysat.DataFrame.from_dict(self.meta,
                                                    orient='index'))
        # all column names should be lower case
        lower_names = [name.lower() for name in meta.data.columns]
        meta.data.columns = lower_names
        # replace standard CDAWeb terms with more pysat friendly versions
        if 'lablaxis' in meta.data.columns:
            meta.data.drop('long_name', inplace=True, axis=1)
            meta.data.rename(columns={'lablaxis': 'long_name'}, inplace=True)
        if 'catdesc' in meta.data.columns:
            meta.data.rename(columns={'catdesc': 'description'}, inplace=True)

        # account for different possible cases for Epoch, epoch, EPOCH, epOch
        lower_names = [name.lower() for name in meta.data.index.values]
        for name, true_name in zip(lower_names, meta.data.index.values):
            if name == 'epoch':
                meta.data.rename(index={true_name: 'Epoch'}, inplace=True)
                epoch = cdata.pop(true_name)
                cdata['Epoch'] = epoch

        # ready to format data, iterate over all of the data names
        # and put into a pandas DataFrame
        two_d_data = []
        drop_list = []
        for name in cdata.keys():
            temp = np.shape(cdata[name])
            # treat 2 dimensional data differently
            if len(temp) == 2:
                if not flatten_twod:
                    # put 2D data into a Frame at each time
                    # remove data from dict when adding to the DataFrame
                    frame = pysat.DataFrame(cdata[name].flatten(), columns=[name])
                    drop_list.append(name)

                    step = temp[0]
                    new_list = []
                    new_index = np.arange(step)
                    for i in np.arange(len(epoch)):
                        new_list.append(frame.iloc[i*step:(i+1)*step, :])
                        new_list[-1].index = new_index
                    new_frame = pandas.Series(new_list, index=epoch, name=name)
                    two_d_data.append(new_frame)

                else:
                    # flatten 2D into series of 1D columns
                    new_names = [name + '_{i}'.format(i=i) for i in np.arange(temp[0] - 2)]
                    new_names.append(name + '_end')
                    new_names.insert(0, name)
                    # remove data from dict when adding to the DataFrame
                    drop_list.append(name)
                    frame = pysat.DataFrame(cdata[name].T,
                                            index=epoch,
                                            columns=new_names)
                    two_d_data.append(frame)
        for name in drop_list:
            _ = cdata.pop(name)
        # all of the data left over is 1D, add as Series
        data = pysat.DataFrame(cdata, index=epoch)
        two_d_data.append(data)
        data = pandas.concat(two_d_data, axis=1)
        data.drop('Epoch', axis=1, inplace=True)
        return data, meta
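With flatten_twod=True (the default), a 2D variable spreads across the mangled columns described in the docstring; a single label slice recovers the whole block ('item' is hypothetical).

two_d = data.loc[:, 'item':'item_end']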