def test_event_offset():
    from numpy import asarray, rec
    data = [1, 2, 3]
    assert_array_equal(util.event_offset(data, 1), [2, 3, 4])
    assert_array_equal(util.event_offset(asarray(data), 2), [3, 4, 5])
    marked = rec.fromarrays((data, ['a', 'b', 'c']),
                            names=('start', 'names'))
    assert_array_equal(util.event_offset(marked, 1)['start'], [2, 3, 4])
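A hedged sketch of what the `util.event_offset` helper exercised above could look like, inferred only from the assertions in the test; it is not the project's actual implementation.

# Assumption: behaviour reconstructed from the test above, not from `util` itself.
from numpy import asarray


def event_offset(events, offset):
    """Shift plain event times, or the 'start' field of a record array."""
    events = asarray(events)
    if events.dtype.names and 'start' in events.dtype.names:
        shifted = events.copy()
        shifted['start'] = shifted['start'] + offset
        return shifted
    return events + offset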
def recarray_from_file(source, ifo=None, columns=None, loudest=False):
    """Read a `GWRecArray` from a PyCBC live HDF5 file
    """
    # read HDF5 file
    if isinstance(source, CacheEntry):
        source = source.path
    if isinstance(source, str):
        h5f = source = h5py.File(source, 'r')
        opened = True
    else:
        opened = False
    # find group
    if isinstance(source, h5py.File):
        if ifo is None:
            try:
                ifo, = list(source)
            except ValueError as e:
                e.args = ("PyCBC live HDF5 file contains multiple IFO groups, "
                          "please select ifo manually",)
                raise
        try:
            source = source[ifo]
        except KeyError as e:
            e.args = ("No group for ifo %r in PyCBC live HDF5 file" % ifo,)
            raise
    # at this stage, 'source' should be an HDF5 group in the pycbc live format
    if columns is None:
        columns = [c for c in source if c not in INVALID_COLUMNS]
    names, data = zip(*[(k, source[k][:]) for k in source if k in columns])
    names = list(map(str, names))
    if loudest:
        # recover only the 'loudest' events
        loudest = source['loudest'][:]
        data = [d[loudest] for d in data]
    else:
        data = list(data)
    # calculate new_snr on-the-fly
    if 'new_snr' in columns and 'new_snr' not in source:
        # get columns needed for newsnr
        snr = data[names.index('snr')]
        rchisq = data[names.index('chisq')]  # chisq is already reduced
        # calculate and append to column list
        data.append(get_new_snr(snr, rchisq))
        names.append('new_snr')
    # calculate mchirp
    if 'mchirp' in columns and 'mchirp' not in source:
        mass1 = data[names.index('mass1')]
        mass2 = data[names.index('mass2')]
        data.append(get_mchirp(mass1, mass2))
        names.append('mchirp')
    # read columns into numpy recarray
    out = rec.fromarrays(data, names=map(str, names)).view(GWRecArray)
    if 'end_time' in columns:
        out.sort(order='end_time')
    if opened:
        h5f.close()
    return out
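A hedged usage sketch for the reader above; the file name, the 'H1' group and the column list are illustrative assumptions, not part of any real dataset.

# Hypothetical call; file name, ifo and columns are assumptions for illustration.
events = recarray_from_file('H1-PYCBC_LIVE-1187008882-4.hdf', ifo='H1',
                            columns=['end_time', 'snr', 'chisq', 'new_snr'])
print(len(events), events['new_snr'].max())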
def is_sorted(self):
    idx_cols = list(self.schema.idx)
    if len(idx_cols) == 1:
        arr = self[idx_cols[0]]
        return all(arr[1:] >= arr[:-1])
    # Multi-column index: fall back on argsort
    arr = rec.fromarrays([self[n] for n in idx_cols], names=idx_cols)
    sort_mask = self.argsort()
    a_range = arange(len(sort_mask))
    return all(sort_mask == a_range)
def test_select(self):
    f0 = arange(10, dtype=int32)
    f1 = arange(10, dtype=float64)
    irregular = rec.fromarrays([f0, f1])
    f0 = irregular['f0']
    f1 = irregular['f1']
    i0 = evaluate('f0 < 5')
    i1 = evaluate('f1 < 5')
    assert_array_equal(f0[i0], arange(5, dtype=int32))
    assert_array_equal(f1[i1], arange(5, dtype=float64))
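Worth noting for the test above: when `names` is omitted, `numpy.rec.fromarrays` auto-generates field names 'f0', 'f1', ..., which is why `irregular['f0']` works. A minimal standalone illustration:

import numpy as np

a = np.arange(3, dtype=np.int32)
b = np.arange(3, dtype=np.float64)
r = np.rec.fromarrays([a, b])   # no names: fields default to 'f0', 'f1'
print(r.dtype.names)            # ('f0', 'f1')
print(r.f1)                     # [0. 1. 2.]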
def dict_list_to_frame(dict_list):
    df = pd.DataFrame(dict_list)
    d0 = dict(df.iloc[0])
    goodkeys = [k for k in d0.keys()
                if type(d0[k]) != fits.card.Undefined]
    df = df[goodkeys]
    # comb = pdplus.df_to_rec_strings(df)
    dfs = df.select_dtypes(include=['object'])
    dfns = df.select_dtypes(exclude=['object'])
    dfs = rec.fromarrays(np.array(dfs).astype('S100').T,
                         names=list(dfs.columns))
    names = list(dfns.columns)
    arrs = [dfns[n] for n in names]
    comb = mlab.rec_append_fields(dfs, names, arrs)
    return comb
def extrema(x, max=True, min=True, withend=True):
    """
    Return indexes, values, and sign of curvature of local extrema of 1-d array.

    The boolean arguments max, min, withend determine whether to include
    maxima and minima, and include the endpoints.

    Basic usage.

    >>> x = [2, 1, 0, 1, 2]
    >>> extrema(x)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    rec.array([(0, 2, -1), (2, 0, 1), (4, 2, -1)],
        dtype=[('index', '<i...'), ('value', '<i...'), ('curv', '<i...')])

    Options to include only certain types of extrema.

    >>> extrema(x, withend=False)
    rec.array([(2, 0, 1)],...
    >>> extrema(x, max=False)
    rec.array([(2, 0, 1)],...
    >>> extrema(x, min=False)
    rec.array([(0, 2, -1), (4, 2, -1)],...
    >>> extrema(x, max=False, min=False)
    rec.array([],...

    The beginning and end of flat segments both count as extrema, except
    the first and last data point.

    >>> extrema([0, 0, 1, 1, 2, 2])
    rec.array([(1, 0, 1), (2, 1, -1), (3, 1, 1), (4, 2, -1)],...
    >>> extrema([0, 0, 0])
    rec.array([],...)
    >>> extrema([0, 0, 1, 1], withend=False)
    rec.array([(1, 0, 1), (2, 1, -1)],...

    @todo: Add options on how to handle flat segments.
    """
    x = squeeze(x)  # ensure 1-d numpy array
    xpad = r_[x[1], x, x[-2]]  # pad x so endpoints become minima or maxima
    curv = sign(diff(sign(diff(xpad))))  # +1 at minima, -1 at maxima
    i = curv.nonzero()[0]  # nonzero() wraps the indices in a 1-tuple
    ext = rec.fromarrays([i, x[i], curv[i]], names=["index", "value", "curv"])
    if not withend:
        ext = ext[(i > 0) & (i < len(x) - 1)]
    if not max:
        ext = ext[ext.curv >= 0]
    if not min:
        ext = ext[ext.curv <= 0]
    return ext
def torec(self):
    """
    Returns a recarray, averaging across chains as needed.
    """
    from numpy import sqrt, rec
    fields = ['time', 'p.sd', 'p.mmse']
    values = [self.tgrid, sqrt(_reduce(self.pvar)), _reduce(self.pmmse)]
    if self.nchains > 1:
        fields.append("p.mmse.sd")
        values.append(self.pmmse.std(1))
    for k, v in self.estimates.items():
        fields.append(k)
        values.append(_reduce(v))
        if v.ndim > 1:
            fields.append(k + ".sd")
            values.append(v.std(1))
    return rec.fromarrays(values, names=fields)
def load_recarray(group):
    """Read recarray from the given HDF5 group
    """
    columns = map(str, list(group))
    # read segments
    try:
        epoch = LIGOTimeGPS(group['segments'].attrs['epoch'])
    except TypeError:
        epoch = LIGOTimeGPS(float(group['segments'].attrs['epoch']))
    segments = SegmentList(Segment(epoch + x[0], epoch + x[1])
                           for x in group['segments'][:])
    columns.pop(columns.index('segments'))
    # read columns
    data = [group[c] for c in columns]
    # format and add
    table = rec.fromarrays(data, names=columns).view(GWRecArray)
    add_triggers(table, group.name.split('/')[-1], segments=segments)
    return table
def argsort(self):
    idx_cols = list(self.schema.idx)
    arr = rec.fromarrays([self[n] for n in idx_cols], names=idx_cols)
    # Mergesort is faster on pre-sorted arrays
    return argsort(arr, kind="mergesort")
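The `argsort`/`is_sorted` pair above relies on the fact that a structured array built with `rec.fromarrays` compares records field by field, so sorting it yields a lexicographic ordering over the index columns. A self-contained sketch of that pattern; the column names are made up for illustration:

import numpy as np

city = np.array(['b', 'a', 'a', 'b'])
year = np.array([2021, 2022, 2021, 2020])
keys = np.rec.fromarrays([city, year], names=['city', 'year'])
order = np.argsort(keys, kind='mergesort')  # stable, lexicographic by (city, year)
print(order)                                # [2 1 3 0]
# True only when the rows are already in index order, as in is_sorted() above
print(np.array_equal(order, np.arange(len(keys))))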
def reduce(self, *col_list, **col_dict):
    """
    Return a new frame containing the chosen columns. A column can be one
    of the existing columns or an s-expression that will be automatically
    evaluated.
    """
    # Merge all args in one dict
    columns = dict(zip(col_list, col_list))
    columns.update(col_dict)

    # Detect aggregations
    all_ast = {}
    for alias, expr in columns.items():
        if expr.startswith("("):
            all_ast[alias] = AST.parse(expr)
    agg_ast = {}
    other_ast = {}
    for alias, ast in all_ast.items():
        if ast.is_aggregate():
            agg_ast[alias] = ast
        else:
            other_ast[alias] = ast

    # Eval non-aggregated columns
    env = self.eval_env()
    non_agg = {}
    for alias, expr in columns.items():
        if alias in agg_ast:
            continue
        ast = other_ast.get(alias)
        if ast:
            arr = ast.eval(env=env)
        else:
            arr = self.columns[expr]
        if isinstance(arr, Alias):
            # un-pack alias
            arr, alias = arr.value, arr.name
        non_agg[alias] = arr

    # Early exit if we don't need to compute aggregates
    if not agg_ast:
        schema = Schema.from_frame(non_agg, idx_columns=list(non_agg))
        return Frame(schema, non_agg)

    res = {}
    if non_agg:
        # Compute binning
        records = rec.fromarrays(non_agg.values(), names=list(non_agg))
        keys, bins = unique(records, return_inverse=True)
        # Build resulting columns
        for alias in non_agg:
            arr = keys[alias]
            if isinstance(arr, Alias):
                # un-pack alias
                arr, alias = arr.value, arr.name
            res[alias] = arr
        env.update({"_keys": keys, "_bins": bins})

    # Compute aggregates
    for alias, expr in agg_ast.items():
        arr = expr.eval(env)
        if isinstance(arr, Alias):
            # un-pack alias
            arr, alias = arr.value, arr.name
        # Without bins, eval will return a scalar value
        res[alias] = arr if non_agg else asarray([arr])

    schema = Schema.from_frame(res, idx_columns=list(non_agg))
    return Frame(schema, res)
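The binning step in reduce() above uses a generic NumPy pattern that can be shown in isolation: pack the grouping columns into a structured array with rec.fromarrays, let unique(..., return_inverse=True) assign a bin index to every row, then aggregate per bin (here with bincount as a sum aggregate). This is a standalone sketch of that pattern with made-up data, not the Frame/AST machinery itself.

import numpy as np

city = np.array(['a', 'b', 'a', 'b', 'a'])
year = np.array([2020, 2020, 2021, 2020, 2020])
amount = np.array([1.0, 2.0, 3.0, 4.0, 5.0])

# Composite grouping key as a structured array
keys = np.rec.fromarrays([city, year], names=['city', 'year'])
uniq, bins = np.unique(keys, return_inverse=True)

# Sum `amount` per group
totals = np.bincount(bins, weights=amount)
for key, total in zip(uniq, totals):
    print(key['city'], key['year'], total)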
from numpy import array, rec
from numpy.random import normal as nprandom
from rpy2.robjects import numpy2ri, r

foo = array(range(10))
bar = foo + nprandom(0, 1, 10)

d = rec.fromarrays([foo, bar], names=('foo', 'bar'))
print d

fit = r.lm('bar ~ foo', data=d)
print fit.rx2('coefficients')
def read(self, var, x=None, y=None, radius=0., tlim=None, ylim=None,
        xlim=None, missions=None, sort=True, profile=True):
    """Reads dataset.

    PARAMETERS
        var (string) :
            Variable to be read from dataset. It also accepts special
            naming conventions in order to rename the original dataset
            variable and to load alternative variables in case of invalid
            data, according to the syntax '[new_var_name]:var[|other_var]'.
        x, y (array like, optional) :
            List of zonal and meridional point coordinates of interest.
        radius (float, optional) :
            Search radius in degrees.
        tlim, ylim, xlim (array like, optional) :
            The temporal, meridional and zonal limits (minimum, maximum)
            for which data will be read.
        missions (array like, optional) :
            List of missions to read data from. If omitted, defaults to
            the missions available at dataset class initialization.
        sort (boolean, optional) :
            If true, sorts the data record in order of ascending time,
            latitude and longitude.
        profile (boolean, optional) :
            Sets whether the status is sent to screen.

    RETURNS
        dat (record array) :
            Record time-series of 'time', 'latitude', 'longitude',
            selected variable and 'mission'.

    """
    t0 = time()
    # Checks input parameters.
    T = self.variables['time'].data
    if var.find(':') >= 0:
        # Checks special variable syntax
        var_name, var = var.split(':')
    else:
        var_name = var
    if tlim == None:
        tlim = (T.min(), T.max())
    if (x != None) | (y != None):
        x, y = asarray(x), asarray(y)
        if x.size != y.size:
            raise ValueError('Zonal and meridional coordinate dimensions '
                             'do not match.')
        npoints = x.size
        radius2 = radius ** 2
    else:
        npoints = 0
        x = y = []
    #
    if ylim == None:
        ylim = (-90., 90.)
    if xlim == None:
        xlim = (0., 360.)
    else:
        # Make sure longitude limits are between 0 and 360.
        xlim = list(lon360(asarray(xlim)))
    if missions == None:
        missions = self.params['missions']

    # First we have to select which files will be loaded, which will
    # depend on the temporal limits given in $t$.
    sel_time = flatnonzero((T >= floor(min(tlim))) & (T <= ceil(max(tlim))))
    N = len(sel_time)

    # Second we will walk through each of the selected times in the dataset
    # and load the corresponding file for the available missions.
    t1 = time()
    if profile:
        s = '\rLoading data...'
        stdout.write(s)
        stdout.flush()
    # Reset important variables
    TIME, LAT, LON, VAR, MISSION = [array([])] * 5
    #
    for i, tm in enumerate(T[sel_time]):
        t2 = time()
        for (mission, dset, fname, cycle,
                orbit) in self.attributes['time_dataset'][tm]:
            # Skips mission not in missions list.
            if mission not in missions:
                continue
            # Uncompresses gzipped file and opens NetCDF instance.
            data = self.read_file('%s/%s/%s' % (self.params['path'],
                                                mission, fname))
            # Reads variable from NetCDF file.
            raw_time = self.read_variable(data, 'time')
            raw_lat = self.read_variable(data, 'lat')
            raw_lon = self.read_variable(data, 'lon')
            raw_dat = self.read_variable(data, var)
            # Select relevant data range according to limit parameters
            sel_from_time = ((raw_time >= min(tlim)) &
                             (raw_time <= max(tlim)))
            if (ylim != None) | (xlim != None):
                sel_from_limits = ones(data.dimensions['time'], dtype=bool)
            else:
                sel_from_limits = zeros(data.dimensions['time'], dtype=bool)
            if ylim != None:
                sel_from_limits = (sel_from_limits &
                                   ((raw_lat >= min(ylim)) &
                                    (raw_lat <= max(ylim))))
            if xlim != None:
                sel_from_limits = (sel_from_limits &
                                   ((raw_lon >= min(xlim)) &
                                    (raw_lon <= max(xlim))))
            # Select relevant data according to points and search radius.
            sel_from_radius = zeros(data.dimensions['time'], dtype=bool)
            for xx, yy in zip(x, y):
                distance2 = ((raw_lat - yy) ** 2 +
                             (raw_lon - lon360(xx)) ** 2)
                sel_from_radius = sel_from_radius | (distance2 <= radius2)
            #
            sel_data = flatnonzero(sel_from_time &
                                   (sel_from_limits | sel_from_radius) &
                                   (~isnan(raw_dat)))
            _time = raw_time[sel_data]
            _lat = raw_lat[sel_data]
            _lon = raw_lon[sel_data]
            _dat = raw_dat[sel_data]
            #
            TIME = append(TIME, _time)
            LAT = append(LAT, _lat)
            LON = append(LON, _lon)
            VAR = append(VAR, _dat)
            MISSION = append(MISSION, [mission] * len(sel_data))
            #
            self.close_file(data)
        #
        # Profiling
        if profile:
            s = '\rLoading data... %s ' % (profiler(N, i + 1, t0, t1, t2),)
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()

    # Converts the data to a structured array
    DAT = rec.fromarrays((TIME, LAT, LON, VAR, MISSION),
                         dtype=[('time', float64), ('latitude', float64),
                                ('longitude', float64), (var_name, float64),
                                ('mission', '|S3')])

    # Some data sorting?
    if sort:
        DAT.sort(order=('time', 'latitude', 'longitude'), axis=0)

    return DAT
def read(self, x=None, y=None, radius=0., tlim=None, ylim=None, xlim=None,
        missions=None, sort=True, profile=True):
    """Reads dataset.

    PARAMETERS
        x, y (array like, optional) :
            List of zonal and meridional point coordinates of interest.
        radius (float, optional) :
            Search radius in degrees.
        tlim, ylim, xlim (array like, optional) :
            The temporal, meridional and zonal limits (minimum, maximum)
            for which data will be read.
        missions (array like, optional) :
            List of missions to read data from. If omitted, defaults to
            the missions available at dataset class initialization.
        sort (boolean, optional) :
            If true, sorts the data record in order of ascending time,
            latitude and longitude.
        profile (boolean, optional) :
            Sets whether the status is sent to screen.

    RETURNS
        dat (record array) :
            Record time-series of 'time', 'latitude', 'longitude',
            selected variable and 'mission'.

    """
    t0 = time()
    # Checks input parameters.
    T = self.variables['time'].data
    if tlim == None:
        tlim = (T.min(), T.max())
    if (x != None) | (y != None):
        x, y = asarray(x), asarray(y)
        if x.size != y.size:
            raise ValueError('Zonal and meridional coordinate dimensions '
                             'do not match.')
        npoints = x.size
        radius2 = radius ** 2
    else:
        npoints = 0
        x = y = []
    #
    if ylim == None:
        ylim = (-90., 90.)
    if xlim == None:
        xlim = (0., 360.)
    else:
        # Make sure longitude limits are between 0 and 360.
        xlim = list(lon360(asarray(xlim)))
    if missions == None:
        missions = self.params['missions']

    # Aviso uses time in days since 1950-01-01 00:00:00 UTC, therefore
    # we have to calculate the initial time in matplotlib's format. We
    # also have to determine the proper variable using product name.
    T0 = dates.datestr2num('1950-01-01 00:00:00 UTC')
    var = self.params['product'].upper()

    # First we have to select which files will be loaded, which will
    # depend on the temporal limits given in $t$.
    sel_time = flatnonzero((T >= floor(min(tlim))) & (T <= ceil(max(tlim))))
    N = len(sel_time)

    # Second we will walk through each of the selected times in the dataset
    # and load the corresponding file for the available missions.
    t1 = time()
    if profile:
        s = '\rLoading data...'
        stdout.write(s)
        stdout.flush()
    # Reset important variables
    TIME, LAT, LON, VAR, MISSION = [array([])] * 5
    #
    for i, tm in enumerate(T[sel_time]):
        t2 = time()
        for (mission, fname) in self.attributes['time_dataset'][tm]:
            # Skips mission not in missions list.
            if mission not in missions:
                continue
            # Uncompresses gzipped file and opens NetCDF instance.
            data = self.read_file('%s/%s/%s' % (self.params['path'],
                                                mission, fname))
            # Retrieve the scale factor for each variable
            scale_lat = data.variables['latitude'].scale_factor
            scale_lon = data.variables['longitude'].scale_factor
            scale_dat = data.variables[var].scale_factor
            # Get the raw time, latitude and longitude
            raw_time = data.variables['time'].data + T0
            raw_lat = data.variables['latitude'].data * scale_lat
            raw_lon = data.variables['longitude'].data * scale_lon
            # Select relevant data range according to limit parameters
            sel_from_time = ((raw_time >= min(tlim)) &
                             (raw_time <= max(tlim)))
            sel_from_limits = zeros(data.dimensions['time'], dtype=bool)
            if ylim != None:
                sel_from_limits = (sel_from_limits |
                                   ((raw_lat >= min(ylim)) &
                                    (raw_lat <= max(ylim))))
            if xlim != None:
                sel_from_limits = (sel_from_limits |
                                   ((raw_lon >= min(xlim)) &
                                    (raw_lon <= max(xlim))))
            # Select relevant data according to points and search radius.
            sel_from_radius = zeros(data.dimensions['time'], dtype=bool)
            for xx, yy in zip(x, y):
                distance2 = ((raw_lat - yy) ** 2 +
                             (raw_lon - lon360(xx)) ** 2)
                sel_from_radius = sel_from_radius | (distance2 <= radius2)
            #
            sel_data = flatnonzero(sel_from_time &
                                   (sel_from_limits | sel_from_radius))
            _time = raw_time[sel_data]
            _lat = raw_lat[sel_data]
            _lon = raw_lon[sel_data]
            _dat = data.variables[var].data[sel_data] * scale_dat
            #
            TIME = append(TIME, _time)
            LAT = append(LAT, _lat)
            LON = append(LON, _lon)
            VAR = append(VAR, _dat)
            MISSION = append(MISSION, [mission] * len(sel_data))
            #
            self.close_file(data)
        #
        # Profiling
        if profile:
            s = '\rLoading data... %s ' % (profiler(N, i + 1, t0, t1, t2),)
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()

    # Converts the data to a structured array
    DAT = rec.fromarrays((TIME, LAT, LON, VAR, MISSION),
                         dtype=[('time', float64), ('latitude', float64),
                                ('longitude', float64),
                                (self.params['product'], float64),
                                ('mission', '|S3')])
    # DAT = hstack((TIME[:, None], LAT[:, None], LON[:, None],
    #               VAR[:, None], MISSION[:, None])).view(
    #     dtype=[('time', float64), ('latitude', float64),
    #            ('longitude', float64), (self.params['product'], float64),
    #            ('mission', '|S3')])

    # Some data sorting?
    if sort:
        DAT.sort(order=('time', 'latitude', 'longitude'), axis=0)

    return DAT