Esempio n. 1
0
def _retrieve_ts(book, selection, time_column, **args):
    """Retrieve one or more irregular time series from an Excel selection.

    The selection is read as consecutive column pairs: the first column of
    each pair is taken as time stamps and the second as the data values.

    Parameters
    ----------
    book :
        Workbook-like object exposing ``Worksheets(name)``.
    selection :
        Range specification handed to ``_parse_range``.
    time_column :
        Accepted but not referenced inside this function.
    **args :
        Optional ``header_labels`` entry that is forwarded to the header
        lookup; ``None`` is used when it is absent.

    Returns
    -------
    The single series when exactly one column pair was read, otherwise the
    list of all series that were built.
    """
    rvar = _parse_range(selection)
    sheet = book.Worksheets(rvar[0])
    header_labels = args.get("header_labels")

    header_lst, data_start_row = _retrieve_headers_by_tryerror(
        sheet, rvar, header_labels)

    # Only the first data row is read here; it is used to count the columns.
    first_row = sheet.Range(rvar[1] + str(data_start_row) + ":" + rvar[3] +
                            str(data_start_row)).Value
    # Floor division keeps the pair count an int so that range() accepts it
    # even when true division is in effect.
    num_ts = len(first_row[0]) // 2

    first_col = sheet.Range(rvar[1] + str(data_start_row)).Columns[0].Column
    last_row = rvar[4]

    tsl = []
    for i in range(num_ts):
        time_col = first_col + 2 * i
        data_col = time_col + 1

        ts_time = sheet.Range(sheet.Cells(data_start_row, time_col),
                              sheet.Cells(last_row, time_col)).Value
        ts_data = sheet.Range(sheet.Cells(data_start_row, data_col),
                              sheet.Cells(last_row, data_col)).Value
        ts_time = array(ts_time).flatten()
        ts_data = array(ts_data).flatten()

        # Drop trailing empty cells from the time column and cut the data
        # column to the same length.
        ts_time = _strip_ending_none(ts_time)
        ts_data = ts_data[0:len(ts_time)]

        ts_time = [_pytime2datetime(t) for t in ts_time]

        if header_lst:
            ts = its(ts_time, ts_data, header_lst[i])
        else:
            ts = its(ts_time, ts_data)
        tsl.append(ts)

    if len(tsl) == 1:
        return tsl[0]
    return tsl
Esempio n. 2
0
def interpolate_ts(ts, times, method=LINEAR, filter_nan=True, **dic):
    """ Interpolate a time series to a new time sequence.

    Parameters
    -----------
    ts : :class:`~vtools.data.timeseries.TimeSeries`
        Series to interpolate. Must have one-dimensional data, regular or irregular.
    times : :ref:`time_interval <time_intervals>`  or :ref:`time_sequence <time_sequence>`
        The new times to which the series will be interpolated. May also be
        a time series (its times are used) or a string that can be parsed
        into a time interval.
    method : string, optional
        See :func:`interpolate_ts_nan` for a list of methods
    filter_nan : boolean, optional
        True if nan should be omitted. If retained, nan values in the source
        series will be used in the interpolation algorithm, which may cause
        new nan points in the resulting series.
    **dic : dictionary
        Extra parameters passed through to the interpolation routine.

    Returns
    -------
    result : :class:`~vtools.data.timeseries.TimeSeries`
        A regular or irregular series with times based on `times` and values interpolated from ts.

    Raises
    ------
    ValueError
        If `times` is a time series that starts before or ends after `ts`.

    See Also
    --------
    interpolate_ts_nan : Fill internal nan values using interpolation

    """
    from vtools.data.api import TimeSeries
    if isinstance(times, TimeSeries):
        if times.start < ts.start or times.end > ts.end:
            raise ValueError(
                "Time series used to provide requested interpolation times is outside the original series"
            )
        interval = times.interval if times.is_regular() else None
        seq = times.times
        data = _interpolate_ts2array(ts, seq, method=method,
                                     filter_nan=filter_nan, **dic)
        if interval:
            return rts(data, seq[0], interval)
        # Build the irregular result from the time sequence, not from the
        # TimeSeries object that supplied it.
        return its(seq, data)
    elif not isinstance(times, str) and isSequenceType(times):
        data = _interpolate_ts2array(ts, times, method=method,
                                     filter_nan=filter_nan, **dic)
        ts = its(times, data, {})
    else:
        # Anything else is treated as an interval (or interval string).
        ts = _interpolate2rts(ts, times, method=method,
                              filter_nan=filter_nan, **dic)
    return ts
Esempio n. 3
0
    def test_save_data(self):
        """Store a regular and an irregular series in the DSS file and read
        them back, with and without a time-window extent."""
        service_id = "vtools.datastore.dss.DssService"
        source = self.test_file_path

        ## save rts first.
        data = range(1000)
        start = "12/21/2000 2:00"
        interval = "1hour"
        prop = {}
        prop[TIMESTAMP] = PERIOD_START
        prop[AGGREGATION] = MEAN

        prop["datum"] = "NGVD88"
        prop["manager"] = "John Doe"
        prop["model"] = "hydro 7.5"

        rt1 = rts(data, start, interval, prop)

        path = "/TEST/DOWNSTREAM/EC//1HOUR/STAGE/"
        data_ref = DataReference(service_id, source=source, selector=path)
        self.dss_service.add_data(data_ref, rt1)
        dssc = self.dss_service.get_catalog(source)
        data_ref = next(dssc.data_references(path))
        rtt = self.dss_service.get_data(data_ref)
        self.assertEqual(len(rtt), len(data))
        self.assertEqual(rtt.props[TIMESTAMP], PERIOD_START)
        self.assertEqual(rtt.props[AGGREGATION], MEAN)
        # assertTrue(a, b) would treat b as the failure message and never
        # compare the two values, so compare explicitly.
        self.assertEqual(rtt.times[0], dtm.datetime(2000, 12, 21, 2))
        extent = "time_window=(12/21/2000 02:00,01/31/2001 18:00)"
        data_ref = DataReference(service_id, source, None, path, extent)
        rtt2 = self.dss_service.get_data(data_ref)
        self.assertEqual(rtt.start, rtt2.start)
        self.assertEqual(rtt.end, rtt2.end)

        ## then its.
        path = "/HERE/IS/ITS//IR-YEAR/TEST/"
        data = range(20)
        data_ref = DataReference(service_id, source=source, selector=path)
        prop[AGGREGATION] = INDIVIDUAL

        times = ["01/15/1997", "02/17/1997", "03/5/1997",
                 "04/25/1997", "05/1/1997", "06/15/1997",
                 "07/25/1997", "08/14/1997", "09/17/1997",
                 "10/15/1997", "11/21/1997", "12/3/1997",
                 "01/9/1998", "02/15/1998", "03/19/1998",
                 "04/15/1998", "05/19/1998", "06/30/1998",
                 "07/15/1998", "08/24/1998"]

        times = [parse_time(t) for t in times]
        itt = its(times, data, prop)
        self.dss_service.add_data(data_ref, itt)
        extent = "time_window=(1/10/1997 02:00,09/30/1998 18:00)"
        data_ref = DataReference(service_id, source, None, path, extent)
        rtt3 = self.dss_service.get_data(data_ref)
        self.assertEqual(parse_time("01/15/1997"), rtt3.start)
        self.assertEqual(parse_time("08/24/1998"), rtt3.end)
Esempio n. 4
0
def dss_its_to_ts(data, jbdate, itimes, prop, flags):
    """ Convert dss irregular time series into TimeSeries class.

    data:   list of raw data returned by zrits or zritsx
    jbdate: integer dss base date returned by zrits or zritsx
    itimes: offset list in minutes from base date for each time point,
            also generated by zrits or zritsx
    prop:   dict of info obtained from the dss call; it is updated in
            place with the TIMESTAMP and AGGREGATION entries
    flags:  list of data quality flags returned by zrits or zritsx

    Raises ValueError when itimes is shorter than data.
    """

    if len(itimes) < len(data):
        raise ValueError("lenght of input itimes must not be less than data")

    # The validator returns the retained data plus the index in the raw
    # arrays at which that data starts; cut the offsets to the same window.
    data, start_i = _validate_dss_data_series(data, flags)
    itimes = itimes[start_i:start_i + len(data)]

    new_times = [_dss_julian_datetime_to_python_datetime(jbdate, itime)
                 for itime in itimes]
    new_times.sort()

    prop[TIMESTAMP] = INST
    prop[AGGREGATION] = INDIVIDUAL
    # new_times already covers exactly the retained window. Slicing it by
    # start_i a second time would drop points and leave fewer times than
    # data values whenever start_i > 0.
    return its(new_times, data, prop)
Esempio n. 5
0
def ts_split(ts, shared=True):
    """ Splits a 2D multivariate series into constituent univariate series.

    Parameters
    ----------
    ts : :class:`~vtools.data.timeseries.TimeSeries`
        Series whose data array has exactly two dimensions.
    shared : Boolean
        If True the returned series are built on column slices of the input
        data array; if False they are built on slices of a copy of it.

    Returns
    -------
    out : tuple of :class:`~vtools.data.timeseries.TimeSeries`
        One univariate series per column of the input data.

    Raises
    ------
    ValueError
        If the data of `ts` is not two-dimensional.

    """
    # Anything other than 2D has no column axis to split on; a 1D array
    # would otherwise fail later with an IndexError on shape[1].
    if ts.data.ndim != 2:
        raise ValueError("Only 2D arrays can be split")

    dsource = ts.data if shared else ts.data.copy()

    out = []
    for jcol in range(dsource.shape[1]):
        column = dsource[:, jcol]
        if ts.is_regular():
            colts = rts(column, ts.start, ts.interval)
        else:
            colts = its(ts.ticks, column)
        out.append(colts)

    return tuple(out)
Esempio n. 6
0
    def make_depth_average(self, time_basis):
        """ Build depth and depth-averaged time series from the stored outputs.

            Each output array is read as blocks of ``nvrt`` rows per cast,
            with all casts of one station stored contiguously. Column 0
            is used as an offset in days from ``time_basis``, column 1 as
            the value and the negated column 2 as the vertical coordinate.

            Parameters
            ----------
            time_basis: datetime.datetime
                time base of the outputs

            Returns
            -------
            list of (depth series, depth-averaged series) tuples
                One tuple per array in ``self._outputs``. Each series has
                one data column per station.
        """
        nvrt = self._nvrt
        n_casts = self._n_casts
        rows_per_station = n_casts * nvrt

        # Time stamps come from the first output only: the first row of
        # every cast block carries the day offset.
        first_output = self._outputs[0]
        times = [
            time_basis + time_interval(days=first_output[k * nvrt, 0])
            for k in range(n_casts)
        ]

        results = []
        for out in self._outputs:
            depth_rows = []
            value_rows = []
            for station in range(self._nxy):
                station_depths = []
                station_means = []
                for cast in range(n_casts):
                    lo = cast * nvrt + station * rows_per_station
                    hi = lo + nvrt
                    # The last row of the block gives the total depth.
                    total_depth = -out[lo + nvrt - 1, 2]
                    z = -out[lo:hi, 2]
                    v = out[lo:hi, 1]
                    mean = scipy.integrate.simps(v, z) / total_depth
                    station_depths.append(total_depth)
                    station_means.append(mean)
                depth_rows.append(station_depths)
                value_rows.append(station_means)
            # Rows are stations here; transpose so rows become times.
            depth_series = its(times, numpy.array(depth_rows).transpose())
            value_series = its(times, numpy.array(value_rows).transpose())
            results.append((depth_series, value_series))
        return results
Esempio n. 7
0
def _bind(ts1, ts2):
    """ Combine two univariate series into one two-column series.

    When both inputs are regular the result is regular, spanning from the
    earlier start to the later end at the smaller of the two intervals.
    Otherwise the result is irregular on the union of both tick arrays.
    Positions that an input does not fill are left as nan.

    Parameters
    ----------
    ts1,ts2 : :class:`~vtools.data.timeseries.TimeSeries`
        Two univariate time series.

    Returns
    -------
    merged : :class:`~vtools.data.timeseries.TimeSeries`
        New series with ts1 in column 0 and ts2 in column 1.

    Raises
    ------
    ValueError
        If either input has data that is not one-dimensional.

    """
    if ts1.data.ndim != 1 or ts2.data.ndim != 1:
        raise ValueError("bind only support time series of univariate")

    both_regular = ts1.is_regular() and ts2.is_regular()
    merged_start = None
    merged_interval = None

    if both_regular:
        start1, end1 = ts1.times[0], ts1.times[-1]
        start2, end2 = ts2.times[0], ts2.times[-1]
        merged_start = start2 if start1 > start2 else start1
        merged_end = end2 if end1 < end2 else end1
        interval1 = ts1.interval
        interval2 = ts2.interval
        merged_interval = interval2 if interval1 > interval2 else interval1
        n_points = number_intervals(merged_start, merged_end,
                                    merged_interval) + 1
        merged_ticks = time_sequence(merged_start, merged_interval, n_points)
    else:
        merged_ticks = np.union1d(ts1.ticks, ts2.ticks)

    # One row per input, pre-filled with nan, then scatter each input's
    # values to the slots located by searching the merged sequence.
    columns = np.full((2, len(merged_ticks)), np.nan)
    slots1 = np.searchsorted(merged_ticks, ts1.ticks)
    slots2 = np.searchsorted(merged_ticks, ts2.ticks)
    columns[0, slots1] = ts1.data
    columns[1, slots2] = ts2.data

    stacked = columns.transpose()

    if both_regular:
        return rts(stacked, merged_start, merged_interval)
    return its(merged_ticks, stacked)
Esempio n. 8
0
    def test_shift_operation_its(self):
        """Shifting an irregular series must move every time stamp by the
        shift interval."""
        n_points = 1000
        values = repeat(10.0, n_points)
        first_stamp = datetime(year=1990, month=2, day=3, hour=11, minute=15)

        # Uneven spacing: step k advances the previous stamp by k % 5 hours.
        stamps = [first_stamp]
        for k in range(1, n_points):
            stamps.append(stamps[-1] + time_interval(hours=k % 5))

        reference = its(stamps, values, {})
        shift_cases = [hours(1), months(1), days(3), years(1)]

        for delta in shift_cases:
            shifted = shift(its(stamps, values), delta)
            expected_times = reference.times + delta
            for expected, actual in zip(expected_times, shifted.times):
                self.assertEqual(expected, actual)
Esempio n. 9
0
    def test_bind_op_irregular(self):
        """ Binding two identical irregular series keeps length and start
        and yields the same value in both columns."""
        minute_offsets = [12, 15, 32, 38, 43, 52, 84, 138, 161, 172]
        origin_ticks = ticks(parse_time("1996-2-1"))
        tick_values = sciadd(scimultiply(minute_offsets, ticks_per_minute),
                             origin_ticks)
        values = sciarray([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
        left = its(tick_values, values, {})
        right = its(tick_values, values, {})

        bound = ts_bind(left, right)
        self.assertEqual(len(bound), len(left))
        self.assertEqual(bound.start, left.start)

        for row, expected in zip(bound.data, values):
            from_left, from_right = row
            self.assertEqual(from_left, expected)
            self.assertEqual(from_right, expected)
Esempio n. 10
0
def ts_accumulate(ts, ufunc):
    """Run ``ufunc.accumulate`` over the data and wrap the result in a
       new time series with the same times and props.
       So... ts_accumulate(ts, add) will produce a time series where each
       entry is the cumulative sum up to that index.
    """
    regular = ts.is_regular()
    running = ufunc.accumulate(ts.data)
    if not regular:
        return its(ts.ticks, running, ts.props)
    return rts(running, ts.start, ts.interval, ts.props)
Esempio n. 11
0
    def create_its(self):
        """ Build an irregular time series of random values, for testing
        only. Also records ``its1_years`` on the instance.
        """
        stamps = self.create_irregular_timesequence(2000, 1)
        self.its1_years = 2
        # One uniform draw between min_val and max_val per time stamp.
        values = [
            random.uniform(self.min_val, self.max_val)
            for _ in range(len(stamps))
        ]
        return its(stamps, values, {})
Esempio n. 12
0
    def _retrieve_irregularTS(self, data_ref, overlap=None):
        """ Retrieve the irregular time series referenced by data_ref.

            ``overlap`` selects the flag passed to the dss read call:
            ``None`` or ``(0, 0)`` gives 0, ``(1, 1)`` gives 3, ``(1, 0)``
            gives 1 and anything else gives 2.

            Returns a TimeSeries; an empty one when the read reports status
            4. Raises DssAccessError for status 5 or above, and ValueError
            when the number of values read equals the cache size.
        """
        path = data_ref.selector
        dssf = open_dss(data_ref.source)

        juls, jule, istime, ietime = self._gen_its_jultime(data_ref)
        # Upper bound on the values fetched by a single call; the constant
        # lives in dss_constants.py.
        kval = DSS_MAX_ITS_POINTS
        lflags = True
        kheadu = DSS_MAX_HEADER_ITEMS

        if overlap is None or overlap == (0, 0):
            inflag = 0
        elif overlap == (1, 1):
            inflag = 3
        elif overlap == (1, 0):
            inflag = 1
        else:
            inflag = 2

        (itimes, data, nval, jbdate, flags, lfread, cunits, ctype, headu,
         nheadu, istat) = dssf.zritsx(path, juls, istime, jule, ietime, kval,
                                      lflags, kheadu, inflag)
        del dssf

        # Properties handed on to the resulting series.
        prop = {UNIT: cunits}

        if istat == 5:
            raise DssAccessError("no record of %s is found" % path)
        if istat > 5:
            raise DssAccessError("error in access data of %s" % path)
        if istat == 4:
            # Nothing in the window: hand back a zero-length series.
            return its([], [], prop)

        if nval == kval:
            message = (
                "Input time window contains more data than equals or exceeds the size of the cache used to retrieve "
                "irregular time series at path %s. You may try to increase the size of irregular time series "
                "cache by changing the value of DSS_MAX_ITS_POINTS defined in the file dss_constants.py under vtools\\dastastore\\dss."
            ) % path
            raise ValueError(message)

        if nheadu > 0:
            header = self._unstuff_header(headu, nheadu, 2)
            # Unit and type entries from the header are not copied over.
            for key in header.keys():
                if key != UNIT and key != CTYPE:
                    prop[key] = header[key]

        return dss_its_to_ts(data, jbdate, itimes, prop, flags)
Esempio n. 13
0
def ts_apply(ts, ufunc, other=None):
    """Apply a numpy ufunc pointwise to the data and wrap the result in a
       new time series with the same times and props.

       When ``other`` is given (anything but None) it is passed as the
       second argument of the ufunc; otherwise the ufunc is called with
       the data alone.

       So... ts_apply(ts, add, 5) will add 5 to every data member
    """
    if other is None:
        applied = ufunc(ts.data)
    else:
        applied = ufunc(ts.data, other)

    if not ts.is_regular():
        return its(ts.ticks, applied, ts.props)
    return rts(applied, ts.start, ts.interval, ts.props)
Esempio n. 14
0
    def test_flat(self):
        """ Every value produced by the flat interpolators must be one of
        the source values. """

        source_ticks = [
            1, 3, 7, 14, 19, 29, 35, 41, 42, 44, 60, 61, 67, 69, 72, 76, 77,
            79, 89, 90
        ]
        samples = [sin(k * 3.14 / 3) for k in range(20)]
        samples[4] = nan
        samples[5] = nan
        source = its(source_ticks, samples)

        ## Locations at which to interpolate.
        targets = [2, 9, 19, 20, 29, 34, 50, 72.4]

        for interpolate in (nearest_neighbor, previous_neighbor,
                            next_neighbor):
            result = interpolate(source, targets)
            for value in result.data:
                self.assertTrue(value in source.data)
Esempio n. 15
0
 def test_split_op_irregular(self):
     """ Split an irregular two-column series, first as copies and then
     sharing the data array, and check values and times each way."""
     minute_offsets = [12, 15, 32, 38, 43, 52, 84, 138, 161, 172]
     origin_ticks = ticks(parse_time("1996-2-1"))
     tick_values = sciadd(scimultiply(minute_offsets, ticks_per_minute),
                          origin_ticks)
     column0 = sciarray([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
     column1 = sciarray([7.0, 1.2, 10.5, 3.0, 1.0, 1.0, 9.0, 3.0, 0.0, 0.2])

     combined = its(tick_values, sciarray([column0, column1]).transpose())

     # Copying split: parts start equal to the source columns ...
     part0, part1 = ts_split(combined, False)
     for col, part in enumerate((part0, part1)):
         for expected, actual in zip(combined.data[:, col], part.data):
             self.assertEqual(expected, actual)
     # ... and writing to a part must not reach the source.
     part0.data[5] = -9999.0
     part1.data[2] = -9999.0
     self.assertNotEqual(part0.data[5], combined.data[5, 0])
     self.assertNotEqual(part1.data[2], combined.data[2, 1])

     for part in (part0, part1):
         for actual, expected in zip(part.times, combined.times):
             self.assertEqual(actual, expected)

     # Sharing split: writes to a part must show up in the source.
     part0, part1 = ts_split(combined, True)
     part0.data[5] = -9999.0
     part1.data[2] = -9999.0

     for col, part in enumerate((part0, part1)):
         for expected, actual in zip(combined.data[:, col], part.data):
             self.assertEqual(expected, actual)

     for part in (part0, part1):
         for actual, expected in zip(part.times, combined.times):
             self.assertEqual(actual, expected)
Esempio n. 16
0
def shift(ts, interval, copy_data=True):
    """ Shift entire time series by a given interval

    Parameters
    ----------
    ts : :class:`~vtools.data.timeseries.TimeSeries`
        Time series to be shifted; regular and irregular series are both
        handled.

    interval : :ref:`time_interval <time_intervals>`
        Interval of the shifting. Anything that is not already an interval
        is passed through ``parse_interval`` first.

    copy_data : boolean,optional
        If True, the result gets a copy of the data and a deep copy of the
        properties. Otherwise, it will share data and properties with `ts`.

    Returns
    -------
    shifted : :class:`~vtools.data.timeseries.TimeSeries`
        A new time series with shifted times.

    """
    out_data = numpy.copy(ts.data) if copy_data else ts.data
    out_props = deepcopy(ts.props) if copy_data else ts.props

    step = interval if is_interval(interval) else parse_interval(interval)

    if ts.is_regular():
        # Only the start moves; the spacing of the series is unchanged.
        return rts(out_data, increment(ts.start, step, 1), ts.interval,
                   out_props)

    if is_calendar_dependent(step):
        # Calendar-dependent steps are added to the time objects.
        shifted_times = ts.times + step
    else:
        # Other steps are added as a tick count to the tick array.
        shifted_times = ts.ticks + ticks(step)
    return its(shifted_times, out_data, out_props)
Esempio n. 17
0
    def test_save_its_data(self):
        """ Store a 20-point irregular series in the test dss file."""
        target_file = self.dss_file_path
        target_path = "/HERE/IS/ITS//IR-YEAR/TEST/"

        values = [
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
            10.0, 11.0, 12.0, 13.0, 14.5, 15.1, 16.8,
            17.2, 14.2, 19.2, 20.0,
        ]
        stamp_texts = [
            "01/15/1997", "02/17/1997", "03/5/1997",
            "04/25/1997", "05/1/1997", "06/15/1997",
            "07/25/1997", "08/14/1997", "09/17/1997",
            "10/15/1997", "11/21/1997", "12/3/1997",
            "01/9/1998", "02/15/1998", "03/19/1998",
            "04/15/1998", "05/19/1998", "06/30/1998",
            "07/15/1998", "08/24/1998",
        ]
        stamps = [parse(text) for text in stamp_texts]

        series = its(stamps, values, props={})
        dss_store_ts(series, target_file, target_path)
Esempio n. 18
0
    def test_retrievesave_longits(self):
        """Store a long regular series, then store an irregular series
        built from a leading slice of its ticks and data."""
        total_len = 700000
        ## 80000 will cause zsitx fail, for dss internal
        ## cache size is 10000. data size
        ## close to this number will cause dss lib abort
        its_len = 40000
        data = range(total_len)
        start = "12/21/1940 2:00"
        interval = "1hour"
        prop = {}
        prop[TIMESTAMP] = INST
        prop[AGGREGATION] = INDIVIDUAL

        prop["datum"] = "NGVD88"
        prop["manager"] = "John Doe"
        prop["model"] = "hydro 7.5"

        rt1 = rts(data, start, interval, prop)

        service_id = "vtools.datastore.dss.DssService"
        path = "/TEST/DOWNSTREAM/EC//1HOUR/SRT/"
        source = self.test_file_path

        data_ref = DataReference(service_id, source=source, selector=path)
        self.dss_service.add_data(data_ref, rt1)

        path = "/TEST/DOWNSTREAM/EC//IR-DAY/STAGE/"
        data_ref = DataReference(service_id, source=source, selector=path)
        first = 0
        last = its_len
        # its takes times first and data second; the ticks of the regular
        # series are the times and the matching slice of data the values.
        it1 = its(rt1.ticks[first:last], data[first:last], prop)
        self.dss_service.add_data(data_ref, it1)
Esempio n. 19
0
    def test_interpolate_rts_to_larger_dt(self):
        """ Interpolate a 6-minute series to hourly times given as an
        interval, as a regular series and as an irregular series.

        """
        import numpy as np
        import datetime as dt

        fine = rts(np.arange(100.), dt.datetime(2000, 1, 1, 0, 6), minutes(6))

        # Target given as an interval.
        hourly = interpolate_ts(fine, hours(1), method=LINEAR)
        self.assertTrue(fine[9].value == hourly[0].value)

        # Target given as a regular series.
        via_regular = interpolate_ts(fine, hourly, method=LINEAR)
        self.assertTrue(hourly[3].value == via_regular[3].value)

        # Target given as an irregular series with the same times.
        irregular_target = its(hourly.times, hourly.data)
        via_irregular = interpolate_ts(fine, irregular_target, method=LINEAR)
        self.assertTrue(hourly[3].value == via_irregular[3].value)

        # Source stamps that do not fall on the hour.
        offset = rts(np.arange(98.), dt.datetime(2000, 1, 1, 0, 3),
                     minutes(6))
        offset_hourly = interpolate_ts(offset, hours(1), method=LINEAR)
        self.assertTrue(offset_hourly[0].value == 9.5)
        #self.assertTrue(rt.is_regular())
        #self.assertEqual(rt.interval,parse_interval(interval))

        # A target series reaching outside the source must be rejected.
        too_wide = rts(np.arange(120.), dt.datetime(1999, 12, 31, 23),
                       hours(1))
        self.assertRaises(ValueError, interpolate_ts, offset, too_wide)
Esempio n. 20
0
    def read(self,
             fpath,
             start=None,
             end=None,
             force_regular=True,
             selector=None):
        """ Read a text file with the given pattern and parsers.
            Parsers and a pattern must be defined and set in the child class.

            Parameters
            ----------
            fpath: str
                file to read
            start: datetime.datetime, optional
                datetime to start reading in.
                If None, read from the start of the file
            end: datetime.datetime, optional
                datetime to finish reading in.
                If None, read till the end of the file
            force_regular: boolean, optional
                If it is true, an interval is inferred from the first time
                stamps and a regular time series is returned; otherwise
                the series is returned as read (irregular).
            selector: optional
                Passed to ``process_header``; its meaning is defined by the
                child class.

            Returns
            -------
            vtools.data.timeseries.TimeSeries or None
                time series from the file with the header metadata copied
                into its props, or None when no record was read.

            Raises
            ------
            ValueError
                If force_regular is set and no interval can be inferred.
        """
        # The selector (if it exists) can probably be precalculated or at least recorded.
        # Almost always this amounts to picking variables out of a list of column names
        # and recording indexes, but here we don't ask any questions about what "selector" is.
        # NOTE(review): the metadata returned by process_header is not used;
        # only read_metadata_from_header feeds the props. Confirm intended.
        n_headerlines, _ = self.process_header(fpath, selector)
        metadata = dict()
        if self._header_regexs is not None:
            metadata = self.read_metadata_from_header(fpath)
        print("Here we are working on %s" % fpath)

        times = list()
        values = list()
        with open(fpath, 'r') as f_in:
            # fast forward past header
            if n_headerlines > 0:
                for _ in range(n_headerlines):
                    f_in.readline()
            # process lines starting from current file pointer
            for line in f_in:
                if self.is_comment(line):
                    continue
                timestamp, vals = self.parse_record(line)
                if start and timestamp < start:
                    continue
                if end and timestamp > end:
                    # Stop at the first record past the requested end.
                    break
                times.append(timestamp)
                values.append(vals)

        if len(times) < 1:
            return None

        # Here I assume that it is more effective to retrieve too much
        # in the reading stage and then do this with numpy fancy indexing.
        # But you can override this function.
        arr = self.cull_using_selector(numpy.array(values))

        # Build the series from the culled array so the selection is kept.
        ts = vts.its(times, arr)
        if force_regular:
            interval = vt_infer_interval(times[:11],
                                         fraction=0.5,
                                         standard=[
                                             vtt.minutes(6),
                                             vtt.minutes(10),
                                             vtt.minutes(15),
                                             vtt.hours(1)
                                         ])
            if not interval:
                for t in times[:10]:
                    print(t.strftime("%Y-%m-%d %H:%M:%S"))
                raise ValueError(
                    "Interval could not be inferred from first time steps in %s"
                    % fpath)
            import warnings
            # todo: this really should be an option
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                ts = vts.its2rts(ts, interval)
            if start is not None:
                if start < ts.start:
                    ts = vts.extrapolate_ts(ts, start=start)
                else:
                    ts = ts.window(start=start)
            if end is not None:
                if end > ts.end:
                    ts = vts.extrapolate_ts(ts, end=end)
                else:
                    ts = ts.window(end=end)

        # Attach metadata and return for regular and irregular results
        # alike, so force_regular=False no longer yields None.
        for k, v in metadata.items():
            ts.props[k] = v
        return ts
Esempio n. 21
0
    def test_period_op_irregular(self):
        """ Check hourly and monthly MEAN/MIN/MAX period operations on
        irregular series."""

        def to_ticks(offsets, ticks_per_unit, origin):
            # Scale the offsets to ticks and move them to the origin.
            return sciadd(scimultiply(offsets, ticks_per_unit), origin)

        def check(series, period, op, expected):
            # The result must have exactly the expected values, in order.
            result = period_op(series, period, op)
            self.assertEqual(len(result), len(expected))
            for idx, want in enumerate(expected):
                self.assertEqual(result.data[idx], want)

        origin = ticks(parse_time("1996-2-1"))

        # Minute offsets that do not start on the hour.
        stamps = to_ticks([12, 15, 32, 38, 43, 52, 84, 138, 161, 172],
                          ticks_per_minute, origin)
        values = sciarray([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0])
        check(its(stamps, values, {}), "1 hour", MEAN, [1.0, 2.0, 3.0])

        # Minute offsets that include points exactly on hour boundaries.
        stamps = to_ticks([0, 15, 32, 38, 43, 52, 60, 120, 138, 161, 180],
                          ticks_per_minute, origin)
        values = sciarray(
            [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0])
        check(its(stamps, values, {}), "1 hour", MEAN, [1.0, 2.0, 3.0, 4.0])

        # Same stamps with varying values, for MIN and MAX.
        values = sciarray(
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 2.0, 3.0, 4.0, 5.0, 4.0])
        varying = its(stamps, values, {})
        check(varying, "1 hour", MIN, [1.0, 2.0, 3.0, 4.0])
        check(varying, "1 hour", MAX, [6.0, 2.0, 5.0, 4.0])

        # Day offsets from the start of 1996, averaged by month.
        origin = ticks(parse_time("1996-1-1"))
        stamps = to_ticks([0, 15, 28, 30, 58, 64, 80, 90, 91], ticks_per_day,
                          origin)
        values = sciarray([1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0])
        check(its(stamps, values, {}), "1 month", MEAN, [1.0, 2.0, 3.0, 4.0])
Esempio n. 22
0
def _flat(ts_data, ts_ticks, tticks, method=NEAREST):
    """  Estimate values at the given ticks by flat (piecewise-constant)
         lookup: previous known value, next known value or nearest known
         value.

    Parameters
    -----------

    ts_data : array
        Values of the source series.

    ts_ticks : array
        Ticks of the source series, paired with `ts_data`.

    tticks : array
        Ticks at which values are wanted.

    method: int,optional
        PREVIOUS or NEXT; any other value selects the nearest neighbor.
        On a tie the nearest choice takes the previous point.

    Returns
    -------
     result: array
        Values at `tticks`. A requested tick that coincides with a source
        tick gets the source value at that tick regardless of `method`.

    """
    source = its(ts_ticks, ts_data)
    n_source = len(source)

    ## Locate requested ticks that coincide with a source tick so that
    ## the original value can be reused for them. n_source serves as
    ## the "no coincidence" marker.
    insert_at = source.ticks.searchsorted(tticks)
    safe_insert = where(insert_at < n_source, insert_at, n_source - 1)
    ticks_at_insert = take(source.ticks, safe_insert)
    exact_hit = where(ticks_at_insert == tticks, insert_at, n_source)

    ## No exception is raised for requested ticks outside the source
    ## range; the bracketing index is forced into [1, last] instead, so
    ## such points use the values at the boundary.
    after = source.index_after(tticks)
    last = len(source.data) - 1
    floored = where(after < 1, 1, after)
    bracket = where(after > last, last, floored)

    if method == PREVIOUS:
        chosen = bracket - 1
    elif method == NEXT:
        chosen = bracket
    else:
        before = bracket - 1
        gap_before = abs(tticks - take(source.ticks, before))
        gap_after = abs(tticks - take(source.ticks, bracket))
        chosen = where(gap_before <= gap_after, before, bracket)

    neighbor_vals = take(source.data, chosen)

    ## Coinciding ticks override the neighbor choice; index 0 is only a
    ## placeholder for the lookup at positions without a coincidence.
    is_hit = exact_hit < n_source
    hit_index = where(is_hit, exact_hit, 0)
    hit_vals = take(source.data, hit_index)
    return where(is_hit, hit_vals, neighbor_vals)