Beispiel #1
0
def _collect_and_plot(files):
    """Collect hourly temperature readings from gs2 files and plot them.

    Every file in ``files`` is parsed with ``parse.parse_file``.  The first
    and last parsed sections are skipped and only sections whose ``'Plant'``
    entry equals ``['tmp']`` are kept.  Each kept section becomes one hourly
    ``ts.TimeSeries``.  When at least one series was collected, the
    concatenation of all of them is plotted against every file found in a
    hard-coded directory of XML reference data.

    :param files: iterable of file names accepted by ``parse.parse_file``
    :return: None; the function only plots and prints diagnostics
    """
    TS = []
    location = []
    for f in files:
        temperatures = [section[1] for section in parse.parse_file(f)[1:-1]
                        if section[1]['Plant'] == ['tmp']]
        for t in temperatures:
            if t['Step'][0] != '0000-00-00.01:00:00':
                # Only the remaining sections of *this* file are skipped; the
                # outer loop carries on with the next file.
                print('Not hourly readings of temperature. Abort.')
                break
            dates = ts.date_array(start_date=ts.Date('H', t['Start'][0]),
                                  length=len(t['Value']))
            # Keep only the text before the first '/' of every raw value.
            data = [float(value.rsplit('/')[0]) for value in t['Value']]
            TS.append(ts.TimeSeries(data=data, dates=dates))
            if location and t['Installation'][0] != location:
                print('Location changed during reading of gs2 files. '
                      'Probably some bad grouping of gs2 files.')
            location = t['Installation'][0]
    if TS:
        path = '/Users/tidemann/Documents/NTNU/devel/data/eklima/Telemark/'
        for fname in os.listdir(path):
            try:
                series = xml.parse(path + fname)
                sg.utils.plot_time_series([ts.concatenate((TS)), series],
                                          ['b-', 'r-'], [location, fname])
            except Exception:
                # Best effort: a reference file that cannot be parsed or
                # plotted is reported and skipped.  ``Exception`` (instead of
                # a bare ``except``) lets Ctrl-C and SystemExit through.
                print('%s had no data.' % fname)
    else:
        print('No temperature data.')
Beispiel #2
0
    def __init__(self, **kwargs):
        r"""
        Store every keyword argument as an attribute, load the river gages
        and prepare the daily date range between start and end date.

        Example of keyword arguments:

        kwargs ={'baisnName':'Mackenzie',
                 'start_date':'2000-06-01',
                 'end_date':'2010-06-31',
                 'info_fl':r'C:\00_Work\02_Sim\00_Mackenzie\01_Data\01_Selected_RiverDischarge\RiverGages_description.csv',
                 'pthIn':r'C:\00_Work\02_Sim\00_Mackenzie\01_Data\01_Selected_RiverDischarge'}
        """
        # Expose each keyword argument as an instance attribute.
        for name, value in kwargs.items():
            setattr(self, name, value)

        self._load_rivergages()

        # First day of the period, also kept as a 'YYYY/MM/DD 00:00' string.
        self._idate = ts.Date('D', self.start_date)
        self._idateHStr = '%s 00:00' % self._idate.strfmt('%Y/%m/%d')

        # Last day of the period and the daily dates in between.
        self._fdate = ts.Date('D', self.end_date)
        self._dates = ts.date_array(freq='d',
                                    start_date=self._idate,
                                    end_date=self._fdate)

        # Both end points are included in the day count.
        self.nr_days = self._fdate - self._idate + 1

        self._get_matrix()
    def test_tsfromtxt(self):
        "Tests reading from a text file."
        fcontent = """#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
"""
        import os
        from datetime import datetime
        import tempfile
        (tmp_fd, tmp_fl) = tempfile.mkstemp()
        os.write(tmp_fd, fcontent)
        os.close(tmp_fd)

        mrectxt = tsfromtxt(tmp_fl, delimiter=',', names=tuple('ABCDEFG'),
                               datecols=0, skip_header=2, asrecarray=True)
        os.remove(tmp_fl)
        #
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        self.failUnless(isinstance(mrectxt, TimeSeriesRecords))
        assert_equal(mrectxt._dates, date_array(dlist, 'M'))
        assert_equal(mrectxt.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(mrectxt.F, [1, 1, 1, 1])
        assert_equal(mrectxt.E._mask, [1, 1, 1, 1])
        assert_equal(mrectxt.C, [1, 2, 300000, -1e-10])
 def test_sorted(self):
     """Check that every constructor sorts its records chronologically.

     The dates are given in reverse order; the series and records built by
     time_series, time_records, fromrecords and fromarrays must all come
     out sorted by date.
     """
     dates = [ts.Date('D', string='2007-01-%02i' % i) for i in (3, 2, 1)]
     records = [(3., 30), (2., 20), (1., 10)]
     (a, b) = zip(*records)
     # Builtin float/int are what the removed np.float/np.int aliases were.
     ndtype = [('a', float), ('b', int)]
     expected = [(1., 10), (2., 20), (3., 30)]
     controldates = date_array(dates, freq='D')
     controldates.sort_chronologically()
     # list(...) hands over a real list even where zip() is lazy.
     series = time_series(list(zip(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(series._data.tolist(), expected)
     assert_equal(series._dates, controldates)
     #
     trec = time_records(list(zip(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(trec._data.tolist(), expected)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromrecords(list(zip(a, b)), dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), expected)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromarrays([a, b], dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), expected)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
Beispiel #5
0
def _get_year(year,d,n=1):
    """
    Return a time-series with the same frequency of the input time-series
    with n complete years from input year and values taken from input series

    Dates later than the last date of ``d`` are set to ``np.nan``.
    Side effect: the mask of ``d`` is cleared in place (``d.mask = False``).

    :param year: base year
    :type year: integer
    :param d: time-series object
    :type d: time-series
    :param n: number of periods to take
    :type n: integer
    :return: output time-series
    :rtype: time-series
    :raises UnknownFrequencyError: when the frequency string of ``d`` does
        not start with 'M', 'Q' or 'A'

    """

    f    = d.freqstr      # input frequency
    nels = _ts_nels(f)    # number of elements in one year (M=12, Q=4, A=1)

    N = n*nels            # total number of elements to consider

    if f[0]=='M':
        starty = ts.Date(f,year=year,month=1)
    elif f[0]=='Q':
        starty = ts.Date(f,year=year,quarter=1)
    elif f[0]=='A':
        starty = ts.Date(f,year=year)
    else:
        raise UnknownFrequencyError(f)

    # Last period of the range.  Date arithmetic keeps month/quarter within
    # their valid ranges when n > 1 (the former month=N / quarter=N form
    # passed 24, 36, ... or 8, 12, ... to the Date constructor).
    endy = starty + (N - 1)

    # Create a timeseries with N elements np.nan
    # from starty with frequency f
    s = ts.time_series([np.nan] * N,
                       start_date=starty,
                       freq=f)

    # create date range
    da = ts.date_array(start_date=starty,
                       end_date=endy,
                       freq=f)

    # NOTE(review): the results of these two calls are discarded; if they
    # return new series instead of working in place they have no effect here
    # -- confirm against the scikits.timeseries API.
    d.fill_missing_dates()
    d.adjust_endpoints()

    # copy values from d to s
    d.mask=False
    last_available = d.end_date
    for _d in da:
        # Dates past the end of the input stay NaN.
        s[_d] = d[_d] if _d <= last_available else np.nan

    return s
Beispiel #6
0
 def test_convert_to_annual(self):
     "Check convert_to_annual on daily and hourly series."
     steps_per_day = dict(D=1, H=24, T=24 * 60, S=24 * 3600)
     # Minute ('T') and second ('S') frequencies are left out on purpose:
     # they take too long to run.
     for code in ('D', 'H'):
         span = date_array(start_date=Date(code, '2001-01-01 00:00:00'),
                           end_date=Date(code, '2004-12-31 23:59:59'))
         n = steps_per_day[code]
         common_year = list(range(365 * n))
         leap_year = list(range(366 * n))
         # Three common years followed by one leap year.
         series = time_series(common_year * 3 + leap_year, dates=span)
         expected = ma.masked_all((4, 366 * n), dtype=series.dtype)
         expected[0, :58 * n] = common_year[:58 * n]
         expected[0, 59 * n:] = common_year[58 * n:]
         expected[[1, 2]] = expected[0]
         expected[3] = leap_year
         assert_equal(convert_to_annual(series), expected)
     #
     values = list(range(59, 365)) + list(range(366)) + list(range(365))
     series = time_series(values, start_date=Date('D', '2003-03-01'))
     annual = convert_to_annual(series)
     # -1 marks the entries expected to be masked.
     window = ma.masked_values(
         [[-1, 59, 60], [59, 60, 61], [-1, 59, 60]], -1)
     assert_equal(annual[:, 59:62], window)
Beispiel #7
0
 def test_sorted(self):
     """Check that every constructor sorts its records chronologically.

     The dates are supplied in reverse order; time_series, time_records,
     fromrecords and fromarrays must all return data sorted by date.
     """
     dates = [ts.Date('D', string='2007-01-%02i' % i) for i in (3, 2, 1)]
     (a, b) = zip(*[
         (3., 30),
         (2., 20),
         (1., 10),
     ])
     # np.float / np.int were plain aliases of the builtins and are gone
     # from recent NumPy releases; the builtins are equivalent.
     ndtype = [('a', float), ('b', int)]
     sorted_rows = [(1., 10), (2., 20), (3., 30)]
     controldates = date_array(dates, freq='D')
     controldates.sort_chronologically()
     # Materialise zip() so a list is passed whether or not zip is lazy.
     series = time_series(list(zip(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(series._data.tolist(), sorted_rows)
     assert_equal(series._dates, controldates)
     #
     trec = time_records(list(zip(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(trec._data.tolist(), sorted_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromrecords(list(zip(a, b)), dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), sorted_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromarrays([a, b], dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), sorted_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
Beispiel #8
0
    def test_tsfromtxt(self):
        "Tests reading from a text file."
        fcontent = """#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
"""
        import os
        from datetime import datetime
        import tempfile
        (tmp_fd, tmp_fl) = tempfile.mkstemp()
        os.write(tmp_fd, fcontent)
        os.close(tmp_fd)

        mrectxt = tsfromtxt(tmp_fl,
                            delimiter=',',
                            names=tuple('ABCDEFG'),
                            datecols=0,
                            skip_header=2,
                            asrecarray=True)
        os.remove(tmp_fl)
        #
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        self.failUnless(isinstance(mrectxt, TimeSeriesRecords))
        assert_equal(mrectxt._dates, date_array(dlist, 'M'))
        assert_equal(mrectxt.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(mrectxt.F, [1, 1, 1, 1])
        assert_equal(mrectxt.E._mask, [1, 1, 1, 1])
        assert_equal(mrectxt.C, [1, 2, 300000, -1e-10])
Beispiel #9
0
def add_diurnal(tseries, sine_period, peak_mag):
    """
    Modulate a time series with a sine wave, in place.

    Every sample ``x`` becomes ``x + x * peak_mag * sin(w * t)`` where ``t``
    is the time elapsed since ``tseries.start_date`` (in units of the series
    frequency) and ``w = 2*pi / sine_period`` after ``sine_period`` has been
    converted to those same units.

    Input: tseries, sine_period (float, hrs), peak_mag (float)
    Output: the same ``tseries`` object, modified in place
    """
    # Express sine_period in the frequency of tseries: build two hourly dates
    # sine_period apart, convert both to the series frequency and take their
    # difference.
    zero_date = ts.now('H')
    second_date = zero_date + sine_period
    time_delta = ts.date_array([zero_date, second_date])
    time_delta = time_delta.asfreq(tseries.freq)
    sine_period = float(time_delta[1] - time_delta[0])

    angular_freq = (2. * np.pi) / sine_period

    start = tseries.start_date
    # Cover every sample: the former range(len(tseries)-1) left the last
    # one unscaled.
    for i in range(len(tseries)):
        passed_time = float(tseries.dates[i] - start)
        sine_factor = peak_mag * np.sin(angular_freq * passed_time)
        tseries[i] = tseries[i] + tseries[i] * sine_factor

    return tseries

# Generate power density function (pdf) to create synthetic TPM from
# mean, stdev, autocorr, npointsx

# def gen_pdf(desired_mean, desired_stdev, bin_width):
    ## TODO
    return 0
Beispiel #10
0
 def test_convert_to_annual(self):
     "Check convert_to_annual for daily and hourly input."
     per_day = dict(D=1, H=24, T=24 * 60, S=24 * 3600)
     # 'T' (minute) and 'S' (second) are not exercised: too time consuming.
     for freq_code in ('D', 'H'):
         first = Date(freq_code, '2001-01-01 00:00:00')
         last = Date(freq_code, '2004-12-31 23:59:59')
         all_dates = date_array(start_date=first, end_date=last)
         k = per_day[freq_code]
         regular = list(range(365 * k))
         leap = list(range(366 * k))
         # 2001-2003 are common years, 2004 is a leap year.
         series = time_series(regular * 3 + leap, dates=all_dates)
         wanted = ma.masked_all((4, 366 * k), dtype=series.dtype)
         wanted[0, :58 * k] = regular[:58 * k]
         wanted[0, 59 * k:] = regular[58 * k:]
         wanted[[1, 2]] = wanted[0]
         wanted[3] = leap
         result = convert_to_annual(series)
         assert_equal(result, wanted)
     #
     payload = list(range(59, 365)) + list(range(366)) + list(range(365))
     series = time_series(payload, start_date=Date('D', '2003-03-01'))
     result = convert_to_annual(series)
     # Entries equal to -1 stand for masked positions.
     assert_equal(
         result[:, 59:62],
         ma.masked_values([[-1, 59, 60], [59, 60, 61], [-1, 59, 60]], -1))
Beispiel #11
0
 def _get_tseriesD(freq,date_values,kw):
     """Build a date array from Excel serial dates, skipping NaN entries.

     ``date_values`` is flattened; every non-NaN entry is truncated to an
     integer, decoded with ``xlrd.xldate_as_tuple`` (datemode 0) and turned
     into a ``ts.Date`` of frequency ``str(freq)`` from its year, month and
     day.  ``kw`` is accepted but not used.
     """
     days = []
     for serial in date_values.flatten():
         if np.isnan(serial):
             continue
         days.append(xlrd.xldate_as_tuple(int(serial), 0))
     stamps = []
     for parts in days:
         stamps.append(ts.Date(freq=str(freq),
                               year=parts[0], month=parts[1], day=parts[2]))
     return ts.date_array(stamps)
Beispiel #12
0
    def request(self,reqs,**kw):
        """Random Request

        For every request in ``reqs`` the 'Instrument' entry is parsed, a
        sample size is derived from its date range and a random generator
        selected by the 'Source' entry is called with the parsed parameters.

        NOTE(review): the end of this method is not visible here, so the
        return value and the final ``except`` handler are not documented.

        >>> ds = Random()
        >>> xs = ds.request([{'Instrument':'NAME=PIPPO~:2012-12-31~2007-01-01~M', 'Source': 'FRED'},])
        >>> ts = xs['PIPPO']._data
        >>> print ts.freqstr
        M
        """
        logger.debug('request')
        rx={}
        kw=udict(kw)
        # An optional SEED keyword makes the generated numbers reproducible.
        if 'SEED' in kw:
            seed = int(kw['SEED'])
            rand.seed(seed)
        for rq in reqs:
            logger.debug("Request: %s",rq)
            s = parse_instrument(rq['Instrument'])
            logger.debug(s)
            params = ldict(mk_params(s['TICKER']))
            # Series name, by increasing priority: the request 'Source', a
            # 'name' parameter of the ticker, a non-empty NAME keyword.
            if 'name' in params:
                name = params['name'].upper()
                del params['name']
            else:
                name = rq['Source'].upper()
            if 'NAME' in kw and kw['NAME']:
                name = kw['NAME'].upper()
            kw['NAME']=name
            # Sample size: number of dates in the range when a frequency is
            # given, otherwise the plain difference END - START.
            if s['FREQ']!='0':
                dr = ts.date_array(freq=s['FREQ'],start_date=s['START'],end_date=s['END'])
                L = len(dr)
            else:
                L = s['END']-s['START']
            params['size']=L
            dd = np.zeros(L)
            logger.debug(params)
            try:
                if re.match('^constant$',rq['Source'],re.I):
                    # NOTE(review): name is overwritten with 'CONSTANT' and
                    # float('CONSTANT') raises ValueError, so this branch
                    # cannot complete as written -- confirm the intended
                    # source of the constant value.
                    name = 'CONSTANT'
                    v = float(name)
                    params['LOW']=v
                    params['HIGH']=v
                    dd = rand.uniform(**params)
                elif re.match('^walk$',rq['Source'],re.I):
                    dd = RandomWalk(**params)
                # NOTE(review): the alternation is not grouped, so '^' only
                # binds to 'uniform' and '$' only to 'zipf'; any source that
                # merely starts with one of the other names matches as well.
                elif re.match('^uniform|beta|binomial|chisquare|exponential|gamma|geometric|gumbel|hypergeometric|laplace|logistic|lognormal|logseries|multinomial|multivariate_normal|negative_binomial|noncentral_chisquare|noncentral_f|normal|pareto|poisson|power|rayleigh|standard_cauchy|standard_exponential|standard_gamma|standard_normal|standard_t|triangular|uniform|vonmises|wald|weibull|zipf$',rq['Source'],re.I):
                    generator = rq['Source'].lower()
                    # When rand has no such attribute dd silently keeps the
                    # zeros assigned above.
                    if hasattr(rand,generator):
                        f = getattr(rand,generator)
                        dd = f(**params)
                ## Add other distributions here
                else:
                    raise TypeError, 'Unknown generator'
            except TypeError, exc:
                logging.error('%s generator - %s',rq['Source'],exc.args[0])
            except:
Beispiel #13
0
 def _make_predict_dates(self):
     """Store the prediction dates on ``self._data`` as datetime objects.

     A date array is built from ``predict_start``, ``predict_end`` and
     ``freq`` of ``self._data``, converted to integer ordinals and then to
     ``datetime.datetime`` instances saved in ``self._data.predict_dates``.
     """
     source = self._data
     span = date_array(start_date=source.predict_start,
                       end_date=source.predict_end,
                       freq=source.freq)
     ordinals = span.toordinal().astype(int)
     stamps = [datetime.datetime.fromordinal(day) for day in ordinals]
     self._data.predict_dates = asarray(stamps)
Beispiel #14
0
 def _get_tseriesQ(freq,date_values,kw):
     """Build a quarterly date array from a sequence of quarter numbers.

     ``date_values`` is flattened and NaN entries are dropped.  The integer
     Date value of each remaining entry ``d`` at flat position ``i`` is::

         (int(d) - 1) % 4 + 1  +  (i // 4) * 4  +  (YEAR - 1) * 4

     i.e. the quarter wrapped into 1..4, advanced by four for every complete
     group of four positions, offset by the optional base year ``kw['YEAR']``
     (no offset when the key is absent).
     """
     by=0
     if 'YEAR' in kw:
         # NOTE(security): eval() executes whatever expression kw['YEAR']
         # holds; kept for compatibility with callers passing expressions,
         # but it must never receive untrusted input.
         by = eval(kw['YEAR'])-1
     # ``_i // 4`` is the floor division that ``int(_i/4)`` spelled out for
     # non-negative positions, independent of the division semantics in use.
     v = [ (int(d)-1)%4+1+(_i//4)*4+by*4
           for _i,d in enumerate(date_values.flatten())
           if not np.isnan(d) ]
     D = [ ts.Date(freq=str(freq),value=_v) for _v in v]
     date_array = ts.date_array(D)
     return date_array
 def setup(self):
     "Build the shared fixtures and keep them in self.data."
     values = np.arange(5)
     flags = ma.make_mask([1, 0, 0, 1, 1])
     # Two columns each: the sequence and its reverse.
     value_cols = np.r_[values, values[::-1]].reshape(2, -1).T
     flag_cols = np.r_[[flags, flags[::-1]]].T
     masked = ma.array(value_cols, mask=flag_cols)
     records = mr.fromarrays(masked.T,)
     labels = ['2007-%02i' % (k + 1) for k in values]
     when = date_array(labels)
     as_series = time_series(records, when)
     as_records = time_records(records, when)
     self.data = [values, flags, records, labels, when,
                  as_series, as_records]
Beispiel #16
0
 def setup(self):
     "Create the common test data and store it in self.data."
     numbers = np.arange(5)
     hidden = ma.make_mask([1, 0, 0, 1, 1])
     # Pair the sequence with its reverse, column-wise, for data and mask.
     data_2d = np.r_[numbers, numbers[::-1]].reshape(2, -1).T
     mask_2d = np.r_[[hidden, hidden[::-1]]].T
     masked_2d = ma.array(data_2d, mask=mask_2d)
     mrec = mr.fromarrays(masked_2d.T, )
     month_labels = ['2007-%02i' % (n + 1) for n in numbers]
     month_dates = date_array(month_labels)
     self.data = [numbers, hidden, mrec, month_labels, month_dates,
                  time_series(mrec, month_dates),
                  time_records(mrec, month_dates)]
Beispiel #17
0
 def test_dates_on_several_columns(self):
     "Check tsfromtxt when year and month sit in separate columns."
     source = StringIO.StringIO("""
     2001, 01, 0.0, 10.
     2001, 02, 1.1, 11.
     2001, 02, 2.2, 12.
     """)

     def to_month(y, m):
         # Merge the year and month columns into one monthly Date.
         return Date('M', year=int(y), month=int(m))

     parsed = tsfromtxt(source, delimiter=',', dtype=float,
                        datecols=(0, 1), dateconverter=to_month)
     assert_equal(parsed, [[0., 10.], [1.1, 11.], [2.2, 12.]])
     expected_dates = date_array(['2001-01', '2001-02', '2001-02'],
                                 freq='M')
     assert_equal(parsed.dates, expected_dates)
Beispiel #18
0
 def _make_predict_dates(self):
     """Store the prediction dates on ``self._data`` as datetime objects.

     When scikits.timeseries cannot be imported, ``predict_dates`` is set
     to None and nothing else is done.  Otherwise a date array is built
     from ``predict_start``, ``predict_end`` and ``freq`` of ``self._data``,
     converted to integer ordinals and stored as an array of
     ``datetime.datetime`` objects.
     """
     try:
         from scikits.timeseries import date_array
     except ImportError:
         self._data.predict_dates = None
         # Stop here: date_array is unbound, so carrying on would only
         # replace the None above with a NameError.
         return
     data = self._data
     dtstart = data.predict_start
     dtend = data.predict_end
     freq = data.freq
     #pandas_freq = _freq_to_pandas[freq]
     dates = date_array(start_date=dtstart, end_date=dtend,
                             freq=freq).toordinal().astype(int)
     self._data.predict_dates = asarray(
             [datetime.datetime.fromordinal(i) for i in dates])
Beispiel #19
0
 def _make_predict_dates(self):
     """Compute ``self._data.predict_dates`` from the prediction range.

     Without scikits.timeseries the attribute is set to None and the
     method returns.  With it, the dates between ``predict_start`` and
     ``predict_end`` at frequency ``freq`` (all read from ``self._data``)
     are converted to integer ordinals and stored as an array of
     ``datetime.datetime`` objects.
     """
     try:
         from scikits.timeseries import date_array
     except ImportError:
         self._data.predict_dates = None
         # Without this return the code below hits the unbound name
         # date_array and raises instead of leaving predict_dates as None.
         return
     data = self._data
     dtstart = data.predict_start
     dtend = data.predict_end
     freq = data.freq
     #pandas_freq = _freq_to_pandas[freq]
     dates = date_array(start_date=dtstart, end_date=dtend,
                        freq=freq).toordinal().astype(int)
     self._data.predict_dates = asarray(
         [datetime.datetime.fromordinal(i) for i in dates])
Beispiel #20
0
    def test_with_datecols(self):
        "Test two datecols"
        fcontent = StringIO.StringIO("""
year, month, A, B
2009, 01, 1, 1.
2009, 03, 3, 3.
""")
        dateconv = lambda y, m: Date("M", year=int(y), month=int(m))
        test = tsfromtxt(fcontent, delimiter=",", skip_header=1, names=True,
                         converters={'dates': dateconv}, datecols=(0, 1))
        dates = date_array(['2009-01', '2009-03'], freq='M')
        assert_equal(test.dates.tovalue(), dates)
        assert_equal(test['A'], [1, 3])
        assert_equal(test['B'], [1., 3.])
        assert_equal(test.dtype, np.dtype([('A', int), ('B', float)]))
Beispiel #21
0
 def _get_tseriesM(freq,date_values,kw):
     """Build a monthly date array from the values of a spreadsheet column.

     The interpretation of ``date_values`` depends on ``kw['DFORMAT']``
     (default ``'INT'``):

     - ``'INT'``: the Date value of a non-NaN entry ``d`` at flat position
       ``i`` is ``(int(d) - 1) * 12 + i % 12 + 1`` -- presumably ``d`` is a
       year and the month comes from the position; confirm with callers.
     - ``'XL_DATE'``: every non-NaN entry is decoded as an Excel serial date
       (datemode 0) and its year and month are used.

     :raises ValueError: for any other ``DFORMAT`` (the error is logged first)
     """
     dformat = kw['DFORMAT'] if 'DFORMAT' in kw else 'INT'
     if dformat=='INT':
         v = [ (int(d)-1)*12+i%12+1
               for i,d in enumerate(date_values.flatten())
               if not np.isnan(d) ]
         D = [ ts.Date(freq=str(freq),value=_v) for _v in v ]
     elif dformat=='XL_DATE':
         v = [ xlrd.xldate_as_tuple(d,0)
               for d in date_values.flatten()
               if not np.isnan(d) ]
         D = [ ts.Date(freq=str(freq),year=_v[0],month=_v[1]) for _v in v ]
     else:
         logger.error('DATE FORMAT NOT SUPPORTED ON EXCEL READING')
         raise ValueError(dformat)
     date_array = ts.date_array(D)
     return date_array
Beispiel #22
0
 def test_dates_on_several_columns(self):
     "Check tsfromtxt with a date split across two input columns."
     stream = StringIO.StringIO("""
     2001, 01, 0.0, 10.
     2001, 02, 1.1, 11.
     2001, 02, 2.2, 12.
     """)

     def month_of(y, m):
         # Year and month arrive as separate text fields.
         return Date('M', year=int(y), month=int(m))

     options = dict(delimiter=',',
                    dtype=float,
                    datecols=(0, 1),
                    dateconverter=month_of)
     result = tsfromtxt(stream, **options)
     assert_equal(result, [[0., 10.], [1.1, 11.], [2.2, 12.]])
     assert_equal(result.dates,
                  date_array(['2001-01', '2001-02', '2001-02'], freq='M'))
 def setUp(self):
     "Build a ClimateRecords fixture that carries an ENSO indicator."
     size = 120
     ndtype = [('lin', float), ('rand', float)]
     dlin = np.linspace(0, 10, size)
     drnd = np.random.rand(size)
     records = np.array(list(zip(dlin, drnd)), dtype=ndtype)
     # Monthly dates covering the 120 months before the current one.
     dates = ts.date_array(start_date=ts.now('M') - size,
                           length=size, freq='M')
     # NOTE(review): full_year is the *string* 'False', which is truthy if
     # the callee tests it as a boolean -- confirm this is intended.
     enso = ENSOIndicator(np.random.rand(size) + np.linspace(-1, 1, size),
                          dates=dates,
                          thresholds=(-0.5, 0.5),
                          full_year='False', refseason='NDH', minsize=5)
     cdat = records.view(ClimateRecords)
     cdat._dates = dates
     cdat.ensoindicator = enso
     self.dlin = dlin
     self.cdat = cdat
     self.enso = enso
Beispiel #24
0
    def test_with_datecols(self):
        "Test two datecols"
        fcontent = StringIO.StringIO("""
year, month, A, B
2009, 01, 1, 1.
2009, 03, 3, 3.
""")
        dateconv = lambda y, m: Date("M", year=int(y), month=int(m))
        test = tsfromtxt(fcontent,
                         delimiter=",",
                         skip_header=1,
                         names=True,
                         converters={'dates': dateconv},
                         datecols=(0, 1))
        dates = date_array(['2009-01', '2009-03'], freq='M')
        assert_equal(test.dates.tovalue(), dates)
        assert_equal(test['A'], [1, 3])
        assert_equal(test['B'], [1., 3.])
        assert_equal(test.dtype, np.dtype([('A', int), ('B', float)]))
Beispiel #25
0
    def test_with_names(self):
        "Tests w/ names"
        fcontent = StringIO.StringIO("""#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
""")
        test = tsfromtxt(fcontent, delimiter=",", datecols=0, skip_header=2,
                         names="A,B,C,D,E,F", freq='M')
        assert(isinstance(test, TimeSeries))
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        assert_equal(test.dates.tovalue(),
                     date_array(dlist, freq='M').tovalue())
        assert_equal(test.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(test['F'], [1, 1, 1, 1])
        assert_equal(test['E'].mask, [1, 1, 1, 1])
        assert_equal(test['C'], [1, 2, 300000, -1e-10])
 def setUp(self):
     "Prepare ClimateRecords test data with an attached ENSO indicator."
     count = 120
     ndtype = [('lin', float), ('rand', float)]
     dlin = np.linspace(0, 10, count)
     drnd = np.random.rand(count)
     table = np.array(list(zip(dlin, drnd)), dtype=ndtype)
     # 120 monthly dates ending just before the current month.
     first_month = ts.now('M') - count
     dates = ts.date_array(start_date=first_month, length=count, freq='M')
     index_values = np.random.rand(count) + np.linspace(-1, 1, count)
     # NOTE(review): 'False' is a non-empty string, i.e. truthy when used as
     # a boolean -- check that full_year is meant to be passed this way.
     enso = ENSOIndicator(index_values,
                          dates=dates,
                          thresholds=(-0.5, 0.5),
                          full_year='False',
                          refseason='NDH',
                          minsize=5)
     cdat = table.view(ClimateRecords)
     cdat._dates = dates
     cdat.ensoindicator = enso
     self.dlin = dlin
     self.cdat = cdat
     self.enso = enso
Beispiel #27
0
    def test_without_names(self):
        "Test w/o names"
        fcontent = StringIO.StringIO("""#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
""")
        test = tsfromtxt(fcontent,
                         delimiter=",",
                         skip_header=1,
                         names=True,
                         freq='M')
        assert (isinstance(test, TimeSeries))
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        assert_equal(test.dates.tovalue(),
                     date_array(dlist, freq='M').tovalue())
        assert_equal(test.dtype.names,
                     ['One_S', 'Two_I', 'Three_F', 'Four_M', 'Five_', 'Six_C'])
        assert_equal(test['Six_C'], [1, 1, 1, 1])
        assert_equal(test['Five_'].mask, [1, 1, 1, 1])
        assert_equal(test['Three_F'], [1, 2, 300000, -1e-10])
Beispiel #28
0
import statsmodels.api as sm
import numpy as np
import pandas

# Getting started
# ---------------

# Load the sunspots example dataset shipped with statsmodels.
data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.
# We can use scikits.timeseries and datetime to create this array.

import datetime
import scikits.timeseries as ts

# One annual ("A") date per observation, starting in 1700.
dates = ts.date_array(start_date=1700, length=len(data.endog), freq="A")

# To make an array of datetime types, we need an integer array of ordinals

# .. from datetime import datetime
# .. dt_dates = dates.toordinal().astype(int)
# .. dt_dates = np.asarray([datetime.fromordinal(i) for i in dt_dates])
# The ordinal-based conversion above is kept for reference only; tolist()
# is what is actually used to build the index.
dt_dates = dates.tolist()

# Using Pandas
# ------------

# Make a pandas TimeSeries or DataFrame
# (the observations indexed by the dates built above)
endog = pandas.Series(data.endog, index=dt_dates)

# and instantiate the model
Beispiel #29
0
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular

* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""

import numpy as np
import scikits.timeseries as ts

# Monthly series in which 2001-01 and 2001-03 each appear twice.
s = ts.time_series([1, 2, 3, 4, 5],
                   dates=ts.date_array(
                       ["2001-01", "2001-01", "2001-02", "2001-03", "2001-03"],
                       freq="M"))

print '\nUsing la'
import la
# Two larrys sharing the same integer labels: one for the values and one
# for the dates of the series.
dta = la.larry(s.data, label=[range(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[range(len(s.data))])
# group_mean of the values grouped by date, wrapped back into a monthly
# series (presumably each entry then holds the mean of its date group --
# confirm against the la documentation).
s2 = ts.time_series(dta.group_mean(dat).x,
                    dates=ts.date_array(dat.x, freq="M"))
# s2 still has one entry per original row; drop the repeated dates.
s2u = ts.remove_duplicated_dates(s2)
print repr(s)
print dat
print repr(s2)
print repr(s2u)

print '\nUsing pandas'
"""
Look at some macro plots, then do some VARs and IRFs.
"""

import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts
import scikits.timeseries.lib.plotlib as tplt

# Load the macrodata example dataset without pandas and keep only its
# ``data`` attribute, which is indexed by field name below.
data = sm.datasets.macrodata.load(as_pandas=False)
data = data.data

### Create Timeseries Representations of a few vars

# Quarterly dates from 1959 Q1 to 2009 Q3.
dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
                      end_date=ts.Date('Q', year=2009, quarter=3))

# View the three selected fields as plain floats, one column per field.
ts_data = data[['realgdp', 'realcons', 'cpi']].view(float).reshape(-1, 3)
# Fourth column: (1 - unemp/100) * pop.
ts_data = np.column_stack((ts_data, (1 - data['unemp'] / 100) * data['pop']))
ts_series = ts.time_series(ts_data, dates)

# Plot the first three columns in the first three cells of a 2x2 grid.
fig = tplt.tsfigure()
fsp = fig.add_tsplot(221)
fsp.tsplot(ts_series[:, 0], '-')
fsp.set_title("Real GDP")
fsp = fig.add_tsplot(222)
fsp.tsplot(ts_series[:, 1], 'r-')
fsp.set_title("Real Consumption")
fsp = fig.add_tsplot(223)
fsp.tsplot(ts_series[:, 2], 'g-')
fsp.set_title("CPI")
Beispiel #31
0
def getHappinessStats(fromFunc=False, groupId=None, userId=None, endDateIn=None):
	"""Build per-day happiness averages for one company.

	The company id comes from the submitted form or, failing that, from the
	session.  For every day up to the end date three averages are produced:
	overall ("val"), restricted to the form's groupId ("gr") and restricted
	to the form's userId ("me").  Days without data get zeros.

	Parameters:
		fromFunc: when True the list of rows is returned as is and a missing
			end date defaults to today; otherwise the rows are wrapped with
			jsonify.
		groupId: optional filter on Happiness.groupId.
		userId: optional filter on Happiness.userId, only used when groupId
			is not given.
		endDateIn: end date as a 'YYYY-MM-DD' string; overridden by the
			form's 'endDate' when present.

	Returns:
		None when no company id or no end date is available; otherwise a
		list of {"date", "val", "gr", "me"} dicts (fromFunc == True) or the
		jsonify response {"stats": [...]}.
	"""
	# Always-true clause so the filter list is never empty.
	condFilter = [1==1]

	# Form value first, session value as fallback.
	companyId = request.form.get("companyId") and request.form.get("companyId") or session.get("companyId")
	if companyId :
		condFilter.append(Happiness.companyId==companyId)
	else :
		return None

	if groupId:
		condFilter.append(Happiness.groupId==groupId)
	elif userId:
		condFilter.append(Happiness.userId==userId)
	
	if request.form.get('endDate'):
		endDateIn = request.form['endDate']
	elif fromFunc==True and not endDateIn:
		endDateIn = datetime.now().strftime("%Y-%m-%d")
	if not endDateIn:
		return None
	
	if request.form.get("startDate"):
		condFilter.append(Happiness.rdate > request.form['startDate'])

	# cols = func.date(Happiness.rdate).label("rdate"), func.avg(Happiness.happyVal)
	# if request.form.get('type')=='all' and request.form.get('groupId') and request.form.get('userId'):
		# cols = cols + ( func.avg(func.IF(Happiness.groupId==groupId, Happiness.happyVal, None)) )
		# cols = cols + ( func.avg(func.IF(Happiness.userId==userId, Happiness.happyVal, None)) )
	# Selected columns: day, overall average, group average, user average.
	# NOTE(review): the group/user averages use the *form* values, not the
	# groupId/userId arguments of this function -- confirm this is intended.
	cols = func.date(Happiness.rdate).label("rdate"), func.avg(Happiness.happyVal), func.avg(func.IF(Happiness.groupId==request.form.get('groupId'), Happiness.happyVal, None)), func.avg(func.IF(Happiness.userId==request.form.get('userId'), Happiness.happyVal, None))

	db_result = db_session.query(*cols)\
				.group_by( func.date(Happiness.rdate) )\
				.filter(func.date(Happiness.rdate) <= endDateIn)\
				.filter(*condFilter).all()

	# Split the result rows into one list per column.
	db_dates = list()
	db_vals1 = list()
	db_vals2 = list()
	db_vals3 = list()
	for row in db_result:
		db_dates.append(row[0])
		db_vals1.append(row[1])
		if len(row)>2 : 
			db_vals2.append(row[2])
			db_vals3.append(row[3])

	stats = list()
	# Daily series for each average; days missing between the first and the
	# last returned date are inserted and set to 0.
	dateAry = ts.date_array(db_dates, freq='D')
	timeSrz1 = ts.time_series(db_vals1, dateAry)
	fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
	fillVals1 = fillVals1.filled(0)
	timeSrz2 = ts.time_series(db_vals2, dateAry)
	fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)
	fillVals2 = fillVals2.filled(0)
	timeSrz3 = ts.time_series(db_vals3, dateAry)
	fillVals3 = timeSrz3.fill_missing_dates(fill_value=0)
	fillVals3 = fillVals3.filled(0)

	# Full reporting range: from the form's start date (or the first date
	# with data) to the end date.
	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		# NOTE(review): raises IndexError when the query returned no rows.
		startDate = db_dates[0]
	endDate = datetime.strptime(endDateIn, '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Leading days of the range that come before the first day with data
	# get zero rows; firstDayIdx ends up on the first day with data.
	firstDayIdx = 0
	for day in fillDateAry :
		if len(dateAry)==0 or dateAry[0] != fillDateAry[firstDayIdx] :
			stats.append({ "date" : fillDateAry[firstDayIdx].strftime("%m/%d"), "val" : 0, "gr":0, "me":0 })
			firstDayIdx += 1
			continue
		else :
			break
	# One row per day of the filled series, values formatted with two
	# decimals (missing values count as 0).
	for idx, val in enumerate(fillVals1):
		gr = fillVals2[idx]
		me = fillVals3[idx]
		rowDict = { "date" : fillDateAry[firstDayIdx + idx].strftime("%m/%d"), "val" : "{0:.2f}".format(float(val or 0)) }
		rowDict["gr"] = "{0:.2f}".format(float(gr or 0))
		rowDict["me"] = "{0:.2f}".format(float(me or 0))
		stats.append(rowDict)

	# Trailing days after the last day with data get zero rows as well.
	statsLen = len(stats)
	for i in range(len(fillDateAry) - len(stats)) :
		stats.append({ "date" : fillDateAry[statsLen + i].strftime("%m/%d"), "val" : 0, "gr":0, "me":0 })		
		
	if fromFunc==True : 
		return stats
	else :
		return jsonify({"stats" : stats})
def load_oni(mode='standard', **options):
    """
    Loads the ONI 3-m averaged monthly SST anomalies over the Niño-3.4 region
    and returns a :class:`~scikits.hydroclimpy.enso.ENSOIndicator` object.

    Two modes are accepted as arguments:

    - in the ``standard`` mode, the SSTs are retrieved from the original CPC
      website_.
      Data are available from Jan. 1950 to present.
    - in the ``backup`` mode, the SSTs are retrieved from the CPC `ftp site <ftpsite>`_.
      Data are available from Jan. 1900 to present.

    .. _website : http://www.cpc.noaa.gov/products/analysis_monitoring/ensostuff/ensoyears.shtml
    .. _ftpsite : ftp://eclipse.ncdc.noaa.gov/pub/ersst/pdo/el_nino_v3.dat.


    Parameters
    ----------
    mode : {'standard','backup'}, optional
        Mode describing the data to download.
        Any value other than ``'standard'`` is treated as ``'backup'``.
        The ``standard`` mode silently degrades to ``backup`` (with a warning)
        when the module ``BeautifulSoup`` cannot be imported.
    options : dictionary
        Optional parameters to pass to the ENSOIndicator for the definition of
        ENSO indices. They override the entries of the ``ENSO.ONI``
        configuration section.
    thresholds : tuple of floats, optional
        Low and high temperature thresholds for the definition of El Niño and
        La Niña conditions.
        By default, the CPC uses -0.5oC and +0.5oC.
    minimum_size : int, optional
        Minimum number of consecutive months in El Niño / La Niña conditions
        required for the definition of an episode.
        By default, the CPC use 5 consecutive months.
    reference_season : string or tuple, optional
        Months that must be in an episode for it to be valid.
        By default, the CPC uses None (no restriction on the months).
    full_year : boolean, optional
        The CPC uses ``full_year=False``.

    Returns
    -------
    ensoi : ENSOIndicator
        Either the indicator found in the zip archive named by the
        ``ensoarchive`` entry of the ``ENSO`` configuration section, or a
        freshly downloaded one (which is then pickled into that archive).

    Notes
    -----
    The archive content is unpickled: ``ensoarchive`` must only point to a
    trusted file.

    References
    ----------
    Xue, Y., T. M. Smith, and R. W. Reynolds, 2003: Interdecadal changes of 30-yr
    SST normals during 1871-2000. *J. Climate*, 16, 1601-1612.

    """
    # Initialization .......................
    ensoarchive = dict(config.items('ENSO'))['ensoarchive']
    if ensoarchive[-4:].lower() != '.zip':
        ensoarchive += '.zip'
    #
    mode = mode.lower()
    cfg = dict(config.items('ENSO.ONI'))
    cfg.update(options)
    if mode == 'standard':
        # BeautifulSoup is only needed to scrape the HTML table, so it is
        # only looked for (and warned about) when that mode was requested.
        try:
            from BeautifulSoup import BeautifulSoup, SoupStrainer
        except ImportError:
            warnings.warn("The module 'BeautifulSoup' is unavailable.\n"\
                          "Reverting to backup mode")
            mode = 'backup'
    #
    datadir = cfg['datadir']
    if mode == 'standard':
        netfile = cfg['netfile']
        archive = cfg['archive']
    else:
        netfile = cfg['netfile_backup']
        archive = cfg['archive_backup']
    # Try to open an existing ENSOIndicator
    try:
        zipf = zipfile.ZipFile(ensoarchive, 'r')
    except IOError:
        # No archive yet: create it
        zipf = zipfile.ZipFile(ensoarchive, 'w')
        ensologger.info("... Creating archive")
    else:
        ensoi = None
        try:
            ensoi = cPickle.loads(zipf.read(archive))
        except KeyError:
            # The archive exists but does not hold this entry yet
            ensologger.info("... Appending to archive")
        else:
            ensologger.info("... Loading from existing archived file")
        finally:
            # Always release the read handle before returning or reopening
            zipf.close()
        if isinstance(ensoi, enso.ENSOIndicator):
            return ensoi
        # Entry missing or not an ENSOIndicator: reopen in append mode so that
        # the freshly built indicator can actually be written below.
        zipf = zipfile.ZipFile(ensoarchive, 'a')
    #
    try:
        sourcedir = np.lib._datasource.DataSource(datadir)
        dfile = sourcedir.open(netfile)
        try:
            if mode == 'standard':
                # Load the file as a tree, but only take the SST table (border=1)
                table = BeautifulSoup(dfile.read(),
                                      parseOnlyThese=SoupStrainer("table",
                                                                  border=1))
                # Separate it by rows, but skip the first one (the header)
                years = []
                data = []
                for row in table.findAll("tr")[1:]:
                    cols = row.findAll('td')
                    # The first cell holds the year, the others the monthly values
                    years.append(int(cols.pop(0).strong.string))
                    # Empty cells ('&nbsp;') are given the placeholder 99.9,
                    # which is masked when the indicator is built.
                    data.append([
                        float(_.fetchText()[-1].string.replace('&nbsp;', '99.9'))
                        for _ in cols
                    ])
                #
                start_date = Date('M', year=years[0], month=1)
                ensoi = enso.ENSOIndicator(
                    ma.masked_values(data, 99.9).ravel(),
                    start_date=start_date,
                )
            else:
                # The first two columns are (year, month), the last one the SST:
                # it is smoothed with a 3-point centered moving average.
                rawdata = np.loadtxt(dfile)
                dates = date_array(
                    [Date('M', year=yy, month=mm) for (yy, mm) in rawdata[:, :2]],
                    freq='M')
                ensoi = enso.ENSOIndicator(
                    cmov_mean(rawdata[:, -1], 3).round(2),
                    dates,
                )
        finally:
            dfile.close()
        #
        _set_ensoindicator_options(ensoi, **cfg)
        ensoi.set_indices()
        #
        # Store in the archive
        zipf.writestr(archive, cPickle.dumps(ensoi))
    finally:
        zipf.close()
    return ensoi
Beispiel #33
0
#    t = timer()
#    mod_tb = tb.lpc(y, 2)
#    t_end = timer()
#    print str(t_end - t) + " seconds for talkbox.lpc"
#    print """For higher lag lengths ours quickly fills up memory and starts
#thrashing the swap.  Should we include talkbox C code or Cythonize the
#Levinson recursion algorithm?"""

    ## Try with a pandas series
    import pandas
    import scikits.timeseries as ts
    d1 = ts.Date(year=1700, freq='A')
    #NOTE: have to have yearBegin offset for annual data until parser rewrite
    #should this be up to the user, or should it be done in TSM init?
    #NOTE: not anymore, it's end of year now
    ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
    pandas_dr = pandas.DateRange(start=d1.datetime,
                                 periods=len(sunspots.endog), timeRule='A@DEC')
    #pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

    dates = np.arange(1700, 1700 + len(sunspots.endog))
    dates = ts.date_array(dates, freq='A')
    #sunspots = pandas.Series(sunspots.endog, index=dates)

    #NOTE: pandas only does business days for dates it looks like
    import datetime
    dt_dates = np.asarray(lmap(datetime.datetime.fromordinal,
                              ts_dr.toordinal().astype(int)))
    sunspots = pandas.Series(sunspots.endog, index=dt_dates)

    #NOTE: pandas can't handle pre-1900 dates
rearranged
1.00796791   0.24449867(-0.00521004)   0.50554663
garch11:
[ 1.01258264  0.24149155  0.50479994]
-2056.3877404
R include_constant=False
Final Estimate:
 LLH:  2056.397    norm LLH:  2.056397
    omega    alpha1     beta1
1.0123560 0.2409589 0.5049154
'''


# Generate a short sample (first argument 20, with 20 unit variance
# innovations) from generate_gjrgarch; `ar` and `ma` are defined earlier in
# the script.
# NOTE(review): the meaning of the three returned values is not visible
# here -- confirm against generate_gjrgarch.
erro,ho, etaxo = generate_gjrgarch(20, ar, ma, mu=0.04, scale=0.01,
                  varinnovation = np.ones(20))

# Optional example, only run when 'sp500' is listed in `examples`.
if 'sp500' in examples:
    import tabular as tb
    import scikits.timeseries as ts

    # Hard-coded local path to the quotes table.
    a = tb.loadSV(r'C:\Josef\work-oth\gspc_table.csv')

    # The 'Close' and 'Date' columns are reversed with [::-1] before use.
    # NOTE(review): presumably the file is stored newest-first -- confirm.
    s = ts.time_series(a[0]['Close'][::-1],
                dates=ts.date_array(a[0]['Date'][::-1],freq="D"))

    sp500 = a[0]['Close'][::-1]
    # First difference of the log closes, i.e. log returns.
    sp500r = np.diff(np.log(sp500))


plt.show()
Beispiel #35
0
                        dd = f(**params)
                ## Add other distributions here
                else:
                    raise TypeError, 'Unknown generator'
            except TypeError, exc:
                logging.error('%s generator - %s',rq['Source'],exc.args[0])
            except:
                raise
            if s['FREQ']!='0':
                data = ts.time_series(dd,
                                      freq=s['FREQ'],
                                      dates=dr)            
                if s['NAN']:
                    t=ts.now(data.freqstr)
                    if t<data.end_date:
                        da = ts.date_array(start_date=t,end_date=data.end_date)
                        data[da]=np.nan
                rx[kw['NAME']]=Timeseries(data=data,name=name)
            else: # for a vector
                print type(dd),dd.shape
                xx = dd.reshape((1,-1)) 
                print type(xx),xx.shape
                rx[kw['NAME']]=np.array(dd)
        return rx

# random://normal/loc=10,scale=.2~:2012-12-31~2007-01-01~M?name=UNI

if __name__ == "__main__":
    # Run the parser on the bare name, then on the `~M` and `~Q` variants.
    for _spec in ('NAME=PIPPO', 'NAME=PIPPO~M', 'NAME=PIPPO~Q'):
        parse_instrument(_spec)
Beispiel #36
0
def _daily_finder(vmin, vmax, freq):
    """
    Build the per-position label information for the range [vmin, vmax].

    One record is created for every integer position between ``int(vmin)``
    and ``int(vmax)`` (inclusive). Depending on how large that span is
    compared to the number of periods per day/month/year at frequency `freq`,
    different calendar boundaries (second, minute, hour, day, week, month,
    quarter, year) are flagged and given a format string.

    Parameters
    ----------
    vmin, vmax : number
        Bounds of the range. They are truncated with ``int``; the untruncated
        `vmin` is kept to decide which label carries the year information.
    freq : int
        Frequency code, compared against the ``_c.FR_*`` constants.

    Returns
    -------
    info : ndarray
        Structured array with the fields ``val`` (integer position), ``maj``
        and ``min`` (boolean flags) and ``fmt`` (byte string of at most 20
        characters, empty when the position has no label).
        NOTE(review): the field names suggest major/minor tick flags and
        strftime-like label formats for a plotting axis -- confirm against
        the callers.

    Raises
    ------
    ValueError
        If `freq` is not one of the frequencies handled below.
    """

    # Stays at -1 for every frequency not handled by the FR_HR/FR_MIN/FR_SEC
    # branch: all the ``span < periodsperday ...`` tests of Case 1 then fail
    # (span is at least 1 when vmax >= vmin) and the final ``else`` is used.
    periodsperday = -1

    if freq >= _c.FR_HR:
        if freq == _c.FR_SEC:
            periodsperday = 24 * 60 * 60
        elif freq == _c.FR_MIN:
            periodsperday = 24 * 60
        elif freq == _c.FR_HR:
            periodsperday = 24
        else:
            raise ValueError("unexpected frequency: %s" % check_freq_str(freq))
        periodsperyear = 365 * periodsperday
        periodspermonth = 28 * periodsperday

    elif freq == _c.FR_BUS:
        periodsperyear = 261
        periodspermonth = 19
    elif freq == _c.FR_DAY:
        periodsperyear = 365
        periodspermonth = 28
    elif get_freq_group(freq) == _c.FR_WK:
        periodsperyear = 52
        periodspermonth = 3
    elif freq == _c.FR_UND:
        periodsperyear = 100
        periodspermonth = 10
    else:
        raise ValueError("unexpected frequency")

    # save this for later usage
    vmin_orig = vmin

    (vmin, vmax) = (int(vmin), int(vmax))
    span = vmax - vmin + 1
    dates_ = date_array(start_date=Date(freq, vmin), end_date=Date(freq, vmax))
    # Initialize the output
    info = np.zeros(span,
                    dtype=[('val', int), ('maj', bool), ('min', bool),
                           ('fmt', '|S20')])
    info['val'][:] = np.arange(vmin, vmax + 1)
    info['fmt'][:] = ''
    # The first and last positions are always flagged in 'maj'
    info['maj'][[0, -1]] = True
    # .. and set some shortcuts
    info_maj = info['maj']
    info_min = info['min']
    info_fmt = info['fmt']

    def first_label(label_flags):
        # Return the second entry of `label_flags` when the first one is
        # position 0, another entry exists and the original vmin had a
        # fractional part; otherwise return the first entry.
        if (label_flags[0] == 0) and (label_flags.size > 1) and \
            ((vmin_orig % 1) > 0.0):
            return label_flags[1]
        else:
            return label_flags[0]

    # NOTE(review): period_break is used below as an index array into `info`;
    # presumably it holds the positions where the given period changes --
    # confirm against its definition.
    # In every case the format assignments go from the finest to the coarsest
    # boundary, so that a coarser format overwrites a finer one on positions
    # that are flagged by both.

    # Case 1. Less than a month
    if span <= periodspermonth:

        day_start = period_break(dates_, 'day')
        month_start = period_break(dates_, 'month')

        def _hour_finder(label_interval, force_year_start):
            # Flag day starts in 'maj' and every hour change whose hour is a
            # multiple of `label_interval` in 'min'.
            _hour = dates_.hour
            _prev_hour = (dates_ - 1).hour
            hour_start = (_hour - _prev_hour) != 0
            info_maj[day_start] = True
            info_min[hour_start & (_hour % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'
            # Optionally put the year on the first day label when no
            # year-start label qualifies
            if force_year_start and not has_level_label(year_start, vmin_orig):
                info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y'

        def _minute_finder(label_interval):
            # Flag hour starts in 'maj' and every minute change whose minute
            # is a multiple of `label_interval` in 'min'.
            hour_start = period_break(dates_, 'hour')
            _minute = dates_.minute
            _prev_minute = (dates_ - 1).minute
            minute_start = (_minute - _prev_minute) != 0
            info_maj[hour_start] = True
            info_min[minute_start & (_minute % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            # Local name bound to the same 'fmt' field as the outer shortcut
            info_fmt = info['fmt']
            info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'

        def _second_finder(label_interval):
            # Flag minute starts in 'maj' and every second change whose
            # second is a multiple of `label_interval` in 'min'.
            minute_start = period_break(dates_, 'minute')
            _second = dates_.second
            _prev_second = (dates_ - 1).second
            second_start = (_second - _prev_second) != 0
            info['maj'][minute_start] = True
            info['min'][second_start & (_second % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            # Local name bound to the same 'fmt' field as the outer shortcut
            info_fmt = info['fmt']
            info_fmt[second_start
                     & (_second % label_interval == 0)] = '%H:%M:%S'
            info_fmt[day_start] = '%H:%M:%S\n%d-%b'
            info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y'

        # Pick the label interval from the span expressed as a fraction (or
        # multiple) of a day: the shorter the span, the finer the labels.
        if span < periodsperday / 12000.0: _second_finder(1)
        elif span < periodsperday / 6000.0: _second_finder(2)
        elif span < periodsperday / 2400.0: _second_finder(5)
        elif span < periodsperday / 1200.0: _second_finder(10)
        elif span < periodsperday / 800.0: _second_finder(15)
        elif span < periodsperday / 400.0: _second_finder(30)
        elif span < periodsperday / 150.0: _minute_finder(1)
        elif span < periodsperday / 70.0: _minute_finder(2)
        elif span < periodsperday / 24.0: _minute_finder(5)
        elif span < periodsperday / 12.0: _minute_finder(15)
        elif span < periodsperday / 6.0: _minute_finder(30)
        elif span < periodsperday / 2.5: _hour_finder(1, False)
        elif span < periodsperday / 1.5: _hour_finder(2, False)
        elif span < periodsperday * 1.25: _hour_finder(3, False)
        elif span < periodsperday * 2.5: _hour_finder(6, True)
        elif span < periodsperday * 4: _hour_finder(12, True)
        else:
            # Four days or more (or a frequency with periodsperday == -1):
            # month starts in 'maj', day starts in 'min'
            info_maj[month_start] = True
            info_min[day_start] = True
            year_start = period_break(dates_, 'year')
            info_fmt = info['fmt']
            info_fmt[day_start] = '%d'
            info_fmt[month_start] = '%d\n%b'
            info_fmt[year_start] = '%d\n%b\n%Y'
            # Make sure one label carries the year
            if not has_level_label(year_start, vmin_orig):
                if not has_level_label(month_start, vmin_orig):
                    info_fmt[first_label(day_start)] = '%d\n%b\n%Y'
                else:
                    info_fmt[first_label(month_start)] = '%d\n%b\n%Y'

    # Case 2. Less than three months
    elif span <= periodsperyear // 4:
        month_start = period_break(dates_, 'month')
        info_maj[month_start] = True
        # Below FR_HR every position is flagged in 'min'; otherwise only the
        # day starts are
        if freq < _c.FR_HR:
            info['min'] = True
        else:
            day_start = period_break(dates_, 'day')
            info['min'][day_start] = True
        week_start = period_break(dates_, 'week')
        year_start = period_break(dates_, 'year')
        info_fmt[week_start] = '%d'
        info_fmt[month_start] = '\n\n%b'
        info_fmt[year_start] = '\n\n%b\n%Y'
        # Make sure one label carries the year
        if not has_level_label(year_start, vmin_orig):
            if not has_level_label(month_start, vmin_orig):
                info_fmt[first_label(week_start)] = '\n\n%b\n%Y'
            else:
                info_fmt[first_label(month_start)] = '\n\n%b\n%Y'
    # Case 3. Less than 14 months ...............
    elif span <= 1.15 * periodsperyear:
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        week_start = period_break(dates_, 'week')
        info_maj[month_start] = True
        info_min[week_start] = True
        # A year or month start is never flagged in 'min' here
        info_min[year_start] = False
        info_min[month_start] = False
        info_fmt[month_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'
        if not has_level_label(year_start, vmin_orig):
            info_fmt[first_label(month_start)] = '%b\n%Y'
    # Case 4. Less than 2.5 years ...............
    elif span <= 2.5 * periodsperyear:
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        month_start = period_break(dates_, 'month')
        info_maj[quarter_start] = True
        info_min[month_start] = True
        info_fmt[quarter_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'
    # Case 5. Less than 4 years .................
    elif span <= 4 * periodsperyear:
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        info_maj[year_start] = True
        info_min[month_start] = True
        info_min[year_start] = False

        # Only the month starts falling in January or July get a label
        month_break = dates_[month_start].month
        jan_or_jul = month_start[(month_break == 1) | (month_break == 7)]
        info_fmt[jan_or_jul] = '%b'
        info_fmt[year_start] = '%b\n%Y'
    # Case 6. Less than 11 years ................
    elif span <= 11 * periodsperyear:
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        info_maj[year_start] = True
        info_min[quarter_start] = True
        info_min[year_start] = False
        info_fmt[year_start] = '%Y'
    # Case 7. More than 11 years ................
    else:
        year_start = period_break(dates_, 'year')
        year_break = dates_[year_start].years
        nyears = span / periodsperyear
        # Only the year starts whose year is a multiple of the spacing
        # returned by _get_default_annual_spacing are flagged
        (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
        major_idx = year_start[(year_break % maj_anndef == 0)]
        info_maj[major_idx] = True
        minor_idx = year_start[(year_break % min_anndef == 0)]
        info_min[minor_idx] = True
        info_fmt[major_idx] = '%Y'
    #............................................
    return info
Beispiel #37
0
import scikits.statsmodels.api as sm
import numpy as np
import pandas

# Getting started
# ---------------

# Load the sunspots dataset shipped with statsmodels; `data.endog` is the
# series used below.
data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.
# We can use scikits.timeseries and datetime to create this array.

import datetime
import scikits.timeseries as ts
# Annual ('A') date array starting in 1700, with one date per observation
dates = ts.date_array(start_date=1700, length=len(data.endog), freq='A')

# To make an array of datetime types, we need an integer array of ordinals

#.. from datetime import datetime
#.. dt_dates = dates.toordinal().astype(int)
#.. dt_dates = np.asarray([datetime.fromordinal(i) for i in dt_dates])
# The ordinal-based conversion above is disabled; tolist() is used instead.
# NOTE(review): presumably tolist() returns datetime objects -- confirm.
dt_dates = dates.tolist()

# Using Pandas
# ------------

# Make a pandas TimeSeries or DataFrame
endog = pandas.Series(data.endog, index=dt_dates)

# and instantiate the model
ar_model = sm.tsa.AR(endog, freq='A')
import datetime

from matplotlib.finance import quotes_historical_yahoo
import scikits.timeseries as ts
import scikits.timeseries.lib.tstables as tstab

# Period of the quotes to download
startdate = datetime.date(2002, 1, 5)
enddate = datetime.date(2003, 12, 1)

# retrieve data from yahoo.
# Data format is [(d, open, close, high, low, volume), ...] where d is
# a floating point representation of the number of days since 01-01-01 UTC
quotes = quotes_historical_yahoo('INTC', startdate, enddate)

# Create a DateArray of daily dates and convert it to business day frequency
dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS')

# Second field of each quote (the opening price, see the format above)
opens = [q[1] for q in quotes]

# opens: the data portion of the time series
# dates: the date portion of the time series
raw_series = ts.time_series(opens, dates)
# The series is used as is: the filling of missing dates is disabled below
test_series = raw_series
#test_series = ts.fill_missing_dates(raw_series, fill_value=-1)

# Make sure the output directory of the PyTables file exists; an OSError
# (e.g. the directory is already there) is ignored.
# NOTE(review): `os` is not imported in the visible part of this script --
# confirm it is imported elsewhere.
output_dir = '../timeseries'
try:
    os.mkdir(output_dir)
except OSError:
    pass
* pandas is missing GroupBy in the docs, but the docstring is helpful
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular

* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
from statsmodels.compat.python import lrange, zip
import numpy as np
import scikits.timeseries as ts

# Monthly series with duplicated dates: "2001-01" and "2001-03" both appear
# twice.
s = ts.time_series([1,2,3,4,5],
            dates=ts.date_array(["2001-01","2001-01",
            "2001-02","2001-03","2001-03"],freq="M"))

print('\nUsing la')
import la

# Wrap the values and the dates in two larrys sharing the same integer labels
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
# NOTE(review): presumably group_mean(dat) replaces each value by the mean of
# the entries sharing its date -- confirm against the la documentation.
# The result still has one entry per original observation, hence the removal
# of the duplicated dates afterwards.
s2 = ts.time_series(dta.group_mean(dat).x,dates=ts.date_array(dat.x,freq="M"))
s2u = ts.remove_duplicated_dates(s2)
# Show the original series, the date larry and both results
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
import pandas
Beispiel #40
0
"""
Look at some macro plots, then do some VARs and IRFs.
"""

import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts
import scikits.timeseries.lib.plotlib as tplt

# Load the macrodata dataset without pandas and keep only its `data`
# attribute, which is indexed by field name below.
data = sm.datasets.macrodata.load(as_pandas=False)
data = data.data


### Create Timeseries Representations of a few vars

# Quarterly dates from 1959Q1 to 2009Q3
dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
    end_date=ts.Date('Q', year=2009, quarter=3))

# Turn the three selected fields into a plain (n, 3) float array
# NOTE(review): the view(float) assumes the three fields are all floats --
# confirm against the dataset dtype.
ts_data = data[['realgdp','realcons','cpi']].view(float).reshape(-1,3)
# Fourth column: (1 - unemp/100) * pop
# NOTE(review): presumably an estimate of the employed population -- confirm.
ts_data = np.column_stack((ts_data, (1 - data['unemp']/100) * data['pop']))
ts_series = ts.time_series(ts_data, dates)


# One subplot per column, on a 2x2 grid (positions 221 to 223)
fig = tplt.tsfigure()
fsp = fig.add_tsplot(221)
fsp.tsplot(ts_series[:,0],'-')
fsp.set_title("Real GDP")
fsp = fig.add_tsplot(222)
fsp.tsplot(ts_series[:,1],'r-')
fsp.set_title("Real Consumption")
fsp = fig.add_tsplot(223)
fsp.tsplot(ts_series[:,2],'g-')
Beispiel #41
0
def _daily_finder(vmin, vmax, freq):
    """
    Build the per-position label information for the range [vmin, vmax].

    One record is created for every integer position between ``int(vmin)``
    and ``int(vmax)`` (inclusive). Depending on how large that span is
    compared to the number of periods per day/month/year at frequency `freq`,
    different calendar boundaries (second, minute, hour, day, week, month,
    quarter, year) are flagged and given a format string.

    Parameters
    ----------
    vmin, vmax : number
        Bounds of the range. They are truncated with ``int``; the untruncated
        `vmin` is kept to decide which label carries the year information.
    freq : int
        Frequency code, compared against the ``_c.FR_*`` constants.

    Returns
    -------
    info : ndarray
        Structured array with the fields ``val`` (integer position), ``maj``
        and ``min`` (boolean flags) and ``fmt`` (byte string of at most 20
        characters, empty when the position has no label).
        NOTE(review): the field names suggest major/minor tick flags and
        strftime-like label formats for a plotting axis -- confirm against
        the callers.

    Raises
    ------
    ValueError
        If `freq` is not one of the frequencies handled below.
    """

    # Stays at -1 for every frequency not handled by the FR_HR/FR_MIN/FR_SEC
    # branch: all the ``span < periodsperday ...`` tests of Case 1 then fail
    # (span is at least 1 when vmax >= vmin) and the final ``else`` is used.
    periodsperday = -1

    if freq >= _c.FR_HR:
        if freq == _c.FR_SEC:
            periodsperday = 24 * 60 * 60
        elif freq == _c.FR_MIN:
            periodsperday = 24 * 60
        elif freq == _c.FR_HR:
            periodsperday = 24
        else:
            raise ValueError("unexpected frequency: %s" % check_freq_str(freq))
        periodsperyear = 365 * periodsperday
        periodspermonth = 28 * periodsperday

    elif freq == _c.FR_BUS:
        periodsperyear = 261
        periodspermonth = 19
    elif freq == _c.FR_DAY:
        periodsperyear = 365
        periodspermonth = 28
    elif get_freq_group(freq) == _c.FR_WK:
        periodsperyear = 52
        periodspermonth = 3
    elif freq == _c.FR_UND:
        periodsperyear = 100
        periodspermonth = 10
    else:
        raise ValueError("unexpected frequency")

    # save this for later usage
    vmin_orig = vmin

    (vmin, vmax) = (int(vmin), int(vmax))
    span = vmax - vmin + 1
    dates_ = date_array(start_date=Date(freq, vmin), end_date=Date(freq, vmax))
    # Initialize the output
    info = np.zeros(span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S20")])
    info["val"][:] = np.arange(vmin, vmax + 1)
    info["fmt"][:] = ""
    # The first and last positions are always flagged in 'maj'
    info["maj"][[0, -1]] = True
    # .. and set some shortcuts
    info_maj = info["maj"]
    info_min = info["min"]
    info_fmt = info["fmt"]

    def first_label(label_flags):
        # Return the second entry of `label_flags` when the first one is
        # position 0, another entry exists and the original vmin had a
        # fractional part; otherwise return the first entry.
        if (label_flags[0] == 0) and (label_flags.size > 1) and ((vmin_orig % 1) > 0.0):
            return label_flags[1]
        else:
            return label_flags[0]

    # NOTE(review): period_break is used below as an index array into `info`;
    # presumably it holds the positions where the given period changes --
    # confirm against its definition.
    # In every case the format assignments go from the finest to the coarsest
    # boundary, so that a coarser format overwrites a finer one on positions
    # that are flagged by both.

    # Case 1. Less than a month
    if span <= periodspermonth:

        day_start = period_break(dates_, "day")
        month_start = period_break(dates_, "month")

        def _hour_finder(label_interval, force_year_start):
            # Flag day starts in 'maj' and every hour change whose hour is a
            # multiple of `label_interval` in 'min'.
            _hour = dates_.hour
            _prev_hour = (dates_ - 1).hour
            hour_start = (_hour - _prev_hour) != 0
            info_maj[day_start] = True
            info_min[hour_start & (_hour % label_interval == 0)] = True
            year_start = period_break(dates_, "year")
            info_fmt[hour_start & (_hour % label_interval == 0)] = "%H:%M"
            info_fmt[day_start] = "%H:%M\n%d-%b"
            info_fmt[year_start] = "%H:%M\n%d-%b\n%Y"
            # Optionally put the year on the first day label when no
            # year-start label qualifies
            if force_year_start and not has_level_label(year_start, vmin_orig):
                info_fmt[first_label(day_start)] = "%H:%M\n%d-%b\n%Y"

        def _minute_finder(label_interval):
            # Flag hour starts in 'maj' and every minute change whose minute
            # is a multiple of `label_interval` in 'min'.
            hour_start = period_break(dates_, "hour")
            _minute = dates_.minute
            _prev_minute = (dates_ - 1).minute
            minute_start = (_minute - _prev_minute) != 0
            info_maj[hour_start] = True
            info_min[minute_start & (_minute % label_interval == 0)] = True
            year_start = period_break(dates_, "year")
            # Local name bound to the same 'fmt' field as the outer shortcut
            info_fmt = info["fmt"]
            info_fmt[minute_start & (_minute % label_interval == 0)] = "%H:%M"
            info_fmt[day_start] = "%H:%M\n%d-%b"
            info_fmt[year_start] = "%H:%M\n%d-%b\n%Y"

        def _second_finder(label_interval):
            # Flag minute starts in 'maj' and every second change whose
            # second is a multiple of `label_interval` in 'min'.
            minute_start = period_break(dates_, "minute")
            _second = dates_.second
            _prev_second = (dates_ - 1).second
            second_start = (_second - _prev_second) != 0
            info["maj"][minute_start] = True
            info["min"][second_start & (_second % label_interval == 0)] = True
            year_start = period_break(dates_, "year")
            # Local name bound to the same 'fmt' field as the outer shortcut
            info_fmt = info["fmt"]
            info_fmt[second_start & (_second % label_interval == 0)] = "%H:%M:%S"
            info_fmt[day_start] = "%H:%M:%S\n%d-%b"
            info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y"

        # Pick the label interval from the span expressed as a fraction (or
        # multiple) of a day: the shorter the span, the finer the labels.
        if span < periodsperday / 12000.0:
            _second_finder(1)
        elif span < periodsperday / 6000.0:
            _second_finder(2)
        elif span < periodsperday / 2400.0:
            _second_finder(5)
        elif span < periodsperday / 1200.0:
            _second_finder(10)
        elif span < periodsperday / 800.0:
            _second_finder(15)
        elif span < periodsperday / 400.0:
            _second_finder(30)
        elif span < periodsperday / 150.0:
            _minute_finder(1)
        elif span < periodsperday / 70.0:
            _minute_finder(2)
        elif span < periodsperday / 24.0:
            _minute_finder(5)
        elif span < periodsperday / 12.0:
            _minute_finder(15)
        elif span < periodsperday / 6.0:
            _minute_finder(30)
        elif span < periodsperday / 2.5:
            _hour_finder(1, False)
        elif span < periodsperday / 1.5:
            _hour_finder(2, False)
        elif span < periodsperday * 1.25:
            _hour_finder(3, False)
        elif span < periodsperday * 2.5:
            _hour_finder(6, True)
        elif span < periodsperday * 4:
            _hour_finder(12, True)
        else:
            # Four days or more (or a frequency with periodsperday == -1):
            # month starts in 'maj', day starts in 'min'
            info_maj[month_start] = True
            info_min[day_start] = True
            year_start = period_break(dates_, "year")
            info_fmt = info["fmt"]
            info_fmt[day_start] = "%d"
            info_fmt[month_start] = "%d\n%b"
            info_fmt[year_start] = "%d\n%b\n%Y"
            # Make sure one label carries the year
            if not has_level_label(year_start, vmin_orig):
                if not has_level_label(month_start, vmin_orig):
                    info_fmt[first_label(day_start)] = "%d\n%b\n%Y"
                else:
                    info_fmt[first_label(month_start)] = "%d\n%b\n%Y"

    # Case 2. Less than three months
    elif span <= periodsperyear // 4:
        month_start = period_break(dates_, "month")
        info_maj[month_start] = True
        # Below FR_HR every position is flagged in 'min'; otherwise only the
        # day starts are
        if freq < _c.FR_HR:
            info["min"] = True
        else:
            day_start = period_break(dates_, "day")
            info["min"][day_start] = True
        week_start = period_break(dates_, "week")
        year_start = period_break(dates_, "year")
        info_fmt[week_start] = "%d"
        info_fmt[month_start] = "\n\n%b"
        info_fmt[year_start] = "\n\n%b\n%Y"
        # Make sure one label carries the year
        if not has_level_label(year_start, vmin_orig):
            if not has_level_label(month_start, vmin_orig):
                info_fmt[first_label(week_start)] = "\n\n%b\n%Y"
            else:
                info_fmt[first_label(month_start)] = "\n\n%b\n%Y"
    # Case 3. Less than 14 months ...............
    elif span <= 1.15 * periodsperyear:
        year_start = period_break(dates_, "year")
        month_start = period_break(dates_, "month")
        week_start = period_break(dates_, "week")
        info_maj[month_start] = True
        info_min[week_start] = True
        # A year or month start is never flagged in 'min' here
        info_min[year_start] = False
        info_min[month_start] = False
        info_fmt[month_start] = "%b"
        info_fmt[year_start] = "%b\n%Y"
        if not has_level_label(year_start, vmin_orig):
            info_fmt[first_label(month_start)] = "%b\n%Y"
    # Case 4. Less than 2.5 years ...............
    elif span <= 2.5 * periodsperyear:
        year_start = period_break(dates_, "year")
        quarter_start = period_break(dates_, "quarter")
        month_start = period_break(dates_, "month")
        info_maj[quarter_start] = True
        info_min[month_start] = True
        info_fmt[quarter_start] = "%b"
        info_fmt[year_start] = "%b\n%Y"
    # Case 5. Less than 4 years .................
    elif span <= 4 * periodsperyear:
        year_start = period_break(dates_, "year")
        month_start = period_break(dates_, "month")
        info_maj[year_start] = True
        info_min[month_start] = True
        info_min[year_start] = False

        # Only the month starts falling in January or July get a label
        month_break = dates_[month_start].month
        jan_or_jul = month_start[(month_break == 1) | (month_break == 7)]
        info_fmt[jan_or_jul] = "%b"
        info_fmt[year_start] = "%b\n%Y"
    # Case 6. Less than 11 years ................
    elif span <= 11 * periodsperyear:
        year_start = period_break(dates_, "year")
        quarter_start = period_break(dates_, "quarter")
        info_maj[year_start] = True
        info_min[quarter_start] = True
        info_min[year_start] = False
        info_fmt[year_start] = "%Y"
    # Case 7. More than 11 years ................
    else:
        year_start = period_break(dates_, "year")
        year_break = dates_[year_start].years
        nyears = span / periodsperyear
        # Only the year starts whose year is a multiple of the spacing
        # returned by _get_default_annual_spacing are flagged
        (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
        major_idx = year_start[(year_break % maj_anndef == 0)]
        info_maj[major_idx] = True
        minor_idx = year_start[(year_break % min_anndef == 0)]
        info_min[minor_idx] = True
        info_fmt[major_idx] = "%Y"
    # ............................................
    return info
Beispiel #42
0
#    t = timer()
#    mod_tb = tb.lpc(y, 2)
#    t_end = timer()
#    print str(t_end - t) + " seconds for talkbox.lpc"
#    print """For higher lag lengths ours quickly fills up memory and starts
#thrashing the swap.  Should we include talkbox C code or Cythonize the
#Levinson recursion algorithm?"""

    ## Try with a pandas series
    import pandas
    import scikits.timeseries as ts
    d1 = ts.Date(year=1700, freq='A')
    #NOTE: have to have yearBegin offset for annual data until parser rewrite
    #should this be up to the user, or should it be done in TSM init?
    #NOTE: not anymore, it's end of year now
    ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
    pandas_dr = pandas.DateRange(start=d1.datetime,
                                 periods=len(sunspots.endog), timeRule='A@DEC')
    #pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

    dates = np.arange(1700, 1700 + len(sunspots.endog))
    dates = ts.date_array(dates, freq='A')
    #sunspots = pandas.Series(sunspots.endog, index=dates)

    #NOTE: pandas only does business days for dates it looks like
    import datetime
    dt_dates = np.asarray(lmap(datetime.datetime.fromordinal,
                              ts_dr.toordinal().astype(int)))
    sunspots = pandas.Series(sunspots.endog, index=dt_dates)

    #NOTE: pandas can't handle pre-1900 dates
Beispiel #43
0
def getMsgStats(fromFunc=False):
	"""Per-day counts of type-1 and type-2 messages for the requested range.

	Reads ``endDate`` (required) and ``startDate`` (optional) from
	``request.form`` and ``companyId`` from the session.  Every day of the
	requested range gets an entry; days without messages carry zero counts.

	Returns the list of ``{"date", "msg1", "msg2"}`` dicts when
	``fromFunc == True``, otherwise ``jsonify({"stats": ...})``.
	"""
	companyId = session.get("companyId")
	form = request.form

	# Optional restrictions; the always-true entry keeps the list non-empty.
	condFilter = [ 1==1 ]
	if companyId:
		condFilter.append(Message.companyId==companyId)
	if form.get('startDate'):
		condFilter.append(Message.rdate > form['startDate'])

	# One row per calendar day: (day, count of msgType 1, count of msgType 2).
	dayQuery = db_session.query(
		func.date(Message.rdate).label("rdate"),
		func.count(func.IF(Message.msgType==1,1,None)),
		func.count(func.IF(Message.msgType==2,1,None)))
	dayQuery = dayQuery.group_by( func.date(Message.rdate) )
	dayQuery = dayQuery.filter(func.date(Message.rdate) <= form['endDate'])
	rows = dayQuery.filter(*condFilter).all()

	db_dates = [row[0] for row in rows]
	db_cnt1 = [row[1] for row in rows]
	db_cnt2 = [row[2] for row in rows]

	# Daily series with the gaps between first and last DB day filled with 0.
	dateAry = ts.date_array(db_dates, freq='D')
	series1 = ts.time_series(db_cnt1, dateAry)
	series2 = ts.time_series(db_cnt2, dateAry)
	counts1 = series1.fill_missing_dates(fill_value=0)
	counts2 = series2.fill_missing_dates(fill_value=0)
	counts1 = counts1.filled(0)
	counts2 = counts2.filled(0)

	# Full requested range, used for labels and for zero padding on both ends.
	startDate = datetime.strptime(form['startDate'], '%Y-%m-%d') if form.get('startDate') else '2015-10-27'
	endDate = datetime.strptime(form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	stats = []

	# Zero entries for the days preceding the first day that has data.
	hasData = len(dateAry) != 0
	firstDayIdx = 0
	for _ in fillDateAry:
		currentDay = fillDateAry[firstDayIdx]
		if hasData and dateAry[0] == currentDay:
			break
		stats.append({ "date" : currentDay.strftime("%m/%d"), "msg1" : 0, "msg2" : 0 })
		firstDayIdx += 1

	# Actual (gap-filled) counts, labelled from the first data day onwards.
	for offset, (msg1cnt, msg2cnt) in enumerate(zip(counts1, counts2)):
		label = fillDateAry[firstDayIdx + offset].strftime("%m/%d")
		stats.append({ "date" : label, "msg1" : msg1cnt, "msg2" : msg2cnt })

	# Zero entries for the days after the last day that has data.
	for pos in range(len(stats), len(fillDateAry)):
		stats.append({ "date" : fillDateAry[pos].strftime("%m/%d"), "msg1" : 0, "msg2" : 0 })

	if fromFunc==True:
		return stats
	return jsonify({"stats" : stats})
Beispiel #44
0
                                   _attrs)
    _methods = {'predict': 'dates'}
    _wrap_methods = wrap.union_dicts(
        base.LikelihoodResultsWrapper._wrap_methods, _methods)


wrap.populate_wrapper(TimeSeriesResultsWrapper, TimeSeriesModelResults)

if __name__ == "__main__":
    # Small manual demo: wrap part of the macrodata set in a DataFrame and
    # feed it to TimeSeriesModel.
    import scikits.statsmodels.api as sm
    import datetime
    import pandas

    data = sm.datasets.macrodata.load()

    #make a DataFrame
    #TODO: attach a DataFrame to some of the datasets, for quicker use
    # Index labels of the form "<year>:<quarter>", e.g. "1959:1".
    dates = [str(int(x[0])) +':'+ str(int(x[1])) \
             for x in data.data[['year','quarter']]]

    # scikits.timeseries is optional here.  The names must be reached through
    # the `ts` alias: the unqualified calls raised NameError, which a bare
    # `except: pass` silently swallowed, so `ts_dates` was never built.
    try:
        import scikits.timeseries as ts
    except ImportError:
        ts_dates = None
    else:
        ts_dates = ts.date_array(
            start_date=ts.Date(year=1959, quarter=1, freq='Q'),
            length=len(data.data))

    df = pandas.DataFrame(data.data[['realgdp', 'realinv', 'realcons']],
                          index=dates)
    ex_mod = TimeSeriesModel(df)
    #ts_series = pandas.TimeSeries()
Beispiel #45
0
def getGoodworkStats(fromFunc=False):
	"""Per-day counts of good posts, likes and replies for the requested range.

	Reads ``endDate`` (required) and ``startDate`` (optional) from
	``request.form`` and ``companyId`` from the session.  Three grouped
	queries (GoodPost, GoodLike, GoodReply) are turned into daily series,
	padded with zeros and merged into one list of dicts holding the keys
	``"date"`` (formatted ``%m/%d``), ``"post"``, ``"like"`` and ``"reply"``.

	Returns that list when ``fromFunc == True``, otherwise
	``jsonify({"stats": ...})``.
	"""

	# Raw query results: day and count, one pair of lists per kind.
	db_dates1 = list()
	db_dates2 = list()
	db_dates3 = list()
	db_cnt1 = list()
	db_cnt2 = list()
	db_cnt3 = list()

	companyId = session.get("companyId")

	# --- posts: optional company / start-date restrictions -----------------
	# The always-true entry keeps the filter list non-empty.
	condFilter = [ 1==1 ]
	if companyId:
		condFilter.append(GoodPost.companyId==companyId)
	if request.form.get('startDate'):
		condFilter.append(GoodPost.rdate > request.form['startDate'])

	db_result1 = db_session.query(func.date(GoodPost.rdate).label("rdate"), func.count())\
					.group_by( func.date(GoodPost.rdate) )\
					.filter(func.date(GoodPost.rdate) <= request.form['endDate'])\
					.filter(*condFilter).all()
	for row in db_result1:
		db_dates1.append(row[0])
		db_cnt1.append(row[1])


	# --- likes: the company restriction goes through the liking user -------
	condFilter2 = [ 1==1 ]
	if request.form.get('startDate'):
		condFilter2.append(GoodLike.rdate > request.form['startDate'])

	if companyId:
		db_result2 = db_session.query(func.date(GoodLike.rdate).label("rdate"), func.count())\
					.join(User, User.userId==GoodLike.userId)\
					.group_by( func.date(GoodLike.rdate) )\
					.filter(func.date(GoodLike.rdate) <= request.form['endDate'])\
					.filter(User.companyId==companyId)\
					.filter(*condFilter2).all()
	else :
		db_result2 = db_session.query(func.date(GoodLike.rdate).label("rdate"), func.count())\
						.group_by( func.date(GoodLike.rdate) )\
						.filter(func.date(GoodLike.rdate) <= request.form['endDate'])\
						.filter(*condFilter2).all()
	for row in db_result2:
		db_dates2.append(row[0])
		db_cnt2.append(row[1])

	# --- replies: same shape as the likes query ----------------------------
	condFilter3 = [ 1==1 ]
	if request.form.get('startDate'):
		condFilter3.append(GoodReply.rdate > request.form['startDate'])

	if companyId:
		db_result3 = db_session.query(func.date(GoodReply.rdate).label("rdate"), func.count())\
					.join(User, User.userId==GoodReply.userId)\
					.group_by( func.date(GoodReply.rdate) )\
					.filter(func.date(GoodReply.rdate) <= request.form['endDate'])\
					.filter(User.companyId==companyId)\
					.filter(*condFilter3).all()
	else :
		db_result3 = db_session.query(func.date(GoodReply.rdate).label("rdate"), func.count())\
					.group_by( func.date(GoodReply.rdate) )\
					.filter(func.date(GoodReply.rdate) <= request.form['endDate'])\
					.filter(*condFilter3).all()
	for row in db_result3:
		db_dates3.append(row[0])
		db_cnt3.append(row[1])
	
	# Daily series per kind; gaps between a kind's first and last day become 0.
	dateAry1 = ts.date_array(db_dates1, freq='D')
	dateAry2 = ts.date_array(db_dates2, freq='D')
	dateAry3 = ts.date_array(db_dates3, freq='D')
	timeSrz1 = ts.time_series(db_cnt1, dateAry1)
	timeSrz2 = ts.time_series(db_cnt2, dateAry2)
	timeSrz3 = ts.time_series(db_cnt3, dateAry3)

	fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
	fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)
	fillVals3 = timeSrz3.fill_missing_dates(fill_value=0)

	fillVals1 = fillVals1.filled(0)
	fillVals2 = fillVals2.filled(0)
	fillVals3 = fillVals3.filled(0)

	# Full requested range, used for the labels and for zero padding.
	# Without a startDate a fixed string default is passed instead of a datetime.
	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		startDate = '2015-10-27'
	endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Pick a reference "earliest first day" among the three series.
	# NOTE(review): when dateAry1 is empty (or not later than dateAry2) the
	# else-branch only compares posts with replies and never looks at
	# dateAry2 -- confirm this is intended.
	if len(dateAry1)>0 and len(dateAry2)>0 and dateAry1[0] > dateAry2[0] :
		if len(dateAry3)>0 and dateAry2[0] > dateAry3[0] :
			minDate = dateAry3[0]
		else :
			minDate = dateAry2[0]
	else :
		if (len(dateAry1)>0 and len(dateAry3)>0 and dateAry1[0] > dateAry3[0]) or len(dateAry1)==0 :
			minDate = len(dateAry3)>0 and dateAry3[0] or None
		else:
			minDate = len(dateAry1)>0 and dateAry1[0] or None

	postStats = list()
	likeStats = list()
	replyStats = list()

	# Pass 1: zero entries for all three lists for every day before minDate.
	# firstDayIdx1 ends up as the offset of minDate inside fillDateAry (or
	# len(fillDateAry) when minDate never matches, e.g. when it is None).
	firstDayIdx1 = 0
	for day in fillDateAry :
		if minDate != fillDateAry[firstDayIdx1] :			
			postStats.append({ "date" : fillDateAry[firstDayIdx1].strftime("%m/%d"), "post" : 0 })
			likeStats.append({ "date" : fillDateAry[firstDayIdx1].strftime("%m/%d"), "like" : 0 })
			replyStats.append({ "date" : fillDateAry[firstDayIdx1].strftime("%m/%d"), "reply" : 0 })
			firstDayIdx1 += 1
			continue
		else :
			break
	# Pass 2: walk from the start of the range up to the first "like" day;
	# each series whose first day differs from minDate gets one more zero
	# entry per step.
	# NOTE(review): this restarts at index 0, so days already padded in pass 1
	# are appended again for those series -- verify the resulting alignment.
	firstDayIdx2 = 0
	for day in fillDateAry :
		if len(dateAry2)==0 or dateAry2[0] != fillDateAry[firstDayIdx2] :
			if len(dateAry1)>0 and minDate!=dateAry1[0]:
				postStats.append({ "date" : fillDateAry[firstDayIdx2].strftime("%m/%d"), "post" : 0 })
			if len(dateAry2)>0 and minDate!=dateAry2[0]:
				likeStats.append({ "date" : fillDateAry[firstDayIdx2].strftime("%m/%d"), "like" : 0 })
			if len(dateAry3)>0 and minDate!=dateAry3[0]:
				replyStats.append({ "date" : fillDateAry[firstDayIdx2].strftime("%m/%d"), "reply" : 0 })
			firstDayIdx2 += 1
			continue
		else :
			break
	# Pass 3: same walk, this time up to the first "reply" day.
	firstDayIdx3 = 0
	for day in fillDateAry :
		if len(dateAry3)==0 or dateAry3[0] != fillDateAry[firstDayIdx3] :			
			if len(dateAry1)>0 and minDate!=dateAry1[0]:
				postStats.append({ "date" : fillDateAry[firstDayIdx3].strftime("%m/%d"), "post" : 0 })
			if len(dateAry2)>0 and minDate!=dateAry2[0]:
				likeStats.append({ "date" : fillDateAry[firstDayIdx3].strftime("%m/%d"), "like" : 0 })
			if len(dateAry3)>0 and minDate!=dateAry3[0]:
				replyStats.append({ "date" : fillDateAry[firstDayIdx3].strftime("%m/%d"), "reply" : 0 })
			firstDayIdx3 += 1
			continue
		else :
			break

	# Append the real (gap-filled) counts, labelled relative to the offsets
	# found above.
	# NOTE(review): indexing fillDateAry at offset + idx raises IndexError if a
	# series' first day was never found inside the requested range.
	for idx, post in enumerate(fillVals1):
		postStats.append({ "date" : fillDateAry[firstDayIdx1 + idx].strftime("%m/%d"), "post" : post })
	for idx, like in enumerate(fillVals2):
		likeStats.append({ "date" : fillDateAry[firstDayIdx2 + idx].strftime("%m/%d"), "like" : like })
	for idx, reply in enumerate(fillVals3):
		replyStats.append({ "date" : fillDateAry[firstDayIdx3 + idx].strftime("%m/%d"), "reply" : reply })

	
	# Pad each list with zero entries up to the length of the requested range.
	postLen = len(postStats)
	likeLen = len(likeStats)
	replyLen = len(replyStats)
	for i in range(len(fillDateAry) - postLen) :
		postStats.append({ "date" : fillDateAry[postLen + i].strftime("%m/%d"), "post" : 0 })
	for i in range(len(fillDateAry) - likeLen) :
		likeStats.append({ "date" : fillDateAry[likeLen + i].strftime("%m/%d"), "like" : 0 })
	for i in range(len(fillDateAry) - replyLen) :
		replyStats.append({ "date" : fillDateAry[replyLen + i].strftime("%m/%d"), "reply" : 0 })
		
	#merge
	# Entries are combined by list position; on a "date" clash the later
	# update (like, then reply) wins.
	stats = list()
	for idx, postStat in enumerate(postStats):
		mergedStats = postStat.copy()
		mergedStats.update(likeStats[idx])
		mergedStats.update(replyStats[idx])
		stats.append(mergedStats)

	if fromFunc==True : 
		return stats
	else :
		return jsonify({"stats" : stats})
import datetime

from matplotlib.finance import quotes_historical_yahoo
import scikits.timeseries as ts
import scikits.timeseries.lib.tstables as tstab

# `os` is needed for os.mkdir below but was missing from the import group;
# the resulting NameError is not caught by `except OSError`.
import os

# Date window of the quotes to download.
startdate = datetime.date(2002, 1, 5)
enddate = datetime.date(2003, 12, 1)

# retrieve data from yahoo.
# Data format is [(d, open, close, high, low, volume), ...] where d is
# a floating point representation of the number of days since 01-01-01 UTC
quotes = quotes_historical_yahoo('INTC', startdate, enddate)

# Create a DateArray of daily dates and convert it to business day frequency
dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS')

opens = [q[1] for q in quotes]

# opens: the data portion of the time series
# dates: the date portion of the time series
raw_series = ts.time_series(opens, dates)
test_series = raw_series
#test_series = ts.fill_missing_dates(raw_series, fill_value=-1)

# Write to a PyTables file
output_dir = '../timeseries'
try:
    os.mkdir(output_dir)
except OSError:
    # Best effort: mkdir failures (e.g. the directory already exists) are ignored.
    pass
    def __call__(self, *tseries, **kwargs):
        """
        generate a report. Parameter values are not saved to the Report instance.

        Options given here override the instance options for this call only;
        anything not set in either place falls back to ``_default_options``.
        The formatted table is written to the ``output`` option.

        Parameters
        ----------
        Accepts same parameters as __init__ method of Report class

        Raises
        ------
        ValueError
            If ``header_row`` does not have one entry per series (optionally
            plus one for the date column), or if ``justify`` /
            ``header_justify`` is neither a string nor a list with one entry
            per series (optionally plus one for the date column).
        """

        # Per-call options: instance options overlaid with the keyword arguments.
        option_dict = copy.copy(self.options)
        option_dict.update(self.__make_dict(**kwargs))
        if len(tseries) == 0:
            tseries = self.tseries

        def option(kw):
            return option_dict.get(kw, _default_options[kw])

        dates = option('dates')
        header_row = option('header_row')
        header_char = option('header_char')
        header_justify = option('header_justify')
        row_char = option('row_char')
        footer_label = option('footer_label')
        footer_char = option('footer_char')
        footer_func = option('footer_func')
        delim = option('delim')
        justify = option('justify')
        prefix = option('prefix')
        postfix = option('postfix')
        mask_rep = option('mask_rep')
        datefmt = option('datefmt')
        fmt_func = option('fmt_func')
        wrap_func = option('wrap_func')
        col_width = option('col_width')
        nls=option('nls')
        output=option('output')
        fixed_width=option('fixed_width')

        # `rows` collects logical rows: optional header, one row per date,
        # optional footer.
        if header_row is not None:
            has_header=True
            if len(header_row) == len(tseries)+1:
                # label for date column included
                rows = [header_row]
            elif len(header_row) == len(tseries):
                # label for date column not included
                rows = [['']+header_row]
            else:
                raise ValueError("mismatch with number of headers and series")
        else:
            has_header=False
            rows=[]

        if fixed_width:

            def _standardize_justify(userspec):
                """Expand a justify spec to one entry per column (date column first)."""
                ncols = len(tseries) + 1
                if isinstance(userspec, str):
                    # justify all columns the same way
                    return [userspec for x in range(ncols)]
                if isinstance(userspec, list):
                    if len(userspec) == ncols:
                        # justification for the date column already included;
                        # this case used to fall through and return None
                        return userspec
                    if len(userspec) == ncols - 1:
                        # justification for date column not included, so set that
                        # to left by default
                        return ['left'] + userspec
                # wrong type or wrong number of entries
                raise ValueError("invalid `justify` specification")

            if justify is not None:
                justify = _standardize_justify(justify)
            else:
                # default column justification
                justify = ['left']
                for ser in tseries:
                    if ser.dtype.char in 'SUO': justify.append('left')
                    else: justify.append('right')


            if header_justify is not None:
                header_justify = _standardize_justify(header_justify)
            else:
                # default column justification
                header_justify = ['left' for x in range(len(tseries)+1)]
        else:
            # no padding at all when the table is not fixed width
            justify = ['none' for x in range(len(tseries)+1)]
            header_justify = justify

        if datefmt is None:
            def datefmt_func(date): return str(date)
        else:
            def datefmt_func(date): return date.strftime(datefmt)

        # Align all series on a common date range: their own combined range
        # when no dates were given, otherwise the span of the requested dates.
        if dates is None:
            tseries = ts.align_series(*tseries)
            dates = ts.date_array(start_date=tseries[0].start_date,
                                  end_date=tseries[0].end_date)
        else:
            tseries = ts.align_series(start_date=dates[0], end_date=dates[-1], *tseries)

        # One value formatter per series.
        if isinstance(fmt_func, list):
            fmt_func = [fmt_func_wrapper(f, mask_rep) for f in fmt_func]
        else:
            fmt_func = [fmt_func_wrapper(fmt_func, mask_rep)]*len(tseries)

        def wrap_func_default(func):
            if func is None: return lambda x:x
            else: return func

        # One wrap function per column (date column first); None means identity.
        if isinstance(wrap_func, list):
            if len(wrap_func) == len(tseries):
                wrap_func = [lambda x: x] + wrap_func
            wrap_func = [wrap_func_default(func) for func in wrap_func]
        else:
            wrap_func = [wrap_func_default(wrap_func) for x in range(len(tseries)+1)]


        # One minimum width per column (date column first).
        if isinstance(col_width, list):
            if len(col_width) == len(tseries):
                col_width = [None] + col_width
        else:
            col_width = [col_width for x in range(len(tseries)+1)]

        _sd = dates[0]

        # One logical row per date; `d - _sd` is used as the position of that
        # date inside each aligned series.
        for d in dates:
            rows.append(
                [datefmt_func(d)] + \
                [fmt_func[i](ser.series[d - _sd]) \
                 for i, ser in enumerate(tseries)]
            )

        if footer_func is not None:
            has_footer=True
            if not isinstance(footer_func, list):
                footer_func = [footer_func]*len(tseries)

            if footer_label is None: footer_label = ['']
            else: footer_label = [footer_label]

            footer_data = []
            has_missing = dates.has_missing_dates()

            for i, ser in enumerate(tseries):
                if footer_func[i] is None:
                    footer_data.append('')
                else:
                    # with gaps in `dates`, summarise only the reported dates
                    if has_missing: _input = ser[dates]
                    else:           _input = ser.series
                    footer_data.append(fmt_func[i](footer_func[i](_input)))

            rows.append(footer_label + footer_data)
        else:
            has_footer=False


        def rowWrapper(row):
            # Wrap every cell, split it into lines and regroup them into
            # physical rows.  `map(None, ...)` is the Python 2 spelling of a
            # zip that pads the shorter sequences with None.
            newRows = [wrap_func[i](item).split('\n') for i, item in enumerate(row)]
            return [[(substr or '') for substr in item] for item in map(None, *newRows)]
        # break each logical row into one or more physical ones
        logicalRows = [rowWrapper(row) for row in rows]
        numLogicalRows = len(logicalRows)
        # columns of physical rows
        columns = map(None,*reduce(operator.add,logicalRows))
        numCols = len(columns)
        colNums = list(range(numCols))

        # get the maximum of each column by the string length of its items
        maxWidths = [max(col_width[i], *[len(str(item)) for item in column])
                        for i, column in enumerate(columns)]

        def getSeparator(char, separate):
            # full-width line made of `char`, or None when not requested
            if char is not None and separate:
                return char * (len(prefix) + len(postfix) + sum(maxWidths) + \
                                             len(delim)*(len(maxWidths)-1))
            else:
                return None

        header_separator = getSeparator(header_char, has_header)
        footer_separator = getSeparator(footer_char, has_footer)
        row_separator = getSeparator(row_char, True)

        # select the appropriate justify method
        justify_funcs = {'center':str.center, 'right':str.rjust, 'left':str.ljust,
                          'none':(lambda text, width: text)}

        # data_start..data_end are the logical rows followed by a row
        # separator; the last data row is excluded (it is followed by the
        # footer separator, if any).
        if has_header and has_footer:
            data_start = 1
            data_end = numLogicalRows-3
        elif has_header:
            data_start = 1
            data_end = numLogicalRows-2
        elif has_footer:
            data_start = 0
            data_end = numLogicalRows-3
        else:
            data_start = 0
            data_end = numLogicalRows-2

        for rowNum, physicalRows in enumerate(logicalRows):

            # NOTE(review): header justification is keyed on header_separator,
            # so a header without `header_char` is justified like a data row --
            # confirm this is intended.
            if rowNum == 0 and header_separator:
                _justify = header_justify
            else:
                _justify = justify

            def apply_justify(colNum, item, width):
                jfunc_key = str(_justify[colNum]).lower()
                jfunc = justify_funcs[jfunc_key]
                return jfunc(str(item), width)

            for row in physicalRows:

                output.write(
                    prefix + \
                    delim.join([
                        apply_justify(cn, item, width) \
                        for (cn, item, width) in zip(colNums, row, maxWidths)
                    ]) + \
                    postfix + nls)

            if row_separator and (data_start <= rowNum <= data_end):
                output.write(row_separator + nls)
            elif header_separator and rowNum < data_start:
                output.write(header_separator + nls)
            elif footer_separator and rowNum == data_end + 1:
                output.write(footer_separator + nls)
Beispiel #48
0
def getAccessStats(fromFunc=False):
	"""Per-day page views and unique visitors for the requested date range.

	Reads ``endDate`` (required) and ``startDate`` (optional) from
	``request.form`` and ``companyId`` from the session.  Every day of the
	requested range gets an entry; days without access-log rows carry zeros.

	Returns the list of ``{"date", "pv", "uv"}`` dicts when
	``fromFunc == True``, otherwise ``jsonify({"stats": ...})``.
	"""
	companyId = session.get("companyId")

	# Optional restrictions; the always-true entry keeps the list non-empty.
	condFilter = [1==1]
	if request.form.get('startDate'):
		condFilter.append(LogAccess.rdate > request.form['startDate'])

	if companyId:
		condFilter.append(User.companyId==companyId)

	# One row per calendar day: (day, page views, distinct users).
	query = db_session.query(func.date(LogAccess.rdate).label("rdate"), func.count(), func.count(distinct(LogAccess.userId)))
	if companyId:
		# The company filter lives on User, so User must be joined to the log
		# rows explicitly.  Without the join the filter pulls User into the
		# FROM list as a cross join and every log row is counted once per
		# user of the company.
		query = query.join(User, User.userId==LogAccess.userId)
	db_result = query\
				.group_by( func.date(LogAccess.rdate) )\
				.filter(*condFilter)\
				.filter(func.date(LogAccess.rdate) <= request.form['endDate'])\
				.all()

	db_dates = list()
	db_pv = list()
	db_uv = list()
	for row in db_result:
		db_dates.append(row[0])
		db_pv.append(row[1])
		db_uv.append(row[2])

	# Daily series with the gaps between first and last DB day filled with 0.
	stats = list()
	dateAry = ts.date_array(db_dates, freq='D')
	timeSrz1 = ts.time_series(db_pv, dateAry)
	timeSrz2 = ts.time_series(db_uv, dateAry)

	fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
	fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)

	fillVals1 = fillVals1.filled(0)
	fillVals2 = fillVals2.filled(0)

	# Full requested range, used for the labels and for zero padding.
	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		startDate = '2015-10-27'

	endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Zero entries for the days preceding the first day that has data.
	firstDayIdx = 0
	for day in fillDateAry :
		if len(dateAry)==0 or dateAry[0] != fillDateAry[firstDayIdx] :
			stats.append({ "date" : fillDateAry[firstDayIdx].strftime("%m/%d"), "pv" : 0, "uv" : 0 })
			firstDayIdx += 1
			continue
		else :
			break

	# Actual (gap-filled) counts, labelled from the first data day onwards.
	for idx, pv in enumerate(fillVals1):
		uv = fillVals2[idx]
		stats.append({ "date" : fillDateAry[firstDayIdx + idx].strftime("%m/%d"), "pv" : pv, "uv" : uv })

	# Zero entries for the days after the last day that has data.
	statsLen = len(stats)
	for i in range(len(fillDateAry) - len(stats)) :
		stats.append({ "date" : fillDateAry[statsLen + i].strftime("%m/%d"), "pv" : 0, "uv" : 0 })

	if fromFunc==True :
		return stats
	else :
		return jsonify({"stats" : stats})
Beispiel #49
0
 def _get_tseriesA(freq,date_values,kw):
     """Build a DateArray from the non-NaN entries of ``date_values``.

     Each remaining entry is truncated to an int and used as the ``value``
     of a ``ts.Date`` with frequency ``str(freq)``.  ``kw`` is accepted but
     not used.
     """
     # Drop NaNs first, then convert, so conversion errors surface before
     # any Date is created.
     ordinals = []
     for raw in date_values.flatten():
         if np.isnan(raw):
             continue
         ordinals.append(int(raw))

     stamps = []
     for ordinal in ordinals:
         stamps.append(ts.Date(freq=str(freq), value=ordinal))
     return ts.date_array(stamps)
Beispiel #50
0
garch11:
[ 1.01258264  0.24149155  0.50479994]
-2056.3877404
R include_constant=False
Final Estimate:
 LLH:  2056.397    norm LLH:  2.056397
    omega    alpha1     beta1
1.0123560 0.2409589 0.5049154
'''

# Simulate a short series of 20 observations with unit innovation variances.
# NOTE(review): `ar`, `ma` and `generate_gjrgarch` are defined outside this
# excerpt -- the meaning of the three returned values should be checked there.
erro, ho, etaxo = generate_gjrgarch(20,
                                    ar,
                                    ma,
                                    mu=0.04,
                                    scale=0.01,
                                    varinnovation=np.ones(20))

# Optional example, only run when 'sp500' is listed in `examples`.
if 'sp500' in examples:
    import tabular as tb
    import scikits.timeseries as ts

    # Hard-coded local path to the downloaded quote table.
    a = tb.loadSV(r'C:\Josef\work-oth\gspc_table.csv')

    # The rows are reversed before use (presumably the file is stored newest
    # first -- confirm against the csv).
    s = ts.time_series(a[0]['Close'][::-1],
                       dates=ts.date_array(a[0]['Date'][::-1], freq="D"))

    # Closing prices in reversed order and their log differences.
    sp500 = a[0]['Close'][::-1]
    sp500r = np.diff(np.log(sp500))

#plt.show()
* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
from statsmodels.compat.python import lrange
import numpy as np
import scikits.timeseries as ts
import la
import pandas
import tabular as tb
from finance import msft, ibm  # hack to make it run as standalone

# Monthly series with duplicated dates: 2001-01 and 2001-03 each appear twice.
s = ts.time_series([1,2,3,4,5],
            dates=ts.date_array(["2001-01","2001-01",
            "2001-02","2001-03","2001-03"],freq="M"))

print('\nUsing la')
# Values and dates as two larrys sharing the same integer labels 0..n-1.
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
# Group the values by date and take the group mean.
# NOTE(review): presumably group_mean keeps one entry per original row (each
# replaced by its group's mean), which is why duplicates are removed
# afterwards -- confirm against the la documentation.
s2 = ts.time_series(dta.group_mean(dat).x,dates=ts.date_array(dat.x,freq="M"))
s2u = ts.remove_duplicated_dates(s2)
# Show the original series, the date labels, and the grouped series before
# and after de-duplication.
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
pdta = pandas.DataFrame(s.data, np.arange(len(s.data)), [1])
pa = pdta.groupby(dict(zip(np.arange(len(s.data)),
            s.dates.tolist()))).aggregate(np.mean)