コード例 #1
0
def _collect_and_plot(files):
    """Collect hourly temperature series from gs2 files and plot them.

    Every file in ``files`` is parsed with ``parse.parse_file``. The first and
    last parsed sections are ignored; of the remaining ones only those whose
    ``'Plant'`` entry equals ``['tmp']`` are used, each being turned into a
    ``ts.TimeSeries``. When at least one series was collected, the
    concatenated series is plotted against every file found in a hard-coded
    eklima directory; otherwise a message is printed.

    :param files: iterable of gs2 files accepted by ``parse.parse_file``.
    :return: None. Results are plotted / printed.
    """
    TS = []
    location = None
    for f in files:
        temperatures = [section[1] for section in parse.parse_file(f)[1:-1]
                        if section[1]['Plant'] == ['tmp']]
        for t in temperatures:
            if t['Step'][0] != '0000-00-00.01:00:00':
                print('Not hourly readings of temperature. Abort.')
                # Only the remaining sections of the current file are skipped;
                # the next file is still processed.
                break
            dates = ts.date_array(start_date=ts.Date('H', t['Start'][0]),
                                  length=len(t['Value']))
            data = [float(value.rsplit('/')[0]) for value in t['Value']]
            TS.append(ts.TimeSeries(data=data, dates=dates))
            if location and t['Installation'][0] != location:
                print('Location changed during reading of gs2 files. '
                      'Probably some bad grouping of gs2 files.')
            location = t['Installation'][0]
    if not TS:
        print('No temperature data.')
        return
    path = '/Users/tidemann/Documents/NTNU/devel/data/eklima/Telemark/'
    for name in os.listdir(path):
        try:
            series = xml.parse(os.path.join(path, name))
            sg.utils.plot_time_series([ts.concatenate(TS), series],
                                      ['b-', 'r-'], [location, name])
        except Exception:
            # Best effort: a file that cannot be parsed or plotted is
            # reported and skipped. KeyboardInterrupt and SystemExit are no
            # longer swallowed.
            print('%s had no data.' % name)
コード例 #2
0
    def __init__(self, **kwargs):
        r"""
        Store the keyword arguments as attributes and prepare the daily
        date range between ``start_date`` and ``end_date``.

        ``start_date`` and ``end_date`` are read directly by this method; the
        other keys are only stored as attributes here. Example::

            kwargs ={'baisnName':'Mackenzie',
                     'start_date':'2000-06-01',
                     'end_date':'2010-06-31',
                     'info_fl':r'C:\00_Work\02_Sim\00_Mackenzie\01_Data\01_Selected_RiverDischarge\RiverGages_description.csv',
                     'pthIn':r'C:\00_Work\02_Sim\00_Mackenzie\01_Data\01_Selected_RiverDischarge'}
        """
        for name in kwargs:
            setattr(self, name, kwargs[name])

        self._load_rivergages()

        # First simulated day and its "YYYY/MM/DD 00:00" representation.
        self._idate = ts.Date('D', self.start_date)
        day_label = self._idate.strfmt('%Y/%m/%d')
        self._idateHStr = day_label + ' 00:00'

        # Last simulated day and the daily date array spanning both ends.
        self._fdate = ts.Date('D', self.end_date)
        self._dates = ts.date_array(start_date=self._idate,
                                    end_date=self._fdate,
                                    freq='d')

        # Both end points are included, hence the + 1.
        self.nr_days = self._fdate - self._idate + 1

        self._get_matrix()
コード例 #3
0
    def test_tsfromtxt(self):
        "Tests reading from a text file."
        fcontent = """#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
"""
        import os
        from datetime import datetime
        import tempfile
        (tmp_fd, tmp_fl) = tempfile.mkstemp()
        os.write(tmp_fd, fcontent)
        os.close(tmp_fd)

        mrectxt = tsfromtxt(tmp_fl, delimiter=',', names=tuple('ABCDEFG'),
                               datecols=0, skip_header=2, asrecarray=True)
        os.remove(tmp_fl)
        #
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        self.failUnless(isinstance(mrectxt, TimeSeriesRecords))
        assert_equal(mrectxt._dates, date_array(dlist, 'M'))
        assert_equal(mrectxt.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(mrectxt.F, [1, 1, 1, 1])
        assert_equal(mrectxt.E._mask, [1, 1, 1, 1])
        assert_equal(mrectxt.C, [1, 2, 300000, -1e-10])
コード例 #4
0
 def test_sorted(self):
     """Series and records built from unsorted dates must come out sorted.

     The dates are given in reverse chronological order; every constructor
     (``time_series``, ``time_records``, ``fromrecords``, ``fromarrays``) is
     expected to return data and dates in chronological order.
     """
     dates = [ts.Date('D', string='2007-01-%02i' % i) for i in (3, 2, 1)]
     (a, b) = zip(*[(3., 30), (2., 20), (1., 10), ])
     # Builtin float/int: ``np.float`` and ``np.int`` were plain aliases of
     # them and no longer exist in recent NumPy releases.
     ndtype = [('a', float), ('b', int)]
     controldates = date_array(dates, freq='D')
     controldates.sort_chronologically()
     series = time_series(zip(*(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(series._data.tolist(), [(1., 10), (2., 20), (3., 30)])
     assert_equal(series._dates, controldates)
     #
     trec = time_records(zip(*(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromrecords(zip(a, b), dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromarrays([a, b], dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
コード例 #5
0
ファイル: INLINE_PROCESSOR.py プロジェクト: exedre/e4t
def _get_year(year,d,n=1):
    """
    Return a time-series with the same frequency of the input time-series
    with n complete years from input year and values taken from input series

    Positions of the output that fall outside the date range of the input
    series are left as ``np.nan``.

    :param year: base year
    :type year: integer
    :param d: time-series object (its mask is reset to ``False`` in place)
    :type d: time-series
    :param n: number of complete years to take
    :type n: integer
    :return: output time-series
    :rtype: time-series
    :raises UnknownFrequencyError: if the frequency does not start with
        'M', 'Q' or 'A'

    """

    f    = d.freqstr      # input frequency
    nels = _ts_nels(f)    # number of elements in one year (M=12, Q=4, A=1)

    N=n*nels              # total number of elements to consider

    startd = d.start_date
    endd   = d.end_date

    if f[0]=='M':
        starty = ts.Date(f,year=year,month=1)
    elif f[0]=='Q':
        starty = ts.Date(f,year=year,quarter=1)
    elif f[0]=='A':
        starty = ts.Date(f,year=year)
    else:
        raise UnknownFrequencyError(f)

    # Create a timeseries with N elements np.nan
    # from starty with frequency f
    s = ts.time_series([ np.nan for i in range(0,N)],
                       start_date=starty,
                       freq=f)

    # Date range covering the N periods. It is built from its length so that
    # n > 1 never asks for an invalid month/quarter number such as month=24.
    da = ts.date_array(start_date=starty,
                       length=N,
                       freq=f)

    # NOTE(review): the results of these two calls are discarded; if they
    # return new series instead of working in place they have no effect here
    # -- confirm against scikits.timeseries.
    d.fill_missing_dates()
    d.adjust_endpoints()

    # copy values from d to s
    d.mask=False
    for _d in da:
        # Only read from d inside its own date range; everything else
        # (before the first or after the last date) stays NaN.
        if startd <= _d <= endd:
            s[_d]=d[_d]
        else:
            s[_d]=np.nan

    return s
コード例 #6
0
 def test_convert_to_annual(self):
     "Check convert_to_annual on daily and hourly series"
     per_day = dict(D=1, H=24, T=24 * 60, S=24 * 3600)
     # Minute ('T') and second ('S') frequencies are deliberately left out:
     # they take too long to run.
     for fq in ('D', 'H'):
         first = Date(fq, '2001-01-01 00:00:00')
         last = Date(fq, '2004-12-31 23:59:59')
         dates = date_array(start_date=first, end_date=last)
         bq = per_day[fq]
         # Three 365-day years followed by one 366-day year.
         values = list(range(365 * bq)) * 3 + list(range(366 * bq))
         series = time_series(values, dates=dates)
         # One row per year, 366 days wide; for the 365-day years the slot
         # between index 58*bq and 59*bq stays masked.
         control = ma.masked_all((4, 366 * bq), dtype=series.dtype)
         control[0, :58 * bq] = list(range(58 * bq))
         control[0, 59 * bq:] = list(range(58 * bq, 365 * bq))
         control[[1, 2]] = control[0]
         control[3] = list(range(366 * bq))
         test = convert_to_annual(series)
         assert_equal(test, control)
     #
     values = list(range(59, 365)) + list(range(366)) + list(range(365))
     series = time_series(values, start_date=Date('D', '2003-03-01'))
     test = convert_to_annual(series)
     expected = ma.masked_values(
         [[-1, 59, 60], [59, 60, 61], [-1, 59, 60]], -1)
     assert_equal(test[:, 59:62], expected)
コード例 #7
0
 def test_sorted(self):
     """Series and records built from unsorted dates must come out sorted.

     The dates are given in reverse chronological order; every constructor
     (``time_series``, ``time_records``, ``fromrecords``, ``fromarrays``) is
     expected to return data and dates in chronological order.
     """
     dates = [ts.Date('D', string='2007-01-%02i' % i) for i in (3, 2, 1)]
     (a, b) = zip(*[
         (3., 30),
         (2., 20),
         (1., 10),
     ])
     # Builtin float/int: ``np.float`` and ``np.int`` were plain aliases of
     # them and no longer exist in recent NumPy releases.
     ndtype = [('a', float), ('b', int)]
     controldates = date_array(dates, freq='D')
     controldates.sort_chronologically()
     series = time_series(zip(*(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(series._data.tolist(), [(1., 10), (2., 20), (3., 30)])
     assert_equal(series._dates, controldates)
     #
     trec = time_records(zip(*(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromrecords(zip(a, b), dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromarrays([a, b], dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
コード例 #8
0
    def test_tsfromtxt(self):
        "Tests reading from a text file."
        fcontent = """#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
"""
        import os
        from datetime import datetime
        import tempfile
        (tmp_fd, tmp_fl) = tempfile.mkstemp()
        os.write(tmp_fd, fcontent)
        os.close(tmp_fd)

        mrectxt = tsfromtxt(tmp_fl,
                            delimiter=',',
                            names=tuple('ABCDEFG'),
                            datecols=0,
                            skip_header=2,
                            asrecarray=True)
        os.remove(tmp_fl)
        #
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        self.failUnless(isinstance(mrectxt, TimeSeriesRecords))
        assert_equal(mrectxt._dates, date_array(dlist, 'M'))
        assert_equal(mrectxt.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(mrectxt.F, [1, 1, 1, 1])
        assert_equal(mrectxt.E._mask, [1, 1, 1, 1])
        assert_equal(mrectxt.C, [1, 2, 300000, -1e-10])
コード例 #9
0
def add_diurnal(tseries, sine_period, peak_mag):
    """
    Modulates a time series with a sine wave, in place.

    Every value ``v`` located ``t`` periods after ``tseries.start_date`` is
    replaced by ``v + v * peak_mag * sin(2*pi*t / period)``, where ``period``
    is ``sine_period`` re-expressed in the frequency of ``tseries``.

    Input: tseries, sine_period (float, hrs), peak_mag (float)
    Output: the same tseries object, modified in place
    """
    # Convert sine_period to same frequency as tseries
    # Create a time delta of magnitude sine_period
    # Convert that time delta into frequency units same as tseries
    zero_date = ts.now('H')
    second_date = zero_date + sine_period
    time_delta = ts.date_array([zero_date, second_date])
    time_delta = time_delta.asfreq(tseries.freq)
    sine_period = float(time_delta[1] - time_delta[0])

    angular_freq = (2. * np.pi) / sine_period

    # Visit every sample, including the last one (the previous loop bound of
    # len(tseries) - 1 left the final value unmodulated).
    for i in range(len(tseries)):
        passed_time = float(tseries.dates[i] - tseries.start_date)
        sine_factor = peak_mag * np.sin(angular_freq * passed_time)
        tseries[i] = tseries[i] + tseries[i] * sine_factor

    return tseries

# Generate power density function (pdf) to create synthetic TPM from
# mean, stdev, autocorr, npointsx

# def gen_pdf(desired_mean, desired_stdev, bin_width):
#     ## TODO
#     return 0
コード例 #10
0
 def test_convert_to_annual(self):
     "Check convert_to_annual on daily and hourly series"
     per_day = dict(D=1, H=24, T=24 * 60, S=24 * 3600)
     # Minute ('T') and second ('S') frequencies are deliberately left out:
     # they take too long to run.
     for fq in ('D', 'H'):
         first = Date(fq, '2001-01-01 00:00:00')
         last = Date(fq, '2004-12-31 23:59:59')
         dates = date_array(start_date=first, end_date=last)
         bq = per_day[fq]
         # Three 365-day years followed by one 366-day year.
         values = list(range(365 * bq)) * 3 + list(range(366 * bq))
         series = time_series(values, dates=dates)
         # One row per year, 366 days wide; for the 365-day years the slot
         # between index 58*bq and 59*bq stays masked.
         control = ma.masked_all((4, 366 * bq), dtype=series.dtype)
         control[0, :58 * bq] = list(range(58 * bq))
         control[0, 59 * bq:] = list(range(58 * bq, 365 * bq))
         control[[1, 2]] = control[0]
         control[3] = list(range(366 * bq))
         test = convert_to_annual(series)
         assert_equal(test, control)
     #
     values = list(range(59, 365)) + list(range(366)) + list(range(365))
     series = time_series(values, start_date=Date('D', '2003-03-01'))
     test = convert_to_annual(series)
     expected = ma.masked_values(
         [[-1, 59, 60], [59, 60, 61], [-1, 59, 60]], -1)
     assert_equal(test[:, 59:62], expected)
コード例 #11
0
ファイル: Excel.py プロジェクト: exedre/e4t
 def _get_tseriesD(freq,date_values,kw):
     """Build a date array from Excel serial dates.

     NaN cells of ``date_values`` are skipped. Every other cell is truncated
     to an integer, decoded with ``xlrd.xldate_as_tuple`` (datemode 0) and
     turned into a ``ts.Date`` of frequency ``str(freq)`` from its year,
     month and day. ``kw`` is not used.
     """
     serials = [cell for cell in date_values.flatten() if not np.isnan(cell)]
     parts = [xlrd.xldate_as_tuple(int(cell), 0) for cell in serials]
     days = []
     for stamp in parts:
         days.append(ts.Date(freq=str(freq),
                             year=stamp[0], month=stamp[1], day=stamp[2]))
     return ts.date_array(days)
コード例 #12
0
ファイル: Random.py プロジェクト: exedre/e4t
    def request(self,reqs,**kw):
        """Random Request

        For every request in ``reqs`` the instrument string is parsed, a
        series name and a length are derived, and a data vector is drawn from
        the generator named by ``rq['Source']``. A ``SEED`` keyword, when
        present, seeds ``rand`` first.

        >>> ds = Random()
        >>> xs = ds.request([{'Instrument':'NAME=PIPPO~:2012-12-31~2007-01-01~M', 'Source': 'FRED'},])
        >>> ts = xs['PIPPO']._data
        >>> print ts.freqstr
        M
        """
        logger.debug('request')
        rx={}
        kw=udict(kw)
        if 'SEED' in kw:
            seed = int(kw['SEED'])
            rand.seed(seed)
        for rq in reqs:
            logger.debug("Request: %s",rq)
            s = parse_instrument(rq['Instrument'])
            logger.debug(s)
            params = ldict(mk_params(s['TICKER']))
            # Name precedence: a non-empty NAME keyword, then the 'name'
            # ticker parameter, then the upper-cased Source.
            if 'name' in params:
                name = params['name'].upper()
                del params['name']
            else:
                name = rq['Source'].upper()
            if 'NAME' in kw and kw['NAME']:
                name = kw['NAME'].upper()
            # kw is shared across iterations, so a NAME stored here is also
            # seen by the following requests.
            kw['NAME']=name
            # Length of the vector: number of dates for a real frequency,
            # plain END - START difference when FREQ is '0'.
            if s['FREQ']!='0':
                dr = ts.date_array(freq=s['FREQ'],start_date=s['START'],end_date=s['END'])
                L = len(dr)
            else:
                L = s['END']-s['START']
            params['size']=L
            dd = np.zeros(L)
            logger.debug(params)
            try:
                if re.match('^constant$',rq['Source'],re.I):
                    # NOTE(review): name has just been set to 'CONSTANT', so
                    # float(name) always raises ValueError; that is not a
                    # TypeError, so it ends up in the bare except below.
                    name = 'CONSTANT'
                    v = float(name)
                    params['LOW']=v
                    params['HIGH']=v
                    dd = rand.uniform(**params)
                elif re.match('^walk$',rq['Source'],re.I):
                    dd = RandomWalk(**params)
                # NOTE(review): in this pattern '^' applies only to the first
                # alternative and '$' only to the last one; with re.match the
                # alternatives in between match as prefixes of Source.
                elif re.match('^uniform|beta|binomial|chisquare|exponential|gamma|geometric|gumbel|hypergeometric|laplace|logistic|lognormal|logseries|multinomial|multivariate_normal|negative_binomial|noncentral_chisquare|noncentral_f|normal|pareto|poisson|power|rayleigh|standard_cauchy|standard_exponential|standard_gamma|standard_normal|standard_t|triangular|uniform|vonmises|wald|weibull|zipf$',rq['Source'],re.I):
                    generator = rq['Source'].lower()
                    # If rand has no such attribute, dd silently stays the
                    # zero vector created above.
                    if hasattr(rand,generator):
                        f = getattr(rand,generator)
                        dd = f(**params)
                ## Add other distributions here
                else:
                    raise TypeError, 'Unknown generator'
            except TypeError, exc:
                logging.error('%s generator - %s',rq['Source'],exc.args[0])
            except:
コード例 #13
0
ファイル: tsa_model.py プロジェクト: jaydenwhyte/statsmodels
 def _make_predict_dates(self):
     """Store the prediction dates as an array of ``datetime`` objects.

     A date array running from ``predict_start`` to ``predict_end`` at the
     data's frequency is converted to integer ordinals, and each ordinal is
     turned into a ``datetime.datetime``. The result is written to
     ``self._data.predict_dates``.
     """
     info = self._data
     span = date_array(start_date=info.predict_start,
                       end_date=info.predict_end,
                       freq=info.freq)
     ordinals = span.toordinal().astype(int)
     stamps = []
     for ordinal in ordinals:
         stamps.append(datetime.datetime.fromordinal(ordinal))
     self._data.predict_dates = asarray(stamps)
コード例 #14
0
ファイル: Excel.py プロジェクト: exedre/e4t
 def _get_tseriesQ(freq,date_values,kw):
     """Build a quarterly date array from quarter numbers.

     NaN cells of ``date_values`` are skipped. For the other cells the
     quarter (1-4) is combined with the cell's position in the flattened
     input (four positions per year) and, when ``kw`` has a ``'YEAR'`` entry,
     with that base year, giving the integer value passed to ``ts.Date``.
     """
     year_offset = 0
     if kw.has_key('YEAR'):
         # NOTE(review): eval() runs whatever expression kw['YEAR'] holds;
         # confirm that this value never comes from untrusted input.
         year_offset = eval(kw['YEAR'])-1
     quarter_values = []
     for _i, cell in enumerate(date_values.flatten()):
         if np.isnan(cell):
             continue
         quarter_values.append(
             (int(cell)-1)%4+1+(int(_i/4)*4)+year_offset*4)
     quarters = [ts.Date(freq=str(freq), value=number)
                 for number in quarter_values]
     return ts.date_array(quarters)
コード例 #15
0
 def setup(self):
     "Generic setup"
     d = np.arange(5)
     m = ma.make_mask([1, 0, 0, 1, 1])
     # Two columns: the values (resp. mask) and their reversed copy.
     values = np.r_[d, d[::-1]].reshape(2, -1).T
     flags = np.r_[[m, m[::-1]]].T
     masked = ma.array(values, mask=flags)
     mrec = mr.fromarrays(masked.T)
     # Monthly labels '2007-01' ... '2007-05'.
     dlist = []
     for i in d:
         dlist.append('2007-%02i' % (i + 1))
     dates = date_array(dlist)
     # The same records wrapped as a time series and as time records.
     mts = time_series(mrec, dates)
     rts = time_records(mrec, dates)
     self.data = [d, m, mrec, dlist, dates, mts, rts]
コード例 #16
0
 def setup(self):
     "Generic setup"
     d = np.arange(5)
     m = ma.make_mask([1, 0, 0, 1, 1])
     # Two columns: the values (resp. mask) and their reversed copy.
     values = np.r_[d, d[::-1]].reshape(2, -1).T
     flags = np.r_[[m, m[::-1]]].T
     masked = ma.array(values, mask=flags)
     mrec = mr.fromarrays(masked.T)
     # Monthly labels '2007-01' ... '2007-05'.
     dlist = []
     for i in d:
         dlist.append('2007-%02i' % (i + 1))
     dates = date_array(dlist)
     # The same records wrapped as a time series and as time records.
     mts = time_series(mrec, dates)
     rts = time_records(mrec, dates)
     self.data = [d, m, mrec, dlist, dates, mts, rts]
コード例 #17
0
 def test_dates_on_several_columns(self):
     "Check tsfromtxt when year and month live in two separate columns."
     datatxt = """
     2001, 01, 0.0, 10.
     2001, 02, 1.1, 11.
     2001, 02, 2.2, 12.
     """
     # Columns 0 and 1 are handed to the converter to build a monthly Date.
     dateconverter = lambda y, m: Date('M', year=int(y), month=int(m))
     options = dict(delimiter=',', dtype=float, datecols=(0, 1),
                    dateconverter=dateconverter)
     test = tsfromtxt(StringIO.StringIO(datatxt), **options)
     assert_equal(test, [[0., 10.], [1.1, 11.], [2.2, 12.]])
     expected_dates = date_array(['2001-01', '2001-02', '2001-02'], freq='M')
     assert_equal(test.dates, expected_dates)
コード例 #18
0
ファイル: tsa_model.py プロジェクト: collinstocks/statsmodels
 def _make_predict_dates(self):
     try:
         from scikits.timeseries import date_array
     except ImportError:
         self._data.predict_dates = None
     data = self._data
     dtstart = data.predict_start
     dtend = data.predict_end
     freq = data.freq
     #pandas_freq = _freq_to_pandas[freq]
     dates = date_array(start_date=dtstart, end_date=dtend,
                             freq=freq).toordinal().astype(int)
     self._data.predict_dates = asarray(
             [datetime.datetime.fromordinal(i) for i in dates])
コード例 #19
0
ファイル: tsa_model.py プロジェクト: zed/statsmodels
 def _make_predict_dates(self):
     try:
         from scikits.timeseries import date_array
     except ImportError:
         self._data.predict_dates = None
     data = self._data
     dtstart = data.predict_start
     dtend = data.predict_end
     freq = data.freq
     #pandas_freq = _freq_to_pandas[freq]
     dates = date_array(start_date=dtstart, end_date=dtend,
                        freq=freq).toordinal().astype(int)
     self._data.predict_dates = asarray(
         [datetime.datetime.fromordinal(i) for i in dates])
コード例 #20
0
    def test_with_datecols(self):
        "Test two datecols"
        fcontent = StringIO.StringIO("""
year, month, A, B
2009, 01, 1, 1.
2009, 03, 3, 3.
""")
        dateconv = lambda y, m: Date("M", year=int(y), month=int(m))
        test = tsfromtxt(fcontent, delimiter=",", skip_header=1, names=True,
                         converters={'dates': dateconv}, datecols=(0, 1))
        dates = date_array(['2009-01', '2009-03'], freq='M')
        assert_equal(test.dates.tovalue(), dates)
        assert_equal(test['A'], [1, 3])
        assert_equal(test['B'], [1., 3.])
        assert_equal(test.dtype, np.dtype([('A', int), ('B', float)]))
コード例 #21
0
ファイル: Excel.py プロジェクト: exedre/e4t
 def _get_tseriesM(freq,date_values,kw):
     """Build a monthly date array from the date cells of a sheet.

     The cell format is taken from ``kw['DFORMAT']`` (default ``'INT'``):

     * ``'INT'``: each cell is combined with its position in the flattened
       input (modulo 12) into the integer value passed to ``ts.Date``;
     * ``'XL_DATE'``: each cell is decoded with ``xlrd.xldate_as_tuple``
       (datemode 0) and its year and month are used.

     NaN cells are skipped. Any other format is logged and raises
     ``ValueError``.
     """
     if kw.has_key('DFORMAT'):
         dformat = kw['DFORMAT']
     else:
         dformat = 'INT'
     months = []
     if dformat=='INT':
         for pos, cell in enumerate(date_values.flatten()):
             if np.isnan(cell):
                 continue
             number = (int(cell)-1)*12+pos%12+1
             months.append(ts.Date(freq=str(freq), value=number))
     elif dformat=='XL_DATE':
         for cell in date_values.flatten():
             if np.isnan(cell):
                 continue
             stamp = xlrd.xldate_as_tuple(cell, 0)
             months.append(
                 ts.Date(freq=str(freq), year=stamp[0], month=stamp[1]))
     else:
         logger.error('DATE FORMAT NOT SUPPORTED ON EXCEL READING')
         raise ValueError(dformat)
     return ts.date_array(months)
コード例 #22
0
 def test_dates_on_several_columns(self):
     "Check tsfromtxt when year and month live in two separate columns."
     datatxt = """
     2001, 01, 0.0, 10.
     2001, 02, 1.1, 11.
     2001, 02, 2.2, 12.
     """
     # Columns 0 and 1 are handed to the converter to build a monthly Date.
     dateconverter = lambda y, m: Date('M', year=int(y), month=int(m))
     options = dict(delimiter=',', dtype=float, datecols=(0, 1),
                    dateconverter=dateconverter)
     test = tsfromtxt(StringIO.StringIO(datatxt), **options)
     assert_equal(test, [[0., 10.], [1.1, 11.], [2.2, 12.]])
     expected_dates = date_array(['2001-01', '2001-02', '2001-02'], freq='M')
     assert_equal(test.dates, expected_dates)
コード例 #23
0
 def setUp(self):
     "Build 120 months of linear/random climate records with an ENSO index"
     npts = 120
     dlin = np.linspace(0, 10, npts)
     drnd = np.random.rand(npts)
     data = np.array(zip(dlin, drnd),
                     dtype=[('lin', float), ('rand', float)])
     # Monthly dates covering the 120 months that start 120 months ago.
     dates = ts.date_array(start_date=ts.now('M') - npts, length=npts,
                           freq='M')
     # NOTE(review): full_year is the *string* 'False', which is truthy;
     # confirm whether the boolean False was intended.
     enso = ENSOIndicator(np.random.rand(npts) + np.linspace(-1, 1, npts),
                          dates=dates,
                          thresholds=(-0.5, 0.5),
                          full_year='False', refseason='NDH', minsize=5)
     cdat = data.view(ClimateRecords)
     cdat._dates = dates
     cdat.ensoindicator = enso
     self.dlin, self.cdat, self.enso = dlin, cdat, enso
コード例 #24
0
    def test_with_datecols(self):
        "Test two datecols"
        fcontent = StringIO.StringIO("""
year, month, A, B
2009, 01, 1, 1.
2009, 03, 3, 3.
""")
        dateconv = lambda y, m: Date("M", year=int(y), month=int(m))
        test = tsfromtxt(fcontent,
                         delimiter=",",
                         skip_header=1,
                         names=True,
                         converters={'dates': dateconv},
                         datecols=(0, 1))
        dates = date_array(['2009-01', '2009-03'], freq='M')
        assert_equal(test.dates.tovalue(), dates)
        assert_equal(test['A'], [1, 3])
        assert_equal(test['B'], [1., 3.])
        assert_equal(test.dtype, np.dtype([('A', int), ('B', float)]))
コード例 #25
0
    def test_with_names(self):
        "Tests w/ names"
        fcontent = StringIO.StringIO("""#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
""")
        test = tsfromtxt(fcontent, delimiter=",", datecols=0, skip_header=2,
                         names="A,B,C,D,E,F", freq='M')
        assert(isinstance(test, TimeSeries))
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        assert_equal(test.dates.tovalue(),
                     date_array(dlist, freq='M').tovalue())
        assert_equal(test.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(test['F'], [1, 1, 1, 1])
        assert_equal(test['E'].mask, [1, 1, 1, 1])
        assert_equal(test['C'], [1, 2, 300000, -1e-10])
コード例 #26
0
 def setUp(self):
     "Build 120 months of linear/random climate records with an ENSO index"
     npts = 120
     dlin = np.linspace(0, 10, npts)
     drnd = np.random.rand(npts)
     data = np.array(zip(dlin, drnd),
                     dtype=[('lin', float), ('rand', float)])
     # Monthly dates covering the 120 months that start 120 months ago.
     dates = ts.date_array(start_date=ts.now('M') - npts, length=npts,
                           freq='M')
     # NOTE(review): full_year is the *string* 'False', which is truthy;
     # confirm whether the boolean False was intended.
     enso = ENSOIndicator(np.random.rand(npts) + np.linspace(-1, 1, npts),
                          dates=dates,
                          thresholds=(-0.5, 0.5),
                          full_year='False', refseason='NDH', minsize=5)
     cdat = data.view(ClimateRecords)
     cdat._dates = dates
     cdat.ensoindicator = enso
     self.dlin, self.cdat, self.enso = dlin, cdat, enso
コード例 #27
0
    def test_without_names(self):
        "Test w/o names"
        fcontent = StringIO.StringIO("""#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
""")
        test = tsfromtxt(fcontent,
                         delimiter=",",
                         skip_header=1,
                         names=True,
                         freq='M')
        assert (isinstance(test, TimeSeries))
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        assert_equal(test.dates.tovalue(),
                     date_array(dlist, freq='M').tovalue())
        assert_equal(test.dtype.names,
                     ['One_S', 'Two_I', 'Three_F', 'Four_M', 'Five_', 'Six_C'])
        assert_equal(test['Six_C'], [1, 1, 1, 1])
        assert_equal(test['Five_'].mask, [1, 1, 1, 1])
        assert_equal(test['Three_F'], [1, 2, 300000, -1e-10])
コード例 #28
0
ファイル: ex_dates.py プロジェクト: slojo404/statsmodels
import statsmodels.api as sm
import numpy as np
import pandas

# Getting started
# ---------------

data = sm.datasets.sunspots.load()

# For now an annual date series has to consist of datetimes falling at the
# end of each year. scikits.timeseries is used to generate such a series.

import datetime
import scikits.timeseries as ts

# One annual date per observation, starting in 1700.
dates = ts.date_array(
    start_date=1700,
    length=len(data.endog),
    freq="A",
)

# An array of datetime objects could be built from the integer ordinals:

# .. from datetime import datetime
# .. dt_dates = dates.toordinal().astype(int)
# .. dt_dates = np.asarray([datetime.fromordinal(i) for i in dt_dates])
dt_dates = dates.tolist()

# Using Pandas
# ------------

# Wrap the data in a pandas Series indexed by those dates
endog = pandas.Series(data.endog, index=dt_dates)

# and instantiate the model
コード例 #29
0
ファイル: try_interchange.py プロジェクト: zed/statsmodels
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular

* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""

import numpy as np
import scikits.timeseries as ts

# A monthly series in which January and March each appear twice.
s = ts.time_series(
    [1, 2, 3, 4, 5],
    dates=ts.date_array(
        ["2001-01", "2001-01", "2001-02", "2001-03", "2001-03"], freq="M"))

print('\nUsing la')
import la
# Values and dates as two larrys sharing the same integer labels.
dta = la.larry(s.data, label=[range(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[range(len(s.data))])
# Presumably averages the values that share a date (see la's group_mean),
# then rebuilds a monthly series and drops the duplicated dates.
s2 = ts.time_series(
    dta.group_mean(dat).x,
    dates=ts.date_array(dat.x, freq="M"))
s2u = ts.remove_duplicated_dates(s2)
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
コード例 #30
0
"""
Look at some macro plots, then do some VARs and IRFs.
"""

import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts
import scikits.timeseries.lib.plotlib as tplt

data = sm.datasets.macrodata.load(as_pandas=False).data

### Create Timeseries Representations of a few vars

# Quarterly dates from 1959Q1 through 2009Q3.
dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
                      end_date=ts.Date('Q', year=2009, quarter=3))

# Three macro columns plus a fourth one derived from unemp and pop.
ts_data = data[['realgdp', 'realcons', 'cpi']].view(float).reshape(-1, 3)
ts_data = np.column_stack((ts_data, (1 - data['unemp'] / 100) * data['pop']))
ts_series = ts.time_series(ts_data, dates)

# One subplot per column: (position, column index, line format, title).
fig = tplt.tsfigure()
for _pos, _col, _fmt, _title in ((221, 0, '-', "Real GDP"),
                                 (222, 1, 'r-', "Real Consumption"),
                                 (223, 2, 'g-', "CPI")):
    fsp = fig.add_tsplot(_pos)
    fsp.tsplot(ts_series[:, _col], _fmt)
    fsp.set_title(_title)
コード例 #31
0
ファイル: cmsCtrl.py プロジェクト: kitworks/Oasis-App-API
def getHappinessStats(fromFunc=False, groupId=None, userId=None, endDateIn=None):
	"""Return per-day average happiness values for one company.

	The company id comes from the request form or, failing that, from the
	session. Rows are optionally restricted to ``groupId`` or, when no
	group is given, to ``userId``. The end date comes from the form field
	``endDate``, else from ``endDateIn``, else (only when ``fromFunc`` is
	true) from today's date.

	Returns ``None`` when no company id or no end date is available.
	Otherwise returns a list of dicts with the keys ``date`` (``mm/dd``),
	``val``, ``gr`` and ``me``: directly when ``fromFunc`` is true, wrapped
	in ``jsonify({"stats": ...})`` otherwise. Days without data carry the
	integer 0; days with data carry strings formatted with two decimals.
	"""
	condFilter = [1==1]

	# Form value wins over the session value.
	companyId = request.form.get("companyId") and request.form.get("companyId") or session.get("companyId")
	if companyId :
		condFilter.append(Happiness.companyId==companyId)
	else :
		return None

	# groupId takes precedence: userId is only used when no groupId is given.
	if groupId:
		condFilter.append(Happiness.groupId==groupId)
	elif userId:
		condFilter.append(Happiness.userId==userId)

	if request.form.get('endDate'):
		endDateIn = request.form['endDate']
	elif fromFunc==True and not endDateIn:
		endDateIn = datetime.now().strftime("%Y-%m-%d")
	if not endDateIn:
		return None

	if request.form.get("startDate"):
		condFilter.append(Happiness.rdate > request.form['startDate'])

	# cols = func.date(Happiness.rdate).label("rdate"), func.avg(Happiness.happyVal)
	# if request.form.get('type')=='all' and request.form.get('groupId') and request.form.get('userId'):
		# cols = cols + ( func.avg(func.IF(Happiness.groupId==groupId, Happiness.happyVal, None)) )
		# cols = cols + ( func.avg(func.IF(Happiness.userId==userId, Happiness.happyVal, None)) )
	# Four columns: day, overall average, average for the form's groupId and
	# average for the form's userId.
	# NOTE(review): the last two use request.form values, not the groupId /
	# userId arguments of this function -- confirm that this is intended.
	cols = func.date(Happiness.rdate).label("rdate"), func.avg(Happiness.happyVal), func.avg(func.IF(Happiness.groupId==request.form.get('groupId'), Happiness.happyVal, None)), func.avg(func.IF(Happiness.userId==request.form.get('userId'), Happiness.happyVal, None))

	db_result = db_session.query(*cols)\
				.group_by( func.date(Happiness.rdate) )\
				.filter(func.date(Happiness.rdate) <= endDateIn)\
				.filter(*condFilter).all()

	# Split the result rows into one list per column.
	db_dates = list()
	db_vals1 = list()
	db_vals2 = list()
	db_vals3 = list()
	for row in db_result:
		db_dates.append(row[0])
		db_vals1.append(row[1])
		# Four columns are always selected above, so this holds for every row.
		if len(row)>2 :
			db_vals2.append(row[2])
			db_vals3.append(row[3])

	# Turn each column into a daily series, inserting the days missing from
	# the query result and replacing masked entries by 0.
	stats = list()
	dateAry = ts.date_array(db_dates, freq='D')
	timeSrz1 = ts.time_series(db_vals1, dateAry)
	fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
	fillVals1 = fillVals1.filled(0)
	timeSrz2 = ts.time_series(db_vals2, dateAry)
	fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)
	fillVals2 = fillVals2.filled(0)
	timeSrz3 = ts.time_series(db_vals3, dateAry)
	fillVals3 = timeSrz3.fill_missing_dates(fill_value=0)
	fillVals3 = fillVals3.filled(0)

	# Full reporting window, one entry per day.
	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		# NOTE(review): raises IndexError when the query returned no rows.
		startDate = db_dates[0]
	endDate = datetime.strptime(endDateIn, '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Leading days of the window that precede the first day with data get
	# zero rows; firstDayIdx ends up as the offset of that first day.
	firstDayIdx = 0
	for day in fillDateAry :
		if len(dateAry)==0 or dateAry[0] != fillDateAry[firstDayIdx] :
			stats.append({ "date" : fillDateAry[firstDayIdx].strftime("%m/%d"), "val" : 0, "gr":0, "me":0 })
			firstDayIdx += 1
			continue
		else :
			break
	# One row per day of the filled series, values formatted with 2 decimals.
	for idx, val in enumerate(fillVals1):
		gr = fillVals2[idx]
		me = fillVals3[idx]
		rowDict = { "date" : fillDateAry[firstDayIdx + idx].strftime("%m/%d"), "val" : "{0:.2f}".format(float(val or 0)) }
		rowDict["gr"] = "{0:.2f}".format(float(gr or 0))
		rowDict["me"] = "{0:.2f}".format(float(me or 0))
		stats.append(rowDict)

	# Trailing days of the window after the last day with data get zero rows.
	statsLen = len(stats)
	for i in range(len(fillDateAry) - len(stats)) :
		stats.append({ "date" : fillDateAry[statsLen + i].strftime("%m/%d"), "val" : 0, "gr":0, "me":0 })

	if fromFunc==True :
		return stats
	else :
		return jsonify({"stats" : stats})
コード例 #32
0
def load_oni(mode='standard', **options):
    """
    Loads the ONI 3-m averaged monthly SST anomalies over the Niño-3.4 region
    and returns a :class:`~scikits.hydroclimpy.enso.ENSOIndicator` object.

    The indicator is cached as a pickle inside the zip archive named by the
    ``ensoarchive`` entry of the ``ENSO`` configuration section.  When a valid
    cached indicator is found it is returned directly; otherwise the data are
    downloaded, converted and stored in the archive.

    Two modes are accepted as arguments:
    
    - in the ``standard`` mode, the SSTs are retrieved from the original CPC
      website_.
      Data are available from Jan. 1950 to present.
    - in the ``backup`` mode, the SSTs are retrieved from the CPC `ftp site <ftpsite>`_.
      Data are available from Jan. 1900 to present.

    Any other value of ``mode`` is treated like ``backup``.  The ``standard``
    mode needs the ``BeautifulSoup`` module; if it cannot be imported, a
    warning is issued and the ``backup`` mode is used instead.

    .. _website : http://www.cpc.noaa.gov/products/analysis_monitoring/ensostuff/ensoyears.shtml
    .. _ftpsite : ftp://eclipse.ncdc.noaa.gov/pub/ersst/pdo/el_nino_v3.dat.


    Parameters
    ----------
    mode : {'standard','backup'}, optional
        Mode describing the data to download.
    options : dictionary
        Optional parameters to pass to the ENSOIndicator for the definition of
        ENSO indices.  They override the entries of the ``ENSO.ONI``
        configuration section.
    thresholds : tuple of floats, optional
        Low and high temperature thresholds for the definition of El Niño and
        La Niña conditions.
        By default, the CPC uses -0.5oC and +0.5oC.
    minimum_size : int, optional
        Minimum number of consecutive months in El Niño / La Niña conditions
        required for the definition of an episode.
        By default, the CPC use 5 consecutive months.
    reference_season : string or tuple, optional
        Months that must be in an episode for it to be valid.
        By default, the CPC uses None (no restriction on the months).
    full_year : boolean, optional
        The CPC uses ``full_year=False``.

    Returns
    -------
    ensoi : ENSOIndicator
        The indicator, either read from the archive or freshly built.

    References
    ----------
    Xue, Y., T. M. Smith, and R. W. Reynolds, 2003: Interdecadal changes of 30-yr
    SST normals during 1871-2000. *J. Climate*, 16, 1601-1612.

    """
    # Initialization .......................
    ensoarchive = dict(config.items('ENSO'))['ensoarchive']
    if ensoarchive[-4:].lower() != '.zip':
        ensoarchive += '.zip'
    #
    mode = mode.lower()
    cfg = dict(config.items('ENSO.ONI'))
    cfg.update(options)
    if mode == 'standard':
        # BeautifulSoup is only needed to scrape the HTML table, so do not
        # import it (or warn about it) when the backup mode was requested.
        try:
            from BeautifulSoup import BeautifulSoup, SoupStrainer
        except ImportError:
            warnings.warn("The module 'BeautifulSoup' is unavailable.\n"\
                          "Reverting to backup mode")
            mode = 'backup'
    #
    datadir = cfg['datadir']
    if mode == 'standard':
        netfile = cfg['netfile']
        archive = cfg['archive']
    else:
        netfile = cfg['netfile_backup']
        archive = cfg['archive_backup']
    #
    # Try to load an existing ENSOIndicator from the archive.  The read handle
    # is always closed here; the archive is reopened for writing only once
    # there is something to store, so a failed download does not leave an
    # open (or freshly truncated) archive behind.
    # NOTE: unpickling trusts the content of the local archive.
    ensoi = None
    archive_mode = 'a'
    try:
        zipf = zipfile.ZipFile(ensoarchive, 'r')
        try:
            ensoi = cPickle.loads(zipf.read(archive))
        finally:
            zipf.close()
        ensologger.info("... Loading from existing archived file")
    except IOError:
        # The archive could not be opened: it will be created.
        archive_mode = 'w'
        ensologger.info("... Creating archive")
    except KeyError:
        # The archive exists but has no entry under that name yet.
        ensologger.info("... Appending to archive")
    if isinstance(ensoi, enso.ENSOIndicator):
        return ensoi
    # From here on the entry is either missing or not an ENSOIndicator: in
    # both cases a new one is built and appended to (or creates) the archive.
    #
    sourcedir = np.lib._datasource.DataSource(datadir)
    dfile = sourcedir.open(netfile)
    try:
        if mode == 'standard':
            # Load the file as a tree, but only take the SST table (border=1)
            table = BeautifulSoup(dfile.read(),
                                  parseOnlyThese=SoupStrainer("table",
                                                              border=1))
            # Separate it by rows, but skip the first one (the header).
            # The first cell of a row holds the year, the others the values;
            # empty cells ('&nbsp;') are read as 99.9 and masked below.
            # The cell colours of the page are ignored: the indices are set
            # by `ensoi.set_indices()` further down.
            years = []
            data = []
            for row in table.findAll("tr")[1:]:
                cols = row.findAll('td')
                years.append(int(cols.pop(0).strong.string))
                data.append([
                    float(cell.fetchText()[-1].string.replace('&nbsp;',
                                                              '99.9'))
                    for cell in cols
                ])
            #
            start_date = Date('M', year=years[0], month=1)
            ensoi = enso.ENSOIndicator(
                ma.masked_values(data, 99.9).ravel(),
                start_date=start_date,
            )
        else:
            # The first two columns are read as (year, month); the last one
            # is smoothed with a 3-point centered moving mean.
            rawdata = np.loadtxt(dfile)
            dates = date_array(
                [Date('M', year=yy, month=mm) for (yy, mm) in rawdata[:, :2]],
                freq='M')
            ensoi = enso.ENSOIndicator(
                cmov_mean(rawdata[:, -1], 3).round(2),
                dates,
            )
    finally:
        dfile.close()
    #
    _set_ensoindicator_options(ensoi, **cfg)
    ensoi.set_indices()
    #
    # Store in the archive
    zipf = zipfile.ZipFile(ensoarchive, archive_mode)
    try:
        zipf.writestr(archive, cPickle.dumps(ensoi))
    finally:
        zipf.close()
    return ensoi
コード例 #33
0
#    t = timer()
#    mod_tb = tb.lpc(y, 2)
#    t_end = timer()
#    print str(t_end - t) + " seconds for talkbox.lpc"
#    print """For higher lag lengths ours quickly fills up memory and starts
#thrashing the swap.  Should we include talkbox C code or Cythonize the
#Levinson recursion algorithm?"""

    ## Try with a pandas series
    import pandas
    import scikits.timeseries as ts
    d1 = ts.Date(year=1700, freq='A')
    #NOTE: have to have yearBegin offset for annual data until parser rewrite
    #should this be up to the user, or should it be done in TSM init?
    #NOTE: not anymore, it's end of year now
    ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
    pandas_dr = pandas.DateRange(start=d1.datetime,
                                 periods=len(sunspots.endog), timeRule='A@DEC')
    #pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

    dates = np.arange(1700, 1700 + len(sunspots.endog))
    dates = ts.date_array(dates, freq='A')
    #sunspots = pandas.Series(sunspots.endog, index=dates)

    #NOTE: pandas only does business days for dates it looks like
    import datetime
    dt_dates = np.asarray(lmap(datetime.datetime.fromordinal,
                              ts_dr.toordinal().astype(int)))
    sunspots = pandas.Series(sunspots.endog, index=dt_dates)

    #NOTE: pandas can't handle pre-1900 dates
コード例 #34
0
rearranged
1.00796791   0.24449867(-0.00521004)   0.50554663
garch11:
[ 1.01258264  0.24149155  0.50479994]
-2056.3877404
R include_constant=False
Final Estimate:
 LLH:  2056.397    norm LLH:  2.056397
    omega    alpha1     beta1
1.0123560 0.2409589 0.5049154
'''


# Generate a short (20 observation) sample with unit `varinnovation`.
# NOTE(review): `generate_gjrgarch`, `ar` and `ma` are defined elsewhere in
# this script; the meaning of the three returned values is not visible here.
erro,ho, etaxo = generate_gjrgarch(20, ar, ma, mu=0.04, scale=0.01,
                  varinnovation = np.ones(20))

# Optional example, only run when 'sp500' is listed in `examples`.
if 'sp500' in examples:
    import tabular as tb
    import scikits.timeseries as ts

    # NOTE: hard-coded local path; the example only runs where this file exists.
    a = tb.loadSV(r'C:\Josef\work-oth\gspc_table.csv')

    # The 'Close' and 'Date' columns are reversed with [::-1] before use
    # (presumably the file is stored newest-first -- confirm).
    s = ts.time_series(a[0]['Close'][::-1],
                dates=ts.date_array(a[0]['Date'][::-1],freq="D"))

    # First difference of the log of the closing values (log returns).
    sp500 = a[0]['Close'][::-1]
    sp500r = np.diff(np.log(sp500))


plt.show()
コード例 #35
0
ファイル: Random.py プロジェクト: exedre/e4t
                        dd = f(**params)
                ## Add other distributions here
                else:
                    raise TypeError, 'Unknown generator'
            except TypeError, exc:
                logging.error('%s generator - %s',rq['Source'],exc.args[0])
            except:
                raise
            if s['FREQ']!='0':
                data = ts.time_series(dd,
                                      freq=s['FREQ'],
                                      dates=dr)            
                if s['NAN']:
                    t=ts.now(data.freqstr)
                    if t<data.end_date:
                        da = ts.date_array(start_date=t,end_date=data.end_date)
                        data[da]=np.nan
                rx[kw['NAME']]=Timeseries(data=data,name=name)
            else: # for a vector
                print type(dd),dd.shape
                xx = dd.reshape((1,-1)) 
                print type(xx),xx.shape
                rx[kw['NAME']]=np.array(dd)
        return rx

# random://normal/loc=10,scale=.2~:2012-12-31~2007-01-01~M?name=UNI

# Ad-hoc smoke run when the module is executed as a script: parse three sample
# instrument strings (plain name, and name followed by '~M' / '~Q').
# NOTE(review): `parse_instrument` is defined elsewhere; results are discarded.
if __name__=="__main__":
    parse_instrument('NAME=PIPPO')
    parse_instrument('NAME=PIPPO~M')
    parse_instrument('NAME=PIPPO~Q')
コード例 #36
0
def _daily_finder(vmin, vmax, freq):
    """
    Describe every integer position between ``vmin`` and ``vmax``.

    For each position the function decides whether it is flagged as major
    (``maj``), as minor (``min``), and which date format string (``fmt``)
    is attached to it.  The level of detail depends on the span
    ``int(vmax) - int(vmin) + 1`` compared with approximate numbers of
    periods per day, month and year for the frequency `freq`.

    Parameters
    ----------
    vmin, vmax : number
        Limits of the interval.  Both are truncated with ``int``; the
        untruncated `vmin` is kept to decide which position carries the
        first complete label.
    freq : int
        Frequency code, compared with the ``_c.FR_*`` constants.

    Returns
    -------
    info : ndarray
        Structured array with one entry per position and the fields
        ``'val'`` (the position), ``'maj'``, ``'min'`` (booleans) and
        ``'fmt'`` (format string, empty when the position has no label).

    Raises
    ------
    ValueError
        If `freq` is not one of the frequencies handled below.
    """

    # Stays negative for every frequency below hourly, so that none of the
    # `span < periodsperday ...` tests of case 1 can succeed for a positive
    # span and the day-level branch is used instead.
    periodsperday = -1

    # Approximate period counts, only used as thresholds on the span below.
    if freq >= _c.FR_HR:
        if freq == _c.FR_SEC:
            periodsperday = 24 * 60 * 60
        elif freq == _c.FR_MIN:
            periodsperday = 24 * 60
        elif freq == _c.FR_HR:
            periodsperday = 24
        else:
            raise ValueError("unexpected frequency: %s" % check_freq_str(freq))
        periodsperyear = 365 * periodsperday
        periodspermonth = 28 * periodsperday

    elif freq == _c.FR_BUS:
        periodsperyear = 261
        periodspermonth = 19
    elif freq == _c.FR_DAY:
        periodsperyear = 365
        periodspermonth = 28
    elif get_freq_group(freq) == _c.FR_WK:
        periodsperyear = 52
        periodspermonth = 3
    elif freq == _c.FR_UND:
        periodsperyear = 100
        periodspermonth = 10
    else:
        raise ValueError("unexpected frequency")

    # save this for later usage
    vmin_orig = vmin

    (vmin, vmax) = (int(vmin), int(vmax))
    span = vmax - vmin + 1
    dates_ = date_array(start_date=Date(freq, vmin), end_date=Date(freq, vmax))
    # Initialize the output
    info = np.zeros(span,
                    dtype=[('val', int), ('maj', bool), ('min', bool),
                           ('fmt', '|S20')])
    info['val'][:] = np.arange(vmin, vmax + 1)
    info['fmt'][:] = ''
    # The first and last positions are always flagged as major.
    info['maj'][[0, -1]] = True
    # .. and set some shortcuts
    info_maj = info['maj']
    info_min = info['min']
    info_fmt = info['fmt']

    def first_label(label_flags):
        # Return the first flagged position; position 0 is skipped in favour
        # of the second flag when the original vmin had a fractional part.
        if (label_flags[0] == 0) and (label_flags.size > 1) and \
            ((vmin_orig % 1) > 0.0):
            return label_flags[1]
        else:
            return label_flags[0]

    # In every case below the formats are assigned from the finest to the
    # coarsest level: a later assignment overrides an earlier one on the
    # positions they share, so the order of the statements matters.

    # Case 1. Less than a month
    if span <= periodspermonth:

        day_start = period_break(dates_, 'day')
        month_start = period_break(dates_, 'month')

        def _hour_finder(label_interval, force_year_start):
            # Major at each new day, minor/label at each new hour that is a
            # multiple of `label_interval`.
            _hour = dates_.hour
            _prev_hour = (dates_ - 1).hour
            # True where the hour differs from that of the previous period.
            hour_start = (_hour - _prev_hour) != 0
            info_maj[day_start] = True
            info_min[hour_start & (_hour % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'
            # Make sure the year shows up once if no year break carries it.
            if force_year_start and not has_level_label(year_start, vmin_orig):
                info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y'

        def _minute_finder(label_interval):
            # Major at each new hour, minor/label at each new minute that is
            # a multiple of `label_interval`.
            hour_start = period_break(dates_, 'hour')
            _minute = dates_.minute
            _prev_minute = (dates_ - 1).minute
            minute_start = (_minute - _prev_minute) != 0
            info_maj[hour_start] = True
            info_min[minute_start & (_minute % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            # Local rebinding of the same field as the outer shortcut.
            info_fmt = info['fmt']
            info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'

        def _second_finder(label_interval):
            # Major at each new minute, minor/label at each new second that
            # is a multiple of `label_interval`.
            minute_start = period_break(dates_, 'minute')
            _second = dates_.second
            _prev_second = (dates_ - 1).second
            second_start = (_second - _prev_second) != 0
            info['maj'][minute_start] = True
            info['min'][second_start & (_second % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            # Local rebinding of the same field as the outer shortcut.
            info_fmt = info['fmt']
            info_fmt[second_start
                     & (_second % label_interval == 0)] = '%H:%M:%S'
            info_fmt[day_start] = '%H:%M:%S\n%d-%b'
            info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y'

        # Pick the labelling interval from the span, expressed as a fraction
        # (or multiple) of a day; the first matching threshold wins.
        if span < periodsperday / 12000.0: _second_finder(1)
        elif span < periodsperday / 6000.0: _second_finder(2)
        elif span < periodsperday / 2400.0: _second_finder(5)
        elif span < periodsperday / 1200.0: _second_finder(10)
        elif span < periodsperday / 800.0: _second_finder(15)
        elif span < periodsperday / 400.0: _second_finder(30)
        elif span < periodsperday / 150.0: _minute_finder(1)
        elif span < periodsperday / 70.0: _minute_finder(2)
        elif span < periodsperday / 24.0: _minute_finder(5)
        elif span < periodsperday / 12.0: _minute_finder(15)
        elif span < periodsperday / 6.0: _minute_finder(30)
        elif span < periodsperday / 2.5: _hour_finder(1, False)
        elif span < periodsperday / 1.5: _hour_finder(2, False)
        elif span < periodsperday * 1.25: _hour_finder(3, False)
        elif span < periodsperday * 2.5: _hour_finder(6, True)
        elif span < periodsperday * 4: _hour_finder(12, True)
        else:
            # Day level: major at each new month, minor at each new day.
            info_maj[month_start] = True
            info_min[day_start] = True
            year_start = period_break(dates_, 'year')
            info_fmt = info['fmt']
            info_fmt[day_start] = '%d'
            info_fmt[month_start] = '%d\n%b'
            info_fmt[year_start] = '%d\n%b\n%Y'
            # Put the full label on the first month (or day) break when no
            # year break carries it.
            if not has_level_label(year_start, vmin_orig):
                if not has_level_label(month_start, vmin_orig):
                    info_fmt[first_label(day_start)] = '%d\n%b\n%Y'
                else:
                    info_fmt[first_label(month_start)] = '%d\n%b\n%Y'

    # Case 2. Less than three months
    elif span <= periodsperyear // 4:
        month_start = period_break(dates_, 'month')
        info_maj[month_start] = True
        if freq < _c.FR_HR:
            # Below hourly frequency every position is flagged as minor.
            info['min'] = True
        else:
            day_start = period_break(dates_, 'day')
            info['min'][day_start] = True
        week_start = period_break(dates_, 'week')
        year_start = period_break(dates_, 'year')
        info_fmt[week_start] = '%d'
        info_fmt[month_start] = '\n\n%b'
        info_fmt[year_start] = '\n\n%b\n%Y'
        if not has_level_label(year_start, vmin_orig):
            if not has_level_label(month_start, vmin_orig):
                info_fmt[first_label(week_start)] = '\n\n%b\n%Y'
            else:
                info_fmt[first_label(month_start)] = '\n\n%b\n%Y'
    # Case 3. At most 1.15 years ................
    elif span <= 1.15 * periodsperyear:
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        week_start = period_break(dates_, 'week')
        info_maj[month_start] = True
        info_min[week_start] = True
        # A position that is already major must not be minor as well.
        info_min[year_start] = False
        info_min[month_start] = False
        info_fmt[month_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'
        if not has_level_label(year_start, vmin_orig):
            info_fmt[first_label(month_start)] = '%b\n%Y'
    # Case 4. At most 2.5 years .................
    elif span <= 2.5 * periodsperyear:
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        month_start = period_break(dates_, 'month')
        info_maj[quarter_start] = True
        info_min[month_start] = True
        info_fmt[quarter_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'
    # Case 5. At most 4 years ...................
    elif span <= 4 * periodsperyear:
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        info_maj[year_start] = True
        info_min[month_start] = True
        info_min[year_start] = False

        # Only the month breaks falling in January or July get a label.
        month_break = dates_[month_start].month
        jan_or_jul = month_start[(month_break == 1) | (month_break == 7)]
        info_fmt[jan_or_jul] = '%b'
        info_fmt[year_start] = '%b\n%Y'
    # Case 6. At most 11 years ..................
    elif span <= 11 * periodsperyear:
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        info_maj[year_start] = True
        info_min[quarter_start] = True
        info_min[year_start] = False
        info_fmt[year_start] = '%Y'
    # Case 7. More than 11 years ................
    else:
        year_start = period_break(dates_, 'year')
        year_break = dates_[year_start].years
        nyears = span / periodsperyear
        # Only the years that are multiples of the default spacings are
        # flagged; the major ones also get a label.
        (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
        major_idx = year_start[(year_break % maj_anndef == 0)]
        info_maj[major_idx] = True
        minor_idx = year_start[(year_break % min_anndef == 0)]
        info_min[minor_idx] = True
        info_fmt[major_idx] = '%Y'
    #............................................
    return info
コード例 #37
0
ファイル: ex_dates.py プロジェクト: zed/statsmodels
import scikits.statsmodels.api as sm
import numpy as np
import pandas

# Getting started
# ---------------

# Load the sunspots dataset shipped with statsmodels; `data.endog` is the
# series modelled below.
data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.
# We can use scikits.timeseries and datetime to create this array.

import datetime
import scikits.timeseries as ts
# One annual date per observation, starting in 1700.
dates = ts.date_array(start_date=1700, length=len(data.endog), freq='A')

# To make an array of datetime types, we need an integer array of ordinals

#.. from datetime import datetime
#.. dt_dates = dates.toordinal().astype(int)
#.. dt_dates = np.asarray([datetime.fromordinal(i) for i in dt_dates])
# `tolist` replaces the ordinal-based conversion sketched above
# (presumably it returns datetime objects -- confirm).
dt_dates = dates.tolist()

# Using Pandas
# ------------

# Make a pandas TimeSeries or DataFrame
endog = pandas.Series(data.endog, index=dt_dates)

# and instantiate the model, declaring the data as annual
ar_model = sm.tsa.AR(endog, freq='A')
コード例 #38
0
import datetime
import os

from matplotlib.finance import quotes_historical_yahoo
import scikits.timeseries as ts
import scikits.timeseries.lib.tstables as tstab

startdate = datetime.date(2002, 1, 5)
enddate = datetime.date(2003, 12, 1)

# retrieve data from yahoo.
# Data format is [(d, open, close, high, low, volume), ...] where d is
# a floating point representation of the number of days since 01-01-01 UTC
quotes = quotes_historical_yahoo('INTC', startdate, enddate)

# Create a DateArray of daily dates and convert it to business day frequency
dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS')

opens = [q[1] for q in quotes]

# opens: the data portion of the timeserie
# dates: the date portion of the timeserie
raw_series = ts.time_series(opens, dates)
test_series = raw_series
#test_series = ts.fill_missing_dates(raw_series, fill_value=-1)

# Write to a PyTables file
output_dir = '../timeseries'
try:
    os.mkdir(output_dir)
except OSError:
    # An already existing directory is fine; anything else (permissions,
    # missing parent, a plain file in the way) is a real error.
    if not os.path.isdir(output_dir):
        raise
コード例 #39
0
* pandas is missing GroupBy in the docs, but the docstring is helpful
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular

* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
from statsmodels.compat.python import lrange, zip
import numpy as np
import scikits.timeseries as ts

# Monthly series with duplicated dates: two values for 2001-01 and two for
# 2001-03, used below to compare ways of aggregating per date.
s = ts.time_series([1,2,3,4,5],
            dates=ts.date_array(["2001-01","2001-01",
            "2001-02","2001-03","2001-03"],freq="M"))

print('\nUsing la')
import la

# Wrap the values and the dates in two larrys sharing the same integer labels.
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
# Group mean of the values by date, turned back into a time series
# (NOTE(review): the result apparently still holds one entry per original
# observation, hence the removal of duplicated dates afterwards -- confirm).
s2 = ts.time_series(dta.group_mean(dat).x,dates=ts.date_array(dat.x,freq="M"))
s2u = ts.remove_duplicated_dates(s2)
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
import pandas
コード例 #40
0
ファイル: example_var.py プロジェクト: bashtage/statsmodels
"""
Look at some macro plots, then do some VARs and IRFs.
"""

import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts
import scikits.timeseries.lib.plotlib as tplt

# Load the macrodata dataset as a plain (non-pandas) structure and keep only
# its `data` attribute, which is indexed by field name below.
data = sm.datasets.macrodata.load(as_pandas=False)
data = data.data


### Create Timeseries Representations of a few vars

# Quarterly dates from 1959Q1 to 2009Q3.
dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
    end_date=ts.Date('Q', year=2009, quarter=3))

# View the three selected fields as a plain float array with 3 columns
# (assumes the three fields are stored as floats -- TODO confirm).
ts_data = data[['realgdp','realcons','cpi']].view(float).reshape(-1,3)
# Fourth column: (1 - unemp/100) * pop.
ts_data = np.column_stack((ts_data, (1 - data['unemp']/100) * data['pop']))
ts_series = ts.time_series(ts_data, dates)


# One subplot per variable on a 2x2 grid.
fig = tplt.tsfigure()
fsp = fig.add_tsplot(221)
fsp.tsplot(ts_series[:,0],'-')
fsp.set_title("Real GDP")
fsp = fig.add_tsplot(222)
fsp.tsplot(ts_series[:,1],'r-')
fsp.set_title("Real Consumption")
fsp = fig.add_tsplot(223)
fsp.tsplot(ts_series[:,2],'g-')
コード例 #41
0
def _daily_finder(vmin, vmax, freq):
    """
    Describe every integer position between ``vmin`` and ``vmax``.

    For each position the function decides whether it is flagged as major
    (``maj``), as minor (``min``), and which date format string (``fmt``)
    is attached to it.  The level of detail depends on the span
    ``int(vmax) - int(vmin) + 1`` compared with approximate numbers of
    periods per day, month and year for the frequency `freq`.

    Parameters
    ----------
    vmin, vmax : number
        Limits of the interval.  Both are truncated with ``int``; the
        untruncated `vmin` is kept to decide which position carries the
        first complete label.
    freq : int
        Frequency code, compared with the ``_c.FR_*`` constants.

    Returns
    -------
    info : ndarray
        Structured array with one entry per position and the fields
        ``"val"`` (the position), ``"maj"``, ``"min"`` (booleans) and
        ``"fmt"`` (format string, empty when the position has no label).

    Raises
    ------
    ValueError
        If `freq` is not one of the frequencies handled below.
    """

    # Stays negative for every frequency below hourly, so that none of the
    # `span < periodsperday ...` tests of case 1 can succeed for a positive
    # span and the day-level branch is used instead.
    periodsperday = -1

    # Approximate period counts, only used as thresholds on the span below.
    if freq >= _c.FR_HR:
        if freq == _c.FR_SEC:
            periodsperday = 24 * 60 * 60
        elif freq == _c.FR_MIN:
            periodsperday = 24 * 60
        elif freq == _c.FR_HR:
            periodsperday = 24
        else:
            raise ValueError("unexpected frequency: %s" % check_freq_str(freq))
        periodsperyear = 365 * periodsperday
        periodspermonth = 28 * periodsperday

    elif freq == _c.FR_BUS:
        periodsperyear = 261
        periodspermonth = 19
    elif freq == _c.FR_DAY:
        periodsperyear = 365
        periodspermonth = 28
    elif get_freq_group(freq) == _c.FR_WK:
        periodsperyear = 52
        periodspermonth = 3
    elif freq == _c.FR_UND:
        periodsperyear = 100
        periodspermonth = 10
    else:
        raise ValueError("unexpected frequency")

    # save this for later usage
    vmin_orig = vmin

    (vmin, vmax) = (int(vmin), int(vmax))
    span = vmax - vmin + 1
    dates_ = date_array(start_date=Date(freq, vmin), end_date=Date(freq, vmax))
    # Initialize the output
    info = np.zeros(span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S20")])
    info["val"][:] = np.arange(vmin, vmax + 1)
    info["fmt"][:] = ""
    # The first and last positions are always flagged as major.
    info["maj"][[0, -1]] = True
    # .. and set some shortcuts
    info_maj = info["maj"]
    info_min = info["min"]
    info_fmt = info["fmt"]

    def first_label(label_flags):
        # Return the first flagged position; position 0 is skipped in favour
        # of the second flag when the original vmin had a fractional part.
        if (label_flags[0] == 0) and (label_flags.size > 1) and ((vmin_orig % 1) > 0.0):
            return label_flags[1]
        else:
            return label_flags[0]

    # In every case below the formats are assigned from the finest to the
    # coarsest level: a later assignment overrides an earlier one on the
    # positions they share, so the order of the statements matters.

    # Case 1. Less than a month
    if span <= periodspermonth:

        day_start = period_break(dates_, "day")
        month_start = period_break(dates_, "month")

        def _hour_finder(label_interval, force_year_start):
            # Major at each new day, minor/label at each new hour that is a
            # multiple of `label_interval`.
            _hour = dates_.hour
            _prev_hour = (dates_ - 1).hour
            # True where the hour differs from that of the previous period.
            hour_start = (_hour - _prev_hour) != 0
            info_maj[day_start] = True
            info_min[hour_start & (_hour % label_interval == 0)] = True
            year_start = period_break(dates_, "year")
            info_fmt[hour_start & (_hour % label_interval == 0)] = "%H:%M"
            info_fmt[day_start] = "%H:%M\n%d-%b"
            info_fmt[year_start] = "%H:%M\n%d-%b\n%Y"
            # Make sure the year shows up once if no year break carries it.
            if force_year_start and not has_level_label(year_start, vmin_orig):
                info_fmt[first_label(day_start)] = "%H:%M\n%d-%b\n%Y"

        def _minute_finder(label_interval):
            # Major at each new hour, minor/label at each new minute that is
            # a multiple of `label_interval`.
            hour_start = period_break(dates_, "hour")
            _minute = dates_.minute
            _prev_minute = (dates_ - 1).minute
            minute_start = (_minute - _prev_minute) != 0
            info_maj[hour_start] = True
            info_min[minute_start & (_minute % label_interval == 0)] = True
            year_start = period_break(dates_, "year")
            # Local rebinding of the same field as the outer shortcut.
            info_fmt = info["fmt"]
            info_fmt[minute_start & (_minute % label_interval == 0)] = "%H:%M"
            info_fmt[day_start] = "%H:%M\n%d-%b"
            info_fmt[year_start] = "%H:%M\n%d-%b\n%Y"

        def _second_finder(label_interval):
            # Major at each new minute, minor/label at each new second that
            # is a multiple of `label_interval`.
            minute_start = period_break(dates_, "minute")
            _second = dates_.second
            _prev_second = (dates_ - 1).second
            second_start = (_second - _prev_second) != 0
            info["maj"][minute_start] = True
            info["min"][second_start & (_second % label_interval == 0)] = True
            year_start = period_break(dates_, "year")
            # Local rebinding of the same field as the outer shortcut.
            info_fmt = info["fmt"]
            info_fmt[second_start & (_second % label_interval == 0)] = "%H:%M:%S"
            info_fmt[day_start] = "%H:%M:%S\n%d-%b"
            info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y"

        # Pick the labelling interval from the span, expressed as a fraction
        # (or multiple) of a day; the first matching threshold wins.
        if span < periodsperday / 12000.0:
            _second_finder(1)
        elif span < periodsperday / 6000.0:
            _second_finder(2)
        elif span < periodsperday / 2400.0:
            _second_finder(5)
        elif span < periodsperday / 1200.0:
            _second_finder(10)
        elif span < periodsperday / 800.0:
            _second_finder(15)
        elif span < periodsperday / 400.0:
            _second_finder(30)
        elif span < periodsperday / 150.0:
            _minute_finder(1)
        elif span < periodsperday / 70.0:
            _minute_finder(2)
        elif span < periodsperday / 24.0:
            _minute_finder(5)
        elif span < periodsperday / 12.0:
            _minute_finder(15)
        elif span < periodsperday / 6.0:
            _minute_finder(30)
        elif span < periodsperday / 2.5:
            _hour_finder(1, False)
        elif span < periodsperday / 1.5:
            _hour_finder(2, False)
        elif span < periodsperday * 1.25:
            _hour_finder(3, False)
        elif span < periodsperday * 2.5:
            _hour_finder(6, True)
        elif span < periodsperday * 4:
            _hour_finder(12, True)
        else:
            # Day level: major at each new month, minor at each new day.
            info_maj[month_start] = True
            info_min[day_start] = True
            year_start = period_break(dates_, "year")
            info_fmt = info["fmt"]
            info_fmt[day_start] = "%d"
            info_fmt[month_start] = "%d\n%b"
            info_fmt[year_start] = "%d\n%b\n%Y"
            # Put the full label on the first month (or day) break when no
            # year break carries it.
            if not has_level_label(year_start, vmin_orig):
                if not has_level_label(month_start, vmin_orig):
                    info_fmt[first_label(day_start)] = "%d\n%b\n%Y"
                else:
                    info_fmt[first_label(month_start)] = "%d\n%b\n%Y"

    # Case 2. Less than three months
    elif span <= periodsperyear // 4:
        month_start = period_break(dates_, "month")
        info_maj[month_start] = True
        if freq < _c.FR_HR:
            # Below hourly frequency every position is flagged as minor.
            info["min"] = True
        else:
            day_start = period_break(dates_, "day")
            info["min"][day_start] = True
        week_start = period_break(dates_, "week")
        year_start = period_break(dates_, "year")
        info_fmt[week_start] = "%d"
        info_fmt[month_start] = "\n\n%b"
        info_fmt[year_start] = "\n\n%b\n%Y"
        if not has_level_label(year_start, vmin_orig):
            if not has_level_label(month_start, vmin_orig):
                info_fmt[first_label(week_start)] = "\n\n%b\n%Y"
            else:
                info_fmt[first_label(month_start)] = "\n\n%b\n%Y"
    # Case 3. At most 1.15 years ................
    elif span <= 1.15 * periodsperyear:
        year_start = period_break(dates_, "year")
        month_start = period_break(dates_, "month")
        week_start = period_break(dates_, "week")
        info_maj[month_start] = True
        info_min[week_start] = True
        # A position that is already major must not be minor as well.
        info_min[year_start] = False
        info_min[month_start] = False
        info_fmt[month_start] = "%b"
        info_fmt[year_start] = "%b\n%Y"
        if not has_level_label(year_start, vmin_orig):
            info_fmt[first_label(month_start)] = "%b\n%Y"
    # Case 4. At most 2.5 years .................
    elif span <= 2.5 * periodsperyear:
        year_start = period_break(dates_, "year")
        quarter_start = period_break(dates_, "quarter")
        month_start = period_break(dates_, "month")
        info_maj[quarter_start] = True
        info_min[month_start] = True
        info_fmt[quarter_start] = "%b"
        info_fmt[year_start] = "%b\n%Y"
    # Case 5. At most 4 years ...................
    elif span <= 4 * periodsperyear:
        year_start = period_break(dates_, "year")
        month_start = period_break(dates_, "month")
        info_maj[year_start] = True
        info_min[month_start] = True
        info_min[year_start] = False

        # Only the month breaks falling in January or July get a label.
        month_break = dates_[month_start].month
        jan_or_jul = month_start[(month_break == 1) | (month_break == 7)]
        info_fmt[jan_or_jul] = "%b"
        info_fmt[year_start] = "%b\n%Y"
    # Case 6. At most 11 years ..................
    elif span <= 11 * periodsperyear:
        year_start = period_break(dates_, "year")
        quarter_start = period_break(dates_, "quarter")
        info_maj[year_start] = True
        info_min[quarter_start] = True
        info_min[year_start] = False
        info_fmt[year_start] = "%Y"
    # Case 7. More than 11 years ................
    else:
        year_start = period_break(dates_, "year")
        year_break = dates_[year_start].years
        nyears = span / periodsperyear
        # Only the years that are multiples of the default spacings are
        # flagged; the major ones also get a label.
        (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
        major_idx = year_start[(year_break % maj_anndef == 0)]
        info_maj[major_idx] = True
        minor_idx = year_start[(year_break % min_anndef == 0)]
        info_min[minor_idx] = True
        info_fmt[major_idx] = "%Y"
    # ............................................
    return info
コード例 #42
0
ファイル: ar_model.py プロジェクト: 0ceangypsy/statsmodels
#    t = timer()
#    mod_tb = tb.lpc(y, 2)
#    t_end = timer()
#    print str(t_end - t) + " seconds for talkbox.lpc"
#    print """For higher lag lengths ours quickly fills up memory and starts
#thrashing the swap.  Should we include talkbox C code or Cythonize the
#Levinson recursion algorithm?"""

    ## Try with a pandas series
    import pandas
    import scikits.timeseries as ts
    d1 = ts.Date(year=1700, freq='A')
    #NOTE: have to have yearBegin offset for annual data until parser rewrite
    #should this be up to the user, or should it be done in TSM init?
    #NOTE: not anymore, it's end of year now
    ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
    pandas_dr = pandas.DateRange(start=d1.datetime,
                                 periods=len(sunspots.endog), timeRule='A@DEC')
    #pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

    dates = np.arange(1700, 1700 + len(sunspots.endog))
    dates = ts.date_array(dates, freq='A')
    #sunspots = pandas.Series(sunspots.endog, index=dates)

    #NOTE: pandas only does business days for dates it looks like
    import datetime
    dt_dates = np.asarray(lmap(datetime.datetime.fromordinal,
                              ts_dr.toordinal().astype(int)))
    sunspots = pandas.Series(sunspots.endog, index=dt_dates)

    #NOTE: pandas can't handle pre-1900 dates
コード例 #43
0
ファイル: cmsCtrl.py プロジェクト: kitworks/Oasis-App-API
def getMsgStats(fromFunc=False):
	"""Return per-day message counts for the requested date range.

	Reads ``startDate`` (optional) and ``endDate`` (required) from
	``request.form`` and ``companyId`` (optional) from the session. Messages
	are grouped by calendar day and counted separately for ``msgType`` 1 and
	``msgType`` 2. The result has one ``{"date", "msg1", "msg2"}`` entry per
	day between the start date (``'2015-10-27'`` when none is submitted) and
	``endDate``; days without rows get zero counts.

	When ``fromFunc`` is True the plain list is returned, otherwise the list
	is wrapped with ``jsonify`` under the ``"stats"`` key.
	"""
	companyId = session.get("companyId")
	startDateStr = request.form.get('startDate')

	# Optional restrictions, applied on top of the always-true seed filter.
	condFilter = [ 1==1 ]
	if companyId:
		condFilter.append(Message.companyId==companyId)
	if startDateStr:
		condFilter.append(Message.rdate > startDateStr)

	# One row per day: (day, count of msgType 1, count of msgType 2).
	msgQuery = db_session.query(
		func.date(Message.rdate).label("rdate"),
		func.count(func.IF(Message.msgType==1,1,None)),
		func.count(func.IF(Message.msgType==2,1,None)))
	msgQuery = msgQuery.group_by( func.date(Message.rdate) )
	msgQuery = msgQuery.filter(func.date(Message.rdate) <= request.form['endDate'])
	dailyRows = msgQuery.filter(*condFilter).all()

	db_dates = [row[0] for row in dailyRows]
	db_cnt1 = [row[1] for row in dailyRows]
	db_cnt2 = [row[2] for row in dailyRows]

	stats = []
	dateAry = ts.date_array(db_dates, freq='D')

	# Fill the gaps between the first and last day that have rows with 0.
	fillVals1 = ts.time_series(db_cnt1, dateAry).fill_missing_dates(fill_value=0).filled(0)
	fillVals2 = ts.time_series(db_cnt2, dateAry).fill_missing_dates(fill_value=0).filled(0)

	startDate = datetime.strptime(startDateStr, '%Y-%m-%d') if startDateStr else '2015-10-27'
	endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')
	dayCount = len(fillDateAry)

	# Zero entries for the days before the first day that has rows.
	firstDayIdx = 0
	while firstDayIdx < dayCount and (len(dateAry)==0 or dateAry[0] != fillDateAry[firstDayIdx]):
		stats.append({ "date" : fillDateAry[firstDayIdx].strftime("%m/%d"), "msg1" : 0, "msg2" : 0 })
		firstDayIdx += 1

	# The gap-filled counts, labelled with consecutive days of the range.
	for offset, firstTypeCnt in enumerate(fillVals1):
		secondTypeCnt = fillVals2[offset]
		stats.append({ "date" : fillDateAry[firstDayIdx + offset].strftime("%m/%d"), "msg1" : firstTypeCnt, "msg2" : secondTypeCnt })

	# Zero entries for the days after the last day that has rows.
	for dayIdx in range(len(stats), dayCount):
		stats.append({ "date" : fillDateAry[dayIdx].strftime("%m/%d"), "msg1" : 0, "msg2" : 0 })

	if fromFunc==True :
		return stats
	return jsonify({"stats" : stats})
コード例 #44
0
ファイル: tsa_model.py プロジェクト: zed/statsmodels
                                   _attrs)
    _methods = {'predict': 'dates'}
    _wrap_methods = wrap.union_dicts(
        base.LikelihoodResultsWrapper._wrap_methods, _methods)


wrap.populate_wrapper(TimeSeriesResultsWrapper, TimeSeriesModelResults)

if __name__ == "__main__":
    # Ad-hoc demo: load the macrodata dataset, build a DataFrame indexed by
    # "year:quarter" strings and wrap it in a TimeSeriesModel.
    import scikits.statsmodels.api as sm
    import datetime
    import pandas

    data = sm.datasets.macrodata.load()

    #make a DataFrame
    #TODO: attach a DataFrame to some of the datasets, for quicker use
    dates = [str(int(x[0])) +':'+ str(int(x[1])) \
             for x in data.data[['year','quarter']]]

    # scikits.timeseries is optional here. The names must be qualified with
    # ``ts.``: the module is only imported under that alias, and the former
    # bare ``except`` silently hid the resulting NameError.
    try:
        import scikits.timeseries as ts
    except ImportError:
        ts_dates = None
    else:
        ts_dates = ts.date_array(
            start_date=ts.Date(year=1959, quarter=1, freq='Q'),
            length=len(data.data))

    df = pandas.DataFrame(data.data[['realgdp', 'realinv', 'realcons']],
                          index=dates)
    ex_mod = TimeSeriesModel(df)
    #ts_series = pandas.TimeSeries()
コード例 #45
0
ファイル: cmsCtrl.py プロジェクト: kitworks/Oasis-App-API
def getGoodworkStats(fromFunc=False):
	"""Return per-day post / like / reply counts for the requested date range.

	Reads ``startDate`` (optional) and ``endDate`` (required) from
	``request.form`` and ``companyId`` (optional) from the session. Three
	grouped queries count ``GoodPost``, ``GoodLike`` and ``GoodReply`` rows
	per calendar day; likes and replies are restricted to the company through
	a join on ``User``. The result has one
	``{"date", "post", "like", "reply"}`` entry per day between the start
	date (``'2015-10-27'`` when none is submitted) and ``endDate``; days
	without rows get zero counts.

	Each series is aligned to the day range independently. The previous
	implementation shared the leading-zero padding between the three series,
	so series whose first day differed were padded more than once and the
	index-based merge combined counts from the wrong days.

	When ``fromFunc`` is True the plain list is returned, otherwise the list
	is wrapped with ``jsonify`` under the ``"stats"`` key.
	"""

	def _alignToRange(dateAry, fillVals, fillDateAry):
		# Return one count per day of fillDateAry: fillVals placed at the
		# position of dateAry[0], zeros everywhere else.
		counts = [0] * len(fillDateAry)
		if len(dateAry) == 0:
			return counts
		firstDayIdx = 0
		while firstDayIdx < len(counts) and dateAry[0] != fillDateAry[firstDayIdx]:
			firstDayIdx += 1
		for idx, cnt in enumerate(fillVals):
			if firstDayIdx + idx >= len(counts):
				# Never write past the requested range.
				break
			counts[firstDayIdx + idx] = cnt
		return counts

	db_dates1 = list()
	db_dates2 = list()
	db_dates3 = list()
	db_cnt1 = list()
	db_cnt2 = list()
	db_cnt3 = list()

	companyId = session.get("companyId")

	# --- posts per day ---
	condFilter = [ 1==1 ]
	if companyId:
		condFilter.append(GoodPost.companyId==companyId)
	if request.form.get('startDate'):
		condFilter.append(GoodPost.rdate > request.form['startDate'])

	db_result1 = db_session.query(func.date(GoodPost.rdate).label("rdate"), func.count())\
					.group_by( func.date(GoodPost.rdate) )\
					.filter(func.date(GoodPost.rdate) <= request.form['endDate'])\
					.filter(*condFilter).all()
	for row in db_result1:
		db_dates1.append(row[0])
		db_cnt1.append(row[1])

	# --- likes per day ---
	condFilter2 = [ 1==1 ]
	if request.form.get('startDate'):
		condFilter2.append(GoodLike.rdate > request.form['startDate'])

	if companyId:
		db_result2 = db_session.query(func.date(GoodLike.rdate).label("rdate"), func.count())\
					.join(User, User.userId==GoodLike.userId)\
					.group_by( func.date(GoodLike.rdate) )\
					.filter(func.date(GoodLike.rdate) <= request.form['endDate'])\
					.filter(User.companyId==companyId)\
					.filter(*condFilter2).all()
	else :
		db_result2 = db_session.query(func.date(GoodLike.rdate).label("rdate"), func.count())\
						.group_by( func.date(GoodLike.rdate) )\
						.filter(func.date(GoodLike.rdate) <= request.form['endDate'])\
						.filter(*condFilter2).all()
	for row in db_result2:
		db_dates2.append(row[0])
		db_cnt2.append(row[1])

	# --- replies per day ---
	condFilter3 = [ 1==1 ]
	if request.form.get('startDate'):
		condFilter3.append(GoodReply.rdate > request.form['startDate'])

	if companyId:
		db_result3 = db_session.query(func.date(GoodReply.rdate).label("rdate"), func.count())\
					.join(User, User.userId==GoodReply.userId)\
					.group_by( func.date(GoodReply.rdate) )\
					.filter(func.date(GoodReply.rdate) <= request.form['endDate'])\
					.filter(User.companyId==companyId)\
					.filter(*condFilter3).all()
	else :
		db_result3 = db_session.query(func.date(GoodReply.rdate).label("rdate"), func.count())\
					.group_by( func.date(GoodReply.rdate) )\
					.filter(func.date(GoodReply.rdate) <= request.form['endDate'])\
					.filter(*condFilter3).all()
	for row in db_result3:
		db_dates3.append(row[0])
		db_cnt3.append(row[1])

	dateAry1 = ts.date_array(db_dates1, freq='D')
	dateAry2 = ts.date_array(db_dates2, freq='D')
	dateAry3 = ts.date_array(db_dates3, freq='D')
	timeSrz1 = ts.time_series(db_cnt1, dateAry1)
	timeSrz2 = ts.time_series(db_cnt2, dateAry2)
	timeSrz3 = ts.time_series(db_cnt3, dateAry3)

	# Fill the gaps between each series' first and last day with 0.
	fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
	fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)
	fillVals3 = timeSrz3.fill_missing_dates(fill_value=0)

	fillVals1 = fillVals1.filled(0)
	fillVals2 = fillVals2.filled(0)
	fillVals3 = fillVals3.filled(0)

	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		startDate = '2015-10-27'
	endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Align every series to the full day range on its own first day.
	postCounts = _alignToRange(dateAry1, fillVals1, fillDateAry)
	likeCounts = _alignToRange(dateAry2, fillVals2, fillDateAry)
	replyCounts = _alignToRange(dateAry3, fillVals3, fillDateAry)

	#merge
	stats = list()
	for idx in range(len(fillDateAry)):
		stats.append({
			"date" : fillDateAry[idx].strftime("%m/%d"),
			"post" : postCounts[idx],
			"like" : likeCounts[idx],
			"reply" : replyCounts[idx],
		})

	if fromFunc==True :
		return stats
	else :
		return jsonify({"stats" : stats})
コード例 #46
0
import datetime
import os

from matplotlib.finance import quotes_historical_yahoo
import scikits.timeseries as ts
import scikits.timeseries.lib.tstables as tstab

startdate = datetime.date(2002, 1, 5)
enddate = datetime.date(2003, 12, 1)

# Retrieve data from Yahoo.
# Data format is [(d, open, close, high, low, volume), ...] where d is
# a floating point representation of the number of days since 01-01-01 UTC
quotes = quotes_historical_yahoo('INTC', startdate, enddate)

# Create a DateArray of daily dates and convert it to business day frequency
dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS')

# Second field of every quote tuple (see the data format above).
opens = [q[1] for q in quotes]

# opens: the data portion of the time series
# dates: the date portion of the time series
raw_series = ts.time_series(opens, dates)
test_series = raw_series
#test_series = ts.fill_missing_dates(raw_series, fill_value=-1)

# Write to a PyTables file
output_dir = '../timeseries'
# Best effort: create the output directory and ignore the error raised when
# it cannot be created (for example because it already exists). ``os`` is
# imported above; the script previously called os.mkdir without importing it.
try:
    os.mkdir(output_dir)
except OSError:
    pass
コード例 #47
0
    def __call__(self, *tseries, **kwargs):
        """
        Generate a report and write it to the configured ``output``.

        Parameter values are not saved to the Report instance.

        Parameters
        ----------
        Accepts same parameters as __init__ method of Report class.
        Positional arguments are the series to report; when none are given,
        ``self.tseries`` is used. Keyword options given here override
        ``self.options``, which in turn override the module defaults.

        Raises
        ------
        ValueError
            If the number of header labels does not match the number of
            series, or if a `justify` / `header_justify` specification is
            neither a string nor a list with one entry per series (optionally
            preceded by an entry for the date column).
        """

        option_dict = copy.copy(self.options)
        option_dict.update(self.__make_dict(**kwargs))
        if len(tseries) == 0:
            tseries = self.tseries

        def option(kw):
            # per-call / instance value if present, else the module default
            return option_dict.get(kw, _default_options[kw])

        dates = option('dates')
        header_row = option('header_row')
        header_char = option('header_char')
        header_justify = option('header_justify')
        row_char = option('row_char')
        footer_label = option('footer_label')
        footer_char = option('footer_char')
        footer_func = option('footer_func')
        delim = option('delim')
        justify = option('justify')
        prefix = option('prefix')
        postfix = option('postfix')
        mask_rep = option('mask_rep')
        datefmt = option('datefmt')
        fmt_func = option('fmt_func')
        wrap_func = option('wrap_func')
        col_width = option('col_width')
        nls=option('nls')
        output=option('output')
        fixed_width=option('fixed_width')

        if header_row is not None:
            has_header=True
            if len(header_row) == len(tseries)+1:
                # label for date column included
                rows = [header_row]
            elif len(header_row) == len(tseries):
                # label for date column not included
                rows = [['']+header_row]
            else:
                raise ValueError("mismatch with number of headers and series")
        else:
            has_header=False
            rows=[]

        if fixed_width:

            def _standardize_justify(userspec):
                # Expand a justification spec to one entry per column, the
                # date column first.
                if isinstance(userspec, str):
                    # justify all columns the same way
                    return [userspec for x in range(len(tseries)+1)]
                elif isinstance(userspec, list):
                    if len(userspec) == len(tseries):
                        # justification for date column not included, so set that
                        # to left by default
                        return ['left'] + userspec
                    elif len(userspec) == len(tseries)+1:
                        # justification for date column included (this case
                        # used to fall through and return None)
                        return userspec
                raise ValueError("invalid `justify` specification")

            if justify is not None:
                justify = _standardize_justify(justify)
            else:
                # default column justification
                justify = ['left']
                for ser in tseries:
                    if ser.dtype.char in 'SUO': justify.append('left')
                    else: justify.append('right')


            if header_justify is not None:
                header_justify = _standardize_justify(header_justify)
            else:
                # default column justification
                header_justify = ['left' for x in range(len(tseries)+1)]
        else:
            justify = ['none' for x in range(len(tseries)+1)]
            header_justify = justify

        if datefmt is None:
            def datefmt_func(date): return str(date)
        else:
            def datefmt_func(date): return date.strftime(datefmt)

        if dates is None:
            tseries = ts.align_series(*tseries)
            dates = ts.date_array(start_date=tseries[0].start_date,
                                  end_date=tseries[0].end_date)
        else:
            tseries = ts.align_series(start_date=dates[0], end_date=dates[-1], *tseries)

        if isinstance(fmt_func, list):
            fmt_func = [fmt_func_wrapper(f, mask_rep) for f in fmt_func]
        else:
            fmt_func = [fmt_func_wrapper(fmt_func, mask_rep)]*len(tseries)

        def wrap_func_default(func):
            # identity when no wrapping function is supplied
            if func is None: return lambda x:x
            else: return func

        if isinstance(wrap_func, list):
            if len(wrap_func) == len(tseries):
                # no entry for the date column: leave dates unwrapped
                wrap_func = [lambda x: x] + wrap_func
            wrap_func = [wrap_func_default(func) for func in wrap_func]
        else:
            wrap_func = [wrap_func_default(wrap_func) for x in range(len(tseries)+1)]


        if isinstance(col_width, list):
            if len(col_width) == len(tseries):
                # no entry for the date column
                col_width = [None] + col_width
        else:
            col_width = [col_width for x in range(len(tseries)+1)]

        _sd = dates[0]

        # one logical row per date: formatted date + one formatted value per
        # series, looked up by the date's offset from the first date
        for d in dates:
            rows.append(
                [datefmt_func(d)] + \
                [fmt_func[i](ser.series[d - _sd]) \
                 for i, ser in enumerate(tseries)]
            )

        if footer_func is not None:
            has_footer=True
            if not isinstance(footer_func, list):
                footer_func = [footer_func]*len(tseries)

            if footer_label is None: footer_label = ['']
            else: footer_label = [footer_label]

            footer_data = []
            has_missing = dates.has_missing_dates()

            for i, ser in enumerate(tseries):
                if footer_func[i] is None:
                    footer_data.append('')
                else:
                    if has_missing: _input = ser[dates]
                    else:           _input = ser.series
                    footer_data.append(fmt_func[i](footer_func[i](_input)))

            rows.append(footer_label + footer_data)
        else:
            has_footer=False


        def rowWrapper(row):
            # wrap every cell, then transpose so that each physical row holds
            # one line of every cell ('' where a cell has fewer lines)
            newRows = [wrap_func[i](item).split('\n') for i, item in enumerate(row)]
            return [[(substr or '') for substr in item] for item in map(None, *newRows)]
        # break each logical row into one or more physical ones
        logicalRows = [rowWrapper(row) for row in rows]
        numLogicalRows = len(logicalRows)
        # columns of physical rows
        columns = map(None,*reduce(operator.add,logicalRows))
        numCols = len(columns)
        colNums = list(range(numCols))

        # get the maximum of each column by the string length of its items
        maxWidths = [max(col_width[i], *[len(str(item)) for item in column])
                        for i, column in enumerate(columns)]

        def getSeparator(char, separate):
            # a full-width line of `char`, or None when no separator is wanted
            if char is not None and separate:
                return char * (len(prefix) + len(postfix) + sum(maxWidths) + \
                                             len(delim)*(len(maxWidths)-1))
            else:
                return None

        header_separator = getSeparator(header_char, has_header)
        footer_separator = getSeparator(footer_char, has_footer)
        row_separator = getSeparator(row_char, True)

        # select the appropriate justify method
        justify_funcs = {'center':str.center, 'right':str.rjust, 'left':str.ljust,
                          'none':(lambda text, width: text)}

        # data_start..data_end are the logical rows followed by a row
        # separator; the last data row is excluded (it is followed by the
        # footer separator or by nothing)
        if has_header and has_footer:
            data_start = 1
            data_end = numLogicalRows-3
        elif has_header:
            data_start = 1
            data_end = numLogicalRows-2
        elif has_footer:
            data_start = 0
            data_end = numLogicalRows-3
        else:
            data_start = 0
            data_end = numLogicalRows-2

        for rowNum, physicalRows in enumerate(logicalRows):

            # NOTE(review): header justification is applied only when a header
            # separator exists (header present and header_char set) -- confirm
            # whether `has_header` was intended here.
            if rowNum == 0 and header_separator:
                _justify = header_justify
            else:
                _justify = justify

            def apply_justify(colNum, item, width):
                jfunc_key = str(_justify[colNum]).lower()
                jfunc = justify_funcs[jfunc_key]
                return jfunc(str(item), width)

            for row in physicalRows:

                output.write(
                    prefix + \
                    delim.join([
                        apply_justify(cn, item, width) \
                        for (cn, item, width) in zip(colNums, row, maxWidths)
                    ]) + \
                    postfix + nls)

            if row_separator and (data_start <= rowNum <= data_end):
                output.write(row_separator + nls)
            elif header_separator and rowNum < data_start:
                output.write(header_separator + nls)
            elif footer_separator and rowNum == data_end + 1:
                output.write(footer_separator + nls)
コード例 #48
0
ファイル: cmsCtrl.py プロジェクト: kitworks/Oasis-App-API
def getAccessStats(fromFunc=False):
	"""Return per-day page-view / unique-visitor counts for the requested range.

	Reads ``startDate`` (optional) and ``endDate`` (required) from
	``request.form`` and ``companyId`` (optional) from the session.
	``LogAccess`` rows are grouped by calendar day; ``pv`` is the row count
	and ``uv`` the number of distinct ``LogAccess.userId`` values. The result
	has one ``{"date", "pv", "uv"}`` entry per day between the start date
	(``'2015-10-27'`` when none is submitted) and ``endDate``; days without
	rows get zero counts.

	When a company is set, ``User`` is joined on ``userId`` before filtering
	on ``User.companyId``. Filtering on ``User`` without the join produced an
	implicit cross join, which multiplied ``pv`` by the number of matching
	users.

	When ``fromFunc`` is True the plain list is returned, otherwise the list
	is wrapped with ``jsonify`` under the ``"stats"`` key.
	"""
	companyId = session.get("companyId")

	condFilter = [1==1]
	if request.form.get('startDate'):
		condFilter.append(LogAccess.rdate > request.form['startDate'])

	accessQuery = db_session.query(func.date(LogAccess.rdate).label("rdate"), func.count(), func.count(distinct(LogAccess.userId)))
	if companyId:
		# Tie every access row to its own user before filtering by company.
		accessQuery = accessQuery.join(User, User.userId==LogAccess.userId)
		condFilter.append(User.companyId==companyId)

	db_result = accessQuery\
				.group_by( func.date(LogAccess.rdate) )\
				.filter(*condFilter)\
				.filter(func.date(LogAccess.rdate) <= request.form['endDate'])\
				.all()

	db_dates = list()
	db_pv = list()
	db_uv = list()
	for row in db_result:
		db_dates.append(row[0])
		db_pv.append(row[1])
		db_uv.append(row[2])

	stats = list()
	dateAry = ts.date_array(db_dates, freq='D')
	timeSrz1 = ts.time_series(db_pv, dateAry)
	timeSrz2 = ts.time_series(db_uv, dateAry)

	# Fill the gaps between the first and last day that have rows with 0.
	fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
	fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)

	fillVals1 = fillVals1.filled(0)
	fillVals2 = fillVals2.filled(0)

	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		startDate = '2015-10-27'

	endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Zero entries for the days before the first day that has rows.
	firstDayIdx = 0
	for day in fillDateAry :
		if len(dateAry)==0 or dateAry[0] != fillDateAry[firstDayIdx] :
			stats.append({ "date" : fillDateAry[firstDayIdx].strftime("%m/%d"), "pv" : 0, "uv" : 0 })
			firstDayIdx += 1
			continue
		else :
			break

	# The gap-filled counts, labelled with consecutive days of the range.
	for idx, pv in enumerate(fillVals1):
		uv = fillVals2[idx]
		stats.append({ "date" : fillDateAry[firstDayIdx + idx].strftime("%m/%d"), "pv" : pv, "uv" : uv })

	# Zero entries for the days after the last day that has rows.
	statsLen = len(stats)
	for i in range(len(fillDateAry) - len(stats)) :
		stats.append({ "date" : fillDateAry[statsLen + i].strftime("%m/%d"), "pv" : 0, "uv" : 0 })

	if fromFunc==True :
		return stats
	else :
		return jsonify({"stats" : stats})
コード例 #49
0
ファイル: Excel.py プロジェクト: exedre/e4t
 def _get_tseriesA(freq,date_values,kw):
     """Build a DateArray from the non-NaN entries of `date_values`.

     `date_values` is flattened, NaN entries are dropped, and each remaining
     entry is truncated to an int and used as the `value` of a `ts.Date` with
     frequency `str(freq)`. `kw` is accepted but not used.
     """
     int_values = []
     for raw in date_values.flatten():
         if np.isnan(raw):
             continue
         int_values.append(int(raw))
     stamps = []
     for int_value in int_values:
         stamps.append(ts.Date(freq=str(freq), value=int_value))
     return ts.date_array(stamps)
コード例 #50
0
ファイル: ex_mle_garch.py プロジェクト: zed/statsmodels
garch11:
[ 1.01258264  0.24149155  0.50479994]
-2056.3877404
R include_constant=False
Final Estimate:
 LLH:  2056.397    norm LLH:  2.056397
    omega    alpha1     beta1
1.0123560 0.2409589 0.5049154
'''

# Generate a 20-observation example with generate_gjrgarch, passing a
# length-20 vector of ones as ``varinnovation``.
# NOTE(review): the meaning of the three returned values is not visible in
# this fragment -- confirm against generate_gjrgarch.
erro, ho, etaxo = generate_gjrgarch(20,
                                    ar,
                                    ma,
                                    mu=0.04,
                                    scale=0.01,
                                    varinnovation=np.ones(20))

# Optional example, run only when 'sp500' is listed in ``examples``.
if 'sp500' in examples:
    import tabular as tb
    import scikits.timeseries as ts

    # Hard-coded local path to the input CSV.
    a = tb.loadSV(r'C:\Josef\work-oth\gspc_table.csv')

    # Both columns are reversed with [::-1] before building the daily series.
    s = ts.time_series(a[0]['Close'][::-1],
                       dates=ts.date_array(a[0]['Date'][::-1], freq="D"))

    sp500 = a[0]['Close'][::-1]
    # First difference of the log of the reversed 'Close' column.
    sp500r = np.diff(np.log(sp500))

#plt.show()
コード例 #51
0
* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
from statsmodels.compat.python import lrange
import numpy as np
import scikits.timeseries as ts
import la
import pandas
import tabular as tb
from finance import msft, ibm  # hack to make it run as standalone

s = ts.time_series([1,2,3,4,5],
            dates=ts.date_array(["2001-01","2001-01",
            "2001-02","2001-03","2001-03"],freq="M"))

print('\nUsing la')
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
s2 = ts.time_series(dta.group_mean(dat).x,dates=ts.date_array(dat.x,freq="M"))
s2u = ts.remove_duplicated_dates(s2)
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
pdta = pandas.DataFrame(s.data, np.arange(len(s.data)), [1])
pa = pdta.groupby(dict(zip(np.arange(len(s.data)),
            s.dates.tolist()))).aggregate(np.mean)