Ejemplo n.º 1
0
def _collect_and_plot(files):
    TS = []
    location = []
    for f in files:
        temperatures = [ section[1] for section in parse.parse_file(f)[1:-1] if section[1]['Plant'] == ['tmp'] ]
        for t in temperatures:
            if t['Step'][0] != '0000-00-00.01:00:00':
                print 'Not hourly readings of temperature. Abort.'
                break
            dates = ts.date_array(start_date=ts.Date('H', t['Start'][0]), length=len(t['Value']))
            data = [ float(value.rsplit('/')[0]) for value in t['Value'] ]
            TS.append(ts.TimeSeries(data=data, dates=dates))
            if location and t['Installation'][0] != location:
                print 'Location changed during reading of gs2 files. Probably some bad grouping of gs2 files.'
            location = t['Installation'][0]
    if TS:
        path = '/Users/tidemann/Documents/NTNU/devel/data/eklima/Telemark/'
        for file in os.listdir(path):
            try:
                series = xml.parse(path + file)
                sg.utils.plot_time_series([ts.concatenate((TS)), series], ['b-','r-'], [location, file])
            except:
                print file, 'had no data.'
    else:
        print 'No temperature data.'
Ejemplo n.º 2
0
    def __init__(self, **kwargs):
        r"""
        Store every keyword argument as an attribute, then load the river
        gages and build the daily date range and the data matrix.

        ``start_date`` and ``end_date`` must be among the keyword arguments;
        they are read back from the instance below.  Example::

            kwargs ={'baisnName':'Mackenzie',
                     'start_date':'2000-06-01',
                     'end_date':'2010-06-30',
                     'info_fl':r'C:\00_Work\02_Sim\00_Mackenzie\01_Data\01_Selected_RiverDischarge\RiverGages_description.csv',
                     'pthIn':r'C:\00_Work\02_Sim\00_Mackenzie\01_Data\01_Selected_RiverDischarge'}
        """
        for name, value in kwargs.items():
            setattr(self, name, value)

        self._load_rivergages()

        # First day of the period and its "YYYY/MM/DD 00:00" representation.
        first_day = ts.Date('D', self.start_date)
        self._idate = first_day
        self._idateHStr = first_day.strfmt('%Y/%m/%d') + ' 00:00'

        # Last day of the period and the daily dates in between.
        last_day = ts.Date('D', self.end_date)
        self._fdate = last_day
        self._dates = ts.date_array(start_date=first_day, end_date=last_day,
                                    freq='d')

        # Both end points are counted.
        self.nr_days = last_day - first_day + 1

        self._get_matrix()
    def test_tsfromtxt(self):
        "Tests reading from a text file."
        fcontent = """#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
"""
        import os
        from datetime import datetime
        import tempfile
        (tmp_fd, tmp_fl) = tempfile.mkstemp()
        os.write(tmp_fd, fcontent)
        os.close(tmp_fd)

        mrectxt = tsfromtxt(tmp_fl, delimiter=',', names=tuple('ABCDEFG'),
                               datecols=0, skip_header=2, asrecarray=True)
        os.remove(tmp_fl)
        #
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        self.failUnless(isinstance(mrectxt, TimeSeriesRecords))
        assert_equal(mrectxt._dates, date_array(dlist, 'M'))
        assert_equal(mrectxt.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(mrectxt.F, [1, 1, 1, 1])
        assert_equal(mrectxt.E._mask, [1, 1, 1, 1])
        assert_equal(mrectxt.C, [1, 2, 300000, -1e-10])
 def test_sorted(self):
     "Series and records built from unsorted dates must come out sorted."
     dates = [ts.Date('D', string='2007-01-%02i' % i) for i in (3, 2, 1)]
     (a, b) = zip(*[(3., 30), (2., 20), (1., 10), ])
     # Builtin types instead of the np.float / np.int aliases, which were
     # deprecated in NumPy 1.20 and later removed.
     ndtype = [('a', float), ('b', int)]
     expected_rows = [(1., 10), (2., 20), (3., 30)]
     controldates = date_array(dates, freq='D')
     controldates.sort_chronologically()
     # list(zip(...)) yields the same list on Python 2 and a real list
     # (not an iterator) on Python 3.
     series = time_series(list(zip(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(series._data.tolist(), expected_rows)
     assert_equal(series._dates, controldates)
     #
     trec = time_records(list(zip(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(trec._data.tolist(), expected_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromrecords(list(zip(a, b)), dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), expected_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromarrays([a, b], dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), expected_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
Ejemplo n.º 5
0
def _get_year(year,d,n=1):
    """
    Return a time-series with the same frequency of the input time-series
    with n complete years from input year and values taken from input series

    Positions that fall outside the date range of the input series are
    left as NaN.  Note that the mask of ``d`` is reset (``d.mask = False``)
    as a side effect.

    :param year: base year
    :type year: integer
    :param d: time-series object
    :type d: time-series
    :param n: number of periods to take
    :type n: integer
    :return: output time-series
    :rtype: time-series
    :raises UnknownFrequencyError: if the frequency is not monthly,
        quarterly or annual

    """

    f    = d.freqstr      # input frequency
    nels = _ts_nels(f)    # number of elements in one year (M=12, Q=4, A=1)

    N = n*nels            # total number of elements to take

    if f[0]=='M':
        starty = ts.Date(f,year=year,month=1)
    elif f[0]=='Q':
        starty = ts.Date(f,year=year,quarter=1)
    elif f[0]=='A':
        starty = ts.Date(f,year=year)
    else:
        raise UnknownFrequencyError(f)

    # Last period of the window.  Date arithmetic is used instead of
    # ``month=N`` / ``quarter=N``, which is only a valid date for n == 1.
    endy = starty + (N - 1)

    # Time series of N NaN values starting at starty with frequency f
    s = ts.time_series([np.nan] * N,
                       start_date=starty,
                       freq=f)

    # Dates of the window
    da = ts.date_array(start_date=starty,
                       end_date=endy,
                       freq=f)

    # NOTE(review): the return values of these two calls are discarded; if
    # the methods return new series instead of working in place they have
    # no effect here -- confirm.
    d.fill_missing_dates()
    d.adjust_endpoints()

    # Copy the values from d to s; everything else stays NaN.
    d.mask=False
    first = d.start_date
    last = d.end_date
    for _d in da:
        if first <= _d <= last:
            s[_d]=d[_d]

    return s
Ejemplo n.º 6
0
 def test_convert_to_annual(self):
     "Test convert_to_annual"
     # Number of periods per day for each frequency code.
     per_day = dict(D=1, H=24, T=24 * 60, S=24 * 3600)
     # Minute ('T') and second ('S') frequencies are left out: too slow.
     for freq in ('D', 'H'):
         first = Date(freq, '2001-01-01 00:00:00')
         last = Date(freq, '2004-12-31 23:59:59')
         dates = date_array(start_date=first, end_date=last)
         step = per_day[freq]
         # Three blocks of 365 days followed by one block of 366 days.
         values = list(range(365 * step)) * 3 + list(range(366 * step))
         series = time_series(values, dates=dates)
         control = ma.masked_all((4, 366 * step), dtype=series.dtype)
         control[0, :58 * step] = list(range(58 * step))
         control[0, 59 * step:] = list(range(58 * step, 365 * step))
         control[[1, 2]] = control[0]
         control[3] = list(range(366 * step))
         assert_equal(convert_to_annual(series), control)
     #
     values = list(range(59, 365)) + list(range(366)) + list(range(365))
     series = time_series(values, start_date=Date('D', '2003-03-01'))
     test = convert_to_annual(series)
     expected = ma.masked_values(
         [[-1, 59, 60], [59, 60, 61], [-1, 59, 60]], -1)
     assert_equal(test[:, 59:62], expected)
Ejemplo n.º 7
0
 def test_sorted(self):
     "Series and records built from unsorted dates must come out sorted."
     dates = [ts.Date('D', string='2007-01-%02i' % i) for i in (3, 2, 1)]
     (a, b) = zip(*[
         (3., 30),
         (2., 20),
         (1., 10),
     ])
     # Builtin types instead of the np.float / np.int aliases, which were
     # deprecated in NumPy 1.20 and later removed.
     ndtype = [('a', float), ('b', int)]
     expected_rows = [(1., 10), (2., 20), (3., 30)]
     controldates = date_array(dates, freq='D')
     controldates.sort_chronologically()
     # list(zip(...)) yields the same list on Python 2 and a real list
     # (not an iterator) on Python 3.
     series = time_series(list(zip(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(series._data.tolist(), expected_rows)
     assert_equal(series._dates, controldates)
     #
     trec = time_records(list(zip(a, b)), dates, freq='D', dtype=ndtype)
     assert_equal(trec._data.tolist(), expected_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromrecords(list(zip(a, b)), dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), expected_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
     #
     trec = fromarrays([a, b], dates, names=('a', 'b'))
     assert_equal(trec._data.tolist(), expected_rows)
     assert_equal(trec._dates, controldates)
     assert_equal(trec['a'], [1., 2., 3.])
     assert_equal(trec.a, [1., 2., 3.])
Ejemplo n.º 8
0
    def test_tsfromtxt(self):
        "Tests reading from a text file."
        fcontent = """#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
"""
        import os
        from datetime import datetime
        import tempfile
        (tmp_fd, tmp_fl) = tempfile.mkstemp()
        os.write(tmp_fd, fcontent)
        os.close(tmp_fd)

        mrectxt = tsfromtxt(tmp_fl,
                            delimiter=',',
                            names=tuple('ABCDEFG'),
                            datecols=0,
                            skip_header=2,
                            asrecarray=True)
        os.remove(tmp_fl)
        #
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        self.failUnless(isinstance(mrectxt, TimeSeriesRecords))
        assert_equal(mrectxt._dates, date_array(dlist, 'M'))
        assert_equal(mrectxt.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(mrectxt.F, [1, 1, 1, 1])
        assert_equal(mrectxt.E._mask, [1, 1, 1, 1])
        assert_equal(mrectxt.C, [1, 2, 300000, -1e-10])
Ejemplo n.º 9
0
def add_diurnal(tseries, sine_period, peak_mag):
    """
    Modulate a time series with a sine wave of amplitude peak_mag and
    period sine_period.

    Every sample x at elapsed time t since the start of the series becomes
    ``x + x * peak_mag * sin(2*pi*t / period)``.  The series is modified
    in place and also returned.

    Input: tseries, sine_period (float, hrs), peak_mag (float)
    Output: tseries, the same (now scaled) object that was passed in
    """
    # Convert sine_period to same frequency as tseries:
    # create a time delta of magnitude sine_period (in hours) and
    # convert that time delta into frequency units same as tseries.
    zero_date = ts.now('H')
    second_date = zero_date + sine_period
    time_delta = ts.date_array([zero_date, second_date])
    time_delta = time_delta.asfreq(tseries.freq)
    sine_period = float(time_delta[1] - time_delta[0])

    angular_freq = (2. * np.pi) / sine_period

    start = tseries.start_date
    # Cover every sample; the previous bound of len(tseries)-1 left the
    # last sample unscaled.
    for i in range(len(tseries)):
        passed_time = float(tseries.dates[i] - start)
        sine_factor = peak_mag * np.sin(angular_freq * passed_time)
        tseries[i] = tseries[i] + tseries[i] * sine_factor

    return tseries

# Generate power density function (pdf) to create synthetic TPM from
# mean, stdev, autocorr, npointsx

# The stub below is commented out as a whole; its ``return 0`` used to be
# live and was parsed as unreachable code at the end of add_diurnal.
# def gen_pdf(desired_mean, desired_stdev, bin_width):
#     ## TODO
#     return 0
Ejemplo n.º 10
0
 def test_convert_to_annual(self):
     "Test convert_to_annual"
     # Number of periods per day for each frequency code.
     per_day = dict(D=1, H=24, T=24 * 60, S=24 * 3600)
     # Minute ('T') and second ('S') frequencies are left out: too slow.
     for freq in ('D', 'H'):
         first = Date(freq, '2001-01-01 00:00:00')
         last = Date(freq, '2004-12-31 23:59:59')
         dates = date_array(start_date=first, end_date=last)
         step = per_day[freq]
         # Three blocks of 365 days followed by one block of 366 days.
         values = list(range(365 * step)) * 3 + list(range(366 * step))
         series = time_series(values, dates=dates)
         control = ma.masked_all((4, 366 * step), dtype=series.dtype)
         control[0, :58 * step] = list(range(58 * step))
         control[0, 59 * step:] = list(range(58 * step, 365 * step))
         control[[1, 2]] = control[0]
         control[3] = list(range(366 * step))
         assert_equal(convert_to_annual(series), control)
     #
     values = list(range(59, 365)) + list(range(366)) + list(range(365))
     series = time_series(values, start_date=Date('D', '2003-03-01'))
     test = convert_to_annual(series)
     expected = ma.masked_values(
         [[-1, 59, 60], [59, 60, 61], [-1, 59, 60]], -1)
     assert_equal(test[:, 59:62], expected)
Ejemplo n.º 11
0
Archivo: Excel.py Proyecto: exedre/e4t
 def _get_tseriesD(freq,date_values,kw):
     """Build a date array from the Excel date numbers in ``date_values``.

     NaN entries are skipped.  Every remaining value is truncated to an
     integer and decoded with ``xlrd.xldate_as_tuple`` (datemode 0); only
     the year, month and day parts are used.  ``kw`` is not used.
     """
     valid = [d for d in date_values.flatten() if not np.isnan(d)]
     parts = [xlrd.xldate_as_tuple(int(d), 0) for d in valid]
     dates = []
     for year, month, day in (p[:3] for p in parts):
         dates.append(ts.Date(freq=str(freq), year=year, month=month, day=day))
     return ts.date_array(dates)
Ejemplo n.º 12
0
Archivo: Random.py Proyecto: exedre/e4t
    def request(self,reqs,**kw):
        """Random Request

        For every request in ``reqs`` the visible part of this method
        parses the ``'Instrument'`` string, works out a series name and a
        length, and draws the values from the generator named by
        ``'Source'`` (``constant``, ``walk`` or a distribution looked up on
        ``rand``).  ``kw`` may carry ``SEED`` (seed passed to ``rand.seed``)
        and ``NAME`` (overrides the series name).

        >>> ds = Random()
        >>> xs = ds.request([{'Instrument':'NAME=PIPPO~:2012-12-31~2007-01-01~M', 'Source': 'FRED'},])
        >>> ts = xs['PIPPO']._data
        >>> print ts.freqstr
        M
        """
        logger.debug('request')
        rx={}
        kw=udict(kw)
        if 'SEED' in kw:
            seed = int(kw['SEED'])
            rand.seed(seed)
        for rq in reqs:
            logger.debug("Request: %s",rq)
            s = parse_instrument(rq['Instrument'])
            logger.debug(s)
            params = ldict(mk_params(s['TICKER']))
            # Name precedence: 'name' ticker parameter, else the source
            # name; a non-empty NAME keyword overrides both.
            if 'name' in params:
                name = params['name'].upper()
                del params['name']
            else:
                name = rq['Source'].upper()
            if 'NAME' in kw and kw['NAME']:
                name = kw['NAME'].upper()
            # NOTE(review): kw is shared by all requests, so the NAME stored
            # here is seen as an override by every following request.
            kw['NAME']=name
            # Length of the series: number of dates for a real frequency,
            # otherwise the plain difference END - START.
            if s['FREQ']!='0':
                dr = ts.date_array(freq=s['FREQ'],start_date=s['START'],end_date=s['END'])
                L = len(dr)
            else:
                L = s['END']-s['START']
            params['size']=L
            dd = np.zeros(L)
            logger.debug(params)
            try:
                if re.match('^constant$',rq['Source'],re.I):
                    name = 'CONSTANT'
                    # NOTE(review): float('CONSTANT') always raises
                    # ValueError, so rand.uniform is never reached; the
                    # value presumably should come from params -- confirm.
                    v = float(name)
                    params['LOW']=v
                    params['HIGH']=v
                    dd = rand.uniform(**params)
                elif re.match('^walk$',rq['Source'],re.I):
                    dd = RandomWalk(**params)
                # NOTE(review): the alternation is not grouped, so '$' only
                # applies to 'zipf'; any source that merely starts with one
                # of the other names matches as well.
                elif re.match('^uniform|beta|binomial|chisquare|exponential|gamma|geometric|gumbel|hypergeometric|laplace|logistic|lognormal|logseries|multinomial|multivariate_normal|negative_binomial|noncentral_chisquare|noncentral_f|normal|pareto|poisson|power|rayleigh|standard_cauchy|standard_exponential|standard_gamma|standard_normal|standard_t|triangular|uniform|vonmises|wald|weibull|zipf$',rq['Source'],re.I):
                    generator = rq['Source'].lower()
                    # When rand has no such attribute, dd silently keeps the
                    # zeros assigned above.
                    if hasattr(rand,generator):
                        f = getattr(rand,generator)
                        dd = f(**params)
                ## Add other distributions here
                else:
                    raise TypeError, 'Unknown generator'
            except TypeError, exc:
                # NOTE(review): uses the logging module directly, not the
                # ``logger`` object used everywhere else in this method.
                logging.error('%s generator - %s',rq['Source'],exc.args[0])
            except:
Ejemplo n.º 13
0
 def _make_predict_dates(self):
     """Store the prediction dates on ``self._data`` as datetime objects.

     The dates run from ``predict_start`` to ``predict_end`` with the
     frequency of the data; they are converted to ordinals and from there
     to ``datetime.datetime`` instances.
     """
     info = self._data
     span = date_array(start_date=info.predict_start,
                       end_date=info.predict_end,
                       freq=info.freq)
     ordinals = span.toordinal().astype(int)
     as_datetimes = []
     for ordinal in ordinals:
         as_datetimes.append(datetime.datetime.fromordinal(ordinal))
     self._data.predict_dates = asarray(as_datetimes)
Ejemplo n.º 14
0
Archivo: Excel.py Proyecto: exedre/e4t
 def _get_tseriesQ(freq,date_values,kw):
     """Build a quarterly date array from the quarter numbers in
     ``date_values``.

     NaN entries are skipped.  Each remaining quarter number is folded into
     1..4 and offset by four for every complete group of four positions in
     the flattened input (NaN positions are counted) and by the optional
     base year ``kw['YEAR']``; the result is used as the ``value`` of a
     ``ts.Date``.
     """
     by=0
     # ``in`` instead of dict.has_key(), which no longer exists on Python 3.
     if 'YEAR' in kw:
         # NOTE(review): eval() executes whatever expression the caller
         # supplies; if YEAR can come from untrusted input this should be
         # int(kw['YEAR']) -- confirm that no expressions are expected.
         by = eval(kw['YEAR'])-1
     # Floor division keeps the integer result on Python 2 and 3 alike.
     v = [ (int(d)-1)%4+1+((_i//4)*4)+by*4
           for _i,d in enumerate(date_values.flatten())
           if not np.isnan(d) ]
     D = [ ts.Date(freq=str(freq),value=_v) for _v in v]
     return ts.date_array(D)
 def setup(self):
     "Generic setup"
     values = np.arange(5)
     mask = ma.make_mask([1, 0, 0, 1, 1])
     # Two rows: the values (resp. the mask) forwards and backwards.
     stacked_values = np.r_[values, values[::-1]].reshape(2, -1)
     stacked_mask = np.r_[[mask, mask[::-1]]]
     base = ma.array(stacked_values.T, mask=stacked_mask.T)
     mrec = mr.fromarrays(base.T,)
     # One date string per value: '2007-01' ... '2007-05'.
     dlist = ['2007-%02i' % (i + 1) for i in values]
     dates = date_array(dlist)
     self.data = [values, mask, mrec, dlist, dates,
                  time_series(mrec, dates), time_records(mrec, dates)]
Ejemplo n.º 16
0
 def setup(self):
     "Generic setup"
     values = np.arange(5)
     mask = ma.make_mask([1, 0, 0, 1, 1])
     # Two rows: the values (resp. the mask) forwards and backwards.
     stacked_values = np.r_[values, values[::-1]].reshape(2, -1)
     stacked_mask = np.r_[[mask, mask[::-1]]]
     base = ma.array(stacked_values.T, mask=stacked_mask.T)
     mrec = mr.fromarrays(base.T, )
     # One date string per value: '2007-01' ... '2007-05'.
     dlist = ['2007-%02i' % (i + 1) for i in values]
     dates = date_array(dlist)
     self.data = [values, mask, mrec, dlist, dates,
                  time_series(mrec, dates), time_records(mrec, dates)]
Ejemplo n.º 17
0
 def test_dates_on_several_columns(self):
     "Test tsfromtxt when the date spans several columns."
     datatxt = """
     2001, 01, 0.0, 10.
     2001, 02, 1.1, 11.
     2001, 02, 2.2, 12.
     """

     def to_month(y, m):
         # Year and month arrive as separate text columns.
         return Date('M', year=int(y), month=int(m))

     options = dict(delimiter=',', dtype=float, datecols=(0, 1),
                    dateconverter=to_month)
     test = tsfromtxt(StringIO.StringIO(datatxt), **options)
     expected_dates = date_array(['2001-01', '2001-02', '2001-02'], freq='M')
     assert_equal(test, [[0., 10.], [1.1, 11.], [2.2, 12.]])
     assert_equal(test.dates, expected_dates)
Ejemplo n.º 18
0
 def _make_predict_dates(self):
     try:
         from scikits.timeseries import date_array
     except ImportError:
         self._data.predict_dates = None
     data = self._data
     dtstart = data.predict_start
     dtend = data.predict_end
     freq = data.freq
     #pandas_freq = _freq_to_pandas[freq]
     dates = date_array(start_date=dtstart, end_date=dtend,
                             freq=freq).toordinal().astype(int)
     self._data.predict_dates = asarray(
             [datetime.datetime.fromordinal(i) for i in dates])
Ejemplo n.º 19
0
 def _make_predict_dates(self):
     try:
         from scikits.timeseries import date_array
     except ImportError:
         self._data.predict_dates = None
     data = self._data
     dtstart = data.predict_start
     dtend = data.predict_end
     freq = data.freq
     #pandas_freq = _freq_to_pandas[freq]
     dates = date_array(start_date=dtstart, end_date=dtend,
                        freq=freq).toordinal().astype(int)
     self._data.predict_dates = asarray(
         [datetime.datetime.fromordinal(i) for i in dates])
Ejemplo n.º 20
0
    def test_with_datecols(self):
        "Test two datecols"
        fcontent = StringIO.StringIO("""
year, month, A, B
2009, 01, 1, 1.
2009, 03, 3, 3.
""")
        dateconv = lambda y, m: Date("M", year=int(y), month=int(m))
        test = tsfromtxt(fcontent, delimiter=",", skip_header=1, names=True,
                         converters={'dates': dateconv}, datecols=(0, 1))
        dates = date_array(['2009-01', '2009-03'], freq='M')
        assert_equal(test.dates.tovalue(), dates)
        assert_equal(test['A'], [1, 3])
        assert_equal(test['B'], [1., 3.])
        assert_equal(test.dtype, np.dtype([('A', int), ('B', float)]))
Ejemplo n.º 21
0
Archivo: Excel.py Proyecto: exedre/e4t
 def _get_tseriesM(freq,date_values,kw):
     """Build a monthly date array from ``date_values``.

     ``kw['DFORMAT']`` selects how the values are read (default ``'INT'``):

     * ``'INT'``: every value is a year; the month is derived from the
       position of the value in the flattened input (NaN positions count).
     * ``'XL_DATE'``: every value is an Excel date number decoded with
       ``xlrd.xldate_as_tuple`` (datemode 0).

     NaN entries are skipped.  Raises ValueError for any other format.
     """
     # ``in`` instead of dict.has_key(), which no longer exists on Python 3.
     dformat = kw['DFORMAT'] if 'DFORMAT' in kw else 'INT'
     if dformat=='INT':
         v = [ (int(d)-1)*12+i%12+1 for i,d in enumerate(date_values.flatten()) if not np.isnan(d)  ]
         D = [ ts.Date(freq=str(freq),value=_v) for _v in v]
     elif dformat=='XL_DATE':
         v = [ xlrd.xldate_as_tuple(d,0) for d in date_values.flatten() if not np.isnan(d)  ]
         D = [ ts.Date(freq=str(freq),year=_v[0],month=_v[1]) for _v in v]
     else:
         logger.error('DATE FORMAT NOT SUPPORTED ON EXCEL READING')
         # Call form of raise: valid on both Python 2 and Python 3.
         raise ValueError(dformat)
     return ts.date_array(D)
Ejemplo n.º 22
0
 def test_dates_on_several_columns(self):
     "Test tsfromtxt when the date spans several columns."
     datatxt = """
     2001, 01, 0.0, 10.
     2001, 02, 1.1, 11.
     2001, 02, 2.2, 12.
     """

     def to_month(y, m):
         # Year and month arrive as separate text columns.
         return Date('M', year=int(y), month=int(m))

     options = dict(delimiter=',', dtype=float, datecols=(0, 1),
                    dateconverter=to_month)
     test = tsfromtxt(StringIO.StringIO(datatxt), **options)
     expected_dates = date_array(['2001-01', '2001-02', '2001-02'], freq='M')
     assert_equal(test, [[0., 10.], [1.1, 11.], [2.2, 12.]])
     assert_equal(test.dates, expected_dates)
Ejemplo n.º 23
0
 def setUp(self):
     "Initializes"
     npts = 120
     dlin = np.linspace(0, 10, npts)
     drnd = np.random.rand(npts)
     data = np.array(list(zip(dlin, drnd)),
                     dtype=[('lin', float), ('rand', float)])
     # Monthly dates for the npts months before the current one.
     dates = ts.date_array(start_date=ts.now('M') - npts, length=npts,
                           freq='M')
     signal = np.random.rand(npts) + np.linspace(-1, 1, npts)
     # NOTE(review): full_year is the string 'False', which is truthy in
     # Python -- confirm that ENSOIndicator expects a string here.
     enso = ENSOIndicator(signal, dates=dates, thresholds=(-0.5, 0.5),
                          full_year='False', refseason='NDH', minsize=5)
     cdat = data.view(ClimateRecords)
     cdat._dates = dates
     cdat.ensoindicator = enso
     self.dlin, self.cdat, self.enso = dlin, cdat, enso
Ejemplo n.º 24
0
    def test_with_datecols(self):
        "Test two datecols"
        fcontent = StringIO.StringIO("""
year, month, A, B
2009, 01, 1, 1.
2009, 03, 3, 3.
""")
        dateconv = lambda y, m: Date("M", year=int(y), month=int(m))
        test = tsfromtxt(fcontent,
                         delimiter=",",
                         skip_header=1,
                         names=True,
                         converters={'dates': dateconv},
                         datecols=(0, 1))
        dates = date_array(['2009-01', '2009-03'], freq='M')
        assert_equal(test.dates.tovalue(), dates)
        assert_equal(test['A'], [1, 3])
        assert_equal(test['B'], [1., 3.])
        assert_equal(test.dtype, np.dtype([('A', int), ('B', float)]))
Ejemplo n.º 25
0
    def test_with_names(self):
        "Tests w/ names"
        fcontent = StringIO.StringIO("""#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
""")
        test = tsfromtxt(fcontent, delimiter=",", datecols=0, skip_header=2,
                         names="A,B,C,D,E,F", freq='M')
        assert(isinstance(test, TimeSeries))
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        assert_equal(test.dates.tovalue(),
                     date_array(dlist, freq='M').tovalue())
        assert_equal(test.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
        assert_equal(test['F'], [1, 1, 1, 1])
        assert_equal(test['E'].mask, [1, 1, 1, 1])
        assert_equal(test['C'], [1, 2, 300000, -1e-10])
Ejemplo n.º 26
0
 def setUp(self):
     "Initializes"
     npts = 120
     dlin = np.linspace(0, 10, npts)
     drnd = np.random.rand(npts)
     data = np.array(list(zip(dlin, drnd)),
                     dtype=[('lin', float), ('rand', float)])
     # Monthly dates for the npts months before the current one.
     dates = ts.date_array(start_date=ts.now('M') - npts, length=npts,
                           freq='M')
     signal = np.random.rand(npts) + np.linspace(-1, 1, npts)
     # NOTE(review): full_year is the string 'False', which is truthy in
     # Python -- confirm that ENSOIndicator expects a string here.
     enso = ENSOIndicator(signal, dates=dates, thresholds=(-0.5, 0.5),
                          full_year='False', refseason='NDH', minsize=5)
     cdat = data.view(ClimateRecords)
     cdat._dates = dates
     cdat.ensoindicator = enso
     self.dlin, self.cdat, self.enso = dlin, cdat, enso
Ejemplo n.º 27
0
    def test_without_names(self):
        "Test w/o names"
        fcontent = StringIO.StringIO("""#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
""")
        test = tsfromtxt(fcontent,
                         delimiter=",",
                         skip_header=1,
                         names=True,
                         freq='M')
        assert (isinstance(test, TimeSeries))
        dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
        assert_equal(test.dates.tovalue(),
                     date_array(dlist, freq='M').tovalue())
        assert_equal(test.dtype.names,
                     ['One_S', 'Two_I', 'Three_F', 'Four_M', 'Five_', 'Six_C'])
        assert_equal(test['Six_C'], [1, 1, 1, 1])
        assert_equal(test['Five_'].mask, [1, 1, 1, 1])
        assert_equal(test['Three_F'], [1, 2, 300000, -1e-10])
Ejemplo n.º 28
0
import statsmodels.api as sm
import numpy as np
import pandas

# Getting started
# ---------------

# Load the sunspots example data set shipped with statsmodels.
data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.
# We can use scikits.timeseries and datetime to create this array.

import datetime
import scikits.timeseries as ts

# One annual date per observation, starting in 1700.
dates = ts.date_array(start_date=1700, length=len(data.endog), freq="A")

# An earlier variant built the datetimes from an integer array of ordinals;
# it is kept here for reference only:

# .. from datetime import datetime
# .. dt_dates = dates.toordinal().astype(int)
# .. dt_dates = np.asarray([datetime.fromordinal(i) for i in dt_dates])

# The active code converts the date array to a plain list instead
# (presumably a list of datetime objects -- confirm).
dt_dates = dates.tolist()

# Using Pandas
# ------------

# Make a pandas Series of the observations, indexed by the dates
endog = pandas.Series(data.endog, index=dt_dates)

# and instantiate the model
Ejemplo n.º 29
0
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular

* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""

import numpy as np
import scikits.timeseries as ts

# Monthly series with duplicated dates: 2001-01 and 2001-03 appear twice.
s = ts.time_series([1, 2, 3, 4, 5],
                   dates=ts.date_array(
                       ["2001-01", "2001-01", "2001-02", "2001-03", "2001-03"],
                       freq="M"))

print '\nUsing la'
import la
# Wrap the values and the dates in larrys that share the same integer labels.
dta = la.larry(s.data, label=[range(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[range(len(s.data))])
# group_mean presumably averages the values that share a date -- confirm
# against the la documentation.  The result is turned back into a series.
s2 = ts.time_series(dta.group_mean(dat).x,
                    dates=ts.date_array(dat.x, freq="M"))
# Drop the duplicated dates that are still present in s2.
s2u = ts.remove_duplicated_dates(s2)
print repr(s)
print dat
print repr(s2)
print repr(s2u)

print '\nUsing pandas'
Ejemplo n.º 30
0
"""
Look at some macro plots, then do some VARs and IRFs.
"""

import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts
import scikits.timeseries.lib.plotlib as tplt

# Load the macrodata example data set and keep only its data record array.
data = sm.datasets.macrodata.load(as_pandas=False)
data = data.data

### Create Timeseries Representations of a few vars

# Quarterly dates from 1959Q1 to 2009Q3.
dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
                      end_date=ts.Date('Q', year=2009, quarter=3))

# Columns 0-2: real GDP, real consumption and CPI as a plain float array.
ts_data = data[['realgdp', 'realcons', 'cpi']].view(float).reshape(-1, 3)
# Column 3: (1 - unemp / 100) * pop.
ts_data = np.column_stack((ts_data, (1 - data['unemp'] / 100) * data['pop']))
ts_series = ts.time_series(ts_data, dates)

# One subplot per variable on a 2x2 grid (positions 221, 222, 223).
fig = tplt.tsfigure()
fsp = fig.add_tsplot(221)
fsp.tsplot(ts_series[:, 0], '-')
fsp.set_title("Real GDP")
fsp = fig.add_tsplot(222)
fsp.tsplot(ts_series[:, 1], 'r-')
fsp.set_title("Real Consumption")
fsp = fig.add_tsplot(223)
fsp.tsplot(ts_series[:, 2], 'g-')
fsp.set_title("CPI")
Ejemplo n.º 31
0
def getHappinessStats(fromFunc=False, groupId=None, userId=None, endDateIn=None):
	"""Return the daily average happiness values up to an end date.

	The company id is read from the posted form or, failing that, from the
	session.  For every day three averages are queried: the overall one,
	one for a group and one for a user.  Each entry of the result is a
	dict with the keys "date" (formatted "%m/%d"), "val", "gr" and "me".

	fromFunc  -- when True the list of dicts is returned as is and the end
	             date defaults to today; otherwise the list is wrapped with
	             jsonify under the key "stats".
	groupId   -- restricts the rows to this group (checked before userId).
	userId    -- restricts the rows to this user when no groupId is given.
	endDateIn -- end date as "%Y-%m-%d" text; a posted 'endDate' form value
	             takes precedence.

	Returns None when no company id or no end date is available.
	"""
	# The always-true first entry keeps the filter list from being empty.
	condFilter = [1==1]

	companyId = request.form.get("companyId") and request.form.get("companyId") or session.get("companyId")
	if companyId :
		condFilter.append(Happiness.companyId==companyId)
	else :
		return None

	if groupId:
		condFilter.append(Happiness.groupId==groupId)
	elif userId:
		condFilter.append(Happiness.userId==userId)
	
	if request.form.get('endDate'):
		endDateIn = request.form['endDate']
	elif fromFunc==True and not endDateIn:
		endDateIn = datetime.now().strftime("%Y-%m-%d")
	if not endDateIn:
		return None
	
	# Rows strictly after the posted start date only.
	if request.form.get("startDate"):
		condFilter.append(Happiness.rdate > request.form['startDate'])

	# cols = func.date(Happiness.rdate).label("rdate"), func.avg(Happiness.happyVal)
	# if request.form.get('type')=='all' and request.form.get('groupId') and request.form.get('userId'):
		# cols = cols + ( func.avg(func.IF(Happiness.groupId==groupId, Happiness.happyVal, None)) )
		# cols = cols + ( func.avg(func.IF(Happiness.userId==userId, Happiness.happyVal, None)) )
	# NOTE(review): the group and user averages use the ids posted in the
	# form, not the groupId/userId arguments of this function -- confirm.
	cols = func.date(Happiness.rdate).label("rdate"), func.avg(Happiness.happyVal), func.avg(func.IF(Happiness.groupId==request.form.get('groupId'), Happiness.happyVal, None)), func.avg(func.IF(Happiness.userId==request.form.get('userId'), Happiness.happyVal, None))

	db_result = db_session.query(*cols)\
				.group_by( func.date(Happiness.rdate) )\
				.filter(func.date(Happiness.rdate) <= endDateIn)\
				.filter(*condFilter).all()

	# Split the rows into one list per selected column.
	db_dates = list()
	db_vals1 = list()
	db_vals2 = list()
	db_vals3 = list()
	for row in db_result:
		db_dates.append(row[0])
		db_vals1.append(row[1])
		# Four columns are selected above, so this holds for every row.
		if len(row)>2 : 
			db_vals2.append(row[2])
			db_vals3.append(row[3])

	# Daily series of the three averages; days without rows become 0.
	stats = list()
	dateAry = ts.date_array(db_dates, freq='D')
	timeSrz1 = ts.time_series(db_vals1, dateAry)
	fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
	fillVals1 = fillVals1.filled(0)
	timeSrz2 = ts.time_series(db_vals2, dateAry)
	fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)
	fillVals2 = fillVals2.filled(0)
	timeSrz3 = ts.time_series(db_vals3, dateAry)
	fillVals3 = timeSrz3.fill_missing_dates(fill_value=0)
	fillVals3 = fillVals3.filled(0)

	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		# NOTE(review): raises IndexError when the query returned no rows.
		startDate = db_dates[0]
	endDate = datetime.strptime(endDateIn, '%Y-%m-%d')
	# Every day of the reported period.
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Leading days of the period before the first day with data get zeros.
	firstDayIdx = 0
	for day in fillDateAry :
		if len(dateAry)==0 or dateAry[0] != fillDateAry[firstDayIdx] :
			stats.append({ "date" : fillDateAry[firstDayIdx].strftime("%m/%d"), "val" : 0, "gr":0, "me":0 })
			firstDayIdx += 1
			continue
		else :
			break
	# Days covered by the data: values formatted with two decimals.
	# NOTE(review): fillDateAry[firstDayIdx + idx] assumes the data never
	# extends outside the reported period -- confirm.
	for idx, val in enumerate(fillVals1):
		gr = fillVals2[idx]
		me = fillVals3[idx]
		rowDict = { "date" : fillDateAry[firstDayIdx + idx].strftime("%m/%d"), "val" : "{0:.2f}".format(float(val or 0)) }
		rowDict["gr"] = "{0:.2f}".format(float(gr or 0))
		rowDict["me"] = "{0:.2f}".format(float(me or 0))
		stats.append(rowDict)

	# Trailing days of the period after the last day with data get zeros.
	statsLen = len(stats)
	for i in range(len(fillDateAry) - len(stats)) :
		stats.append({ "date" : fillDateAry[statsLen + i].strftime("%m/%d"), "val" : 0, "gr":0, "me":0 })		
		
	if fromFunc==True : 
		return stats
	else :
		return jsonify({"stats" : stats})
Ejemplo n.º 32
0
def load_oni(mode='standard', **options):
    """
    Loads the ONI 3-m averaged monthly SST anomalies over the Niño-3.4 region
    and returns a :class:`~scikits.hydroclimpy.enso.ENSOIndicator` object.

    The indicator is cached as a pickle inside the zip archive named by the
    ``ensoarchive`` option of the ``ENSO`` configuration section.
    If an :class:`ENSOIndicator` is already stored there it is returned
    directly; otherwise the data are retrieved, converted and stored in the
    archive before being returned.

    Two modes are accepted as arguments:
    
    - in the ``standard`` mode, the SSTs are retrieved from the original CPC
      website_.
      Data are available from Jan. 1950 to present.
    - in the ``backup`` mode, the SSTs are retrieved from the CPC `ftp site <ftpsite>`_.
      Data are available from Jan. 1900 to present.

    If the module ``BeautifulSoup`` cannot be imported, a warning is issued and
    the ``backup`` mode is used.

    .. _website : http://www.cpc.noaa.gov/products/analysis_monitoring/ensostuff/ensoyears.shtml
    .. _ftpsite : ftp://eclipse.ncdc.noaa.gov/pub/ersst/pdo/el_nino_v3.dat.


    Parameters
    ----------
    mode : {'standard','backup'}, optional
        Mode describing the data to download.
    options : dictionary
        Optional parameters to pass to the ENSOIndicator for the definition of
        ENSO indices.
        They override the values read from the ``ENSO.ONI`` configuration
        section.
    thresholds : tuple of floats, optional
        Low and high temperature thresholds for the definition of El Niño and
        La Niña conditions.
        By default, the CPC uses -0.5oC and +0.5oC.
    minimum_size : int, optional
        Minimum number of consecutive months in El Niño / La Niña conditions
        required for the definition of an episode.
        By default, the CPC uses 5 consecutive months.
    reference_season : string or tuple, optional
        Months that must be in an episode for it to be valid.
        By default, the CPC uses None (no restriction on the months).
    full_year : boolean, optional
        The CPC uses ``full_year=False``.

    Returns
    -------
    ensoi : ENSOIndicator
        The indicator, either read from the archive or freshly built.

    References
    ----------
    Xue, Y., T. M. Smith, and R. W. Reynolds, 2003: Interdecadal changes of 30-yr
    SST normals during 1871-2000. *J. Climate*, 16, 1601-1612.

    """
    # Initialization .......................
    mode = mode.lower()
    cfg = dict(config.items('ENSO.ONI'))
    cfg.update(options)
    try:
        from BeautifulSoup import BeautifulSoup, SoupStrainer
    except ImportError:
        warnings.warn("The module 'BeautifulSoup' is unavailable.\n"\
                      "Reverting to backup mode")
        mode = 'backup'
    #
    datadir = cfg['datadir']
    if mode == 'standard':
        netfile = cfg['netfile']
        archive = cfg['archive']
    else:
        netfile = cfg['netfile_backup']
        archive = cfg['archive_backup']
    # Name of the zip archive caching the pickled indicators
    ensoarchive = dict(config.items('ENSO'))['ensoarchive']
    if ensoarchive[-4:].lower() != '.zip':
        ensoarchive += '.zip'
    # Try to open an existing ENSOIndicator.
    # The archive is only opened for reading here: the mode needed to store
    # the result is remembered and the archive is reopened once the data are
    # available, so that a failed download cannot leave a truncated archive.
    archive_mode = 'a'
    try:
        zipf = zipfile.ZipFile(ensoarchive, 'r')
    except IOError:
        archive_mode = 'w'
        ensologger.info("... Creating archive")
    else:
        try:
            # NOTE: unpickling executes arbitrary code; the archive must be
            # a trusted, locally generated file.
            ensoi = cPickle.loads(zipf.read(archive))
        except KeyError:
            # The archive exists but has no entry for this mode yet
            ensologger.info("... Appending to archive")
        else:
            ensologger.info("... Loading from existing archived file")
            if isinstance(ensoi, enso.ENSOIndicator):
                return ensoi
            # Anything else is ignored: a fresh entry is appended below
        finally:
            # Always release the read handle (also on the early return)
            zipf.close()
    #
    sourcedir = np.lib._datasource.DataSource(datadir)
    dfile = sourcedir.open(netfile)
    try:
        if mode == 'standard':
            # Load the file as a tree, but only take the SST table (border=1)
            table = BeautifulSoup(dfile.read(),
                                  parseOnlyThese=SoupStrainer("table", border=1))
            # Separate it by rows, but skip the first one (the header)
            years = []
            data = []
            indices = []
            color = {'red': +1, 'white': 0, 'blue': -1}
            deft = [(None, 'color:white')]
            for row in table.findAll("tr")[1:]:
                cols = row.findAll('td')
                # The first cell holds the year, the others the monthly values
                years.append(int(cols.pop(0).strong.string))
                # Empty cells ('&nbsp;') are flagged with 99.9 and masked below
                data.append([
                    float(_.fetchText()[-1].string.replace('&nbsp;', '99.9'))
                    for _ in cols
                ])
                # Cell color -> +1/0/-1 (cells without a span count as white)
                indices.append([
                    color[getattr(_.span, 'attrs', deft)[0][-1].split(':')[-1]]
                    for _ in cols
                ])
            #
            start_date = Date('M', year=years[0], month=1)
            ensoi = enso.ENSOIndicator(
                ma.masked_values(data, 99.9).ravel(),
                start_date=start_date,
            )
            #        oni.set_indices(full_year=False, minsize=5, refseason=None)
            # NOTE: the color-based indices are built but not used afterwards
            indices = time_series(np.array(indices).ravel(),
                                  start_date=start_date)
        else:
            rawdata = np.loadtxt(dfile)
            # The first two columns are used as (year, month)
            dates = date_array(
                [Date('M', year=yy, month=mm) for (yy, mm) in rawdata[:, :2]],
                freq='M')
            # Centered 3-point moving mean of the last column
            ensoi = enso.ENSOIndicator(
                cmov_mean(rawdata[:, -1], 3).round(2),
                dates,
            )
    finally:
        dfile.close()
    #
    _set_ensoindicator_options(ensoi, **cfg)
    ensoi.set_indices()
    #
    # Store in the archive
    zipf = zipfile.ZipFile(ensoarchive, archive_mode)
    try:
        zipf.writestr(archive, cPickle.dumps(ensoi))
    finally:
        zipf.close()
    return ensoi
Ejemplo n.º 33
0
#    t = timer()
#    mod_tb = tb.lpc(y, 2)
#    t_end = timer()
#    print str(t_end - t) + " seconds for talkbox.lpc"
#    print """For higher lag lengths ours quickly fills up memory and starts
#thrashing the swap.  Should we include talkbox C code or Cythonize the
#Levinson recursion algorithm?"""

    ## Try with a pandas series
    import pandas
    import scikits.timeseries as ts
    d1 = ts.Date(year=1700, freq='A')
    #NOTE: have to have yearBegin offset for annual data until parser rewrite
    #should this be up to the user, or should it be done in TSM init?
    #NOTE: not anymore, it's end of year now
    ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
    pandas_dr = pandas.DateRange(start=d1.datetime,
                                 periods=len(sunspots.endog), timeRule='A@DEC')
    #pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

    dates = np.arange(1700, 1700 + len(sunspots.endog))
    dates = ts.date_array(dates, freq='A')
    #sunspots = pandas.Series(sunspots.endog, index=dates)

    #NOTE: pandas only does business days for dates it looks like
    import datetime
    dt_dates = np.asarray(lmap(datetime.datetime.fromordinal,
                              ts_dr.toordinal().astype(int)))
    sunspots = pandas.Series(sunspots.endog, index=dt_dates)

    #NOTE: pandas can't handle pre-1900 dates
Ejemplo n.º 34
0
rearranged
1.00796791   0.24449867(-0.00521004)   0.50554663
garch11:
[ 1.01258264  0.24149155  0.50479994]
-2056.3877404
R include_constant=False
Final Estimate:
 LLH:  2056.397    norm LLH:  2.056397
    omega    alpha1     beta1
1.0123560 0.2409589 0.5049154
'''


# Call generate_gjrgarch (defined outside this excerpt) for 20 observations,
# passing an all-ones array as ``varinnovation``.
erro,ho, etaxo = generate_gjrgarch(20, ar, ma, mu=0.04, scale=0.01,
                  varinnovation = np.ones(20))

# Optional example: only runs when 'sp500' is listed in ``examples``.
if 'sp500' in examples:
    import tabular as tb
    import scikits.timeseries as ts

    # NOTE(review): hard-coded local Windows path; the CSV must exist on the
    # machine running this script.
    a = tb.loadSV(r'C:\Josef\work-oth\gspc_table.csv')

    # Both columns are reversed with [::-1] before building the daily series
    # (presumably the CSV is stored newest-first -- TODO confirm).
    s = ts.time_series(a[0]['Close'][::-1],
                dates=ts.date_array(a[0]['Date'][::-1],freq="D"))

    # First differences of the log closing prices.
    sp500 = a[0]['Close'][::-1]
    sp500r = np.diff(np.log(sp500))


# Display any figures created earlier in the script.
plt.show()
Ejemplo n.º 35
0
Archivo: Random.py Proyecto: exedre/e4t
                        dd = f(**params)
                ## Add other distributions here
                else:
                    raise TypeError, 'Unknown generator'
            except TypeError, exc:
                logging.error('%s generator - %s',rq['Source'],exc.args[0])
            except:
                raise
            if s['FREQ']!='0':
                data = ts.time_series(dd,
                                      freq=s['FREQ'],
                                      dates=dr)            
                if s['NAN']:
                    t=ts.now(data.freqstr)
                    if t<data.end_date:
                        da = ts.date_array(start_date=t,end_date=data.end_date)
                        data[da]=np.nan
                rx[kw['NAME']]=Timeseries(data=data,name=name)
            else: # for a vector
                print type(dd),dd.shape
                xx = dd.reshape((1,-1)) 
                print type(xx),xx.shape
                rx[kw['NAME']]=np.array(dd)
        return rx

# random://normal/loc=10,scale=.2~:2012-12-31~2007-01-01~M?name=UNI

if __name__=="__main__":
    # Ad-hoc manual check when run as a script: call parse_instrument
    # (defined outside this excerpt) with three sample instrument strings.
    # The return values are discarded.
    parse_instrument('NAME=PIPPO')
    parse_instrument('NAME=PIPPO~M')
    parse_instrument('NAME=PIPPO~Q')
Ejemplo n.º 36
0
def _daily_finder(vmin, vmax, freq):
    """
    Build the tick table for the periods between ``int(vmin)`` and
    ``int(vmax)`` (inclusive) at frequency `freq`.

    Supported frequencies are hourly or higher codes (second, minute, hour),
    business, daily, the weekly group and undefined; anything else raises a
    ValueError.

    Returns a structured array with one record per period and the fields
    ``val`` (period value), ``maj`` / ``min`` (major / minor tick flags) and
    ``fmt`` (strftime-style label pattern, empty when no label is set).
    """
    # Periods per day / year / month for this frequency.
    # ``periodsperday`` keeps the -1 sentinel for non-intraday frequencies:
    # the intraday thresholds further down are then all negative and can
    # never exceed the span.
    periodsperday = -1
    if freq >= _c.FR_HR:
        for code, per_day in ((_c.FR_SEC, 24 * 60 * 60),
                              (_c.FR_MIN, 24 * 60),
                              (_c.FR_HR, 24)):
            if freq == code:
                periodsperday = per_day
                break
        else:
            raise ValueError("unexpected frequency: %s" % check_freq_str(freq))
        periodsperyear, periodspermonth = 365 * periodsperday, 28 * periodsperday
    elif freq == _c.FR_BUS:
        periodsperyear, periodspermonth = 261, 19
    elif freq == _c.FR_DAY:
        periodsperyear, periodspermonth = 365, 28
    elif get_freq_group(freq) == _c.FR_WK:
        periodsperyear, periodspermonth = 52, 3
    elif freq == _c.FR_UND:
        periodsperyear, periodspermonth = 100, 10
    else:
        raise ValueError("unexpected frequency")

    # ``vmin`` itself is left untouched: its fractional part is needed later
    first, last = int(vmin), int(vmax)
    span = last - first + 1
    dates_ = date_array(start_date=Date(freq, first), end_date=Date(freq, last))

    # Output table; the two end points are always major ticks
    info = np.zeros(span,
                    dtype=[('val', int), ('maj', bool), ('min', bool),
                           ('fmt', '|S20')])
    info['val'][:] = np.arange(first, last + 1)
    info['fmt'][:] = ''
    info['maj'][[0, -1]] = True
    # Field views: writing through them updates ``info`` in place
    info_maj, info_min, info_fmt = info['maj'], info['min'], info['fmt']

    def first_label(label_flags):
        # Use the second flagged position when the first one is position 0,
        # another one exists and the requested start is fractional.
        skip_first = ((label_flags[0] == 0) and (label_flags.size > 1)
                      and ((vmin % 1) > 0.0))
        return label_flags[1] if skip_first else label_flags[0]

    if span <= periodspermonth:
        # --- span of at most one "month" of periods ---
        day_start = period_break(dates_, 'day')
        month_start = period_break(dates_, 'month')

        def _hour_finder(label_interval, force_year_start):
            hours = dates_.hour
            hour_start = (hours - (dates_ - 1).hour) != 0
            labelled = hour_start & (hours % label_interval == 0)
            info_maj[day_start] = True
            info_min[labelled] = True
            year_start = period_break(dates_, 'year')
            # Later assignments deliberately override earlier ones
            info_fmt[labelled] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'
            if force_year_start and not has_level_label(year_start, vmin):
                info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y'

        def _minute_finder(label_interval):
            hour_start = period_break(dates_, 'hour')
            minutes = dates_.minute
            minute_start = (minutes - (dates_ - 1).minute) != 0
            labelled = minute_start & (minutes % label_interval == 0)
            info_maj[hour_start] = True
            info_min[labelled] = True
            year_start = period_break(dates_, 'year')
            info_fmt[labelled] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'

        def _second_finder(label_interval):
            minute_start = period_break(dates_, 'minute')
            seconds = dates_.second
            second_start = (seconds - (dates_ - 1).second) != 0
            labelled = second_start & (seconds % label_interval == 0)
            info_maj[minute_start] = True
            info_min[labelled] = True
            year_start = period_break(dates_, 'year')
            info_fmt[labelled] = '%H:%M:%S'
            info_fmt[day_start] = '%H:%M:%S\n%d-%b'
            info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y'

        # (upper bound on span, finder, finder arguments), finest first;
        # the first bound strictly above the span wins.
        intraday_rules = (
            (periodsperday / 12000.0, _second_finder, (1,)),
            (periodsperday / 6000.0, _second_finder, (2,)),
            (periodsperday / 2400.0, _second_finder, (5,)),
            (periodsperday / 1200.0, _second_finder, (10,)),
            (periodsperday / 800.0, _second_finder, (15,)),
            (periodsperday / 400.0, _second_finder, (30,)),
            (periodsperday / 150.0, _minute_finder, (1,)),
            (periodsperday / 70.0, _minute_finder, (2,)),
            (periodsperday / 24.0, _minute_finder, (5,)),
            (periodsperday / 12.0, _minute_finder, (15,)),
            (periodsperday / 6.0, _minute_finder, (30,)),
            (periodsperday / 2.5, _hour_finder, (1, False)),
            (periodsperday / 1.5, _hour_finder, (2, False)),
            (periodsperday * 1.25, _hour_finder, (3, False)),
            (periodsperday * 2.5, _hour_finder, (6, True)),
            (periodsperday * 4, _hour_finder, (12, True)),
        )
        for limit, finder, args in intraday_rules:
            if span < limit:
                finder(*args)
                break
        else:
            # No intraday rule applies: label days, with months as majors
            info_maj[month_start] = True
            info_min[day_start] = True
            year_start = period_break(dates_, 'year')
            info_fmt[day_start] = '%d'
            info_fmt[month_start] = '%d\n%b'
            info_fmt[year_start] = '%d\n%b\n%Y'
            if not has_level_label(year_start, vmin):
                if has_level_label(month_start, vmin):
                    anchor = month_start
                else:
                    anchor = day_start
                info_fmt[first_label(anchor)] = '%d\n%b\n%Y'

    elif span <= periodsperyear // 4:
        # --- span of at most a quarter of a year ---
        month_start = period_break(dates_, 'month')
        info_maj[month_start] = True
        if freq < _c.FR_HR:
            # every period is a minor tick
            info_min[:] = True
        else:
            day_start = period_break(dates_, 'day')
            info_min[day_start] = True
        week_start = period_break(dates_, 'week')
        year_start = period_break(dates_, 'year')
        info_fmt[week_start] = '%d'
        info_fmt[month_start] = '\n\n%b'
        info_fmt[year_start] = '\n\n%b\n%Y'
        if not has_level_label(year_start, vmin):
            if has_level_label(month_start, vmin):
                anchor = month_start
            else:
                anchor = week_start
            info_fmt[first_label(anchor)] = '\n\n%b\n%Y'

    elif span <= 1.15 * periodsperyear:
        # --- span of at most 1.15 years ---
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        week_start = period_break(dates_, 'week')
        info_maj[month_start] = True
        info_min[week_start] = True
        info_min[year_start] = False
        info_min[month_start] = False
        info_fmt[month_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'
        if not has_level_label(year_start, vmin):
            info_fmt[first_label(month_start)] = '%b\n%Y'

    elif span <= 2.5 * periodsperyear:
        # --- span of at most 2.5 years ---
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        month_start = period_break(dates_, 'month')
        info_maj[quarter_start] = True
        info_min[month_start] = True
        info_fmt[quarter_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'

    elif span <= 4 * periodsperyear:
        # --- span of at most 4 years ---
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        info_maj[year_start] = True
        info_min[month_start] = True
        info_min[year_start] = False

        # Only the January and July month starts get a month label
        month_break = dates_[month_start].month
        jan_or_jul = month_start[(month_break == 1) | (month_break == 7)]
        info_fmt[jan_or_jul] = '%b'
        info_fmt[year_start] = '%b\n%Y'

    elif span <= 11 * periodsperyear:
        # --- span of at most 11 years ---
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        info_maj[year_start] = True
        info_min[quarter_start] = True
        info_min[year_start] = False
        info_fmt[year_start] = '%Y'

    else:
        # --- longer spans: spacing chosen by _get_default_annual_spacing ---
        year_start = period_break(dates_, 'year')
        year_break = dates_[year_start].years
        nyears = span / periodsperyear
        (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
        major_idx = year_start[(year_break % maj_anndef == 0)]
        minor_idx = year_start[(year_break % min_anndef == 0)]
        info_maj[major_idx] = True
        info_min[minor_idx] = True
        info_fmt[major_idx] = '%Y'

    return info
Ejemplo n.º 37
0
import scikits.statsmodels.api as sm
import numpy as np
import pandas

# Getting started
# ---------------

# Load the sunspots dataset bundled with statsmodels.
data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.
# We can use scikits.timeseries and datetime to create this array.

# NOTE(review): ``datetime`` is only referenced by the commented-out variant
# below; the active code does not use it.
import datetime
import scikits.timeseries as ts
# One annual ('A') date per observation, starting in 1700.
dates = ts.date_array(start_date=1700, length=len(data.endog), freq='A')

# To make an array of datetime types, we need an integer array of ordinals

#.. from datetime import datetime
#.. dt_dates = dates.toordinal().astype(int)
#.. dt_dates = np.asarray([datetime.fromordinal(i) for i in dt_dates])
# The ordinal-based conversion above is kept for reference only;
# ``tolist()`` is what actually builds the index.
dt_dates = dates.tolist()

# Using Pandas
# ------------

# Make a pandas TimeSeries or DataFrame
endog = pandas.Series(data.endog, index=dt_dates)

# and instantiate the model
ar_model = sm.tsa.AR(endog, freq='A')
Ejemplo n.º 38
0
import datetime

from matplotlib.finance import quotes_historical_yahoo
import scikits.timeseries as ts
import scikits.timeseries.lib.tstables as tstab

# ``os`` is needed for the output directory handling at the end of this
# script and is not imported by the header above.
import os

startdate = datetime.date(2002, 1, 5)
enddate = datetime.date(2003, 12, 1)

# retrieve data from yahoo.
# Data format is [(d, open, close, high, low, volume), ...] where d is
# a floating point representation of the number of days since 01-01-01 UTC
quotes = quotes_historical_yahoo('INTC', startdate, enddate)

# Create a DateArray of daily dates and convert it to business day frequency
dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS')

opens = [q[1] for q in quotes]

# opens: the data portion of the time series
# dates: the date portion of the time series
raw_series = ts.time_series(opens, dates)
test_series = raw_series
#test_series = ts.fill_missing_dates(raw_series, fill_value=-1)

# Write to a PyTables file
output_dir = '../timeseries'
try:
    os.mkdir(output_dir)
except OSError:
    # An already existing directory is fine; any other failure
    # (permissions, missing parent, ...) must not be hidden.
    if not os.path.isdir(output_dir):
        raise
Ejemplo n.º 39
0
* pandas is missing GroupBy in the docs, but the docstring is helpful
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular

* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
from statsmodels.compat.python import lrange, zip
import numpy as np
import scikits.timeseries as ts

# Five observations on monthly dates; "2001-01" and "2001-03" each appear
# twice, so the series contains duplicated dates.
s = ts.time_series([1,2,3,4,5],
            dates=ts.date_array(["2001-01","2001-01",
            "2001-02","2001-03","2001-03"],freq="M"))

print('\nUsing la')
import la

# Wrap the values and the dates in larrys sharing the same positional labels.
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
# group_mean(dat) presumably replaces each value by the mean of its date
# group while keeping the original length -- TODO confirm against the la docs.
s2 = ts.time_series(dta.group_mean(dat).x,dates=ts.date_array(dat.x,freq="M"))
# Drop the repeated dates from the grouped series.
s2u = ts.remove_duplicated_dates(s2)
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
import pandas
Ejemplo n.º 40
0
"""
Look at some macro plots, then do some VARs and IRFs.
"""

import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts
import scikits.timeseries.lib.plotlib as tplt

# Load the macrodata dataset as plain numpy data and keep only its ``data``
# attribute, which is indexed by field name below.
data = sm.datasets.macrodata.load(as_pandas=False)
data = data.data


### Create Timeseries Representations of a few vars

# Quarterly dates from 1959Q1 through 2009Q3.
dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
    end_date=ts.Date('Q', year=2009, quarter=3))

# Columns 0-2: realgdp, realcons, cpi viewed as a plain float array with
# three columns.
ts_data = data[['realgdp','realcons','cpi']].view(float).reshape(-1,3)
# Column 3: (1 - unemp/100) * pop.
ts_data = np.column_stack((ts_data, (1 - data['unemp']/100) * data['pop']))
ts_series = ts.time_series(ts_data, dates)


# One time-series subplot per variable on a 2x2 grid.
fig = tplt.tsfigure()
fsp = fig.add_tsplot(221)
fsp.tsplot(ts_series[:,0],'-')
fsp.set_title("Real GDP")
fsp = fig.add_tsplot(222)
fsp.tsplot(ts_series[:,1],'r-')
fsp.set_title("Real Consumption")
fsp = fig.add_tsplot(223)
fsp.tsplot(ts_series[:,2],'g-')
Ejemplo n.º 41
0
def _daily_finder(vmin, vmax, freq):
    """
    Build the tick table for the periods between ``int(vmin)`` and
    ``int(vmax)`` (inclusive) at frequency `freq`.

    Supported frequencies are hourly or higher codes (second, minute, hour),
    business, daily, the weekly group and undefined; anything else raises a
    ValueError.

    Returns a structured array with one record per period and the fields
    ``val`` (period value), ``maj`` / ``min`` (major / minor tick flags) and
    ``fmt`` (strftime-style label pattern, empty when no label is set).
    """
    # Periods per day / year / month for this frequency.
    # ``periodsperday`` keeps the -1 sentinel for non-intraday frequencies:
    # the intraday thresholds further down are then all negative and can
    # never exceed the span.
    periodsperday = -1
    if freq >= _c.FR_HR:
        intraday_codes = (
            (_c.FR_SEC, 24 * 60 * 60),
            (_c.FR_MIN, 24 * 60),
            (_c.FR_HR, 24),
        )
        for code, per_day in intraday_codes:
            if freq == code:
                periodsperday = per_day
                break
        else:
            raise ValueError("unexpected frequency: %s" % check_freq_str(freq))
        periodsperyear, periodspermonth = 365 * periodsperday, 28 * periodsperday
    elif freq == _c.FR_BUS:
        periodsperyear, periodspermonth = 261, 19
    elif freq == _c.FR_DAY:
        periodsperyear, periodspermonth = 365, 28
    elif get_freq_group(freq) == _c.FR_WK:
        periodsperyear, periodspermonth = 52, 3
    elif freq == _c.FR_UND:
        periodsperyear, periodspermonth = 100, 10
    else:
        raise ValueError("unexpected frequency")

    # ``vmin`` itself is left untouched: its fractional part is needed later
    first, last = int(vmin), int(vmax)
    span = last - first + 1
    dates_ = date_array(start_date=Date(freq, first), end_date=Date(freq, last))

    # Output table; the two end points are always major ticks
    info = np.zeros(span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S20")])
    info["val"][:] = np.arange(first, last + 1)
    info["fmt"][:] = ""
    info["maj"][[0, -1]] = True
    # Field views: writing through them updates ``info`` in place
    info_maj, info_min, info_fmt = info["maj"], info["min"], info["fmt"]

    def first_label(label_flags):
        # Use the second flagged position when the first one is position 0,
        # another one exists and the requested start is fractional.
        skip_first = (label_flags[0] == 0) and (label_flags.size > 1) and ((vmin % 1) > 0.0)
        return label_flags[1] if skip_first else label_flags[0]

    if span <= periodspermonth:
        # --- span of at most one "month" of periods ---
        day_start = period_break(dates_, "day")
        month_start = period_break(dates_, "month")

        def _hour_finder(label_interval, force_year_start):
            hours = dates_.hour
            hour_start = (hours - (dates_ - 1).hour) != 0
            labelled = hour_start & (hours % label_interval == 0)
            info_maj[day_start] = True
            info_min[labelled] = True
            year_start = period_break(dates_, "year")
            # Later assignments deliberately override earlier ones
            info_fmt[labelled] = "%H:%M"
            info_fmt[day_start] = "%H:%M\n%d-%b"
            info_fmt[year_start] = "%H:%M\n%d-%b\n%Y"
            if force_year_start and not has_level_label(year_start, vmin):
                info_fmt[first_label(day_start)] = "%H:%M\n%d-%b\n%Y"

        def _minute_finder(label_interval):
            hour_start = period_break(dates_, "hour")
            minutes = dates_.minute
            minute_start = (minutes - (dates_ - 1).minute) != 0
            labelled = minute_start & (minutes % label_interval == 0)
            info_maj[hour_start] = True
            info_min[labelled] = True
            year_start = period_break(dates_, "year")
            info_fmt[labelled] = "%H:%M"
            info_fmt[day_start] = "%H:%M\n%d-%b"
            info_fmt[year_start] = "%H:%M\n%d-%b\n%Y"

        def _second_finder(label_interval):
            minute_start = period_break(dates_, "minute")
            seconds = dates_.second
            second_start = (seconds - (dates_ - 1).second) != 0
            labelled = second_start & (seconds % label_interval == 0)
            info_maj[minute_start] = True
            info_min[labelled] = True
            year_start = period_break(dates_, "year")
            info_fmt[labelled] = "%H:%M:%S"
            info_fmt[day_start] = "%H:%M:%S\n%d-%b"
            info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y"

        # (upper bound on span, finder, finder arguments), finest first;
        # the first bound strictly above the span wins.
        intraday_rules = (
            (periodsperday / 12000.0, _second_finder, (1,)),
            (periodsperday / 6000.0, _second_finder, (2,)),
            (periodsperday / 2400.0, _second_finder, (5,)),
            (periodsperday / 1200.0, _second_finder, (10,)),
            (periodsperday / 800.0, _second_finder, (15,)),
            (periodsperday / 400.0, _second_finder, (30,)),
            (periodsperday / 150.0, _minute_finder, (1,)),
            (periodsperday / 70.0, _minute_finder, (2,)),
            (periodsperday / 24.0, _minute_finder, (5,)),
            (periodsperday / 12.0, _minute_finder, (15,)),
            (periodsperday / 6.0, _minute_finder, (30,)),
            (periodsperday / 2.5, _hour_finder, (1, False)),
            (periodsperday / 1.5, _hour_finder, (2, False)),
            (periodsperday * 1.25, _hour_finder, (3, False)),
            (periodsperday * 2.5, _hour_finder, (6, True)),
            (periodsperday * 4, _hour_finder, (12, True)),
        )
        for limit, finder, args in intraday_rules:
            if span < limit:
                finder(*args)
                break
        else:
            # No intraday rule applies: label days, with months as majors
            info_maj[month_start] = True
            info_min[day_start] = True
            year_start = period_break(dates_, "year")
            info_fmt[day_start] = "%d"
            info_fmt[month_start] = "%d\n%b"
            info_fmt[year_start] = "%d\n%b\n%Y"
            if not has_level_label(year_start, vmin):
                if has_level_label(month_start, vmin):
                    anchor = month_start
                else:
                    anchor = day_start
                info_fmt[first_label(anchor)] = "%d\n%b\n%Y"

    elif span <= periodsperyear // 4:
        # --- span of at most a quarter of a year ---
        month_start = period_break(dates_, "month")
        info_maj[month_start] = True
        if freq < _c.FR_HR:
            # every period is a minor tick
            info_min[:] = True
        else:
            day_start = period_break(dates_, "day")
            info_min[day_start] = True
        week_start = period_break(dates_, "week")
        year_start = period_break(dates_, "year")
        info_fmt[week_start] = "%d"
        info_fmt[month_start] = "\n\n%b"
        info_fmt[year_start] = "\n\n%b\n%Y"
        if not has_level_label(year_start, vmin):
            if has_level_label(month_start, vmin):
                anchor = month_start
            else:
                anchor = week_start
            info_fmt[first_label(anchor)] = "\n\n%b\n%Y"

    elif span <= 1.15 * periodsperyear:
        # --- span of at most 1.15 years ---
        year_start = period_break(dates_, "year")
        month_start = period_break(dates_, "month")
        week_start = period_break(dates_, "week")
        info_maj[month_start] = True
        info_min[week_start] = True
        info_min[year_start] = False
        info_min[month_start] = False
        info_fmt[month_start] = "%b"
        info_fmt[year_start] = "%b\n%Y"
        if not has_level_label(year_start, vmin):
            info_fmt[first_label(month_start)] = "%b\n%Y"

    elif span <= 2.5 * periodsperyear:
        # --- span of at most 2.5 years ---
        year_start = period_break(dates_, "year")
        quarter_start = period_break(dates_, "quarter")
        month_start = period_break(dates_, "month")
        info_maj[quarter_start] = True
        info_min[month_start] = True
        info_fmt[quarter_start] = "%b"
        info_fmt[year_start] = "%b\n%Y"

    elif span <= 4 * periodsperyear:
        # --- span of at most 4 years ---
        year_start = period_break(dates_, "year")
        month_start = period_break(dates_, "month")
        info_maj[year_start] = True
        info_min[month_start] = True
        info_min[year_start] = False

        # Only the January and July month starts get a month label
        month_break = dates_[month_start].month
        jan_or_jul = month_start[(month_break == 1) | (month_break == 7)]
        info_fmt[jan_or_jul] = "%b"
        info_fmt[year_start] = "%b\n%Y"

    elif span <= 11 * periodsperyear:
        # --- span of at most 11 years ---
        year_start = period_break(dates_, "year")
        quarter_start = period_break(dates_, "quarter")
        info_maj[year_start] = True
        info_min[quarter_start] = True
        info_min[year_start] = False
        info_fmt[year_start] = "%Y"

    else:
        # --- longer spans: spacing chosen by _get_default_annual_spacing ---
        year_start = period_break(dates_, "year")
        year_break = dates_[year_start].years
        nyears = span / periodsperyear
        (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
        major_idx = year_start[(year_break % maj_anndef == 0)]
        minor_idx = year_start[(year_break % min_anndef == 0)]
        info_maj[major_idx] = True
        info_min[minor_idx] = True
        info_fmt[major_idx] = "%Y"

    return info
Ejemplo n.º 42
0
#    t = timer()
#    mod_tb = tb.lpc(y, 2)
#    t_end = timer()
#    print str(t_end - t) + " seconds for talkbox.lpc"
#    print """For higher lag lengths ours quickly fills up memory and starts
#thrashing the swap.  Should we include talkbox C code or Cythonize the
#Levinson recursion algorithm?"""

    ## Try with a pandas series
    import pandas
    import scikits.timeseries as ts
    d1 = ts.Date(year=1700, freq='A')
    #NOTE: have to have yearBegin offset for annual data until parser rewrite
    #should this be up to the user, or should it be done in TSM init?
    #NOTE: not anymore, it's end of year now
    ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
    pandas_dr = pandas.DateRange(start=d1.datetime,
                                 periods=len(sunspots.endog), timeRule='A@DEC')
    #pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

    dates = np.arange(1700, 1700 + len(sunspots.endog))
    dates = ts.date_array(dates, freq='A')
    #sunspots = pandas.Series(sunspots.endog, index=dates)

    #NOTE: pandas only does business days for dates it looks like
    import datetime
    dt_dates = np.asarray(lmap(datetime.datetime.fromordinal,
                              ts_dr.toordinal().astype(int)))
    sunspots = pandas.Series(sunspots.endog, index=dt_dates)

    #NOTE: pandas can't handle pre-1900 dates
Ejemplo n.º 43
0
def getMsgStats(fromFunc=False):
	"""Build per-day message counts for the requested date range.

	Reads ``startDate`` (optional) and ``endDate`` (required) from
	``request.form`` and the optional ``companyId`` from the session.
	Every entry of the result looks like
	``{"date": "MM/DD", "msg1": <count>, "msg2": <count>}``, where ``msg1``
	counts rows with ``msgType == 1`` and ``msg2`` rows with ``msgType == 2``.
	Days of the range without any row are reported with zero counts.

	Returns the list itself when ``fromFunc`` is true, otherwise a
	``jsonify`` response carrying it under the ``"stats"`` key.
	"""
	companyId = session.get("companyId")

	# 1==1 is an always-true seed so filter(*filters) never gets an empty list.
	filters = [ 1==1 ]
	if companyId:
		filters.append(Message.companyId==companyId)
	if request.form.get('startDate'):
		filters.append(Message.rdate > request.form['startDate'])

	# One row per calendar day: (day, count of type-1, count of type-2).
	query = db_session.query(func.date(Message.rdate).label("rdate"), func.count(func.IF(Message.msgType==1,1,None)), func.count(func.IF(Message.msgType==2,1,None)))
	query = query.group_by( func.date(Message.rdate) )
	query = query.filter(func.date(Message.rdate) <= request.form['endDate'])
	rows = query.filter(*filters).all()

	db_dates = [row[0] for row in rows]
	db_cnt1 = [row[1] for row in rows]
	db_cnt2 = [row[2] for row in rows]

	# Turn the sparse DB rows into gap-free daily series (gaps become 0).
	dateAry = ts.date_array(db_dates, freq='D')
	fillVals1 = ts.time_series(db_cnt1, dateAry).fill_missing_dates(fill_value=0).filled(0)
	fillVals2 = ts.time_series(db_cnt2, dateAry).fill_missing_dates(fill_value=0).filled(0)

	# Full list of days the caller asked for.
	startDate = '2015-10-27'
	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	stats = list()

	# Leading days before the first DB row get zero counts.
	dayCount = len(fillDateAry)
	firstDayIdx = 0
	while firstDayIdx < dayCount and (len(dateAry)==0 or dateAry[0] != fillDateAry[firstDayIdx]):
		stats.append({ "date" : fillDateAry[firstDayIdx].strftime("%m/%d"), "msg1" : 0, "msg2" : 0 })
		firstDayIdx += 1

	# Days covered by the filled series.
	for offset, firstCount in enumerate(fillVals1):
		secondCount = fillVals2[offset]
		stats.append({ "date" : fillDateAry[firstDayIdx + offset].strftime("%m/%d"), "msg1" : firstCount, "msg2" : secondCount })

	# Trailing days after the last DB row get zero counts.
	for pos in range(len(stats), len(fillDateAry)):
		stats.append({ "date" : fillDateAry[pos].strftime("%m/%d"), "msg1" : 0, "msg2" : 0 })

	if fromFunc==True :
		return stats
	return jsonify({"stats" : stats})
Ejemplo n.º 44
0
                                   _attrs)
    _methods = {'predict': 'dates'}
    _wrap_methods = wrap.union_dicts(
        base.LikelihoodResultsWrapper._wrap_methods, _methods)


wrap.populate_wrapper(TimeSeriesResultsWrapper, TimeSeriesModelResults)

if __name__ == "__main__":
    # Small manual demo: wrap the macrodata set in a DataFrame and feed it to
    # TimeSeriesModel.
    import scikits.statsmodels.api as sm
    import datetime
    import pandas

    data = sm.datasets.macrodata.load()

    #make a DataFrame
    #TODO: attach a DataFrame to some of the datasets, for quicker use
    # Index labels of the form "<year>:<quarter>".
    dates = [str(int(x[0])) +':'+ str(int(x[1])) \
             for x in data.data[['year','quarter']]]
    # scikits.timeseries is optional here; only a failed import is tolerated.
    # The constructors must be reached through the `ts` alias: the previous
    # unqualified `date_array`/`Date` raised a NameError that the bare
    # `except` silently swallowed, so the quarterly dates were never built.
    try:
        import scikits.timeseries as ts
    except ImportError:
        ts_dates = None
    else:
        ts_dates = ts.date_array(
            start_date=ts.Date(year=1959, quarter=1, freq='Q'),
            length=len(data.data))

    df = pandas.DataFrame(data.data[['realgdp', 'realinv', 'realcons']],
                          index=dates)
    ex_mod = TimeSeriesModel(df)
    #ts_series = pandas.TimeSeries()
Ejemplo n.º 45
0
def _alignCountsToRange(fillDateAry, dateAry, fillVals):
	"""Return one count per day of ``fillDateAry`` for a single daily series.

	``dateAry`` holds the dates that came back from the database and
	``fillVals`` the gap-free counts derived from them, so ``fillVals[0]`` is
	taken to belong to ``dateAry[0]`` (same assumption the callers of
	``fill_missing_dates`` in this file already make). Days outside the
	series are reported as 0. When the series is empty, or its first date is
	not part of ``fillDateAry``, every day is 0.
	"""
	total = len(fillDateAry)
	counts = [0] * total
	if len(dateAry) == 0:
		return counts

	# Position of the series' first date inside the requested range.
	offset = 0
	while offset < total and dateAry[0] != fillDateAry[offset]:
		offset += 1

	for idx, value in enumerate(fillVals):
		if offset + idx >= total:
			break
		counts[offset + idx] = value
	return counts


def getGoodworkStats(fromFunc=False):
	"""Build per-day post / like / reply counts for the requested date range.

	Reads ``startDate`` (optional) and ``endDate`` (required) from
	``request.form`` and the optional ``companyId`` from the session. Every
	entry of the result looks like
	``{"date": "MM/DD", "post": n, "like": n, "reply": n}`` with exactly one
	entry per day of the range; days without rows carry zeros.

	Each of the three series is aligned to the date range on its own. The
	previous implementation derived a shared ``minDate`` and padded all three
	lists in three loops that each restarted at index 0, which stacked extra
	zeros (and shifted date labels) whenever the series did not start on the
	same day, and ignored the likes series entirely when there were no posts.

	Returns the list itself when ``fromFunc`` is true, otherwise a
	``jsonify`` response carrying it under the ``"stats"`` key.
	"""
	db_dates1 = list()
	db_dates2 = list()
	db_dates3 = list()
	db_cnt1 = list()
	db_cnt2 = list()
	db_cnt3 = list()

	companyId = session.get("companyId")

	# --- posts per day -------------------------------------------------
	# 1==1 is an always-true seed so filter(*condFilter) is never empty.
	condFilter = [ 1==1 ]
	if companyId:
		condFilter.append(GoodPost.companyId==companyId)
	if request.form.get('startDate'):
		condFilter.append(GoodPost.rdate > request.form['startDate'])

	db_result1 = db_session.query(func.date(GoodPost.rdate).label("rdate"), func.count())\
					.group_by( func.date(GoodPost.rdate) )\
					.filter(func.date(GoodPost.rdate) <= request.form['endDate'])\
					.filter(*condFilter).all()
	for row in db_result1:
		db_dates1.append(row[0])
		db_cnt1.append(row[1])

	# --- likes per day (company is reached through the liking user) -----
	condFilter2 = [ 1==1 ]
	if request.form.get('startDate'):
		condFilter2.append(GoodLike.rdate > request.form['startDate'])

	if companyId:
		db_result2 = db_session.query(func.date(GoodLike.rdate).label("rdate"), func.count())\
					.join(User, User.userId==GoodLike.userId)\
					.group_by( func.date(GoodLike.rdate) )\
					.filter(func.date(GoodLike.rdate) <= request.form['endDate'])\
					.filter(User.companyId==companyId)\
					.filter(*condFilter2).all()
	else :
		db_result2 = db_session.query(func.date(GoodLike.rdate).label("rdate"), func.count())\
						.group_by( func.date(GoodLike.rdate) )\
						.filter(func.date(GoodLike.rdate) <= request.form['endDate'])\
						.filter(*condFilter2).all()
	for row in db_result2:
		db_dates2.append(row[0])
		db_cnt2.append(row[1])

	# --- replies per day (company is reached through the replying user) -
	condFilter3 = [ 1==1 ]
	if request.form.get('startDate'):
		condFilter3.append(GoodReply.rdate > request.form['startDate'])

	if companyId:
		db_result3 = db_session.query(func.date(GoodReply.rdate).label("rdate"), func.count())\
					.join(User, User.userId==GoodReply.userId)\
					.group_by( func.date(GoodReply.rdate) )\
					.filter(func.date(GoodReply.rdate) <= request.form['endDate'])\
					.filter(User.companyId==companyId)\
					.filter(*condFilter3).all()
	else :
		db_result3 = db_session.query(func.date(GoodReply.rdate).label("rdate"), func.count())\
					.group_by( func.date(GoodReply.rdate) )\
					.filter(func.date(GoodReply.rdate) <= request.form['endDate'])\
					.filter(*condFilter3).all()
	for row in db_result3:
		db_dates3.append(row[0])
		db_cnt3.append(row[1])

	# Turn the sparse DB rows into gap-free daily series (gaps become 0).
	dateAry1 = ts.date_array(db_dates1, freq='D')
	dateAry2 = ts.date_array(db_dates2, freq='D')
	dateAry3 = ts.date_array(db_dates3, freq='D')
	fillVals1 = ts.time_series(db_cnt1, dateAry1).fill_missing_dates(fill_value=0).filled(0)
	fillVals2 = ts.time_series(db_cnt2, dateAry2).fill_missing_dates(fill_value=0).filled(0)
	fillVals3 = ts.time_series(db_cnt3, dateAry3).fill_missing_dates(fill_value=0).filled(0)

	# Full list of days the caller asked for.
	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		startDate = '2015-10-27'
	endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Align every series to the range independently of the other two.
	postCounts = _alignCountsToRange(fillDateAry, dateAry1, fillVals1)
	likeCounts = _alignCountsToRange(fillDateAry, dateAry2, fillVals2)
	replyCounts = _alignCountsToRange(fillDateAry, dateAry3, fillVals3)

	#merge
	stats = list()
	for idx in range(len(fillDateAry)):
		stats.append({
			"date" : fillDateAry[idx].strftime("%m/%d"),
			"post" : postCounts[idx],
			"like" : likeCounts[idx],
			"reply" : replyCounts[idx],
		})

	if fromFunc==True :
		return stats
	else :
		return jsonify({"stats" : stats})
Ejemplo n.º 46
0
import datetime

from matplotlib.finance import quotes_historical_yahoo
import scikits.timeseries as ts
import scikits.timeseries.lib.tstables as tstab

# `os` is needed for os.mkdir below; without this import the call raised a
# NameError, which the `except OSError` clause does not catch.
import os

startdate = datetime.date(2002, 1, 5)
enddate = datetime.date(2003, 12, 1)

# retrieve data from yahoo.
# Data format is [(d, open, close, high, low, volume), ...] where d is
# a floating point representation of the number of days since 01-01-01 UTC
quotes = quotes_historical_yahoo('INTC', startdate, enddate)

# Create a DateArray of daily dates and convert it to business day frequency
dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS')

# Opening price of every quote (second field of each tuple).
opens = [q[1] for q in quotes]

# opens: the data portion of the time series
# dates: the date portion of the time series
raw_series = ts.time_series(opens, dates)
test_series = raw_series
#test_series = ts.fill_missing_dates(raw_series, fill_value=-1)

# Write to a PyTables file
output_dir = '../timeseries'
# Create the output directory; an OSError (e.g. it already exists) is ignored.
try:
    os.mkdir(output_dir)
except OSError:
    pass
Ejemplo n.º 47
0
    def __call__(self, *tseries, **kwargs):
        """
        Generate a report and write it to the configured `output` object.

        Parameter values are not saved to the Report instance: options given
        here only override the instance options for this single call.

        Parameters
        ----------
        tseries : time series, optional
            Series to report. When none are given, `self.tseries` is used.
        kwargs
            Accepts same parameters as __init__ method of Report class.

        Raises
        ------
        ValueError
            If `header_row` does not have one entry per series (optionally
            plus one for the date column), or if `justify`/`header_justify`
            is neither a string nor a list with one entry per series
            (optionally plus one for the date column).
        """

        option_dict = copy.copy(self.options)
        option_dict.update(self.__make_dict(**kwargs))
        if len(tseries) == 0:
            tseries = self.tseries

        def option(kw):
            # per-call/instance option, falling back to the module default
            return option_dict.get(kw, _default_options[kw])

        dates = option('dates')
        header_row = option('header_row')
        header_char = option('header_char')
        header_justify = option('header_justify')
        row_char = option('row_char')
        footer_label = option('footer_label')
        footer_char = option('footer_char')
        footer_func = option('footer_func')
        delim = option('delim')
        justify = option('justify')
        prefix = option('prefix')
        postfix = option('postfix')
        mask_rep = option('mask_rep')
        datefmt = option('datefmt')
        fmt_func = option('fmt_func')
        wrap_func = option('wrap_func')
        col_width = option('col_width')
        nls=option('nls')
        output=option('output')
        fixed_width=option('fixed_width')

        if header_row is not None:
            has_header=True
            if len(header_row) == len(tseries)+1:
                # label for date column included
                rows = [header_row]
            elif len(header_row) == len(tseries):
                # label for date column not included
                rows = [['']+header_row]
            else:
                raise ValueError("mismatch with number of headers and series")
        else:
            has_header=False
            rows=[]

        if fixed_width:

            def _standardize_justify(userspec):
                """Expand a justify spec to one entry per column (date first)."""
                if isinstance(userspec, str):
                    # justify all columns the same way
                    return [userspec for x in range(len(tseries)+1)]
                elif isinstance(userspec, list):
                    if len(userspec) == len(tseries):
                        # justification for date column not included, so set that
                        # to left by default
                        return ['left'] + userspec
                    elif len(userspec) == len(tseries)+1:
                        # justification for date column already included
                        # (this case used to fall through and return None)
                        return userspec
                # wrong type, or a list of the wrong length
                raise ValueError("invalid `justify` specification")

            if justify is not None:
                justify = _standardize_justify(justify)
            else:
                # default column justification
                justify = ['left']
                for ser in tseries:
                    if ser.dtype.char in 'SUO': justify.append('left')
                    else: justify.append('right')


            if header_justify is not None:
                header_justify = _standardize_justify(header_justify)
            else:
                # default column justification
                header_justify = ['left' for x in range(len(tseries)+1)]
        else:
            justify = ['none' for x in range(len(tseries)+1)]
            header_justify = justify

        if datefmt is None:
            def datefmt_func(date): return str(date)
        else:
            def datefmt_func(date): return date.strftime(datefmt)

        if dates is None:
            # no explicit dates: report the common span of the aligned series
            tseries = ts.align_series(*tseries)
            dates = ts.date_array(start_date=tseries[0].start_date,
                                  end_date=tseries[0].end_date)
        else:
            tseries = ts.align_series(start_date=dates[0], end_date=dates[-1], *tseries)

        # one value formatter per series
        if isinstance(fmt_func, list):
            fmt_func = [fmt_func_wrapper(f, mask_rep) for f in fmt_func]
        else:
            fmt_func = [fmt_func_wrapper(fmt_func, mask_rep)]*len(tseries)

        def wrap_func_default(func):
            if func is None: return lambda x:x
            else: return func

        # one wrap function per column, date column first
        if isinstance(wrap_func, list):
            if len(wrap_func) == len(tseries):
                wrap_func = [lambda x: x] + wrap_func
            wrap_func = [wrap_func_default(func) for func in wrap_func]
        else:
            wrap_func = [wrap_func_default(wrap_func) for x in range(len(tseries)+1)]


        # one minimum width per column, date column first
        if isinstance(col_width, list):
            if len(col_width) == len(tseries):
                col_width = [None] + col_width
        else:
            col_width = [col_width for x in range(len(tseries)+1)]

        _sd = dates[0]

        # one logical row per date; `d - _sd` is the position of that date in
        # the aligned series
        for d in dates:
            rows.append(
                [datefmt_func(d)] + \
                [fmt_func[i](ser.series[d - _sd]) \
                 for i, ser in enumerate(tseries)]
            )

        if footer_func is not None:
            has_footer=True
            if not isinstance(footer_func, list):
                footer_func = [footer_func]*len(tseries)

            if footer_label is None: footer_label = ['']
            else: footer_label = [footer_label]

            footer_data = []
            has_missing = dates.has_missing_dates()

            for i, ser in enumerate(tseries):
                if footer_func[i] is None:
                    footer_data.append('')
                else:
                    # with gaps in `dates`, reduce only over the reported dates
                    if has_missing: _input = ser[dates]
                    else:           _input = ser.series
                    footer_data.append(fmt_func[i](footer_func[i](_input)))

            rows.append(footer_label + footer_data)
        else:
            has_footer=False


        def rowWrapper(row):
            newRows = [wrap_func[i](item).split('\n') for i, item in enumerate(row)]
            return [[(substr or '') for substr in item] for item in map(None, *newRows)]
        # break each logical row into one or more physical ones
        logicalRows = [rowWrapper(row) for row in rows]
        numLogicalRows = len(logicalRows)
        # columns of physical rows
        columns = map(None,*reduce(operator.add,logicalRows))
        numCols = len(columns)
        colNums = list(range(numCols))

        # get the maximum of each column by the string length of its items
        maxWidths = [max(col_width[i], *[len(str(item)) for item in column])
                        for i, column in enumerate(columns)]

        def getSeparator(char, separate):
            if char is not None and separate:
                return char * (len(prefix) + len(postfix) + sum(maxWidths) + \
                                             len(delim)*(len(maxWidths)-1))
            else:
                return None

        header_separator = getSeparator(header_char, has_header)
        footer_separator = getSeparator(footer_char, has_footer)
        row_separator = getSeparator(row_char, True)

        # select the appropriate justify method
        justify_funcs = {'center':str.center, 'right':str.rjust, 'left':str.ljust,
                          'none':(lambda text, width: text)}

        # data_start..data_end are the logical rows followed by a row
        # separator; the last data row is excluded so that it is followed by
        # the footer separator (or by nothing) instead
        if has_header and has_footer:
            data_start = 1
            data_end = numLogicalRows-3
        elif has_header:
            data_start = 1
            data_end = numLogicalRows-2
        elif has_footer:
            data_start = 0
            data_end = numLogicalRows-3
        else:
            data_start = 0
            data_end = numLogicalRows-2

        for rowNum, physicalRows in enumerate(logicalRows):

            # The header row uses its own justification. This is keyed on
            # has_header rather than on the separator string, so the header
            # stays justified when the header separator is disabled.
            if rowNum == 0 and has_header:
                _justify = header_justify
            else:
                _justify = justify

            def apply_justify(colNum, item, width):
                jfunc_key = str(_justify[colNum]).lower()
                jfunc = justify_funcs[jfunc_key]
                return jfunc(str(item), width)

            for row in physicalRows:

                output.write(
                    prefix + \
                    delim.join([
                        apply_justify(cn, item, width) \
                        for (cn, item, width) in zip(colNums, row, maxWidths)
                    ]) + \
                    postfix + nls)

            if row_separator and (data_start <= rowNum <= data_end):
                output.write(row_separator + nls)
            elif header_separator and rowNum < data_start:
                output.write(header_separator + nls)
            elif footer_separator and rowNum == data_end + 1:
                output.write(footer_separator + nls)
Ejemplo n.º 48
0
def getAccessStats(fromFunc=False):
	"""Build per-day page-view / unique-visitor counts for the requested range.

	Reads ``startDate`` (optional) and ``endDate`` (required) from
	``request.form`` and the optional ``companyId`` from the session. Every
	entry of the result looks like ``{"date": "MM/DD", "pv": n, "uv": n}``,
	where ``pv`` is the number of access-log rows of that day and ``uv`` the
	number of distinct ``userId`` values among them. Days without rows carry
	zeros.

	When a company is selected, ``User`` is explicitly joined to the access
	log on ``userId`` (as the like/reply statistics do). Filtering on
	``User.companyId`` without that join produced an implicit cross join,
	which multiplied ``pv`` and did not restrict ``uv`` to the company.

	Returns the list itself when ``fromFunc`` is true, otherwise a
	``jsonify`` response carrying it under the ``"stats"`` key.
	"""
	companyId = session.get("companyId")

	# 1==1 is an always-true seed so filter(*condFilter) is never empty.
	condFilter = [1==1]
	if request.form.get('startDate'):
		condFilter.append(LogAccess.rdate > request.form['startDate'])

	query = db_session.query(func.date(LogAccess.rdate).label("rdate"), func.count(), func.count(distinct(LogAccess.userId)))
	if companyId:
		# Tie every log row to its own user before filtering by company.
		query = query.join(User, User.userId==LogAccess.userId)
		condFilter.append(User.companyId==companyId)

	db_result = query.group_by( func.date(LogAccess.rdate) )\
				.filter(*condFilter)\
				.filter(func.date(LogAccess.rdate) <= request.form['endDate'])\
				.all()

	db_dates = list()
	db_pv = list()
	db_uv = list()
	for row in db_result:
		db_dates.append(row[0])
		db_pv.append(row[1])
		db_uv.append(row[2])

	# Turn the sparse DB rows into gap-free daily series (gaps become 0).
	stats = list()
	dateAry = ts.date_array(db_dates, freq='D')
	timeSrz1 = ts.time_series(db_pv, dateAry)
	timeSrz2 = ts.time_series(db_uv, dateAry)

	fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
	fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)

	fillVals1 = fillVals1.filled(0)
	fillVals2 = fillVals2.filled(0)

	# Full list of days the caller asked for.
	if request.form.get('startDate'):
		startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
	else :
		startDate = '2015-10-27'

	endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
	fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

	# Leading days before the first DB row get zero counts.
	firstDayIdx = 0
	for day in fillDateAry :
		if len(dateAry)==0 or dateAry[0] != fillDateAry[firstDayIdx] :
			stats.append({ "date" : fillDateAry[firstDayIdx].strftime("%m/%d"), "pv" : 0, "uv" : 0 })
			firstDayIdx += 1
			continue
		else :
			break

	# Days covered by the filled series.
	for idx, pv in enumerate(fillVals1):
		uv = fillVals2[idx]
		stats.append({ "date" : fillDateAry[firstDayIdx + idx].strftime("%m/%d"), "pv" : pv, "uv" : uv })

	# Trailing days after the last DB row get zero counts.
	statsLen = len(stats)
	for i in range(len(fillDateAry) - len(stats)) :
		stats.append({ "date" : fillDateAry[statsLen + i].strftime("%m/%d"), "pv" : 0, "uv" : 0 })

	if fromFunc==True :
		return stats
	else :
		return jsonify({"stats" : stats})
Ejemplo n.º 49
0
Archivo: Excel.py Proyecto: exedre/e4t
 def _get_tseriesA(freq,date_values,kw):
     """Build a DateArray from an array of numeric date values.

     `date_values` is flattened, NaN entries are dropped and every remaining
     entry is truncated to int and turned into a `ts.Date` of frequency
     `str(freq)`. `kw` is accepted but not used.
     """
     kept = []
     for raw in date_values.flatten():
         if np.isnan(raw):
             continue
         kept.append(int(raw))
     as_dates = []
     for number in kept:
         as_dates.append(ts.Date(freq=str(freq), value=number))
     return ts.date_array(as_dates)
Ejemplo n.º 50
0
garch11:
[ 1.01258264  0.24149155  0.50479994]
-2056.3877404
R include_constant=False
Final Estimate:
 LLH:  2056.397    norm LLH:  2.056397
    omega    alpha1     beta1
1.0123560 0.2409589 0.5049154
'''

# Generate a short (20-observation) series with generate_gjrgarch, using unit
# innovations (np.ones) in place of random draws.
erro, ho, etaxo = generate_gjrgarch(20,
                                    ar,
                                    ma,
                                    mu=0.04,
                                    scale=0.01,
                                    varinnovation=np.ones(20))

# Optional example, only run when 'sp500' is listed in `examples`.
if 'sp500' in examples:
    import tabular as tb
    import scikits.timeseries as ts

    # NOTE(review): hard-coded local Windows path - the example only runs
    # where this CSV exists.
    a = tb.loadSV(r'C:\Josef\work-oth\gspc_table.csv')

    # [::-1] reverses the row order (presumably the file is stored newest
    # first - confirm against the CSV).
    s = ts.time_series(a[0]['Close'][::-1],
                       dates=ts.date_array(a[0]['Date'][::-1], freq="D"))

    # Closing prices in the same reversed order, and their log differences.
    sp500 = a[0]['Close'][::-1]
    sp500r = np.diff(np.log(sp500))

#plt.show()
Ejemplo n.º 51
0
* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
from statsmodels.compat.python import lrange
import numpy as np
import scikits.timeseries as ts
import la
import pandas
import tabular as tb
from finance import msft, ibm  # hack to make it run as standalone

# Monthly series in which 2001-01 and 2001-03 each appear twice.
s = ts.time_series([1,2,3,4,5],
            dates=ts.date_array(["2001-01","2001-01",
            "2001-02","2001-03","2001-03"],freq="M"))

print('\nUsing la')
# Values and dates as two larrys sharing the same positional labels.
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
# Replace every value by the mean of its date group, then drop the
# duplicated dates.
s2 = ts.time_series(dta.group_mean(dat).x,dates=ts.date_array(dat.x,freq="M"))
s2u = ts.remove_duplicated_dates(s2)
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
# Single-column frame indexed by position; the groupby mapping sends each
# position to its date, and the groups are aggregated with np.mean.
pdta = pandas.DataFrame(s.data, np.arange(len(s.data)), [1])
pa = pdta.groupby(dict(zip(np.arange(len(s.data)),
            s.dates.tolist()))).aggregate(np.mean)