Example #1
    def test_resample_loffset(self):
        rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
        s = Series(np.random.randn(14), index=rng)

        result = s.resample('5min',
                            how='mean',
                            closed='right',
                            label='right',
                            loffset=timedelta(minutes=1))
        idx = date_range('1/1/2000', periods=4, freq='5min')
        expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                          index=idx + timedelta(minutes=1))
        assert_series_equal(result, expected)

        expected = s.resample('5min',
                              how='mean',
                              closed='right',
                              label='right',
                              loffset='1min')
        assert_series_equal(result, expected)

        expected = s.resample('5min',
                              how='mean',
                              closed='right',
                              label='right',
                              loffset=Minute(1))
        assert_series_equal(result, expected)

        self.assert_(result.index.freq == Minute(5))

        # from daily
        dti = DatetimeIndex(start=datetime(2005, 1, 1),
                            end=datetime(2005, 1, 10),
                            freq='D')
        ser = Series(np.random.rand(len(dti)), dti)

        # to weekly
        result = ser.resample('w-sun', how='last')
        expected = ser.resample('w-sun', how='last', loffset=-bday)
        self.assertEqual(result.index[0] - bday, expected.index[0])
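The three loffset spellings above (a timedelta, the string '1min', and the Minute(1) offset) are interchangeable. In current pandas, where both the how= and loffset= keywords are gone, the same labels can be produced by shifting the index after resampling; a minimal sketch, not the original test code:

import numpy as np
import pandas as pd
from pandas.tseries.offsets import Minute

rng = pd.date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
s = pd.Series(np.random.randn(14), index=rng)

result = s.resample('5min', closed='right', label='right').mean()
result.index = result.index + Minute(1)  # stands in for loffset='1min'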
Example #2

    def test_custom_grouper(self):

        dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1),
                            end=datetime(2005, 1, 10))

        s = Series(np.array([1] * len(dti)), index=dti, dtype='int64')

        b = TimeGrouper(Minute(5))
        g = s.groupby(b)

        # check all cython functions work
        funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
        for f in funcs:
            g._cython_agg_general(f)

        b = TimeGrouper(Minute(5), closed='right', label='right')
        g = s.groupby(b)
        # check all cython functions work
        funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
        for f in funcs:
            g._cython_agg_general(f)

        self.assertEqual(g.ngroups, 2593)
        self.assertTrue(notnull(g.mean()).all())

        # construct expected val
        arr = [1] + [5] * 2592
        idx = dti[0:-1:5]
        idx = idx.append(dti[-1:])
        expect = Series(arr, index=idx)

        # GH2763 - return input dtype if we can
        result = g.agg(np.sum)
        assert_series_equal(result, expect)

        df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype='float64')
        r = df.groupby(b).agg(np.sum)

        self.assertEqual(len(r.columns), 10)
        self.assertEqual(len(r.index), 2593)
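Where the 2593 comes from: a minute-frequency index from 2005-01-01 through 2005-01-10 inclusive holds 9 * 1440 + 1 = 12961 stamps, and with closed='right' the first 5-minute bin captures only the opening stamp, which is exactly the expected [1] + [5] * 2592. A quick sanity check, written against pd.Grouper (the modern spelling of TimeGrouper):

import pandas as pd

dti = pd.date_range(start='2005-01-01', end='2005-01-10', freq='min')
assert len(dti) == 9 * 1440 + 1  # 12961 minute stamps
assert 1 + 5 * 2592 == len(dti)  # one bin of 1, then 2592 bins of 5

s = pd.Series(1, index=dti)
g = s.groupby(pd.Grouper(freq='5min', closed='right', label='right'))
assert g.ngroups == 2593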
Example #3

def test_custom_grouper(index):

    dti = index
    s = Series(np.array([1] * len(dti)), index=dti, dtype='int64')

    b = TimeGrouper(Minute(5))
    g = s.groupby(b)

    # check all cython functions work
    funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
    for f in funcs:
        g._cython_agg_general(f)

    b = TimeGrouper(Minute(5), closed='right', label='right')
    g = s.groupby(b)
    # check all cython functions work
    funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
    for f in funcs:
        g._cython_agg_general(f)

    assert g.ngroups == 2593
    assert notna(g.mean()).all()

    # construct expected val
    arr = [1] + [5] * 2592
    idx = dti[0:-1:5]
    idx = idx.append(dti[-1:])
    expect = Series(arr, index=idx)

    # GH2763 - return input dtype if we can
    result = g.agg(np.sum)
    assert_series_equal(result, expect)

    df = DataFrame(np.random.rand(len(dti), 10),
                   index=dti, dtype='float64')
    r = df.groupby(b).agg(np.sum)

    assert len(r.columns) == 10
    assert len(r.index) == 2593
Example #4
def test_custom_grouper(index):

    dti = index
    s = Series(np.array([1] * len(dti)), index=dti, dtype="int64")

    b = Grouper(freq=Minute(5))
    g = s.groupby(b)

    # check all cython functions work
    funcs = ["add", "mean", "prod", "ohlc", "min", "max", "var"]
    for f in funcs:
        g._cython_agg_general(f)

    b = Grouper(freq=Minute(5), closed="right", label="right")
    g = s.groupby(b)
    # check all cython functions work
    funcs = ["add", "mean", "prod", "ohlc", "min", "max", "var"]
    for f in funcs:
        g._cython_agg_general(f)

    assert g.ngroups == 2593
    assert notna(g.mean()).all()

    # construct expected val
    arr = [1] + [5] * 2592
    idx = dti[0:-1:5]
    idx = idx.append(dti[-1:])
    expect = Series(arr, index=idx)

    # GH2763 - return input dtype if we can
    result = g.agg(np.sum)
    tm.assert_series_equal(result, expect)

    df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype="float64")
    r = df.groupby(b).agg(np.sum)

    assert len(r.columns) == 10
    assert len(r.index) == 2593
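Example #4 below is the same test again after TimeGrouper was retired in favor of the public pd.Grouper(freq=...); the grouping semantics are unchanged. The equivalence, as a one-line sketch:

from pandas import Grouper
from pandas.tseries.offsets import Minute

# legacy spelling: TimeGrouper(Minute(5), closed='right', label='right')
b = Grouper(freq=Minute(5), closed='right', label='right')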
Example #5
def create_flux_ts(thresh_file, bin_width, area, from_dir='data/thresh/'):
    # creates a time series of flux data
    # returns time series object of flux
    # bin_width is time bin size in seconds, area is area of detector in square meters

    # read in data from threshold file
    names = ['id', 'jul', 'RE', 'FE', 'FLUX']
    skiprows = f.linesToSkip(from_dir + thresh_file + '.thresh')
    df = pd.read_csv(from_dir + thresh_file + '.thresh',
                     skiprows=skiprows,
                     names=names,
                     delim_whitespace=True)

    # sort by date/times instead of julian days
    df['date/times'] = df['jul'] + df['RE']
    df['date/times'] = pd.to_datetime(
        list(map(f.get_date_time, df['date/times'])))  # list() for Python 3
    df.index = df['date/times']

    # create time series, sample according to bin_width
    # calculate bins in pandas notation
    bins = str(int(bin_width / 60)) + 'T'
    flux_ts = pd.Series(data=df['FLUX'], index=df.index)
    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))
    flux_ts.name = 'FLUX'

    # determine offset (basically the bin centers) and add to the index
    start = df['RE'][0] - 0.5
    offset_hours = (int(bin_width / 2) + int(start * 86400)) // 3600
    offset_minutes = (int(bin_width / 2) + int(start * 86400) -
                      offset_hours * 3600) // 60
    offset_seconds = int(bin_width / 2) + int(
        start * 86400) - offset_hours * 3600 - offset_minutes * 60
    offset = offset_hours * Hour() + offset_minutes * Minute(
    ) + offset_seconds * Second()
    flux_ts.index += offset

    # filter out unfilled bins
    for i in range(len(flux_ts)):
        if i == 0 and (flux_ts[i] == 0 or flux_ts[i + 1] == 0):
            flux_ts[i] = np.nan  # np.nan, not the string 'nan', so interpolate() works
        if i > 0 and i < len(flux_ts) - 1 and (flux_ts[i - 1] == 0
                                               or flux_ts[i] == 0
                                               or flux_ts[i + 1] == 0):
            flux_ts[i] = np.nan
        if i == len(flux_ts) - 1 and (flux_ts[i - 1] == 0 or flux_ts[i] == 0):
            flux_ts[i] = np.nan

    flux_ts = flux_ts.interpolate()

    return flux_ts
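The offset built above works because integer-scaled Tick offsets add together into one fixed offset. A standalone sketch of that arithmetic, with illustrative values:

import pandas as pd
from pandas.tseries.offsets import Hour, Minute, Second

offset = 1 * Hour() + 30 * Minute() + 15 * Second()
print(pd.Timestamp('2000-01-01') + offset)  # 2000-01-01 01:30:15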
Example #6
def restart(hdfname, newstart):
    '''
    Updates STATE values in an HSP2 HDF file so the simulation restarts at the
    later newstart date, using previously computed values. The user can extend
    the timeseries with predictive or historic data to continue the simulation;
    in that case, a new stop date must be set!

    Parameters
    ----------
    hdfname : str
        HSP2 HDF5 file.
    newstart : str (in a datetime format acceptable to Timestamp)
        DateTime for restarting the simulation.

    Returns
    -------
    None.
    '''

    with HDFStore(hdfname) as store:
        df = store['CONTROL/OP_SEQUENCE']
        delt = df.loc[0, 'INDELT_minutes']

        df = store['CONTROL/GLOBAL']
        start = Timestamp(df.loc['Start', 'Info'])
        stop = Timestamp(df.loc['Stop', 'Info'])
        dates = date_range(start, stop, freq=Minute(delt))

        # determine new start date for restart; previous date if not exact match
        startindx = dates.get_loc(newstart, method='pad')
        startdate = dates[startindx]

        df.loc['Start', 'Info'] = str(startdate)
        df.to_hdf(hdfname, 'CONTROL/GLOBAL', format='table', data_columns=True)

        for path in [p[1:] for p in store.keys() if p.startswith('/RESULTS')]:
            _, x, activity = path.split('/')
            operation, segment = x.split('_')
            if (operation, activity) not in states:
                continue

            df = store[path][states[operation, activity]]
            df = df.iloc[startindx, :].to_frame()
            df.columns = [segment]

            storepath = f'{operation}/{activity}/STATES'
            dff = store[storepath]
            dff.update(df.T)
            dff.to_hdf(store, storepath, format='table', data_columns=True)
    return
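Note that DatetimeIndex.get_loc(key, method='pad') was removed in pandas 2.0; get_indexer is the surviving spelling of the same 'previous stamp when no exact match' lookup. A hedged sketch with a made-up 30-minute INDELT:

import pandas as pd
from pandas.tseries.offsets import Minute

dates = pd.date_range('2005-01-01', '2005-01-02', freq=Minute(30))
startindx = dates.get_indexer([pd.Timestamp('2005-01-01 00:40')],
                              method='pad')[0]
print(dates[startindx])  # 2005-01-01 00:30:00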
Example #7
def flightsUsed(data,WIFIAPTag_list,dateRange,time):
    data_res = []
    for i,wifi in enumerate(WIFIAPTag_list):
        wifiDf = pd.DataFrame([wifi],columns=['WIFIAPTag'])
        wifiDf['time'] = time
        temp =  data[data['WIFIAPTag']==wifi].copy()
        for win in dateRange:
            wifiDf[str(win)+"minutes"] = 0  
            if win<0:
                compare = time+30*win*Minute()
                temp_ = temp[temp['scheduled_flt_time']>=compare] 
                temp_ = temp_[temp_['scheduled_flt_time']<time] 
                count = len(temp_)
                wifiDf[str(win)+"minutes"] = count  
            else:
                compare = time+30*win*Minute()
                temp_ = temp[temp['scheduled_flt_time']>=time] 
                temp_ = temp_[temp_['scheduled_flt_time']<compare] 
                count = len(temp_)
                wifiDf[str(win)+"minutes"] = count  
        data_res.append(wifiDf.copy()) 
    res = pd.concat(data_res)    
    res = res.reset_index().drop(['index'],axis=1)      
    return res
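The window edges come from scaling the offset: time + 30 * win * Minute() lands win half-hours away, in either direction when win is negative. A tiny sketch:

import pandas as pd
from pandas.tseries.offsets import Minute

time = pd.Timestamp('2016-09-14 12:00')
for win in (-2, -1, 1, 2):
    print(win, time + 30 * win * Minute())
# -2 -> 11:00, -1 -> 11:30, 1 -> 12:30, 2 -> 13:00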
Example #8
 def compute_activity_levels(self):
     if self.raw_data.index.freq != Minute():
         # FIXME: if freq divides Minute(), we should resample.
         raise ValueError("Activity cut points haven't been validated for "
                          'epoch lengths other than 60s')
     activity_level = pd.Series(index=self.data.index)
     activity_level[:-1] = pd.cut(self.data.ix[:-1, 'Axis1'],
                                  count_bins,
                                  right=False,
                                  labels=activity_labels)
     activity_level[~boolify(self.data['awake'], True)] = 'sleep'
     # can't tell standing from sitting w/o activPAL
     activity_level[activity_level == 'standing'] = 'sedentary'
     sedentary = pd.Series(index=self.data.index)
     sedentary[:-1] = boolify(activity_level[:-1] == 'sedentary')
     return activity_level, sedentary
Example #9

 def split_data_chunk(data_chunk: pd.DataFrame, data_period,
                      output_path: str):
     i = 0
     while i < len(data_period):
         start_time = data_period[i]
         end_time = start_time + Minute(15)
         try:
             data_part = data_chunk[start_time:end_time]
             part_name = str(end_time.date()) + '_' + str(
                 end_time.time()).replace(':', '-')[:-3]
             file_path = output_path + '\\' + part_name + '.csv'
             data_part.to_csv(file_path, index=False)
         except Exception as e:
             print(e)
         i += 1
     return True
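One caveat in split_data_chunk: label slicing on a DatetimeIndex is inclusive at both ends, so data_chunk[start_time:end_time] with end_time = start_time + Minute(15) shares its boundary row with the next part. A small sketch of the behavior:

import numpy as np
import pandas as pd
from pandas.tseries.offsets import Minute

idx = pd.date_range('2020-01-01 09:00', periods=60, freq='min')
df = pd.DataFrame({'v': np.arange(60)}, index=idx)
start = pd.Timestamp('2020-01-01 09:00')
print(len(df[start:start + Minute(15)]))  # 16, not 15: both ends included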
Example #10
    def set_time_right(self, obj, offset_time=Minute(1)):
        ''' Shift the df index from "start time" to "end time" '''
        assert (isinstance(obj, datetime.datetime)
                or isinstance(obj, pd.Timestamp)
                or isinstance(obj, pd.DataFrame)), 'unexpected obj type'

        if (isinstance(obj, datetime.datetime)
                or isinstance(obj, pd.Timestamp)):
            ret = obj + offset_time
        else:
            index_name = obj.index.name
            obj.index = obj.parallel_apply(lambda o: o.name + offset_time,
                                           axis=1)
            obj.index.rename(index_name, inplace=True)
            ret = obj
        return ret
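Since adding an offset to a DatetimeIndex is vectorized, the DataFrame branch does not strictly need a row-wise parallel_apply; a simpler hedged equivalent:

import pandas as pd
from pandas.tseries.offsets import Minute

df = pd.DataFrame({'x': [1, 2]},
                  index=pd.to_datetime(['2021-01-04 09:30',
                                        '2021-01-04 09:31']))
df.index = df.index + Minute(1)  # bar-start labels become bar-end labels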
Example #11
    def test_union_not_cacheable(self, sort):
        rng = date_range("1/1/2000", periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_union = rng1.union(rng2, sort=sort)
        if sort is None:
            tm.assert_index_equal(the_union, rng)
        else:
            expected = pd.DatetimeIndex(list(rng[10:]) + list(rng[:10]))
            tm.assert_index_equal(the_union, expected)

        rng1 = rng[10:]
        rng2 = rng[15:35]
        the_union = rng1.union(rng2, sort=sort)
        expected = rng[10:]
        tm.assert_index_equal(the_union, expected)
Example #12

 def chunk_data(data: pd.DataFrame, data_period, cpus=os.cpu_count()):
     chunk_size, extra = divmod(len(data_period), cpus * 8)
     if extra:
         chunk_size += 1
     split_time_iter = iter(data_period)
     while 1:
         divide_time = tuple(islice(split_time_iter, chunk_size))
         if not divide_time:
             return
         i = 0
         j = -1
         start_time = divide_time[i]
         end_time = divide_time[j] + Minute(15)
         try:
             yield (data[start_time:end_time], divide_time)
         except Exception as e:
             print(e)
Example #13
    def test_intersection(self):
        rng = date_range("1/1/2000", periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_int = rng1.intersection(rng2)
        expected = rng[10:25]
        tm.assert_index_equal(the_int, expected)
        assert isinstance(the_int, DatetimeIndex)
        assert the_int.freq == rng.freq

        the_int = rng1.intersection(rng2.view(DatetimeIndex))
        tm.assert_index_equal(the_int, expected)

        # non-overlapping
        the_int = rng[:10].intersection(rng[10:])
        expected = DatetimeIndex([])
        tm.assert_index_equal(the_int, expected)
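The freq assertion is the interesting part: when both operands are slices of the same regular range, the intersection keeps the Minute() frequency. A minimal standalone check:

import pandas as pd
from pandas.tseries.offsets import Minute

rng = pd.date_range('1/1/2000', periods=50, freq=Minute())
out = rng[10:].intersection(rng[:25])
assert out.freq == rng.freq  # Minute() survives the set operation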
Example #14
def monthval(siminfo, monthly):
    ''' returns value at start of month for all times within the month'''
    start = siminfo['start']
    stop = siminfo['stop']
    freq = Minute(siminfo['delt'])

    months = tile(monthly, stop.year - start.year + 1).astype(float)
    dr = date_range(start=f'{start.year}-01-01',
                    end=f'{stop.year}-12-31',
                    freq='MS')
    ts = Series(months, index=dr).resample('D').ffill()

    if ts.index.freq > freq:  # upsample
        ts = ts.resample(freq).asfreq().ffill()
    elif ts.index.freq < freq:  # downsample
        ts = ts.resample(freq).mean()
    return ts.truncate(start, stop).to_numpy()
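The up/downsample branch compares offsets directly; in the pandas these examples target, Tick offsets order by duration, so Day() > Minute(60). A hedged sketch of the upsample path, assuming delt=60:

import pandas as pd
from pandas.tseries.offsets import Minute

dr = pd.date_range('2005-01-01', '2005-12-31', freq='MS')
ts = pd.Series(range(len(dr)), index=dr).resample('D').ffill()
hourly = ts.resample(Minute(60)).asfreq().ffill()  # day -> hour, held constant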
Example #15
    def test_intersection(self):
        rng = date_range('1/1/2000', periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_int = rng1.intersection(rng2)
        expected = rng[10:25]
        self.assert_index_equal(the_int, expected)
        tm.assertIsInstance(the_int, DatetimeIndex)
        self.assertEqual(the_int.offset, rng.offset)

        the_int = rng1.intersection(rng2.view(DatetimeIndex))
        self.assert_index_equal(the_int, expected)

        # non-overlapping
        the_int = rng[:10].intersection(rng[10:])
        expected = DatetimeIndex([])
        self.assert_index_equal(the_int, expected)
Example #16
def dayval(siminfo, monthly):
    '''broadcasts HSPF monthly data onto timeseries at desired freq with HSPF
    interpolation to day, but constant within day'''
    start = siminfo['start']
    stop = siminfo['stop']
    freq = Minute(siminfo['delt'])

    months = tile(monthly, stop.year - start.year + 1).astype(float)
    dr = date_range(start=f'{start.year}-01-01',
                    end=f'{stop.year}-12-31',
                    freq='MS')
    ts = Series(months, index=dr).resample('D').interpolate('time')

    if ts.index.freq > freq:  # upsample
        ts = ts.resample(freq).ffill()
    elif ts.index.freq < freq:  # downsample
        ts = ts.resample(freq).mean()
    return ts.truncate(start, stop).to_numpy()
Example #17

def data_preprocess(volume_file, volume_file_new, test_volume_file):

    volume = pd.read_csv(volume_file)
    volume_new = pd.read_csv(volume_file_new)
    test_volume = pd.read_csv(test_volume_file)

    time_window = pd.date_range(
        start=datetime(2016, 9, 19),
        end=datetime(2016, 10, 18),
        freq='20min',
        closed='left').map(
            lambda x: '[' + str(x) + ',' + str(x + Minute(20)) + ')')
    fill_null_dataframe = pd.DataFrame({
        'tollgate_id':
        len(time_window) * 2 * [1] + len(time_window) * [2] +
        len(time_window) * 2 * [3],
        'direction':
        len(time_window) * [0] + len(time_window) * [1] +
        len(time_window) * [0] + len(time_window) * [0] +
        len(time_window) * [1],
        'time_window':
        np.tile(time_window, 5)
    })
    volume = pd.merge(volume, fill_null_dataframe,
                      how='right').fillna(0).sort_values(
                          ['tollgate_id', 'direction',
                           'time_window'])  #use fill null values
    volume = pd.concat((volume, volume_new), ignore_index=True)
    volume['volume'] = volume['volume'].astype('float')
    volume['tollgate_id'] = volume['tollgate_id'].astype('int')
    volume['direction'] = volume['direction'].astype('int')
    volume.index = volume['time_window'].map(
        lambda x: parse(x.split(',')[0][1:]))
    volume['id'] = 'T' + volume.tollgate_id.map(
        str) + 'D' + volume.direction.map(str)

    test_volume['volume'] = test_volume['volume'].astype('float')
    test_volume['tollgate_id'] = test_volume['tollgate_id'].astype('int')
    test_volume['direction'] = test_volume['direction'].astype('int')
    test_volume.index = test_volume['time_window'].map(
        lambda x: parse(x.split(',')[0][1:]))
    test_volume['id'] = 'T' + test_volume.tollgate_id.map(
        str) + 'D' + test_volume.direction.map(str)
    return volume, test_volume
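The '[start,end)' window labels are produced by string-formatting each left edge together with its Minute(20) right edge; that one step in isolation:

import pandas as pd
from pandas.tseries.offsets import Minute

tw = pd.date_range('2016-09-19', periods=2, freq='20min')
print(list(tw.map(lambda x: '[' + str(x) + ',' + str(x + Minute(20)) + ')')))
# ['[2016-09-19 00:00:00,2016-09-19 00:20:00)',
#  '[2016-09-19 00:20:00,2016-09-19 00:40:00)']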
Example #18
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """计算下次更新时间
    说明:
        'S':移动到下一秒
        'm':移动到下一分钟
        'H':移动到下一小时
        'D':移动到下一天
        'W':移动到下周一
        'M':移动到下月第一天
        'Q':下一季度的第一天
        将时间调整到指定的hour和minute
    """
    if pd.isnull(last_updated):
        return MARKET_START
    if freq == 'S':
        off = Second()
        return last_updated + off
    elif freq == 'm':
        off = Minute()
        return last_updated + off
    elif freq == 'H':
        off = Hour()
        return last_updated + off
    elif freq == 'D':
        d = BDay(n=1, normalize=True)
        res = last_updated + d
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'W':
        w = Week(normalize=True, weekday=0)
        res = last_updated + w
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'M':
        m = MonthBegin(n=1, normalize=True)
        res = last_updated + m
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'Q':
        q = QuarterBegin(normalize=True, startingMonth=1)
        res = last_updated + q
        return res.replace(hour=hour, minute=minute, second=second)
    else:
        raise TypeError('unrecognized frequency type; only {} are accepted'
                        .format(('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
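A usage sketch for the branches above (2020-01-03 is a Friday, so the business-day and weekly cases are visible; MARKET_START is left aside):

import pandas as pd

last = pd.Timestamp('2020-01-03 10:15:30')
print(next_update_time(last, freq='m'))  # 2020-01-03 10:16:30
print(next_update_time(last, freq='D'))  # 2020-01-06 18:00:00, next bday
print(next_update_time(last, freq='W'))  # 2020-01-06 18:00:00, next Monday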
Example #19
    def test_resample_ohlc(self):
        s = self.series

        grouper = TimeGrouper(Minute(5))
        expect = s.groupby(grouper).agg(lambda x: x[-1])
        result = s.resample('5Min', how='ohlc')

        self.assertEquals(len(result), len(expect))
        self.assertEquals(len(result.columns), 4)

        xs = result.irow(-2)
        self.assertEquals(xs['open'], s[-6])
        self.assertEquals(xs['high'], s[-6:-1].max())
        self.assertEquals(xs['low'], s[-6:-1].min())
        self.assertEquals(xs['close'], s[-2])

        xs = result.irow(0)
        self.assertEquals(xs['open'], s[0])
        self.assertEquals(xs['high'], s[:5].max())
        self.assertEquals(xs['low'], s[:5].min())
        self.assertEquals(xs['close'], s[4])
Example #20
    def test_resample_ohlc(self):
        s = self.series

        grouper = TimeGrouper(Minute(5), closed='right', label='right')
        expect = s.groupby(grouper).agg(lambda x: x[-1])
        result = s.resample('5Min', how='ohlc')

        self.assertEquals(len(result), len(expect))
        self.assertEquals(len(result.columns), 4)

        xs = result.irow(-1)
        self.assertEquals(xs['open'], s[-5])
        self.assertEquals(xs['high'], s[-5:].max())
        self.assertEquals(xs['low'], s[-5:].min())
        self.assertEquals(xs['close'], s[-1])

        xs = result.irow(1)
        self.assertEquals(xs['open'], s[1])
        self.assertEquals(xs['high'], s[1:6].max())
        self.assertEquals(xs['low'], s[1:6].min())
        self.assertEquals(xs['close'], s[5])
Example #21
    def test_resample_basic(self):
        rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min',
                         name='index')
        s = Series(np.random.randn(14), index=rng)
        result = s.resample('5min', how='mean', closed='right', label='right')
        expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                          index=date_range('1/1/2000', periods=4, freq='5min'))
        assert_series_equal(result, expected)
        self.assertEqual(result.index.name, 'index')

        result = s.resample('5min', how='mean', closed='left', label='right')
        expected = Series([s[:5].mean(), s[5:10].mean(), s[10:].mean()],
                          index=date_range('1/1/2000 00:05', periods=3,
                                           freq='5min'))
        assert_series_equal(result, expected)

        s = self.series
        result = s.resample('5Min', how='last')
        grouper = TimeGrouper(Minute(5), closed='left', label='left')
        expect = s.groupby(grouper).agg(lambda x: x[-1])
        assert_series_equal(result, expect)
Example #22

def test_resample_ohlc(series):
    s = series

    grouper = TimeGrouper(Minute(5))
    expect = s.groupby(grouper).agg(lambda x: x[-1])
    result = s.resample('5Min').ohlc()

    assert len(result) == len(expect)
    assert len(result.columns) == 4

    xs = result.iloc[-2]
    assert xs['open'] == s[-6]
    assert xs['high'] == s[-6:-1].max()
    assert xs['low'] == s[-6:-1].min()
    assert xs['close'] == s[-2]

    xs = result.iloc[0]
    assert xs['open'] == s[0]
    assert xs['high'] == s[:5].max()
    assert xs['low'] == s[:5].min()
    assert xs['close'] == s[4]
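The same assertions hold end-to-end on synthetic data; a minimal hedged sketch with a minute-frequency series:

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(14),
              index=pd.date_range('2000-01-01', periods=14, freq='min'))
ohlc = s.resample('5min').ohlc()
print(ohlc.columns.tolist())              # ['open', 'high', 'low', 'close']
print(ohlc.iloc[0]['open'] == s.iloc[0])  # True: first bin opens at s[0]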
Example #23
def flightsCount(data,WIFIAPTag_list,dateRange,time,spaceData):
    data_res = []
    for i,wifi in enumerate(WIFIAPTag_list):
        wifiDf = pd.DataFrame([wifi],columns=['WIFIAPTag'])
        wifiDf['time'] = time
        temp =  data[data['WIFIAPTag']==wifi].copy()
        wifiDf[str(dateRange)+"minutes_"] = 0  
        compare = time+30*dateRange*Minute()
        temp_ = temp[temp['scheduled_flt_time']>=time] 
        temp_ = temp_[temp_['scheduled_flt_time']<compare] 
        temp_['diffUsed'] = [ np.round((x-time).seconds/1800) for x in temp_['time']]
        temp_ = pd.merge(temp_,spaceTimeCount,on=['flight_ID'],how='left').fillna(0)
        temp_ = pd.merge(temp_,spaceData,on=['diffUsed'],how='left')
        
        count = np.sum(temp_['sum_Times']*temp_['passenger_ID'])
        
        wifiDf[str(dateRange)+"minutes_"] = count  
        data_res.append(wifiDf.copy()) 
    res = pd.concat(data_res)    
    res = res.reset_index().drop(['index'],axis=1)     
    return res
Example #24
def test_resample_ohlc(series):
    s = series

    grouper = Grouper(freq=Minute(5))
    expect = s.groupby(grouper).agg(lambda x: x[-1])
    result = s.resample("5Min").ohlc()

    assert len(result) == len(expect)
    assert len(result.columns) == 4

    xs = result.iloc[-2]
    assert xs["open"] == s[-6]
    assert xs["high"] == s[-6:-1].max()
    assert xs["low"] == s[-6:-1].min()
    assert xs["close"] == s[-2]

    xs = result.iloc[0]
    assert xs["open"] == s[0]
    assert xs["high"] == s[:5].max()
    assert xs["low"] == s[:5].min()
    assert xs["close"] == s[4]
Example #25

def slide7():
    from pandas.tseries.offsets import Hour, Minute
    hour = Hour()
    print(hour)
    four_hours = Hour(4)
    print(four_hours)
    print(pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h'))

    print(Hour(2) + Minute(30))
    print(pd.date_range('1/1/2000', periods=10, freq='1h30min'))

    ts = Series(np.random.randn(4),
                index=pd.date_range('1/1/2000', periods=4, freq='M'))
    print(ts)
    print(ts.shift(2))
    print(ts.shift(-2))
    print('2 M')
    print(ts.shift(2, freq='M'))
    print('3 D')
    print(ts.shift(3, freq='D'))
    print('1 3D')
    print(ts.shift(1, freq='3D'))
    print('1 90T')
    print(ts.shift(1, freq='90T'))

    print('shifting dates with offsets')
    from pandas.tseries.offsets import Day, MonthEnd
    now = datetime(2011, 11, 17)
    print(now + 3 * Day())
    print(now + MonthEnd())
    print(now + MonthEnd(2))

    offset = MonthEnd()
    print(offset)
    print(offset.rollforward(now))
    print(offset.rollback(now))

    ts = Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    print(ts.groupby(offset.rollforward).mean())
Example #26
    def create_position_info(self):
        """
        Create daily profit loss using stock data not using statement data
        :return: dict
        """
        last_close = self.stocks.last().close
        if self.date == self.position_set.stop_date and self.position_set.status == 'CLOSE':
            last_close = self.close_order.net_price

        stage = self.position_set.get_stage(last_close)
        status = self.position_set.current_status(
            new_price=last_close, old_price=self.stocks.reverse()[1].close
        )

        pl_open = (last_close - self.open_order.net_price) * self.open_order.quantity
        pl_open_pct = round(pl_open / (self.open_order.net_price * self.open_order.quantity) * 100, 2)

        if self.date == self.start_date:
            pl_day = (last_close - self.open_order.net_price) * self.open_order.quantity
        elif self.date == self.stop_date:
            pl_day = (self.close_order.net_price - self.stocks.reverse()[1].close) * self.open_order.quantity
        else:
            pl_day = (last_close - self.stocks.reverse()[1].close) * self.open_order.quantity
        pl_day_pct = round(pl_day / (self.open_order.net_price * self.open_order.quantity) * 100, 2)

        return dict(
            stage_id=stage.id,
            stage=stage.stage_name,
            status=status,
            pl_open=round(pl_open, 2),
            pl_open_pct=pl_open_pct,
            pl_day=round(pl_day, 2),
            pl_day_pct=pl_day_pct,
            enter_price=self.open_order.net_price,
            exit_price=self.close_order.net_price if self.close_order else 0.0,
            quantity=self.open_order.quantity,
            holding=self.open_order.net_price * self.open_order.quantity,
            bp_effect=self.position_instruments.last().bp_effect,
            date=(self.date + Hour(17) + Minute(30)).to_datetime().date(),
        )
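Timestamp.to_datetime() in the last line is long-gone API; to_pydatetime() is the current spelling. The 17:30 stamp construction on its own:

import pandas as pd
from pandas.tseries.offsets import Hour, Minute

date = pd.Timestamp('2015-08-20')
stamp = date + Hour(17) + Minute(30)  # 2015-08-20 17:30:00
print(stamp.to_pydatetime().date())   # 2015-08-20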
Example #27
def sarimax_predict():

    model, prediction = {}, {}
    data = ['T1D0']  #, 'T1D1', 'T2D0', 'T3D0', 'T3D1'
    train_data = pd.read_csv('sarimax_data.csv', index_col=0)
    in_model_pkl = 'SARIMAX_6_0_1_1_0_1_72_%s.pkl'
    in_model_path = '../../data/data_after_process/tmp_file'

    for i in data:
        model[i] = joblib.load(path.join(in_model_path, in_model_pkl % (i)))
        # print results[td].summary()
        print(i + ' model starts predicting!')
        prediction[i] = model[i].predict(0, len(train_data) - 1)
        prediction[i] = prediction[i].map(lambda x: np.round(np.exp(x) - 1, 2))

    answer = pd.DataFrame(prediction)['2016-10-25':]
    answer = pd.concat([
        answer.between_time('17:00', '18:40'),
        answer.between_time('8:00', '9:40')
    ]).sort_index()
    answer['time_window'] = answer.index.map(
        lambda x: '[' + str(x) + ',' + str(x + Minute(20)) + ')')
    answer = pd.melt(answer,
                     var_name='tollgate_id',
                     value_name='volume',
                     id_vars=['time_window'])

    answer['direction'] = answer['tollgate_id'].map(lambda d: int(d[3]))
    answer['tollgate_id'] = answer['tollgate_id'].map(lambda d: int(d[1]))
    answer = answer[['tollgate_id', 'time_window', 'direction', 'volume']]

    # import time
    # version = time.strftime('%Y-%m-%d_%R', time.localtime(time.time()))
    # answer.to_csv('answer/prediction_'+version+'.csv',float_format='%.2f',header=True,index=False,encoding='utf-8')
    answer.to_csv('../../answer/prediction_sarimax.csv',
                  float_format='%.2f',
                  header=True,
                  index=False,
                  encoding='utf-8')
Example #28
 def get_data_once(self, stock_code, period, end_time=None, count=1):
     ''' Fetch historical data for a single stock, capped at 5000 records '''
     if end_time is None:
         end_time = datetime.datetime.now()
     if not (isinstance(count, int) and 0 < count <= 5000):
         count = 5000
     if not self.is_auth():
         self.connect_server()
     df = jqdatasdk.get_bars(
         security=stock_code,
         count=count,
         unit=period,
         fields=['date', 'open', 'high', 'low', 'close'],
         include_now=False,
         end_dt=end_time,
         fq_ref_date=None,
         df=True)
     if df.empty:
          raise ValueError(f'failed to fetch data; {stock_code}, {period}')
     index_name = 'date'
     df.set_index(index_name, inplace=True)
     self.set_time_left(df, Minute(1))
     return df
Example #29

def test_resample_loffset(loffset):
    # GH 7687
    rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
    s = Series(np.random.randn(14), index=rng)

    result = s.resample('5min', closed='right', label='right',
                        loffset=loffset).mean()
    idx = date_range('1/1/2000', periods=4, freq='5min')
    expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                      index=idx + timedelta(minutes=1))
    assert_series_equal(result, expected)
    assert result.index.freq == Minute(5)

    # from daily
    dti = date_range(start=datetime(2005, 1, 1),
                     end=datetime(2005, 1, 10), freq='D')
    ser = Series(np.random.rand(len(dti)), dti)

    # to weekly
    result = ser.resample('w-sun').last()
    business_day_offset = BDay()
    expected = ser.resample('w-sun', loffset=-business_day_offset).last()
    assert result.index[0] - business_day_offset == expected.index[0]
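loffset was deprecated in pandas 1.1 and removed in 2.0; the documented replacement is to shift the result index after resampling. A sketch of the modern equivalent of the weekly case above:

import numpy as np
import pandas as pd

dti = pd.date_range('2005-01-01', '2005-01-10', freq='D')
ser = pd.Series(np.random.rand(len(dti)), index=dti)

result = ser.resample('W-SUN').last()
result.index = result.index - pd.offsets.BDay()  # the old loffset=-BDay()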
Example #30
#----------------------------------------------------------------------
# Offset names ("time rules") and related functions


from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli,
                                    Week, Micro, MonthEnd, MonthBegin,
                                    BMonthBegin, BMonthEnd, YearBegin, YearEnd,
                                    BYearBegin, BYearEnd, QuarterBegin,
                                    QuarterEnd, BQuarterBegin, BQuarterEnd)

_offset_map = {
    'D'     : Day(),
    'B'     : BDay(),
    'H'     : Hour(),
    'T'     : Minute(),
    'S'     : Second(),
    'L'     : Milli(),
    'U'     : Micro(),
    None    : None,

    # Monthly - Calendar
    'M'      : MonthEnd(),
    'MS'     : MonthBegin(),

    # Monthly - Business
    'BM'     : BMonthEnd(),
    'BMS'    : BMonthBegin(),

    # Annual - Calendar
    'A-JAN' : YearEnd(month=1),