Beispiel #1
0
    def test_between_time(self, inclusive_endpoints_fixture, frame_or_series):
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        ts = tm.get_obj(ts, frame_or_series)

        stime = time(0, 0)
        etime = time(1, 0)
        inclusive = inclusive_endpoints_fixture

        filtered = ts.between_time(stime, etime, inclusive=inclusive)
        exp_len = 13 * 4 + 1

        if inclusive in ["right", "neither"]:
            exp_len -= 5
        if inclusive in ["left", "neither"]:
            exp_len -= 4

        assert len(filtered) == exp_len
        for rs in filtered.index:
            t = rs.time()
            if inclusive in ["left", "both"]:
                assert t >= stime
            else:
                assert t > stime

            if inclusive in ["right", "both"]:
                assert t <= etime
            else:
                assert t < etime

        result = ts.between_time("00:00", "01:00")
        expected = ts.between_time(stime, etime)
        tm.assert_equal(result, expected)

        # across midnight
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        ts = tm.get_obj(ts, frame_or_series)
        stime = time(22, 0)
        etime = time(9, 0)

        filtered = ts.between_time(stime, etime, inclusive=inclusive)
        exp_len = (12 * 11 + 1) * 4 + 1
        if inclusive in ["right", "neither"]:
            exp_len -= 4
        if inclusive in ["left", "neither"]:
            exp_len -= 4

        assert len(filtered) == exp_len
        for rs in filtered.index:
            t = rs.time()
            if inclusive in ["left", "both"]:
                assert (t >= stime) or (t <= etime)
            else:
                assert (t > stime) or (t <= etime)

            if inclusive in ["right", "both"]:
                assert (t <= etime) or (t >= stime)
            else:
                assert (t < etime) or (t >= stime)
Beispiel #2
0
def update():
    """Fold the latest ticks into 30-minute OHLC bars, rerun the Chan analysis and redraw.

    Reads the module-level ``dataToNow`` (``Times`` plus ``Data[0]`` prices),
    appends one entry per bar to the module-level ``data`` container
    (``Data[0..3]`` = open/high/low/close, ``Data[4]`` = 0 placeholder),
    then pushes the rebuilt quote list to ``itemK`` and forces a repaint.
    """
    global itemK,itemBi,itemLine,itemZhongshu
    ticks = DataFrame(index=dataToNow.Times,data = dataToNow.Data[0],columns=['price'])
    # keep only the two intraday windows 9:30-11:30 and 13:00-15:00
    ticks = ticks.between_time('9:30','11:30').append(ticks.between_time('13:00','15:00'))
    # NOTE(review): ``resample(how=...)``, ``DataFrame.append`` and
    # ``Timestamp.to_datetime`` are legacy pandas APIs; kept as-is because the
    # rest of this module presumably targets the same pandas release.
    bars = ticks.resample('30T',how = {'price':'ohlc'},label='right').dropna()
    for stamp, row in bars.iterrows():
        ohlc = row['price']
        data.Times.append(stamp.to_datetime())
        data.Data[0].append(ohlc['open'])
        data.Data[1].append(ohlc['high'])
        data.Data[2].append(ohlc['low'])
        data.Data[3].append(ohlc['close'])
        data.Data[4].append(0)
    # each quote row is [position, open, close, low, high]
    quotes = [[i, data.Data[0][i], data.Data[3][i],
               data.Data[2][i], data.Data[1][i]]
              for i in range(len(data.Times))]

    chan = Chan(data.Data[0], data.Data[1], data.Data[2],
                data.Data[3], data.Data[4], data.Times)
    chan.barsMerge()
    chan.findFenxing()
    chan.findBi()
    chan.findLines()
    chan.findZhongshus()
    chan.calculate_ta()
    # (the original incremented the local bar frame here; the result was never read)
    itemK.set_data(quotes)
#    itemBi.set_data(chan.bis)
#    itemLine.set_data(chan.lines)
#    itemZhongshu.set_data(chan.zhongshus)
    app.processEvents()  ## force complete redraw for every plot
Beispiel #3
0
    def test_between_time_frame(self):
        """Check ``DataFrame.between_time`` for every open/closed endpoint combination.

        Uses the legacy positional ``include_start`` / ``include_end``
        arguments on a same-day window and on a window wrapping midnight.
        """
        index = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(index), 2), index=index)
        stime, etime = time(0, 0), time(1, 0)

        for inc_start, inc_end in product([True, False], [True, False]):
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            # count for the closed window, minus the excluded endpoints
            exp_len = 13 * 4 + 1
            if not inc_start:
                exp_len -= 5
            if not inc_end:
                exp_len -= 4
            self.assertEqual(len(filtered), exp_len)

            for stamp in filtered.index:
                tod = stamp.time()
                self.assertTrue(tod >= stime if inc_start else tod > stime)
                self.assertTrue(tod <= etime if inc_end else tod < etime)

        # string arguments must select the same rows as ``datetime.time`` ones
        assert_frame_equal(ts.between_time('00:00', '01:00'),
                           ts.between_time(stime, etime))

        # across midnight
        index = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(index), 2), index=index)
        stime, etime = time(22, 0), time(9, 0)

        for inc_start, inc_end in product([True, False], [True, False]):
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = (12 * 11 + 1) * 4 + 1
            if not inc_start:
                exp_len -= 4
            if not inc_end:
                exp_len -= 4
            self.assertEqual(len(filtered), exp_len)

            for stamp in filtered.index:
                tod = stamp.time()
                if inc_start:
                    self.assertTrue((tod >= stime) or (tod <= etime))
                else:
                    self.assertTrue((tod > stime) or (tod <= etime))

                if inc_end:
                    self.assertTrue((tod <= etime) or (tod >= stime))
                else:
                    self.assertTrue((tod < etime) or (tod >= stime))
Beispiel #4
0
    def test_between_time_frame(self):
        """Exercise ``DataFrame.between_time`` with all four endpoint settings.

        The legacy positional ``include_start`` / ``include_end`` flags are
        checked first on 00:00-01:00 and then on 22:00-09:00, which wraps
        past midnight.
        """
        flags = [True, False]

        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        stime = time(0, 0)
        etime = time(1, 0)

        for inc_start, inc_end in product(flags, flags):
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            # excluded endpoints shrink the closed-window count
            exp_len = 13 * 4 + 1
            exp_len -= 0 if inc_start else 5
            exp_len -= 0 if inc_end else 4

            self.assertEqual(len(filtered), exp_len)
            for label in filtered.index:
                clock = label.time()
                lower_ok = clock >= stime if inc_start else clock > stime
                self.assertTrue(lower_ok)
                upper_ok = clock <= etime if inc_end else clock < etime
                self.assertTrue(upper_ok)

        # strings and ``datetime.time`` objects must be interchangeable
        result = ts.between_time('00:00', '01:00')
        expected = ts.between_time(stime, etime)
        assert_frame_equal(result, expected)

        # across midnight
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        stime = time(22, 0)
        etime = time(9, 0)

        for inc_start, inc_end in product(flags, flags):
            filtered = ts.between_time(stime, etime, inc_start, inc_end)
            exp_len = (12 * 11 + 1) * 4 + 1
            exp_len -= 0 if inc_start else 4
            exp_len -= 0 if inc_end else 4

            self.assertEqual(len(filtered), exp_len)
            for label in filtered.index:
                clock = label.time()
                if inc_start:
                    self.assertTrue((clock >= stime) or (clock <= etime))
                else:
                    self.assertTrue((clock > stime) or (clock <= etime))

                if inc_end:
                    self.assertTrue((clock <= etime) or (clock >= stime))
                else:
                    self.assertTrue((clock < etime) or (clock >= stime))
Beispiel #5
0
    def test_between_time_raises(self, frame_or_series):
        """``between_time`` must raise ``TypeError`` when the index is not a DatetimeIndex."""
        # GH#20725
        obj = tm.get_obj(DataFrame([[1, 2, 3], [4, 5, 6]]), frame_or_series)

        expected_msg = "Index must be DatetimeIndex"
        # the default integer index is not a DatetimeIndex
        with pytest.raises(TypeError, match=expected_msg):
            obj.between_time(start_time="00:00", end_time="12:00")
Beispiel #6
0
    def test_between_time_types(self, frame_or_series):
        """Full ``datetime`` objects are rejected as ``between_time`` bounds."""
        # GH11818
        index = date_range("1/1/2000", "1/5/2000", freq="5min")
        obj = tm.get_obj(DataFrame({"A": 0}, index=index), frame_or_series)
        start, end = datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)

        msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
        with pytest.raises(ValueError, match=msg):
            obj.between_time(start, end)
Beispiel #7
0
    def test_between_time(self, close_open_fixture):
        """Check ``between_time`` for one (include_start, include_end) combination.

        The flags are passed positionally (legacy signature). A same-day
        window and a window wrapping past midnight are both verified.
        """
        inc_start, inc_end = close_open_fixture

        index = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(index), 2), index=index)
        stime, etime = time(0, 0), time(1, 0)

        filtered = ts.between_time(stime, etime, inc_start, inc_end)
        # count for the closed window, minus the excluded endpoints
        exp_len = 13 * 4 + 1
        if not inc_start:
            exp_len -= 5
        if not inc_end:
            exp_len -= 4
        assert len(filtered) == exp_len

        for stamp in filtered.index:
            tod = stamp.time()
            assert (tod >= stime) if inc_start else (tod > stime)
            assert (tod <= etime) if inc_end else (tod < etime)

        # string arguments must select the same rows as ``datetime.time`` ones
        assert_frame_equal(ts.between_time('00:00', '01:00'),
                           ts.between_time(stime, etime))

        # across midnight
        index = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(index), 2), index=index)
        stime, etime = time(22, 0), time(9, 0)

        filtered = ts.between_time(stime, etime, inc_start, inc_end)
        exp_len = (12 * 11 + 1) * 4 + 1
        if not inc_start:
            exp_len -= 4
        if not inc_end:
            exp_len -= 4
        assert len(filtered) == exp_len

        for stamp in filtered.index:
            tod = stamp.time()
            if inc_start:
                assert (tod >= stime) or (tod <= etime)
            else:
                assert (tod > stime) or (tod <= etime)

            if inc_end:
                assert (tod <= etime) or (tod >= stime)
            else:
                assert (tod < etime) or (tod >= stime)
    def test_between_time(self, close_open_fixture):
        """Verify ``between_time`` row counts and bounds for one endpoint setting.

        ``close_open_fixture`` supplies the (include_start, include_end)
        pair, which is forwarded positionally (legacy signature).
        """
        inc_start, inc_end = close_open_fixture

        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        stime = time(0, 0)
        etime = time(1, 0)

        filtered = ts.between_time(stime, etime, inc_start, inc_end)
        # excluded endpoints shrink the closed-window count
        exp_len = 13 * 4 + 1
        exp_len -= 0 if inc_start else 5
        exp_len -= 0 if inc_end else 4
        assert len(filtered) == exp_len

        for label in filtered.index:
            clock = label.time()
            lower_ok = clock >= stime if inc_start else clock > stime
            assert lower_ok
            upper_ok = clock <= etime if inc_end else clock < etime
            assert upper_ok

        # strings and ``datetime.time`` objects must be interchangeable
        result = ts.between_time("00:00", "01:00")
        expected = ts.between_time(stime, etime)
        tm.assert_frame_equal(result, expected)

        # across midnight
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        stime = time(22, 0)
        etime = time(9, 0)

        filtered = ts.between_time(stime, etime, inc_start, inc_end)
        exp_len = (12 * 11 + 1) * 4 + 1
        exp_len -= 0 if inc_start else 4
        exp_len -= 0 if inc_end else 4
        assert len(filtered) == exp_len

        for label in filtered.index:
            clock = label.time()
            if inc_start:
                assert (clock >= stime) or (clock <= etime)
            else:
                assert (clock > stime) or (clock <= etime)

            if inc_end:
                assert (clock <= etime) or (clock >= stime)
            else:
                assert (clock < etime) or (clock >= stime)
Beispiel #9
0
    def test_between_time_incorr_arg_inclusive(self):
        """An unrecognised ``inclusive`` string must raise ``ValueError``."""
        # GH40245
        index = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(index), 2), index=index)
        stime, etime = time(0, 0), time(1, 0)

        msg = "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
        with pytest.raises(ValueError, match=msg):
            ts.between_time(stime, etime, inclusive="bad_string")
Beispiel #10
0
    def test_between_time_incompatiable_args_given(self, include_start, include_end):
        """Mixing the deprecated endpoint flags with ``inclusive`` must raise ``ValueError``."""
        # GH40245
        index = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(index), 2), index=index)
        stime, etime = time(0, 0), time(1, 0)

        msg = (
            "Deprecated arguments `include_start` and `include_end` cannot be "
            "passed if `inclusive` has been given."
        )
        with pytest.raises(ValueError, match=msg):
            ts.between_time(stime, etime, include_start, include_end, inclusive="left")
    def test_between_time_types(self):
        """Full ``datetime`` bounds are rejected by the index, frame and series APIs."""
        # GH11818
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        start, end = datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)
        msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"

        with pytest.raises(ValueError, match=msg):
            rng.indexer_between_time(start, end)

        # DataFrame and Series must fail the same way as the raw index
        for obj in (DataFrame({"A": 0}, index=rng), Series(0, index=rng)):
            with pytest.raises(ValueError, match=msg):
                obj.between_time(start, end)
Beispiel #12
0
    def test_between_time_types(self):
        """Full ``datetime`` bounds raise ``ValueError`` on index, frame and series."""
        # GH11818
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        start, end = datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)

        with pytest.raises(ValueError):
            rng.indexer_between_time(start, end)

        # DataFrame and Series must fail the same way as the raw index
        for obj in (DataFrame({'A': 0}, index=rng), Series(0, index=rng)):
            with pytest.raises(ValueError):
                obj.between_time(start, end)
Beispiel #13
0
    def test_all2(self):

        # more sophisticated comparison of integer vs.
        # time-based windowing
        df = DataFrame({'B': np.arange(50)},
                       index=pd.date_range('20130101',
                                           periods=50, freq='H')
                       )
        # in-range data
        dft = df.between_time("09:00", "16:00")

        r = dft.rolling(window='5H')

        for f in ['sum', 'mean', 'count', 'median', 'std',
                  'var', 'kurt', 'skew', 'min', 'max']:

            result = getattr(r, f)()

            # we need to roll the days separately
            # to compare with a time-based roll
            # finally groupby-apply will return a multi-index
            # so we need to drop the day
            def agg_by_day(x):
                x = x.between_time("09:00", "16:00")
                return getattr(x.rolling(5, min_periods=1), f)()
            expected = df.groupby(df.index.day).apply(
                agg_by_day).reset_index(level=0, drop=True)

            tm.assert_frame_equal(result, expected)
    def test_between_time_axis(self, axis):
        # issue 8839
        rng = date_range("1/1/2000", periods=100, freq="10min")
        ts = DataFrame(np.random.randn(len(rng), len(rng)))
        stime, etime = ("08:00:00", "09:00:00")
        exp_len = 7

        if axis in ["index", 0]:
            ts.index = rng
            assert len(ts.between_time(stime, etime)) == exp_len
            assert len(ts.between_time(stime, etime, axis=0)) == exp_len

        if axis in ["columns", 1]:
            ts.columns = rng
            selected = ts.between_time(stime, etime, axis=1).columns
            assert len(selected) == exp_len
def make_timeslot_oneday(data: pd.DataFrame,
                         sta_order_start=1,
                         sta_order_end=23,
                         slot_length=15) -> pd.DataFrame:
    """Sum ``pf`` per station for every ``slot_length``-minute slot of one day.

    The day is cut into half-open slots ``[start, start + slot_length)``.
    For each slot that contains rows, one output row is produced holding
    the slot label (``start_time``) and, per station, the sum of ``pf``.
    Station columns are named with the zero-padded ``sta_order`` ('01',
    '02', ...).

    Parameters
    ----------
    data : DataFrame indexed by a DatetimeIndex, with columns ``sum_time``,
        ``sta_order`` and ``pf``. The date prefix of the labels is taken
        from the first row's ``sum_time``.
    sta_order_start, sta_order_end : unused; kept for interface compatibility.
    slot_length : slot width in minutes, must be positive.

    Returns
    -------
    DataFrame with ``start_time`` as first column followed by one column
    per station seen. An empty frame is returned for empty input.

    Raises
    ------
    TypeError : if the index is not a DatetimeIndex.
    ValueError : if ``slot_length`` is not positive.
    """
    if len(data) == 0:
        # nothing to aggregate (previously an IndexError on ``iloc[0]``)
        return pd.DataFrame()
    if not isinstance(data.index, pd.DatetimeIndex):
        raise TypeError('Index must be DatetimeIndex')
    if slot_length <= 0:
        # a non-positive step would never reach the end of the day
        raise ValueError('slot_length must be positive')

    # 'YYYY-MM-DD ' (date plus trailing space) used as label prefix
    date_day = str(data.iloc[0]['sum_time'])[:11]
    step = pd.Timedelta(minutes=slot_length)
    day_length = pd.Timedelta(days=1)
    slot_start = pd.Timedelta(minutes=0)
    rows = []
    while slot_start < day_length:
        # the last 8 characters of str(Timedelta) are 'HH:MM:SS'
        start_str = str(slot_start)[-8:]
        slot_start = slot_start + step
        end_str = str(slot_start)[-8:]
        # half-open slot; the index-level API keeps the ``include_end``
        # flag that newer ``DataFrame.between_time`` versions dropped
        positions = data.index.indexer_between_time(
            start_str, end_str, include_end=False)
        oneslot = data.iloc[positions]
        sta_sum = {}
        for sta_order, pf in zip(oneslot['sta_order'], oneslot['pf']):
            so = '%02d' % int(sta_order)
            sta_sum[so] = sta_sum[so] + pf if so in sta_sum else pf
        if sta_sum:
            sta_sum['start_time'] = date_day + start_str
            rows.append(sta_sum)
        print('\r\twork done:', date_day + start_str, end='')
    if not rows:
        return pd.DataFrame()
    # build the frame once instead of appending row by row
    oneday = pd.DataFrame(rows)
    start_time = oneday.pop('start_time')
    oneday.insert(0, 'start_time', start_time)
    return oneday
    def test_all2(self, arithmetic_win_operators):
        f = arithmetic_win_operators
        # more sophisticated comparison of integer vs.
        # time-based windowing
        df = DataFrame({"B": np.arange(50)},
                       index=date_range("20130101", periods=50, freq="H"))
        # in-range data
        dft = df.between_time("09:00", "16:00")

        r = dft.rolling(window="5H")

        result = getattr(r, f)()

        # we need to roll the days separately
        # to compare with a time-based roll
        # finally groupby-apply will return a multi-index
        # so we need to drop the day
        def agg_by_day(x):
            x = x.between_time("09:00", "16:00")
            return getattr(x.rolling(5, min_periods=1), f)()

        expected = (df.groupby(df.index.day).apply(agg_by_day).reset_index(
            level=0, drop=True))

        tm.assert_frame_equal(result, expected)
Beispiel #17
0
    def test_between_time_axis(self, axis):
        # issue 8839
        rng = date_range('1/1/2000', periods=100, freq='10min')
        ts = DataFrame(np.random.randn(len(rng), len(rng)))
        stime, etime = ('08:00:00', '09:00:00')
        exp_len = 7

        if axis in ['index', 0]:
            ts.index = rng
            assert len(ts.between_time(stime, etime)) == exp_len
            assert len(ts.between_time(stime, etime, axis=0)) == exp_len

        if axis in ['columns', 1]:
            ts.columns = rng
            selected = ts.between_time(stime, etime, axis=1).columns
            assert len(selected) == exp_len
Beispiel #18
0
    def test_between_time_axis(self, axis):
        # issue 8839
        rng = date_range('1/1/2000', periods=100, freq='10min')
        ts = DataFrame(np.random.randn(len(rng), len(rng)))
        stime, etime = ('08:00:00', '09:00:00')
        exp_len = 7

        if axis in ['index', 0]:
            ts.index = rng
            assert len(ts.between_time(stime, etime)) == exp_len
            assert len(ts.between_time(stime, etime, axis=0)) == exp_len

        if axis in ['columns', 1]:
            ts.columns = rng
            selected = ts.between_time(stime, etime, axis=1).columns
            assert len(selected) == exp_len
Beispiel #19
0
def make_timeslot_oneday(data:pd.DataFrame,slot_length=15)->pd.DataFrame:
    """Average ``sta_time`` per station for every ``slot_length``-minute slot of one day.

    The day is cut into half-open slots ``[start, start + slot_length)``.
    For each slot that contains rows, one output row is produced holding,
    per station (zero-padded ``sta_order`` as column name), the mean of
    ``sta_time`` rounded to two decimals, plus a ``start_time`` label.

    Parameters
    ----------
    data : DataFrame indexed by a DatetimeIndex, with columns ``arrival``,
        ``sta_order`` and ``sta_time``. The date prefix of the labels is
        taken from the first row's ``arrival``.
    slot_length : slot width in minutes, must be positive.

    Returns
    -------
    DataFrame with one row per non-empty slot; empty for empty input.

    Raises
    ------
    TypeError : if the index is not a DatetimeIndex.
    ValueError : if ``slot_length`` is not positive.
    """
    if len(data) == 0:
        # nothing to aggregate (previously an IndexError on ``iloc[0]``)
        return pd.DataFrame()
    if not isinstance(data.index, pd.DatetimeIndex):
        raise TypeError('Index must be DatetimeIndex')
    if slot_length <= 0:
        # a non-positive step would never reach the end of the day
        raise ValueError('slot_length must be positive')

    # stringify and keep the first 11 characters: the date plus a trailing space
    date_day = str(data.iloc[0]['arrival'])[:11]
    step = pd.Timedelta(minutes=slot_length)  # length of one time slot
    day_length = pd.Timedelta(days=1)
    slot_start = pd.Timedelta(minutes=0)
    rows = []
    while slot_start < day_length:
        # slots run from 00:00:00 and wrap back to 00:00:00 at the end of the day
        start_str = str(slot_start)[-8:]
        slot_start = slot_start + step
        end_str = str(slot_start)[-8:]
        # obtain one slot of data (half-open); the index-level API keeps the
        # ``include_end`` flag that newer ``DataFrame.between_time`` dropped
        positions = data.index.indexer_between_time(
            start_str, end_str, include_end=False)
        oneslot = data.iloc[positions]
        sta_sum = {}
        sta_count = {}
        for sta_order, sta_time in zip(oneslot['sta_order'], oneslot['sta_time']):
            so = '%02d' % int(sta_order)
            if so in sta_sum:
                sta_sum[so] += sta_time
                sta_count[so] += 1
            else:
                sta_sum[so] = sta_time
                sta_count[so] = 1
        # average the station time within the slot; round() fixes the precision
        slot_row = {so: round(total * 1.0 / sta_count[so], 2)
                    for so, total in sta_sum.items()}
        if slot_row:  # no keys means the slot was empty
            slot_row['start_time'] = date_day + start_str  # slot label
            rows.append(slot_row)
        print('\twork done:',date_day+start_str,'\r',end='')
    # build the frame once instead of appending row by row
    return pd.DataFrame(rows)
Beispiel #20
0
    def test_between_time_types(self):
        """``datetime`` bounds (rather than ``time``) must raise ``ValueError`` everywhere."""
        # GH11818
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        bounds = (datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

        with pytest.raises(ValueError):
            rng.indexer_between_time(*bounds)

        frame = DataFrame({'A': 0}, index=rng)
        with pytest.raises(ValueError):
            frame.between_time(*bounds)

        series = Series(0, index=rng)
        with pytest.raises(ValueError):
            series.between_time(*bounds)
Beispiel #21
0
    def test_between_time_types(self):
        """``datetime`` bounds raise a descriptive ``ValueError`` on index, frame and series."""
        # GH11818
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        bounds = (datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
        msg = (r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\]"
               " to a time")

        with pytest.raises(ValueError, match=msg):
            rng.indexer_between_time(*bounds)

        frame = DataFrame({'A': 0}, index=rng)
        with pytest.raises(ValueError, match=msg):
            frame.between_time(*bounds)

        series = Series(0, index=rng)
        with pytest.raises(ValueError, match=msg):
            series.between_time(*bounds)
Beispiel #22
0
def make_mask(sc: Scenario, df: pd.DataFrame):
    """Build a boolean array flagging the rows of ``df`` hit by any time rule.

    Every rule in ``sc.home_mask_rules`` whose ``'type'`` is ``'time'``
    marks the rows whose time of day lies between its ``'begin'`` and
    ``'end'`` entries (defaults ``'23:55'`` and ``'00:00'``). Rules of
    any other type are ignored.

    Returns the mask as a NumPy array aligned with the rows of ``df``.
    """
    flags = np.zeros(len(df.values), dtype=np.bool_)
    mask = pd.Series(flags, index=df.index)

    time_rules = (rule for rule in sc.home_mask_rules
                  if rule.get('type') == 'time')
    for rule in time_rules:
        window = df.between_time(rule.get('begin', '23:55'),
                                 rule.get('end', '00:00'))
        mask[window.index] = True

    return mask.values
Beispiel #23
0
    def test_between_time_datetimeindex(self):
        index = date_range("2012-01-01", "2012-01-05", freq="30min")
        df = DataFrame(np.random.randn(len(index), 5), index=index)
        bkey = slice(time(13, 0, 0), time(14, 0, 0))
        binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]

        result = df.between_time(bkey.start, bkey.stop)
        expected = df.loc[bkey]
        expected2 = df.iloc[binds]
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result, expected2)
        assert len(result) == 12
Beispiel #24
0
    def test_between_time_formats(self):
        # GH11818
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)

        strings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"),
                   ("0200am", "0230am"), ("2:00:00", "2:30:00"),
                   ("020000", "023000"), ("2:00:00am", "2:30:00am"),
                   ("020000am", "023000am")]
        expected_length = 28

        for time_string in strings:
            assert len(ts.between_time(*time_string)) == expected_length
Beispiel #25
0
    def _get_power_spent_by_type(power_data: pd.DataFrame, times_peak_types: pd.DataFrame):
        """Accumulate the power recorded in each tariff period, keyed by tariff type.

        For every row of ``times_peak_types`` the rows of ``power_data``
        whose time of day lies between the period's start and end are
        summed; the first column's total, divided by ``4 * 1000``, is
        added to the running total of that period's tariff type.
        Periods that match no rows contribute nothing.

        Returns a dict mapping tariff type to accumulated value.
        """
        totals = {}
        for _, period in times_peak_types.iterrows():
            window = power_data.between_time(period[TariffPeriods.COLUMN_NAME_START_PERIOD],
                                             period[TariffPeriods.COLUMN_NAME_END_PERIOD])
            if window.shape[0] <= 0:
                continue
            # NOTE(review): the 4 * 1000 divisor presumably converts
            # quarter-hourly W readings to kWh -- confirm with the data source
            spent = window.sum().values[0] / (4 * 1000)
            tariff = period[TariffPeriods.COLUMN_NAME_TARIFF_TYPE]
            totals[tariff] = totals.get(tariff, 0) + spent

        return totals
Beispiel #26
0
    def test_between_time_formats(self):
        # GH11818
        rng = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)

        strings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"),
                   ("0200am", "0230am"), ("2:00:00", "2:30:00"),
                   ("020000", "023000"), ("2:00:00am", "2:30:00am"),
                   ("020000am", "023000am")]
        expected_length = 28

        for time_string in strings:
            assert len(ts.between_time(*time_string)) == expected_length
Beispiel #27
0
    def test_between_time_same_functionality_old_and_new_args(self):
        """Deprecated endpoint flags must warn and match the equivalent ``inclusive`` value."""
        # GH40245
        index = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(index), 2), index=index)
        stime, etime = time(0, 0), time(1, 0)
        match = ("`include_start` and `include_end` "
                 "are deprecated in favour of `inclusive`.")

        # the default call is equivalent to inclusive="both" and must not warn
        result = ts.between_time(stime, etime)
        expected = ts.between_time(stime, etime, inclusive="both")
        tm.assert_frame_equal(result, expected)

        # (deprecated keyword arguments, equivalent ``inclusive`` value)
        deprecated_cases = [
            ({"include_start": False}, "right"),
            ({"include_end": False}, "left"),
            ({"include_start": False, "include_end": False}, "neither"),
            ({"include_start": True, "include_end": True}, "both"),
        ]
        for old_kwargs, new_inclusive in deprecated_cases:
            with tm.assert_produces_warning(FutureWarning, match=match):
                result = ts.between_time(stime, etime, **old_kwargs)
            expected = ts.between_time(stime, etime, inclusive=new_inclusive)
            tm.assert_frame_equal(result, expected)
Beispiel #28
0
    def test_between_time_warn(self, include_start, include_end,
                               frame_or_series):
        """Passing the deprecated endpoint flags positionally emits a ``FutureWarning``."""
        # GH40245
        index = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = tm.get_obj(
            DataFrame(np.random.randn(len(index), 2), index=index),
            frame_or_series,
        )
        stime, etime = time(0, 0), time(1, 0)

        match = ("`include_start` and `include_end` "
                 "are deprecated in favour of `inclusive`.")
        with tm.assert_produces_warning(FutureWarning, match=match):
            # only the warning matters; the selection itself is discarded
            ts.between_time(stime, etime, include_start, include_end)
def prepare_dataset(data: pd.DataFrame, slot_length=15) -> pd.DataFrame:
    """Restrict the data to 05:15-23:00 and add zero rows for missing time slots.

    Steps:
      1. Index by ``start_time`` (the column is kept) and drop the station
         columns '01', '02', '03'.
      2. Keep only rows whose time of day lies in 05:15:00-23:00:00.
      3. For every day in the three fixed date ranges, add a row for each
         ``slot_length``-minute slot between 05:15 and 23:00 that has no
         entry yet; its other columns end up as 0.
      4. Sort by ``start_time``, replace NaN with 0 and write the result
         to ``pf_dataset.csv`` in the current working directory.

    The input frame is no longer modified in place.

    Parameters
    ----------
    data : DataFrame with a datetime ``start_time`` column and the
        columns '01', '02', '03' (a missing one raises ``KeyError``).
    slot_length : slot width in minutes, must be positive.

    Returns
    -------
    The filled, sorted DataFrame with a fresh integer index.

    Raises
    ------
    ValueError : if ``slot_length`` is not positive.
    """
    if slot_length <= 0:
        # a non-positive step would never reach the end of the day
        raise ValueError('slot_length must be positive')

    # first and last day (inclusive) of each period to be completed
    periods = (
        ('2018-12-01', '2018-12-30'),
        ('2019-01-11', '2019-01-31'),
        ('2019-03-01', '2019-03-20'),
    )
    day_open = pd.Timedelta(hours=5, minutes=15)
    day_close = pd.Timedelta(hours=23)
    step = pd.Timedelta(minutes=slot_length)

    data = data.set_index(['start_time'], drop=False)
    # '01'-'03' have been dropped in the tt data as well
    data = data.drop(columns=['01', '02', '03'])
    data = data.between_time('05:15:00', '23:00:00')

    # fill up possible timeslot loss
    known = data.index
    missing = []
    for first_day, last_day in periods:
        for day in pd.date_range(first_day, last_day, freq='D'):
            slot = day + day_open
            last_slot = day + day_close
            while slot <= last_slot:
                if slot not in known:
                    missing.append(slot)
                slot += step

    data = data.reset_index(drop=True)
    if missing:
        # one concat instead of one append per missing slot
        filler = pd.DataFrame({'start_time': missing})
        data = pd.concat([data, filler], ignore_index=True)

    data = data.sort_values(['start_time']).reset_index(drop=True)
    data = data.fillna(value=0)
    data.to_csv('pf_dataset.csv', index=False)
    return data
Beispiel #30
0
def select_hour(d: pd.DataFrame) -> DataFrame:
    """Keep only the rows stamped exactly 15:00:00 and re-index them by calendar date.

    Per the original inline notes the input is indexed by ``t_date_hour``
    with an ``rtn`` column, and the output is indexed by ``t_date`` with
    the same column. Previews of the frame are printed before and after
    each step.

    Parameters
    ----------
    d : DataFrame indexed by a DatetimeIndex.

    Returns
    -------
    The selected rows, indexed by ``datetime.date``.
    """
    print(d.head(2))
    close_time = datetime.time(15, 0, 0)
    # both endpoints are included by default, so a window whose start equals
    # its end selects exactly that time of day; the explicit
    # include_start/include_end flags were redundant and are gone from
    # newer pandas releases
    d = d.between_time(close_time, close_time)
    print(d.head())
    d.index = d.index.date
    print(d.head())
    return d
Beispiel #31
0
    def test_between_time_formats(self):
        """Every supported time-string spelling selects the same 02:00-02:30 rows."""
        # GH11818
        _skip_if_has_locale()

        index = date_range('1/1/2000', '1/5/2000', freq='5min')
        ts = DataFrame(np.random.randn(len(index), 2), index=index)

        time_pairs = [
            ("2:00", "2:30"),
            ("0200", "0230"),
            ("2:00am", "2:30am"),
            ("0200am", "0230am"),
            ("2:00:00", "2:30:00"),
            ("020000", "023000"),
            ("2:00:00am", "2:30:00am"),
            ("020000am", "023000am"),
        ]
        expected_length = 28

        for start, end in time_pairs:
            observed = len(ts.between_time(start, end))
            # the failure message names the offending pair
            self.assertEqual(observed, expected_length,
                             "%s - %s" % (start, end))
Beispiel #32
0
def clean_non_trading_times(df: pd.DataFrame) -> pd.DataFrame:
    """Drop weekend, out-of-hours and holiday rows, then forward-fill gaps.

    :param df: Data with weekends and holidays; must have a datetime-typed
        ``'DateTime'`` column.
    :return trading_data: New frame restricted to weekdays between 09:00 and
        16:00 (both inclusive) that do not fall on a holiday, with remaining
        NaN values forward-filled.

    NOTE: holidays are only generated for 2000-01-01 .. 2020-12-31, so rows
    outside that range are never treated as holidays. ``Calendar`` is defined
    elsewhere in the file (presumably a pandas holiday calendar -- confirm).
    """
    # Weekends go out (weekday: Monday=0 ... Sunday=6)
    weekdays = df[df['DateTime'].dt.weekday < 5]
    # Remove non trading hours; between_time needs the timestamps as index
    df = weekdays.set_index('DateTime').between_time('9:00', '16:00')
    df = df.reset_index()
    # Holiday days we want to delete from data_slice
    holidays = Calendar().holidays(start='2000-01-01', end='2020-12-31')
    # Holidays are midnight stamps while the rows are intraday, so compare on
    # the calendar day; comparing raw timestamps would never match.
    on_holiday = df['DateTime'].dt.normalize().isin(holidays)
    # Boolean indexing already yields a new frame; ffill returns another.
    trading_data = df[~on_holiday].ffill()
    return trading_data
Beispiel #33
0
    def test_between_time_formats(self):
        """between_time parses each supported time-string format (GH11818)."""
        _skip_if_has_locale()

        stamps = date_range('1/1/2000', '1/5/2000', freq='5min')
        data = np.random.randn(len(stamps), 2)
        ts = DataFrame(data, index=stamps)

        # Each pair spells the same 02:00-02:30 window in a different format.
        windows = (
            ("2:00", "2:30"), ("0200", "0230"),
            ("2:00am", "2:30am"), ("0200am", "0230am"),
            ("2:00:00", "2:30:00"), ("020000", "023000"),
            ("2:00:00am", "2:30:00am"), ("020000am", "023000am"),
        )

        for window in windows:
            selected = ts.between_time(window[0], window[1])
            self.assertEqual(len(selected), 28, "%s - %s" % window)
Beispiel #34
0
    def test_between_time_formats(self, frame_or_series):
        # GH#11818
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        ts = tm.get_obj(ts, frame_or_series)

        strings = [
            ("2:00", "2:30"),
            ("0200", "0230"),
            ("2:00am", "2:30am"),
            ("0200am", "0230am"),
            ("2:00:00", "2:30:00"),
            ("020000", "023000"),
            ("2:00:00am", "2:30:00am"),
            ("020000am", "023000am"),
        ]
        expected_length = 28

        for time_string in strings:
            assert len(ts.between_time(*time_string)) == expected_length
def remove_faulty_data(df: pd.DataFrame, start: str, end: str,
                       filename: str) -> pd.DataFrame:
    """Remove the rows whose time of day lies in the faulty interval.

    Rows stamped from ``start`` to ``end`` (both endpoints included) are
    dropped on every day covered by ``df``; everything else is kept.

    Args:
        df: Frame with a ``DatetimeIndex`` (required by ``between_time``).
        start: Beginning of the faulty interval, either a string that
            ``to_datetime`` can parse or a ``datetime.time`` object.
        end: End of the faulty interval, same accepted types as ``start``.
        filename: Only used in the printed summary message.

    Returns:
        A new frame without the faulty rows; ``df`` itself is not modified.
    """
    # between_time works on times of day, so reduce parsed strings to
    # datetime.time; a full Timestamp is not accepted as a boundary.
    if isinstance(start, str):
        start = to_datetime(start).time()
    if isinstance(end, str):
        end = to_datetime(end).time()

    # Remove faulty data from df:
    # (Using end time before start time excludes the interval in-between.)
    # inclusive="neither" replaces include_start=False / include_end=False,
    # which were removed in pandas 2.0; the endpoints themselves are dropped.
    df_filtered = df.between_time(start_time=end,
                                  end_time=start,
                                  inclusive="neither")
    # Number of deleted rows due to faulty data:
    n_deleted_rows = df.shape[0] - df_filtered.shape[0]
    print(f"{n_deleted_rows} rows of faulty data between {start} and {end} "
          f"succesfully removed from '{filename}'.")
    return df_filtered
    def test_between_time_axis_raises(self, axis):
        """between_time raises TypeError if the chosen axis has no DatetimeIndex."""
        # issue 8839
        stamps = date_range("1/1/2000", periods=100, freq="10min")
        n_stamps = len(stamps)
        int_labels = np.arange(0, n_stamps)
        ts = DataFrame(
            np.random.randn(n_stamps, n_stamps), index=stamps, columns=stamps
        )
        window = ("08:00:00", "09:00:00")
        msg = "Index must be DatetimeIndex"

        if axis in ["columns", 1]:
            # Break the row index, then query it implicitly and explicitly.
            ts.index = int_labels
            for kwargs in ({}, {"axis": 0}):
                with pytest.raises(TypeError, match=msg):
                    ts.between_time(*window, **kwargs)

        if axis in ["index", 0]:
            # Break the column index and query along the columns.
            ts.columns = int_labels
            with pytest.raises(TypeError, match=msg):
                ts.between_time(*window, axis=1)
Beispiel #37
0
    def test_between_time_axis_raises(self, axis):
        """A non-datetime axis must make between_time raise TypeError."""
        # issue 8839
        index = date_range('1/1/2000', periods=100, freq='10min')
        size = len(index)
        plain_labels = np.arange(0, size)
        values = np.random.randn(size, size)
        ts = DataFrame(values, index=index, columns=index)
        begin = '08:00:00'
        finish = '09:00:00'

        msg = "Index must be DatetimeIndex"
        if axis in ['columns', 1]:
            # Rows lose their DatetimeIndex; default axis and axis=0 both fail.
            ts.index = plain_labels
            with pytest.raises(TypeError, match=msg):
                ts.between_time(begin, finish)
            with pytest.raises(TypeError, match=msg):
                ts.between_time(begin, finish, axis=0)

        if axis in ['index', 0]:
            # Columns lose their DatetimeIndex; axis=1 fails.
            ts.columns = plain_labels
            with pytest.raises(TypeError, match=msg):
                ts.between_time(begin, finish, axis=1)
Beispiel #38
0
    def get_mask(self, df: pd.DataFrame):
        """Build a mask for ``df`` set to True between ``self.begin`` and ``self.end``.

        The mask starts from ``HomeMask.empty(df)``; entries whose index label
        falls in the daily time window selected by ``between_time`` are then
        set to ``True``.
        """
        mask = HomeMask.empty(df)
        # Index labels of the rows inside the configured time-of-day window.
        in_window = df.between_time(self.begin, self.end).index
        mask[in_window] = True
        return mask