Code example #1
    def _get_slice_index(self, start=None, end=None, periods=None, **kwargs):
        """
        Time Array
        """
        if not periods:
            periods = None

        if self.normalize:
            start = rollback_minute(start)
            end = rollforward_minute(end)

        freq = round(self.window * (1 - self.overlap))
        freq = str(freq) + 's'

        # TODO: use a datetime array to get the start and end point of every
        # window across the whole timeline
        dtarr = DatetimeArray._generate_range(
            start=start, end=end, periods=periods, freq=freq,
            **kwargs)  # generate datetime array
        print(dtarr)
        dtarr_start = dtarr[:-1]
        dtarr_end = dtarr_start + Second(self.window)

        if self.closed == 'right':
            dtarr_start = dtarr_start - Second(1)

        if self.closed == 'left':
            dtarr_end = dtarr_end - Second(1)

        _index = range(len(dtarr_start))
        dt_index = list(map(lambda i: (dtarr_start[i], dtarr_end[i]), _index))
        return dt_index
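
For reference, a minimal sketch of the step-size arithmetic used above; the values for window and overlap are hypothetical stand-ins for the instance attributes:

window = 60        # window length in seconds (hypothetical)
overlap = 0.5      # fraction of overlap between consecutive windows (hypothetical)
step = round(window * (1 - overlap))   # 30 -> consecutive windows start 30 s apart
freq = str(step) + 's'                 # '30s', the frequency passed to _generate_range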
Code example #2
File: test_ticks.py  Project: ukarroum/pandas
def test_Second():
    assert_offset_equal(Second(), datetime(2010, 1, 1),
                        datetime(2010, 1, 1, 0, 0, 1))
    assert_offset_equal(Second(-1), datetime(2010, 1, 1, 0, 0, 1),
                        datetime(2010, 1, 1))
    assert_offset_equal(2 * Second(), datetime(2010, 1, 1),
                        datetime(2010, 1, 1, 0, 0, 2))
    assert_offset_equal(-1 * Second(), datetime(2010, 1, 1, 0, 0, 1),
                        datetime(2010, 1, 1))

    assert Second(3) + Second(2) == Second(5)
    assert Second(3) - Second(2) == Second()
Code example #3
def oringin3(data, sta_num, delta_sec, num):
    print(delta_sec)
    from pandas.tseries.offsets import Second
    su = square.find_info1(num, sta_num)
    orin_dict = {
        'RECDATETIME': (pd.to_datetime(data['RECDATETIME']) - delta_sec * Second()).values,
        'ISARRLFT': 100000, 'PRODUCTID': data['PRODUCTID'],
        'STATIONSEQNUM': data['STATIONSEQNUM'], 'PACKCODE': data['PACKCODE'],
        'GPSSPEED': data['GPSSPEED'], 'ROUTEID': data['ROUTEID'],
        'LONGITUDE': su[0], 'LATITUDE': su[1],
        'STATIONNUM': su[3], 'STAORDER': su[2],
    }
    orin = pd.DataFrame(orin_dict, index=['100000'])
    return orin
Code example #4
def create_flux_ts(thresh_file, bin_width, area):
    # start by loading threshold data

    bins = str(int(bin_width / 60)) + 'T'

    names = ['id', 'jul', 'RE', 'FE', 'timeOverThresh']
    skiprows = f.linesToSkip('data/thresh/' + thresh_file + '.thresh')
    df = pd.read_csv('data/thresh/' + thresh_file + '.thresh',
                     skiprows=skiprows,
                     names=names,
                     delim_whitespace=True)

    df['date/times'] = df['jul'] + df['RE']
    start = df['RE'][0] - 0.5
    df['date/times'] = pd.to_datetime(list(map(f.get_date_time, df['date/times'])))
    df.index = df['date/times']

    flux_ts = pd.Series(data=df['timeOverThresh'], index=df.index)

    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))

    offset_hours = (int(bin_width / 2) + int(start * 86400)) // 3600
    offset_minutes = (int(bin_width / 2) + int(start * 86400) -
                      offset_hours * 3600) // 60
    offset_seconds = int(bin_width / 2) + int(
        start * 86400) - offset_hours * 3600 - offset_minutes * 60
    offset = offset_hours * Hour() + offset_minutes * Minute(
    ) + offset_seconds * Second()

    flux_ts.index += offset

    return flux_ts
Code example #5
def _safe_write_csv(df, file_name):
    """Write DataFrame to CSV file in standard format"""
    der = os.path.dirname(file_name)
    if der:
        # this illogically logical try-except block brought to you by:
        #    http://stackoverflow.com/a/14364249
        try:
            os.makedirs(der)
        except OSError:
            if not os.path.isdir(der):
                raise

    # express timestamps as string to achieve consistent formatting
    df_freq = to_offset(df.index.inferred_freq)
    if df_freq is None:  #include 2 decimal places of subseconds
        str_fmt = lambda x: dt.strftime(x, '%Y-%m-%d %H:%M:%S.%f')[:22]
    elif df_freq < Second():  #only 1 decimal place of subseconds
        str_fmt = lambda x: dt.strftime(x, '%Y-%m-%d %H:%M:%S.%f')[:21]
    else:
        str_fmt = lambda x: dt.strftime(x, '%Y-%m-%d %H:%M:%S')

    df.index.name = 'TIMESTAMP'  # <-- BIG HAMMER SOLUTION
    df = df.reset_index()
    df['TIMESTAMP'] = df['TIMESTAMP'].apply(str_fmt)
    df.set_index('TIMESTAMP', inplace=True)
    df.to_csv(
        file_name,
        na_rep='NAN',
        quoting=QUOTE_NONE,  # since all values are treated as strings, be
        # explicit about not quoting them
        quotechar="'",  # specify an alternate quote char to avoid triggering
        # QUOTE_NONE/escapechar errors when writing
        # fields with double-quotes (CompileResults and
        # CardStatus columns)
        index_label='TIMESTAMP')
Code example #6
    def auto(self):
        ts = self.get_series()
        self._period = ts.index[1] - ts.index[0]
        # Tick offsets expect an integer count, so cast the float from total_seconds()
        freq = Second(int(self._period.total_seconds()))
        self._order = self.select_order()
        self._model = ARIMA(self.get_series(), order=self._order,
                            freq=freq).fit()
Code example #7
File: test_timedelta_range.py  Project: zkw03/pandas
    def test_timedelta_range(self):

        expected = to_timedelta(np.arange(5), unit='D')
        result = timedelta_range('0 days', periods=5, freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(11), unit='D')
        result = timedelta_range('0 days', '10 days', freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day()
        result = timedelta_range('1 days, 00:00:02',
                                 '5 days, 00:00:02',
                                 freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2)
        result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(50), unit='T') * 30
        result = timedelta_range('0 days', freq='30T', periods=50)
        tm.assert_index_equal(result, expected)

        # GH 11776
        arr = np.arange(10).reshape(2, 5)
        df = pd.DataFrame(np.arange(10).reshape(2, 5))
        for arg in (arr, df):
            with tm.assert_raises_regex(TypeError, "1-d array"):
                to_timedelta(arg)
            for errors in ['ignore', 'raise', 'coerce']:
                with tm.assert_raises_regex(TypeError, "1-d array"):
                    to_timedelta(arg, errors=errors)

        # issue10583
        df = pd.DataFrame(np.random.normal(size=(10, 4)))
        df.index = pd.timedelta_range(start='0s', periods=10, freq='s')
        expected = df.loc[pd.Timedelta('0s'):, :]
        result = df.loc['0s':, :]
        tm.assert_frame_equal(expected, result)

        with pytest.raises(ValueError):
            # GH 22274: CalendarDay is a relative time measurement
            timedelta_range('1day', freq='CD', periods=2)
Code example #8
File: gm3HelpBylw.py  Project: linshilogin/quant_sdk
def getHQData_Fade(symbolist,
                   sDateTime,
                   eDateTim,
                   fre='60s',
                   fields_='symbol,eob,open,high,low,close'):
    # dateList = commonHelpBylw.splitDates(sDateTime, eDateTim)
    # dfData = pd.DataFrame()
    #
    # for symbol_ in symbolist:
    #     for sDtime_, eDtime_ in dateList:
    #         tempHQdata = history(symbol=symbol_,frequency=fre,start_time=sDtime_,end_time=eDtime_,fields=fields_,df=True)
    #
    #         dfData=dfData.append(tempHQdata)
    #
    # return dfData

    # The approach above is unreliable because splitDates cannot split minute-bar date ranges accurately.
    # New logic below:
    # fetch a batch, look at the last timestamp that came back, advance the start time to one second
    # after it, and loop until no more data is returned.
    # sDtime_=sDateTime
    # eDtime_=eDateTim

    if fre == 'tick':
        dateName = 'created_at'
    else:
        dateName = 'eob'

    dfData = pd.DataFrame()
    for symbol_ in symbolist:
        sDtime_ = sDateTime
        eDtime_ = eDateTim
        tempHQdata = history(symbol=symbol_,
                             frequency=fre,
                             start_time=sDtime_,
                             end_time=eDtime_,
                             fields=fields_,
                             df=True)

        while not tempHQdata.empty:
            tempHQdata = tempHQdata.sort_values(dateName)
            dfData = dfData.append(tempHQdata)
            latestDateTime = tempHQdata[dateName].iloc[-1]

            nextDT = latestDateTime + Second()
            sDtime_ = nextDT.strftime('%Y-%m-%d %H:%M:%S')
            if sDtime_ <= eDtime_:
                tempHQdata = history(symbol=symbol_,
                                     frequency=fre,
                                     start_time=sDtime_,
                                     end_time=eDtime_,
                                     fields=fields_,
                                     df=True)
            else:
                # The next start time is past the requested end time, so all data has been fetched.
                break
    return dfData
Code example #9
    def test_timedelta_range(self):

        expected = to_timedelta(np.arange(5), unit='D')
        result = timedelta_range('0 days', periods=5, freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(11), unit='D')
        result = timedelta_range('0 days', '10 days', freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day()
        result = timedelta_range('1 days, 00:00:02',
                                 '5 days, 00:00:02',
                                 freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2)
        result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D')
        tm.assert_index_equal(result, expected)
Code example #10
    def test_timedelta_range(self):

        expected = to_timedelta(np.arange(5), unit="D")
        result = timedelta_range("0 days", periods=5, freq="D")
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(11), unit="D")
        result = timedelta_range("0 days", "10 days", freq="D")
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day()
        result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D")
        tm.assert_index_equal(result, expected)

        expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2)
        result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D")
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(50), unit="T") * 30
        result = timedelta_range("0 days", freq="30T", periods=50)
        tm.assert_index_equal(result, expected)
Code example #11
    def test_timedelta_range(self):

        expected = to_timedelta(np.arange(5), unit="D")
        result = timedelta_range("0 days", periods=5, freq="D")
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(11), unit="D")
        result = timedelta_range("0 days", "10 days", freq="D")
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day()
        result = timedelta_range("1 days, 00:00:02",
                                 "5 days, 00:00:02",
                                 freq="D")
        tm.assert_index_equal(result, expected)

        expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2)
        result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D")
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(50), unit="T") * 30
        result = timedelta_range("0 days", freq="30T", periods=50)
        tm.assert_index_equal(result, expected)

        # GH 11776
        arr = np.arange(10).reshape(2, 5)
        df = pd.DataFrame(np.arange(10).reshape(2, 5))
        for arg in (arr, df):
            with pytest.raises(TypeError, match="1-d array"):
                to_timedelta(arg)
            for errors in ["ignore", "raise", "coerce"]:
                with pytest.raises(TypeError, match="1-d array"):
                    to_timedelta(arg, errors=errors)

        # issue10583
        df = pd.DataFrame(np.random.normal(size=(10, 4)))
        df.index = pd.timedelta_range(start="0s", periods=10, freq="s")
        expected = df.loc[pd.Timedelta("0s"):, :]
        result = df.loc["0s":, :]
        tm.assert_frame_equal(expected, result)
Code example #12
    def test_resolution(self):
        def _assert_less(ts1, ts2):
            val1 = self.dtc.convert(ts1, None, None)
            val2 = self.dtc.convert(ts2, None, None)
            if not val1 < val2:
                raise AssertionError(f"{val1} is not less than {val2}.")

        # Matplotlib's time representation using floats cannot distinguish
        # intervals smaller than ~10 microsecond in the common range of years.
        ts = Timestamp("2012-1-1")
        _assert_less(ts, ts + Second())
        _assert_less(ts, ts + Milli())
        _assert_less(ts, ts + Micro(50))
Code example #13
def create_flux_ts(thresh_file, bin_width, area, from_dir='data/thresh/'):
    # creates a time series of flux data
    # returns time series object of flux
    # bin_width is time bin size in seconds, area is area of detector in square meters

    # read in data from threshold file
    names = ['id', 'jul', 'RE', 'FE', 'FLUX']
    skiprows = f.linesToSkip(from_dir + thresh_file + '.thresh')
    df = pd.read_csv(from_dir + thresh_file + '.thresh',
                     skiprows=skiprows,
                     names=names,
                     delim_whitespace=True)

    # sort by date/times instead of julian days
    df['date/times'] = df['jul'] + df['RE']
    df['date/times'] = pd.to_datetime(list(map(f.get_date_time, df['date/times'])))
    df.index = df['date/times']

    # create time series, sample according to bin_width
    # calculate bins in pandas notation
    bins = str(int(bin_width / 60)) + 'T'
    flux_ts = pd.Series(data=df['FLUX'], index=df.index)
    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))
    flux_ts.name = 'FLUX'

    # determine offset (basically the bin centers) and add to the index
    start = df['RE'][0] - 0.5
    offset_hours = (int(bin_width / 2) + int(start * 86400)) // 3600
    offset_minutes = (int(bin_width / 2) + int(start * 86400) -
                      offset_hours * 3600) // 60
    offset_seconds = int(bin_width / 2) + int(
        start * 86400) - offset_hours * 3600 - offset_minutes * 60
    offset = offset_hours * Hour() + offset_minutes * Minute(
    ) + offset_seconds * Second()
    flux_ts.index += offset

    # filter out unfilled bins (use a real NaN rather than the string 'nan'
    # so the series stays numeric and interpolate() keeps working)
    for i in range(len(flux_ts)):
        if i == 0 and (flux_ts[i] == 0 or flux_ts[i + 1] == 0):
            flux_ts[i] = float('nan')
        if i > 0 and i < len(flux_ts) - 1 and (flux_ts[i - 1] == 0
                                               or flux_ts[i] == 0
                                               or flux_ts[i + 1] == 0):
            flux_ts[i] = float('nan')
        if i == len(flux_ts) - 1 and (flux_ts[i - 1] == 0 or flux_ts[i] == 0):
            flux_ts[i] = float('nan')

    flux_ts = flux_ts.interpolate()

    return flux_ts
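
The hour/minute/second decomposition above can be collapsed into a single offset; a minimal equivalent sketch, assuming the same bin_width and start values:

total_offset_seconds = int(bin_width / 2) + int(start * 86400)
offset = Second(total_offset_seconds)  # same shift as hours * Hour() + minutes * Minute() + seconds * Second()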
Code example #14
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """计算下次更新时间
    说明:
        'S':移动到下一秒
        'm':移动到下一分钟
        'H':移动到下一小时
        'D':移动到下一天
        'W':移动到下周一
        'M':移动到下月第一天
        'Q':下一季度的第一天
        将时间调整到指定的hour和minute
    """
    if pd.isnull(last_updated):
        return MARKET_START
    if freq == 'S':
        off = Second()
        return last_updated + off
    elif freq == 'm':
        off = Minute()
        return last_updated + off
    elif freq == 'H':
        off = Hour()
        return last_updated + off
    elif freq == 'D':
        d = BDay(n=1, normalize=True)
        res = last_updated + d
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'W':
        w = Week(normalize=True, weekday=0)
        res = last_updated + w
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'M':
        m = MonthBegin(n=1, normalize=True)
        res = last_updated + m
        return res.replace(hour=hour, minute=minute, second=second)
    elif freq == 'Q':
        q = QuarterBegin(normalize=True, startingMonth=1)
        res = last_updated + q
        return res.replace(hour=hour, minute=minute, second=second)
    else:
        raise TypeError('Unrecognized frequency type; only {} are accepted'.format(
            ('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
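
A minimal usage sketch of the 'D' branch, using pandas offsets directly; the timestamp is hypothetical:

import pandas as pd
from pandas.tseries.offsets import BDay

last_updated = pd.Timestamp('2020-01-03 10:30')      # a Friday
res = last_updated + BDay(n=1, normalize=True)       # next business day, normalized to midnight
res.replace(hour=18, minute=0, second=0)             # Timestamp('2020-01-06 18:00:00')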
Code example #15
def split_count(data, num, second_sigma=600, code_sigma=10):
    '''Split out the fixed bus-run segments for one route within the data.'''
    from pandas.tseries.offsets import Second
    b = square.sort_time(square.choose_route(data, num))
    a = b['PRODUCTID'].unique()
    for i in a:
        m = square.choose_bus(b, i)
        m1 = copy.copy(m.iloc[0:1, :])
        flag = 1
        m1.to_csv('bus' + str(num) + '_' + str(i) + '_' + str(flag) + '.csv', mode='a', header=False)
        length = m.iloc[:, 0].size
        for j in range(length - 1):
            b1 = copy.copy(pd.to_datetime(m.iloc[j, 0]))
            b2 = copy.copy(pd.to_datetime(m.iloc[j + 1, 0]))
            if b1 + second_sigma * Second() >= b2:
                m1 = copy.copy(m.iloc[j + 1:j + 2, :])
                m1.to_csv('bus' + str(num) + '_' + str(i) + '_' + str(flag) + '.csv', mode='a', header=False)
            else:
                flag += 1
                m1 = copy.copy(m.iloc[j + 1:j + 2, :])
                m1.to_csv('bus' + str(num) + '_' + str(i) + '_' + str(flag) + '.csv', mode='a', header=False)
Code example #16
def resample(df, sampling_period=1):
    """Resample the data

    Warning: does not handle missing values

    Parameters
    ----------
    df: pandas.DataFrame,
        index: pandas.DatetimeIndex
        values: power measured

    sampling_period: float or int, optional
        Elapsed time between two measurements, in seconds

    Returns
    -------
    df: pandas.DataFrame,
        index: pandas.DatetimeIndex with sampling_period seconds between
            two timestamps
        values: power measured
    """
    assert isinstance(df, pd.DataFrame)
    assert isinstance(df.index, pd.DatetimeIndex)

    if isinstance(sampling_period, int):
        df = df.resample(Second(sampling_period),
                         how='last',
                         label='right',
                         closed='right')
    else:
        period = sampling_period * (10**6)
        df = df.resample(Micro(period),
                         how='last',
                         label='right',
                         closed='right')
    return df
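
The how=/label=/closed= keyword form of resample() above comes from older pandas; in current versions the aggregation is a chained method. A minimal equivalent sketch for the integer case, assuming the same DataFrame layout:

import pandas as pd
from pandas.tseries.offsets import Second

def resample_last(df, sampling_period=1):
    # same semantics as above, written against the current resample API
    return (df.resample(Second(sampling_period), label='right', closed='right')
              .last())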
Code example #17
# 1) Date arithmetic with timedelta
from datetime import timedelta

d1 + 100                          # date + number is not allowed
d1 + timedelta(days=100)          # 100 days later

# 2) Date arithmetic with offsets
import pandas.tseries.offsets
dir(pandas.tseries.offsets)

from pandas.tseries.offsets import Day, Hour, Second

Day(5)      # 5 days
Hour(5)     # 5 hours
Second(5)   # 5 seconds

d1 + Day(100)

# 9.5 Creating and indexing date ranges
# pd.date_range : generate consecutive dates
pd.date_range(start,             # start date
              end,               # end date
              periods,           # number of periods (output count)
              freq)              # date frequency (monthly, weekly, ...)

pd.date_range(start='2020/01/01', end='2020/01/31')    # default freq = 'D' (daily)
pd.date_range(start='2020/01/01', periods=100)         # 100 days from the start date

pd.date_range(start='2020/01/01', end='2020/01/31',
              freq='7D')    # similar to a step/"by" value
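
A small follow-on sketch combining date_range with a seconds-based frequency and the Second offset; the dates are arbitrary:

rng = pd.date_range(start='2020-01-01 00:00:00', periods=5, freq='30S')
rng + Second(10)    # shift every timestamp in the range by 10 seconds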
Code example #18
File: frequencies.py  Project: warrenYin/pandas
#: cache of previously seen offsets
_offset_map: Dict[str, DateOffset] = {}


def get_period_alias(offset_str: str) -> Optional[str]:
    """
    Alias to closest period strings BQ->Q etc.
    """
    return _offset_to_period_map.get(offset_str, None)


_name_to_offset_map = {
    "days": Day(1),
    "hours": Hour(1),
    "minutes": Minute(1),
    "seconds": Second(1),
    "milliseconds": Milli(1),
    "microseconds": Micro(1),
    "nanoseconds": Nano(1),
}


def to_offset(freq) -> Optional[DateOffset]:
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object.

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None
Code example #19
File: test_converter.py  Project: tnir/pandas
class TestDateTimeConverter:
    @pytest.fixture
    def dtc(self):
        return converter.DatetimeConverter()

    def test_convert_accepts_unicode(self, dtc):
        r1 = dtc.convert("12:22", None, None)
        r2 = dtc.convert("12:22", None, None)
        assert r1 == r2, "DatetimeConverter.convert should accept unicode"

    def test_conversion(self, dtc):
        rs = dtc.convert(["2012-1-1"], None, None)[0]
        xp = dates.date2num(datetime(2012, 1, 1))
        assert rs == xp

        rs = dtc.convert("2012-1-1", None, None)
        assert rs == xp

        rs = dtc.convert(date(2012, 1, 1), None, None)
        assert rs == xp

        rs = dtc.convert("2012-1-1", None, None)
        assert rs == xp

        rs = dtc.convert(Timestamp("2012-1-1"), None, None)
        assert rs == xp

        # also testing datetime64 dtype (GH8614)
        rs = dtc.convert("2012-01-01", None, None)
        assert rs == xp

        rs = dtc.convert("2012-01-01 00:00:00+0000", None, None)
        assert rs == xp

        rs = dtc.convert(
            np.array(["2012-01-01 00:00:00+0000", "2012-01-02 00:00:00+0000"]),
            None,
            None,
        )
        assert rs[0] == xp

        # we have a tz-aware date (constructed to that when we turn to utc it
        # is the same as our sample)
        ts = Timestamp("2012-01-01").tz_localize("UTC").tz_convert(
            "US/Eastern")
        rs = dtc.convert(ts, None, None)
        assert rs == xp

        rs = dtc.convert(ts.to_pydatetime(), None, None)
        assert rs == xp

        rs = dtc.convert(Index([ts - Day(1), ts]), None, None)
        assert rs[1] == xp

        rs = dtc.convert(Index([ts - Day(1), ts]).to_pydatetime(), None, None)
        assert rs[1] == xp

    def test_conversion_float(self, dtc):
        rtol = 0.5 * 10**-9

        rs = dtc.convert(Timestamp("2012-1-1 01:02:03", tz="UTC"), None, None)
        xp = converter.dates.date2num(Timestamp("2012-1-1 01:02:03", tz="UTC"))
        tm.assert_almost_equal(rs, xp, rtol=rtol)

        rs = dtc.convert(Timestamp("2012-1-1 09:02:03", tz="Asia/Hong_Kong"),
                         None, None)
        tm.assert_almost_equal(rs, xp, rtol=rtol)

        rs = dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None)
        tm.assert_almost_equal(rs, xp, rtol=rtol)

    def test_conversion_outofbounds_datetime(self, dtc):
        # 2579
        values = [date(1677, 1, 1), date(1677, 1, 2)]
        rs = dtc.convert(values, None, None)
        xp = converter.dates.date2num(values)
        tm.assert_numpy_array_equal(rs, xp)
        rs = dtc.convert(values[0], None, None)
        xp = converter.dates.date2num(values[0])
        assert rs == xp

        values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)]
        rs = dtc.convert(values, None, None)
        xp = converter.dates.date2num(values)
        tm.assert_numpy_array_equal(rs, xp)
        rs = dtc.convert(values[0], None, None)
        xp = converter.dates.date2num(values[0])
        assert rs == xp

    @pytest.mark.parametrize(
        "time,format_expected",
        [
            (0, "00:00"),  # time2num(datetime.time.min)
            (86399.999999, "23:59:59.999999"),  # time2num(datetime.time.max)
            (90000, "01:00"),
            (3723, "01:02:03"),
            (39723.2, "11:02:03.200"),
        ],
    )
    def test_time_formatter(self, time, format_expected):
        # issue 18478
        result = converter.TimeFormatter(None)(time)
        assert result == format_expected

    @pytest.mark.parametrize("freq", ("B", "L", "S"))
    def test_dateindex_conversion(self, freq, dtc):
        rtol = 10**-9
        dateindex = tm.makeDateIndex(k=10, freq=freq)
        rs = dtc.convert(dateindex, None, None)
        xp = converter.dates.date2num(dateindex._mpl_repr())
        tm.assert_almost_equal(rs, xp, rtol=rtol)

    @pytest.mark.parametrize("offset", [Second(), Milli(), Micro(50)])
    def test_resolution(self, offset, dtc):
        # Matplotlib's time representation using floats cannot distinguish
        # intervals smaller than ~10 microsecond in the common range of years.
        ts1 = Timestamp("2012-1-1")
        ts2 = ts1 + offset
        val1 = dtc.convert(ts1, None, None)
        val2 = dtc.convert(ts2, None, None)
        if not val1 < val2:
            raise AssertionError(f"{val1} is not less than {val2}.")

    def test_convert_nested(self, dtc):
        inner = [Timestamp("2017-01-01"), Timestamp("2017-01-02")]
        data = [inner, inner]
        result = dtc.convert(data, None, None)
        expected = [dtc.convert(x, None, None) for x in data]
        assert (np.array(result) == expected).all()
Code example #20
def timeAdd(x, offset):
    result = pd.to_datetime(x, format='%Y%m%d%H%M%S') + offset * Second()
    result = str(result).replace('-', '').replace(' ', '').replace(':', '')
    return int(result)
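
A usage sketch of timeAdd, assuming the input is given in the compact %Y%m%d%H%M%S form; note the offset rolls correctly across day boundaries:

timeAdd('20200101235950', 20)    # -> 20200102000010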
Code example #21
def parse_arguments():
    usage = '%(prog)s [options] [path/to/subject/directory]'
    description = textwrap.dedent("""
    Process accelerometer data using Sojourns/SIP.

    Proceed in two steps: first, use input accelerometer data to estimate
    wear time and metabolic activity; second, generate many summary
    statistics from these estimates.

    There are two major ways to get input data to this program: you can give
    it the path to your subject's data and have it work out which files are
    where, or you can tell it precisely where to find each input file. You
    can also use the first method and then override specific defaults, if
    you prefer.

    The defaults have been chosen so that you can download your activity
    monitor data directly from the device to a subject's directory and run
    this program specifying only that directory.

    By default, this program searches for files named as in the examples
    below, except with less Victor Hugo. It will also find files directly in
    the subject directory, as well as files with names ending in "_QC.csv",
    which it will use preferentially to allow quality-controlling data
    without editing the original files.

    Don't store data from more than one subject in the same directory; if
    you do, this program will get confused and may mix subjects' data by
    accident!

    Input files:

    - ActiGraph data in 1-second epochs, as generated by the ActiLife
      software. This file must exist in order to complete the first step.

      Example file name: 24601/ActiGraph/JV_1secDataTable.csv

      Set this with `--ag-path FILENAME`.

    - activPAL data, as generated by the activPAL software. These consist of
      two files. If these files are found, use the SIP method in the first
      step; otherwise, use the original Sojourns method.

      Example file name: "24601/activPAL/JV Events.csv"
      Must also exist:   24601/activPAL/JV.def

      (You must quote this file name on the command line because it contains
      a space.)

      Note that these file names must have the *exact* same stem (here
      "JV"). The filenames generated by the activPAL software do this by
      default.

      Set this with `--ap-path FILENAME`.

    Intermediate files:

    - Awake ranges data, indicating when the subject was wearing the
      monitor(s). This file is generated by this program, but if a modified
      version already exists it will be used instead of estimating this
      information. This allows you to account for instances when the subject
      fell asleep while wearing the monitor, for instance.

      Example file name: "24601/ActiGraph/JV awake ranges.csv"

      (You must quote this file name on the command line because it contains
      a space.)

      You can edit this file in Excel, but if you do, you must take care to
      always delete cells rather than clearing their contents. Also, make
      sure to save as a CSV file.

      Set this with `--awake-path "FILENAME"`, or ignore an
      existing awake ranges file with `--ignore-awake-ranges`.

    - Sojourns/SIP annotated data, indicating bout boundaries and second-by-
      second estimated metabolic activity. This file is generated by this
      program, but if it already exists it will not be recomputed to save
      time. Editing this file by hand is not recommended.

      Example file name:
      24601/ActiGraph/JV_1secDataTable_with_activpal_sojourns.csv

      By default, this path will be the same as the ActiGraph data with
      "_sojourns" or "_with_activpal_sojourns" added before the ".csv",
      depending on whether activPAL data have been provided.

      Set this with `--soj-path FILENAME`.

    Output files:

    - Sojourns/SIP processed data, containing loads of summary measures
      generated from the metabolic estimates. See the README for a detailed
      description of the contents of this file.

      Example file name:
      24601/ActiGraph/JV_1secDataTable_with_activpal_sojourns_processed.csv

      This will always use the Sojourns/SIP file path with "_processed"
      added before the ".csv".

    Because many of the summary measures refer to times of day, it's
    important to provide the time zone in which the data were collected if
    it's different from the system time zone of the computer doing the
    processing. (Use the IANA time zone, like "America/Chicago", not the
    ambiguous abbreviation like "CST", which could mean Cuba Standard Time.)

    """)    # TODO: summary measures
    parser = argparse.ArgumentParser(
        usage=usage, description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('subjdir', type=pathlib.Path, nargs='?',
                        help='search for subject data in this directory')
    parser.add_argument('-s', '--subject', dest='subj',
                        help='embed this tag as the subject identifier into '
                             'the processed output; the default value is the '
                             'name of the subject directory ("24601" in the '
                             'examples)')
    parser.add_argument('--ag-path', type=pathlib.Path,
                        help='get ActiGraph 1secDataTable data from this file')
    parser.add_argument('--ap-path', type=pathlib.Path,
                        help='get activPAL Events data from this file')
    parser.add_argument('--soj-path', type=pathlib.Path,
                        help='write Sojourns/SIP estimated metabolic activity '
                             "to this file if it doesn't already exist; "
                             'otherwise, read previously computed metabolic '
                             'estimates from this file (to save time)')
    parser.add_argument('--awake-path', type=pathlib.Path,
                        help='read wear time intervals from this file if it '
                             'exists; otherwise, estimate wear time and write '
                             'the estimates to this file')
    parser.add_argument('--soj-intermediate-path', type=pathlib.Path,
                        help=argparse.SUPPRESS)
    parser.add_argument('--ignore-awake-ranges', action='store_true',
                        help='ignore an existing "awake ranges" file and '
                             'estimate wear time anyway')
    parser.add_argument('--tz',
                        help='interpret data as being collected in this time '
                             'zone instead of %r' %
                                 getattr(util.tz, 'zone', util.tz))
    args = parser.parse_args()
    if args.tz is not None:
        util.tz = args.tz
    if args.subjdir is not None:
        if not args.subj:
            args.subj = args.subjdir.resolve().parts[-1]
        if not args.ag_path:
            args.ag_path = util.ActiGraphDataTable.sniff(args.subjdir,
                                                         epoch=Second())
        if not args.ap_path:
            args.ap_path = util.ActivPALData.sniff(args.subjdir)
        if not args.soj_path:
            args.soj_path = util.SojournsData.sniff(args.subjdir)
        if not args.awake_path:
            args.awake_path = util.AwakeRanges.sniff(args.subjdir)
    if not args.ag_path and not args.soj_path:
        if args.subjdir is not None:
            if not args.subjdir.exists():
                raise IOError("can't find subject directory %r" %
                              str(args.subjdir))
            elif not args.subjdir.is_dir():
                raise IOError("subjdir %r isn't a directory" %
                              str(args.subjdir))
            raise IOError("can't find any data in subject directory %r" %
                          str(args.subjdir))
        parser.print_help()
        parser.exit()
    return args
Code example #22
def run(edf):
    # preprocess
    ############################################################################

    print('==> preprocessing')

    for alias, name in settings.EMOJI_MAP.items():
        if name is None:
            edf = edf[~(edf.Name == alias)]
            continue

        ddf = edf[edf.Name == name]
        if ddf.shape[0]:
            edf.loc[edf.Name == alias, 'Path'] = ddf.iloc[0].Path
        edf.loc[edf.Name == alias, 'Name'] = name

    name2path = {
        name: edf[edf.Name == name].iloc[0].Path
        for name in edf.dropna().Name.unique()
    }

    # init and collect per frame ranges
    ############################################################################

    period = Second(settings.ROLLING_WINDOW_PERIOD)
    start = edf.index[0]
    stop = edf.index[-1] - period
    shift = Second(settings.ROLLING_WINDOW_SHIFT)

    export = settings.EXPORT_FORMAT

    frames = []
    for i in itertools.count():
        a = start + shift * i
        b = a + period
        if a > stop:
            break
        frames.append((a, b))

    # setup axes
    ############################################################################

    fig, ax = plt.subplots()

    xa, xb = d2n(edf.index[0]), d2n(edf.index[-1])
    ya, yb = 0, 1 + settings.EMOJI_PAD_TOP
    aspect, _ = get_aspect(ax)
    xrange = xb - xa

    margin = .02
    xmargin = xrange * margin * aspect
    bottom_margin = margin + .1
    top_margin = margin + .1

    image_hspace = xrange * settings.EMOJI_HSPACE
    bar_y = ya - bottom_margin * .7

    def set_lims():
        ax.set_xlim(xa - xmargin, xb + xmargin)
        ax.set_ylim(ya - bottom_margin, yb + top_margin)

    # now the crux calc and plotting
    ############################################################################

    positions = []
    tops = []

    def animation(frame):
        a, b = frame
        i = frames.index(frame)
        print(f'\rframe {i+1}/{len(frames)} - {a}', end='')
        ax.clear()
        set_lims()
        sub_edf = edf[a:b]
        aspect, _ = get_aspect(ax)
        nonlocal tops, positions
        if i == 0:
            tops = []
            positions = []

        # bar
        ax.plot([a, b], [bar_y, bar_y], **settings.BAR_ARGS)

        counted = sub_edf.groupby('Name').Name.count().sort_values()[:-45:-1]
        top = counted.max()

        ref_level = top

        # line
        tops.append((d2n(b), top))
        to_plot = [(x, y / ref_level * .85 + settings.EMOJI_PAD_TOP -
                    settings.LINE_PAD_TOP) for x, y in tops]
        ax.plot(*zip(*to_plot), **settings.LINE_ARGS)

        # sort emotes
        items = counted.items()
        if not positions:
            positions = list(counted.keys())
        elif settings.LAX_SORT:
            tmp = counted.to_dict()
            items = [(name, tmp.pop(name)) for name in positions
                     if name in tmp]
            items += [(name, freq) for name, freq in counted.items()
                      if name not in positions]
            items.sort(key=functools.cmp_to_key(lax_cmp), reverse=True)

        # items = list(items)
        # for name, freq in list(items)[:2]:
        #     ax.axhline(freq / ref_level)

        right = xb
        for i, (name, freq) in enumerate(items):
            height = freq / ref_level
            if i == 0 and settings.HEAD_RELATIVITY:
                prev = tops[-settings.HEAD_RELATIVITY - 1:][0][1]
                height = freq / prev
            left = right - (height * aspect)

            if right < xa:
                break

            path = name2path.get(name) or get_emoji_path(name)
            try:
                image = open_image(name, path)
            except Exception as e:
                err = f'\n==x error reading emoji "{name}" from "{path}": {e}'
                print(err, file=sys.stderr)
            else:
                ax.imshow(image,
                          extent=[left, right, 0, height],
                          aspect='auto')
                # ax.add_patch(mpl.patches.Rectangle((left, 0), (right-left), height, fill=False, alpha=.5))

            right = left - image_hspace

        decorate(ax, ref_level)
        return [ax]

    # now show or export
    ############################################################################

    anim = mpl.animation.FuncAnimation(fig,
                                       animation,
                                       frames=frames,
                                       repeat=True,
                                       repeat_delay=5000,
                                       interval=settings.ANIMATION_INTERVAL)

    if settings.TIGHT_LAYOUT is not None:
        fig.tight_layout(**settings.TIGHT_LAYOUT)
    fig.subplots_adjust(**settings.SUBPLOT_ADJUST)

    if not export:
        plt.show()
    elif export != 'png':
        file = join(settings.OUTPUT_DIR, 'out.' + export)
        print(f'==> exporting to "{file}"')
        anim.save(file)
    else:
        print('==> exporting to frame sequence')
        for i, frame in enumerate(frames):
            animation(frame)
            fig.savefig(join(settings.OUTPUT_DIR, f'out{i:04d}.png'))

    print('\n==> done')
Code example #23
File: test1.py  Project: tiffanycc/Malware-Detection
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 27 20:09:03 2018

@author: Tiffany
"""
import pandas as pd
from pandas.tseries.offsets import Second

df = pd.read_csv(
    r'C:\Users\Tiffany\Documents\dissertation\data\sorted\1_label_data.csv')
del df['Unnamed: 0']
df = df.sort_values(['StartTime']).reset_index(drop=True)
df.StartTime = pd.to_datetime(df.StartTime)

i = 1
frequency = 5
df["Time_window"] = 0

for index in range(0, len(df)):
    if df.StartTime[index] < (df.StartTime[0] + frequency * i * Second()):
        # use .loc to avoid chained-assignment warnings
        df.loc[index, 'Time_window'] = i
    else:
        i = i + 1
        df.loc[index, 'Time_window'] = i

df.to_csv(r'C:\Users\Tiffany\Documents\dissertation\data\ordered\1.csv')
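
A vectorized variant that bins rows purely by elapsed time since the first record; note it labels rows after a long gap differently from the incremental loop above, which only ever advances the window number by one:

df['Time_window'] = ((df.StartTime - df.StartTime[0]) // pd.Timedelta(seconds=frequency)) + 1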
Code example #24
def parse_arguments():
    usage = '%(prog)s [options] [path/to/subject/directory]'
    description = textwrap.dedent("""
    Display activity monitor data in an attractive format.

    Display each type of data differently:

    - ActiGraph data integrated to 1-second epochs are displayed as a line
      plot of the counts along each axis.

      The blue line represents the first (vertical) axis, the green line the
      second (anterior-posterior) axis, and the red line the third (medial-
      lateral) axis.

    - ActiGraph data integrated to 60-second epochs are classified according
      to the modified (Freedson 1998) cut points used by the ActiLife
      software and displayed as bars color coded by estimated intensity.

      The colors correspond to estimated intensities as follows:
      * Dark blue:    non-wear
      * Blue:         sedentary
      * Light yellow: light
      * Yellow:       lifestyle
      * Orange:       moderate
      * Red:          vigorous

    - Sojourns/SIP data are displayed as bars color coded by estimated
      intensity.

      The colors are as above, with additional colors as follows:
      * Green:        standing
      * Cyan:         seated, but light (in practice, this tends to indicate
                      activities like recumbent biking, the intensities of
                      which are typically underestimated)
      * Black:        Sojourns estimated negative intensity for this bout
                      (this is an inherent problem with the method but can
                      only happen when Sojourns has already classified a
                      bout as active; such bouts are typically moderate or
                      vigorous)

    - activPAL Events data are displayed as bars color coded by whether the
      subject was sitting, standing or stepping.

      Here sitting is blue, standing green, and stepping red.

    If the graph crosses a time change (for instance, as caused by Daylight
    Saving Time), data which occurs before the change but on the same day
    will be shifted to fit.

    Files are selected in the same way as in sip.py; for more detail, see
    the help for that program. The exception to this is that this program
    will select as many files as it can find rather than ending its search
    when it finds an appropriate file (but files with names ending in "_QC"
    will still shadow files with identical names that are missing this
    suffix).

    If you wish to exclude a particular file from being plotted, you can
    pass it to the `--exclude` option.

    """)
    epilog = textwrap.dedent("""
    You may specify the --soj-path, --ag-path, --ap-path and --exclude
    options as many times as you like; each file specified this way will be
    plotted or ignored as directed.

    """)

    parser = argparse.ArgumentParser(
        usage=usage,
        description=description,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('subjdir',
                        type=pathlib.Path,
                        nargs='?',
                        help='search for subject data in this directory')
    parser.add_argument('--soj-path',
                        type=pathlib.Path,
                        action='append',
                        default=[],
                        help='get Sojourns/SIP preprocessed Actigraph data '
                        'from this file')
    parser.add_argument('--ag-path',
                        type=pathlib.Path,
                        action='append',
                        default=[],
                        help='get Actigraph data from this file')
    parser.add_argument('--ap-path',
                        type=pathlib.Path,
                        action='append',
                        default=[],
                        help='get activPAL events data from this file')
    parser.add_argument('--awake-path',
                        type=pathlib.Path,
                        help='get wear time intervals from this file in case '
                        'autodetection of non-wear time is poor')
    parser.add_argument('--ignore-awake-ranges',
                        action='store_true',
                        help='ignore "awake ranges" file')
    parser.add_argument('--no-raw-counts',
                        action='store_true',
                        help="don't plot raw counts (for speed reasons)")
    parser.add_argument('-x',
                        '--exclude',
                        type=pathlib.Path,
                        action='append',
                        default=[],
                        help="don't plot the data in this file")
    parser.add_argument('--tz',
                        help='interpret data as being collected in this time '
                        'zone instead of %r' %
                        getattr(util.tz, 'zone', util.tz))
    args = parser.parse_args()
    if args.tz is not None:
        util.tz = args.tz
    if args.subjdir is not None:
        if not args.ag_path:
            args.ag_path = filter(None, [
                util.ActiGraphDataTable.sniff(args.subjdir, epoch=Minute()),
                util.ActiGraphDataTable.sniff(args.subjdir, epoch=Second())
            ])
        if not args.ap_path:
            args.ap_path = filter(None,
                                  [util.ActivPALData.sniff(args.subjdir)])
        if not args.soj_path:
            args.soj_path = filter(None,
                                   [util.SojournsData.sniff(args.subjdir)])
        if not args.awake_path:
            args.awake_path = util.AwakeRanges.sniff(args.subjdir)
    if args.ignore_awake_ranges:
        args.awake_path = None
    if not any([args.ag_path, args.ap_path, args.soj_path]):
        if args.subjdir is not None:
            if not args.subjdir.exists():
                raise IOError("can't find subject directory %r" %
                              str(args.subjdir))
            elif not args.subjdir.is_dir():
                raise IOError("subjdir %r isn't a directory" %
                              str(args.subjdir))
            raise IOError("can't find any data in subject directory %r" %
                          str(args.subjdir))
        parser.print_help()
        parser.exit()
    return args
Code example #25
def cleanBeggining(feed):
    """
    Delete the initial setup problems: drop leading observations while
    measurement(t1) == 0, or measurement(t1) <= 15 and the measurement
    10 seconds later (at t1 + 10s) == 0.
    """
    from pandas.tseries.offsets import Second

    feed = pd.DataFrame(feed)

    while (((feed.loc[feed.first_valid_index()] == float(0))[0])
           or (((feed.loc[feed.first_valid_index()] <= float(15))[0])
               and ((feed.loc[feed.first_valid_index() + (10 * Second())] == float(0))[0]))):
        # drop the first observation
        feed = feed.drop([feed.first_valid_index()])
        feed = feed.loc[feed.first_valid_index():feed.last_valid_index()]

    return feed.first_valid_index()
Code example #26
(d1 - d2).days  # difference between the two dates, in days

# 2) Date and number arithmetic: not possible directly; convert the number to a timedelta first
d1 + 10  # unsupported operand type(s) for +: 'datetime.datetime' and 'int'
# date + number is not allowed
d1 + timedelta(10)  # timedelta's default unit is days
d1 + timedelta(1)  # one day later
d1 + timedelta(1 / 24)  # one hour later

# 3) Date and number arithmetic: not possible directly; convert the number to an offset instead
import pandas.tseries.offsets
from pandas.tseries.offsets import Day, Hour, Second

d1 + Day(10)  # 10 days later
d1 + Hour(10)  # 10 hours later
d1 + Second(10)  # 10 seconds later

# [ Exercise ]
# Read the emp.csv file
emp = pd.read_csv('emp.csv', engine='python')
# 1) Print the weekday of the salary review date (the review date is 100 days after the hire date)
(emp['HIREDATE'].map(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M')) +
 timedelta(100)).map(lambda x: x.strftime('%A'))

(pd.to_datetime(emp['HIREDATE']) + Day(100)).map(lambda x: x.strftime('%A'))

# 2) Print the number of days worked since the hire date
datetime.now() - emp['HIREDATE'].map(
    lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M'))

(datetime.now() - pd.to_datetime(emp['HIREDATE'])).map(lambda x: x.days)
Code example #27
    'A': 'A-DEC',  # YearEnd(month=12),
    'AS': 'AS-JAN',  # YearBegin(month=1),
    'BA': 'BA-DEC',  # BYearEnd(month=12),
    'BAS': 'BAS-JAN',  # BYearBegin(month=1),
    'Min': 'T',
    'min': 'T',
    'ms': 'L',
    'us': 'U',
    'ns': 'N'
}

_name_to_offset_map = {
    'days': Day(1),
    'hours': Hour(1),
    'minutes': Minute(1),
    'seconds': Second(1),
    'milliseconds': Milli(1),
    'microseconds': Micro(1),
    'nanoseconds': Nano(1)
}

_INVALID_FREQ_ERROR = "Invalid frequency: {0}"


def to_offset(freqstr):
    """
    Return DateOffset object from string representation or
    Timedelta object

    Examples
    --------
Code example #28
#----------------------------------------------------------------------
# Offset names ("time rules") and related functions


from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli,
                                    Week, Micro, MonthEnd, MonthBegin,
                                    BMonthBegin, BMonthEnd, YearBegin, YearEnd,
                                    BYearBegin, BYearEnd, QuarterBegin,
                                    QuarterEnd, BQuarterBegin, BQuarterEnd)

_offset_map = {
    'D'     : Day(),
    'B'     : BDay(),
    'H'     : Hour(),
    'T'     : Minute(),
    'S'     : Second(),
    'L'     : Milli(),
    'U'     : Micro(),
    None    : None,

    # Monthly - Calendar
    'M'      : MonthEnd(),
    'MS'     : MonthBegin(),

    # Monthly - Business
    'BM'     : BMonthEnd(),
    'BMS'    : BMonthBegin(),

    # Annual - Calendar
    'A-JAN' : YearEnd(month=1),
    'A-FEB' : YearEnd(month=2),
Code example #29
df = df.sort_values(by = "auto_signed_time" , ascending = True)
df.reset_index(drop = True ,inplace = True)
df = df.set_index('auto_signed_time')
#print(df['2019-03-14 08:00:16':'2019-03-14 09:00:15'][['student_id','attention','face_area','real_name','gender']])


df['auto_signed_time'] = pd.to_datetime(df['auto_signed_time'],format = '%Y-%m-%d %H:%M:%S')
frequency = 1800
time_range = pd.date_range(df['auto_signed_time'][0],df['auto_signed_time'][df.shape[0]-1]+frequency*Second(),freq = '%sS'%frequency)
df = df.set_index('auto_signed_time')
for i in range(0,len(time_range) - 1):
    print(df.loc[time_range[i]:time_range[i+1]-1*Second()]['student_id'])
'''
import pandas as pd
from pandas.tseries.offsets import Second

df = pd.read_csv('/Users/apple/PycharmProjects/Apriori/python3-fp-growth-master/课程出勤详情.csv',encoding = 'UTF-8')
df = df.dropna(subset = ['face_area'])
df = df.dropna(subset = ['auto_signed_time'])
df = df.dropna(subset=['student_id'])
df.groupby(['auto_signed_time'])
df = df.sort_values(by = "auto_signed_time" , ascending = True)
df.reset_index(drop = True ,inplace = True)
df['auto_signed_time'] = pd.to_datetime(df['auto_signed_time'] , format = '%Y-%m-%d %H:%M:%S')
frequency = 300
time_range = pd.date_range(df['auto_signed_time'][0],
                           df['auto_signed_time'][df.shape[0]-1]+frequency*Second(),freq = '%sS'%frequency)
df = df.set_index('auto_signed_time')
for i in range(0,len(time_range) - 1):
    Series = df.loc[time_range[i]:time_range[i+1]-1*Second()]['student_id']