def _get_slice_index(self, start=None, end=None, periods=None, **kwargs):
    """Build the list of (window_start, window_end) timestamp pairs.

    Windows are ``self.window`` seconds long and their start points advance
    by ``window * (1 - overlap)`` seconds across [start, end].

    Returns
    -------
    list of (Timestamp, Timestamp)
        One tuple per window.
    """
    # Treat a falsy periods (0 or None) uniformly: let start/end drive the range.
    if not periods:
        periods = None
    if self.normalize:
        # Snap the endpoints to whole minutes (project helpers; the spelling
        # "rollforword_minute" matches the helper's defined name).
        start = rollback_minute(start)
        end = rollforword_minute(end)
    # Step between consecutive window starts, in whole seconds.
    freq = round(self.window * (1 - self.overlap))
    freq = str(freq) + 's'
    # Use a datetime array to get every start point of windows
    # during the whole timeline.
    dtarr = DatetimeArray._generate_range(
        start=start, end=end, periods=periods, freq=freq, **kwargs)
    # BUG FIX: removed leftover debug print(dtarr).
    dtarr_start = dtarr[:-1]
    dtarr_end = dtarr_start + Second(self.window)
    # Nudge a boundary so each window is closed on the requested side only.
    if self.closed == 'right':
        dtarr_start = dtarr_start - Second(1)
    if self.closed == 'left':
        dtarr_end = dtarr_end - Second(1)
    return [(s, e) for s, e in zip(dtarr_start, dtarr_end)]
def test_Second():
    """Exercise Second() offset arithmetic against fixed datetimes."""
    base = datetime(2010, 1, 1)
    one_later = datetime(2010, 1, 1, 0, 0, 1)
    assert_offset_equal(Second(), base, one_later)
    assert_offset_equal(Second(-1), one_later, base)
    assert_offset_equal(2 * Second(), base, datetime(2010, 1, 1, 0, 0, 2))
    assert_offset_equal(-1 * Second(), one_later, base)

    assert Second(3) + Second(2) == Second(5)
    assert Second(3) - Second(2) == Second()
def oringin3(data,sta_num,delta_sec,num):
    """Build a one-row origin DataFrame whose timestamps are shifted
    ``delta_sec`` seconds earlier and whose station columns come from
    ``square.find_info1``."""
    print(delta_sec)
    from pandas.tseries.offsets import Second
    station_info = square.find_info1(num, sta_num)
    shifted = (pd.to_datetime(data['RECDATETIME']) - delta_sec * Second()).values
    record = {
        'RECDATETIME': shifted,
        'ISARRLFT': 100000,
        'PRODUCTID': data['PRODUCTID'],
        'STATIONSEQNUM': data['STATIONSEQNUM'],
        'PACKCODE': data['PACKCODE'],
        'GPSSPEED': data['GPSSPEED'],
        'ROUTEID': data['ROUTEID'],
        'LONGITUDE': station_info[0],
        'LATITUDE': station_info[1],
        'STATIONNUM': station_info[3],
        'STAORDER': station_info[2],
    }
    return pd.DataFrame(record, index=['100000'])
def create_flux_ts(thresh_file, bin_width, area):
    """Create a flux time series from a ``.thresh`` file.

    Parameters
    ----------
    thresh_file : str
        Basename (without extension) under ``data/thresh/``.
    bin_width : number
        Bin size in seconds.
    area : number
        Detector area used to normalize counts into flux.

    Returns
    -------
    pandas.Series
        Counts per ``(bin_width/60) * area``, indexed by bin-center times.
    """
    # start by loading threshold data; pandas resample rule in minutes.
    bins = str(int(bin_width / 60)) + 'T'
    names = ['id', 'jul', 'RE', 'FE', 'timeOverThresh']
    skiprows = f.linesToSkip('data/thresh/' + thresh_file + '.thresh')
    df = pd.read_csv('data/thresh/' + thresh_file + '.thresh',
                     skiprows=skiprows, names=names, delim_whitespace=True)
    # Julian day + fractional day -> datetime index.
    df['date/times'] = df['jul'] + df['RE']
    start = df['RE'][0] - 0.5
    df['date/times'] = pd.to_datetime(map(f.get_date_time, df['date/times']))
    df.index = df['date/times']
    flux_ts = pd.Series(data=df['timeOverThresh'], index=df.index)
    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))
    # Shift the index so each label sits at the bin center.
    offset_hours = (int(bin_width / 2) + int(start * 86400)) // 3600
    offset_minutes = (int(bin_width / 2) + int(start * 86400)
                      - offset_hours * 3600) // 60
    offset_seconds = (int(bin_width / 2) + int(start * 86400)
                      - offset_hours * 3600 - offset_minutes * 60)
    offset = (offset_hours * Hour() + offset_minutes * Minute()
              + offset_seconds * Second())
    flux_ts.index += offset
    # BUG FIX: the computed series was previously discarded (no return).
    return flux_ts
def _safe_write_csv(df, file_name):
    """Write DataFrame to CSV file in standard format"""
    out_dir = os.path.dirname(file_name)
    if out_dir:
        # this illogically logical try-except block brought to you by:
        # http://stackoverflow.com/a/14364249
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

    # express timestamps as string to achieve consistent formatting
    df_freq = to_offset(df.index.inferred_freq)
    if df_freq is None:
        # include 2 decimal places of subseconds
        def str_fmt(x):
            return dt.strftime(x, '%Y-%m-%d %H:%M:%S.%f')[:22]
    elif df_freq < Second():
        # only 1 decimal place of subseconds
        def str_fmt(x):
            return dt.strftime(x, '%Y-%m-%d %H:%M:%S.%f')[:21]
    else:
        def str_fmt(x):
            return dt.strftime(x, '%Y-%m-%d %H:%M:%S')

    df.index.name = 'TIMESTAMP'  # <-- BIG HAMMER SOLUTION
    df = df.reset_index()
    df['TIMESTAMP'] = df['TIMESTAMP'].apply(str_fmt)
    df.set_index('TIMESTAMP', inplace=True)

    df.to_csv(file_name,
              na_rep='NAN',
              # since treating all values as strings be explicit about no
              # quoting; specify alternate quote to avoid triggering
              # QUOTE_NONE/escapechar errors when writing fields with
              # double-quotes (CompileResults and CardStatus columns)
              quoting=QUOTE_NONE,
              quotechar="'",
              index_label='TIMESTAMP')
def auto(self):
    """Infer the sampling frequency, select an order, and fit an ARIMA model."""
    series = self.get_series()
    # Sampling period taken from the first two timestamps; assumes a
    # regularly spaced index -- TODO confirm.
    self._period = series.index[1] - series.index[0]
    inferred_freq = Second(self._period.total_seconds())
    self._order = self.select_order()
    self._model = ARIMA(self.get_series(), order=self._order,
                        freq=inferred_freq).fit()
def test_timedelta_range(self):
    # periods-based construction matches an explicit arange of days.
    expected = to_timedelta(np.arange(5), unit='D')
    result = timedelta_range('0 days', periods=5, freq='D')
    tm.assert_index_equal(result, expected)

    # start/end construction is end-inclusive.
    expected = to_timedelta(np.arange(11), unit='D')
    result = timedelta_range('0 days', '10 days', freq='D')
    tm.assert_index_equal(result, expected)

    # offsets applied to the endpoints carry through to every element.
    expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day()
    result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', freq='D')
    tm.assert_index_equal(result, expected)

    expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2)
    result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D')
    tm.assert_index_equal(result, expected)

    # minute frequency with a multiple.
    expected = to_timedelta(np.arange(50), unit='T') * 30
    result = timedelta_range('0 days', freq='30T', periods=50)
    tm.assert_index_equal(result, expected)

    # GH 11776: to_timedelta rejects 2-d input regardless of errors=.
    arr = np.arange(10).reshape(2, 5)
    df = pd.DataFrame(np.arange(10).reshape(2, 5))
    for arg in (arr, df):
        with tm.assert_raises_regex(TypeError, "1-d array"):
            to_timedelta(arg)
        for errors in ['ignore', 'raise', 'coerce']:
            with tm.assert_raises_regex(TypeError, "1-d array"):
                to_timedelta(arg, errors=errors)

    # issue10583: partial string slicing on a TimedeltaIndex.
    df = pd.DataFrame(np.random.normal(size=(10, 4)))
    df.index = pd.timedelta_range(start='0s', periods=10, freq='s')
    expected = df.loc[pd.Timedelta('0s'):, :]
    result = df.loc['0s':, :]
    tm.assert_frame_equal(expected, result)

    with pytest.raises(ValueError):
        # GH 22274: CalendarDay is a relative time measurement
        timedelta_range('1day', freq='CD', periods=2)
def getHQData_Fade(symbolist, sDateTime, eDateTim, fre='60s', fields_='symbol,eob,open,high,low,close'):
    """Fetch quote history for every symbol in ``symbolist`` by paging.

    Repeatedly calls ``history`` over [start, end], then advances the start
    to one second past the last returned timestamp, until the provider
    returns an empty frame.
    """
    # dateList = commonHelpBylw.splitDates(sDateTime, eDateTim)
    # dfData = pd.DataFrame()
    #
    # for symbol_ in symbolist:
    #     for sDtime_, eDtime_ in dateList:
    #         tempHQdata = history(symbol=symbol_,frequency=fre,start_time=sDtime_,end_time=eDtime_,fields=fields_,df=True)
    #
    #         dfData=dfData.append(tempHQdata)
    #
    # return dfData

    # The commented-out approach above relied on splitDates, which cannot
    # split minute-bar date ranges accurately, so the data it fetched was
    # not quite right.
    # New logic: fetch everything available, look at the last timestamp of
    # what came back, restart from the next second, and loop until no more
    # rows are returned.
    # sDtime_=sDateTime
    # eDtime_=eDateTim
    if fre == 'tick':
        # tick data carries its timestamp in 'created_at'
        dateName = 'created_at'
    else:
        # bar data: end-of-bar timestamp
        dateName = 'eob'
    dfData = pd.DataFrame()
    for symbol_ in symbolist:
        sDtime_ = sDateTime
        eDtime_ = eDateTim
        tempHQdata = history(symbol=symbol_, frequency=fre, start_time=sDtime_,
                             end_time=eDtime_, fields=fields_, df=True)
        while not tempHQdata.empty:
            tempHQdata = tempHQdata.sort_values(dateName)
            dfData = dfData.append(tempHQdata)
            latestDateTime = tempHQdata[dateName].iloc[-1]
            nextDT = latestDateTime + Second()
            sDtime_ = nextDT.strftime('%Y-%m-%d %H:%M:%S')
            if sDtime_ <= eDtime_:
                tempHQdata = history(symbol=symbol_, frequency=fre,
                                     start_time=sDtime_, end_time=eDtime_,
                                     fields=fields_, df=True)
            else:
                # The next start time is past the requested end time, so all
                # the data has been fetched.
                break
    return dfData
def test_timedelta_range(self): expected = to_timedelta(np.arange(5), unit='D') result = timedelta_range('0 days', periods=5, freq='D') tm.assert_index_equal(result, expected) expected = to_timedelta(np.arange(11), unit='D') result = timedelta_range('0 days', '10 days', freq='D') tm.assert_index_equal(result, expected) expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day() result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', freq='D') tm.assert_index_equal(result, expected) expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2) result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D') tm.assert_index_equal(result, expected)
def test_timedelta_range(self):
    # periods-based construction matches an explicit arange of days.
    expected = to_timedelta(np.arange(5), unit="D")
    result = timedelta_range("0 days", periods=5, freq="D")
    tm.assert_index_equal(result, expected)

    # start/end construction is end-inclusive.
    expected = to_timedelta(np.arange(11), unit="D")
    result = timedelta_range("0 days", "10 days", freq="D")
    tm.assert_index_equal(result, expected)

    # offsets applied to the endpoints carry through to every element.
    expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day()
    result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D")
    tm.assert_index_equal(result, expected)

    expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2)
    result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D")
    tm.assert_index_equal(result, expected)

    # minute frequency ("T") with a multiple.
    expected = to_timedelta(np.arange(50), unit="T") * 30
    result = timedelta_range("0 days", freq="30T", periods=50)
    tm.assert_index_equal(result, expected)
def test_timedelta_range(self):
    # periods-based construction matches an explicit arange of days.
    expected = to_timedelta(np.arange(5), unit="D")
    result = timedelta_range("0 days", periods=5, freq="D")
    tm.assert_index_equal(result, expected)

    # start/end construction is end-inclusive.
    expected = to_timedelta(np.arange(11), unit="D")
    result = timedelta_range("0 days", "10 days", freq="D")
    tm.assert_index_equal(result, expected)

    # offsets applied to the endpoints carry through to every element.
    expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day()
    result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D")
    tm.assert_index_equal(result, expected)

    expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2)
    result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D")
    tm.assert_index_equal(result, expected)

    # minute frequency ("T") with a multiple.
    expected = to_timedelta(np.arange(50), unit="T") * 30
    result = timedelta_range("0 days", freq="30T", periods=50)
    tm.assert_index_equal(result, expected)

    # GH 11776: to_timedelta rejects 2-d input regardless of errors=.
    arr = np.arange(10).reshape(2, 5)
    df = pd.DataFrame(np.arange(10).reshape(2, 5))
    for arg in (arr, df):
        with pytest.raises(TypeError, match="1-d array"):
            to_timedelta(arg)
        for errors in ["ignore", "raise", "coerce"]:
            with pytest.raises(TypeError, match="1-d array"):
                to_timedelta(arg, errors=errors)

    # issue10583: partial string slicing on a TimedeltaIndex.
    df = pd.DataFrame(np.random.normal(size=(10, 4)))
    df.index = pd.timedelta_range(start="0s", periods=10, freq="s")
    expected = df.loc[pd.Timedelta("0s"):, :]
    result = df.loc["0s":, :]
    tm.assert_frame_equal(expected, result)
def test_resolution(self):
    """Converted timestamps must stay ordered for sub-second offsets."""
    # Matplotlib's time representation using floats cannot distinguish
    # intervals smaller than ~10 microsecond in the common range of years.
    base = Timestamp("2012-1-1")
    for offset in (Second(), Milli(), Micro(50)):
        val1 = self.dtc.convert(base, None, None)
        val2 = self.dtc.convert(base + offset, None, None)
        if not val1 < val2:
            raise AssertionError(f"{val1} is not less than {val2}.")
def create_flux_ts(thresh_file, bin_width, area, from_dir='data/thresh/'):
    """Create a time series of flux data.

    Parameters
    ----------
    thresh_file : str
        Basename (without '.thresh' extension) of the threshold file.
    bin_width : number
        Time bin size in seconds.
    area : number
        Area of detector in square meters.
    from_dir : str, optional
        Directory containing the threshold file.

    Returns
    -------
    pandas.Series
        Flux per bin, indexed at bin centers, with unfilled bins
        interpolated over.
    """
    # read in data from threshold file
    names = ['id', 'jul', 'RE', 'FE', 'FLUX']
    skiprows = f.linesToSkip(from_dir + thresh_file + '.thresh')
    df = pd.read_csv(from_dir + thresh_file + '.thresh', skiprows=skiprows,
                     names=names, delim_whitespace=True)
    # sort by date/times instead of julian days
    df['date/times'] = df['jul'] + df['RE']
    df['date/times'] = pd.to_datetime(map(f.get_date_time, df['date/times']))
    df.index = df['date/times']
    # create time series, sample according to bin_width
    # calculate bins in pandas notation
    bins = str(int(bin_width / 60)) + 'T'
    flux_ts = pd.Series(data=df['FLUX'], index=df.index)
    flux_ts = flux_ts.resample(bins).count() * (1 / ((bin_width / 60) * area))
    flux_ts.name = 'FLUX'
    # determine offset (basically the bin centers) and add to the index
    start = df['RE'][0] - 0.5
    offset_hours = (int(bin_width / 2) + int(start * 86400)) // 3600
    offset_minutes = (int(bin_width / 2) + int(start * 86400)
                      - offset_hours * 3600) // 60
    offset_seconds = (int(bin_width / 2) + int(start * 86400)
                      - offset_hours * 3600 - offset_minutes * 60)
    offset = (offset_hours * Hour() + offset_minutes * Minute()
              + offset_seconds * Second())
    flux_ts.index += offset
    # filter out unfilled bins: blank any bin adjacent to an empty bin,
    # then interpolate over the blanks.
    # BUG FIX: assign a real float NaN instead of the string 'nan'; the
    # string upcasts the Series to object dtype and breaks interpolate().
    nan = float('nan')
    last = len(flux_ts) - 1
    for i in range(len(flux_ts)):
        if i == 0 and (flux_ts[i] == 0 or flux_ts[i + 1] == 0):
            flux_ts[i] = nan
        if i > 0 and i < last and (flux_ts[i - 1] == 0 or flux_ts[i] == 0
                                   or flux_ts[i + 1] == 0):
            flux_ts[i] = nan
        if i == last and (flux_ts[i - 1] == 0 or flux_ts[i] == 0):
            flux_ts[i] = nan
    flux_ts = flux_ts.interpolate()
    return flux_ts
def next_update_time(last_updated, freq='D', hour=18, minute=0, second=0):
    """Return the next scheduled update time after ``last_updated``.

    Frequency codes:
        'S' -> next second          'm' -> next minute
        'H' -> next hour            'D' -> next business day
        'W' -> next Monday          'M' -> first day of next month
        'Q' -> first day of the next quarter

    For the calendar frequencies ('D', 'W', 'M', 'Q') the result is pinned
    to the given hour/minute/second. A null ``last_updated`` yields
    ``MARKET_START``.
    """
    if pd.isnull(last_updated):
        return MARKET_START
    # Simple tick frequencies: just step forward by one unit.
    tick_offsets = {'S': Second, 'm': Minute, 'H': Hour}
    if freq in tick_offsets:
        return last_updated + tick_offsets[freq]()
    # Calendar frequencies: move to the next boundary, then pin the time.
    calendar_offsets = {
        'D': BDay(n=1, normalize=True),
        'W': Week(normalize=True, weekday=0),
        'M': MonthBegin(n=1, normalize=True),
        'Q': QuarterBegin(normalize=True, startingMonth=1),
    }
    if freq in calendar_offsets:
        moved = last_updated + calendar_offsets[freq]
        return moved.replace(hour=hour, minute=minute, second=second)
    raise TypeError('不能识别的周期类型,仅接受{}'.format(
        ('S', 'm', 'H', 'D', 'W', 'M', 'Q')))
def split_count(data,num,second_sigma=600,code_sigma=10):
    '''Find the fixed-schedule (trip) records within one area.

    Splits each bus's sorted records into trips: whenever the gap between
    two consecutive records exceeds ``second_sigma`` seconds, a new trip
    file suffix is started. Each row is appended to
    ``bus<num>_<productid>_<trip>.csv``.

    NOTE(review): ``code_sigma`` is currently unused.
    '''
    from pandas.tseries.offsets import Second
    b=square.sort_time(square.choose_route(data,num))
    a=b['PRODUCTID'].unique()
    for i in a:
        m=square.choose_bus(b,i)
        m1=copy.copy(m.iloc[0:1,:])
        flag=1
        m1.to_csv('bus'+str(num)+'_'+str(i)+'_'+str(flag)+'.csv',mode='a',header=False)
        length=m.iloc[:,0].size
        for j in range(length-1):
            b1=copy.copy(pd.to_datetime(m.iloc[j,0]))
            b2=copy.copy(pd.to_datetime(m.iloc[j+1,0]))
            if (b1+second_sigma*Second()>=b2):
                # Same trip: the next record follows within second_sigma seconds.
                m1=copy.copy(m.iloc[j+1:j+2,:])
                m1.to_csv('bus'+str(num)+'_'+str(i)+'_'+str(flag)+'.csv',mode='a',header=False)
            else:
                # Gap too large: start a new trip (new file suffix).
                flag+=1
                m1=copy.copy(m.iloc[j+1:j+2,:])
                m1.to_csv('bus'+str(num)+'_'+str(i)+'_'+str(flag)+'.csv',mode='a',header=False)
def resample(df, sampling_period=1):
    """Resample the data

    Warning: does not handle missing values

    Parameters
    ----------
    df: pandas.DataFrame,
        index: pandas.DatetimeIndex
        values: power measured
    sampling_period: float or int, optional
        Elapsed time between two measures in second

    Returns
    -------
    df: pandas.DataFrame,
        index: pandas.DatetimeIndex with sampling_period seconds between
        two timestamps
        values: power measured
    """
    assert isinstance(df, pd.DataFrame)
    assert isinstance(df.index, pd.DatetimeIndex)
    if isinstance(sampling_period, int):
        rule = Second(sampling_period)
    else:
        # BUG FIX: Micro() requires an integer count; a float period times
        # 10**6 is a float, so truncate it to int.
        rule = Micro(int(sampling_period * (10**6)))
    # FIX: resample(..., how='last') was removed from pandas; the modern
    # equivalent is .resample(...).last().
    df = df.resample(rule, label='right', closed='right').last()
    return df
# 1) timedelta를 사용한 날짜 연산 from datetime import timedelta d1 + 100 # 날짜 + 숫자 연산 불가 d1 + timedelta(days = 100) # 100일 뒤 # 2) offset으로 사용한 날짜 연산 import pandas.tseries.offsets dir(pandas.tseries.offsets) from pandas.tseries.offsets import Day, Hour, Second Day(5) # 5일 Hour(5) # 5시간 Second(5) # 5초 d1 + Day(100) # 9.5 날짜 인덱스 생성 및 색인 # pd.date_range : 연속적 날짜 출력 pd.date_range(start, # 시작 날짜 end, # 끝 날짜 periods, # 기간 (출력 개수) freq) # 날짜 빈도 (매월, 매주 ...) pd.date_range(start = '2020/01/01', end = '2020/01/31') # 기본 freq = 'D'(일) pd.date_range(start = '2020/01/01', periods = 100) # 시작값으로부터 100일의 날짜 pd.date_range(start = '2020/01/01', end = '2020/01/31', freq = '7D') # by값과 비슷
#: cache of previously seen offsets _offset_map: Dict[str, DateOffset] = {} def get_period_alias(offset_str: str) -> Optional[str]: """ Alias to closest period strings BQ->Q etc. """ return _offset_to_period_map.get(offset_str, None) _name_to_offset_map = { "days": Day(1), "hours": Hour(1), "minutes": Minute(1), "seconds": Second(1), "milliseconds": Milli(1), "microseconds": Micro(1), "nanoseconds": Nano(1), } def to_offset(freq) -> Optional[DateOffset]: """ Return DateOffset object from string or tuple representation or datetime.timedelta object. Parameters ---------- freq : str, tuple, datetime.timedelta, DateOffset or None
class TestDateTimeConverter:
    """Tests for the DatetimeConverter used by pandas' matplotlib plotting."""

    @pytest.fixture
    def dtc(self):
        # Fresh converter instance per test.
        return converter.DatetimeConverter()

    def test_convert_accepts_unicode(self, dtc):
        r1 = dtc.convert("12:22", None, None)
        r2 = dtc.convert("12:22", None, None)
        assert r1 == r2, "DatetimeConverter.convert should accept unicode"

    def test_conversion(self, dtc):
        # Every representation of the same instant converts to the same
        # matplotlib date number.
        rs = dtc.convert(["2012-1-1"], None, None)[0]
        xp = dates.date2num(datetime(2012, 1, 1))
        assert rs == xp

        rs = dtc.convert("2012-1-1", None, None)
        assert rs == xp

        rs = dtc.convert(date(2012, 1, 1), None, None)
        assert rs == xp

        rs = dtc.convert("2012-1-1", None, None)
        assert rs == xp

        rs = dtc.convert(Timestamp("2012-1-1"), None, None)
        assert rs == xp

        # also testing datetime64 dtype (GH8614)
        rs = dtc.convert("2012-01-01", None, None)
        assert rs == xp

        rs = dtc.convert("2012-01-01 00:00:00+0000", None, None)
        assert rs == xp

        rs = dtc.convert(
            np.array(["2012-01-01 00:00:00+0000", "2012-01-02 00:00:00+0000"]),
            None,
            None,
        )
        assert rs[0] == xp

        # we have a tz-aware date (constructed to that when we turn to utc it
        # is the same as our sample)
        ts = Timestamp("2012-01-01").tz_localize("UTC").tz_convert(
            "US/Eastern")
        rs = dtc.convert(ts, None, None)
        assert rs == xp

        rs = dtc.convert(ts.to_pydatetime(), None, None)
        assert rs == xp

        rs = dtc.convert(Index([ts - Day(1), ts]), None, None)
        assert rs[1] == xp

        rs = dtc.convert(Index([ts - Day(1), ts]).to_pydatetime(), None, None)
        assert rs[1] == xp

    def test_conversion_float(self, dtc):
        rtol = 0.5 * 10**-9

        rs = dtc.convert(Timestamp("2012-1-1 01:02:03", tz="UTC"), None, None)
        xp = converter.dates.date2num(Timestamp("2012-1-1 01:02:03", tz="UTC"))
        tm.assert_almost_equal(rs, xp, rtol=rtol)

        # Same instant expressed in another tz converts to the same number.
        rs = dtc.convert(Timestamp("2012-1-1 09:02:03", tz="Asia/Hong_Kong"),
                         None, None)
        tm.assert_almost_equal(rs, xp, rtol=rtol)

        rs = dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None)
        tm.assert_almost_equal(rs, xp, rtol=rtol)

    def test_conversion_outofbounds_datetime(self, dtc):
        # 2579: dates outside the Timestamp-representable range still convert.
        values = [date(1677, 1, 1), date(1677, 1, 2)]
        rs = dtc.convert(values, None, None)
        xp = converter.dates.date2num(values)
        tm.assert_numpy_array_equal(rs, xp)
        rs = dtc.convert(values[0], None, None)
        xp = converter.dates.date2num(values[0])
        assert rs == xp

        values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)]
        rs = dtc.convert(values, None, None)
        xp = converter.dates.date2num(values)
        tm.assert_numpy_array_equal(rs, xp)
        rs = dtc.convert(values[0], None, None)
        xp = converter.dates.date2num(values[0])
        assert rs == xp

    @pytest.mark.parametrize(
        "time,format_expected",
        [
            (0, "00:00"),  # time2num(datetime.time.min)
            (86399.999999, "23:59:59.999999"),  # time2num(datetime.time.max)
            (90000, "01:00"),
            (3723, "01:02:03"),
            (39723.2, "11:02:03.200"),
        ],
    )
    def test_time_formatter(self, time, format_expected):
        # issue 18478
        result = converter.TimeFormatter(None)(time)
        assert result == format_expected

    @pytest.mark.parametrize("freq", ("B", "L", "S"))
    def test_dateindex_conversion(self, freq, dtc):
        rtol = 10**-9
        dateindex = tm.makeDateIndex(k=10, freq=freq)
        rs = dtc.convert(dateindex, None, None)
        xp = converter.dates.date2num(dateindex._mpl_repr())
        tm.assert_almost_equal(rs, xp, rtol=rtol)

    @pytest.mark.parametrize("offset", [Second(), Milli(), Micro(50)])
    def test_resolution(self, offset, dtc):
        # Matplotlib's time representation using floats cannot distinguish
        # intervals smaller than ~10 microsecond in the common range of years.
        ts1 = Timestamp("2012-1-1")
        ts2 = ts1 + offset
        val1 = dtc.convert(ts1, None, None)
        val2 = dtc.convert(ts2, None, None)
        if not val1 < val2:
            raise AssertionError(f"{val1} is not less than {val2}.")

    def test_convert_nested(self, dtc):
        # Nested lists of timestamps convert element-wise.
        inner = [Timestamp("2017-01-01"), Timestamp("2017-01-02")]
        data = [inner, inner]
        result = dtc.convert(data, None, None)
        expected = [dtc.convert(x, None, None) for x in data]
        assert (np.array(result) == expected).all()
def timeAdd(x, offset):
    """Add ``offset`` seconds to a yyyymmddHHMMSS stamp; return it as an int."""
    shifted = pd.to_datetime(x, format='%Y%m%d%H%M%S') + offset * Second()
    # Strip the separators from the timestamp's string form to get back to
    # the compact yyyymmddHHMMSS digits.
    digits = str(shifted).replace('-', '').replace(' ', '').replace(':', '')
    return int(digits)
def parse_arguments():
    """Parse command-line arguments for the Sojourns/SIP processing script.

    Returns the parsed ``argparse`` namespace with file paths filled in
    (by sniffing the subject directory where not given explicitly), or
    raises/exits if no usable input data can be located.
    """
    usage = '%(prog)s [options] [path/to/subject/directory]'
    description = textwrap.dedent("""
    Process accelerometer data using Sojourns/SIP.

    Proceed in two steps: first, use input accelerometer data to estimate
    wear time and metabolic activity; second, generate many summary
    statistics from these estimates.

    There are two major ways to get input data to this program: you can
    give it the path to your subject's data and have it work out which
    files are where, or you can tell it precisely where to find each input
    file. You can also use the first method and then override specific
    defaults, if you prefer.

    The defaults have been chosen so that you can download your activity
    monitor data directly from the device to a subject's directory and run
    this program specifying only that directory. By default, this program
    searches for files named as in the examples below, except with less
    Victor Hugo. It will also find files directly in the subject directory,
    as well as files with names ending in "_QC.csv", which it will use
    preferentially to allow quality-controlling data without editing the
    original files.

    Don't store data from more than one subject in the same directory; if
    you do, this program will get confused and may mix subjects' data by
    accident!

    Input files:

    - ActiGraph data in 1-second epochs, as generated by the ActiLife
      software. This file must exist in order to complete the first step.

      Example file name: 24601/ActiGraph/JV_1secDataTable.csv

      Set this with `--ag-path FILENAME`.

    - activPAL data, as generated by the activPAL software. These consist
      of two files. If these files are found, use the SIP method in the
      first step; otherwise, use the original Sojourns method.

      Example file name: "24601/activPAL/JV Events.csv"
      Must also exist: 24601/activPAL/JV.def

      (You must quote this file name on the command line because it
      contains a space.)

      Note that these file names must have the *exact* same stem (here
      "JV"). The filenames generated by the activPAL software do this by
      default.

      Set this with `--ap-path FILENAME`.

    Intermediate files:

    - Awake ranges data, indicating when the subject was wearing the
      monitor(s). This file is generated by this program, but if a modified
      version already exists it will be used instead of estimating this
      information. This allows you to account for instances when the
      subject fell asleep while wearing the monitor, for instance.

      Example file name: "24601/ActiGraph/JV awake ranges.csv"

      (You must quote this file name on the command line because it
      contains a space.)

      You can edit this file in Excel, but if you do, you must take care to
      always delete cells rather than clearing their contents. Also, make
      sure to save as a CSV file.

      Set this with `--awake-path "FILENAME"`, or ignore an existing awake
      ranges file with `--ignore-awake-ranges`.

    - Sojourns/SIP annotated data, indicating bout boundaries and second-by-
      second estimated metabolic activity. This file is generated by this
      program, but if it already exists it will not be recomputed to save
      time. Editing this file by hand is not recommended.

      Example file name:
        24601/ActiGraph/JV_1secDataTable_with_activpal_sojourns.csv

      By default, this path will be the same as the ActiGraph data with
      "_sojourns" or "_with_activpal_sojourns" added before the ".csv",
      depending on whether activPAL data have been provided.

      Set this with `--soj-path FILENAME`.

    Output files:

    - Sojourns/SIP processed data, containing loads of summary measures
      generated from the metabolic estimates. See the README for a detailed
      description of the contents of this file.

      Example file name:
        24601/ActiGraph/JV_1secDataTable_with_activpal_sojourns_processed.csv

      This will always use the Sojourns/SIP file path with "_processed"
      added before the ".csv".

    Because many of the summary measures refer to times of day, it's
    important to provide the time zone in which the data were collected if
    it's different from the system time zone of the computer doing the
    processing. (Use the IANA time zone, like "America/Chicago", not the
    ambiguous abbreviation like "CST", which could mean Cuba Standard
    Time.)
    """)
    # TODO: summary measures
    parser = argparse.ArgumentParser(
        usage=usage, description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('subjdir', type=pathlib.Path, nargs='?',
                        help='search for subject data in this directory')
    parser.add_argument('-s', '--subject', dest='subj',
                        help='embed this tag as the subject identifier into '
                             'the processed output; the default value is the '
                             'name of the subject directory ("24601" in the '
                             'examples)')
    parser.add_argument('--ag-path', type=pathlib.Path,
                        help='get ActiGraph 1secDataTable data from this file')
    parser.add_argument('--ap-path', type=pathlib.Path,
                        help='get activPAL Events data from this file')
    parser.add_argument('--soj-path', type=pathlib.Path,
                        help='write Sojourns/SIP estimated metabolic activity '
                             "to this file if it doesn't already exist; "
                             'otherwise, read previously computed metabolic '
                             'estimates from this file (to save time)')
    parser.add_argument('--awake-path', type=pathlib.Path,
                        help='read wear time intervals from this file if it '
                             'exists; otherwise, estimate wear time and write '
                             'the estimates to this file')
    parser.add_argument('--soj-intermediate-path', type=pathlib.Path,
                        help=argparse.SUPPRESS)
    parser.add_argument('--ignore-awake-ranges', action='store_true',
                        help='ignore an existing "awake ranges" file and '
                             'estimate wear time anyway')
    parser.add_argument('--tz',
                        help='interpret data as being collected in this time '
                             'zone instead of %r'
                             % getattr(util.tz, 'zone', util.tz))
    args = parser.parse_args()
    if args.tz is not None:
        util.tz = args.tz
    if args.subjdir is not None:
        # Fill in any unspecified paths by sniffing the subject directory.
        if not args.subj:
            args.subj = args.subjdir.resolve().parts[-1]
        if not args.ag_path:
            args.ag_path = util.ActiGraphDataTable.sniff(args.subjdir,
                                                         epoch=Second())
        if not args.ap_path:
            args.ap_path = util.ActivPALData.sniff(args.subjdir)
        if not args.soj_path:
            args.soj_path = util.SojournsData.sniff(args.subjdir)
        if not args.awake_path:
            args.awake_path = util.AwakeRanges.sniff(args.subjdir)
    if not args.ag_path and not args.soj_path:
        # No usable input: explain why (bad directory vs. no data) or show help.
        if args.subjdir is not None:
            if not args.subjdir.exists():
                raise IOError("can't find subject directory %r"
                              % str(args.subjdir))
            elif not args.subjdir.is_dir():
                raise IOError("subjdir %r isn't a directory"
                              % str(args.subjdir))
            raise IOError("can't find any data in subject directory %r"
                          % str(args.subjdir))
        parser.print_help()
        parser.exit()
    return args
def run(edf):
    """Render an animated "emoji race" chart from the events frame ``edf``.

    ``edf`` is assumed to be indexed by time with 'Name' and 'Path' columns
    -- TODO confirm against the caller.
    """
    # preprocess
    ############################################################################
    print('==> preprocessing')
    # Fold aliased emoji names into their canonical name (or drop them when
    # the alias maps to None).
    for alias, name in settings.EMOJI_MAP.items():
        if name is None:
            edf = edf[~(edf.Name == alias)]
            continue
        ddf = edf[edf.Name == name]
        if ddf.shape[0]:
            edf.loc[edf.Name == alias, 'Path'] = ddf.iloc[0].Path
        edf.loc[edf.Name == alias, 'Name'] = name
    name2path = {
        name: edf[edf.Name == name].iloc[0].Path
        for name in edf.dropna().Name.unique()
    }

    # init and collect per frame ranges
    ############################################################################
    period = Second(settings.ROLLING_WINDOW_PERIOD)
    start = edf.index[0]
    stop = edf.index[-1] - period
    shift = Second(settings.ROLLING_WINDOW_SHIFT)
    export = settings.EXPORT_FORMAT
    frames = []
    # Each frame is a (a, b) rolling time window advanced by `shift`.
    for i in itertools.count():
        a = start + shift * i
        b = a + period
        if a > stop:
            break
        frames.append((a, b))

    # setup axes
    ############################################################################
    fig, ax = plt.subplots()
    xa, xb = d2n(edf.index[0]), d2n(edf.index[-1])
    ya, yb = 0, 1 + settings.EMOJI_PAD_TOP
    aspect, _ = get_aspect(ax)
    xrange = xb - xa
    margin = .02
    xmargin = xrange * margin * aspect
    bottom_margin = margin + .1
    top_margin = margin + .1
    image_hspace = xrange * settings.EMOJI_HSPACE
    bar_y = ya - bottom_margin * .7

    def set_lims():
        # Reapply limits after ax.clear() wipes them each frame.
        ax.set_xlim(xa - xmargin, xb + xmargin)
        ax.set_ylim(ya - bottom_margin, yb + top_margin)

    # now the crux calc and plotting
    ############################################################################
    positions = []
    tops = []

    def animation(frame):
        a, b = frame
        i = frames.index(frame)
        print(f'\rframe {i+1}/{len(frames)} - {a}', end='')
        ax.clear()
        set_lims()
        sub_edf = edf[a:b]
        aspect, _ = get_aspect(ax)
        nonlocal tops, positions
        if i == 0:
            # Restart of the animation loop: reset cross-frame state.
            tops = []
            positions = []
        # bar
        ax.plot([a, b], [bar_y, bar_y], **settings.BAR_ARGS)
        counted = sub_edf.groupby('Name').Name.count().sort_values()[:-45:-1]
        top = counted.max()
        ref_level = top
        # line
        tops.append((d2n(b), top))
        to_plot = [(x,
                    y / ref_level * .85 + settings.EMOJI_PAD_TOP
                    - settings.LINE_PAD_TOP)
                   for x, y in tops]
        ax.plot(*zip(*to_plot), **settings.LINE_ARGS)
        # sort emotes
        items = counted.items()
        if not positions:
            positions = list(counted.keys())
        elif settings.LAX_SORT:
            # Keep previously-seen emotes in their old order, append the
            # newcomers, then do a lax comparison sort.
            tmp = counted.to_dict()
            items = [(name, tmp.pop(name)) for name in positions
                     if name in tmp]
            items += [(name, freq) for name, freq in counted.items()
                      if name not in positions]
            items.sort(key=functools.cmp_to_key(lax_cmp), reverse=True)
        # items = list(items)
        # for name, freq in list(items)[:2]:
        #     ax.axhline(freq / ref_level)
        right = xb
        for i, (name, freq) in enumerate(items):
            height = freq / ref_level
            if i == 0 and settings.HEAD_RELATIVITY:
                # Scale the leader relative to an earlier window's top.
                prev = tops[-settings.HEAD_RELATIVITY - 1:][0][1]
                height = freq / prev
            left = right - (height * aspect)
            if right < xa:
                break
            path = name2path.get(name) or get_emoji_path(name)
            try:
                image = open_image(name, path)
            except Exception as e:
                err = f'\n==x error reading emoji "{name}" from "{path}": {e}'
                print(err, file=sys.stderr)
            else:
                ax.imshow(image, extent=[left, right, 0, height],
                          aspect='auto')
                # ax.add_patch(mpl.patches.Rectangle((left, 0), (right-left), height, fill=False, alpha=.5))
            right = left - image_hspace
        decorate(ax, ref_level)
        return [ax]

    # now show or export
    ############################################################################
    anim = mpl.animation.FuncAnimation(fig, animation, frames=frames,
                                       repeat=True, repeat_delay=5000,
                                       interval=settings.ANIMATION_INTERVAL)
    if settings.TIGHT_LAYOUT is not None:
        fig.tight_layout(**settings.TIGHT_LAYOUT)
    fig.subplots_adjust(**settings.SUBPLOT_ADJUST)
    if not export:
        plt.show()
    elif export != 'png':
        file = join(settings.OUTPUT_DIR, 'out.' + export)
        print(f'==> exporting to "{file}"')
        anim.save(file)
    else:
        print('==> exporting to frame sequence')
        for i, frame in enumerate(frames):
            animation(frame)
            fig.savefig(join(settings.OUTPUT_DIR, f'out{i:04d}.png'))
    print('\n==> done')
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 27 20:09:03 2018

@author: Tiffany

Assign each row of the sorted label data to a consecutive fixed-length
time window (in seconds), then write the result back out.
"""
import pandas as pd
from pandas.tseries.offsets import Second

df = pd.read_csv(
    r'C:\Users\Tiffany\Documents\dissertation\data\sorted\1_label_data.csv')
del df['Unnamed: 0']
df = df.sort_values(['StartTime']).reset_index(drop=True)
df.StartTime = pd.to_datetime(df.StartTime)

i = 1
frequency = 5  # window length in seconds
df["Time_window"] = 0
for index in range(0, len(df)):
    # Window i covers [start, start + frequency * i) relative to the first
    # timestamp; rows beyond the current window bump the counter.
    if (df.StartTime[index]) < (df.StartTime[0] + frequency * i * Second()):
        # BUG FIX: chained assignment (df.Time_window[index] = i) writes to
        # a temporary and is unreliable in modern pandas; use .loc instead.
        df.loc[index, 'Time_window'] = i
    else:
        i = i + 1
        df.loc[index, 'Time_window'] = i

df.to_csv(r'C:\Users\Tiffany\Documents\dissertation\data\ordered\1.csv')
def parse_arguments():
    """Parse the command line for the activity-monitor plotting tool.

    Returns
    -------
    argparse.Namespace
        Parsed options.  When a subject directory is supplied and explicit
        data-file paths are not, the directory is searched (via the
        ``util.*.sniff`` helpers) for each kind of data file.

    Raises
    ------
    IOError
        If a subject directory is given but does not exist, is not a
        directory, or contains no recognizable data files.
    """
    usage = '%(prog)s [options] [path/to/subject/directory]'
    description = textwrap.dedent("""
    Display activity monitor data in an attractive format.

    Display each type of data differently:

    - ActiGraph data integrated to 1-second epochs are displayed as a
      line plot of the counts along each axis. The blue line represents
      the first (vertical) axis, the green line the second
      (anterior-posterior) axis, and the red line the third (medial-
      lateral) axis.

    - ActiGraph data integrated to 60-second epochs are classified
      according to the modified (Freedson 1998) cut points used by the
      ActiLife software and displayed as bars color coded by estimated
      intensity. The colors correspond to estimated intensities as
      follows:
        * Dark blue: non-wear
        * Blue: sedentary
        * Light yellow: light
        * Yellow: lifestyle
        * Orange: moderate
        * Red: vigorous

    - Sojourns/SIP data are displayed as bars color coded by estimated
      intensity. The colors are as above, with additional colors as
      follows:
        * Green: standing
        * Cyan: seated, but light (in practice, this tends to indicate
          activities like recumbent biking, the intensities of which are
          typically underestimated)
        * Black: Sojourns estimated negative intensity for this bout
          (this is an inherent problem with the method but can only
          happen when Sojourns has already classified a bout as active;
          such bouts are typically moderate or vigorous)

    - activPAL Events data are displayed as bars color coded by whether
      the subject was sitting, standing or stepping. Here sitting is
      blue, standing green, and stepping red.

    If the graph crosses a time change (for instance, as caused by
    Daylight Saving Time), data which occurs before the change but on
    the same day will be shifted to fit.

    Files are selected in the same way as in sip.py; for more detail,
    see the help for that program. The exception to this is that this
    program will select as many files as it can find rather than ending
    its search when it finds an appropriate file (but files with names
    ending in "_QC" will still shadow files with identical names that
    are missing this suffix). If you wish to exclude a particular file
    from being plotted, you can pass it to the `--exclude` option.
    """)
    epilog = textwrap.dedent("""
    You may specify the --soj-path, --ag-path, --ap-path and --exclude
    options as many times as you like; each file specified this way will
    be plotted or ignored as directed.
    """)
    parser = argparse.ArgumentParser(
        usage=usage, description=description, epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('subjdir', type=pathlib.Path, nargs='?',
                        help='search for subject data in this directory')
    parser.add_argument('--soj-path', type=pathlib.Path, action='append',
                        default=[],
                        help='get Sojourns/SIP preprocessed Actigraph data '
                             'from this file')
    parser.add_argument('--ag-path', type=pathlib.Path, action='append',
                        default=[],
                        help='get Actigraph data from this file')
    parser.add_argument('--ap-path', type=pathlib.Path, action='append',
                        default=[],
                        help='get activPAL events data from this file')
    parser.add_argument('--awake-path', type=pathlib.Path,
                        help='get wear time intervals from this file in case '
                             'autodetection of non-wear time is poor')
    parser.add_argument('--ignore-awake-ranges', action='store_true',
                        help='ignore "awake ranges" file')
    parser.add_argument('--no-raw-counts', action='store_true',
                        help="don't plot raw counts (for speed reasons)")
    parser.add_argument('-x', '--exclude', type=pathlib.Path, action='append',
                        default=[],
                        help="don't plot the data in this file")
    parser.add_argument('--tz',
                        help='interpret data as being collected in this time '
                             'zone instead of %r'
                             % getattr(util.tz, 'zone', util.tz))
    args = parser.parse_args()
    if args.tz is not None:
        util.tz = args.tz
    if args.subjdir is not None:
        # FIX: the sniff results were previously left as bare `filter`
        # objects.  A filter object is always truthy, so the
        # `if not any([...])` check below could never report "no data
        # found", and the paths could only be iterated once.  Materialize
        # them as lists, matching the `default=[]` type of each option.
        if not args.ag_path:
            args.ag_path = list(filter(None, [
                util.ActiGraphDataTable.sniff(args.subjdir, epoch=Minute()),
                util.ActiGraphDataTable.sniff(args.subjdir, epoch=Second()),
            ]))
        if not args.ap_path:
            args.ap_path = list(
                filter(None, [util.ActivPALData.sniff(args.subjdir)]))
        if not args.soj_path:
            args.soj_path = list(
                filter(None, [util.SojournsData.sniff(args.subjdir)]))
        if not args.awake_path:
            args.awake_path = util.AwakeRanges.sniff(args.subjdir)
    if args.ignore_awake_ranges:
        args.awake_path = None
    if not any([args.ag_path, args.ap_path, args.soj_path]):
        if args.subjdir is not None:
            if not args.subjdir.exists():
                raise IOError("can't find subject directory %r"
                              % str(args.subjdir))
            elif not args.subjdir.is_dir():
                raise IOError("subjdir %r isn't a directory"
                              % str(args.subjdir))
            raise IOError("can't find any data in subject directory %r"
                          % str(args.subjdir))
        parser.print_help()
        parser.exit()
    return args
def cleanBeggining(feed):
    """Trim initial-setup artifacts from the head of *feed*.

    Repeatedly drops the first observation while it looks like a setup
    problem, i.e. while
        measurement(t1) == 0, or
        measurement(t1) <= 15 and measurement(t1 + 10s) == 0.
    (The threshold in the code is 15; an earlier docstring said 25.)

    Parameters
    ----------
    feed : DataFrame-like with a datetime-like index; the first column
        holds the measurement that is inspected.
        NOTE(review): assumes t1 + 10 seconds is present in the index
        whenever the `<= 15` branch is reached -- confirm with callers.

    Returns
    -------
    The index label of the first observation that survives trimming.
    """
    from pandas.tseries.offsets import Second
    feed = pd.DataFrame(feed)
    # `.loc` replaces the long-removed `.ix` indexer; every lookup here is
    # label-based, so behaviour is unchanged.  `.iloc[0]` picks the first
    # column of the selected row explicitly.
    while (((feed.loc[feed.first_valid_index()] == float(0)).iloc[0]) or
           (((feed.loc[feed.first_valid_index()] <= float(15)).iloc[0]) and
            ((feed.loc[feed.first_valid_index() + (10 * Second())]
              == float(0)).iloc[0]))):
        # Drop the first (bad) observation and re-test the new head.
        feed = feed.drop([feed.first_valid_index()])
    feed = feed.loc[feed.first_valid_index():feed.last_valid_index()]
    return (feed.first_valid_index())
(d1 - d2).days # 두 날짜의 시간차(일 기준) # 2) 날짜와 숫자의 연산 : 기본적으로 불가, 숫자를 timedelta 형식으로 변경 처리 후 연산 가능 d1 + 10 # unsupported operand type(s) for +: 'datetime.datetime' and 'int' # 날짜 + 숫자 연산 불가 d1 + timedelta(10) # timedelta의 기본 단위는 "일" 수 d1 + timedelta(1) # timedelta의 기본 단위는 "일" 수, 하루 뒤 d1 + timedelta(1 / 24) # timedelta의 기본 단위는 "일" 수, 한시간 뒤 # 3) 날짜와 숫자의 연산 : 기본적으로 불가, 숫자를 offset 형식으로 변경 처리 후 연산 가능 import pandas.tseries.offsets from pandas.tseries.offsets import Day, Hour, Second d1 + Day(10) # 10일 뒤 d1 + Hour(10) # 10시간 뒤 d1 + Second(10) # 10초 뒤 # [ 연습 문제 ] # emp.csv 파일을 읽고 emp = pd.read_csv('emp.csv', engine='python') # 1) 급여 검토일의 요일 출력 (단, 급여 검토일은 입사날짜의 100일 후 날짜) (emp['HIREDATE'].map(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M')) + timedelta(100)).map(lambda x: x.strftime('%A')) (pd.to_datetime(emp['HIREDATE']) + Day(100)).map(lambda x: x.strftime('%A')) # 2) 입사일로부터 근무일수 출력 datetime.now() - emp['HIREDATE'].map( lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M')) (datetime.now() - pd.to_datetime(emp['HIREDATE'])).map(lambda x: x.days)
'A': 'A-DEC', # YearEnd(month=12), 'AS': 'AS-JAN', # YearBegin(month=1), 'BA': 'BA-DEC', # BYearEnd(month=12), 'BAS': 'BAS-JAN', # BYearBegin(month=1), 'Min': 'T', 'min': 'T', 'ms': 'L', 'us': 'U', 'ns': 'N' } _name_to_offset_map = { 'days': Day(1), 'hours': Hour(1), 'minutes': Minute(1), 'seconds': Second(1), 'milliseconds': Milli(1), 'microseconds': Micro(1), 'nanoseconds': Nano(1) } _INVALID_FREQ_ERROR = "Invalid frequency: {0}" def to_offset(freqstr): """ Return DateOffset object from string representation or Timedelta object Examples --------
#---------------------------------------------------------------------- # Offset names ("time rules") and related functions from pandas.tseries.offsets import (Day, BDay, Hour, Minute, Second, Milli, Week, Micro, MonthEnd, MonthBegin, BMonthBegin, BMonthEnd, YearBegin, YearEnd, BYearBegin, BYearEnd, QuarterBegin, QuarterEnd, BQuarterBegin, BQuarterEnd) _offset_map = { 'D' : Day(), 'B' : BDay(), 'H' : Hour(), 'T' : Minute(), 'S' : Second(), 'L' : Milli(), 'U' : Micro(), None : None, # Monthly - Calendar 'M' : MonthEnd(), 'MS' : MonthBegin(), # Monthly - Business 'BM' : BMonthEnd(), 'BMS' : BMonthBegin(), # Annual - Calendar 'A-JAN' : YearEnd(month=1), 'A-FEB' : YearEnd(month=2),
df = df.sort_values(by = "auto_signed_time" , ascending = True) df.reset_index(drop = True ,inplace = True) df = df.set_index('auto_signed_time') #print(df['2019-03-14 08:00:16':'2019-03-14 09:00:15'][['student_id','attention','face_area','real_name','gender']]) df['auto_signed_time'] = pd.to_datetime(df['auto_signed_time'],format = '%Y-%m-%d %H:%M:%S') frequency = 1800 time_range = pd.date_range(df['auto_signed_time'][0],df['auto_signed_time'][df.shape[0]-1]+frequency*Second(),freq = '%sS'%frequency) df = df.set_index('auto_signed_time') for i in range(0,len(time_range) - 1): print(df.loc[time_range[i]:time_range[i+1]-1*Second()]['student_id']) ''' import pandas as pd from pandas.tseries.offsets import Second df = pd.read_csv('/Users/apple/PycharmProjects/Apriori/python3-fp-growth-master/课程出勤详情.csv',encoding = 'UTF-8') df = df.dropna(subset = ['face_area']) df = df.dropna(subset = ['auto_signed_time']) df = df.dropna(subset=['student_id']) df.groupby(['auto_signed_time']) df = df.sort_values(by = "auto_signed_time" , ascending = True) df.reset_index(drop = True ,inplace = True) df['auto_signed_time'] = pd.to_datetime(df['auto_signed_time'] , format = '%Y-%m-%d %H:%M:%S') frequency = 300 time_range = pd.date_range(df['auto_signed_time'][0], df['auto_signed_time'][df.shape[0]-1]+frequency*Second(),freq = '%sS'%frequency) df = df.set_index('auto_signed_time') for i in range(0,len(time_range) - 1): Series = df.loc[time_range[i]:time_range[i+1]-1*Second()]['student_id']