Example #1
0
class TimestampOps:
    """Timing benchmarks for scalar ``Timestamp`` methods, one run per timezone.

    NOTE(review): the ``params`` / ``param_names`` / ``setup`` / ``time_*``
    layout looks like the airspeed-velocity (asv) benchmark convention, where
    the harness calls ``setup(tz)`` before timing each ``time_*`` method for
    every entry of ``params`` -- confirm against the benchmark runner.
    """

    # Timezone arguments exercised: naive, a zone name, and two UTC objects.
    params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()]
    param_names = ["tz"]

    def setup(self, tz):
        """Build the ``Timestamp`` under test, localized to ``tz``."""
        self.ts = Timestamp("2017-08-25 08:16:14", tz=tz)

    def time_replace_tz(self, tz):
        """Time swapping ``tzinfo`` for a pytz US/Eastern zone."""
        self.ts.replace(tzinfo=pytz.timezone("US/Eastern"))

    def time_replace_None(self, tz):
        """Time stripping ``tzinfo`` from the timestamp."""
        self.ts.replace(tzinfo=None)

    def time_to_pydatetime(self, tz):
        """Time conversion to a standard-library ``datetime``."""
        self.ts.to_pydatetime()

    def time_normalize(self, tz):
        """Time ``Timestamp.normalize``."""
        self.ts.normalize()

    def time_tz_convert(self, tz):
        """Time ``tz_convert``; only exercised when the timestamp is aware."""
        if self.ts.tz is not None:
            self.ts.tz_convert(tz)

    def time_tz_localize(self, tz):
        """Time ``tz_localize``; only exercised when the timestamp is naive."""
        if self.ts.tz is None:
            self.ts.tz_localize(tz)

    def time_to_julian_date(self, tz):
        """Time ``Timestamp.to_julian_date``."""
        self.ts.to_julian_date()

    def time_floor(self, tz):
        """Time flooring to a 5-minute frequency."""
        # "min" replaces the alias "T", which pandas 2.2 deprecated; the
        # deprecated spelling would add warning overhead to the measurement.
        self.ts.floor("5min")

    def time_ceil(self, tz):
        """Time ceiling to a 5-minute frequency."""
        # "min" replaces the deprecated alias "T" (see time_floor).
        self.ts.ceil("5min")
Example #2
0
class TimestampOps(object):
    """Timing benchmarks for scalar ``Timestamp`` methods, one run per timezone.

    NOTE(review): the ``params`` / ``param_names`` / ``setup`` / ``time_*``
    layout looks like the airspeed-velocity (asv) benchmark convention, where
    the harness calls ``setup(tz)`` before timing each ``time_*`` method for
    every entry of ``params`` -- confirm against the benchmark runner.
    """

    # Timezone arguments exercised: naive, a zone name, and two UTC objects.
    params = [None, 'US/Eastern', pytz.UTC, dateutil.tz.tzutc()]
    param_names = ['tz']

    def setup(self, tz):
        """Build the ``Timestamp`` under test, localized to ``tz``."""
        self.ts = Timestamp('2017-08-25 08:16:14', tz=tz)

    def time_replace_tz(self, tz):
        """Time swapping ``tzinfo`` for a pytz US/Eastern zone."""
        self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))

    def time_replace_None(self, tz):
        """Time stripping ``tzinfo`` from the timestamp."""
        self.ts.replace(tzinfo=None)

    def time_to_pydatetime(self, tz):
        """Time conversion to a standard-library ``datetime``."""
        self.ts.to_pydatetime()

    def time_normalize(self, tz):
        """Time ``Timestamp.normalize``."""
        self.ts.normalize()

    def time_tz_convert(self, tz):
        """Time ``tz_convert``; only exercised when the timestamp is aware."""
        if self.ts.tz is not None:
            self.ts.tz_convert(tz)

    def time_tz_localize(self, tz):
        """Time ``tz_localize``; only exercised when the timestamp is naive."""
        if self.ts.tz is None:
            self.ts.tz_localize(tz)

    def time_to_julian_date(self, tz):
        """Time ``Timestamp.to_julian_date``."""
        self.ts.to_julian_date()

    def time_floor(self, tz):
        """Time flooring to a 5-minute frequency."""
        # 'min' replaces the alias 'T', which pandas 2.2 deprecated; the
        # deprecated spelling would add warning overhead to the measurement.
        self.ts.floor('5min')

    def time_ceil(self, tz):
        """Time ceiling to a 5-minute frequency."""
        # 'min' replaces the deprecated alias 'T' (see time_floor).
        self.ts.ceil('5min')
Example #3
0
    def __call__(self, timestamp: pd.Timestamp, ticker: str, price: float) \
            -> Union[Tuple[pd.Timestamp, np.ndarray, np.ndarray, np.ndarray, np.ndarray], None]:
        """
        Update bar information and return opens, highs, lows, closes when time's up

        :param pd.Timestamp timestamp: timestamp of current tick
        :param str ticker: ticker of the tick
        :param float price: mid of the tick
        :return: None or opens, highs, lows closes when time's up
        """
        self.data[ticker].update(price)

        if self.timestamp is None:
            # use floor so that we can end this bar earlier
            self.timestamp = timestamp.floor(
                self.unit) if self.unit != '' else timestamp
        elif timestamp >= self.timestamp + self.delta:
            opens = np.zeros(self.N)
            highs = np.zeros(self.N)
            lows = np.zeros(self.N)
            closes = np.zeros(self.N)

            for idx, tic in enumerate(self.tickers):
                opens[idx], highs[idx], lows[idx], closes[idx] = self.data[
                    tic].clear()

            self.timestamp = timestamp.floor(
                self.unit) if self.unit != '' else timestamp
            return self.timestamp, opens, highs, lows, closes

        return None
Example #4
0
    def _list_historical_pricing(self,
                                 now: pd.Timestamp,
                                 symbol: str,
                                 limit: int = 1500):
        assert limit < 2000

        if limit >= 1000:
            pricing = self.binance_cli.fetch_ohlcv(symbol=symbol,
                                                   timeframe="1m",
                                                   limit=1000)

            ext_limit = (limit + 1) - 1000
            pricing += self.binance_cli.fetch_ohlcv(
                symbol=symbol,
                timeframe="1m",
                limit=ext_limit,
                since=(pricing[0][0] - (60 * ext_limit * 1000)),
            )
        else:
            pricing = self.binance_cli.fetch_ohlcv(symbol=symbol,
                                                   timeframe="1m",
                                                   limit=limit + 1)

        pricing = pd.DataFrame(
            pricing,
            columns=["date", "open", "high", "low", "close",
                     "volume"]).set_index("date")
        pricing.index = pricing.index.map(
            lambda x: datetime.utcfromtimestamp(x / 1000)).tz_localize("UTC")

        # We drop one value always
        pricing = pricing.sort_index()
        return pricing[pricing.index < now.floor("T")]
Example #5
0
    def test_round_minute_freq(self, test_input, freq, expected):
        # ensure timestamps that shouldn't round don't
        # GH#21262
        dt = Timestamp(test_input)
        expected = Timestamp(expected)

        result_ceil = dt.ceil(freq)
        assert result_ceil == expected
        result_floor = dt.floor(freq)
        assert result_floor == expected
        result_round = dt.round(freq)
        assert result_round == expected
Example #6
0
class TimestampOps:
    """Timing benchmarks for scalar ``Timestamp`` methods, one run per timezone.

    NOTE(review): the ``params`` / ``param_names`` / ``setup`` / ``time_*``
    layout looks like the airspeed-velocity (asv) benchmark convention, where
    the harness calls ``setup(tz)`` before timing each ``time_*`` method for
    every entry of ``params`` -- confirm against the benchmark runner.
    """

    # Timezone arguments exercised: naive, a zone name, and two UTC objects.
    params = [None, 'US/Eastern', pytz.UTC,
              dateutil.tz.tzutc()]
    param_names = ['tz']

    def setup(self, tz):
        """Build the ``Timestamp`` under test, localized to ``tz``."""
        self.ts = Timestamp('2017-08-25 08:16:14', tz=tz)

    def time_replace_tz(self, tz):
        """Time swapping ``tzinfo`` for a pytz US/Eastern zone."""
        self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))

    def time_replace_None(self, tz):
        """Time stripping ``tzinfo`` from the timestamp."""
        self.ts.replace(tzinfo=None)

    def time_to_pydatetime(self, tz):
        """Time conversion to a standard-library ``datetime``."""
        self.ts.to_pydatetime()

    def time_normalize(self, tz):
        """Time ``Timestamp.normalize``."""
        self.ts.normalize()

    def time_tz_convert(self, tz):
        """Time ``tz_convert``; only exercised when the timestamp is aware."""
        if self.ts.tz is not None:
            self.ts.tz_convert(tz)

    def time_tz_localize(self, tz):
        """Time ``tz_localize``; only exercised when the timestamp is naive."""
        if self.ts.tz is None:
            self.ts.tz_localize(tz)

    def time_to_julian_date(self, tz):
        """Time ``Timestamp.to_julian_date``."""
        self.ts.to_julian_date()

    def time_floor(self, tz):
        """Time flooring to a 5-minute frequency."""
        # 'min' replaces the alias 'T', which pandas 2.2 deprecated; the
        # deprecated spelling would add warning overhead to the measurement.
        self.ts.floor('5min')

    def time_ceil(self, tz):
        """Time ceiling to a 5-minute frequency."""
        # 'min' replaces the deprecated alias 'T' (see time_floor).
        self.ts.ceil('5min')
Example #7
0
    def test_round_int64(self, timestamp, freq):
        # check that all rounding modes are accurate to int64 precision
        # see GH#22591
        dt = Timestamp(timestamp)
        unit = to_offset(freq).nanos

        # test floor
        result = dt.floor(freq)
        assert result.value % unit == 0, f"floor not a {freq} multiple"
        assert 0 <= dt.value - result.value < unit, "floor error"

        # test ceil
        result = dt.ceil(freq)
        assert result.value % unit == 0, f"ceil not a {freq} multiple"
        assert 0 <= result.value - dt.value < unit, "ceil error"

        # test round
        result = dt.round(freq)
        assert result.value % unit == 0, f"round not a {freq} multiple"
        assert abs(result.value - dt.value) <= unit // 2, "round error"
        if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2:
            # round half to even
            assert result.value // unit % 2 == 0, "round half to even error"
Example #8
0
    def test_round_int64(self, timestamp, freq):
        """check that all rounding modes are accurate to int64 precision
           see GH#22591
        """
        dt = Timestamp(timestamp)
        unit = to_offset(freq).nanos

        # test floor
        result = dt.floor(freq)
        assert result.value % unit == 0, "floor not a {} multiple".format(freq)
        assert 0 <= dt.value - result.value < unit, "floor error"

        # test ceil
        result = dt.ceil(freq)
        assert result.value % unit == 0, "ceil not a {} multiple".format(freq)
        assert 0 <= result.value - dt.value < unit, "ceil error"

        # test round
        result = dt.round(freq)
        assert result.value % unit == 0, "round not a {} multiple".format(freq)
        assert abs(result.value - dt.value) <= unit // 2, "round error"
        if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2:
            # round half to even
            assert result.value // unit % 2 == 0, "round half to even error"
Example #9
0
 def test_floor(self):
     dt = Timestamp("20130101 09:10:11")
     result = dt.floor("D")
     expected = Timestamp("20130101")
     assert result == expected
Example #10
0
 def test_floor(self):
     dt = Timestamp('20130101 09:10:11')
     result = dt.floor('D')
     expected = Timestamp('20130101')
     assert result == expected
Example #11
0
 def test_floor(self):
     dt = Timestamp('20130101 09:10:11')
     result = dt.floor('D')
     expected = Timestamp('20130101')
     assert result == expected
Example #12
0
    def _get_minutes_to_sync(self, now: pd.Timestamp):
        last_sync_on = self.usecase.get_last_sync_on()
        minutes_delta = int(
            (now.floor("T") - last_sync_on).total_seconds() // 60)

        return minutes_delta - 1
Example #13
0
def nyiso_cbl(meter, event_start, event_end, look_back, event_type = 'weekday'):
    '''
    Calculates the NYISO customer baseline (CBL) for an event from meter data.

    Parameters:
        meter (dataframe): Meter readings. The code reads the columns
            ``date``, ``hour``, ``dttm``, ``id`` and ``kW``.
        event_start (str) : A str coercible to timestamp for the start of the event
        event_end (str) : A str coercible to timestamp for the end of the event
        look_back (int) : An integer specifying the number of days to look back
        event_type (str) : A string specifying the type of event (weekday, sunday, saturday)

    Returns:
        tuple : ``(event_day, perf)`` -- the event-day hourly data with the
            added ``baseline`` and ``adjustment`` columns, and the result of
            ``perf_calc`` for the event hours

    Raises:
        ValueError : If ``event_type`` is not one of the supported values
    '''
    # ISO weekday numbers (Monday=1 ... Sunday=7) eligible per event type.
    # Validated up front: previously an unknown type left ``days`` unbound and
    # only failed later, after most of the work had been done.
    #TODO: weekend cbl logic
    eligible_days = {
        'weekday': list(range(1, 6)),
        'saturday': [6],
        'sunday': [7],
    }
    if event_type not in eligible_days:
        raise ValueError(
            "event_type must be one of {}, got {!r}".format(
                sorted(eligible_days), event_type))
    days = eligible_days[event_type]

    start = Timestamp(event_start)
    end = Timestamp(event_end)
    # 'h' replaces the hourly alias 'H', deprecated in pandas 2.2
    event_hours = date_range(start, end, freq = 'h').hour.tolist()
    event_hours = event_hours[:-1] # accounting for hour ending

    # get max lookback days
    window_start = start.date() - Timedelta(look_back, unit = 'days')
    datelist = date_range(window_start, periods = look_back).date.tolist()
    data = meter[meter.date.isin(datelist)]

    # get the seed values: hourly means over the event hours, event day excluded
    seed_data = data[data.hour.isin(event_hours)]
    seed_data = seed_data[seed_data['date'] != start.date()]
    seed_data = seed_data.groupby(['date','hour']).mean().reset_index()
    seed_value = seed_data['kW'].max()*0.25

    # identify the low usage days (daily mean below 25% of the peak hourly mean)
    low_usage = seed_data.groupby(['date']).mean()
    low_usage_dates = low_usage[low_usage.kW < seed_value].index.tolist()

    # days whose weekday does not match the event type
    rm_day = list({d for d in seed_data.date.to_list()
                   if d.isoweekday() not in days})

    # get dates and holidays to exclude; copy so the list returned by
    # get_holidays is never mutated
    exclude = list(get_holidays(start.year))
    exclude.extend(low_usage_dates)
    exclude.append(start.date() - Timedelta(1, unit = 'day'))
    exclude.extend(rm_day)

    # get cbl basis days: the 10 most recent eligible days
    max_days = seed_data.date.unique().tolist()
    days_to_keep = [d for d in max_days if d not in exclude]
    days_to_keep.sort(reverse = True)
    cbl_basis = days_to_keep[:10]

    # get averages and rank them, pick the top 5 of the averages
    averages = seed_data.groupby('date').mean()
    averages = averages[averages.index.isin(cbl_basis)]
    averages['rank'] = averages['kW'].rank(ascending = False)
    baseline_dates = averages[averages['rank'] <= 5].index.tolist()

    # calculate baseline as average of the hours for the selected days
    baseline = data[data.date.isin(baseline_dates)]
    baseline = baseline.groupby('hour').mean()

    # actual values during event day. The window end is the day start plus one
    # day: ceil() returned the start itself for an event beginning exactly at
    # midnight, which produced an empty window.
    day_start = start.floor('24h')
    day_end = day_start + Timedelta(1, unit = 'days')
    event_day = meter[(meter.dttm >= day_start) & (meter.dttm < day_end)]
    event_day = event_day.groupby(['id','hour']).mean().reset_index()
    # NOTE(review): this assignment aligns on index labels (row number of
    # event_day vs. hour index of baseline), which only matches hour-for-hour
    # when there is a single id with hours 0..23 -- confirm this is intended.
    event_day['baseline'] = baseline.kW

    #get adjustment factor
    gaf = weather_adjustment(start = start, end = end, meter = meter,
                             basis_dates = cbl_basis)
    # get the adjusted baseline
    # NOTE(review): also index-aligned; depends on the index of gaf -- confirm.
    event_day['adjustment'] = event_day.baseline * gaf.kW
    # calculate the event performance per hour
    perf = perf_calc(event_day, event_hours)

    return event_day, perf
 def round_value(self, value: pd.Timestamp) -> pd.Timestamp:
     """Floor ``value`` to a multiple of ``self._round_step``.

     The step is truncated to whole seconds before it is used as the
     flooring frequency.
     """
     logger.debug(f"Rounding value: {value}")
     step_seconds = int(self._round_step.total_seconds())
     return value.floor(f"{step_seconds}s")
Example #15
0
def sync(session, dataset: str, variables: List[str],
         start: pd.Timestamp, end: pd.Timestamp, debug: bool, force: bool):
    """Replace the stored observations of ``dataset`` between ``start`` and ``end``.

    Existing ``Observation`` rows for the dataset, variables and time range are
    deleted, fresh rows are read from ``retrieve`` and added to the session,
    and the set of timestamps seen is checked against the timestamps expected
    for the dataset's frequency.

    :param session: database session used to delete, add and commit.
    :param dataset: dataset name; also the key into ``FREQUENCY``.
    :param variables: variable names to read from each retrieved row.
    :param start: start of the range to sync.
    :param end: end of the range to sync.
    :param debug: when true, print every row and skip the final commit.
    :param force: when true, tolerate missing timestamps anywhere in the range.
    :raises AssertionError: if timestamps are missing other than as a trailing
        run (and ``force`` is false), or if unexpected timestamps were seen.
    """
    session.query(Observation).where(
        Observation.timestamp.between(start, end),
        Observation.dataset == dataset,
        Observation.variable.in_(variables)

    ).delete(synchronize_session=False)

    obs_count = row_count = 0
    timestamps = set()
    latest_timestamp = start
    for row_count, row in enumerate(retrieve(dataset, variables, start, end), start=1):

        if debug:
            print(f'{dataset}:', ' '.join(f'{k}={v}' for k, v in row.items()))

        for variable in variables:

            value = row[variable]
            # '' and 'x' carry no observation and are skipped
            if value in ('', 'x'):
                continue
            # 'tr' is stored as zero (presumably a "trace" amount -- confirm)
            if value == 'tr':
                value = 0

            timestamp = row['TimeStamp']
            timestamps.add(timestamp)
            latest_timestamp = max(latest_timestamp, timestamp)

            session.add(Observation(
                timestamp=timestamp,
                dataset=dataset,
                variable=variable,
                value=value
            ))

            obs_count += 1

    print(f'{dataset}: {row_count} rows giving {obs_count} observations, '
          f'latest at {latest_timestamp}')

    frequency = FREQUENCY[dataset]
    start = pd.Timestamp(start)
    end = pd.Timestamp(end)
    if frequency == 'D':
        expected_timestamps = datetime_range(start, end)
    else:
        # Nudge start forward one second before ceil() so a start sitting
        # exactly on a frequency boundary is excluded. 's' replaces the unit
        # string 'S', deprecated in pandas 2.2.
        start = (start+pd.Timedelta(1, 's')).ceil(frequency)
        end = end.floor(frequency)
        expected_timestamps = pd.date_range(start, end, freq=frequency)

    # Build the expected set and the sorted missing list once; both were
    # previously recomputed for each use.
    expected_set = set(expected_timestamps)
    missing = sorted(expected_set - timestamps)
    if missing:
        missing_text = ', '.join(str(m) for m in missing)
        message = f'{len(missing)} missing for {dataset}: {missing_text}'
        # A gap made only of the last expected timestamps is tolerated.
        if missing == list(expected_timestamps[-len(missing):]) or force:
            if len(missing) > 1:
                print('WARNING '+message)
        else:
            raise AssertionError(message)
    unexpected = timestamps - expected_set
    if unexpected:
        raise AssertionError('Unexpected: '+', '.join(str(m) for m in sorted(unexpected)))

    if not debug:
        session.commit()
Example #16
0
    def test_round(self):

        # round
        dt = Timestamp('20130101 09:10:11')
        result = dt.round('D')
        expected = Timestamp('20130101')
        self.assertEqual(result, expected)

        dt = Timestamp('20130101 19:10:11')
        result = dt.round('D')
        expected = Timestamp('20130102')
        self.assertEqual(result, expected)

        dt = Timestamp('20130201 12:00:00')
        result = dt.round('D')
        expected = Timestamp('20130202')
        self.assertEqual(result, expected)

        dt = Timestamp('20130104 12:00:00')
        result = dt.round('D')
        expected = Timestamp('20130105')
        self.assertEqual(result, expected)

        dt = Timestamp('20130104 12:32:00')
        result = dt.round('30Min')
        expected = Timestamp('20130104 12:30:00')
        self.assertEqual(result, expected)

        dti = date_range('20130101 09:10:11', periods=5)
        result = dti.round('D')
        expected = date_range('20130101', periods=5)
        tm.assert_index_equal(result, expected)

        # floor
        dt = Timestamp('20130101 09:10:11')
        result = dt.floor('D')
        expected = Timestamp('20130101')
        self.assertEqual(result, expected)

        # ceil
        dt = Timestamp('20130101 09:10:11')
        result = dt.ceil('D')
        expected = Timestamp('20130102')
        self.assertEqual(result, expected)

        # round with tz
        dt = Timestamp('20130101 09:10:11', tz='US/Eastern')
        result = dt.round('D')
        expected = Timestamp('20130101', tz='US/Eastern')
        self.assertEqual(result, expected)

        dt = Timestamp('20130101 09:10:11', tz='US/Eastern')
        result = dt.round('s')
        self.assertEqual(result, dt)

        dti = date_range('20130101 09:10:11',
                         periods=5).tz_localize('UTC').tz_convert('US/Eastern')
        result = dti.round('D')
        expected = date_range('20130101', periods=5).tz_localize('US/Eastern')
        tm.assert_index_equal(result, expected)

        result = dti.round('s')
        tm.assert_index_equal(result, dti)

        # invalid
        for freq in ['Y', 'M', 'foobar']:
            self.assertRaises(ValueError, lambda: dti.round(freq))