def parse(human_time, round_latest=False):
    """
    Parse a 'humanized' relative time string into a timestamp.

    Examples of parseable strings:

    - "15 minutes ago"
    - "About 2 hours ago"
    - "1 day ago"

    Returns a pendulum object representing the best guess of the timestamp
    referred to by the string, or `None` if the input cannot be parsed.

    The error between the actual timestamp and the parsed one can be as
    large as 2x the unit of time referred to. To avoid the erroneous
    appearance of precision, the units of the timestamp smaller than the
    precision of the input are zeroed out, i.e. `parse("1 day ago")` → a
    timestamp representing midnight yesterday.

    Set `round_latest=True` to round to the ceiling of the precision
    instead of the floor: `parse("1 day ago", round_latest=True)` → a
    timestamp representing `23:59:59` yesterday.
    """
    parsed = PARSER.fullmatch(human_time)
    if not parsed:
        return None

    quantity = int(parsed.group('qty'))
    unit = parsed.group('unit').lower()

    # Interval keyword arguments are plural ("days"), while start_of/end_of
    # take the singular unit name ("day").
    moment = pendulum.now() - pendulum.Interval(**{unit + 's': quantity})
    rounder = moment.end_of if round_latest else moment.start_of
    return rounder(unit)
def fetch_date_range(self, start, end, limit=None, **kwargs):
    """Fetch data from the specified date range.

    Args:
        start: ``datetime.date`` marking the beginning of the range.
        end: ``datetime.date`` marking the end of the range; must be
            strictly after ``start``.
        limit: Maximum number of results to yield. ``None`` means no
            limit; ``0`` yields nothing.
        **kwargs: Passed through ``self._get_kwargs`` to ``self._do_fetch``.

    Yields:
        FetchResult

    Raises:
        TypeError: If ``start`` or ``end`` is not a ``datetime.date``, or if
            ``_do_fetch`` returns a non-empty non-generator.
        ValueError: If ``start`` is not before ``end``, or a result's
            datestamp is more than 24 hours outside the requested range.
    """
    if not isinstance(start, datetime.date):
        raise TypeError('start must be a datetime.date. Got {!r}'.format(start))

    if not isinstance(end, datetime.date):
        raise TypeError('end must be a datetime.date. Got {!r}'.format(end))

    if start >= end:
        raise ValueError('start must be before end. {!r} > {!r}'.format(start, end))

    if limit == 0:
        return  # No need to do anything

    # Cast to datetimes for compat reasons
    start = pendulum.Pendulum.instance(datetime.datetime.combine(start, datetime.time(0, 0, 0, 0, timezone.utc)))
    end = pendulum.Pendulum.instance(datetime.datetime.combine(end, datetime.time(0, 0, 0, 0, timezone.utc)))

    if hasattr(self, 'shift_range'):
        warnings.warn(
            '{!r} implements a deprecated interface. '
            'Handle date transforms in _do_fetch. '
            'shift_range will no longer be called in SHARE 2.9.0'.format(self),
            DeprecationWarning
        )
        start, end = self.shift_range(start, end)

    data_gen = self._do_fetch(start, end, **self._get_kwargs(**kwargs))

    # An empty sized container is tolerated; anything else must be a generator.
    if not isinstance(data_gen, types.GeneratorType) and len(data_gen) != 0:
        raise TypeError('{!r}._do_fetch must return a GeneratorType for optimal performance and memory usage'.format(self))

    # Count from 1 so that exactly `limit` results are yielded; counting
    # from 0 let one extra result through before the break.
    for count, blob in enumerate(data_gen, 1):
        result = FetchResult(blob[0], self.serializer.serialize(blob[1]), *blob[2:])

        if result.datestamp is None:
            result.datestamp = start
        elif (result.datestamp.date() < start.date() or result.datestamp.date() > end.date()):
            # More than a day outside the range is an error; within a day it
            # is accepted with a warning.
            if (start - result.datestamp) > pendulum.Interval(hours=24) or (result.datestamp - end) > pendulum.Interval(hours=24):
                raise ValueError(
                    'result.datestamp is outside of the requested date range. '
                    '{} from {} is not within [{} - {}]'.format(result.datestamp, result.identifier, start, end)
                )
            logger.warning(
                'result.datestamp is within 24 hours of the requested date range. '
                'This is probably a timezone conversion error and will be accepted. '
                '{} from {} is within 24 hours of [{} - {}]'.format(result.datestamp, result.identifier, start, end)
            )

        yield result

        if limit is not None and count >= limit:
            break
def main(db_path, debug):
    """Download candles for every symbol and store them in the database.

    For each symbol, fetching resumes from the latest candle already stored,
    or from the symbol's trading start date when nothing is stored yet, and
    proceeds in 1000-minute steps up to the time the run started.

    Args:
        db_path: Path handed to ``SqliteDatabase``.
        debug: When truthy, lower the logger threshold to DEBUG.
    """
    if debug:
        logger.setLevel(logging.DEBUG)

    db = SqliteDatabase(path=db_path)
    # Close the database even when a fetch or insert fails part-way through.
    try:
        end_date = pendulum.now()
        step = pendulum.Interval(minutes=1000)

        symbols = get_symbols()
        # Log through `logger` (not the root `logging` module) so the level
        # selected by `debug` applies to these messages.
        logger.info(f'Found {len(symbols)} symbols')
        for i, symbol in enumerate(symbols, 1):
            # get start date for symbol
            # this is either the last entry from the db
            # or the trading start date (from json file)
            latest_candle_date = db.get_latest_candle_date(symbol)
            if latest_candle_date is None:
                logger.debug('No previous entries in db. Starting from scratch')
                # TODO: handle case when symbol is missing from trading start days
                # e.g. symbol is in symbols.json but not in symbols_trading_start_days.json
                start_date = symbol_start_date(symbol)
            else:
                logger.debug('Found previous db entries. Resuming from latest')
                start_date = latest_candle_date

            logger.info(
                f'{i}/{len(symbols)} | {symbol} | Processing from {start_date.to_datetime_string()}'
            )
            for d1, d2 in date_range(start_date, end_date, step):
                logger.debug(f'{d1} -> {d2}')
                # returns (max) 1000 candles, one for every minute
                candles = get_candles(symbol, d1, d2)
                logger.debug(f'Fetched {len(candles)} candles')
                if candles:
                    db.insert_candles(symbol, candles)

                # prevent from api rate-limiting
                time.sleep(3)
    finally:
        db.close()
def test_date_range():
    """
    Check that date_range walks the span in fixed steps, that each range
    starts where the previous one ended, and that the last range is clamped
    to the end-date.
    """
    span_start = pendulum.create(2015, 5, 12)
    span_end = pendulum.create(2015, 5, 13, 15, 0)
    step = pendulum.Interval(minutes=1000)

    chunks = list(date_range(span_start, span_end, step))
    assert len(chunks) == 3

    (first_from, first_to), (second_from, second_to), (third_from, third_to) = chunks

    assert first_from == span_start
    assert first_to == pendulum.create(2015, 5, 12, 16, 40)

    # each range begins exactly where the previous one ended
    assert second_from == pendulum.create(2015, 5, 12, 16, 40)
    assert second_to == pendulum.create(2015, 5, 13, 9, 20)

    # the final range must stop at the end date, not a full step later
    assert third_from == pendulum.create(2015, 5, 13, 9, 20)
    assert third_to == span_end
class AdsInsightAPI(IncrementalStreamAPI):
    """Incremental stream of ad insights fetched through asynchronous report jobs.

    One report job is created per day, starting ``buffer_days`` before the
    current state and ending today. Each job is polled until it completes,
    fails, is skipped, or times out.
    """

    entity_prefix = ""
    state_pk = "date_start"

    ALL_ACTION_ATTRIBUTION_WINDOWS = [
        "1d_click",
        "7d_click",
        "28d_click",
        "1d_view",
        "7d_view",
        "28d_view",
    ]

    ALL_ACTION_BREAKDOWNS = [
        "action_type",
        "action_target_id",
        "action_destination",
    ]

    # Some automatic fields (primary-keys) cannot be used as 'fields' query params.
    INVALID_INSIGHT_FIELDS = [
        "impression_device",
        "publisher_platform",
        "platform_position",
        "age",
        "gender",
        "country",
        "placement",
        "region",
        "dma",
    ]

    # Give up when a job shows 0% progress for this long.
    MAX_WAIT_TO_START = pendulum.Interval(minutes=5)
    # Give up when a job has not completed after this long.
    MAX_WAIT_TO_FINISH = pendulum.Interval(minutes=30)
    # Upper bound for the delay between two status polls.
    MAX_ASYNC_SLEEP = pendulum.Interval(minutes=5)

    action_breakdowns = ALL_ACTION_BREAKDOWNS
    level = "ad"
    action_attribution_windows = ALL_ACTION_ATTRIBUTION_WINDOWS
    time_increment = 1

    def __init__(self, api, start_date, breakdowns=None, buffer_days=28):
        """Store the sync configuration; the initial state is ``start_date``."""
        super().__init__(api=api)
        self.start_date = start_date
        self.buffer_days = buffer_days
        self._state = start_date
        self.breakdowns = breakdowns

    @staticmethod
    def _get_job_result(job, **params) -> Iterator:
        """Yield the exported data of every object in the job's result."""
        for obj in job.get_result():
            yield obj.export_all_data()

    def list(self, fields: Sequence[str] = None) -> Iterator[dict]:
        """Run one report job per day and yield the records read from each."""
        for params in self._params(fields=fields):
            job = self._run_job_until_completion(params)
            yield from super().read(partial(self._get_job_result, job=job), params)

    @retry_pattern(backoff.expo, (FacebookRequestError, JobTimeoutException, FacebookBadObjectError), max_tries=5, factor=4)
    def _run_job_until_completion(self, params) -> AdReportRun:
        """Create an async report job and poll it until it completes.

        Raises:
            JobTimeoutException: If the job fails, is skipped, does not start
                within ``MAX_WAIT_TO_START`` or does not finish within
                ``MAX_WAIT_TO_FINISH``.
        """
        # TODO parallelize running these jobs
        job = self._get_insights(params)
        logger.info(f"Created AdReportRun: {job} to sync insights with breakdown {self.breakdowns}")
        start_time = pendulum.now()
        sleep_seconds = 2
        max_sleep_seconds = self.MAX_ASYNC_SLEEP.in_seconds()
        while True:
            job = job.api_get()
            job_progress_pct = job["async_percent_completion"]
            logger.info(f"ReportRunId {job['report_run_id']} is {job_progress_pct}% complete")
            runtime = pendulum.now() - start_time

            if job["async_status"] == "Job Completed":
                return job
            elif job["async_status"] == "Job Failed":
                raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.")
            elif job["async_status"] == "Job Skipped":
                raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.")

            if runtime > self.MAX_WAIT_TO_START and job_progress_pct == 0:
                raise JobTimeoutException(
                    f"AdReportRun {job} did not start after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
                )
            elif runtime > self.MAX_WAIT_TO_FINISH:
                raise JobTimeoutException(
                    f"AdReportRun {job} did not finish after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
                )
            logger.info(f"Sleeping {sleep_seconds} seconds while waiting for AdReportRun: {job} to complete")
            time.sleep(sleep_seconds)
            # Exponential back-off, clamped so the delay never exceeds
            # MAX_ASYNC_SLEEP (plain doubling overshot it: 256 -> 512 seconds).
            sleep_seconds = min(sleep_seconds * 2, max_sleep_seconds)

    def _params(self, fields: Sequence[str] = None) -> Iterator[dict]:
        """Yield the request parameters for each single-day report job.

        Args:
            fields: Requested insight fields. Entries listed in
                ``INVALID_INSIGHT_FIELDS`` and duplicates are dropped.
                ``None`` is treated as no fields.
        """
        # Facebook freezes insight data 28 days after it was generated, which means that all data
        # from the past 28 days may have changed since we last emitted it, so we retrieve it again.
        buffered_start_date = self._state.subtract(days=self.buffer_days)
        end_date = pendulum.now()

        # Tolerate the default fields=None, and keep the caller's order so the
        # request is deterministic.
        invalid_fields = set(self.INVALID_INSIGHT_FIELDS)
        fields = [field for field in dict.fromkeys(fields or ()) if field not in invalid_fields]

        while buffered_start_date <= end_date:
            yield {
                "level": self.level,
                "action_breakdowns": self.action_breakdowns,
                "breakdowns": self.breakdowns,
                "limit": self.result_return_limit,
                "fields": fields,
                "time_increment": self.time_increment,
                "action_attribution_windows": self.action_attribution_windows,
                "time_ranges": [{"since": buffered_start_date.to_date_string(), "until": buffered_start_date.to_date_string()}],
            }
            buffered_start_date = buffered_start_date.add(days=1)

    @backoff_policy
    def _get_insights(self, params) -> AdReportRun:
        """Start an asynchronous insights report for the account."""
        return self._api.account.get_insights(params=params, is_async=True)
class SwimlaneJwtAuth(SwimlaneResolver):
    """Handles authentication for all requests"""

    # Re-authenticate when the token expires within this window.
    _token_expiration_buffer = pendulum.Interval(minutes=5)

    def __init__(self, swimlane, username, password):
        """Store the credentials; no login request is sent here."""
        super(SwimlaneJwtAuth, self).__init__(swimlane)

        self._username = username
        self._password = password

        self.user = None
        self._login_headers = {}
        # Starts out as "now", so the first request triggers authentication.
        self._token_expiration = pendulum.now()

    def __call__(self, request):
        """Attach necessary headers to all requests

        Automatically reauthenticate before sending request when nearing token expiration
        """
        # Refresh token if it expires soon
        if pendulum.now() + self._token_expiration_buffer >= self._token_expiration:
            self.authenticate()

        request.headers.update(self._login_headers)

        return request

    def authenticate(self):
        """Send login request and update User instance, login headers, and token expiration"""

        # Temporarily remove auth from Swimlane session for auth request to avoid recursive loop during login request
        self._swimlane._session.auth = None
        try:
            resp = self._swimlane.request(
                'post',
                'user/login',
                json={
                    'userName': self._username,
                    'password': self._password
                },
            )
        finally:
            # Restore auth even when the login request fails; otherwise later
            # requests would go out without auth and never re-authenticate.
            self._swimlane._session.auth = self

        # Get JWT from response content
        json_content = resp.json()
        token = json_content.pop('token', None)

        # Grab token expiration
        # NOTE(review): the signature is not verified here; the decoded token
        # is only used to read its `exp` claim. Confirm this is intended.
        token_data = jwt.decode(token, verify=False)
        token_expiration = pendulum.from_timestamp(token_data['exp'])

        headers = {'Authorization': 'Bearer {}'.format(token)}

        # Create User instance for authenticating user from login response data
        user = User(self._swimlane, _user_raw_from_login_content(json_content))

        self._login_headers = headers
        self.user = user
        self._token_expiration = token_expiration
def mock_default_sleep_interval(mocker):
    """Replace the client's DEFAULT_SLEEP_INTERVAL with a 5-second interval.

    Args:
        mocker: Object exposing a ``patch`` method (presumably the
            pytest-mock fixture; confirm against the test setup).
    """
    # `new=` swaps the constant for the interval itself. `return_value=` would
    # install a MagicMock in its place and only configure what calling that
    # mock returns, so the constant would never equal the interval.
    mocker.patch(
        "source_facebook_marketing.client.common.DEFAULT_SLEEP_INTERVAL",
        new=pendulum.Interval(seconds=5),
    )
import json
import sys
from time import sleep
from typing import Sequence

import backoff
import pendulum
from airbyte_cdk.entrypoint import logger  # FIXME (Eugene K): register logger as standard python logger
from facebook_business.exceptions import FacebookRequestError

# The Facebook API error codes indicating rate-limiting are listed at
# https://developers.facebook.com/docs/graph-api/overview/rate-limiting/
FACEBOOK_RATE_LIMIT_ERROR_CODES = (4, 17, 32, 613, 80000, 80001, 80002, 80003, 80004, 80005, 80006, 80008)
FACEBOOK_UNKNOWN_ERROR_CODE = 99
DEFAULT_SLEEP_INTERVAL = pendulum.Interval(minutes=1)


class FacebookAPIException(Exception):
    """General class for all API errors"""


class JobTimeoutException(Exception):
    """Scheduled job timed out"""


def batch(iterable: Sequence, size: int = 1):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    The final slice is shorter when the length is not a multiple of ``size``.
    """
    length = len(iterable)
    for offset in range(0, length, size):
        upper = offset + size
        # Never slice past the end of the sequence.
        yield iterable[offset : upper if upper < length else length]
def test_log_onlinetime(clients):
    """Check the online time recorded for clients '1' and '2'."""
    expected_onlinetime = {
        '1': pendulum.Interval(seconds=402, microseconds=149208),
        '2': pendulum.Interval(seconds=19, microseconds=759644),
    }
    for client_id, onlinetime in expected_onlinetime.items():
        assert clients[client_id].onlinetime == onlinetime
print(f'Processing {symbol}') while True: # ts + milliseconds end_ts = end_date.int_timestamp * 1000 url = f"https://api.bitfinex.com/v2/candles/trade:1D:t{symbol.upper()}/hist?end={end_ts}&limit={limit}" print(url) r = requests.get(url, timeout=10) r.raise_for_status() data = r.json() # if we request 1000 results and got back 1000 results, there is a high # chance that there might be an earlier date if len(data) == limit: # save this data and make another request # with reduced end_ts previous_data = data end_date = end_date - pendulum.Interval(days=limit) print(f'hit limit, new end date is {end_date}') print('sleep...') sleep(5) continue else: # we got less than `limit` results, which means that the # earliest trading date is in the current results # if we got no results, it means we went too far back # and need to use the previous data if len(data) == 0: start_ts = previous_data[-1][0] else: start_ts = data[-1][0] print(f'Found start date for {symbol}')