Example 1
def parse(human_time, round_latest=False):
    """
    Parse a 'humanized' time string.

    Examples of parseable strings:
    - "15 minutes ago"
    - "About 2 hours ago"
    - "1 day ago"

    Returns a pendulum object representing the best guess of the timestamp
    referred to by the string. Note that the error between the actual timestamp
    and the one parsed by this function can be as large as 2x the unit of time
    referred to. Therefore, to avoid the erroneous appearance of precision,
    this function zeroes out the units of the timestamp smaller than the
    precision of the input. E.g. `parse("1 day ago")` → a timestamp representing
    midnight yesterday.

    Returns `None` if the input cannot be parsed.

    Sometimes it's more useful to round to the ceiling of the precision instead
    of the floor. To do so, set `round_latest=True`.
    `parse("1 day ago", round_latest=True)` → a timestamp representing
    `23:59:59` yesterday.
    """
    match = PARSER.fullmatch(human_time)
    if match:
        qty = int(match.group('qty'))
        unit = match.group('unit').lower()

        ts = pendulum.now() - pendulum.Interval(**{unit + 's': qty})
        if round_latest:
            return ts.end_of(unit)
        return ts.start_of(unit)
    else:
        return None
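
The `PARSER` regular expression is not included in the snippet. A hypothetical pattern that would satisfy the named groups used above (restricted to units that `pendulum.Interval` accepts as keyword arguments) could be:

import re

# Hypothetical pattern: optional "about", a quantity, a singular unit, then "ago".
# The unit is captured in singular form because parse() appends 's' before
# building the pendulum.Interval keyword arguments.
PARSER = re.compile(
    r'(?:about\s+)?(?P<qty>\d+)\s+(?P<unit>second|minute|hour|day|week)s?\s+ago',
    re.IGNORECASE,
)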
Example 2
    def fetch_date_range(self, start, end, limit=None, **kwargs):
        """Fetch data from the specified date range.

        Yields:
            FetchResult

        """
        if not isinstance(start, datetime.date):
            raise TypeError('start must be a datetime.date. Got {!r}'.format(start))

        if not isinstance(end, datetime.date):
            raise TypeError('end must be a datetime.date. Got {!r}'.format(end))

        if start >= end:
            raise ValueError('start must be before end. {!r} >= {!r}'.format(start, end))

        if limit == 0:
            return  # No need to do anything

        # Cast to datetimes for compat reasons
        start = pendulum.Pendulum.instance(datetime.datetime.combine(start, datetime.time(0, 0, 0, 0, timezone.utc)))
        end = pendulum.Pendulum.instance(datetime.datetime.combine(end, datetime.time(0, 0, 0, 0, timezone.utc)))

        if hasattr(self, 'shift_range'):
            warnings.warn(
                '{!r} implements a deprecated interface. '
                'Handle date transforms in _do_fetch. '
                'shift_range will no longer be called in SHARE 2.9.0'.format(self),
                DeprecationWarning
            )
            start, end = self.shift_range(start, end)

        data_gen = self._do_fetch(start, end, **self._get_kwargs(**kwargs))

        if not isinstance(data_gen, types.GeneratorType) and len(data_gen) != 0:
            raise TypeError('{!r}._do_fetch must return a GeneratorType for optimal performance and memory usage'.format(self))

        for i, blob in enumerate(data_gen):
            result = FetchResult(blob[0], self.serializer.serialize(blob[1]), *blob[2:])

            if result.datestamp is None:
                result.datestamp = start
            elif (result.datestamp.date() < start.date() or result.datestamp.date() > end.date()):
                if (start - result.datestamp) > pendulum.Interval(hours=24) or (result.datestamp - end) > pendulum.Interval(hours=24):
                    raise ValueError(
                        'result.datestamp is outside of the requested date range. '
                        '{} from {} is not within [{} - {}]'.format(result.datestamp, result.identifier, start, end)
                    )
                logger.warning(
                    'result.datestamp is within 24 hours of the requested date range. '
                    'This is probably a timezone conversion error and will be accepted. '
                    '{} from {} is within 24 hours of [{} - {}]'.format(result.datestamp, result.identifier, start, end)
                )

            yield result

            if limit is not None and i + 1 >= limit:
                break
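
`fetch_date_range` expects `_do_fetch` to return a generator of `(identifier, raw_data[, datestamp])` tuples, as the `FetchResult` construction above shows. A hypothetical, minimal implementation of that interface (the paging helper is assumed) might look like:

    def _do_fetch(self, start, end, **kwargs):
        # Hypothetical sketch: page lazily through a source API so that
        # fetch_date_range can serialize and validate records as they stream.
        for record in self._fetch_page(start, end, **kwargs):  # assumed helper
            yield record['id'], record, pendulum.parse(record['date_modified'])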
Example 3
def main(db_path, debug):
    if debug:
        logger.setLevel(logging.DEBUG)

    db = SqliteDatabase(path=db_path)
    end_date = pendulum.now()
    step = pendulum.Interval(minutes=1000)

    symbols = get_symbols()
    logger.info(f'Found {len(symbols)} symbols')
    for i, symbol in enumerate(symbols, 1):
        # get start date for symbol
        # this is either the last entry from the db
        # or the trading start date (from json file)
        latest_candle_date = db.get_latest_candle_date(symbol)
        if latest_candle_date is None:
            logger.debug('No previous entries in db. Starting from scratch')
            # TODO: handle case when symbol is missing from trading start days
            # e.g. symbol is in symbols.json but not in symbols_trading_start_days.json
            start_date = symbol_start_date(symbol)
        else:
            logger.debug('Found previous db entries. Resuming from latest')
            start_date = latest_candle_date

        logger.info(
            f'{i}/{len(symbols)} | {symbol} | Processing from {start_date.to_datetime_string()}'
        )
        for d1, d2 in date_range(start_date, end_date, step):
            logger.debug(f'{d1} -> {d2}')
            # returns (max) 1000 candles, one for every minute
            candles = get_candles(symbol, d1, d2)
            logger.debug(f'Fetched {len(candles)} candles')
            if candles:
                db.insert_candles(symbol, candles)

            # avoid hitting the API rate limit
            time.sleep(3)
    db.close()
Example 4
def test_date_range():
    """
    Test that the iterator yields the correct end-date.
    """
    start_date = pendulum.create(2015, 5, 12)
    end_date = pendulum.create(2015, 5, 13, 15, 0)
    d = pendulum.Interval(minutes=1000)

    ranges = [(d1, d2) for d1, d2 in date_range(start_date, end_date, d)]

    assert len(ranges) == 3

    assert ranges[0][0] == start_date
    assert ranges[0][1] == pendulum.create(2015, 5, 12, 16, 40)

    # the start date of the next element should be the end
    # date of the first one
    assert ranges[1][0] == pendulum.create(2015, 5, 12, 16, 40)
    assert ranges[1][1] == pendulum.create(2015, 5, 13, 9, 20)

    # the last element shouldn't be later than the end date
    assert ranges[2][0] == pendulum.create(2015, 5, 13, 9, 20)
    assert ranges[2][1] == end_date
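
The `date_range` helper exercised here (and used in the previous example) is not shown in these snippets. A sketch consistent with what the test expects, i.e. consecutive windows of at most `step` with the last window clamped to `end_date`, could be:

def date_range(start_date, end_date, step):
    # Yield (window_start, window_end) pairs covering [start_date, end_date];
    # each window spans at most `step`, and the last one ends exactly at end_date.
    current = start_date
    while current < end_date:
        window_end = min(current + step, end_date)
        yield current, window_end
        current = window_end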
Example 5
class AdsInsightAPI(IncrementalStreamAPI):
    entity_prefix = ""
    state_pk = "date_start"

    ALL_ACTION_ATTRIBUTION_WINDOWS = [
        "1d_click",
        "7d_click",
        "28d_click",
        "1d_view",
        "7d_view",
        "28d_view",
    ]

    ALL_ACTION_BREAKDOWNS = [
        "action_type",
        "action_target_id",
        "action_destination",
    ]

    # Some automatic fields (primary keys) cannot be used as 'fields' query params.
    INVALID_INSIGHT_FIELDS = [
        "impression_device",
        "publisher_platform",
        "platform_position",
        "age",
        "gender",
        "country",
        "placement",
        "region",
        "dma",
    ]

    MAX_WAIT_TO_START = pendulum.Interval(minutes=5)
    MAX_WAIT_TO_FINISH = pendulum.Interval(minutes=30)
    MAX_ASYNC_SLEEP = pendulum.Interval(minutes=5)

    action_breakdowns = ALL_ACTION_BREAKDOWNS
    level = "ad"
    action_attribution_windows = ALL_ACTION_ATTRIBUTION_WINDOWS
    time_increment = 1

    def __init__(self, api, start_date, breakdowns=None, buffer_days=28):
        super().__init__(api=api)
        self.start_date = start_date
        self.buffer_days = buffer_days
        self._state = start_date
        self.breakdowns = breakdowns

    @staticmethod
    def _get_job_result(job, **params) -> Iterator:
        for obj in job.get_result():
            yield obj.export_all_data()

    def list(self, fields: Sequence[str] = None) -> Iterator[dict]:
        for params in self._params(fields=fields):
            job = self._run_job_until_completion(params)
            yield from super().read(partial(self._get_job_result, job=job), params)

    @retry_pattern(backoff.expo, (FacebookRequestError, JobTimeoutException, FacebookBadObjectError), max_tries=5, factor=4)
    def _run_job_until_completion(self, params) -> AdReportRun:
        # TODO parallelize running these jobs
        job = self._get_insights(params)
        logger.info(f"Created AdReportRun: {job} to sync insights with breakdown {self.breakdowns}")
        start_time = pendulum.now()
        sleep_seconds = 2
        while True:
            job = job.api_get()
            job_progress_pct = job["async_percent_completion"]
            logger.info(f"ReportRunId {job['report_run_id']} is {job_progress_pct}% complete")
            runtime = pendulum.now() - start_time

            if job["async_status"] == "Job Completed":
                return job
            elif job["async_status"] == "Job Failed":
                raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.")
            elif job["async_status"] == "Job Skipped":
                raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.")

            if runtime > self.MAX_WAIT_TO_START and job_progress_pct == 0:
                raise JobTimeoutException(
                    f"AdReportRun {job} did not start after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
                )
            elif runtime > self.MAX_WAIT_TO_FINISH:
                raise JobTimeoutException(
                    f"AdReportRun {job} did not finish after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
                )
            logger.info(f"Sleeping {sleep_seconds} seconds while waiting for AdReportRun: {job} to complete")
            time.sleep(sleep_seconds)
            if sleep_seconds < self.MAX_ASYNC_SLEEP.in_seconds():
                sleep_seconds *= 2

    def _params(self, fields: Sequence[str] = None) -> Iterator[dict]:
        # Facebook freezes insight data 28 days after it was generated, which means that all data
        # from the past 28 days may have changed since we last emitted it, so we retrieve it again.
        buffered_start_date = self._state.subtract(days=self.buffer_days)
        end_date = pendulum.now()

        fields = list(set(fields) - set(self.INVALID_INSIGHT_FIELDS))

        while buffered_start_date <= end_date:
            yield {
                "level": self.level,
                "action_breakdowns": self.action_breakdowns,
                "breakdowns": self.breakdowns,
                "limit": self.result_return_limit,
                "fields": fields,
                "time_increment": self.time_increment,
                "action_attribution_windows": self.action_attribution_windows,
                "time_ranges": [{"since": buffered_start_date.to_date_string(), "until": buffered_start_date.to_date_string()}],
            }
            buffered_start_date = buffered_start_date.add(days=1)

    @backoff_policy
    def _get_insights(self, params) -> AdReportRun:
        return self._api.account.get_insights(params=params, is_async=True)
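
To make the buffering in `_params` concrete: with the default `buffer_days=28`, a saved cursor of, say, 2021-01-29 (a made-up value) is re-requested starting from 2021-01-01, one single-day `time_ranges` entry per yielded params dict:

state = pendulum.create(2021, 1, 29)           # pendulum 1.x API, matching the snippet
buffered_start_date = state.subtract(days=28)  # made-up cursor, 28-day buffer
buffered_start_date.to_date_string()           # '2021-01-01'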
Example 6
class SwimlaneJwtAuth(SwimlaneResolver):
    """Handles authentication for all requests"""

    _token_expiration_buffer = pendulum.Interval(minutes=5)

    def __init__(self, swimlane, username, password):
        super(SwimlaneJwtAuth, self).__init__(swimlane)

        self._username = username
        self._password = password

        self.user = None
        self._login_headers = {}
        self._token_expiration = pendulum.now()

    def __call__(self, request):
        """Attach necessary headers to all requests

        Automatically reauthenticate before sending request when nearing token expiration
        """

        # Refresh token if it expires soon
        if pendulum.now() + self._token_expiration_buffer >= self._token_expiration:
            self.authenticate()

        request.headers.update(self._login_headers)

        return request

    def authenticate(self):
        """Send login request and update User instance, login headers, and token expiration"""

        # Temporarily remove auth from Swimlane session for auth request to avoid recursive loop during login request
        self._swimlane._session.auth = None
        resp = self._swimlane.request(
            'post',
            'user/login',
            json={
                'userName': self._username,
                'password': self._password
            },
        )
        self._swimlane._session.auth = self

        # Get JWT from response content
        json_content = resp.json()
        token = json_content.pop('token', None)

        # Grab token expiration
        token_data = jwt.decode(token, verify=False)
        token_expiration = pendulum.from_timestamp(token_data['exp'])

        headers = {'Authorization': 'Bearer {}'.format(token)}

        # Create User instance for authenticating user from login response data
        user = User(self._swimlane, _user_raw_from_login_content(json_content))

        self._login_headers = headers
        self.user = user
        self._token_expiration = token_expiration
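
Because `__call__` follows the `requests` custom-auth protocol (take the outgoing request, attach headers, return it), an instance can be assigned directly as a session's `auth`, which is exactly what `authenticate()` does with `self._swimlane._session`. A rough usage sketch (the credentials and the `swimlane` object are placeholders):

import requests

session = requests.Session()
session.auth = SwimlaneJwtAuth(swimlane, 'some.user', 'some-password')
# Every request sent through this session now re-authenticates automatically
# whenever the JWT is within five minutes of expiring.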
Example 7
def mock_default_sleep_interval(mocker):
    mocker.patch(
        "source_facebook_marketing.client.common.DEFAULT_SLEEP_INTERVAL",
        pendulum.Interval(seconds=5))
Example 8
import json
import sys
from time import sleep
from typing import Sequence

import backoff
import pendulum
from airbyte_cdk.entrypoint import logger  # FIXME (Eugene K): register logger as standard python logger
from facebook_business.exceptions import FacebookRequestError

# The Facebook API error codes indicating rate-limiting are listed at
# https://developers.facebook.com/docs/graph-api/overview/rate-limiting/
FACEBOOK_RATE_LIMIT_ERROR_CODES = (4, 17, 32, 613, 80000, 80001, 80002, 80003, 80004, 80005, 80006, 80008)
FACEBOOK_UNKNOWN_ERROR_CODE = 99
DEFAULT_SLEEP_INTERVAL = pendulum.Interval(minutes=1)


class FacebookAPIException(Exception):
    """General class for all API errors"""


class JobTimeoutException(Exception):
    """Scheduled job timed out"""


def batch(iterable: Sequence, size: int = 1):
    total_size = len(iterable)
    for ndx in range(0, total_size, size):
        yield iterable[ndx : min(ndx + size, total_size)]
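
`batch` slices a sequence into consecutive chunks of at most `size` items, for example:

list(batch([1, 2, 3, 4, 5], size=2))  # -> [[1, 2], [3, 4], [5]]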
Example 9
def test_log_onlinetime(clients):
    assert clients['1'].onlinetime == pendulum.Interval(seconds=402,
                                                        microseconds=149208)
    assert clients['2'].onlinetime == pendulum.Interval(seconds=19,
                                                        microseconds=759644)
Example 10
    print(f'Processing {symbol}')
    while True:
        # unix timestamp in milliseconds
        end_ts = end_date.int_timestamp * 1000
        url = f"https://api.bitfinex.com/v2/candles/trade:1D:t{symbol.upper()}/hist?end={end_ts}&limit={limit}"
        print(url)
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        data = r.json()
        # if we requested `limit` results and got back exactly `limit` results,
        # it is likely that earlier data exists
        if len(data) == limit:
            # save this data and make another request
            # with reduced end_ts
            previous_data = data
            end_date = end_date - pendulum.Interval(days=limit)
            print(f'hit limit, new end date is {end_date}')
            print('sleep...')
            sleep(5)
            continue
        else:
            # we got less than `limit` results, which means that the
            # earliest trading date is in the current results

            # if we got no results, it means we went too far back
            # and need to use the previous data
            if len(data) == 0:
                start_ts = previous_data[-1][0]
            else:
                start_ts = data[-1][0]
            print(f'Found start date for {symbol}')
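
The end timestamp above is built in unix milliseconds; assuming the candle rows use the same convention, turning the discovered `start_ts` back into a pendulum datetime (outside this snippet) would be along the lines of:

start_date = pendulum.from_timestamp(start_ts / 1000)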