Example #1
def request(endpoint, params=None):
    url = BASE_URL + endpoint
    params = params or {}
    headers = {}
    if 'user_agent' in CONFIG:
        headers['User-Agent'] = CONFIG['user_agent']

    headers['Authorization'] = "Basic " + CONFIG['api_key']
    req = requests.Request("GET", url, params=params, headers=headers).prepare()
    LOGGER.info("GET {}".format(req.url))

    with metrics.http_request_timer(url) as timer:
        resp = SESSION.send(req)
        timer.tags[metrics.Tag.http_status_code] = resp.status_code

    # Raise on HTTP errors before attempting to parse the body
    resp.raise_for_status()

    if resp.headers.get('Content-Type') == "application/gzip":
        json_body = unzip_to_json(resp.content)
    else:
        json_body = resp.json()

    return json_body
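The pattern in Example #1 boils down to a few lines: open the timer, send the request, tag the status code, then parse. A minimal, self-contained sketch (the URL and endpoint name below are placeholders, not taken from the tap above):

import requests
import singer.metrics as metrics

SESSION = requests.Session()

def timed_get(endpoint, url, **kwargs):
    # The context manager emits an http_request_duration metric tagged with the
    # endpoint name; the status code is added as an extra tag before it exits.
    with metrics.http_request_timer(endpoint) as timer:
        resp = SESSION.get(url, **kwargs)
        timer.tags[metrics.Tag.http_status_code] = resp.status_code
    resp.raise_for_status()
    return resp.json()

# Usage sketch:
# records = timed_get('users', 'https://api.example.com/v1/users', params={'page': 1})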
Example #2
    def request(self, method, path=None, url=None, **kwargs):

        self.get_access_token()

        if not url and self.base_url is None:
            self.base_url = '{}/{}'.format(API_URL, API_VERSION)

        if not url and path:
            url = '{}/{}'.format(self.base_url, path)

        # endpoint = stream_name (from sync.py API call)
        if 'endpoint' in kwargs:
            endpoint = kwargs['endpoint']
            del kwargs['endpoint']
        else:
            endpoint = None

        if 'headers' not in kwargs:
            kwargs['headers'] = {}
        kwargs['headers']['Authorization'] = 'Bearer {}'.format(
            self.__access_token)

        if self.__user_agent:
            kwargs['headers']['User-Agent'] = self.__user_agent

        if method == 'POST':
            kwargs['headers']['Content-Type'] = 'application/json'

        with metrics.http_request_timer(endpoint) as timer:
            response = self.__session.request(method, url, **kwargs)
            timer.tags[metrics.Tag.http_status_code] = response.status_code

        if response.status_code >= 500:
            raise Server5xxError()

        # Rate limits: https://developers.snapchat.com/api/docs/#rate-limits
        # Use retry functionality in backoff to wait and retry if
        # response code equals 429 because rate limit has been exceeded
        # LOGGER.info('headers = {}'.format(response.headers))
        rate_limit = int(response.headers.get('X-Rate-Limit-Limit', 0))
        rate_limit_remaining = int(
            response.headers.get('X-Rate-Limit-Remaining', 0))
        rate_limit_reset = int(response.headers.get('X-Rate-Limit-Reset', 0))

        if rate_limit == 0:
            rate_limit_percent_remaining = 100
        else:
            rate_limit_percent_remaining = 100 * (
                Decimal(rate_limit_remaining) / Decimal(rate_limit))

        # Wait for reset if remaining calls are less than 5%
        if rate_limit_percent_remaining < 5:
            LOGGER.warning(
                'Rate Limit Warning: {}; remaining calls: {}; remaining %: {}% '
                .format(rate_limit, rate_limit_remaining,
                        int(rate_limit_percent_remaining)))
            # Guard against a negative sleep if the reset time has already passed
            wait_time = max(rate_limit_reset - int(time.time()), 0)
            LOGGER.warning('Waiting for {} seconds.'.format(wait_time))
            time.sleep(int(wait_time))

        if response.status_code == 429:
            raise Server429Error()

        elif response.status_code >= 500:
            raise Server5xxError()

        if response.status_code != 200:
            LOGGER.error('{}: {}'.format(response.status_code, response.text))
            raise_for_error(response)

        # Catch invalid json response
        try:
            response_json = response.json()
        except Exception as err:
            LOGGER.error('{}'.format(err))
            LOGGER.error('response.headers = {}'.format(response.headers))
            LOGGER.error('response.reason = {}'.format(response.reason))
            raise Exception(err)

        return response_json
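The comment above mentions using the backoff library to retry on 429s and 5xxs; a typical wiring is a decorator on the request method. A sketch, with minimal stand-ins for the tap's Server5xxError and Server429Error classes:

import backoff

class Server5xxError(Exception):
    pass

class Server429Error(Exception):
    pass

# Retry with exponential backoff (up to 5 tries) whenever the wrapped call
# raises one of the rate-limit/server errors shown in the example above.
@backoff.on_exception(backoff.expo,
                      (Server5xxError, Server429Error),
                      max_tries=5,
                      factor=2)
def request_with_retries(client, method, path=None, url=None, **kwargs):
    return client.request(method, path=path, url=url, **kwargs)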
Example #3
    def request(self,
                method,
                url=None,
                path=None,
                headers=None,
                json=None,
                version=None,
                **kwargs):
        if not self.__verified:
            self.__verified = self.check_access()

        if not url and path:
            url = '{}/{}'.format(self.base_url, path)

        if 'endpoint' in kwargs:
            endpoint = kwargs['endpoint']
            del kwargs['endpoint']
        else:
            endpoint = None

        if not headers:
            headers = {}

        # API Version: https://developer.github.com/v3/#current-version
        if not version:
            version = 'v3'
        headers['Accept'] = 'application/vnd.github.{}+json'.format(version)

        # Authentication: https://developer.github.com/v3/#authentication
        headers['Authorization'] = 'Token {}'.format(self.__api_token)

        if self.__user_agent:
            headers['User-Agent'] = self.__user_agent

        if method == 'POST':
            headers['Content-Type'] = 'application/json'

        with metrics.http_request_timer(endpoint) as timer:
            response = self.__session.request(method=method,
                                              url=url,
                                              headers=headers,
                                              json=json,
                                              **kwargs)
            timer.tags[metrics.Tag.http_status_code] = response.status_code

        if response.status_code >= 500:
            raise Server5xxError()

        # 304: File Not Modified status_code
        if response.status_code == 304:
            return None, None

        if response.status_code != 200:
            raise_for_error(response)

        last_modified = response.headers.get('Last-Modified')

        response_json = response.json()
        # last-modified: https://developer.github.com/v3/#conditional-requests
        if last_modified:
            last_modified_dttm = datetime.strptime(last_modified,
                                                   '%a, %d %b %Y %H:%M:%S %Z')
            response_json['last_modified'] = last_modified_dttm.strftime(
                "%Y-%m-%dT%H:%M:%SZ")

        # Pagination: https://developer.github.com/v3/guides/traversing-with-pagination/
        links_header = response.headers.get('Link')
        links = []
        next_url = None
        if links_header:
            links = links_header.split(',')
        for link in links:
            try:
                url, rel = re.search(r'^\<(https.*)\>; rel\=\"(.*)\"$',
                                     link.strip()).groups()
                if rel == 'next':
                    next_url = url
            except AttributeError:
                next_url = None

        return response_json, next_url
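Because the method returns a (response_json, next_url) pair, callers can page through a GitHub-style result set by feeding next_url back in until it is exhausted. A usage sketch (get_all_pages and client are illustrative names, not part of the tap):

def get_all_pages(client, path):
    # Follow Link-header pagination until there is no 'next' page.
    records = []
    response_json, next_url = client.request('GET', path=path)
    while response_json is not None:
        records.extend(response_json if isinstance(response_json, list) else [response_json])
        if not next_url:
            break
        response_json, next_url = client.request('GET', url=next_url)
    return records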
Example #4
    def request(self,
                method,
                url=None,
                path=None,
                data=None,
                params=None,
                **kwargs):

        if not url and path:
            url = '{}/{}'.format(self.base_url, path)

        if 'endpoint' in kwargs:
            endpoint = kwargs['endpoint']
            del kwargs['endpoint']
        else:
            endpoint = None

        if 'headers' not in kwargs:
            kwargs['headers'] = {}
        kwargs['headers']['Accept'] = 'application/json'

        if self.__user_agent:
            kwargs['headers']['User-Agent'] = self.__user_agent

        if method == 'POST':
            kwargs['headers']['Content-Type'] = 'application/json'

        with metrics.http_request_timer(endpoint) as timer:
            response = self.__session.request(
                method,
                url,
                auth=self.__auth_header,
                data=data,
                params=params,
                timeout=(DEFAULT_CONNECTION_TIMEOUT, DEFAULT_REST_TIMEOUT),
                **kwargs)
            timer.tags[metrics.Tag.http_status_code] = response.status_code

        # Rate Limit reference: https://developer.twitter.com/en/docs/basics/rate-limiting
        # LOGGER.info('headers = {}'.format(response.headers))
        # Default missing headers to 0 and guard against division by zero
        rate_limit = int(response.headers.get('x-rate-limit-limit', 0))
        rate_limit_remaining = int(
            response.headers.get('x-rate-limit-remaining', 0))
        rate_limit_reset = int(response.headers.get('x-rate-limit-reset', 0))
        if rate_limit == 0:
            rate_limit_percent_remaining = 100
        else:
            rate_limit_percent_remaining = 100 * (
                Decimal(rate_limit_remaining) / Decimal(rate_limit))

        # Wait for reset if remaining calls are less than 5%
        if rate_limit_percent_remaining < 5:
            LOGGER.warning(
                'Rate Limit Warning: {}; remaining calls: {}; remaining %: {}%'
                .format(rate_limit, rate_limit_remaining,
                        int(rate_limit_percent_remaining)))
            # Guard against a negative sleep if the reset time has already passed
            wait_time = max(rate_limit_reset - int(time.time()), 0)
            LOGGER.warning('Waiting for {} seconds.'.format(wait_time))
            time.sleep(int(wait_time))

        if response.status_code in (420, 429):
            raise Server42xRateLimitError()

        elif response.status_code >= 500:
            raise Server5xxError()

        elif response.status_code == 400:
            error_combined = raise_for_error(response)
            if 'INVALID_ACCOUNT_SERVICE_LEVEL' in error_combined:
                return None

        elif response.status_code != 200:
            error_combined = raise_for_error(response)

        return response.json()
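raise_for_error is referenced here (and in several other examples) but never shown. In taps that follow this pattern it usually parses the error body, builds a combined "status: message" string, and raises a tap-specific exception; the sketch below is an assumption of that shape, not the tap's actual helper:

class ClientError(Exception):
    """Illustrative stand-in for the tap's error classes."""

def raise_for_error(response):
    # Build a combined error string from the JSON body (falling back to the
    # raw text), return it for 400s that the caller inspects, raise otherwise.
    try:
        body = response.json()
    except ValueError:
        body = {}
    message = body.get('errors') or body.get('detail') or response.text
    error_combined = '{}: {}'.format(response.status_code, message)
    if response.status_code == 400:
        return error_combined
    raise ClientError(error_combined)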
Example #5
def authed_get(source, url, headers={}):
    with metrics.http_request_timer(source) as timer:
        session.headers.update(headers)
        resp = session.request(method='get', url=url)
        timer.tags[metrics.Tag.http_status_code] = resp.status_code
        return resp
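authed_get relies on a module-level requests session whose headers carry the credentials; a minimal setup and call sketch, continuing the example above (the header name, token, and URL are placeholders):

import requests

session = requests.Session()
# Attach the credential once; authed_get merges any per-call headers on top.
session.headers.update({'Authorization': 'token <access_token>'})

resp = authed_get('issues', 'https://api.example.com/issues',
                  headers={'Accept': 'application/json'})
resp.raise_for_status()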
Example #6
def gen_request(stream_id, url):
    with metrics.http_request_timer(stream_id) as timer:
        resp = requests.get(url, auth=HTTPBasicAuth(CONFIG["consumer_key"], CONFIG["consumer_secret"]))
        timer.tags[metrics.Tag.http_status_code] = resp.status_code
        resp.raise_for_status()
        return resp.json()
Example #7
    def rest_request(self, method, url, **kwargs):
        with metrics.http_request_timer(url):
            url = self.get_url(url, rest=True)
            return self._request(method, url, headers=self.rest_headers, **kwargs)
Example #8
def gen_request(stream_id, url):
    with metrics.http_request_timer(stream_id) as timer:
        resp = requests.get(url, headers={"token": CONFIG["api_key"]})
        timer.tags[metrics.Tag.http_status_code] = resp.status_code
        resp.raise_for_status()
        return resp.json()
Example #9
def authed_get(source, url, params):
    with metrics.http_request_timer(source) as timer:
        resp = session.request(method='get', url=url, params=params)
        timer.tags[metrics.Tag.http_status_code] = resp.status_code
        return resp
Example #10
def __process_periodic_data_calcs(req_state,
                                  scenario_name='Actual',
                                  currency_code='USD'):  # pylint: disable=too-many-statements
    entity_types = ['assets']  # Currently: assets only (not funds)
    period_types = req_state.period_types.strip().replace(' ', '').split(',')
    batch_size = 10000
    end_dttm = req_state.end_date
    max_bookmark_value = req_state.last_date

    # Init params_list and results
    i_get_params_list = req_state.client.factory.create(
        'ArrayOfBaseRequestParameters')
    results = []
    req_id = 1
    batch = 1
    update_count = 0

    # Base objects
    data_value_types = req_state.client.factory.create('DataValueTypes')

    # scenario_id for scenario_name
    scenarios = req_state.client.service.GetScenarios()
    scenario = [i for i in scenarios.NamedEntity if i.Name == scenario_name][0]
    scenario_id = scenario.Id

    # current_date
    date_types = req_state.client.factory.create('DateTypes')
    current_date = req_state.client.factory.create('Date')
    current_date.Type = date_types.Current

    # latest_date
    latest_date = req_state.client.factory.create('Date')
    latest_date.Type = date_types.Latest

    # Get all calc data items
    data_item_search_criteria = req_state.client.factory.create(
        'DataItemsSearchCriteria')
    data_item_search_criteria.GetGlobalDataItemsOnly = True  # Global Data Items ONLY
    data_items = req_state.client.service.GetDataItems(
        data_item_search_criteria)
    calc_data_items = [
        i for i in data_items.DataItemObjectEx if i.FormulaTypeIDsString
    ]  # TESTING (add): and 'Gross Margin' in i.Name
    calc_data_items_len = len(calc_data_items)

    # entity_type loop
    for entity_type in entity_types:  # funds, assets pylint: disable=too-many-nested-blocks
        LOGGER.info('entity_type = %s', entity_type)  # COMMENT OUT
        # entity_ids for funds_or_assets
        if entity_type == 'funds':
            entities = req_state.client.service.GetFunds()
            entity_objs = entities.Fund
            # entity_objs = [i for i in entity_objs if 'IV, L.P.' in i.ExcelName] # COMMENT OUT
        else:  # assets
            entities = req_state.client.service.GetAssets()
            entity_objs = entities.Asset
            # entity_objs = [i for i in entity_objs if 'Guild Education' in i.Name] # TESTING: COMMENT OUT
        entity_objs_len = len(entity_objs)

        # calc_data_items loop
        cdi = 1
        for data_item in calc_data_items:
            data_item_id = data_item.Id
            data_item_name = data_item.Name
            LOGGER.info('data_item_name = %s (%s)', data_item_name,
                        data_item_id)  # COMMENT OUT

            # data_value_type for data_item
            data_value_type_id = data_item.DataValueType
            data_value_type = data_value_types[data_value_type_id]

            # entity loop
            ent = 1
            for entity in entity_objs:
                entity_dict = ilevel.sobject_to_dict(entity)
                entity_id = entity_dict.get('Id')
                # LOGGER.info('entity = {} ({})'.format(entity_name, entity_id)) # COMMENT OUT
                entity_initial_dttm = datetime.strptime(
                    entity_dict.get('InitialPeriod')[:10], '%Y-%m-%d')
                start_dttm = datetime.strptime(req_state.last_date, '%Y-%m-%d')
                # Start from the later of the bookmark and the entity's initial
                # period, i.e. the earliest date for which the entity has data
                max_dttm = [start_dttm, entity_initial_dttm]
                start_dttm = max(i for i in max_dttm if i is not None)

                # LOGGER.info('periodic_data_calculated: {}, {}: {} ({})'.format(
                #     data_item_name, entity_type, entity_name, entity_id)) # COMMENT OUT
                entity_path = ilevel.create_entity_path(req_state, [entity_id])

                # period_type loop
                last_period_type = period_types[-1]
                for period_type in period_types:
                    period, period_diff = ilevel.get_periods(
                        req_state, start_dttm, end_dttm, period_type)

                    # offset_period loop (0, -1, -2, ...) look-back
                    pd = 0
                    while pd <= period_diff + 1:
                        # LOGGER.info('{}: periodic_data_calculated: {}, Period Type: {}, Offset: {}'.format(
                        #    req_id, data_item_name, period_type, -pd)) # COMMENT OUT
                        offset_period = copy.copy(period)
                        offset_period.IsOffset = True
                        offset_period.Quantity = int(-1 * pd)

                        i_get_params = req_state.client.factory.create(
                            'AssetAndFundGetRequestParameters')
                        i_get_params.RequestIdentifier = req_id
                        i_get_params.DataValueType = data_value_type
                        i_get_params.EntitiesPath = entity_path
                        i_get_params.DataItemId = data_item_id
                        i_get_params.ScenarioId = scenario_id
                        i_get_params.Period = period
                        i_get_params.Offset = offset_period
                        i_get_params.EndOfPeriod = latest_date
                        i_get_params.ReportedDate = current_date
                        i_get_params.CurrencyCode = currency_code

                        i_get_params_list.BaseRequestParameters.append(
                            i_get_params)
                        # LOGGER.info('i_get_params = {}'.format(i_get_params)) # COMMENT OUT

                        # run iGetBatch
                        end_of_batches = False
                        if (pd == (period_diff + 1) and period_type == last_period_type \
                            and ent == entity_objs_len and cdi == calc_data_items_len and entity_type == 'assets'):
                            end_of_batches = True
                            LOGGER.info('xxx END OF BATCHES xxx')
                        if (req_id % batch_size == 0) or end_of_batches:
                            LOGGER.info('xxx BATCH: %s xxx', batch)
                            i_get_count = len(i_get_params_list)
                            i_get_request = req_state.client.factory.create(
                                'DataServiceRequest')
                            i_get_request.IncludeStandardizedDataInfo = True
                            i_get_request.IncludeExcelFormula = True
                            i_get_request.ParametersList = i_get_params_list
                            # LOGGER.info('i_get_request = {}'.format(i_get_request)) # COMMENT OUT

                            # pylint: disable=unused-variable
                            metrics_string = (
                                'periodic_data_calculated, iGetBatch #{}: {} requests'
                                .format(batch, i_get_count))
                            with metrics.http_request_timer(
                                    metrics_string) as timer:
                                data_values = req_state.client.service.iGetBatch(
                                    i_get_request)

                            # LOGGER.info('data_values = {}'.format(data_values)) # COMMENT OUT

                            if isinstance(data_values, str):
                                continue

                            try:
                                periodic_data_records = data_values.DataValue
                            except Exception as err:
                                LOGGER.error('%s', err)
                                LOGGER.error(
                                    'data_values dict = %s',
                                    ilevel.sobject_to_dict(data_values))
                                raise err

                            for periodic_data_record in periodic_data_records:
                                if "Error" in periodic_data_record:
                                    continue

                                if "NoDataAvailable" in periodic_data_record:
                                    continue

                                periodic_data_record_dict = ilevel.sobject_to_dict(
                                    periodic_data_record)
                                # LOGGER.info('period_data_record_dict = {}'.format(periodic_data_record_dict)) # COMMENT OUT

                                transformed_record = transform_json(
                                    periodic_data_record_dict)
                                # LOGGER.info('transformed_record = {}'.format(transformed_record)) # COMMENT OUT

                                if 'value' in transformed_record:
                                    value = transformed_record.get('value')
                                    value_string = str(value)
                                    if type(value) in (int, float):
                                        value_numeric = float(value)
                                    else:
                                        value_numeric = None
                                    if value == 'No Data Available':
                                        LOGGER.info(
                                            'No Data Available, skipping record'
                                        )
                                        continue
                                    sd_parameters = transformed_record.get(
                                        'sd_parameters', {})
                                    excel_formula = transformed_record.get(
                                        'excel_formula')
                                    currency_code = sd_parameters.get(
                                        'currency_code')
                                    data_item_id = sd_parameters.get(
                                        'data_item_id')
                                    data_value_type = sd_parameters.get(
                                        'data_value_type')
                                    detail_id = sd_parameters.get('detail_id')
                                    entity_id = next(
                                        iter(
                                            sd_parameters.get(
                                                'entities_path',
                                                {}).get('path',
                                                        {}).get('int', [])),
                                        None)
                                    scenario_id = sd_parameters.get(
                                        'scenario_id')
                                    period_type = sd_parameters.get(
                                        'period', {}).get('type')
                                    end_of_period_value = sd_parameters.get(
                                        'end_of_period', {}).get('value')
                                    reported_date_value = sd_parameters.get(
                                        'reported_date', {}).get('value')
                                    exchange_rate_type = sd_parameters.get(
                                        'exchange_rate', {}).get('type')
                                    request_id = sd_parameters.get(
                                        'request_identifier')
                                    standardized_data_id = sd_parameters.get(
                                        'standardized_data_id')

                                    dimensions = {
                                        'data_item_id': data_item_id,
                                        'entity_id': entity_id,
                                        'scenario_id': scenario_id,
                                        'period_type': period_type,
                                        'end_of_period_value':
                                        end_of_period_value,
                                        'currency_code': currency_code,
                                        'exchange_rate_type':
                                        exchange_rate_type,
                                        'data_value_type': data_value_type
                                    }
                                    hash_key = str(
                                        hash_data(
                                            json.dumps(dimensions,
                                                       sort_keys=True)))

                                    # Primary key dimensions, create md5 hash key
                                    new_record = {
                                        'hash_key': hash_key,
                                        'excel_formula': excel_formula,
                                        'currency_code': currency_code,
                                        'data_item_id': data_item_id,
                                        'data_value_type': data_value_type,
                                        'detail_id': detail_id,
                                        'entity_id': entity_id,
                                        'scenario_id': scenario_id,
                                        'period_type': period_type,
                                        'end_of_period_value':
                                        end_of_period_value,
                                        'reported_date_value':
                                        reported_date_value,
                                        'exchange_rate_type':
                                        exchange_rate_type,
                                        'request_id': request_id,
                                        'standardized_data_id':
                                        standardized_data_id,
                                        'value': value,
                                        'value_string': value_string,
                                        'value_numeric': value_numeric
                                    }

                                    results.append(new_record)
                                # end for rec in period_data_records

                            # Process batch records
                            max_bookmark_value, process_record_count = process_records(
                                result_records=results,
                                req_state=req_state,
                                deletion_flag=False,
                                max_bookmark_value=max_bookmark_value)

                            update_count = update_count + process_record_count

                            # Init new params_list and results
                            i_get_params_list = req_state.client.factory.create(
                                'ArrayOfBaseRequestParameters')
                            results = []

                            batch = batch + 1
                            # end iGetBatch

                        req_id = req_id + 1
                        pd = pd + 1
                        # end offset_period loop

                    # end period_type loop

                ent = ent + 1
                # end entity_id loop

            cdi = cdi + 1
            # end calc_data_items loop

        # end entity_type loop

    # Update the state with the max_bookmark_value for the stream after ALL records
    # Always process past year of calculated data (Subtract 365 days from max_bookmark_value)
    max_bookmark_dttm = datetime.strptime(max_bookmark_value[:10],
                                          "%Y-%m-%d") - timedelta(days=365)
    max_bookmark_value = max_bookmark_dttm.strftime("%Y-%m-%d")
    singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                              max_bookmark_value)

    return update_count
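hash_data, used above to build the hash_key primary key, is not shown in this example. A plausible minimal implementation, assuming an MD5 hex digest of the serialized dimensions (the body is an assumption; only the call shape matches the code above):

import hashlib
import json

def hash_data(data):
    # Assumed helper: MD5 hex digest of the already-serialized dimensions
    # string, used as a surrogate primary key for the record.
    return hashlib.md5(data.encode('utf-8')).hexdigest()

dimensions = {'data_item_id': 1, 'entity_id': 2, 'scenario_id': 3}
hash_key = str(hash_data(json.dumps(dimensions, sort_keys=True)))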
Example #11
def request_metrics_patch(self, method, url, **kwargs):
    with singer_metrics.http_request_timer(None):
        return request(self, method, url, **kwargs)
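The patch above only takes effect once it is installed on requests.Session; one plausible way to wire it up (the module-level request alias is an assumption about the surrounding code):

import requests
import singer.metrics as singer_metrics

# Keep a reference to the original method, then replace it so every call made
# through requests.Session is wrapped in an http_request_timer.
request = requests.Session.request

def request_metrics_patch(self, method, url, **kwargs):
    with singer_metrics.http_request_timer(None):
        return request(self, method, url, **kwargs)

requests.Session.request = request_metrics_patch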
Example #12
    def get_objects(self):
        updated_at_min = self.get_bookmark()

        stop_time = singer.utils.now().replace(microsecond=0)
        # Retrieve data for max 1 year. Otherwise log incremental needed.
        diff_days = (stop_time - updated_at_min).days
        yearly = False
        if diff_days > 365:
            yearly = True
            stop_time = updated_at_min + datetime.timedelta(days=365)
            LOGGER.info("This import will only import the first year of historical data. "
                        "You need to trigger further incremental imports to get the missing rows.")

        date_window_size = float(Context.config.get("date_window_size", DATE_WINDOW_SIZE))
        results_per_page = Context.get_results_per_page(RESULTS_PER_PAGE)

        # Page through till the end of the resultset
        while updated_at_min < stop_time:
            # Bookmarking can also occur on the since_id
            since_id = self.get_since_id() or 1

            if since_id != 1:
                LOGGER.info("Resuming sync from since_id %d", since_id)

            # It's important that `updated_at_min` has microseconds
            # truncated. Why has been lost to the mists of time but we
            # think it has something to do with how the API treats
            # microseconds on its date windows. Maybe it's possible to
            # drop data due to rounding errors or something like that?
            updated_at_max = updated_at_min + datetime.timedelta(days=date_window_size)
            if updated_at_max > stop_time:
                updated_at_max = stop_time

            singer.log_info("getting from %s - %s", updated_at_min,
                            updated_at_max)

            min_filter_key = self.get_min_replication_key()
            max_filter_key = self.get_max_replication_key()

            while True:
                status_key = self.status_key or "status"
                query_params = {
                    "since_id": since_id,
                    min_filter_key: updated_at_min,
                    max_filter_key: updated_at_max,
                    "limit": results_per_page,
                }

                if self.add_status:
                    query_params[status_key] = "any"

                with metrics.http_request_timer(self.name):
                    objects = self.call_api(query_params)

                for obj in objects:
                    if obj.id < since_id:
                        # This verifies the api behavior expectation we
                        # have that all results actually honor the
                        # since_id parameter.
                        raise OutOfOrderIdsError("obj.id < since_id: {} < {}".format(
                            obj.id, since_id))
                    yield obj

                # You know you're at the end when the current page has
                # less than the request size limits you set.
                singer.log_info(f"Got {len(objects)} records")
                if len(objects) < results_per_page:
                    # Save the updated_at_max as our bookmark as we've synced all rows up in our
                    # window and can move forward. Also remove the since_id because we want to
                    # restart at 1.
                    Context.state.get('bookmarks', {}).get(self.name, {}).pop('since_id', None)
                    state_val = updated_at_max
                    if self.skip_day:
                        state_val = state_val + datetime.timedelta(days=1)
                    self.update_bookmark(utils.strftime(state_val))
                    break

                if objects[-1].id != max([o.id for o in objects]):
                    # This verifies the api behavior expectation we have
                    # that all pages are internally ordered by the
                    # `since_id`.
                    raise OutOfOrderIdsError("{} is not the max id in objects ({})".format(
                        objects[-1].id, max([o.id for o in objects])))
                since_id = objects[-1].id

                # Put since_id into the state.
                self.update_bookmark(since_id, bookmark_key='since_id')

            updated_at_min = updated_at_max + datetime.timedelta(seconds=1)

            if self.skip_day:
                updated_at_min = updated_at_min + datetime.timedelta(days=1)

        if yearly:
            LOGGER.info("This import only imported one year of historical data. "
                        "Please trigger further incremental data to get the missing rows.")
Example #13
    def request(self, method, path=None, url=None, **kwargs): # pylint: disable=too-many-branches,too-many-statements
        if not self.__verified:
            self.__verified = self.check_access_token()

        if not url and self.__base_url is None:
            self.__base_url = 'https://api.rechargeapps.com/'

        if not url and path:
            url = self.__base_url + path

        if 'endpoint' in kwargs:
            endpoint = kwargs['endpoint']
            del kwargs['endpoint']
        else:
            endpoint = None

        if 'headers' not in kwargs:
            kwargs['headers'] = {}
        kwargs['headers']['X-Recharge-Access-Token'] = self.__access_token
        kwargs['headers']['Accept'] = 'application/json'
        # If we did not specify any API Version during API Call, the Recharge will use the default API Version of our store
        # the 'collections' was added as part of API Version: '2021-11', for older API Version,
        # we will get empty records so adding 'X-Recharge-Version' for 'collections' API call
        if path == 'collections':
            kwargs['headers']['X-Recharge-Version'] = '2021-11'

        if self.__user_agent:
            kwargs['headers']['User-Agent'] = self.__user_agent

        if method == 'POST':
            kwargs['headers']['Content-Type'] = 'application/json'

        # Intermittent JSONDecodeErrors when parsing JSON; Adding 2 attempts
        # FIRST ATTEMPT
        with metrics.http_request_timer(endpoint) as timer:
            response = self.__session.request(method, url, stream=True, timeout=self.request_timeout, **kwargs)
            timer.tags[metrics.Tag.http_status_code] = response.status_code

        if response.status_code >= 500:
            raise Server5xxError()

        if response.status_code == 429:
            # Delay for 5 seconds for leaky bucket rate limit algorithm
            time.sleep(5)
            raise Server429Error()

        if response.status_code != 200:
            raise_for_error(response)

        # Catch invalid JSON (e.g. unterminated string errors)
        try:
            response_json = response.json()
            return response_json, response.links
        except ValueError as err:  # includes simplejson.decoder.JSONDecodeError
            LOGGER.warning(err)

        # SECOND ATTEMPT, if there is a ValueError (unterminated string error)
        with metrics.http_request_timer(endpoint) as timer:
            response = self.__session.request(
                method,
                url,
                stream=True,
                timeout=self.request_timeout,
                **kwargs)
            timer.tags[metrics.Tag.http_status_code] = response.status_code

        if response.status_code >= 500:
            raise Server5xxError()

        if response.status_code == 429:
            raise Server429Error()

        if response.status_code != 200:
            raise_for_error(response)

        # Log invalid JSON (e.g. unterminated string errors)
        try:
            response_json = response.json()
            return response_json, response.links
        except ValueError as err:  # includes simplejson.decoder.JSONDecodeError
            LOGGER.error(err)
            raise Exception(err)
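The two parse attempts above can also be expressed as a small loop around a send callable; an equivalent sketch (not the tap's actual code), retrying only when the body is not valid JSON:

import logging

LOGGER = logging.getLogger(__name__)

def request_json_with_retry(send_request, parse_attempts=2):
    # send_request() is expected to perform one timed HTTP call, including the
    # status-code checks shown above, and return the response object.
    last_err = None
    for attempt in range(parse_attempts):
        response = send_request()
        try:
            return response.json(), response.links
        except ValueError as err:  # includes simplejson.decoder.JSONDecodeError
            LOGGER.warning(err)
            last_err = err
    raise last_err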
Example #14
0
    def get_gzip_json(self, url, endpoint):
        resp = None
        with metrics.http_request_timer(endpoint) as timer:
            resp = self.__session.request(method='GET', url=url, timeout=60)
            timer.tags[metrics.Tag.http_status_code] = resp.status_code
        return self.unzip(resp.content)
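The unzip helper is not shown in this example; assuming the endpoint returns a gzip-compressed JSON document, a plausible implementation is:

import gzip
import json

def unzip(gzipped_bytes):
    # Assumed helper: decompress the gzip payload and parse it as JSON.
    return json.loads(gzip.decompress(gzipped_bytes).decode('utf-8'))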
Example #15
    def download_request(self, start_date, end_date):
        # Returns a data stream for the CSV file
        with metrics.http_request_timer("create_report") as timer:
            url = BASE_URL + "api/v1/shops/" + self.shop_id + "/" + "click-reports"
            headers = {
                "Content-Type": "application/json",
                "Authorization": "Bearer " + self.access_token,
            }
            data = {"from": start_date, "to": end_date, "site": self.site}
            response = requests.post(url,
                                     headers=headers,
                                     data=json.dumps(data))
            timer.tags[metrics.Tag.http_status_code] = response.status_code
        if response.status_code in [429, 502]:
            raise RateLimitException()
        if ("The date should be in the past" in response.text
            ):  #checks whether the end date was set correctly
            return -1
        response.raise_for_status()
        LOGGER.debug(response.json())
        status = response.json()["status"]
        report_id = response.json()["id"]

        # file was requested now poll for availability
        while status == "PROCESSING":  #maybe a timeout for 1 minute or sth. similar
            LOGGER.info("Check whether the report is online")
            with metrics.http_request_timer("poll_report status") as timer:
                url = BASE_URL + "api/v1/shops/" + self.shop_id + "/" + "click-reports" + "/" + report_id
                headers = {
                    "Authorization": "Bearer " + self.access_token,
                }
                if self.user_agent:
                    headers["User-Agent"] = self.user_agent
                request = requests.Request("GET", url, headers=headers)
                response = self.session.send(request.prepare())
                timer.tags[metrics.Tag.http_status_code] = response.status_code
            if response.status_code in [429, 502]:
                raise RateLimitException()
            response.raise_for_status()
            LOGGER.debug(response.json())
            status = response.json()["status"]
            if status == "PROCESSING":
                LOGGER.info("waiting 1 sec")
                time.sleep(1)

        # No longer processing, so the report is either FAILED or SUCCESSFUL
        if status == "FAILED":
            raise Exception("Report generation failed")
        else:  # if successful
            # download the report
            with metrics.http_request_timer("download_status") as timer:
                url = BASE_URL + "api/v1/shops/" + self.shop_id + "/" + "click-reports" + "/" + report_id + "/download"
                headers = {"Authorization": "Bearer " + self.access_token}
                request = requests.Request("GET", url, headers=headers)
                response = self.session.send(request.prepare())
                timer.tags[metrics.Tag.http_status_code] = response.status_code
            if response.status_code in [429, 502]:
                raise RateLimitException()
            response.raise_for_status()

            #extract the files inside the zipfile
            zf = zipfile.ZipFile(io.BytesIO(response.content), 'r')

            for filename in zf.namelist():
                try:
                    LOGGER.info("read File %s" % filename)
                    return io.StringIO(zf.read(filename).decode())
                except KeyError:
                    LOGGER.critical('ERROR: Did not find %s in zip file' %
                                    filename)
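The comment on the polling loop suggests bounding it with a timeout; one way is a deadline around the status check, e.g. (a sketch; MAX_POLL_SECONDS and fetch_status are assumptions, not part of the tap):

import time

MAX_POLL_SECONDS = 60  # assumed upper bound on how long to poll

def poll_until_done(fetch_status, poll_interval=1, max_wait=MAX_POLL_SECONDS):
    # Call fetch_status() until it stops returning "PROCESSING" or the
    # deadline passes, then hand the final status back to the caller.
    deadline = time.time() + max_wait
    status = fetch_status()
    while status == "PROCESSING" and time.time() < deadline:
        time.sleep(poll_interval)
        status = fetch_status()
    return status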
Example #16
def perform_igetbatch_operation_for_standardized_id_set(id_set, req_state):
    data_value_types = req_state.client.factory.create('DataValueTypes')

    # current_date
    date_types = req_state.client.factory.create('DateTypes')
    current_date = req_state.client.factory.create('Date')
    current_date.Type = date_types.Current

    # latest_date
    latest_date = req_state.client.factory.create('Date')
    latest_date.Type = date_types.Latest

    req_id = 1
    id_set_len = len(id_set)
    i_get_params_list = req_state.client.factory.create(
        'ArrayOfBaseRequestParameters')
    for cur_id in id_set:
        req_id = req_id + 1
        i_get_params = req_state.client.factory.create(
            'AssetAndFundGetRequestParameters')
        i_get_params.StandardizedDataId = cur_id

        i_get_params.RequestIdentifier = req_id
        i_get_params.DataValueType = getattr(data_value_types, 'ObjectId')
        i_get_params.EndOfPeriod = latest_date
        i_get_params.ReportedDate = current_date

        i_get_params_list.BaseRequestParameters.append(i_get_params)

    i_get_request = req_state.client.factory.create('DataServiceRequest')
    i_get_request.IncludeStandardizedDataInfo = True
    i_get_request.IncludeExcelFormula = True
    i_get_request.ParametersList = i_get_params_list

    # pylint: disable=unused-variable
    metrics_string = (
        'Standardized Data Item iGetBatch: {} requests'.format(id_set_len))
    with metrics.http_request_timer(metrics_string) as timer:
        data_values = req_state.client.service.iGetBatch(i_get_request)

    # LOGGER.info('data_values dict = {}'.format(sobject_to_dict(data_values))) # COMMENT OUT

    if isinstance(data_values, str):
        return []

    try:
        periodic_data_records = data_values.DataValue
    except Exception as err:
        LOGGER.error('{}'.format(err))
        LOGGER.error('data_values dict = {}'.format(
            sobject_to_dict(data_values)))
        raise err

    results = []
    for periodic_data_record in periodic_data_records:
        if "Error" in periodic_data_record:
            continue

        if "NoDataAvailable" in periodic_data_record:
            continue

        periodic_data_record_dict = sobject_to_dict(periodic_data_record)
        # LOGGER.info('period_data_record_dict = {}'.format(periodic_data_record_dict)) # COMMENT OUT

        transformed_record = transform_json(periodic_data_record_dict)
        # LOGGER.info('transformed_record = {}'.format(transformed_record)) # COMMENT OUT

        if 'value' in transformed_record:
            value = transformed_record.get('value')
            value_string = str(value)
            if type(value) in (int, float):
                value_numeric = float(value)
            else:
                value_numeric = None
            if value == 'No Data Available':
                continue

            sd_parameters = transformed_record.get('sd_parameters', {})
            excel_formula = transformed_record.get('excel_formula')
            currency_code = sd_parameters.get('currency_code')
            data_item_id = sd_parameters.get('data_item_id')
            data_value_type = sd_parameters.get('data_value_type')
            detail_id = sd_parameters.get('detail_id')
            scenario_id = sd_parameters.get('scenario_id')
            period_type = sd_parameters.get('period', {}).get('type')
            end_of_period_value = sd_parameters.get('end_of_period',
                                                    {}).get('value')
            reported_date_value = sd_parameters.get('reported_date',
                                                    {}).get('value')
            exchange_rate_type = sd_parameters.get('exchange_rate',
                                                   {}).get('type')
            request_id = sd_parameters.get('request_identifier')
            standardized_data_id = sd_parameters.get('standardized_data_id')

            entity_ids = sd_parameters.get('entities_path',
                                           {}).get('path', {}).get('int', [])
            for entity_id in entity_ids:
                # Primary key dimensions, create md5 hash key
                dimensions = {
                    'data_item_id': data_item_id,
                    'entity_id': entity_id,
                    'scenario_id': scenario_id,
                    'period_type': period_type,
                    'end_of_period_value': end_of_period_value,
                    'currency_code': currency_code,
                    'exchange_rate_type': exchange_rate_type,
                    'data_value_type': data_value_type
                }
                hash_key = str(
                    hash_data(json.dumps(dimensions, sort_keys=True)))
                new_record = {
                    'hash_key': hash_key,
                    'excel_formula': excel_formula,
                    'currency_code': currency_code,
                    'data_item_id': data_item_id,
                    'data_value_type': data_value_type,
                    'detail_id': detail_id,
                    'entity_id': entity_id,
                    'scenario_id': scenario_id,
                    'period_type': period_type,
                    'end_of_period_value': end_of_period_value,
                    'reported_date_value': reported_date_value,
                    'exchange_rate_type': exchange_rate_type,
                    'request_id': request_id,
                    'standardized_data_id': standardized_data_id,
                    'value': value,
                    'value_string': value_string,
                    'value_numeric': value_numeric
                }

                results.append(new_record)
            # end for rec in periodic_data_records

    # LOGGER.info('results = {}'.format(results)) # COMMENT OUT
    return results
Example #17
def gen_request(stream_id, url):
    with metrics.http_request_timer(stream_id) as timer:
        resp = requests.get(url)
        timer.tags[metrics.Tag.http_status_code] = resp.status_code
        resp.raise_for_status()
        return resp.json()
Example #18
    def request(self, method, url=None, path=None, headers=None, json=None, version=None, **kwargs):
        if not self.__verified:
            self.__verified = self.check_access()

        if not url and path:
            url = '{}/{}'.format(self.base_url, path)

        if 'endpoint' in kwargs:
            endpoint = kwargs['endpoint']
            del kwargs['endpoint']
        else:
            endpoint = None

        if not headers:
            headers = {}

        # API Version: https://developer.github.com/v3/#current-version
        if not version:
            version = 'v3'
        headers['Accept'] = 'application/vnd.github.{}+json'.format(version)

        # Authentication: https://developer.github.com/v3/#authentication
        headers['Authorization'] = 'Token {}'.format(self.__api_token)

        if self.__user_agent:
            headers['User-Agent'] = self.__user_agent

        if method == 'POST':
            headers['Content-Type'] = 'application/json'

        with metrics.http_request_timer(endpoint) as timer:
            response = self.__session.request(
                method=method,
                url=url,
                headers=headers,
                json=json,
                **kwargs)
            timer.tags[metrics.Tag.http_status_code] = response.status_code

        if response.status_code >= 500:
            raise Server5xxError()

        # Pagination: https://developer.github.com/v3/guides/traversing-with-pagination/
        links_header = response.headers.get('Link')
        links = []
        next_url = None
        if links_header:
            links = links_header.split(',')
        for link in links:
            try:
                url, rel = re.search(r'^\<(https.*)\>; rel\=\"(.*)\"$', link.strip()).groups()
                if rel == 'next':
                    next_url = url
            except AttributeError:
                next_url = None

        # last-modified: https://developer.github.com/v3/#conditional-requests
        last_modified = response.headers.get('Last-Modified')
        last_modified_str = None
        if last_modified:
            last_modified_dttm = datetime.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z')
            last_modified_str = last_modified_dttm.strftime("%Y-%m-%dT%H:%M:%SZ")

        # 304: File Not Modified status_code
        if response.status_code == 304:
            LOGGER.warning('304: FILE NOT UPDATED, Stream: {}, URL: {}'.format(endpoint, url))
            return None, next_url, last_modified_str
        # Catch 403 error with message:
        #  "You have triggered an abuse detection mechanism. Please wait a few minutes before you try again."
        # Reference: https://developer.github.com/v3/#abuse-rate-limits
        if response.status_code == 403:
            response_json = response.json()
            response_message = response_json.get('message', '')
            if 'abuse detection mechanism.' in response_message:
                # Wait 3 minutes
                LOGGER.warning('Abuse Detection 403 Error: API triggered an abuse detection mechanism. Waiting 3 mins and trying again.')
                time.sleep(180) # Wait for 3 minutes
                raise AbuseDetection403Error(response)

        if response.status_code != 200:
            raise_for_error(response)

        response_json = response.json()

        return response_json, next_url, last_modified_str
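As an aside, requests already parses the Link header into response.links, so the regex above can be replaced with a dictionary lookup; a short sketch against a public GitHub endpoint used purely for illustration:

import requests

response = requests.get('https://api.github.com/repos/octocat/Hello-World/issues')
# requests exposes the parsed Link header as a dict keyed by rel,
# e.g. {'next': {'url': '...', 'rel': 'next'}}, so no regex is needed.
next_url = response.links.get('next', {}).get('url')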
Example #19
    def aqua_request(self, method, url, **kwargs):
        with metrics.http_request_timer(url):
            url = self.get_url(url, rest=False)
            return self._request(method, url, auth=self.aqua_auth, **kwargs)
Example #20
def sync_statistics_for_day(
    config,
    state,
    stream,
    sdk_client,
    token,
    start,
    report_metrics,
    report_dimensions,
):  # pylint: disable=too-many-locals
    """Sync and output Criteo Statistics endpoint for one day."""
    mdata = metadata.to_map(stream.metadata)
    stats_query = {
        "report_type": stream.tap_stream_id,
        "dimensions": report_dimensions,
        "metrics": report_metrics,
        "start_date": start.strftime("%Y-%m-%d"),
        "end_date": start.strftime("%Y-%m-%d"),
        "currency": metadata.get(mdata, (), "tap-criteo.currency"),
    }
    # Filter advertiser_ids if defined in config
    advertiser_ids = config.get("advertiser_ids")
    if advertiser_ids:
        stats_query["advertiserId"] = advertiser_ids
    # Add ignore_x_device if defined in metadata
    ignore_x_device = metadata.get(mdata, (), "tap-criteo.ignoreXDevice")
    if ignore_x_device:
        stats_query["tap-criteo.ignoreXDevice"] = ignore_x_device

    # Fetch the report as a csv string
    with metrics.http_request_timer(stream.tap_stream_id):
        result = get_statistics_report(sdk_client, stats_query, token=token)

    csv_reader = parse_csv_string(mdata, result)
    with metrics.record_counter(stream.tap_stream_id) as counter:
        time_extracted = utils.now()

        with Transformer() as bumble_bee:
            for row in csv_reader:
                row["_sdc_report_datetime"] = REPORT_RUN_DATETIME
                row["_sdc_report_currency"] = metadata.get(
                    mdata, (), "tap-criteo.currency")
                row = bumble_bee.transform(row, stream.schema.to_dict())

                singer.write_record(stream.stream,
                                    row,
                                    time_extracted=time_extracted)
                counter.increment()

        if start > get_start_for_stream(config, state, advertiser_ids,
                                        stream.stream):
            LOGGER.info(
                "updating bookmark: %s > %s",
                start,
                get_start_for_stream(config, state, advertiser_ids,
                                     stream.stream),
            )
            bookmarks.write_bookmark(
                state,
                state_key_name(advertiser_ids, stream.stream),
                "date",
                utils.strftime(start),
            )
            singer.write_state(state)
        else:
            LOGGER.info(
                "not updating bookmark: %s <= %s",
                start,
                get_start_for_stream(config, state, advertiser_ids,
                                     stream.stream),
            )

        LOGGER.info(
            "Done syncing %s records for the %s report for " +
            "advertiser_ids %s on %s",
            counter.value,
            stream.stream,
            advertiser_ids,
            start,
        )
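A caller typically runs this once per day over the requested range; a driver-loop sketch under the assumption that the surrounding sync code supplies the same arguments used above:

import datetime

def sync_statistics(config, state, stream, sdk_client, token,
                    start_date, end_date, report_metrics, report_dimensions):
    # Walk the range one day at a time, delegating each day to
    # sync_statistics_for_day as defined above.
    day = start_date
    while day <= end_date:
        sync_statistics_for_day(config, state, stream, sdk_client, token,
                                day, report_metrics, report_dimensions)
        day += datetime.timedelta(days=1)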
Example #21
    def get_objects(self):
        updated_at_min = self.get_bookmark()

        stop_time = singer.utils.now().replace(microsecond=0)
        date_window_size = float(
            Context.config.get("date_window_size", DATE_WINDOW_SIZE))

        # Page through till the end of the resultset
        while updated_at_min < stop_time:
            # Bookmarking can also occur on the since_id
            since_id = self.get_since_id() or 1

            if since_id != 1:
                LOGGER.info("Resuming sync from since_id %d", since_id)

            # It's important that `updated_at_min` has microseconds
            # truncated. Why has been lost to the mists of time but we
            # think it has something to do with how the API treats
            # microseconds on its date windows. Maybe it's possible to
            # drop data due to rounding errors or something like that?
            updated_at_max = updated_at_min + datetime.timedelta(
                days=date_window_size)
            if updated_at_max > stop_time:
                updated_at_max = stop_time
            while True:
                status_key = self.status_key or "status"
                query_params = self.get_query_params(since_id, status_key,
                                                     updated_at_min,
                                                     updated_at_max)

                with metrics.http_request_timer(self.name):
                    objects = self.call_api(query_params)

                for obj in objects:
                    if obj.id < since_id:
                        # This verifies the api behavior expectation we
                        # have that all results actually honor the
                        # since_id parameter.
                        raise OutOfOrderIdsError(
                            "obj.id < since_id: {} < {}".format(
                                obj.id, since_id))
                    yield obj

                # You know you're at the end when the current page has
                # less than the request size limits you set.
                if len(objects) < self.results_per_page:
                    # Save the updated_at_max as our bookmark as we've synced all rows up in our
                    # window and can move forward. Also remove the since_id because we want to
                    # restart at 1.
                    Context.state.get('bookmarks',
                                      {}).get(self.name,
                                              {}).pop('since_id', None)
                    self.update_bookmark(utils.strftime(updated_at_max))
                    break

                if objects[-1].id != max([o.id for o in objects]):
                    # This verifies the api behavior expectation we have
                    # that all pages are internally ordered by the
                    # `since_id`.
                    raise OutOfOrderIdsError(
                        "{} is not the max id in objects ({})".format(
                            objects[-1].id, max([o.id for o in objects])))
                since_id = objects[-1].id

                # Put since_id into the state.
                self.update_bookmark(since_id, bookmark_key='since_id')

            updated_at_min = updated_at_max
Example #22
def sync_generic_endpoint(config, state, stream, sdk_client, token):
    """Sync a stream which is backed by a generic Criteo endpoint."""
    stream = add_synthetic_keys_to_stream_schema(stream)
    stream = add_synthetic_keys_to_stream_metadata(stream)
    mdata = metadata.to_map(stream.metadata)
    primary_keys = metadata.get(mdata, (), "table-key-properties") or []
    LOGGER.info("{} primary keys are {}".format(stream.stream, primary_keys))
    singer.write_schema(stream.stream, stream.schema.to_dict(), primary_keys)

    advertiser_ids = config.get("advertiser_ids", None)
    if stream.tap_stream_id == "Audiences":
        if not advertiser_ids:
            LOGGER.warning(
                "%s stream needs at least one advertiser_id defined in config",
                stream.stream)
        for advertiser_id in advertiser_ids.split(","):
            token = refresh_auth_token(sdk_client, token)
            with metrics.http_request_timer(stream.tap_stream_id):
                result = get_audiences_endpoint(sdk_client,
                                                advertiser_id,
                                                token=token)
    else:
        module = GENERIC_ENDPOINT_MAPPINGS[stream.tap_stream_id]["module"]
        method = GENERIC_ENDPOINT_MAPPINGS[stream.tap_stream_id]["method"]
        if stream.tap_stream_id in (
                "Portfolio",
                "AdvertiserInfo",
                "Sellers",
                "SellerBudgets",
                "SellerCampaigns",
        ):
            result = call_generic_endpoint(stream,
                                           sdk_client,
                                           module,
                                           method,
                                           token=token)
        else:
            result = call_generic_endpoint(
                stream,
                sdk_client,
                module,
                method,
                advertiser_ids=advertiser_ids,
                token=token,
            )

    result = convert_keys_snake_to_camel([_.to_dict() for _ in result])

    with metrics.record_counter(stream.tap_stream_id) as counter:
        time_extracted = utils.now()

        with Transformer() as bumble_bee:
            for row in result:
                row["_sdc_report_datetime"] = REPORT_RUN_DATETIME
                row = bumble_bee.transform(row, stream.schema.to_dict())

                singer.write_record(stream.stream,
                                    row,
                                    time_extracted=time_extracted)
                counter.increment()

    LOGGER.info(
        "Done syncing %s records for the %s report for advertiser_ids %s",
        counter.value,
        stream.stream,
        advertiser_ids,
    )
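
The SDK objects above arrive with snake_case attributes and are reshaped by `convert_keys_snake_to_camel` before being transformed. A sketch matching that helper's name, assuming a flat dict per record (nested handling in the real tap is unknown):

def convert_keys_snake_to_camel(records):
    # Rewrite snake_case keys to camelCase, e.g. advertiser_id -> advertiserId.
    def camel(key):
        head, *rest = key.split('_')
        return head + ''.join(part.title() for part in rest)
    return [{camel(key): value for key, value in record.items()}
            for record in records]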
Example #23
0
def sync_report_for_day(stream_name, stream_schema, sdk_client, start,
                        field_list):  # pylint: disable=too-many-locals
    report_downloader = sdk_client.GetReportDownloader(version=VERSION)
    customer_id = sdk_client.client_customer_id
    report = {
        'reportName': 'Seems this is required',
        'dateRangeType': 'CUSTOM_DATE',
        'reportType': stream_name,
        'downloadFormat': 'CSV',
        'selector': {
            'fields': field_list,
            'dateRange': {
                'min': start.strftime('%Y%m%d'),
                'max': start.strftime('%Y%m%d')
            }
        }
    }

    # Fetch the report as a csv string
    with metrics.http_request_timer(stream_name):
        result = attempt_download_report(report_downloader, report)

    headers, csv_reader = parse_csv_stream(result)
    with metrics.record_counter(stream_name) as counter:
        time_extracted = utils.now()

        with Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING
                         ) as bumble_bee:
            for row in csv_reader:
                obj = dict(
                    zip(get_xml_attribute_headers(stream_schema, headers),
                        row))
                obj['_sdc_customer_id'] = customer_id
                obj['_sdc_report_datetime'] = REPORT_RUN_DATETIME

                bumble_bee.pre_hook = transform_pre_hook
                obj = bumble_bee.transform(obj, stream_schema)

                singer.write_record(stream_name,
                                    obj,
                                    time_extracted=time_extracted)
                counter.increment()

        if start > get_start_for_stream(sdk_client.client_customer_id,
                                        stream_name):
            LOGGER.info(
                'updating bookmark: %s > %s', start,
                get_start_for_stream(sdk_client.client_customer_id,
                                     stream_name))
            bookmarks.write_bookmark(
                STATE,
                state_key_name(sdk_client.client_customer_id, stream_name),
                'date', start.strftime(utils.DATETIME_FMT))
            singer.write_state(STATE)
        else:
            LOGGER.info(
                'not updating bookmark: %s <= %s', start,
                get_start_for_stream(sdk_client.client_customer_id,
                                     stream_name))

        LOGGER.info(
            "Done syncing %s records for the %s report for customer_id %s on %s",
            counter.value, stream_name, customer_id, start)
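
A hypothetical driver loop for `sync_report_for_day`, walking one day at a time from the stream's bookmark up to now; the helper names are the ones referenced in the example, while the loop shape itself is an assumption:

from datetime import timedelta

# Assumed: get_start_for_stream returns a datetime bookmark; field_list and
# stream_schema come from the discovered catalog.
day = get_start_for_stream(sdk_client.client_customer_id, stream_name)
while day <= utils.now():
    sync_report_for_day(stream_name, stream_schema, sdk_client, day, field_list)
    day = day + timedelta(days=1)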
Example #24
0
    def request(self,
                method,
                path=None,
                url=None,
                json=None,
                version=None,
                **kwargs):
        if not self.__verified:
            self.__verified = self.check_access()

        if not version:
            version = 'v2'

        if not url and path:
            url = '{}/{}'.format(self.base_url, path)

        if 'endpoint' in kwargs:
            endpoint = kwargs['endpoint']
            del kwargs['endpoint']
        else:
            endpoint = None

        if 'headers' not in kwargs:
            kwargs['headers'] = {}

        # Version represents API version (e.g. v2): https://api.Ujet.com/?http#versioning
        # NOTE: the original string had no '{}' placeholder, so `version` was never
        # interpolated; a conventional vendor media type is assumed here.
        kwargs['headers']['Accept'] = 'application/vnd.ujet.{}+json'.format(version)

        if self.__user_agent:
            kwargs['headers']['User-Agent'] = self.__user_agent

        if method == 'POST':
            kwargs['headers']['Content-Type'] = 'application/json'

        with metrics.http_request_timer(endpoint) as timer:
            response = self.__session.request(method=method,
                                              url=url,
                                              auth=(self.__company_key,
                                                    self.__company_secret),
                                              json=json,
                                              **kwargs)
            timer.tags[metrics.Tag.http_status_code] = response.status_code

        if response.status_code >= 500:
            raise Server5xxError()

        if response.status_code != 200:
            raise_for_error(response)

        # Pagination details are returned in the headers: total, per-page, next url
        total_records = int(response.headers.get('total', 0))

        # Not returned currently due to a client API bug. (The original chained
        # assignment also clobbered total_records with the per-page value.)
        per_page = int(response.headers.get('per-page', 0))  # pylint: disable=unused-variable
        next_url = None
        if response.headers.get('link') is not None:
            links = response.headers.get('link').split(',')
            for link in links:
                try:
                    url, rel = re.search(r'^\<(https.*)\>; rel\=\"(.*)\"$',
                                         link.strip()).groups()
                    if rel == 'next':
                        next_url = url
                except AttributeError:
                    next_url = None

        return response.json(), total_records, next_url
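
Because the method above returns `(body, total_records, next_url)`, a caller can keep following the parsed rel="next" link until it is exhausted; a minimal sketch, assuming a hypothetical `client` instance and list-shaped responses:

def get_all_records(client, path):
    records = []
    url = None
    while True:
        body, _total, next_url = client.request('GET', path=path, url=url,
                                                endpoint=path)
        records.extend(body if isinstance(body, list) else [body])
        if not next_url:
            break
        url = next_url  # subsequent requests go straight to the next page URL
    return records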
Example #25
0
def wrapped_request(*args, **kwargs):
    # Derive the stream name from the Stripe endpoint in the URL and time the
    # underlying request.
    url = args[1]
    match = re.match(r'http[s]?://api\.stripe\.com/v1/(\w+)\??', url)
    stream_name = match.groups()[0]
    with metrics.http_request_timer(stream_name):
        return _original_request(*args, **kwargs)
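
A sketch of how such a wrapper is typically installed: swap it in for the client's bound request method so every Stripe call is timed. The attribute names here are assumptions, not necessarily what the tap uses:

# Hypothetical installation; `client` is the Stripe SDK's HTTP client instance.
# Patching the bound method means args[1] in wrapped_request is the URL.
_original_request = client.request
client.request = wrapped_request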
Example #26
0
    def request_export(self,
                       method,
                       url=None,
                       path=None,
                       params=None,
                       json=None,
                       **kwargs):
        if not self.__verified:
            self.__verified = self.check_access()

        if url and path:
            url = '{}/{}'.format(url, path)
        elif path and not url:
            url = 'https://data.mixpanel.com/api/2.0/{}'.format(path)

        if 'endpoint' in kwargs:
            endpoint = kwargs['endpoint']
            del kwargs['endpoint']
        else:
            endpoint = 'export'

        if 'headers' not in kwargs:
            kwargs['headers'] = {}

        kwargs['headers']['Accept'] = 'application/json'

        if self.__user_agent:
            kwargs['headers']['User-Agent'] = self.__user_agent

        if method == 'POST':
            kwargs['headers']['Content-Type'] = 'application/json'

        kwargs['headers']['Authorization'] = 'Basic {}'.format(
            str(base64.urlsafe_b64encode(self.__api_secret.encode("utf-8")),
                "utf-8"))
        with metrics.http_request_timer(endpoint) as timer:
            with self.__session.request(method=method,
                                        url=url,
                                        params=params,
                                        json=json,
                                        stream=True,
                                        timeout=180,
                                        **kwargs) as response:

                if response.status_code >= 500:
                    raise Server5xxError()

                if response.status_code != 200:
                    raise_for_error(response)

                # export endpoint returns jsonl results;
                #  other endpoints return json with array of results
                #  jsonlines reference: https://jsonlines.readthedocs.io/en/latest/

                if response.text == '':
                    LOGGER.warning('/export API response empty')
                    yield None
                else:
                    file_like_object = io.StringIO(response.text)
                    reader = jsonlines.Reader(file_like_object)
                    for record in reader.iter(allow_none=True,
                                              skip_empty=True):
                        yield record

            timer.tags[metrics.Tag.http_status_code] = response.status_code
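
Because `request_export` is a generator that yields one JSON-lines record at a time (or a single `None` when the export is empty), callers can stream it directly; a minimal usage sketch with a hypothetical `client` and an example date window:

params = {'from_date': '2021-01-01', 'to_date': '2021-01-02'}
for record in client.request_export('GET', path='export', params=params,
                                    endpoint='export'):
    if record is None:
        continue  # empty /export response
    singer.write_record('export', record, time_extracted=utils.now())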