예제 #1
0
파일: base.py 프로젝트: walexi/ichnaea
        def closure(request, *args, **kwargs):
            """
            Check the ``key`` query parameter, then call the wrapped view.

            The key is looked up through ``API_CHECK`` on the request's
            slave database session. Depending on the outcome:

            * missing or unknown key: ``invalid_api_key_response()`` is
              returned when ``error_on_invalidkey`` is set, otherwise the
              view is called;
            * known key over its limit: an ``HTTPForbidden`` response with
              the ``DAILY_LIMIT`` body is returned;
            * lookup failure, or ``rate_limit`` returning ``None``: the
              check is skipped and the view is called.

            Whenever the view is called, ``request.api_key_log`` and
            ``request.api_key_name`` have been set.

            :param request: The incoming request.
            :returns: The view's result or an error response.
            """
            api_key = request.GET.get('key', None)
            heka_client = get_heka_client()
            stats_client = request.registry.stats_client

            if api_key is None:
                stats_client.incr('%s.no_api_key' % func_name)
                if error_on_invalidkey:
                    return invalid_api_key_response()

            session = request.db_slave_session
            try:
                result = session.execute(API_CHECK.bindparams(api_key=api_key))
                found_key = result.fetchone()
            except Exception:  # pragma: no cover
                # if we cannot connect to backend DB, skip api key check
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.dbfailure_skip_api_key' % func_name)

                # The view still runs, so it must see the same api log/name
                # attributes as on every other path; without them a view
                # reading request.api_key_log would fail with an
                # AttributeError exactly when the database is unavailable.
                request.api_key_log = False
                request.api_key_name = None
                return func(request, *args, **kwargs)

            if found_key is not None:
                maxreq, api_key_log, shortname = found_key
                if not shortname:  # pragma: no cover
                    # fall back to the raw key as the metric name
                    shortname = api_key

                # remember api key and shortname on the request
                request.api_key_log = bool(api_key_log)
                request.api_key_name = shortname

                stats_client.incr('%s.api_key.%s' % (func_name, shortname))
                should_limit = rate_limit(request.registry.redis_client,
                                          api_key,
                                          maxreq=maxreq)
                if should_limit:
                    result = HTTPForbidden()
                    result.content_type = 'application/json'
                    result.body = DAILY_LIMIT
                    return result
                elif should_limit is None:  # pragma: no cover
                    # We couldn't connect to Redis
                    stats_client.incr('%s.redisfailure_skip_limit' % func_name)
            else:
                stats_client.incr('%s.unknown_api_key' % func_name)
                if error_on_invalidkey:
                    return invalid_api_key_response()

                # provide the same api log/name attributes
                request.api_key_log = False
                request.api_key_name = None

            return func(request, *args, **kwargs)
예제 #2
0
파일: locate.py 프로젝트: boostrack/ichnaea
    def __init__(self, api_key_name, api_key_log, api_name):
        """
        Set up a StatsLogger, which sends counted and timed named
        statistics to a statistic aggregator client.

        :param api_key_name: Human readable API key name
            (for example 'test_1')
        :type api_key_name: str
        :param api_key_log: Gather additional API key specific stats?
        :type api_key_log: bool
        :param api_name: Name of the API, used as stats prefix
            (for example 'geolocate')
        :type api_name: str
        """
        # Store the caller supplied settings as-is.
        self.api_key_name, self.api_key_log, self.api_name = (
            api_key_name, api_key_log, api_name)

        # Clients are obtained from the module-level accessors.
        self.heka_client = get_heka_client()
        self.stats_client = get_stats_client()
예제 #3
0
        def closure(request, *args, **kwargs):
            """
            Check the ``key`` query parameter, then call the wrapped view.

            A missing key, or a key without a matching ``API_CHECK`` row,
            yields ``invalid_api_key_response()`` when
            ``error_on_invalidkey`` is set; otherwise the view is called.
            A known key that ``rate_limit`` reports as limited gets an
            ``HTTPForbidden`` response with the ``DAILY_LIMIT`` body. If
            the lookup raises, or ``rate_limit`` returns ``None``, the
            check is skipped and the view is called.
            """
            api_key = request.GET.get('key', None)
            heka_client = get_heka_client()
            stats_client = request.registry.stats_client

            if api_key is None:
                stats_client.incr('%s.no_api_key' % func_name)
                if error_on_invalidkey:
                    return invalid_api_key_response()

            db = request.db_slave_session
            try:
                row = db.execute(
                    API_CHECK.bindparams(api_key=api_key)).fetchone()
            except Exception:
                # The backend DB is unusable: report it and let the
                # request through without an api key check.
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.dbfailure_skip_api_key' % func_name)
                return func(request, *args, **kwargs)

            if row is None:
                # No row for this key.
                stats_client.incr('%s.unknown_api_key' % func_name)
                if error_on_invalidkey:
                    return invalid_api_key_response()
                return func(request, *args, **kwargs)

            maxreq, shortname = row
            # Use the raw key in the metric name if no shortname is set.
            shortname = shortname or api_key
            stats_client.incr('%s.api_key.%s' % (func_name, shortname))

            limited = rate_limit(request.registry.redis_client,
                                 api_key, maxreq=maxreq)
            if limited:
                response = HTTPForbidden()
                response.content_type = 'application/json'
                response.body = DAILY_LIMIT
                return response
            if limited is None:
                # Redis could not be reached, so no limit is enforced.
                stats_client.incr('%s.redisfailure_skip_limit' % func_name)

            return func(request, *args, **kwargs)
예제 #4
0
def search_all_sources(session,
                       api_name,
                       data,
                       client_addr=None,
                       geoip_db=None,
                       api_key_log=False,
                       api_key_name=None,
                       result_type='position'):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :param api_key_log: Enable additional api key specific logging?
    :param api_key_name: The metric friendly api key name.
    :param result_type: What kind of result to return, either a lat/lon
                        position or a country estimate.
    :returns: ``None`` if no source produced a result. For
              ``result_type='position'`` a dict with rounded ``lat``,
              ``lon`` and ``accuracy`` values. For
              ``result_type='country'`` a dict with ``country_name`` and
              ``country_code`` of the first candidate country, or ``None``
              if there is no candidate country.
    :raises ValueError: If ``result_type`` is neither 'position' nor
                        'country'.
    """

    if result_type not in ('country', 'position'):
        raise ValueError('Invalid result_type, must be one of '
                         'position or country')

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    # Best estimate so far and the name of the source that produced it.
    result = None
    result_metric = None

    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pre-process wifi data, dropping entries the normalizer rejects
    for wifi in data.get('wifi', ()):
        wifi = normalized_wifi_dict(wifi)
        if wifi:
            validated['wifi'].append(wifi)

    # Pre-process cell data; an unknown or missing radio name maps to -1
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            # Only the emptiness of this set is checked further down, to
            # decide whether the cell_lac search step runs at all.
            validated['cell_lac'].add(cell_key)

    found_cells = []

    # Query all cells and OCID cells
    for model in Cell, OCIDCell, CellArea:
        cell_filter = []
        for key in validated['cell']:
            # create a list of 'and' criteria for cell keys
            criterion = join_cellkey(model, key)
            cell_filter.append(and_(*criterion))

        if cell_filter:
            # only do a query if we have cell results, or this will match
            # all rows in the table
            load_fields = ('radio', 'mcc', 'mnc', 'lac', 'lat', 'lon', 'range')
            query = (session.query(model).options(
                load_only(*load_fields)).filter(or_(*cell_filter)).filter(
                    model.lat.isnot(None)).filter(model.lon.isnot(None)))

            try:
                found_cells.extend(query.all())
            except Exception:
                # Report the failure and carry on without this model's rows.
                heka_client.raven(RAVEN_ERROR)

    if found_cells:
        # Group all found_cells by location area
        lacs = defaultdict(list)
        for cell in found_cells:
            cellarea_key = (cell.radio, cell.mcc, cell.mnc, cell.lac)
            lacs[cellarea_key].append(cell)

        def sort_lac(v):
            # use the lac with the most values,
            # or the one with the smallest range
            return (len(v), -min([e.range for e in v]))

        # If we get data from multiple location areas, use the one with the
        # most data points in it. That way a lac with a cell hit will
        # have two entries and win over a lac with only the lac entry.
        lac = sorted(lacs.values(), key=sort_lac, reverse=True)

        # Only the rows of the winning location area are used.
        for cell in lac[0]:
            # The first entry is the key,
            # used only to distinguish cell from lac
            network = Network(key=None,
                              lat=cell.lat,
                              lon=cell.lon,
                              range=cell.range)
            if type(cell) is CellArea:
                validated['cell_lac_network'].append(network)
            else:
                validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res,
     countries) = geoip_and_best_guess_country_codes(validated['cell'],
                                                     api_name, client_addr,
                                                     geoip_db, stats_client)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    # data_field decides whether a step runs, object_field names the
    # input handed to search_fn, metric_name is used in the stat names.
    for (data_field, object_field, metric_name, search_fn) in [
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi)
    ]:

        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field], stats_client,
                              api_name)
            except Exception:
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' % (api_name, metric_name))

            # r is None both when the search found nothing and when it
            # raised, so an error is also counted as "not found".
            if r is None:
                stats_client.incr('%s.no_%s_found' % (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' % (api_name, metric_name))

                # Check whether the hit lies in any of the possible
                # countries. A mismatch is only recorded as an anomaly
                # metric; the hit itself is not discarded.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                # NOTE(review): the * 1000 suggests distance() yields km and
                # accuracy is in meters - confirm.
                elif (distance(float(result['lat']), float(result['lon']), lat,
                               lon) * 1000 <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    # Do detailed logging for some api keys
    if api_key_log and api_key_name:
        api_log_metric = None
        wifi_keys = set([w['key'] for w in validated['wifi']])
        if wifi_keys and \
           len(filter_bssids_by_similarity(wifi_keys)) >= MIN_WIFIS_IN_QUERY:
            # Only count requests as WiFi-based if they contain enough
            # distinct WiFi networks to pass our filters
            if result_metric == 'wifi':
                api_log_metric = 'wifi_hit'
            else:
                api_log_metric = 'wifi_miss'
        elif validated['cell']:
            if result_metric == 'cell':
                api_log_metric = 'cell_hit'
            elif result_metric == 'cell_lac':
                api_log_metric = 'cell_lac_hit'
            else:
                api_log_metric = 'cell_miss'
        else:
            # Neither usable wifi nor cell data was sent.
            if geoip_res:
                api_log_metric = 'geoip_hit'
            else:
                api_log_metric = 'geoip_miss'
        if api_log_metric:
            stats_client.incr('%s.api_log.%s.%s' %
                              (api_name, api_key_name, api_log_metric))

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))

    if result_type == 'position':
        # All three values, accuracy included, are rounded with the same
        # DEGREE_DECIMAL_PLACES constant.
        rounded_result = {
            'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
            'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
            'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
        }
        stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                            rounded_result['accuracy'])
        return rounded_result
    elif result_type == 'country':
        # Without any candidate country the function falls off the end
        # and implicitly returns None.
        if countries:
            country = iso3166.countries.get(countries[0])
            return {
                'country_name': country.name,
                'country_code': country.alpha2
            }
예제 #5
0
파일: task.py 프로젝트: walexi/ichnaea
 def heka_client(self):
     """Return the client provided by ``get_heka_client()``."""
     client = get_heka_client()
     return client
예제 #6
0
def search_all_sources(session, api_name, data,
                       client_addr=None, geoip_db=None):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :returns: ``None`` if no source produced a result, otherwise a dict
              with rounded ``lat``, ``lon`` and ``accuracy`` values.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    # Best estimate so far and the name of the source that produced it.
    result = None
    result_metric = None

    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pass-through wifi data; it is not normalized in this function
    validated['wifi'] = data.get('wifi', [])

    # Pre-process cell data; an unknown or missing radio name maps to -1
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            # The lac key is the cell key with its cid replaced by the
            # CELLID_LAC marker; the set removes duplicates.
            validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # Merge all possible cell and lac keys into one list
    all_cell_keys = []
    all_cell_keys.extend(validated['cell'])
    for key in validated['cell_lac']:
        all_cell_keys.append(key)

    # Do a single query for all cells and lacs at the same time
    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        # Report the failure and continue as if nothing was found.
        heka_client.raven(RAVEN_ERROR)
        all_networks = []
    # Split the combined result back into lac and cell networks.
    for network in all_networks:
        if network.key == CELLID_LAC:
            validated['cell_lac_network'].append(network)
        else:
            validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res, countries) = geoip_and_best_guess_country_codes(
        validated['cell'], api_name, client_addr, geoip_db)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    # data_field decides whether a step runs, object_field names the
    # input handed to search_fn, metric_name is used in the stat names.
    for (data_field, object_field, metric_name, search_fn) in [
            ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
            ('cell', 'cell_network', 'cell', search_cell),
            ('wifi', 'wifi', 'wifi', search_wifi)]:

        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field])
            except Exception:
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' %
                                  (api_name, metric_name))

            # r is None both when the search found nothing and when it
            # raised, so an error is also counted as "not found".
            if r is None:
                stats_client.incr('%s.no_%s_found' %
                                  (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' %
                                  (api_name, metric_name))

                # Check whether the hit lies in any of the possible
                # countries. A mismatch is only recorded as an anomaly
                # metric; the hit itself is not discarded.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                # NOTE(review): the * 1000 suggests distance() yields km and
                # accuracy is in meters - confirm.
                elif (distance(float(result['lat']),
                               float(result['lon']), lat, lon) * 1000
                      <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    # All three values, accuracy included, are rounded with the same
    # DEGREE_DECIMAL_PLACES constant.
    rounded_result = {
        'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
        'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
        'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
    }

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                        rounded_result['accuracy'])

    return rounded_result
예제 #7
0
 def heka_client(self):
     """Return the client provided by ``get_heka_client()``."""
     client = get_heka_client()
     return client
예제 #8
0
def search_all_sources(session,
                       api_name,
                       data,
                       client_addr=None,
                       geoip_db=None,
                       api_key_log=False,
                       api_key_name=None):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :param api_key_log: If true, and ``api_key_name`` is set, emit an
                        additional per-key ``api_log`` hit/miss metric.
    :param api_key_name: The name used for the key in the ``api_log``
                         metric.
    :returns: ``None`` if no source produced a result, otherwise a dict
              with rounded ``lat``, ``lon`` and ``accuracy`` values.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    # Best estimate so far and the name of the source that produced it.
    result = None
    result_metric = None

    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pre-process wifi data, dropping entries the normalizer rejects
    for wifi in data.get('wifi', ()):
        wifi = normalized_wifi_dict(wifi)
        if wifi:
            validated['wifi'].append(wifi)

    # Pre-process cell data; an unknown or missing radio name maps to -1
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            # The lac key is the cell key with its cid replaced by the
            # CELLID_LAC marker; the set removes duplicates.
            validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # Merge all possible cell and lac keys into one list
    all_cell_keys = []
    all_cell_keys.extend(validated['cell'])
    for key in validated['cell_lac']:
        all_cell_keys.append(key)

    # Do a single query for all cells and lacs at the same time
    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        # Report the failure and continue as if nothing was found.
        heka_client.raven(RAVEN_ERROR)
        all_networks = []
    # Split the combined result back into lac and cell networks.
    for network in all_networks:
        if network.key == CELLID_LAC:
            validated['cell_lac_network'].append(network)
        else:
            validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res,
     countries) = geoip_and_best_guess_country_codes(validated['cell'],
                                                     api_name, client_addr,
                                                     geoip_db)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    # data_field decides whether a step runs, object_field names the
    # input handed to search_fn, metric_name is used in the stat names.
    for (data_field, object_field, metric_name, search_fn) in [
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi)
    ]:

        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field])
            except Exception:
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' % (api_name, metric_name))

            # r is None both when the search found nothing and when it
            # raised, so an error is also counted as "not found".
            if r is None:
                stats_client.incr('%s.no_%s_found' % (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' % (api_name, metric_name))

                # Check whether the hit lies in any of the possible
                # countries. A mismatch is only recorded as an anomaly
                # metric; the hit itself is not discarded.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                # NOTE(review): the * 1000 suggests distance() yields km and
                # accuracy is in meters - confirm.
                elif (distance(float(result['lat']), float(result['lon']), lat,
                               lon) * 1000 <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    # Do detailed logging for some api keys
    if api_key_log and api_key_name:
        api_log_metric = None
        wifi_keys = set([w['key'] for w in validated['wifi']])
        if wifi_keys and \
           len(filter_bssids_by_similarity(wifi_keys)) >= MIN_WIFIS_IN_QUERY:
            # Only count requests as WiFi-based if they contain enough
            # distinct WiFi networks to pass our filters
            if result_metric == 'wifi':
                api_log_metric = 'wifi_hit'
            else:
                api_log_metric = 'wifi_miss'
        elif validated['cell']:
            if result_metric == 'cell':
                api_log_metric = 'cell_hit'
            elif result_metric == 'cell_lac':
                api_log_metric = 'cell_lac_hit'
            else:
                api_log_metric = 'cell_miss'
        else:
            # Neither usable wifi nor cell data was sent.
            if geoip_res:
                api_log_metric = 'geoip_hit'
            else:
                api_log_metric = 'geoip_miss'
        if api_log_metric:
            stats_client.incr('%s.api_log.%s.%s' %
                              (api_name, api_key_name, api_log_metric))

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    # All three values, accuracy included, are rounded with the same
    # DEGREE_DECIMAL_PLACES constant.
    rounded_result = {
        'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
        'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
        'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
    }

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                        rounded_result['accuracy'])

    return rounded_result