Beispiel #1
0
def search_cell_lac(session, data):
    """
    Estimate a position from the location-area entries of the given cells.

    Every cell in ``data['cell']`` is normalized, its ``cid`` is replaced
    by ``CELLID_LAC`` and the resulting key is looked up in the ``Cell``
    table, ignoring rows without a latitude or longitude.

    :param session: A database session for queries.
    :param data: A dict with a ``'radio'`` entry and a ``'cell'`` iterable.
    :returns: A dict with ``'lat'``, ``'lon'`` and ``'accuracy'`` taken
              from the match with the smallest range, or None if no
              cell produced a match.
    """
    default_radio = RADIO_TYPE.get(data['radio'], -1)

    matches = []
    for entry in data['cell']:
        normalized = normalized_cell_dict(entry, default_radio=default_radio)
        if normalized:
            # Swap the cell id for the LAC marker before building the key.
            normalized['cid'] = CELLID_LAC
            lac_key = to_cellkey(normalized)

            located = session.query(Cell.lat, Cell.lon, Cell.range)
            located = located.filter(*join_cellkey(Cell, lac_key))
            located = located.filter(Cell.lat.isnot(None))
            located = located.filter(Cell.lon.isnot(None))

            row = located.first()
            if row is not None:
                matches.append(Network(lac_key, *row))

    if not matches:
        return None

    # Of all matching areas, prefer the one with the smallest range.
    matches.sort(key=operator.attrgetter('range'))
    smallest = matches[0]

    return {
        'lat': quantize(smallest.lat),
        'lon': quantize(smallest.lon),
        'accuracy': max(LAC_MIN_ACCURACY, smallest.range),
    }
Beispiel #2
0
def make_cell_import_dict(row):
    """
    Build a normalized cell dict from one raw import row.

    Missing keys, empty strings and None values in ``row`` are replaced
    by per-field defaults before conversion:

    * ``created`` / ``modified`` (read from ``'created'`` / ``'updated'``):
      epoch seconds, default 0, converted to UTC-aware datetimes.
    * ``lat`` / ``lon``: floats, default -255.
    * ``radio``: looked up case-insensitively in ``RADIO_TYPE``, -1 when
      unknown or absent.
    * ``mcc``, ``mnc``, ``lac``, ``cid``, ``psc``: ints, default -1.
    * ``range``: int (parsed via float), default 0.
    * ``total_measures`` (read from ``'samples'``): int, default -1.
    * ``changeable``: bool, default True.

    :param row: A mapping of column name to raw value.
    :returns: Whatever ``normalized_cell_dict`` returns for the
              converted dict.
    """

    def val(key, default):
        # Treat absent keys, empty strings and None alike.
        if key in row and row[key] != '' and row[key] is not None:
            return row[key]
        return default

    def utc_datetime(key):
        # Hand the timezone to fromtimestamp() so the epoch value is
        # interpreted as UTC. A bare fromtimestamp() yields local wall
        # time; stamping that with UTC afterwards would shift the
        # instant by the host's UTC offset.
        return datetime.fromtimestamp(int(val(key, 0)), UTC)

    def flag(key, default):
        # bool() of any non-empty string is True, so a textual '0' would
        # be read as set. Try a numeric interpretation first and only
        # fall back to plain truthiness for non-numeric values.
        raw = val(key, default)
        try:
            return bool(int(raw))
        except (TypeError, ValueError):
            return bool(raw)

    d = dict()

    d['created'] = utc_datetime('created')
    d['modified'] = utc_datetime('updated')

    d['lat'] = float(val('lat', -255))
    d['lon'] = float(val('lon', -255))

    # Route radio through val() like every other field so a missing or
    # empty value maps to -1 instead of raising.
    d['radio'] = RADIO_TYPE.get(val('radio', '').lower(), -1)

    for k in ['mcc', 'mnc', 'lac', 'cid', 'psc']:
        d[k] = int(val(k, -1))

    # Parse via float first so values such as '1234.0' are accepted.
    d['range'] = int(float(val('range', 0)))

    d['total_measures'] = int(val('samples', -1))
    d['changeable'] = flag('changeable', True)
    return normalized_cell_dict(d)
Beispiel #3
0
def search_cell(session, data):
    """
    Estimate a position by averaging the known locations of the cells.

    Every cell in ``data['cell']`` is normalized and looked up in the
    ``Cell`` table, ignoring rows without a latitude or longitude.

    :param session: A database session for queries.
    :param data: A dict with a ``'radio'`` entry and a ``'cell'`` iterable.
    :returns: A dict with ``'lat'``, ``'lon'`` (the mean of all matched
              cells) and ``'accuracy'``, or None if no cell matched.
    """
    default_radio = RADIO_TYPE.get(data['radio'], -1)

    found = []
    for entry in data['cell']:
        normalized = normalized_cell_dict(entry, default_radio=default_radio)
        if normalized:
            cell_key = to_cellkey(normalized)

            located = session.query(Cell.lat, Cell.lon, Cell.range)
            located = located.filter(*join_cellkey(Cell, cell_key))
            located = located.filter(Cell.lat.isnot(None))
            located = located.filter(Cell.lon.isnot(None))

            row = located.first()
            if row is not None:
                found.append(Network(cell_key, *row))

    if not found:
        return None

    # The estimate is the plain average over all matched cells.
    count = len(found)
    avg_lat = sum(network.lat for network in found) / count
    avg_lon = sum(network.lon for network in found) / count

    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': estimate_accuracy(avg_lat, avg_lon,
                                      found, CELL_MIN_ACCURACY),
    }
Beispiel #4
0
def search_all_sources(session, api_name, data,
                       client_addr=None, geoip_db=None):
    """
    Shared lookup routine behind all location APIs.

    Consults the cell-lac, cell and WiFi sources in that order, keeping
    a later estimate only if it agrees with the current best one and
    with the candidate countries, and falls back to the GeoIP result
    when none of them produced a position.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :returns: A dict with rounded ``'lat'``, ``'lon'`` and ``'accuracy'``
              values, or None when no source yielded a result.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    result = None
    result_metric = None

    # WiFi data is passed through unchanged; the cell-related entries
    # are filled in by the pre-processing steps below.
    validated = {
        'wifi': data.get('wifi', []),
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Normalize the cells and derive one LAC key per distinct area.
    default_radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for entry in data.get('cell', ()):
        normalized = normalized_cell_dict(entry, default_radio=default_radio)
        if not normalized:
            continue
        cell_key = to_cellkey(normalized)
        validated['cell'].append(cell_key)
        validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # Cells first, then LACs, so both can be fetched with one query.
    all_cell_keys = list(validated['cell'])
    all_cell_keys.extend(validated['cell_lac'])

    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        # Report the failure and carry on without any cell data.
        heka_client.raven(RAVEN_ERROR)
        all_networks = []

    # Split the combined query result into LAC and plain cell networks.
    for network in all_networks:
        if network.key == CELLID_LAC:
            bucket = 'cell_lac_network'
        else:
            bucket = 'cell_network'
        validated[bucket].append(network)

    # GeoIP is always consulted: its country estimate filters out bogus
    # hits, and its full result serves as the last-resort answer.
    geoip_res, countries = geoip_and_best_guess_country_codes(
        validated['cell'], api_name, client_addr, geoip_db)

    # "Zoom in" from cell-lac over cell to wifi. Each step may tighten
    # the estimate, but only while it contradicts neither the current
    # best estimate nor the possible countries of origin.
    search_steps = (
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi),
    )

    for data_field, object_field, metric_name, search_fn in search_steps:
        if not validated[data_field]:
            continue

        candidate = None
        try:
            candidate = search_fn(session, validated[object_field])
        except Exception:
            heka_client.raven(RAVEN_ERROR)
            stats_client.incr('%s.%s_error' %
                              (api_name, metric_name))

        if candidate is None:
            stats_client.incr('%s.no_%s_found' %
                              (api_name, metric_name))
            continue

        lat = float(candidate['lat'])
        lon = float(candidate['lon'])

        stats_client.incr('%s.%s_found' %
                          (api_name, metric_name))

        in_possible_country = any(
            location_is_in_country(lat, lon, country, 1)
            for country in countries)

        if countries and not in_possible_country:
            # A hit outside every possible country is discarded.
            stats_client.incr('%s.anomaly.%s_country_mismatch' %
                              (api_name, metric_name))

        elif result is None or (
                # Scaled by 1000 before comparing with the accuracy --
                # presumably km to m; confirm against distance().
                distance(float(result['lat']), float(result['lon']),
                         lat, lon) * 1000 <= result['accuracy']):
            # Accept the first hit, and any later hit that lies within
            # the accuracy radius of the current best estimate.
            result = candidate
            result_metric = metric_name

        else:
            stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                              (api_name, metric_name, result_metric))

    # GeoIP stays out of the zoom-in loop because it is frequently
    # wrong at the city level; it is used only when cell and wifi
    # gave nothing better.
    if not result:
        if not geoip_res:
            stats_client.incr('%s.miss' % api_name)
            return None
        result = geoip_res
        result_metric = 'geoip'

    rounded_result = {
        field: round(result[field], DEGREE_DECIMAL_PLACES)
        for field in ('lat', 'lon', 'accuracy')
    }

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                        rounded_result['accuracy'])

    return rounded_result