Ejemplo n.º 1
0
def mark_moving_cells(session, moving_cells):
    """
    Blacklist cells detected as moving and schedule their removal.

    For every distinct cell key in ``moving_cells`` that has no matching
    ``CellBlacklist`` row yet, a new blacklist entry is added to the
    session. The number of newly blacklisted cells is reported to heka
    and the new keys are passed to the ``remove_cell`` task.

    :param session: A database session for queries and inserts.
    :param moving_cells: An iterable of cell objects accepted by
                         ``to_cellkey`` and ``join_cellkey``.
    """
    seen = set()
    moving_keys = []
    blacklist = []
    for cell in moving_cells:
        # NOTE(review): assumes to_cellkey returns a hashable namedtuple
        # (it is used with _asdict()) -- confirm
        cell_key = to_cellkey(cell)
        if cell_key in seen:
            # several input cells can map to the same key; blacklisting
            # it twice would add two rows for one key
            continue
        seen.add(cell_key)

        query = session.query(CellBlacklist).filter(
            *join_cellkey(CellBlacklist, cell))
        if query.first() is None:
            key = cell_key._asdict()
            blacklist.append(CellBlacklist(**key))
            moving_keys.append(key)

    if not moving_keys:
        # nothing new to blacklist, so don't queue an empty removal task
        return

    get_heka_client().incr("items.blacklisted.cell_moving", len(moving_keys))
    session.add_all(blacklist)
    remove_cell.delay(moving_keys)
Ejemplo n.º 2
0
def submit_view(request):
    """
    Accept submitted measures and queue them for insertion.

    Records whether an API key was supplied, validates the request via
    ``preprocess_request`` and hands the submitted items to the
    ``insert_measures`` task in batches of 100.

    :param request: The incoming request object.
    :returns: An ``HTTPNoContent`` response.
    """
    stats = get_heka_client()
    api_key = request.GET.get('key', None)

    # we don't require API keys for submit yet
    if api_key is not None:
        stats.incr('submit.api_key.%s' % api_key.replace('.', '__'))
    else:
        stats.incr('submit.no_api_key')

    data, errors = preprocess_request(
        request,
        schema=SubmitSchema(),
        extra_checks=(submit_validator, ),
    )

    items = data['items']
    nickname = request.headers.get('X-Nickname', u'')
    if isinstance(nickname, str):
        nickname = nickname.decode('utf-8', 'ignore')

    # batch incoming data into multiple tasks, in case someone
    # manages to submit us a huge single request
    batch_size = 100
    for start in range(0, len(items), batch_size):
        batch = items[start:start + batch_size]
        insert_measures.delay(
            # TODO convert items to json with support for decimal/datetime
            items=dumps(batch),
            nickname=nickname,
        )
    return HTTPNoContent()
Ejemplo n.º 3
0
def mark_moving_wifis(session, moving_wifis):
    """
    Blacklist wifis detected as moving and schedule their removal.

    Keys that already have a ``WifiBlacklist`` row are skipped. For the
    remaining keys a blacklist row is inserted, the count is reported to
    heka and the keys are passed to the ``remove_wifi`` task.

    :param session: A database session for queries and inserts.
    :param moving_wifis: An iterable of objects with a ``key`` attribute.
    """
    candidate_keys = set(wifi.key for wifi in moving_wifis)
    utcnow = datetime.utcnow()

    rows = session.query(WifiBlacklist.key).filter(
        WifiBlacklist.key.in_(candidate_keys)).all()
    already_blocked = set(row[0] for row in rows)

    new_keys = candidate_keys - already_blocked
    if not new_keys:
        return

    # on duplicate key, do a no-op change
    insert = WifiBlacklist.__table__.insert(on_duplicate='created=created')
    for key in new_keys:
        session.execute(insert.values(key=key, created=utcnow))

    get_heka_client().incr("items.blacklisted.wifi_moving", len(new_keys))
    remove_wifi.delay(list(new_keys))
Ejemplo n.º 4
0
def submit_view(request):
    """
    Validate a submit request and queue its items as background tasks.

    A heka counter records whether the request carried an API key. The
    validated items are split into slices of at most 100 entries, each
    passed to ``insert_measures`` together with the submitter nickname
    taken from the ``X-Nickname`` header.

    :param request: The incoming request object.
    :returns: An ``HTTPNoContent`` response.
    """
    api_key = request.GET.get('key', None)
    heka = get_heka_client()

    if api_key is not None:
        heka.incr('submit.api_key.%s' % api_key.replace('.', '__'))
    else:
        # we don't require API keys for submit yet
        heka.incr('submit.no_api_key')

    data, errors = preprocess_request(
        request,
        schema=SubmitSchema(),
        extra_checks=(submit_validator, ),
    )
    items = data['items']

    nickname = request.headers.get('X-Nickname', u'')
    if isinstance(nickname, str):
        # byte strings are decoded, undecodable bytes are ignored
        nickname = nickname.decode('utf-8', 'ignore')

    # batch incoming data into multiple tasks, in case someone
    # manages to submit us a huge single request
    offsets = range(0, len(items), 100)
    for offset in offsets:
        insert_measures.delay(
            # TODO convert items to json with support for decimal/datetime
            items=dumps(items[offset:offset + 100]),
            nickname=nickname,
        )

    return HTTPNoContent()
Ejemplo n.º 5
0
def mark_moving_cells(session, moving_cells):
    """
    Add moving cells to the blacklist and queue their removal.

    Each distinct cell key from ``moving_cells`` is looked up in
    ``CellBlacklist``; keys without an existing row get a new blacklist
    entry. The count of new entries is sent to heka and the new keys are
    handed to the ``remove_cell`` task.

    :param session: A database session for queries and inserts.
    :param moving_cells: An iterable of cell objects accepted by
                         ``to_cellkey`` and ``join_cellkey``.
    """
    handled = set()
    moving_keys = []
    blacklist = []
    for cell in moving_cells:
        # NOTE(review): assumes to_cellkey returns a hashable namedtuple
        # (it is used with _asdict()) -- confirm
        cell_key = to_cellkey(cell)
        if cell_key in handled:
            # the same key must only be blacklisted once per call
            continue
        handled.add(cell_key)

        existing = session.query(CellBlacklist).filter(
            *join_cellkey(CellBlacklist, cell)).first()
        if existing is None:
            key = cell_key._asdict()
            blacklist.append(CellBlacklist(**key))
            moving_keys.append(key)

    if not moving_keys:
        # mirror mark_moving_wifis: no new keys, no metrics and no task
        return

    get_heka_client().incr("items.blacklisted.cell_moving",
                           len(moving_keys))
    session.add_all(blacklist)
    remove_cell.delay(moving_keys)
Ejemplo n.º 6
0
def process_measures(items, session, userid=None):
    """
    Create measure rows for submitted items and fan out follow-up tasks.

    One ``Measure`` is added per item, each item is passed through
    ``process_time`` and ``process_measure``, and the resulting cell and
    wifi measures are grouped per key and sent to the
    ``insert_cell_measures`` / ``insert_wifi_measures`` tasks.

    :param items: A sequence of submitted item dicts with at least
                  ``lat`` and ``lon`` keys.
    :param session: A database session.
    :param userid: Optional user id, used for scoring and map stats.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    utcmin = utcnow - datetime.timedelta(60)

    # get enough auto-increment ids assigned
    measures = [Measure() for _ in items]
    for new_measure in measures:
        session.add(new_measure)
    # TODO switch unique measure id to a uuid, so we don't have to do
    # get these from a savepoint here
    session.flush()

    positions = []
    cell_measures = []
    wifi_measures = []
    for parent, raw_item in zip(measures, items):
        item = process_time(raw_item, utcnow, utcmin)
        cell, wifi = process_measure(parent.id, item, session)
        cell_measures.extend(cell)
        wifi_measures.extend(wifi)
        lat = to_precise_int(item['lat'])
        lon = to_precise_int(item['lon'])
        positions.append({'lat': lat, 'lon': lon})

    stats = get_heka_client()

    if cell_measures:
        # group by and create task per cell key
        stats.incr("items.uploaded.cell_measures", len(cell_measures))
        by_cell = defaultdict(list)
        for cm in cell_measures:
            key = CellKey(cm['radio'], cm['mcc'], cm['mnc'],
                          cm['lac'], cm['cid'], cm['psc'])
            by_cell[key].append(cm)

        for group in by_cell.values():
            insert_cell_measures.delay(group, userid=userid)

    if wifi_measures:
        # group by and create task per wifi key
        stats.incr("items.uploaded.wifi_measures", len(wifi_measures))
        by_wifi = defaultdict(list)
        for wm in wifi_measures:
            by_wifi[wm['key']].append(wm)

        for group in by_wifi.values():
            insert_wifi_measures.delay(group, userid=userid)

    if userid is not None:
        process_score(userid, len(items), session)
    if positions:
        process_mapstat(positions, session, userid=userid)
Ejemplo n.º 7
0
def mark_moving_wifis(session, moving_wifis):
    """
    Put moving wifis on the blacklist and queue them for removal.

    Wifi keys which are already present in ``WifiBlacklist`` are ignored.
    Every other key gets a blacklist row, the number of new rows is
    reported to heka and the keys are sent to the ``remove_wifi`` task.

    :param session: A database session for queries and inserts.
    :param moving_wifis: An iterable of objects with a ``key`` attribute.
    """
    wanted = set(wifi.key for wifi in moving_wifis)
    utcnow = datetime.utcnow()

    blocked_query = session.query(WifiBlacklist.key).filter(
        WifiBlacklist.key.in_(wanted))
    blocked = set(found[0] for found in blocked_query.all())

    to_block = wanted - blocked
    if not to_block:
        return

    table = WifiBlacklist.__table__
    for wifi_key in to_block:
        # on duplicate key, do a no-op change
        stmt = table.insert(on_duplicate='created=created')
        stmt = stmt.values(key=wifi_key, created=utcnow)
        session.execute(stmt)

    heka = get_heka_client()
    heka.incr("items.blacklisted.wifi_moving", len(to_block))
    remove_wifi.delay(list(to_block))
Ejemplo n.º 8
0
def geolocate_view(request):
    """
    Answer a geolocate request from wifi, cell, cell LAC or GeoIP data.

    When wifi access points are given only the wifi search is tried;
    otherwise the cell tower search is tried, followed by the cell LAC
    search. If nothing matched and a client address is available, a
    GeoIP lookup is the final fallback. Hits and their accuracy are
    reported to heka.

    :param request: The incoming request object.
    :returns: A dict with ``location`` and ``accuracy``, or an
              ``HTTPNotFound`` response with a JSON body on a miss.
    """
    stats = get_heka_client()

    data, errors = preprocess_request(
        request,
        schema=GeoLocateSchema(),
        extra_checks=(geolocate_validator, ),
        response=JSONError,
        accept_empty=True,
    )

    db = request.db_slave_session
    found = None

    if data:
        if data['wifiAccessPoints']:
            found = search_wifi_ap(db, data)
            if found is not None:
                stats.incr('geolocate.wifi_hit')
                stats.timer_send('geolocate.accuracy.wifi',
                                 found['accuracy'])
        else:
            found = search_cell_tower(db, data)
            if found is None:
                # no direct cell match, retry on the cell LAC level
                found = search_cell_tower_lac(db, data)
                if found is not None:
                    stats.incr('geolocate.cell_lac_hit')
                    stats.timer_send('geolocate.accuracy.cell_lac',
                                     found['accuracy'])
            else:
                stats.incr('geolocate.cell_hit')
                stats.timer_send('geolocate.accuracy.cell',
                                 found['accuracy'])

    if found is None and request.client_addr:
        found = search_geoip(request.registry.geoip_db,
                             request.client_addr)
        if found is not None:
            stats.incr('geolocate.geoip_hit')
            stats.timer_send('geolocate.accuracy.geoip',
                             found['accuracy'])

    if found is not None:
        return {
            "location": {
                "lat": found['lat'],
                "lng": found['lon'],
            },
            "accuracy": float(found['accuracy']),
        }

    stats.incr('geolocate.miss')
    response = HTTPNotFound()
    response.content_type = 'application/json'
    response.body = NOT_FOUND
    return response
Ejemplo n.º 9
0
def process_measures(items, session, userid=None):
    """
    Store one measure per submitted item and schedule per-key inserts.

    A ``Measure`` row is added for every item and the session is flushed
    so ids are available. Each item then goes through ``process_time``
    and ``process_measure``; the produced cell and wifi measures are
    grouped by key and passed to the matching insert task.

    :param items: A sequence of submitted item dicts with at least
                  ``lat`` and ``lon`` keys.
    :param session: A database session.
    :param userid: Optional user id, used for scoring and map stats.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    utcmin = utcnow - datetime.timedelta(60)

    # get enough auto-increment ids assigned
    measures = [Measure() for _ in items]
    for row in measures:
        session.add(row)
    # TODO switch unique measure id to a uuid, so we don't have to do
    # get these from a savepoint here
    session.flush()

    positions = []
    cell_measures = []
    wifi_measures = []
    for row, submitted in zip(measures, items):
        item = process_time(submitted, utcnow, utcmin)
        cell, wifi = process_measure(row.id, item, session)
        cell_measures.extend(cell)
        wifi_measures.extend(wifi)
        position = {
            'lat': to_precise_int(item['lat']),
            'lon': to_precise_int(item['lon']),
        }
        positions.append(position)

    heka = get_heka_client()

    if cell_measures:
        # group by and create task per cell key
        heka.incr("items.uploaded.cell_measures", len(cell_measures))
        grouped_cells = defaultdict(list)
        for cell_measure in cell_measures:
            grouped_cells[to_cellkey_psc(cell_measure)].append(
                cell_measure)

        for batch in grouped_cells.values():
            insert_cell_measures.delay(batch, userid=userid)

    if wifi_measures:
        # group by and create task per wifi key
        heka.incr("items.uploaded.wifi_measures", len(wifi_measures))
        grouped_wifis = defaultdict(list)
        for wifi_measure in wifi_measures:
            grouped_wifis[wifi_measure['key']].append(wifi_measure)

        for batch in grouped_wifis.values():
            insert_wifi_measures.delay(batch, userid=userid)

    if userid is not None:
        process_score(userid, len(items), session)
    if positions:
        process_mapstat(positions, session, userid=userid)
Ejemplo n.º 10
0
def search_view(request):
    """
    Answer a search request from wifi, cell, cell LAC or GeoIP data.

    The sources are tried in that order and the first one returning a
    result wins. The wifi, cell and cell LAC searches only run when
    request data is present; GeoIP only runs when a client address is
    available. Hits and their accuracy are reported to heka.

    :param request: The incoming request object.
    :returns: A dict with ``status`` ``ok`` plus ``lat``, ``lon`` and
              ``accuracy``, or a dict with ``status`` ``not_found``.
    """
    stats = get_heka_client()

    data, errors = preprocess_request(
        request,
        schema=SearchSchema(),
        extra_checks=(check_cell_or_wifi, ),
        accept_empty=True,
    )

    db = request.db_slave_session
    found = None

    if data:
        if data['wifi']:
            found = search_wifi(db, data)
            if found is not None:
                stats.incr('search.wifi_hit')
                stats.timer_send('search.accuracy.wifi',
                                 found['accuracy'])

        if found is None:
            # no wifi result found, fall back to cell
            found = search_cell(db, data)
            if found is not None:
                stats.incr('search.cell_hit')
                stats.timer_send('search.accuracy.cell',
                                 found['accuracy'])

        if found is None:
            # no direct cell result found, try cell LAC
            found = search_cell_lac(db, data)
            if found is not None:
                stats.incr('search.cell_lac_hit')
                stats.timer_send('search.accuracy.cell_lac',
                                 found['accuracy'])

    if found is None and request.client_addr:
        # no cell or wifi, fall back again to geoip
        found = search_geoip(request.registry.geoip_db,
                             request.client_addr)
        if found is not None:
            stats.incr('search.geoip_hit')
            stats.timer_send('search.accuracy.geoip',
                             found['accuracy'])

    if found is not None:
        return {
            'status': 'ok',
            'lat': found['lat'],
            'lon': found['lon'],
            'accuracy': found['accuracy'],
        }

    stats.incr('search.miss')
    return {'status': 'not_found'}
Ejemplo n.º 11
0
def geolocate_view(request):
    """
    Answer a geolocate request; an API key is required.

    Requests without a ``key`` query parameter get an ``HTTPBadRequest``
    response. Otherwise the wifi search is used when wifi access points
    are given, else the cell tower search. GeoIP is the fallback when
    nothing matched and a client address is available.

    :param request: The incoming request object.
    :returns: A dict with ``location`` and ``accuracy``, or an
              ``HTTPBadRequest`` / ``HTTPNotFound`` response with a
              JSON body.
    """
    stats = get_heka_client()
    api_key = request.GET.get('key', None)

    if api_key is None:
        stats.incr('geolocate.no_api_key')

        response = HTTPBadRequest()
        response.content_type = 'application/json'
        response.body = NO_API_KEY
        return response

    stats.incr('geolocate.api_key.%s' % api_key.replace('.', '__'))

    data, errors = preprocess_request(
        request,
        schema=GeoLocateSchema(),
        extra_checks=(geolocate_validator, ),
        response=JSONError,
    )

    db = request.db_slave_session

    # wifi data takes precedence; cell data is only used without it
    if data['wifiAccessPoints']:
        found = search_wifi_ap(db, data)
        hit_metric = 'geolocate.wifi_hit'
    else:
        found = search_cell_tower(db, data)
        hit_metric = 'geolocate.cell_hit'
    if found is not None:
        stats.incr(hit_metric)

    if found is None and request.client_addr:
        found = search_geoip(request.registry.geoip_db,
                             request.client_addr)
        if found is not None:
            stats.incr('geolocate.geoip_hit')

    if found is None:
        stats.incr('geolocate.miss')
        response = HTTPNotFound()
        response.content_type = 'application/json'
        response.body = NOT_FOUND
        return response

    location = {
        "lat": found['lat'],
        "lng": found['lon'],
    }
    return {
        "location": location,
        "accuracy": float(found['accuracy']),
    }
Ejemplo n.º 12
0
def geolocate_view(request):
    """
    Handle a geolocate request that must carry an API key.

    A missing ``key`` query parameter results in an ``HTTPBadRequest``
    response. With a key, the position is searched via wifi access
    points if any were sent, otherwise via cell towers, and finally via
    GeoIP if there is still no result and a client address is known.

    :param request: The incoming request object.
    :returns: A dict with ``location`` and ``accuracy``, or an
              ``HTTPBadRequest`` / ``HTTPNotFound`` response with a
              JSON body.
    """
    api_key = request.GET.get('key', None)
    heka = get_heka_client()

    if api_key is None:
        heka.incr('geolocate.no_api_key')

        bad_request = HTTPBadRequest()
        bad_request.content_type = 'application/json'
        bad_request.body = NO_API_KEY
        return bad_request

    heka.incr('geolocate.api_key.%s' % api_key.replace('.', '__'))

    data, errors = preprocess_request(
        request,
        schema=GeoLocateSchema(),
        extra_checks=(geolocate_validator, ),
        response=JSONError,
    )

    session = request.db_slave_session
    position = None

    use_wifi = bool(data['wifiAccessPoints'])
    if use_wifi:
        position = search_wifi_ap(session, data)
    else:
        position = search_cell_tower(session, data)

    if position is not None:
        if use_wifi:
            heka.incr('geolocate.wifi_hit')
        else:
            heka.incr('geolocate.cell_hit')
    elif request.client_addr:
        # neither wifi nor cell matched, try the client address
        position = search_geoip(request.registry.geoip_db,
                                request.client_addr)
        if position is not None:
            heka.incr('geolocate.geoip_hit')

    if position is None:
        heka.incr('geolocate.miss')
        not_found = HTTPNotFound()
        not_found.content_type = 'application/json'
        not_found.body = NOT_FOUND
        return not_found

    return {
        "location": {
            "lat": position['lat'],
            "lng": position['lon'],
        },
        "accuracy": float(position['accuracy']),
    }
Ejemplo n.º 13
0
def search_view(request):
    """
    Answer a search request; requests without an API key always miss.

    With a ``key`` query parameter present, the wifi search is tried
    when wifi data was sent, then the cell search, then GeoIP if a
    client address is available. The first result found is returned.

    :param request: The incoming request object.
    :returns: A dict with ``status`` ``ok`` plus ``lat``, ``lon`` and
              ``accuracy``, or a dict with ``status`` ``not_found``.
    """
    stats = get_heka_client()
    api_key = request.GET.get('key', None)

    if api_key is None:
        # TODO: change into a better error response
        stats.incr('search.no_api_key')
        return {'status': 'not_found'}

    stats.incr('search.api_key')

    data, errors = preprocess_request(
        request,
        schema=SearchSchema(),
        extra_checks=(check_cell_or_wifi, ),
    )

    db = request.db_slave_session
    found = None

    if data['wifi']:
        found = search_wifi(db, data)
        if found is not None:
            stats.incr('search.wifi_hit')

    if found is None:
        # no wifi result found, fall back to cell
        found = search_cell(db, data)
        if found is not None:
            stats.incr('search.cell_hit')

    if found is None and request.client_addr:
        # no cell or wifi, fall back again to geoip
        found = search_geoip(request.registry.geoip_db,
                             request.client_addr)
        if found is not None:
            stats.incr('search.geoip_hit')

    if found is not None:
        return {
            'status': 'ok',
            'lat': found['lat'],
            'lon': found['lon'],
            'accuracy': found['accuracy'],
        }

    stats.incr('search.miss')
    return {'status': 'not_found'}
Ejemplo n.º 14
0
def search_view(request):
    """
    Answer a search request from wifi, cell, cell LAC or GeoIP data.

    The wifi search runs when wifi data was sent. Without a wifi result
    the cell search is tried, then the cell LAC search, and finally
    GeoIP if a client address is available. Hits are reported to heka.

    :param request: The incoming request object.
    :returns: A dict with ``status`` ``ok`` plus ``lat``, ``lon`` and
              ``accuracy``, or a dict with ``status`` ``not_found``.
    """
    stats = get_heka_client()

    data, errors = preprocess_request(
        request,
        schema=SearchSchema(),
        extra_checks=(check_cell_or_wifi, ),
    )

    db = request.db_slave_session
    found = None

    if data['wifi']:
        found = search_wifi(db, data)
        if found is not None:
            stats.incr('search.wifi_hit')

    if found is None:
        # no wifi result found, fall back to cell
        found = search_cell(db, data)
        if found is None:
            # no direct cell result found, try cell LAC
            found = search_cell_lac(db, data)
            if found is not None:
                stats.incr('search.cell_lac_hit')
        else:
            stats.incr('search.cell_hit')

    if found is None and request.client_addr:
        # no cell or wifi, fall back again to geoip
        found = search_geoip(request.registry.geoip_db,
                             request.client_addr)
        if found is not None:
            stats.incr('search.geoip_hit')

    if found is not None:
        return {
            'status': 'ok',
            'lat': found['lat'],
            'lon': found['lon'],
            'accuracy': found['accuracy'],
        }

    stats.incr('search.miss')
    return {'status': 'not_found'}
Ejemplo n.º 15
0
        def closure(request, *args, **kwargs):
            """
            Record API key metrics for the request, then call ``func``.

            ``func``, ``func_name`` and ``error_on_invalidkey`` come from
            the enclosing scope. A missing key yields an
            ``HTTPBadRequest`` response when ``error_on_invalidkey`` is
            set; in every other case the wrapped function's result is
            returned.
            """
            stats = get_heka_client()
            api_key = request.GET.get('key', None)

            if api_key is not None:
                key_query = request.db_slave_session.query(ApiKey).filter(
                    ApiKey.valid_key == api_key)
                if key_query.count():
                    metric = '%s.api_key.%s' % (
                        func_name, api_key.replace('.', '__'))
                else:
                    metric = '%s.unknown_api_key' % func_name
                stats.incr(metric)
            else:
                stats.incr('%s.no_api_key' % func_name)
                if error_on_invalidkey:
                    response = HTTPBadRequest()
                    response.content_type = 'application/json'
                    response.body = NO_API_KEY
                    return response

            return func(request, *args, **kwargs)
Ejemplo n.º 16
0
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """
    Create cell measures from submitted entries and update cell counts.

    Entries for which ``create_cell_measure`` returns a falsy value are
    counted as malformed and dropped. Entries are also dropped once a
    cell has no room left below ``max_measures_per_cell``. Accepted
    measures are added to the session and the per-cell counters are
    updated via ``update_cell_measure_count``.

    :param session: A database session for queries and inserts.
    :param entries: An iterable of entries accepted by
                    ``create_cell_measure``.
    :param userid: Optional user id; scored for newly seen cells.
    :param max_measures_per_cell: Upper limit of stored measures per
                                  cell used as the intake limit.
    :returns: The list of accepted cell measures.
    """
    cell_count = defaultdict(int)
    cell_measures = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    dropped_malformed = 0
    dropped_overflow = 0
    space_available = {}

    # process entries
    for entry in entries:

        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            dropped_malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in space_available:
            query = session.query(Cell.total_measures).filter(
                *join_cellkey(Cell, cell_key))
            curr = query.first()
            if curr is not None:
                space_available[cell_key] = max_measures_per_cell - curr[0]
            else:
                space_available[cell_key] = max_measures_per_cell

        # The budget above is the only overflow check needed: it is
        # seeded from the stored total once per cell, so re-querying the
        # same total for every entry cannot reject anything it accepts.
        if space_available[cell_key] <= 0:
            dropped_overflow += 1
            continue
        space_available[cell_key] -= 1

        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[cell_key] += 1

    heka_client = get_heka_client()

    if dropped_malformed != 0:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=dropped_malformed)

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=dropped_overflow)

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(
            cell_key, count, utcnow, session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(cell_measures))
    session.add_all(cell_measures)
    return cell_measures
Ejemplo n.º 17
0
 def heka_client(self):
     """Return the client provided by ``get_heka_client()``."""
     return get_heka_client()
Ejemplo n.º 18
0
def process_wifi_measures(session, entries, userid=None,
                          max_measures_per_wifi=11000):
    """
    Create wifi measures from submitted entries and update wifi counts.

    Entries are dropped once a wifi has no room left below
    ``max_measures_per_wifi``. Measures for blacklisted wifis are still
    created, but they are left out of the per-wifi counters. Accepted
    measures are added to the session.

    :param session: A database session for queries and inserts.
    :param entries: A sequence of entry dicts with at least a ``key``.
    :param userid: Optional user id; scored for newly seen wifis.
    :param max_measures_per_wifi: Upper limit of stored measures per
                                  wifi used as the intake limit.
    :returns: The list of accepted wifi measures.
    """
    wifi_measures = []
    wifi_count = defaultdict(int)
    wifi_keys = set([e['key'] for e in entries])

    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    # did we get measures for blacklisted wifis?
    if wifi_keys:
        blacked = session.query(WifiBlacklist.key).filter(
            WifiBlacklist.key.in_(wifi_keys)).all()
        blacked = set([b[0] for b in blacked])
    else:
        # no entries, avoid a query with an empty IN list
        blacked = set()

    space_available = {}
    dropped_overflow = 0

    # process entries
    for entry in entries:
        wifi_key = entry['key']

        # check if there's space for new measurement within per-AP maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if wifi_key not in space_available:
            query = session.query(Wifi.total_measures).filter(
                Wifi.key == wifi_key)
            curr = query.first()
            if curr is not None:
                space_available[wifi_key] = max_measures_per_wifi - curr[0]
            else:
                space_available[wifi_key] = max_measures_per_wifi

        if space_available[wifi_key] > 0:
            space_available[wifi_key] -= 1
        else:
            dropped_overflow += 1
            continue

        # convert frequency into channel numbers and remove frequency
        convert_frequency(entry)
        wifi_measures.append(create_wifi_measure(utcnow, entry))
        if wifi_key not in blacked:
            # skip blacklisted wifi AP's
            wifi_count[wifi_key] += 1

    heka_client = get_heka_client()

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.wifi_ingress_overflow",
                         count=dropped_overflow)

    # update user score
    if userid is not None and wifi_count:
        # Only look up the wifis that were actually counted. Keys that
        # were dropped for overflow already exist as wifis; including
        # them would cancel out genuinely new wifis in the subtraction.
        counted_keys = list(wifi_count)
        known = session.query(Wifi.key).filter(Wifi.key.in_(counted_keys))
        known = set([w[0] for w in known.all()])
        # subtract known wifis from all counted unique wifis
        new_wifis = len(wifi_count) - len(known)
        if new_wifis > 0:
            process_score(userid, new_wifis, session, key='new_wifi')

    # update new/total measure counts
    for wifi_key, num in wifi_count.items():
        stmt = Wifi.__table__.insert(
            on_duplicate='new_measures = new_measures + %s, '
                         'total_measures = total_measures + %s' % (num, num)
        ).values(
            key=wifi_key, created=utcnow,
            new_measures=num, total_measures=num)
        session.execute(stmt)

    heka_client.incr("items.inserted.wifi_measures",
                     count=len(wifi_measures))
    session.add_all(wifi_measures)
    return wifi_measures
Ejemplo n.º 19
0
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """
    Create cell measures from submitted entries and update cell counts.

    Entries for which ``create_cell_measure`` returns a falsy value are
    counted as malformed and dropped. Entries are also dropped once a
    cell has no room left below ``max_measures_per_cell``. Accepted
    measures are added to the session and the per-cell counters are
    updated via ``update_cell_measure_count``.

    :param session: A database session for queries and inserts.
    :param entries: An iterable of entries accepted by
                    ``create_cell_measure``.
    :param userid: Optional user id; scored for newly seen cells.
    :param max_measures_per_cell: Upper limit of stored measures per
                                  cell used as the intake limit.
    :returns: The list of accepted cell measures.
    """
    cell_count = defaultdict(int)
    cell_measures = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    dropped_malformed = 0
    dropped_overflow = 0
    space_available = {}

    # process entries
    for entry in entries:

        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            dropped_malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in space_available:
            query = session.query(Cell.total_measures).filter(
                Cell.radio == cell_key.radio,
                Cell.mcc == cell_key.mcc,
                Cell.mnc == cell_key.mnc,
                Cell.lac == cell_key.lac,
                Cell.cid == cell_key.cid,
                Cell.psc == cell_key.psc)
            curr = query.first()
            if curr is not None:
                space_available[cell_key] = max_measures_per_cell - curr[0]
            else:
                space_available[cell_key] = max_measures_per_cell

        # The budget above is the only overflow check needed: it is
        # seeded from the stored total once per cell, so re-querying the
        # same total for every entry cannot reject anything it accepts.
        if space_available[cell_key] <= 0:
            dropped_overflow += 1
            continue
        space_available[cell_key] -= 1

        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[cell_key] += 1

    heka_client = get_heka_client()

    if dropped_malformed != 0:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=dropped_malformed)

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=dropped_overflow)

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(
            cell_key, count, utcnow, session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(cell_measures))
    session.add_all(cell_measures)
    return cell_measures
Ejemplo n.º 20
0
 def heka_client(self):
     """Return the client provided by ``get_heka_client()``."""
     return get_heka_client()
Ejemplo n.º 21
0
def process_wifi_measures(session, entries, userid=None,
                          max_measures_per_wifi=11000):
    """
    Create wifi measures from submitted entries and update wifi counts.

    Entries are dropped once a wifi has no room left below
    ``max_measures_per_wifi``. Measures for blacklisted wifis are still
    created, but they are left out of the per-wifi counters. Accepted
    measures are added to the session.

    :param session: A database session for queries and inserts.
    :param entries: A sequence of entry dicts with at least a ``key``.
    :param userid: Optional user id; scored for newly seen wifis.
    :param max_measures_per_wifi: Upper limit of stored measures per
                                  wifi used as the intake limit.
    :returns: The list of accepted wifi measures.
    """
    wifi_measures = []
    wifi_count = defaultdict(int)
    wifi_keys = set([e['key'] for e in entries])

    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    # did we get measures for blacklisted wifis?
    if wifi_keys:
        blacked = session.query(WifiBlacklist.key).filter(
            WifiBlacklist.key.in_(wifi_keys)).all()
        blacked = set([b[0] for b in blacked])
    else:
        # no entries, avoid a query with an empty IN list
        blacked = set()

    space_available = {}
    dropped_overflow = 0

    # process entries
    for entry in entries:
        wifi_key = entry['key']

        # check if there's space for new measurement within per-AP maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if wifi_key not in space_available:
            query = session.query(Wifi.total_measures).filter(
                Wifi.key == wifi_key)
            curr = query.first()
            if curr is not None:
                space_available[wifi_key] = max_measures_per_wifi - curr[0]
            else:
                space_available[wifi_key] = max_measures_per_wifi

        if space_available[wifi_key] > 0:
            space_available[wifi_key] -= 1
        else:
            dropped_overflow += 1
            continue

        # convert frequency into channel numbers and remove frequency
        convert_frequency(entry)
        wifi_measures.append(create_wifi_measure(utcnow, entry))
        if wifi_key not in blacked:
            # skip blacklisted wifi AP's
            wifi_count[wifi_key] += 1

    heka_client = get_heka_client()

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.wifi_ingress_overflow",
                         count=dropped_overflow)

    # update user score
    if userid is not None and wifi_count:
        # Only look up the wifis that were actually counted. Keys that
        # were dropped for overflow already exist as wifis; including
        # them would cancel out genuinely new wifis in the subtraction.
        counted_keys = list(wifi_count)
        known = session.query(Wifi.key).filter(Wifi.key.in_(counted_keys))
        known = set([w[0] for w in known.all()])
        # subtract known wifis from all counted unique wifis
        new_wifis = len(wifi_count) - len(known)
        if new_wifis > 0:
            process_score(userid, new_wifis, session, key='new_wifi')

    # update new/total measure counts
    for wifi_key, num in wifi_count.items():
        stmt = Wifi.__table__.insert(
            on_duplicate='new_measures = new_measures + %s, '
                         'total_measures = total_measures + %s' % (num, num)
        ).values(
            key=wifi_key, created=utcnow,
            new_measures=num, total_measures=num)
        session.execute(stmt)

    heka_client.incr("items.inserted.wifi_measures",
                     count=len(wifi_measures))
    session.add_all(wifi_measures)
    return wifi_measures
Ejemplo n.º 22
0
def search_all_sources(session, api_name, data,
                       client_addr=None, geoip_db=None):
    """
    Resolve a position for a search request from all available sources.

    Shared by every lookup API. Candidate positions are requested in the
    order cell-lac, cell, wifi. A candidate replaces the current best
    estimate only if it lies inside one of the plausible countries (when
    any are known) and does not contradict the estimate already held.
    The GeoIP result is used only when none of those sources produced
    an accepted candidate.

    :param session: A database session for queries.
    :param api_name: Prefix used in metric names (for example "geolocate").
    :param data: A dict conforming to the search API; the 'wifi', 'cell'
                 and 'radio' entries are read here.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :returns: A dict with rounded 'lat', 'lon' and 'accuracy' values, or
              None when no source yielded a usable result.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    # Wifi data is passed through untouched; the cell entries are
    # filled in below.
    validated = {
        'wifi': data.get('wifi', []),
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Normalize each submitted cell; entries that normalize to a falsy
    # value are dropped.
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for raw_cell in data.get('cell', ()):
        normalized = normalized_cell_dict(raw_cell, default_radio=radio)
        if not normalized:
            continue
        cell_key = to_cellkey(normalized)
        validated['cell'].append(cell_key)
        # The matching LAC key is the cell key with its cid replaced by
        # CELLID_LAC; the set collapses duplicates.
        validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # One combined lookup for all cell keys followed by all lac keys.
    all_cell_keys = list(validated['cell']) + list(validated['cell_lac'])
    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        heka_client.raven(RAVEN_ERROR)
        all_networks = []

    # Sort the returned networks into lac-level and cell-level buckets.
    for network in all_networks:
        bucket = ('cell_lac_network' if network.key == CELLID_LAC
                  else 'cell_network')
        validated[bucket].append(network)

    # GeoIP is always consulted: its country guess filters out bogus
    # hits, and its own result is the fallback of last resort.
    geoip_res, countries = geoip_and_best_guess_country_codes(
        validated['cell'], api_name, client_addr, geoip_db)

    best = None
    best_metric = None

    # "Zoom in" from the coarsest source to the finest one.
    search_steps = (
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi),
    )
    for data_field, object_field, metric_name, search_fn in search_steps:
        # Nothing was submitted for this source.
        if not validated[data_field]:
            continue

        candidate = None
        try:
            candidate = search_fn(session, validated[object_field])
        except Exception:
            heka_client.raven(RAVEN_ERROR)
            stats_client.incr('%s.%s_error' %
                              (api_name, metric_name))

        # A failed search is also counted as "not found".
        if candidate is None:
            stats_client.incr('%s.no_%s_found' %
                              (api_name, metric_name))
            continue

        lat = float(candidate['lat'])
        lon = float(candidate['lon'])

        stats_client.incr('%s.%s_found' %
                          (api_name, metric_name))

        # Reject a hit that falls in none of the plausible countries.
        in_some_country = any(
            location_is_in_country(lat, lon, country, 1)
            for country in countries)
        if countries and not in_some_country:
            stats_client.incr('%s.anomaly.%s_country_mismatch' %
                              (api_name, metric_name))
            continue

        if best is None:
            # The first acceptable hit is always taken.
            acceptable = True
        else:
            # A later hit must lie within the accuracy of the current
            # estimate. NOTE(review): the * 1000 suggests distance() is
            # in km and accuracy in metres -- confirm.
            offset = distance(float(best['lat']),
                              float(best['lon']), lat, lon) * 1000
            acceptable = offset <= best['accuracy']

        if acceptable:
            best = candidate
            best_metric = metric_name
        else:
            stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                              (api_name, metric_name, best_metric))

    # GeoIP is kept out of the loop above so that it is only used when
    # cell and wifi produced nothing.
    if not best and geoip_res:
        best = geoip_res
        best_metric = 'geoip'

    if not best:
        stats_client.incr('%s.miss' % api_name)
        return None

    rounded_result = {
        'lat': round(best['lat'], DEGREE_DECIMAL_PLACES),
        'lon': round(best['lon'], DEGREE_DECIMAL_PLACES),
        'accuracy': round(best['accuracy'], DEGREE_DECIMAL_PLACES),
    }

    stats_client.incr('%s.%s_hit' % (api_name, best_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, best_metric),
                        rounded_result['accuracy'])

    return rounded_result