Exemple #1
0
def search_cell(session, data):
    sql_null = None  # avoid pep8 warning
    radio = RADIO_TYPE.get(data['radio'], -1)
    cells = []
    for cell in data['cell']:
        if cell.get('radio'):
            radio = RADIO_TYPE.get(cell['radio'], -1)

        query = session.query(Cell.lat, Cell.lon).filter(
            Cell.radio == radio).filter(
            Cell.mcc == cell['mcc']).filter(
            Cell.mnc == cell['mnc']).filter(
            Cell.lac == cell['lac']).filter(
            Cell.cid == cell['cid']).filter(
            Cell.lat != sql_null).filter(
            Cell.lon != sql_null
        )
        result = query.first()
        if result is not None:
            cells.append(result)

    if not cells:
        return

    length = len(cells)
    avg_lat = sum([c[0] for c in cells]) / length
    avg_lon = sum([c[1] for c in cells]) / length
    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': 35000,
    }
Exemple #2
0
def search_cell(session, data):
    sql_null = None  # avoid pep8 warning
    radio = RADIO_TYPE.get(data['radio'], -1)
    cells = []
    for cell in data['cell']:
        if cell.get('radio'):
            radio = RADIO_TYPE.get(cell['radio'], -1)

        query = session.query(Cell.lat, Cell.lon).filter(
            Cell.radio == radio).filter(Cell.mcc == cell['mcc']).filter(
                Cell.mnc == cell['mnc']).filter(
                    Cell.lac == cell['lac']).filter(
                        Cell.cid == cell['cid']).filter(
                            Cell.lat != sql_null).filter(Cell.lon != sql_null)
        result = query.first()
        if result is not None:
            cells.append(result)

    if not cells:
        return

    length = len(cells)
    avg_lat = sum([c[0] for c in cells]) / length
    avg_lon = sum([c[1] for c in cells]) / length
    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': 35000,
    }
Exemple #3
0
def search_cell(session, data):
    radio = RADIO_TYPE.get(data['radio'], -1)
    cell = data['cell'][0]
    if cell.get('radio'):
        radio = RADIO_TYPE.get(cell['radio'], -1)
    mcc = cell['mcc']
    mnc = cell['mnc']
    lac = cell['lac']
    cid = cell['cid']

    query = session.query(Cell)
    query = query.filter(Cell.radio == radio)
    query = query.filter(Cell.mcc == mcc)
    query = query.filter(Cell.mnc == mnc)
    query = query.filter(Cell.cid == cid)

    if lac >= 0:
        query = query.filter(Cell.lac == lac)

    result = query.first()
    if result is None:
        return

    return {
        'lat': quantize(result.lat),
        'lon': quantize(result.lon),
        'accuracy': 35000,
    }
Exemple #4
0
def search_cell_lac(session, data):
    radio = RADIO_TYPE.get(data['radio'], -1)
    lacs = []
    for cell in data['cell']:
        cell = normalized_cell_dict(cell, default_radio=radio)
        if not cell:
            continue

        cell['cid'] = CELLID_LAC
        key = to_cellkey(cell)

        query = session.query(Cell.lat, Cell.lon, Cell.range).filter(
            *join_cellkey(Cell, key)).filter(
            Cell.lat.isnot(None)).filter(
            Cell.lon.isnot(None)
        )
        result = query.first()
        if result is not None:
            lacs.append(Network(key, *result))

    if not lacs:
        return

    # take the smallest LAC of any the user is inside
    lac = sorted(lacs, key=operator.attrgetter('range'))[0]

    return {
        'lat': quantize(lac.lat),
        'lon': quantize(lac.lon),
        'accuracy': max(LAC_MIN_ACCURACY, lac.range),
    }
Exemple #5
0
def search_cell(session, data):
    radio = RADIO_TYPE.get(data['radio'], -1)
    cells = []
    for cell in data['cell']:
        cell = normalized_cell_dict(cell, default_radio=radio)
        if not cell:
            continue

        key = to_cellkey(cell)

        query = session.query(Cell.lat, Cell.lon, Cell.range).filter(
            *join_cellkey(Cell, key)).filter(
            Cell.lat.isnot(None)).filter(
            Cell.lon.isnot(None)
        )
        result = query.first()
        if result is not None:
            cells.append(Network(key, *result))

    if not cells:
        return

    length = len(cells)
    avg_lat = sum([c.lat for c in cells]) / length
    avg_lon = sum([c.lon for c in cells]) / length
    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': estimate_accuracy(avg_lat, avg_lon,
                                      cells, CELL_MIN_ACCURACY),
    }
Exemple #6
0
    def deserialize(self, data, default_radio=None):
        if data:
            if 'radio' in data:
                if isinstance(data['radio'], basestring):
                    data['radio'] = RADIO_TYPE.get(
                        data['radio'], self.fields['radio'].missing)

                # If a default radio was set,
                # and we don't know, use it as fallback
                if (self.is_missing(data, 'radio')
                        and default_radio is not None):
                    data['radio'] = default_radio

                # If the cell id >= 65536 then it must be a umts tower
                if (data.get('cid', 0) >= 65536
                        and data['radio'] == RADIO_TYPE['gsm']):
                    data['radio'] = RADIO_TYPE['umts']

            else:
                data['radio'] = default_radio

            # Treat cid=65535 without a valid lac as an unspecified value
            if (self.is_missing(data, 'lac')
                    and data.get('cid', None) == 65535):
                data['cid'] = self.fields['cid'].missing

        return super(ValidCellBaseSchema, self).deserialize(data)
Exemple #7
0
def process_cell_measure(session, measure_data, entries, userid=None):
    cell_count = defaultdict(int)
    cell_measures = []
    created = decode_datetime(measure_data.get('created', ''))

    # process entries
    for entry in entries:
        cell_measure = create_cell_measure(measure_data, entry)
        # use more specific cell type or
        # fall back to less precise measure
        if entry.get('radio'):
            cell_measure.radio = RADIO_TYPE.get(entry['radio'], -1)
        else:
            cell_measure.radio = measure_data['radio']
        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[CellKey(cell_measure.radio, cell_measure.mcc,
                           cell_measure.mnc, cell_measure.lac,
                           cell_measure.cid)] += 1

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(
            cell_key, count, created, session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    session.add_all(cell_measures)
    return cell_measures
Exemple #8
0
def make_cell_import_dict(row):

    def val(key, default):
        if key in row and row[key] != '' and row[key] is not None:
            return row[key]
        else:
            return default

    d = dict()

    d['created'] = datetime.fromtimestamp(
        int(val('created', 0))).replace(tzinfo=UTC)

    d['modified'] = datetime.fromtimestamp(
        int(val('updated', 0))).replace(tzinfo=UTC)

    d['lat'] = float(val('lat', -255))
    d['lon'] = float(val('lon', -255))

    d['radio'] = RADIO_TYPE.get(row['radio'].lower(), -1)

    for k in ['mcc', 'mnc', 'lac', 'cid', 'psc']:
        d[k] = int(val(k, -1))

    d['range'] = int(float(val('range', 0)))

    d['total_measures'] = int(val('samples', -1))
    d['changeable'] = bool(val('changeable', True))
    return normalized_cell_dict(d)
Exemple #9
0
def make_cell_import_dict(row):

    def val(key, default):
        if key in row and row[key] != '' and row[key] is not None:
            return row[key]
        else:
            return default

    d = dict()

    d['created'] = datetime.fromtimestamp(
        int(val('created', 0))).replace(tzinfo=UTC)

    d['modified'] = datetime.fromtimestamp(
        int(val('updated', 0))).replace(tzinfo=UTC)

    d['lat'] = float(val('lat', -255))
    d['lon'] = float(val('lon', -255))

    d['radio'] = RADIO_TYPE.get(row['radio'].lower(), -1)

    for k in ['mcc', 'mnc', 'lac', 'cid', 'psc']:
        d[k] = int(val(k, -1))

    d['range'] = int(float(val('range', 0)))

    d['total_measures'] = int(val('samples', -1))
    d['changeable'] = bool(val('changeable', True))
    return normalized_cell_dict(d)
Exemple #10
0
def process_measure(data, utcnow, session):
    session_objects = []
    measure = Measure()
    measure.created = utcnow
    measure.time = data['time']
    measure.lat = to_precise_int(data['lat'])
    measure.lon = to_precise_int(data['lon'])
    measure.accuracy = data['accuracy']
    measure.altitude = data['altitude']
    measure.altitude_accuracy = data['altitude_accuracy']
    measure.radio = RADIO_TYPE.get(data['radio'], -1)
    # get measure.id set
    session.add(measure)
    session.flush()
    if data.get('cell'):
        cells, cell_data = process_cell(data['cell'], measure)
        measure.cell = dumps(cell_data)
        session_objects.extend(cells)
    if data.get('wifi'):
        # filter out old-style sha1 hashes
        too_long_keys = False
        for w in data['wifi']:
            w['key'] = key = normalize_wifi_key(w['key'])
            if len(key) > 12:
                too_long_keys = True
                break
        if not too_long_keys:
            process_wifi(data['wifi'], measure)
            measure.wifi = dumps(data['wifi'])
    return (measure, session_objects)
Exemple #11
0
def process_measure(report_id, data, session):
    def add_missing_dict_entries(dst, src):
        # x.update(y) overwrites entries in x with those in y;
        # we want to only add those not already present
        for (k, v) in src.items():
            if k not in dst:
                dst[k] = v

    cell_measures = {}
    wifi_measures = {}
    measure_data = dict(
        report_id=report_id,
        lat=data['lat'],
        lon=data['lon'],
        heading=data.get('heading', -1.0),
        speed=data.get('speed', -1.0),
        time=encode_datetime(data['time']),
        accuracy=data.get('accuracy', 0),
        altitude=data.get('altitude', 0),
        altitude_accuracy=data.get('altitude_accuracy', 0),
    )
    measure_radio = RADIO_TYPE.get(data['radio'], -1)
    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for c in data['cell']:
            add_missing_dict_entries(c, measure_data)
            c = normalized_cell_measure_dict(c, measure_radio)
            if c is None:
                continue
            key = to_cellkey_psc(c)
            if key in cell_measures:
                existing = cell_measures[key]
                if existing['ta'] > c['ta'] or \
                   (existing['signal'] != 0 and
                    existing['signal'] < c['signal']) or \
                   existing['asu'] < c['asu']:
                    cell_measures[key] = c
            else:
                cell_measures[key] = c
    cell_measures = cell_measures.values()

    # flatten measure / wifi data into a single dict
    if data.get('wifi'):
        for w in data['wifi']:
            add_missing_dict_entries(w, measure_data)
            w = normalized_wifi_measure_dict(w)
            if w is None:
                continue
            key = w['key']
            if key in wifi_measures:
                existing = wifi_measures[key]
                if existing['signal'] != 0 and \
                   existing['signal'] < w['signal']:
                    wifi_measures[key] = w
            else:
                wifi_measures[key] = w
        wifi_measures = wifi_measures.values()
    return (cell_measures, wifi_measures)
Exemple #12
0
def normalized_cell_dict(d, default_radio=-1):
    """
    Returns a normalized copy of the provided cell dict d,
    or None if the dict was invalid.
    """
    if not isinstance(d, dict):  # pragma: no cover
        return None

    d = d.copy()
    if 'radio' in d and isinstance(d['radio'], basestring):
        d['radio'] = RADIO_TYPE.get(d['radio'], -1)

    d = normalized_dict(
        d,
        dict(radio=(MIN_RADIO_TYPE, MAX_RADIO_TYPE, default_radio),
             mcc=(1, 999, REQUIRED),
             mnc=(0, 32767, REQUIRED),
             lac=(1, 65535, -1),
             cid=(1, 268435455, -1),
             psc=(0, 512, -1)))

    if d is None:
        return None

    # Check against the list of all known valid mccs
    if d['mcc'] not in ALL_VALID_MCCS:
        return None

    # If a default radio was set, and we don't know, use it as fallback
    if d['radio'] == -1 and default_radio != -1:
        d['radio'] = default_radio

    # Skip CDMA towers missing lac or cid (no psc on CDMA exists to
    # backfill using inference)
    if d['radio'] == RADIO_TYPE['cdma'] and (d['lac'] < 0 or d['cid'] < 0):
        return None

    # Skip GSM/LTE/UMTS towers with an invalid MNC
    if (d['radio']
            in (RADIO_TYPE['gsm'], RADIO_TYPE['umts'], RADIO_TYPE['lte'])
            and d['mnc'] > 999):
        return None

    # Treat cid=65535 without a valid lac as an unspecified value
    if d['lac'] == -1 and d['cid'] == 65535:
        d['cid'] = -1

    # Must have (lac and cid) or psc (psc-only to use in backfill)
    if (d['lac'] == -1 or d['cid'] == -1) and d['psc'] == -1:
        return None

    # If the cell id >= 65536 then it must be a umts tower
    if d['cid'] >= 65536 and d['radio'] == RADIO_TYPE['gsm']:
        d['radio'] = RADIO_TYPE['umts']

    return d
Exemple #13
0
def process_measure(data, session):
    def add_missing_dict_entries(dst, src):
        # x.update(y) overwrites entries in x with those in y;
        # We want to only add those not already present.
        # We also only want to copy the top-level base measure data
        # and not any nested values like cell or wifi.
        for (k, v) in src.items():
            if k != 'radio' and k not in dst \
               and not isinstance(v, (tuple, list, dict)):
                dst[k] = v

    measure_data = normalized_measure_dict(data)
    if measure_data is None:
        return ([], [])

    cell_measures = {}
    wifi_measures = {}
    measure_radio = RADIO_TYPE.get(data['radio'], -1)

    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for c in data['cell']:
            add_missing_dict_entries(c, measure_data)
            c = normalized_cell_measure_dict(c, measure_radio)
            if c is None:  # pragma: no cover
                continue
            key = to_cellkey_psc(c)
            if key in cell_measures:  # pragma: no cover
                existing = cell_measures[key]
                if existing['ta'] > c['ta'] or \
                   (existing['signal'] != 0 and
                    existing['signal'] < c['signal']) or \
                   existing['asu'] < c['asu']:
                    cell_measures[key] = c
            else:
                cell_measures[key] = c
    cell_measures = cell_measures.values()

    # flatten measure / wifi data into a single dict
    if data.get('wifi'):
        for w in data['wifi']:
            add_missing_dict_entries(w, measure_data)
            w = normalized_wifi_measure_dict(w)
            if w is None:
                continue
            key = w['key']
            if key in wifi_measures:  # pragma: no cover
                existing = wifi_measures[key]
                if existing['signal'] != 0 and \
                   existing['signal'] < w['signal']:
                    wifi_measures[key] = w
            else:
                wifi_measures[key] = w
        wifi_measures = wifi_measures.values()
    return (cell_measures, wifi_measures)
Exemple #14
0
def normalized_cell_dict(d, default_radio=-1):
    """
    Returns a normalized copy of the provided cell dict d,
    or None if the dict was invalid.
    """
    if not isinstance(d, dict):  # pragma: no cover
        return None

    d = d.copy()
    if 'radio' in d and isinstance(d['radio'], basestring):
        d['radio'] = RADIO_TYPE.get(d['radio'], -1)

    d = normalized_dict(
        d, dict(radio=(MIN_RADIO_TYPE, MAX_RADIO_TYPE, default_radio),
                mcc=(1, 999, REQUIRED),
                mnc=(0, 32767, REQUIRED),
                lac=(1, 65535, -1),
                cid=(1, 268435455, -1),
                psc=(0, 512, -1)))

    if d is None:
        return None

    # Check against the list of all known valid mccs
    if d['mcc'] not in ALL_VALID_MCCS:
        return None

    # If a default radio was set, and we don't know, use it as fallback
    if d['radio'] == -1 and default_radio != -1:
        d['radio'] = default_radio

    # Skip CDMA towers missing lac or cid (no psc on CDMA exists to
    # backfill using inference)
    if d['radio'] == RADIO_TYPE['cdma'] and (d['lac'] < 0 or d['cid'] < 0):
        return None

    # Skip GSM/LTE/UMTS towers with an invalid MNC
    if (d['radio'] in (
            RADIO_TYPE['gsm'], RADIO_TYPE['umts'], RADIO_TYPE['lte'])
            and d['mnc'] > 999):
        return None

    # Treat cid=65535 without a valid lac as an unspecified value
    if d['lac'] == -1 and d['cid'] == 65535:
        d['cid'] = -1

    # Must have (lac and cid) or psc (psc-only to use in backfill)
    if (d['lac'] == -1 or d['cid'] == -1) and d['psc'] == -1:
        return None

    # If the cell id >= 65536 then it must be a umts tower
    if d['cid'] >= 65536 and d['radio'] == RADIO_TYPE['gsm']:
        d['radio'] = RADIO_TYPE['umts']

    return d
Exemple #15
0
def process_measure(data, session):
    def add_missing_dict_entries(dst, src):
        # x.update(y) overwrites entries in x with those in y;
        # We want to only add those not already present.
        # We also only want to copy the top-level base measure data
        # and not any nested values like cell or wifi.
        for (k, v) in src.items():
            if k != 'radio' and k not in dst \
               and not isinstance(v, (tuple, list, dict)):
                dst[k] = v

    measure_data = normalized_measure_dict(data)
    if measure_data is None:
        return ([], [])

    cell_measures = {}
    wifi_measures = {}
    measure_radio = RADIO_TYPE.get(data['radio'], -1)

    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for c in data['cell']:
            add_missing_dict_entries(c, measure_data)
            c = normalized_cell_measure_dict(c, measure_radio)
            if c is None:  # pragma: no cover
                continue
            key = to_cellkey_psc(c)
            if key in cell_measures:  # pragma: no cover
                existing = cell_measures[key]
                if existing['ta'] > c['ta'] or \
                   (existing['signal'] != 0 and
                    existing['signal'] < c['signal']) or \
                   existing['asu'] < c['asu']:
                    cell_measures[key] = c
            else:
                cell_measures[key] = c
    cell_measures = cell_measures.values()

    # flatten measure / wifi data into a single dict
    if data.get('wifi'):
        for w in data['wifi']:
            add_missing_dict_entries(w, measure_data)
            w = normalized_wifi_measure_dict(w)
            if w is None:
                continue
            key = w['key']
            if key in wifi_measures:  # pragma: no cover
                existing = wifi_measures[key]
                if existing['signal'] != 0 and \
                   existing['signal'] < w['signal']:
                    wifi_measures[key] = w
            else:
                wifi_measures[key] = w
        wifi_measures = wifi_measures.values()
    return (cell_measures, wifi_measures)
Exemple #16
0
    def clean_cell_keys(self, data):
        """Pre-process cell data."""
        radio = RADIO_TYPE.get(data.get('radio', ''), -1)
        cell_keys = []
        for cell in data.get(self.data_field, ()):
            cell = CellKeyMixin.validate(cell, default_radio=radio)
            if cell:
                cell_key = CellKeyMixin.to_hashkey(cell)
                cell_keys.append(cell_key)

        return cell_keys
Exemple #17
0
def process_measure(measure_id, data, session):
    cell_measures = []
    wifi_measures = []
    measure_data = dict(
        measure_id=measure_id,
        lat=to_precise_int(data['lat']),
        lon=to_precise_int(data['lon']),
        time=encode_datetime(data['time']),
        accuracy=data['accuracy'],
        altitude=data['altitude'],
        altitude_accuracy=data['altitude_accuracy'],
    )
    measure_radio = RADIO_TYPE.get(data['radio'], -1)
    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for c in data['cell']:
            c.update(measure_data)
            # use more specific cell type or
            # fall back to less precise measure
            if c['radio'] != '':
                c['radio'] = RADIO_TYPE.get(c['radio'], -1)
            else:
                c['radio'] = measure_radio
        cell_measures = data['cell']
    if data.get('wifi'):
        # filter out old-style sha1 hashes
        invalid_wifi_key = False
        for w in data['wifi']:
            w['key'] = key = normalize_wifi_key(w['key'])
            if not valid_wifi_pattern(key):
                invalid_wifi_key = True
                break

        if not invalid_wifi_key:
            # flatten measure / wifi data into a single dict
            for w in data['wifi']:
                w.update(measure_data)
            wifi_measures = data['wifi']
    return (cell_measures, wifi_measures)
Exemple #18
0
def process_measure(measure_id, data, session):
    cell_measures = []
    wifi_measures = []
    measure_data = dict(
        measure_id=measure_id,
        lat=to_precise_int(data['lat']),
        lon=to_precise_int(data['lon']),
        time=encode_datetime(data['time']),
        accuracy=data['accuracy'],
        altitude=data['altitude'],
        altitude_accuracy=data['altitude_accuracy'],
    )
    measure_radio = RADIO_TYPE.get(data['radio'], -1)
    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for c in data['cell']:
            c.update(measure_data)
            # use more specific cell type or
            # fall back to less precise measure
            if c['radio'] != '':
                c['radio'] = RADIO_TYPE.get(c['radio'], -1)
            else:
                c['radio'] = measure_radio
        cell_measures = data['cell']
    if data.get('wifi'):
        # filter out old-style sha1 hashes
        invalid_wifi_key = False
        for w in data['wifi']:
            w['key'] = key = normalize_wifi_key(w['key'])
            if not valid_wifi_pattern(key):
                invalid_wifi_key = True
                break

        if not invalid_wifi_key:
            # flatten measure / wifi data into a single dict
            for w in data['wifi']:
                w.update(measure_data)
            wifi_measures = data['wifi']
    return (cell_measures, wifi_measures)
Exemple #19
0
def search_cell_lac(session, data):
    sql_null = None  # avoid pep8 warning
    radio = RADIO_TYPE.get(data['radio'], -1)
    lacs = []
    for cell in data['cell']:
        if cell['mcc'] < 1 or cell['mnc'] < 0 or \
           cell['lac'] < 0 or cell['cid'] < 0:
            # Skip over invalid values
            continue

        if cell.get('radio'):
            radio = RADIO_TYPE.get(cell['radio'], -1)

        query = session.query(Cell).filter(
            Cell.radio == radio).filter(
            Cell.mcc == cell['mcc']).filter(
            Cell.mnc == cell['mnc']).filter(
            Cell.lac == cell['lac']).filter(
            Cell.cid == CELLID_LAC).filter(
            Cell.lat != sql_null).filter(
            Cell.lon != sql_null
        )
        result = query.first()
        if result is not None:
            lacs.append(result)

    if not lacs:
        return None

    # take the smallest LAC of any the user is inside
    lac = sorted(lacs, key=operator.attrgetter('range'))[0]

    return {
        'lat': quantize(lac.lat),
        'lon': quantize(lac.lon),
        'accuracy': lac.range,
    }
Exemple #20
0
def search_cell(session, data):
    radio = RADIO_TYPE.get(data['radio'], -1)
    cells = []
    for cell in data['cell']:
        if cell['mcc'] < 1 or cell['mnc'] < 0 or \
           cell['lac'] < 0 or cell['cid'] < 0:
            # Skip over invalid values
            continue

        if cell.get('radio'):
            radio = RADIO_TYPE.get(cell['radio'], -1)

        query = session.query(Cell.lat, Cell.lon).filter(
            Cell.radio == radio).filter(
            Cell.mcc == cell['mcc']).filter(
            Cell.mnc == cell['mnc']).filter(
            Cell.lac == cell['lac']).filter(
            Cell.cid == cell['cid']).filter(
            Cell.lat.isnot(None)).filter(
            Cell.lon.isnot(None)
        )
        result = query.first()
        if result is not None:
            cells.append(result)

    if not cells:
        return

    length = len(cells)
    avg_lat = sum([c[0] for c in cells]) / length
    avg_lon = sum([c[1] for c in cells]) / length
    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': 35000,
    }
Exemple #21
0
def process_cell_measure(session, measure_data, entries, userid=None):
    cell_measures = []
    # TODO group by unique cell
    for entry in entries:
        cell = create_cell_measure(measure_data, entry)
        # use more specific cell type or
        # fall back to less precise measure
        if entry.get('radio'):
            cell.radio = RADIO_TYPE.get(entry['radio'], -1)
        else:
            cell.radio = measure_data['radio']
        update_cell_measure_count(cell, session, userid=userid)
        cell_measures.append(cell)
    session.add_all(cell_measures)
    return cell_measures
Exemple #22
0
def countries(session):
    # We group by radio, mcc to take advantage of the index
    # and explicitly specify a small list of all valid radio values
    # to get mysql to actually use the index.
    radios = [v for v in RADIO_TYPE.values() if v >= 0]
    rows = session.query(Cell.radio, Cell.mcc, func.count()).filter(
        Cell.radio.in_(radios)).group_by(Cell.radio, Cell.mcc).all()

    # reverse grouping by mcc, radio
    codes = defaultdict(dict)
    for row in rows:
        codes[row[1]][row[0]] = row[2]

    countries = {}
    for code, item in codes.items():
        names = [(c.name, c.alpha3) for c in mcc(str(code))]
        multiple = bool(len(names) > 1)
        for name, alpha3 in names:
            country = {
                'code': alpha3,
                'name': name,
                'order': transliterate(name[:10].lower()),
                'multiple': multiple,
                'total': 0,
                'gsm': 0,
                'cdma': 0,
                'umts': 0,
                'lte': 0,
            }
            for t, v in item.items():
                country[RADIO_TYPE_INVERSE[t]] = int(v)
            country['total'] = int(sum(item.values()))
            if alpha3 not in countries:
                countries[alpha3] = country
            else:
                # some countries like the US have multiple mcc codes,
                # we merge them here
                for k, v in country.items():
                    if isinstance(v, int):
                        countries[alpha3][k] += v

    return sorted(countries.values(), key=itemgetter('name'))
Exemple #23
0
def process_cell(entries, measure):
    result = []
    cells = []
    for entry in entries:
        cell = CellMeasure(
            measure_id=measure.id, created=measure.created,
            lat=measure.lat, lon=measure.lon, time=measure.time,
            accuracy=measure.accuracy, altitude=measure.altitude,
            altitude_accuracy=measure.altitude_accuracy,
            mcc=entry['mcc'], mnc=entry['mnc'], lac=entry['lac'],
            cid=entry['cid'], psc=entry['psc'], asu=entry['asu'],
            signal=entry['signal'], ta=entry['ta'],
        )
        # use more specific cell type or fall back to less precise measure
        if entry['radio']:
            cell.radio = RADIO_TYPE.get(entry['radio'], -1)
        else:
            cell.radio = measure.radio
        cells.append(cell)
        result.append(entry)
    return (cells, result)
Exemple #24
0
def countries(session):
    # We group by radio, mcc to take advantage of the index
    # and explicitly specify a small list of all valid radio values
    # to get mysql to actually use the index.
    radios = [v for v in RADIO_TYPE.values() if v >= 0]
    rows = session.query(Cell.radio, Cell.mcc, func.count(Cell.id)).filter(
        Cell.radio.in_(radios)).group_by(Cell.radio, Cell.mcc).all()

    # reverse grouping by mcc, radio
    codes = defaultdict(dict)
    for row in rows:
        codes[row[1]][row[0]] = row[2]

    countries = {}
    for code, item in codes.items():
        names = [(c.name, c.alpha3) for c in mcc(str(code))]
        multiple = bool(len(names) > 1)
        for name, alpha3 in names:
            country = {
                'code': alpha3,
                'name': name,
                'order': transliterate(name[:10].lower()),
                'multiple': multiple,
                'total': 0,
                'gsm': 0, 'cdma': 0, 'umts': 0, 'lte': 0,
            }
            for t, v in item.items():
                country[RADIO_TYPE_INVERSE[t]] = int(v)
            country['total'] = int(sum(item.values()))
            if alpha3 not in countries:
                countries[alpha3] = country
            else:
                # some countries like the US have multiple mcc codes,
                # we merge them here
                for k, v in country.items():
                    if isinstance(v, int):
                        countries[alpha3][k] += v

    return sorted(countries.values(), key=itemgetter('name'))
Exemple #25
0
def insert_cell_measure(measure_data, entries, userid=None):
    cell_measures = []
    try:
        with insert_cell_measure.db_session() as session:
            for entry in entries:
                cell = create_cell_measure(measure_data, entry)
                # use more specific cell type or
                # fall back to less precise measure
                if entry.get('radio'):
                    cell.radio = RADIO_TYPE.get(entry['radio'], -1)
                else:
                    cell.radio = measure_data['radio']
                update_cell_measure_count(cell, session, userid=userid)
                cell_measures.append(cell)
            session.add_all(cell_measures)
            session.commit()
        return len(cell_measures)
    except IntegrityError as exc:  # pragma: no cover
        # TODO log error
        return 0
    except Exception as exc:  # pragma: no cover
        raise insert_cell_measure.retry(exc=exc)
Exemple #26
0
def process_measure(data, utcnow, session, userid=None):
    measure = Measure()
    measure.created = utcnow
    measure.time = data["time"]
    measure.lat = to_precise_int(data["lat"])
    measure.lon = to_precise_int(data["lon"])
    measure.accuracy = data["accuracy"]
    measure.altitude = data["altitude"]
    measure.altitude_accuracy = data["altitude_accuracy"]
    measure.radio = RADIO_TYPE.get(data["radio"], -1)
    # get measure.id set
    session.add(measure)
    session.flush()
    measure_data = dict(
        id=measure.id,
        created=encode_datetime(measure.created),
        lat=measure.lat,
        lon=measure.lon,
        time=encode_datetime(measure.time),
        accuracy=measure.accuracy,
        altitude=measure.altitude,
        altitude_accuracy=measure.altitude_accuracy,
        radio=measure.radio,
    )
    if data.get("cell"):
        insert_cell_measure.delay(measure_data, data["cell"], userid=userid)
        measure.cell = dumps(data["cell"])
    if data.get("wifi"):
        # filter out old-style sha1 hashes
        too_long_keys = False
        for w in data["wifi"]:
            w["key"] = key = normalize_wifi_key(w["key"])
            if len(key) > 12:
                too_long_keys = True
                break
        if not too_long_keys:
            insert_wifi_measure.delay(measure_data, data["wifi"], userid=userid)
            measure.wifi = dumps(data["wifi"])
    return measure
Exemple #27
0
def search_all_sources(session,
                       api_name,
                       data,
                       client_addr=None,
                       geoip_db=None,
                       api_key_log=False,
                       api_key_name=None):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    result = None
    result_metric = None

    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pre-process wifi data
    for wifi in data.get('wifi', ()):
        wifi = normalized_wifi_dict(wifi)
        if wifi:
            validated['wifi'].append(wifi)

    # Pre-process cell data
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # Merge all possible cell and lac keys into one list
    all_cell_keys = []
    all_cell_keys.extend(validated['cell'])
    for key in validated['cell_lac']:
        all_cell_keys.append(key)

    # Do a single query for all cells and lacs at the same time
    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        heka_client.raven(RAVEN_ERROR)
        all_networks = []
    for network in all_networks:
        if network.key == CELLID_LAC:
            validated['cell_lac_network'].append(network)
        else:
            validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res,
     countries) = geoip_and_best_guess_country_codes(validated['cell'],
                                                     api_name, client_addr,
                                                     geoip_db)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    for (data_field, object_field, metric_name, search_fn) in [
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi)
    ]:

        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field])
            except Exception:
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' % (api_name, metric_name))

            if r is None:
                stats_client.incr('%s.no_%s_found' % (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' % (api_name, metric_name))

                # Skip any hit that matches none of the possible countries.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                elif (distance(float(result['lat']), float(result['lon']), lat,
                               lon) * 1000 <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    # Do detailed logging for some api keys
    if api_key_log and api_key_name:
        api_log_metric = None
        wifi_keys = set([w['key'] for w in validated['wifi']])
        if wifi_keys and \
           len(filter_bssids_by_similarity(wifi_keys)) >= MIN_WIFIS_IN_QUERY:
            # Only count requests as WiFi-based if they contain enough
            # distinct WiFi networks to pass our filters
            if result_metric == 'wifi':
                api_log_metric = 'wifi_hit'
            else:
                api_log_metric = 'wifi_miss'
        elif validated['cell']:
            if result_metric == 'cell':
                api_log_metric = 'cell_hit'
            elif result_metric == 'cell_lac':
                api_log_metric = 'cell_lac_hit'
            else:
                api_log_metric = 'cell_miss'
        else:
            if geoip_res:
                api_log_metric = 'geoip_hit'
            else:
                api_log_metric = 'geoip_miss'
        if api_log_metric:
            stats_client.incr('%s.api_log.%s.%s' %
                              (api_name, api_key_name, api_log_metric))

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    rounded_result = {
        'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
        'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
        'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
    }

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                        rounded_result['accuracy'])

    return rounded_result
Exemple #28
0
def search_all_sources(session,
                       api_name,
                       data,
                       client_addr=None,
                       geoip_db=None,
                       api_key_log=False,
                       api_key_name=None,
                       result_type='position'):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :param api_key_log: Enable additional api key specific logging?
    :param api_key_name: The metric friendly api key name.
    :param result_type: What kind of result to return, either a lat/lon
                        position or a country estimate.
    """

    if result_type not in ('country', 'position'):
        raise ValueError('Invalid result_type, must be one of '
                         'position or country')

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    result = None
    result_metric = None

    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pre-process wifi data
    for wifi in data.get('wifi', ()):
        wifi = normalized_wifi_dict(wifi)
        if wifi:
            validated['wifi'].append(wifi)

    # Pre-process cell data
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            validated['cell_lac'].add(cell_key)

    found_cells = []

    # Query all cells and OCID cells
    for model in Cell, OCIDCell, CellArea:
        cell_filter = []
        for key in validated['cell']:
            # create a list of 'and' criteria for cell keys
            criterion = join_cellkey(model, key)
            cell_filter.append(and_(*criterion))

        if cell_filter:
            # only do a query if we have cell results, or this will match
            # all rows in the table
            load_fields = ('radio', 'mcc', 'mnc', 'lac', 'lat', 'lon', 'range')
            query = (session.query(model).options(
                load_only(*load_fields)).filter(or_(*cell_filter)).filter(
                    model.lat.isnot(None)).filter(model.lon.isnot(None)))

            try:
                found_cells.extend(query.all())
            except Exception:
                heka_client.raven(RAVEN_ERROR)

    if found_cells:
        # Group all found_cellss by location area
        lacs = defaultdict(list)
        for cell in found_cells:
            cellarea_key = (cell.radio, cell.mcc, cell.mnc, cell.lac)
            lacs[cellarea_key].append(cell)

        def sort_lac(v):
            # use the lac with the most values,
            # or the one with the smallest range
            return (len(v), -min([e.range for e in v]))

        # If we get data from multiple location areas, use the one with the
        # most data points in it. That way a lac with a cell hit will
        # have two entries and win over a lac with only the lac entry.
        lac = sorted(lacs.values(), key=sort_lac, reverse=True)

        for cell in lac[0]:
            # The first entry is the key,
            # used only to distinguish cell from lac
            network = Network(key=None,
                              lat=cell.lat,
                              lon=cell.lon,
                              range=cell.range)
            if type(cell) is CellArea:
                validated['cell_lac_network'].append(network)
            else:
                validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res,
     countries) = geoip_and_best_guess_country_codes(validated['cell'],
                                                     api_name, client_addr,
                                                     geoip_db, stats_client)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    for (data_field, object_field, metric_name, search_fn) in [
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi)
    ]:

        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field], stats_client,
                              api_name)
            except Exception:
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' % (api_name, metric_name))

            if r is None:
                stats_client.incr('%s.no_%s_found' % (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' % (api_name, metric_name))

                # Skip any hit that matches none of the possible countries.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                elif (distance(float(result['lat']), float(result['lon']), lat,
                               lon) * 1000 <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    # Do detailed logging for some api keys
    if api_key_log and api_key_name:
        api_log_metric = None
        wifi_keys = set([w['key'] for w in validated['wifi']])
        if wifi_keys and \
           len(filter_bssids_by_similarity(wifi_keys)) >= MIN_WIFIS_IN_QUERY:
            # Only count requests as WiFi-based if they contain enough
            # distinct WiFi networks to pass our filters
            if result_metric == 'wifi':
                api_log_metric = 'wifi_hit'
            else:
                api_log_metric = 'wifi_miss'
        elif validated['cell']:
            if result_metric == 'cell':
                api_log_metric = 'cell_hit'
            elif result_metric == 'cell_lac':
                api_log_metric = 'cell_lac_hit'
            else:
                api_log_metric = 'cell_miss'
        else:
            if geoip_res:
                api_log_metric = 'geoip_hit'
            else:
                api_log_metric = 'geoip_miss'
        if api_log_metric:
            stats_client.incr('%s.api_log.%s.%s' %
                              (api_name, api_key_name, api_log_metric))

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))

    if result_type == 'position':
        rounded_result = {
            'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
            'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
            'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
        }
        stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                            rounded_result['accuracy'])
        return rounded_result
    elif result_type == 'country':
        if countries:
            country = iso3166.countries.get(countries[0])
            return {
                'country_name': country.name,
                'country_code': country.alpha2
            }
Exemple #29
0
def search_all_sources(session, api_name, data,
                       client_addr=None, geoip_db=None):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    """

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    result = None
    result_metric = None

    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pass-through wifi data
    validated['wifi'] = data.get('wifi', [])

    # Pre-process cell data
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            validated['cell_lac'].add(cell_key._replace(cid=CELLID_LAC))

    # Merge all possible cell and lac keys into one list
    all_cell_keys = []
    all_cell_keys.extend(validated['cell'])
    for key in validated['cell_lac']:
        all_cell_keys.append(key)

    # Do a single query for all cells and lacs at the same time
    try:
        all_networks = query_cell_networks(session, all_cell_keys)
    except Exception:
        heka_client.raven(RAVEN_ERROR)
        all_networks = []
    for network in all_networks:
        if network.key == CELLID_LAC:
            validated['cell_lac_network'].append(network)
        else:
            validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res, countries) = geoip_and_best_guess_country_codes(
        validated['cell'], api_name, client_addr, geoip_db)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    for (data_field, object_field, metric_name, search_fn) in [
            ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
            ('cell', 'cell_network', 'cell', search_cell),
            ('wifi', 'wifi', 'wifi', search_wifi)]:

        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field])
            except Exception:
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' %
                                  (api_name, metric_name))

            if r is None:
                stats_client.incr('%s.no_%s_found' %
                                  (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' %
                                  (api_name, metric_name))

                # Skip any hit that matches none of the possible countries.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                elif (distance(float(result['lat']),
                               float(result['lon']), lat, lon) * 1000
                      <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    rounded_result = {
        'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
        'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
        'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
    }

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))
    stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                        rounded_result['accuracy'])

    return rounded_result