Exemple #1
0
def update_cell_measure_count(cell_key, count, utcnow, session):
    """Add ``count`` measures to the Cell row identified by ``cell_key``.

    Returns 1 when no Cell row matching the key (including its psc) was
    found before the insert, otherwise 0. Incomplete keys and blacklisted
    cells are skipped and also yield 0.
    """
    # only update data for complete record
    incomplete = (
        cell_key.radio < 0 or cell_key.mcc < 1 or cell_key.mnc < 0 or
        cell_key.lac < 0 or cell_key.cid < 0)
    if incomplete:
        return 0

    # check cell blacklist
    blacklisted = session.query(CellBlacklist).filter(
        *join_cellkey(CellBlacklist, cell_key)).first()
    if blacklisted is not None:
        return 0

    # do we already know about this cell?
    known = session.query(Cell).filter(
        *join_cellkey(Cell, cell_key)).filter(
        Cell.psc == cell_key.psc).first()
    new_cell = 1 if known is None else 0

    # on a duplicate row both counters are bumped instead of inserting
    bump_counters = (
        'new_measures = new_measures + %s, '
        'total_measures = total_measures + %s' % (count, count))
    stmt = Cell.__table__.insert(on_duplicate=bump_counters).values(
        created=utcnow,
        radio=cell_key.radio,
        mcc=cell_key.mcc,
        mnc=cell_key.mnc,
        lac=cell_key.lac,
        cid=cell_key.cid,
        psc=cell_key.psc,
        new_measures=count,
        total_measures=count)
    session.execute(stmt)
    return new_cell
Exemple #2
0
def update_cell_measure_count(cell_key, count, utcnow, session):
    """Add ``count`` measures to the Cell row identified by ``cell_key``.

    Returns 1 when no Cell row matching the key (including its psc) was
    found before the insert, otherwise 0. Incomplete keys and blacklisted
    cells are skipped and also yield 0.
    """
    # only update data for complete record
    incomplete = (
        cell_key.radio < 0 or cell_key.mcc < 1 or cell_key.mnc < 0 or
        cell_key.lac < 0 or cell_key.cid < 0)
    if incomplete:
        return 0

    # check cell blacklist
    blacklisted = session.query(CellBlacklist).filter(
        *join_cellkey(CellBlacklist, cell_key)).first()
    if blacklisted is not None:
        return 0

    # do we already know about this cell?
    known = session.query(Cell).filter(
        *join_cellkey(Cell, cell_key)).filter(
        Cell.psc == cell_key.psc).first()
    new_cell = 1 if known is None else 0

    # on a duplicate row both counters are bumped instead of inserting
    bump_counters = (
        'new_measures = new_measures + %s, '
        'total_measures = total_measures + %s' % (count, count))
    stmt = Cell.__table__.insert(on_duplicate=bump_counters).values(
        created=utcnow,
        radio=cell_key.radio,
        mcc=cell_key.mcc,
        mnc=cell_key.mnc,
        lac=cell_key.lac,
        cid=cell_key.cid,
        psc=cell_key.psc,
        new_measures=count,
        total_measures=count)
    session.execute(stmt)
    return new_cell
Exemple #3
0
def search_cell(session, data):
    """Estimate a position from the cells listed in ``data['cell']``.

    Each cell is normalized, turned into a cell key and looked up in the
    Cell table; only rows with both lat and lon set are used. Returns a
    dict with 'lat', 'lon' and 'accuracy' based on the average of the
    matches, or None when nothing matched.
    """
    default_radio = RADIO_TYPE.get(data['radio'], -1)
    found = []
    for raw_cell in data['cell']:
        normalized = normalized_cell_dict(raw_cell,
                                          default_radio=default_radio)
        if not normalized:
            continue

        key = to_cellkey(normalized)
        position = session.query(
            Cell.lat, Cell.lon, Cell.range).filter(
            *join_cellkey(Cell, key)).filter(
            Cell.lat.isnot(None)).filter(
            Cell.lon.isnot(None)).first()
        if position is not None:
            found.append(Network(key, *position))

    if not found:
        return None

    total = len(found)
    avg_lat = sum(c.lat for c in found) / total
    avg_lon = sum(c.lon for c in found) / total
    accuracy = estimate_accuracy(avg_lat, avg_lon, found, CELL_MIN_ACCURACY)
    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': accuracy,
    }
Exemple #4
0
def search_cell_lac(session, data):
    """Estimate a position from the location areas of the given cells.

    Every normalized cell has its cid replaced by ``CELLID_LAC`` before the
    lookup, so only LAC rows (with lat and lon set) can match. Returns a
    dict with 'lat', 'lon' and 'accuracy' for the matching LAC with the
    smallest range, or None when nothing matched.
    """
    default_radio = RADIO_TYPE.get(data['radio'], -1)
    found = []
    for raw_cell in data['cell']:
        normalized = normalized_cell_dict(raw_cell,
                                          default_radio=default_radio)
        if not normalized:
            continue

        normalized['cid'] = CELLID_LAC
        key = to_cellkey(normalized)
        position = session.query(
            Cell.lat, Cell.lon, Cell.range).filter(
            *join_cellkey(Cell, key)).filter(
            Cell.lat.isnot(None)).filter(
            Cell.lon.isnot(None)).first()
        if position is not None:
            found.append(Network(key, *position))

    if not found:
        return None

    # take the smallest LAC of any the user is inside
    smallest = min(found, key=operator.attrgetter('range'))

    return {
        'lat': quantize(smallest.lat),
        'lon': quantize(smallest.lon),
        'accuracy': max(LAC_MIN_ACCURACY, smallest.range),
    }
Exemple #5
0
def cell_location_update(self, min_new=10, max_new=100, batch=10):
    """Recompute positions for cells that have a batch of new measures.

    Fetches up to ``batch`` non-LAC cells whose ``new_measures`` is at
    least ``min_new`` and below ``max_new``, passes each one's most recent
    measures to ``calculate_new_position``, then updates the enclosing LACs
    and blacklists the cells reported as moving.

    Returns 0 when no cell qualifies, otherwise the tuple
    ``(number of cells fetched, number of moving cells)``. Any exception is
    reported via ``heka_client.raven`` and re-raised as a task retry.
    """
    try:
        utcnow = util.utcnow()
        cells = []
        with self.db_session() as session:
            emit_new_measures_metric(
                self.stats_client, session, self.shortname, Cell,
                min_new, max_new)
            pending = session.query(Cell).filter(
                Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).filter(
                Cell.cid != CELLID_LAC).limit(batch)
            cells = pending.all()
            if not cells:
                return 0

            moving_cells = set()
            updated_lacs = defaultdict(list)
            for cell in cells:
                # skip cells with a missing lac/cid
                # or virtual LAC cells
                if cell.lac == -1 or cell.cid in (-1, CELLID_LAC):
                    continue

                # only take the last X new_measures
                recent = session.query(
                    CellMeasure.lat, CellMeasure.lon, CellMeasure.id).filter(
                    *join_cellkey(CellMeasure, cell)).order_by(
                    CellMeasure.created.desc()).limit(cell.new_measures)
                measures = recent.all()
                if not measures:
                    continue

                if calculate_new_position(
                        cell, measures, CELL_MAX_DIST_KM, backfill=False):
                    moving_cells.add(cell)

                # remember which enclosing LAC this cell belongs to
                lac_key = CellKey(cell.radio, cell.mcc, cell.mnc, cell.lac,
                                  CELLID_LAC)
                updated_lacs[lac_key].append(cell)

            if updated_lacs:
                update_enclosing_lacs(
                    session, updated_lacs, moving_cells, utcnow)

            if moving_cells:
                # some cells found to be moving too much
                blacklist_and_remove_moving_cells(session, moving_cells)

            session.commit()

        return (len(cells), len(moving_cells))
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
Exemple #6
0
def cell_unthrottle_measures(self, max_measures, batch=100):
    """Run ``unthrottle_measures`` for the Cell / CellMeasure models.

    The count returned by ``unthrottle_measures`` is added to the
    "items.cell_unthrottled" stats counter.
    """
    def join_measure(station):
        # criteria joining CellMeasure rows to the given station
        return join_cellkey(CellMeasure, station)

    with self.db_session() as session:
        unthrottled = unthrottle_measures(
            session=session,
            station_model=Cell,
            measure_model=CellMeasure,
            join_measure=join_measure,
            max_measures=max_measures,
            batch=batch)
        self.stats_client.incr("items.cell_unthrottled", unthrottled)
Exemple #7
0
def location_update_cell(self, min_new=10, max_new=100, batch=10):
    """Recompute positions for cells that have a batch of new measures.

    Fetches up to ``batch`` non-LAC cells whose ``new_measures`` is at
    least ``min_new`` and below ``max_new`` and passes each one's most
    recent measures to ``calculate_new_position``. The keys of the
    enclosing LACs are handed to ``enqueue_lacs`` through
    ``session.on_post_commit``; cells reported as moving are blacklisted
    and removed.

    Returns 0 when no cell qualifies, otherwise the tuple
    ``(number of cells fetched, number of moving cells)``. Any exception is
    reported via ``heka_client.raven`` and re-raised as a task retry.
    """
    try:
        cells = []
        redis_client = self.app.redis_client
        with self.db_session() as session:
            emit_new_measures_metric(
                self.stats_client, session, self.shortname, Cell,
                min_new, max_new)
            pending = session.query(Cell).filter(
                Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).filter(
                Cell.cid != CELLID_LAC).limit(batch)
            cells = pending.all()
            if not cells:
                return 0

            moving_cells = set()
            updated_lacs = set()
            for cell in cells:
                # skip cells with a missing lac/cid
                # or virtual LAC cells
                if cell.lac == -1 or \
                   cell.cid in (-1, CELLID_LAC):  # pragma: no cover
                    continue

                # only take the last X new_measures
                recent = session.query(
                    CellMeasure.lat, CellMeasure.lon, CellMeasure.id).filter(
                    *join_cellkey(CellMeasure, cell)).order_by(
                    CellMeasure.created.desc()).limit(cell.new_measures)
                measures = recent.all()
                if not measures:
                    continue

                if calculate_new_position(cell, measures, CELL_MAX_DIST_KM):
                    moving_cells.add(cell)

                lac_key = CellKey(cell.radio, cell.mcc, cell.mnc, cell.lac,
                                  CELLID_LAC)
                updated_lacs.add(lac_key)

            if updated_lacs:
                session.on_post_commit(
                    enqueue_lacs, redis_client, updated_lacs)

            if moving_cells:
                # some cells found to be moving too much
                blacklist_and_remove_moving_cells(session, moving_cells)

            session.commit()

        return (len(cells), len(moving_cells))
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
Exemple #8
0
def cell_unthrottle_measures(self, max_measures, batch=100):
    """Run ``unthrottle_measures`` for the Cell / CellMeasure models.

    The count returned by ``unthrottle_measures`` is added to the
    "items.cell_unthrottled" stats counter. Any exception is reported via
    ``heka_client.raven`` and re-raised as a task retry.
    """
    def join_measure(station):
        # criteria joining CellMeasure rows to the given station
        return join_cellkey(CellMeasure, station)

    try:
        with self.db_session() as session:
            unthrottled = unthrottle_measures(
                session=session,
                station_model=Cell,
                measure_model=CellMeasure,
                join_measure=join_measure,
                max_measures=max_measures,
                batch=batch)
            self.stats_client.incr("items.cell_unthrottled", unthrottled)
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
Exemple #9
0
def remove_cell(self, cell_keys):
    """Delete the Cell rows matching each entry of ``cell_keys``.

    Returns the summed row counts reported by the deletes. An
    IntegrityError is reported via ``heka_client.raven`` and yields 0; any
    other exception is re-raised as a task retry.
    """
    cells_removed = 0
    try:
        with self.db_session() as session:
            for raw_key in cell_keys:
                cell_key = to_cellkey(raw_key)
                matching = session.query(Cell).filter(
                    *join_cellkey(Cell, cell_key))
                cells_removed += matching.delete(synchronize_session=False)
            session.commit()
        return cells_removed
    except IntegrityError:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #10
0
def mark_moving_cells(session, moving_cells):
    """Blacklist the given moving cells and schedule their removal.

    Cells that already have a CellBlacklist row are left alone. For the
    others a CellBlacklist entry is added to the session and their key is
    passed on to the ``remove_cell`` task. The number of newly blacklisted
    cells is reported as "items.blacklisted.cell_moving".
    """
    moving_keys = []
    blacklist = set()
    for cell in moving_cells:
        existing = session.query(CellBlacklist).filter(
            *join_cellkey(CellBlacklist, cell)).first()
        if existing is not None:
            continue

        key = to_cellkey(cell)._asdict()
        blacklist.add(CellBlacklist(**key))
        moving_keys.append(key)

    heka = get_heka_client()
    heka.incr("items.blacklisted.cell_moving", len(moving_keys))
    session.add_all(blacklist)
    remove_cell.delay(moving_keys)
Exemple #11
0
def remove_cell(self, cell_keys):
    """Delete the Cell rows matching each entry of ``cell_keys``.

    Returns the summed row counts reported by the deletes. An
    IntegrityError is reported via ``heka_client.raven`` and yields 0; any
    other exception is re-raised as a task retry.
    """
    cells_removed = 0
    try:
        with self.db_session() as session:
            for raw_key in cell_keys:
                cell_key = to_cellkey(raw_key)
                matching = session.query(Cell).filter(
                    *join_cellkey(Cell, cell_key))
                cells_removed += matching.delete(synchronize_session=False)
            session.commit()
        return cells_removed
    except IntegrityError:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #12
0
def backfill_cell_location_update(self, new_cell_measures):
    """Recompute cell positions from backfilled measures.

    ``new_cell_measures`` is converted with ``dict()``; each key is
    unpacked into a ``CellKey`` and each value is a collection of
    CellMeasure ids. Matching cells are passed to
    ``calculate_new_position`` in backfill mode, their enclosing LACs are
    updated and cells reported as moving are blacklisted and removed.

    Returns ``(len(cells), len(moving_cells))`` where ``cells`` is the
    result of the last tower lookup. Any exception is reported via
    ``heka_client.raven`` and re-raised as a task retry.
    """
    try:
        utcnow = util.utcnow()
        cells = []
        moving_cells = set()
        updated_lacs = defaultdict(list)
        new_cell_measures = dict(new_cell_measures)
        with self.db_session() as session:
            for tower_tuple, cell_measure_ids in new_cell_measures.items():
                cells = session.query(Cell).filter(
                    *join_cellkey(Cell, CellKey(*tower_tuple))).all()

                if not cells:
                    # This case shouldn't actually occur. The
                    # backfill_cell_location_update is only called
                    # when CellMeasure records are matched against
                    # known Cell records.
                    continue

                for cell in cells:
                    measures = session.query(  # NOQA
                        CellMeasure.lat, CellMeasure.lon).filter(
                        CellMeasure.id.in_(cell_measure_ids)).all()
                    if not measures:
                        continue

                    if calculate_new_position(
                            cell, measures, CELL_MAX_DIST_KM, backfill=True):
                        moving_cells.add(cell)

                    # remember which enclosing LAC this cell belongs to
                    lac_key = CellKey(cell.radio, cell.mcc, cell.mnc,
                                      cell.lac, CELLID_LAC)
                    updated_lacs[lac_key].append(cell)

            if updated_lacs:
                update_enclosing_lacs(
                    session, updated_lacs, moving_cells, utcnow)

            if moving_cells:
                # some cells found to be moving too much
                blacklist_and_remove_moving_cells(session, moving_cells)

            session.commit()
        return (len(cells), len(moving_cells))
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
Exemple #13
0
def mark_moving_cells(session, moving_cells):
    """Blacklist the given moving cells and schedule their removal.

    Cells that already have a CellBlacklist row are left alone. For the
    others a CellBlacklist entry is added to the session and their key is
    passed on to the ``remove_cell`` task. The number of newly blacklisted
    cells is reported as "items.blacklisted.cell_moving".
    """
    moving_keys = []
    blacklist = set()
    for cell in moving_cells:
        existing = session.query(CellBlacklist).filter(
            *join_cellkey(CellBlacklist, cell)).first()
        if existing is not None:
            continue

        key = to_cellkey(cell)._asdict()
        blacklist.add(CellBlacklist(**key))
        moving_keys.append(key)

    heka = get_heka_client()
    heka.incr("items.blacklisted.cell_moving", len(moving_keys))
    session.add_all(blacklist)
    remove_cell.delay(moving_keys)
Exemple #14
0
def cell_trim_excessive_data(self, max_measures, min_age_days=7, batch=10):
    """Run ``trim_excessive_data`` for the Cell / CellMeasure models.

    The count returned by ``trim_excessive_data`` is added to the
    "items.dropped.cell_trim_excessive" heka counter. Any exception is
    re-raised as a task retry.
    """
    def join_measure(station):
        # criteria joining CellMeasure rows to the given station
        return join_cellkey(CellMeasure, station)

    try:
        with self.db_session() as session:
            trimmed = trim_excessive_data(
                session=session,
                unique_model=Cell,
                measure_model=CellMeasure,
                join_measure=join_measure,
                delstat='deleted_cell',
                max_measures=max_measures,
                min_age_days=min_age_days,
                batch=batch)
            self.heka_client.incr(
                "items.dropped.cell_trim_excessive", trimmed)
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #15
0
def cell_trim_excessive_data(self, max_measures, min_age_days=7, batch=10):
    """Run ``trim_excessive_data`` for the Cell / CellMeasure models.

    The count returned by ``trim_excessive_data`` is added to the
    "items.dropped.cell_trim_excessive" heka counter. Any exception is
    re-raised as a task retry.
    """
    def join_measure(station):
        # criteria joining CellMeasure rows to the given station
        return join_cellkey(CellMeasure, station)

    try:
        with self.db_session() as session:
            trimmed = trim_excessive_data(
                session=session,
                unique_model=Cell,
                measure_model=CellMeasure,
                join_measure=join_measure,
                delstat='deleted_cell',
                max_measures=max_measures,
                min_age_days=min_age_days,
                batch=batch)
            self.heka_client.incr(
                "items.dropped.cell_trim_excessive", trimmed)
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #16
0
def query_cell_table(model, session, cell_keys):
    """Fetch the located rows of ``model`` matching any of ``cell_keys``.

    Returns a list of ``(radio, mcc, mnc, lac, cid, lat, lon, range)`` rows
    whose lat and lon are both set. An empty ``cell_keys`` yields an empty
    list without touching the database.
    """
    # create a list of 'and' criteria for cell keys
    cell_filter = [and_(*join_cellkey(model, key)) for key in cell_keys]

    if not cell_filter:
        # or_() without criteria would not restrict the query to any key,
        # so there is nothing sensible to look up.
        return []

    # Keep the cid to distinguish cell from lac later on
    query = session.query(model.radio, model.mcc, model.mnc, model.lac,
                          model.cid, model.lat, model.lon,
                          model.range).filter(or_(*cell_filter)).filter(
                              model.lat.isnot(None)).filter(
                                  model.lon.isnot(None))

    return query.all()
Exemple #17
0
def cell_location_update(self, min_new=10, max_new=100, batch=10):
    """Recompute positions for cells that have a batch of new measures.

    Fetches up to ``batch`` non-LAC cells whose ``new_measures`` is at
    least ``min_new`` and below ``max_new``. For each one the most recent
    measures are passed to ``calculate_new_position`` together with the
    shared ``moving_cells`` set, and the enclosing LAC is updated.
    Afterwards the collected moving cells are handed to
    ``mark_moving_cells``.

    Returns 0 when no cell qualifies, otherwise the tuple
    ``(number of cells fetched, number of moving cells)``. An
    IntegrityError is reported via ``heka_client.raven`` and yields 0; any
    other exception is re-raised as a task retry.
    """
    try:
        cells = []
        with self.db_session() as session:
            pending = session.query(Cell).filter(
                Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).filter(
                Cell.cid != CELLID_LAC).limit(batch)
            cells = pending.all()
            if not cells:
                return 0

            moving_cells = set()
            for cell in cells:
                # skip cells with a missing lac/cid
                # or virtual LAC cells
                if cell.lac == -1 or cell.cid in (-1, CELLID_LAC):
                    continue

                # only take the last X new_measures
                recent = session.query(
                    CellMeasure.lat, CellMeasure.lon, CellMeasure.id).filter(
                    *join_cellkey(CellMeasure, cell)).order_by(
                    CellMeasure.created.desc()).limit(cell.new_measures)
                measures = recent.all()
                if not measures:
                    continue

                calculate_new_position(
                    cell, measures, moving_cells, CELL_MAX_DIST_KM,
                    backfill=False)
                update_enclosing_lac(session, cell)

            if moving_cells:
                # some cells found to be moving too much
                mark_moving_cells(session, moving_cells)

            session.commit()

        return (len(cells), len(moving_cells))
    except IntegrityError:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #18
0
def cell_location_update(self, min_new=10, max_new=100, batch=10):
    """Recompute positions for cells that have a batch of new measures.

    Fetches up to ``batch`` non-LAC cells whose ``new_measures`` is at
    least ``min_new`` and below ``max_new``. For each one the lat/lon of
    the most recent measures are passed to ``calculate_new_position``
    together with the shared ``moving_cells`` set, and the enclosing LAC is
    updated. Afterwards the collected moving cells are handed to
    ``mark_moving_cells``.

    Returns 0 when no cell qualifies, otherwise the tuple
    ``(number of cells fetched, number of moving cells)``. An
    IntegrityError is reported via ``heka_client.raven`` and yields 0; any
    other exception is re-raised as a task retry.
    """
    try:
        cells = []
        with self.db_session() as session:
            pending = session.query(Cell).filter(
                Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).filter(
                Cell.cid != CELLID_LAC).limit(batch)
            cells = pending.all()
            if not cells:
                return 0

            moving_cells = set()
            for cell in cells:
                # skip cells with a missing lac/cid
                # or virtual LAC cells
                if cell.lac == -1 or cell.cid in (-1, CELLID_LAC):
                    continue

                # only take the last X new_measures
                recent = session.query(
                    CellMeasure.lat, CellMeasure.lon).filter(
                    *join_cellkey(CellMeasure, cell)).order_by(
                    CellMeasure.created.desc()).limit(cell.new_measures)
                measures = recent.all()
                if not measures:
                    continue

                calculate_new_position(
                    cell, measures, moving_cells, CELL_MAX_DIST_KM,
                    backfill=False)
                update_enclosing_lac(session, cell)

            if moving_cells:
                # some cells found to be moving too much
                mark_moving_cells(session, moving_cells)

            session.commit()
        return (len(cells), len(moving_cells))
    except IntegrityError:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #19
0
def query_cell_table(model, session, cell_keys):
    """Fetch the located rows of ``model`` matching any of ``cell_keys``.

    Returns a list of ``(radio, mcc, mnc, lac, cid, lat, lon, range)`` rows
    whose lat and lon are both set. An empty ``cell_keys`` yields an empty
    list without touching the database.
    """
    # create a list of 'and' criteria for cell keys
    cell_filter = [and_(*join_cellkey(model, key)) for key in cell_keys]

    if not cell_filter:
        # or_() without criteria would not restrict the query to any key,
        # so there is nothing sensible to look up.
        return []

    # Keep the cid to distinguish cell from lac later on
    query = session.query(
        model.radio, model.mcc, model.mnc, model.lac, model.cid,
        model.lat, model.lon, model.range).filter(
        or_(*cell_filter)).filter(
        model.lat.isnot(None)).filter(
        model.lon.isnot(None))

    return query.all()
Exemple #20
0
def query_cell_networks(session, cell_keys):
    """Return Network entries for the best location area among the keys.

    Looks up all located Cell rows matching any of ``cell_keys``, groups
    them by ``(radio, mcc, mnc, lac)`` and returns the group ranked first
    by ``sort_lac``, as a list of ``Network(cid, lat, lon, range)``.
    Returns an empty list for empty input or when nothing matched.
    """
    if not cell_keys:
        return []

    # create a list of 'and' criteria for cell keys
    cell_filter = [and_(*join_cellkey(Cell, key)) for key in cell_keys]

    # Keep the cid to distinguish cell from lac later on
    rows = session.query(
        Cell.radio, Cell.mcc, Cell.mnc, Cell.lac, Cell.cid,
        Cell.lat, Cell.lon, Cell.range).filter(
        or_(*cell_filter)).filter(
        Cell.lat.isnot(None)).filter(
        Cell.lon.isnot(None)).all()

    if not rows:
        return []

    # Group all results by location area
    by_area = defaultdict(list)
    for row in rows:
        by_area[row[:4]].append(row)

    def sort_lac(group):
        # use the lac with the most values, or the one with the smallest range
        return (len(group), -min(entry[-1] for entry in group))

    # If we get data from multiple location areas, use the one with the
    # most data points in it. That way a lac with a cell hit will
    # have two entries and win over a lac with only the lac entry.
    ranked = sorted(by_area.values(), key=sort_lac, reverse=True)

    # The first entry is the key, used only to distinguish cell from lac
    return [Network(*row[4:]) for row in ranked[0]]
Exemple #21
0
def remove_cell(self, cell_keys):
    """Delete the given cells and maintain their enclosing LAC rows.

    After the deletes, every affected location area is checked: when no
    non-LAC cell is left in it the LAC row is deleted too, otherwise the
    LAC row (if present) gets its ``new_measures`` incremented.

    Returns the summed row counts reported by the cell deletes. Any
    exception is reported via ``heka_client.raven`` and re-raised as a task
    retry.
    """
    cells_removed = 0
    try:
        with self.db_session() as session:
            changed_lacs = set()

            for raw_key in cell_keys:
                cell_key = to_cellkey(raw_key)
                matching = session.query(Cell).filter(
                    *join_cellkey(Cell, cell_key))
                cells_removed += matching.delete()
                changed_lacs.add(cell_key._replace(cid=CELLID_LAC))

            for lac_key in changed_lacs:
                # Either schedule an update to the enclosing LAC or, if
                # we just removed the last cell in the LAC, remove the LAC
                # entirely.
                same_area = (
                    Cell.radio == lac_key.radio,
                    Cell.mcc == lac_key.mcc,
                    Cell.mnc == lac_key.mnc,
                    Cell.lac == lac_key.lac,
                )
                remaining = session.query(Cell).filter(
                    *(same_area + (Cell.cid != CELLID_LAC,))).count()

                lac_query = session.query(Cell).filter(
                    *(same_area + (Cell.cid == CELLID_LAC,)))
                if remaining < 1:
                    lac_query.delete()
                    continue

                lac = lac_query.first()
                if lac is not None:
                    lac.new_measures += 1

            session.commit()
        return cells_removed
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
Exemple #22
0
def backfill_cell_location_update(self, new_cell_measures):
    """Recompute cell positions from backfilled measures.

    ``new_cell_measures`` is converted with ``dict()``; each key is
    unpacked into a ``CellKey`` and each value is a collection of
    CellMeasure ids. For every tower the matching cells are passed to
    ``calculate_new_position`` in backfill mode, their enclosing LAC is
    updated and the cells collected as moving for that tower are handed to
    ``mark_moving_cells``.

    Returns ``(len(cells), len(moving_cells))``, where ``cells`` is the
    result of the last tower lookup and ``moving_cells`` the set of the
    last tower that had cells; ``(0, 0)`` when there was nothing to
    process. An IntegrityError is reported via ``heka_client.raven`` and
    yields 0; any other exception is re-raised as a task retry.
    """
    try:
        cells = []
        # Bound up front so the final return also works when the loop never
        # reaches the per-tower reset below (empty input, unknown towers).
        moving_cells = set()
        new_cell_measures = dict(new_cell_measures)
        with self.db_session() as session:
            for tower_tuple, cell_measure_ids in new_cell_measures.items():
                query = session.query(Cell).filter(
                    *join_cellkey(Cell, CellKey(*tower_tuple)))
                cells = query.all()

                if not cells:
                    # This case shouldn't actually occur. The
                    # backfill_cell_location_update is only called
                    # when CellMeasure records are matched against
                    # known Cell records.
                    continue

                # moving cells are collected and marked per tower
                moving_cells = set()
                for cell in cells:
                    measures = session.query(  # NOQA
                        CellMeasure.lat, CellMeasure.lon).filter(
                            CellMeasure.id.in_(cell_measure_ids)).all()

                    if measures:
                        calculate_new_position(cell,
                                               measures,
                                               moving_cells,
                                               CELL_MAX_DIST_KM,
                                               backfill=True)
                        update_enclosing_lac(session, cell)

                if moving_cells:
                    # some cells found to be moving too much
                    mark_moving_cells(session, moving_cells)

            session.commit()
        return (len(cells), len(moving_cells))
    except IntegrityError:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #23
0
def backfill_cell_location_update(self, new_cell_measures):
    """Recompute cell positions from backfilled measures.

    ``new_cell_measures`` is converted with ``dict()``; each key is
    unpacked into a ``CellKey`` and each value is a collection of
    CellMeasure ids. For every tower the matching cells are passed to
    ``calculate_new_position`` in backfill mode, their enclosing LAC is
    updated and the cells collected as moving for that tower are handed to
    ``mark_moving_cells``.

    Returns ``(len(cells), len(moving_cells))``, where ``cells`` is the
    result of the last tower lookup and ``moving_cells`` the set of the
    last tower that had cells; ``(0, 0)`` when there was nothing to
    process. An IntegrityError is reported via ``heka_client.raven`` and
    yields 0; any other exception is re-raised as a task retry.
    """
    try:
        cells = []
        # Bound up front so the final return also works when the loop never
        # reaches the per-tower reset below (empty input, unknown towers).
        moving_cells = set()
        new_cell_measures = dict(new_cell_measures)
        with self.db_session() as session:
            for tower_tuple, cell_measure_ids in new_cell_measures.items():
                query = session.query(Cell).filter(
                    *join_cellkey(Cell, CellKey(*tower_tuple)))
                cells = query.all()

                if not cells:
                    # This case shouldn't actually occur. The
                    # backfill_cell_location_update is only called
                    # when CellMeasure records are matched against
                    # known Cell records.
                    continue

                # moving cells are collected and marked per tower
                moving_cells = set()
                for cell in cells:
                    measures = session.query(  # NOQA
                        CellMeasure.lat, CellMeasure.lon).filter(
                        CellMeasure.id.in_(cell_measure_ids)).all()

                    if measures:
                        calculate_new_position(cell, measures, moving_cells,
                                               CELL_MAX_DIST_KM,
                                               backfill=True)
                        update_enclosing_lac(session, cell)

                if moving_cells:
                    # some cells found to be moving too much
                    mark_moving_cells(session, moving_cells)

            session.commit()
        return (len(cells), len(moving_cells))
    except IntegrityError:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #24
0
def remove_cell(self, cell_keys):
    """Delete the given cells and maintain their enclosing LAC rows.

    After each delete the number of non-LAC cells left in the same
    location area is counted: when none is left the LAC row is deleted
    too, otherwise its ``new_measures`` column is updated.

    Returns the summed row counts reported by the cell deletes. An
    IntegrityError is reported via ``heka_client.raven`` and yields 0; any
    other exception is re-raised as a task retry.
    """
    cells_removed = 0
    try:
        with self.db_session() as session:
            for raw_key in cell_keys:
                key = to_cellkey(raw_key)
                matching = session.query(Cell).filter(
                    *join_cellkey(Cell, key))
                cells_removed += matching.delete()

                # Either schedule an update to the enclosing LAC or, if
                # we just removed the last cell in the LAC, remove the LAC
                # entirely.
                counted = session.query(func.count(Cell.id)).filter(
                    Cell.radio == key.radio,
                    Cell.mcc == key.mcc,
                    Cell.mnc == key.mnc,
                    Cell.lac == key.lac,
                    Cell.cid != CELLID_LAC).first()
                assert counted is not None
                remaining = int(counted[0])

                lac_query = session.query(Cell).filter(
                    Cell.radio == key.radio,
                    Cell.mcc == key.mcc,
                    Cell.mnc == key.mnc,
                    Cell.lac == key.lac,
                    Cell.cid == CELLID_LAC)
                if remaining >= 1:
                    lac_query.update({'new_measures': '1'})
                else:
                    lac_query.delete()

            session.commit()
        return cells_removed
    except IntegrityError:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #25
0
def remove_cell(self, cell_keys):
    """Delete the given cells and queue their LACs for an update.

    The keys of the affected location areas (cid replaced by
    ``CELLID_LAC``) are handed to ``enqueue_lacs`` through
    ``session.on_post_commit``.

    Returns the summed row counts reported by the deletes. Any exception is
    reported via ``heka_client.raven`` and re-raised as a task retry.
    """
    try:
        cells_removed = 0
        redis_client = self.app.redis_client
        with self.db_session() as session:
            changed_lacs = set()

            for raw_key in cell_keys:
                cell_key = to_cellkey(raw_key)
                matching = session.query(Cell).filter(
                    *join_cellkey(Cell, cell_key))
                cells_removed += matching.delete()
                changed_lacs.add(cell_key._replace(cid=CELLID_LAC))

            if changed_lacs:
                session.on_post_commit(
                    enqueue_lacs, redis_client, changed_lacs)

            session.commit()
        return cells_removed
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
Exemple #26
0
def remove_cell(self, cell_keys):
    """Delete the given cells and queue their LACs for an update.

    The keys of the affected location areas (cid replaced by
    ``CELLID_LAC``) are handed to ``enqueue_lacs`` through
    ``session.on_post_commit``.

    Returns the summed row counts reported by the deletes. Any exception is
    reported via ``heka_client.raven`` and re-raised as a task retry.
    """
    try:
        cells_removed = 0
        redis_client = self.app.redis_client
        with self.db_session() as session:
            changed_lacs = set()

            for raw_key in cell_keys:
                cell_key = to_cellkey(raw_key)
                matching = session.query(Cell).filter(
                    *join_cellkey(Cell, cell_key))
                cells_removed += matching.delete()
                changed_lacs.add(cell_key._replace(cid=CELLID_LAC))

            if changed_lacs:
                session.on_post_commit(
                    enqueue_lacs, redis_client, changed_lacs)

            session.commit()
        return cells_removed
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
Exemple #27
0
def update_enclosing_lacs(session, lacs, moving_cells, utcnow):
    """Flag the LAC rows enclosing updated cells as having new data.

    ``lacs`` maps a LAC key to the cells updated inside it. A LAC whose
    updated cells are all in ``moving_cells`` is skipped. Otherwise the
    existing LAC row gets ``new_measures`` incremented, or a new LAC row
    with ``new_measures=1`` is added to the session.
    """
    moving_cell_ids = {c.id for c in moving_cells}
    for lac_key, cells in lacs.items():
        if {c.id for c in cells} <= moving_cell_ids:
            # All new cells are about to be removed, so don't bother
            # updating the lac
            continue

        lac = session.query(Cell).filter(
            *join_cellkey(Cell, lac_key)).first()
        if lac is None:
            session.add(Cell(
                radio=lac_key.radio,
                mcc=lac_key.mcc,
                mnc=lac_key.mnc,
                lac=lac_key.lac,
                cid=lac_key.cid,
                new_measures=1,
                total_measures=0,
                created=utcnow,
            ))
        else:
            lac.new_measures += 1
Exemple #28
0
def remove_cell(self, cell_keys):
    """
    Delete the cells named by ``cell_keys`` and keep the enclosing
    location-area rows in step.

    After each deletion the remaining non-LAC cells of the same
    radio/mcc/mnc/lac are counted. If none are left, the LAC row is
    deleted as well; otherwise its ``new_measures`` column is set so the
    area is scheduled for an update.

    Returns the number of deleted cell rows. An ``IntegrityError`` is
    reported through the heka client and yields ``0``; any other
    exception triggers ``self.retry``.
    """
    removed = 0
    try:
        with self.db_session() as session:
            for raw_key in cell_keys:
                cell_key = to_cellkey(raw_key)
                removed += session.query(Cell).filter(
                    *join_cellkey(Cell, cell_key)).delete()

                # Count the cells still present in this area, ignoring
                # the LAC entry itself.
                count_row = session.query(func.count(Cell.id)).filter(
                    Cell.radio == cell_key.radio,
                    Cell.mcc == cell_key.mcc,
                    Cell.mnc == cell_key.mnc,
                    Cell.lac == cell_key.lac,
                    Cell.cid != CELLID_LAC).first()
                assert count_row is not None
                remaining = int(count_row[0])

                lac_rows = session.query(Cell).filter(
                    Cell.radio == cell_key.radio,
                    Cell.mcc == cell_key.mcc,
                    Cell.mnc == cell_key.mnc,
                    Cell.lac == cell_key.lac,
                    Cell.cid == CELLID_LAC)

                if remaining >= 1:
                    # Cells remain: flag the area for an update.
                    lac_rows.update({'new_measures': '1'})
                else:
                    # The last cell is gone: drop the area entry too.
                    lac_rows.delete()

            session.commit()
        return removed
    except IntegrityError:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
Exemple #29
0
def process_cell_measures(session,
                          entries,
                          userid=None,
                          max_measures_per_cell=11000):
    """
    Convert raw cell entries into measures while enforcing a per-cell
    intake limit.

    :param session: A database session used for queries and inserts.
    :param entries: An iterable of raw entries, each passed to
                    ``create_cell_measure``.
    :param userid: Optional user id. When given and at least one new cell
                   was recorded, the user's ``new_cell`` score is updated.
    :param max_measures_per_cell: Limit on ``Cell.total_measures``; once a
                                  cell has no room left under this limit,
                                  further measures for it are dropped.
    :returns: The list of accepted measures, which have also been added
              to the session.
    """
    cell_count = defaultdict(int)
    cell_measures = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    dropped_malformed = 0
    dropped_overflow = 0
    # Remaining intake budget per cell key, looked up once per key.
    space_available = {}

    # process entries
    for entry in entries:

        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            dropped_malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in space_available:
            query = session.query(
                Cell.total_measures).filter(*join_cellkey(Cell, cell_key))
            curr = query.first()
            if curr is not None:
                space_available[cell_key] = max_measures_per_cell - curr[0]
            else:
                space_available[cell_key] = max_measures_per_cell

        if space_available[cell_key] <= 0:
            dropped_overflow += 1
            continue
        space_available[cell_key] -= 1

        # No further per-entry lookup of Cell.total_measures is needed:
        # a positive budget already implies the stored total is below the
        # limit, and nothing in this loop writes to that row.

        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[cell_key] += 1

    heka_client = get_heka_client()

    if dropped_malformed:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=dropped_malformed)

    if dropped_overflow:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=dropped_overflow)

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(cell_key, count, utcnow,
                                               session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures", count=len(cell_measures))
    session.add_all(cell_measures)
    return cell_measures
Exemple #30
0
def search_all_sources(session,
                       api_name,
                       data,
                       client_addr=None,
                       geoip_db=None,
                       api_key_log=False,
                       api_key_name=None,
                       result_type='position'):
    """
    Common code-path for all lookup APIs, using
    WiFi, cell, cell-lac and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :param api_key_log: Enable additional api key specific logging?
    :param api_key_name: The metric friendly api key name.
    :param result_type: What kind of result to return, either a lat/lon
                        position or a country estimate.
    :returns: ``None`` if no source produced a result. For
              ``result_type='position'`` a dict with rounded ``lat``,
              ``lon`` and ``accuracy``. For ``result_type='country'`` a
              dict with ``country_name`` and ``country_code``, or ``None``
              (implicitly) when no candidate country is known.
    :raises ValueError: If ``result_type`` is neither ``'position'`` nor
                        ``'country'``.
    """

    if result_type not in ('country', 'position'):
        raise ValueError('Invalid result_type, must be one of '
                         'position or country')

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    # Best estimate so far and the name of the source that produced it.
    result = None
    result_metric = None

    # 'wifi', 'cell' and 'cell_lac' hold the normalized request data and
    # decide whether a search step runs at all; the '*_network' lists hold
    # the database matches that are handed to the cell search functions.
    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pre-process wifi data
    for wifi in data.get('wifi', ()):
        wifi = normalized_wifi_dict(wifi)
        if wifi:
            validated['wifi'].append(wifi)

    # Pre-process cell data
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            validated['cell_lac'].add(cell_key)

    found_cells = []

    # Query all cells and OCID cells
    for model in Cell, OCIDCell, CellArea:
        cell_filter = []
        for key in validated['cell']:
            # create a list of 'and' criteria for cell keys
            criterion = join_cellkey(model, key)
            cell_filter.append(and_(*criterion))

        if cell_filter:
            # only do a query if we have cell results, or this will match
            # all rows in the table
            load_fields = ('radio', 'mcc', 'mnc', 'lac', 'lat', 'lon', 'range')
            query = (session.query(model).options(
                load_only(*load_fields)).filter(or_(*cell_filter)).filter(
                    model.lat.isnot(None)).filter(model.lon.isnot(None)))

            # A failing query is reported and otherwise ignored, so the
            # remaining models and sources can still contribute.
            try:
                found_cells.extend(query.all())
            except Exception:
                heka_client.raven(RAVEN_ERROR)

    if found_cells:
        # Group all found_cells by location area
        lacs = defaultdict(list)
        for cell in found_cells:
            cellarea_key = (cell.radio, cell.mcc, cell.mnc, cell.lac)
            lacs[cellarea_key].append(cell)

        def sort_lac(v):
            # use the lac with the most values,
            # or the one with the smallest range
            return (len(v), -min([e.range for e in v]))

        # If we get data from multiple location areas, use the one with the
        # most data points in it. That way a lac with a cell hit will
        # have two entries and win over a lac with only the lac entry.
        lac = sorted(lacs.values(), key=sort_lac, reverse=True)

        # Only the entries of the winning location area are used.
        for cell in lac[0]:
            # The first entry is the key,
            # used only to distinguish cell from lac
            network = Network(key=None,
                              lat=cell.lat,
                              lon=cell.lon,
                              range=cell.range)
            if type(cell) is CellArea:
                validated['cell_lac_network'].append(network)
            else:
                validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res,
     countries) = geoip_and_best_guess_country_codes(validated['cell'],
                                                     api_name, client_addr,
                                                     geoip_db, stats_client)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.

    # data_field gates whether the step runs, object_field is what gets
    # passed to search_fn, metric_name is used in the emitted metrics.
    for (data_field, object_field, metric_name, search_fn) in [
        ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
        ('cell', 'cell_network', 'cell', search_cell),
        ('wifi', 'wifi', 'wifi', search_wifi)
    ]:

        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field], stats_client,
                              api_name)
            except Exception:
                # A failing search is reported and then treated like
                # "nothing found" for this source.
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' % (api_name, metric_name))

            if r is None:
                stats_client.incr('%s.no_%s_found' % (api_name, metric_name))

            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' % (api_name, metric_name))

                # Check whether the hit lies in any of the candidate
                # countries. A mismatch is only counted as an anomaly
                # below; the hit itself is not discarded.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                # NOTE(review): the * 1000 presumably converts a distance
                # in km to meters to match 'accuracy' - confirm the units.
                elif (distance(float(result['lat']), float(result['lon']), lat,
                               lon) * 1000 <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name, result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    # Do detailed logging for some api keys
    if api_key_log and api_key_name:
        api_log_metric = None
        wifi_keys = set([w['key'] for w in validated['wifi']])
        if wifi_keys and \
           len(filter_bssids_by_similarity(wifi_keys)) >= MIN_WIFIS_IN_QUERY:
            # Only count requests as WiFi-based if they contain enough
            # distinct WiFi networks to pass our filters
            if result_metric == 'wifi':
                api_log_metric = 'wifi_hit'
            else:
                api_log_metric = 'wifi_miss'
        elif validated['cell']:
            if result_metric == 'cell':
                api_log_metric = 'cell_hit'
            elif result_metric == 'cell_lac':
                api_log_metric = 'cell_lac_hit'
            else:
                api_log_metric = 'cell_miss'
        else:
            # Neither usable wifi nor cell data: classify by GeoIP outcome.
            if geoip_res:
                api_log_metric = 'geoip_hit'
            else:
                api_log_metric = 'geoip_miss'
        if api_log_metric:
            stats_client.incr('%s.api_log.%s.%s' %
                              (api_name, api_key_name, api_log_metric))

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))

    if result_type == 'position':
        rounded_result = {
            'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
            'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
            'accuracy': round(result['accuracy'], DEGREE_DECIMAL_PLACES),
        }
        stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                            rounded_result['accuracy'])
        return rounded_result
    elif result_type == 'country':
        # Only the first candidate country is reported. With no candidates
        # the function falls off the end and returns None.
        if countries:
            country = iso3166.countries.get(countries[0])
            return {
                'country_name': country.name,
                'country_code': country.alpha2
            }
Exemple #31
0
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """
    Build cell measures from ``entries`` and record them, subject to a
    per-cell intake limit.

    :param session: A database session used for queries and inserts.
    :param entries: An iterable of raw entries, each passed to
                    ``create_cell_measure``.
    :param userid: Optional user id. When given and at least one new cell
                   was recorded, the user's ``new_cell`` score is updated.
    :param max_measures_per_cell: Limit on ``Cell.total_measures``; once a
                                  cell has no room left under this limit,
                                  further measures for it are dropped.
    :returns: The list of accepted measures, which have also been added
              to the session.
    """
    cell_count = defaultdict(int)
    cell_measures = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    dropped_malformed = 0
    dropped_overflow = 0
    # Remaining intake budget per cell key, looked up once per key.
    space_available = {}

    # process entries
    for entry in entries:

        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            dropped_malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in space_available:
            query = session.query(Cell.total_measures).filter(
                *join_cellkey(Cell, cell_key))
            curr = query.first()
            if curr is not None:
                space_available[cell_key] = max_measures_per_cell - curr[0]
            else:
                space_available[cell_key] = max_measures_per_cell

        if space_available[cell_key] <= 0:
            dropped_overflow += 1
            continue
        space_available[cell_key] -= 1

        # The stored total is not queried again per entry: a positive
        # budget already implies it is below the limit, and this loop
        # does not write to the cell row.

        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[cell_key] += 1

    heka_client = get_heka_client()

    if dropped_malformed:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=dropped_malformed)

    if dropped_overflow:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=dropped_overflow)

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(
            cell_key, count, utcnow, session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(cell_measures))
    session.add_all(cell_measures)
    return cell_measures