def update_cell_measure_count(cell_key, count, utcnow, session):
    """Upsert the per-cell measure counters for one cell key.

    Inserts a new ``Cell`` row (or bumps ``new_measures`` /
    ``total_measures`` on the existing one via the MySQL
    ON DUPLICATE KEY clause) and returns 1 if the cell was previously
    unknown, else 0.  Blacklisted or incomplete keys are ignored.
    """
    # only update data for complete record
    if cell_key.radio < 0 or cell_key.mcc < 1 or cell_key.mnc < 0 or \
       cell_key.lac < 0 or cell_key.cid < 0:  # NOQA
        return 0

    # check cell blacklist
    query = session.query(CellBlacklist).filter(
        *join_cellkey(CellBlacklist, cell_key))
    b = query.first()
    if b is not None:
        return 0

    # do we already know about this cell?
    query = session.query(Cell).filter(
        *join_cellkey(Cell, cell_key)).filter(
        Cell.psc == cell_key.psc
    )
    cell = query.first()
    new_cell = 0
    if cell is None:
        # first sighting of this exact cell; caller uses the count
        # for scoring new-cell discoveries
        new_cell = 1

    # NOTE: `count` is an int accumulated by the caller, so the %-format
    # into the ON DUPLICATE clause is not an injection risk here.
    stmt = Cell.__table__.insert(
        on_duplicate='new_measures = new_measures + %s, '
                     'total_measures = total_measures + %s' % (count, count)
    ).values(
        created=utcnow, radio=cell_key.radio, mcc=cell_key.mcc,
        mnc=cell_key.mnc, lac=cell_key.lac, cid=cell_key.cid,
        psc=cell_key.psc, new_measures=count, total_measures=count)
    session.execute(stmt)
    return new_cell
def search_cell(session, data):
    """Estimate a position from the cell towers in a search request.

    Every normalized cell is looked up in the ``Cell`` table; the
    positions of all known towers are averaged.  Returns a dict with
    quantized lat/lon and an accuracy estimate, or ``None`` when no
    tower is known.
    """
    default_radio = RADIO_TYPE.get(data['radio'], -1)
    networks = []
    for entry in data['cell']:
        normalized = normalized_cell_dict(entry, default_radio=default_radio)
        if not normalized:
            continue
        cell_key = to_cellkey(normalized)
        row = session.query(Cell.lat, Cell.lon, Cell.range).filter(
            *join_cellkey(Cell, cell_key)).filter(
            Cell.lat.isnot(None)).filter(
            Cell.lon.isnot(None)).first()
        if row is not None:
            networks.append(Network(cell_key, *row))

    if not networks:
        return

    total = len(networks)
    avg_lat = sum(net.lat for net in networks) / total
    avg_lon = sum(net.lon for net in networks) / total
    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': estimate_accuracy(
            avg_lat, avg_lon, networks, CELL_MIN_ACCURACY),
    }
def search_cell_lac(session, data):
    """Estimate a position from the location areas (LACs) of the cells.

    Each cell key is rewritten to its virtual LAC entry and looked up;
    of all LACs containing the user, the one with the smallest range is
    used.  Returns a dict with quantized lat/lon and an accuracy, or
    ``None`` when no LAC is known.
    """
    default_radio = RADIO_TYPE.get(data['radio'], -1)
    found = []
    for entry in data['cell']:
        normalized = normalized_cell_dict(entry, default_radio=default_radio)
        if not normalized:
            continue
        # rewrite the key to address the virtual per-LAC row
        normalized['cid'] = CELLID_LAC
        lac_key = to_cellkey(normalized)
        row = session.query(Cell.lat, Cell.lon, Cell.range).filter(
            *join_cellkey(Cell, lac_key)).filter(
            Cell.lat.isnot(None)).filter(
            Cell.lon.isnot(None)).first()
        if row is not None:
            found.append(Network(lac_key, *row))

    if not found:
        return

    # take the smallest LAC of any the user is inside
    best = min(found, key=operator.attrgetter('range'))
    return {
        'lat': quantize(best.lat),
        'lon': quantize(best.lon),
        'accuracy': max(LAC_MIN_ACCURACY, best.range),
    }
def cell_location_update(self, min_new=10, max_new=100, batch=10):
    """Celery task: refresh the position of cells with pending measures.

    Picks up to ``batch`` cells that accumulated between ``min_new`` and
    ``max_new`` unprocessed measures, recomputes their position, bumps
    the enclosing LAC rows, and blacklists/removes cells detected as
    moving.  Returns 0 when idle, else ``(cells processed, moving)``.
    """
    try:
        utcnow = util.utcnow()
        cells = []
        with self.db_session() as session:
            emit_new_measures_metric(self.stats_client, session,
                                     self.shortname, Cell,
                                     min_new, max_new)
            query = session.query(Cell).filter(
                Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).filter(
                Cell.cid != CELLID_LAC).limit(batch)
            cells = query.all()
            if not cells:
                return 0
            moving_cells = set()
            # maps virtual LAC key -> cells updated inside that LAC
            updated_lacs = defaultdict(list)
            for cell in cells:
                # skip cells with a missing lac/cid
                # or virtual LAC cells
                if cell.lac == -1 or cell.cid == -1 or \
                   cell.cid == CELLID_LAC:
                    continue
                query = session.query(
                    CellMeasure.lat, CellMeasure.lon,
                    CellMeasure.id).filter(
                    *join_cellkey(CellMeasure, cell))
                # only take the last X new_measures
                query = query.order_by(
                    CellMeasure.created.desc()).limit(
                    cell.new_measures)
                measures = query.all()
                if measures:
                    moving = calculate_new_position(
                        cell, measures, CELL_MAX_DIST_KM, backfill=False)
                    if moving:
                        moving_cells.add(cell)
                    # record the enclosing LAC for every processed cell,
                    # moving or not, so its aggregate gets refreshed
                    updated_lacs[CellKey(cell.radio, cell.mcc,
                                         cell.mnc, cell.lac,
                                         CELLID_LAC)].append(cell)

            if updated_lacs:
                update_enclosing_lacs(session, updated_lacs,
                                      moving_cells, utcnow)

            if moving_cells:
                # some cells found to be moving too much
                blacklist_and_remove_moving_cells(session, moving_cells)

            session.commit()
            return (len(cells), len(moving_cells))
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
def cell_unthrottle_measures(self, max_measures, batch=100):
    """Celery task: let previously throttled cells accept measures again.

    Delegates to the generic ``unthrottle_measures`` helper for the
    cell station/measure models and reports how many cells were
    unthrottled.
    """
    with self.db_session() as session:
        def join_measure(u):
            # criteria joining a CellMeasure row to a station key
            return join_cellkey(CellMeasure, u)

        unthrottled = unthrottle_measures(
            session=session,
            station_model=Cell,
            measure_model=CellMeasure,
            join_measure=join_measure,
            max_measures=max_measures,
            batch=batch)
        self.stats_client.incr("items.cell_unthrottled", unthrottled)
def location_update_cell(self, min_new=10, max_new=100, batch=10):
    """Celery task: refresh cell positions and enqueue LAC updates.

    Like ``cell_location_update`` but, instead of updating LAC rows
    inline, schedules the touched LAC keys onto a redis queue after the
    transaction commits.  Returns 0 when idle, else
    ``(cells processed, moving)``.
    """
    try:
        cells = []
        redis_client = self.app.redis_client
        with self.db_session() as session:
            emit_new_measures_metric(self.stats_client, session,
                                     self.shortname, Cell,
                                     min_new, max_new)
            query = session.query(Cell).filter(
                Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).filter(
                Cell.cid != CELLID_LAC).limit(batch)
            cells = query.all()
            if not cells:
                return 0
            moving_cells = set()
            updated_lacs = set()
            for cell in cells:
                # skip cells with a missing lac/cid
                # or virtual LAC cells
                if cell.lac == -1 or cell.cid == -1 or \
                   cell.cid == CELLID_LAC:  # pragma: no cover
                    continue
                query = session.query(
                    CellMeasure.lat, CellMeasure.lon,
                    CellMeasure.id).filter(*join_cellkey(CellMeasure, cell))
                # only take the last X new_measures
                query = query.order_by(CellMeasure.created.desc()).limit(
                    cell.new_measures)
                measures = query.all()
                if measures:
                    moving = calculate_new_position(cell, measures,
                                                    CELL_MAX_DIST_KM)
                    if moving:
                        moving_cells.add(cell)
                    # every processed cell marks its LAC for a refresh
                    updated_lacs.add(
                        CellKey(cell.radio, cell.mcc,
                                cell.mnc, cell.lac, CELLID_LAC))

            if updated_lacs:
                # defer until after commit so workers see the new data
                session.on_post_commit(enqueue_lacs,
                                       redis_client,
                                       updated_lacs)

            if moving_cells:
                # some cells found to be moving too much
                blacklist_and_remove_moving_cells(session, moving_cells)

            session.commit()
            return (len(cells), len(moving_cells))
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
def cell_unthrottle_measures(self, max_measures, batch=100):
    """Celery task: let previously throttled cells accept measures again.

    Wraps the generic ``unthrottle_measures`` helper for the cell
    models; on unexpected failure the task reports to heka and retries.
    """
    try:
        with self.db_session() as session:
            def join_measure(u):
                # criteria joining a CellMeasure row to a station key
                return join_cellkey(CellMeasure, u)

            unthrottled = unthrottle_measures(
                session=session,
                station_model=Cell,
                measure_model=CellMeasure,
                join_measure=join_measure,
                max_measures=max_measures,
                batch=batch)
            self.stats_client.incr("items.cell_unthrottled", unthrottled)
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
def remove_cell(self, cell_keys):
    """Celery task: delete the Cell rows matching the given keys.

    Returns the number of deleted rows; integrity errors are reported
    and swallowed, anything else triggers a task retry.
    """
    removed = 0
    try:
        with self.db_session() as session:
            for raw_key in cell_keys:
                cell_key = to_cellkey(raw_key)
                # bulk delete; no need to sync the (short-lived) session
                removed += session.query(Cell).filter(
                    *join_cellkey(Cell, cell_key)).delete(
                    synchronize_session=False)
            session.commit()
        return removed
    except IntegrityError as exc:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
def mark_moving_cells(session, moving_cells):
    """Blacklist cells observed to be moving and schedule their removal.

    Cells already on the blacklist are skipped; the rest are added to
    ``CellBlacklist`` and handed to the ``remove_cell`` task.
    """
    moving_keys = []
    pending_blacklist = set()
    for cell in moving_cells:
        existing = session.query(CellBlacklist).filter(
            *join_cellkey(CellBlacklist, cell)).first()
        if existing is None:
            key_dict = to_cellkey(cell)._asdict()
            pending_blacklist.add(CellBlacklist(**key_dict))
            moving_keys.append(key_dict)

    get_heka_client().incr("items.blacklisted.cell_moving",
                           len(moving_keys))
    session.add_all(pending_blacklist)
    # asynchronous cleanup of the now-blacklisted cells
    remove_cell.delay(moving_keys)
def backfill_cell_location_update(self, new_cell_measures):
    """Celery task: re-estimate cell positions from matched measures.

    ``new_cell_measures`` maps a cell tower tuple to the list of
    CellMeasure row ids that were matched against it.  Updates cell
    positions, refreshes the enclosing LAC rows, and blacklists cells
    found to be moving.  Returns ``(len(cells), len(moving_cells))``
    where ``cells`` is the last processed tower group.
    """
    try:
        utcnow = util.utcnow()
        cells = []
        # accumulators shared across all tower groups
        moving_cells = set()
        updated_lacs = defaultdict(list)
        new_cell_measures = dict(new_cell_measures)
        with self.db_session() as session:
            for tower_tuple, cell_measure_ids in new_cell_measures.items():
                query = session.query(Cell).filter(
                    *join_cellkey(Cell, CellKey(*tower_tuple)))
                cells = query.all()
                if not cells:
                    # This case shouldn't actually occur. The
                    # backfill_cell_location_update is only called
                    # when CellMeasure records are matched against
                    # known Cell records.
                    continue

                for cell in cells:
                    measures = session.query(  # NOQA
                        CellMeasure.lat, CellMeasure.lon).filter(
                        CellMeasure.id.in_(cell_measure_ids)).all()
                    if measures:
                        moving = calculate_new_position(
                            cell, measures, CELL_MAX_DIST_KM, backfill=True)
                        if moving:
                            moving_cells.add(cell)
                        # refresh the enclosing LAC for every updated cell
                        updated_lacs[CellKey(cell.radio, cell.mcc,
                                             cell.mnc, cell.lac,
                                             CELLID_LAC)].append(cell)

            if updated_lacs:
                update_enclosing_lacs(session, updated_lacs,
                                      moving_cells, utcnow)

            if moving_cells:
                # some cells found to be moving too much
                blacklist_and_remove_moving_cells(session, moving_cells)

            session.commit()
            return (len(cells), len(moving_cells))
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
def cell_trim_excessive_data(self, max_measures, min_age_days=7, batch=10):
    """Celery task: drop old measures from cells that exceed the cap.

    Delegates to the generic ``trim_excessive_data`` helper for the
    cell models and reports the number of dropped measures.
    """
    try:
        with self.db_session() as session:
            def join_measure(u):
                # criteria joining a CellMeasure row to a station key
                return join_cellkey(CellMeasure, u)

            dropped = trim_excessive_data(
                session=session,
                unique_model=Cell,
                measure_model=CellMeasure,
                join_measure=join_measure,
                delstat='deleted_cell',
                max_measures=max_measures,
                min_age_days=min_age_days,
                batch=batch)
            self.heka_client.incr("items.dropped.cell_trim_excessive",
                                  dropped)
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
def query_cell_table(model, session, cell_keys):
    """Fetch all located rows of *model* matching any of *cell_keys*.

    Returns tuples of (radio, mcc, mnc, lac, cid, lat, lon, range) for
    rows with a known position.
    """
    # one AND-ed criteria group per requested cell key
    key_criteria = [and_(*join_cellkey(model, key)) for key in cell_keys]

    # Keep the cid to distinguish cell from lac later on
    query = session.query(
        model.radio, model.mcc, model.mnc, model.lac, model.cid,
        model.lat, model.lon, model.range).filter(
        or_(*key_criteria)).filter(
        model.lat.isnot(None)).filter(
        model.lon.isnot(None))
    return query.all()
def cell_location_update(self, min_new=10, max_new=100, batch=10):
    """Celery task: refresh the position of cells with pending measures.

    Selects up to ``batch`` cells with between ``min_new`` and
    ``max_new`` unprocessed measures, recomputes each cell's position
    (collecting movers into ``moving_cells`` via the helper's out
    parameter), updates the enclosing LAC inline, then blacklists any
    moving cells.  Returns 0 when idle, else ``(processed, moving)``.
    """
    try:
        cells = []
        with self.db_session() as session:
            query = session.query(Cell).filter(
                Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).filter(
                Cell.cid != CELLID_LAC).limit(batch)
            cells = query.all()
            if not cells:
                return 0
            moving_cells = set()
            for cell in cells:
                # skip cells with a missing lac/cid
                # or virtual LAC cells
                if cell.lac == -1 or cell.cid == -1 or \
                   cell.cid == CELLID_LAC:
                    continue
                query = session.query(
                    CellMeasure.lat, CellMeasure.lon,
                    CellMeasure.id).filter(
                    *join_cellkey(CellMeasure, cell))
                # only take the last X new_measures
                query = query.order_by(
                    CellMeasure.created.desc()).limit(
                    cell.new_measures)
                measures = query.all()
                if measures:
                    # fills moving_cells as a side effect
                    calculate_new_position(cell, measures, moving_cells,
                                           CELL_MAX_DIST_KM,
                                           backfill=False)
                    update_enclosing_lac(session, cell)

            if moving_cells:
                # some cells found to be moving too much
                mark_moving_cells(session, moving_cells)

            session.commit()
            return (len(cells), len(moving_cells))
    except IntegrityError as exc:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
def cell_location_update(self, min_new=10, max_new=100, batch=10):
    """Celery task: refresh the position of cells with pending measures.

    Variant that only loads measure lat/lon columns.  Recomputes cell
    positions (movers collected via the helper's ``moving_cells`` out
    parameter), updates the enclosing LAC inline, then blacklists
    moving cells.  Returns 0 when idle, else ``(processed, moving)``.
    """
    try:
        cells = []
        with self.db_session() as session:
            query = session.query(Cell).filter(
                Cell.new_measures >= min_new).filter(
                Cell.new_measures < max_new).filter(
                Cell.cid != CELLID_LAC).limit(batch)
            cells = query.all()
            if not cells:
                return 0
            moving_cells = set()
            for cell in cells:
                # skip cells with a missing lac/cid
                # or virtual LAC cells
                if cell.lac == -1 or cell.cid == -1 or \
                   cell.cid == CELLID_LAC:
                    continue
                query = session.query(
                    CellMeasure.lat,
                    CellMeasure.lon).filter(*join_cellkey(CellMeasure, cell))
                # only take the last X new_measures
                query = query.order_by(CellMeasure.created.desc()).limit(
                    cell.new_measures)
                measures = query.all()
                if measures:
                    # fills moving_cells as a side effect
                    calculate_new_position(cell, measures, moving_cells,
                                           CELL_MAX_DIST_KM,
                                           backfill=False)
                    update_enclosing_lac(session, cell)

            if moving_cells:
                # some cells found to be moving too much
                mark_moving_cells(session, moving_cells)

            session.commit()
            return (len(cells), len(moving_cells))
    except IntegrityError as exc:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
def query_cell_table(model, session, cell_keys):
    """Fetch all located rows of *model* matching any of *cell_keys*.

    Returns tuples of (radio, mcc, mnc, lac, cid, lat, lon, range) for
    rows with a known position.
    """
    or_clauses = []
    for cell_key in cell_keys:
        # each key becomes one AND-ed criteria group
        or_clauses.append(and_(*join_cellkey(model, cell_key)))

    # Keep the cid to distinguish cell from lac later on
    columns = (model.radio, model.mcc, model.mnc, model.lac,
               model.cid, model.lat, model.lon, model.range)
    return session.query(*columns).filter(
        or_(*or_clauses)).filter(
        model.lat.isnot(None)).filter(
        model.lon.isnot(None)).all()
def query_cell_networks(session, cell_keys):
    """Look up all located cells/LACs for *cell_keys* and return the
    ``Network`` entries of the best location area.

    Results are grouped per location area; the area with the most rows
    (ties broken by the smallest range) wins.  Returns [] when nothing
    is known.
    """
    if not cell_keys:
        return []

    # one AND-ed criteria group per requested cell key
    key_criteria = [and_(*join_cellkey(Cell, key)) for key in cell_keys]

    # Keep the cid to distinguish cell from lac later on
    rows = session.query(
        Cell.radio, Cell.mcc, Cell.mnc, Cell.lac, Cell.cid,
        Cell.lat, Cell.lon, Cell.range).filter(
        or_(*key_criteria)).filter(
        Cell.lat.isnot(None)).filter(
        Cell.lon.isnot(None)).all()
    if not rows:
        return []

    # Group all results by location area
    by_area = defaultdict(list)
    for row in rows:
        by_area[row[:4]].append(row)

    def sort_lac(v):
        # use the lac with the most values, or the one with the
        # smallest range
        return (len(v), -min([e[-1] for e in v]))

    # If we get data from multiple location areas, use the one with the
    # most data points in it. That way a lac with a cell hit will
    # have two entries and win over a lac with only the lac entry.
    best_area = max(by_area.values(), key=sort_lac)

    # The first four columns are the key, used only to distinguish
    # cell from lac
    return [Network(*row[4:]) for row in best_area]
def remove_cell(self, cell_keys):
    """Celery task: delete cells, then fix up their enclosing LACs.

    After deleting the requested cells, each touched LAC is either
    scheduled for a position update (bump ``new_measures``) or, when it
    no longer contains any real cell, removed entirely.  Returns the
    number of deleted cell rows.
    """
    cells_removed = 0
    try:
        with self.db_session() as session:
            changed_lacs = set()
            for k in cell_keys:
                key = to_cellkey(k)
                query = session.query(Cell).filter(*join_cellkey(Cell, key))
                cells_removed += query.delete()
                # remember the virtual LAC entry for the second pass
                changed_lacs.add(key._replace(cid=CELLID_LAC))

            for key in changed_lacs:
                # Either schedule an update to the enclosing LAC or, if
                # we just removed the last cell in the LAC, remove the LAC
                # entirely.
                query = session.query(Cell).filter(
                    Cell.radio == key.radio,
                    Cell.mcc == key.mcc,
                    Cell.mnc == key.mnc,
                    Cell.lac == key.lac,
                    Cell.cid != CELLID_LAC)
                n = query.count()
                query = session.query(Cell).filter(
                    Cell.radio == key.radio,
                    Cell.mcc == key.mcc,
                    Cell.mnc == key.mnc,
                    Cell.lac == key.lac,
                    Cell.cid == CELLID_LAC)
                if n < 1:
                    # last real cell gone -> drop the virtual LAC row too
                    query.delete()
                else:
                    lac = query.first()
                    if lac is not None:
                        lac.new_measures += 1

            session.commit()
        return cells_removed
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
def backfill_cell_location_update(self, new_cell_measures):
    """Celery task: re-estimate cell positions from matched measures.

    ``new_cell_measures`` maps a cell tower tuple to the list of
    CellMeasure row ids that were matched against it.  Recomputes each
    cell's position (movers collected via the helper's out parameter),
    updates the enclosing LAC inline, then blacklists moving cells.
    Returns ``(len(cells), len(moving_cells))`` where ``cells`` is the
    last processed tower group.
    """
    try:
        cells = []
        # Bug fix: moving_cells used to be (re-)initialized inside the
        # tower loop, which discarded movers found for earlier towers
        # and raised NameError at the final return when
        # new_cell_measures was empty or matched no cells.
        moving_cells = set()
        new_cell_measures = dict(new_cell_measures)
        with self.db_session() as session:
            for tower_tuple, cell_measure_ids in new_cell_measures.items():
                query = session.query(Cell).filter(
                    *join_cellkey(Cell, CellKey(*tower_tuple)))
                cells = query.all()
                if not cells:
                    # This case shouldn't actually occur. The
                    # backfill_cell_location_update is only called
                    # when CellMeasure records are matched against
                    # known Cell records.
                    continue

                for cell in cells:
                    measures = session.query(  # NOQA
                        CellMeasure.lat, CellMeasure.lon).filter(
                        CellMeasure.id.in_(cell_measure_ids)).all()
                    if measures:
                        # fills moving_cells as a side effect
                        calculate_new_position(cell, measures, moving_cells,
                                               CELL_MAX_DIST_KM,
                                               backfill=True)
                        update_enclosing_lac(session, cell)

            if moving_cells:
                # some cells found to be moving too much
                mark_moving_cells(session, moving_cells)

            session.commit()
            return (len(cells), len(moving_cells))
    except IntegrityError as exc:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
def remove_cell(self, cell_keys):
    """Celery task: delete cells, fixing up each enclosing LAC inline.

    After deleting a cell the surviving real cells in its LAC are
    counted; the virtual LAC row is removed when none remain, otherwise
    it is flagged for a position refresh.  Returns the number of
    deleted cell rows.
    """
    cells_removed = 0
    try:
        with self.db_session() as session:
            for k in cell_keys:
                key = to_cellkey(k)
                query = session.query(Cell).filter(*join_cellkey(Cell, key))
                cells_removed += query.delete()

                # Either schedule an update to the enclosing LAC or, if
                # we just removed the last cell in the LAC, remove the LAC
                # entirely.
                query = session.query(func.count(Cell.id)).filter(
                    Cell.radio == key.radio,
                    Cell.mcc == key.mcc,
                    Cell.mnc == key.mnc,
                    Cell.lac == key.lac,
                    Cell.cid != CELLID_LAC)
                c = query.first()
                # COUNT(*) always yields one row
                assert c is not None
                n = int(c[0])
                query = session.query(Cell).filter(
                    Cell.radio == key.radio,
                    Cell.mcc == key.mcc,
                    Cell.mnc == key.mnc,
                    Cell.lac == key.lac,
                    Cell.cid == CELLID_LAC)
                if n < 1:
                    # last real cell gone -> drop the virtual LAC row
                    query.delete()
                else:
                    # NOTE(review): sets new_measures to the string '1'
                    # (coerced by the DB) rather than incrementing as
                    # other variants do -- confirm intent.
                    query.update({'new_measures': '1'})

            session.commit()
        return cells_removed
    except IntegrityError as exc:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
def remove_cell(self, cell_keys):
    """Celery task: delete cells and enqueue their LACs for update.

    Deletes every matching Cell row and, after the transaction commits,
    pushes the touched virtual LAC keys onto the redis queue so a
    follow-up task refreshes them.  Returns the number of deleted rows.
    """
    try:
        removed = 0
        redis_client = self.app.redis_client
        with self.db_session() as session:
            stale_lacs = set()
            for raw_key in cell_keys:
                cell_key = to_cellkey(raw_key)
                removed += session.query(Cell).filter(
                    *join_cellkey(Cell, cell_key)).delete()
                stale_lacs.add(cell_key._replace(cid=CELLID_LAC))

            if stale_lacs:
                # defer until after commit so workers see the deletions
                session.on_post_commit(
                    enqueue_lacs, redis_client, stale_lacs)

            session.commit()
        return removed
    except Exception as exc:  # pragma: no cover
        self.heka_client.raven('error')
        raise self.retry(exc=exc)
def update_enclosing_lacs(session, lacs, moving_cells, utcnow):
    """Bump (or create) the virtual LAC row for each updated area.

    ``lacs`` maps a virtual LAC key to the cells updated inside it.
    Areas whose updated cells are all about to be removed as moving are
    skipped entirely.
    """
    moving_ids = set([c.id for c in moving_cells])
    for lac_key, area_cells in lacs.items():
        remaining = set([c.id for c in area_cells]) - moving_ids
        if not remaining:
            # All new cells are about to be removed, so don't bother
            # updating the lac
            continue
        lac = session.query(Cell).filter(
            *join_cellkey(Cell, lac_key)).first()
        if lac is None:
            # first sighting of this area: create the virtual row
            session.add(Cell(
                radio=lac_key.radio,
                mcc=lac_key.mcc,
                mnc=lac_key.mnc,
                lac=lac_key.lac,
                cid=lac_key.cid,
                new_measures=1,
                total_measures=0,
                created=utcnow,
            ))
        else:
            lac.new_measures += 1
def remove_cell(self, cell_keys):
    """Celery task: delete cells, fixing up each enclosing LAC inline.

    Near-duplicate of the ``func.count`` variant: after each deletion
    the surviving real cells in the LAC are counted; the virtual LAC
    row is removed when none remain, otherwise flagged for refresh.
    Returns the number of deleted cell rows.
    """
    cells_removed = 0
    try:
        with self.db_session() as session:
            for k in cell_keys:
                key = to_cellkey(k)
                query = session.query(Cell).filter(*join_cellkey(Cell, key))
                cells_removed += query.delete()

                # Either schedule an update to the enclosing LAC or, if
                # we just removed the last cell in the LAC, remove the LAC
                # entirely.
                query = session.query(func.count(Cell.id)).filter(
                    Cell.radio == key.radio,
                    Cell.mcc == key.mcc,
                    Cell.mnc == key.mnc,
                    Cell.lac == key.lac,
                    Cell.cid != CELLID_LAC)
                c = query.first()
                # COUNT(*) always yields one row
                assert c is not None
                n = int(c[0])
                query = session.query(Cell).filter(Cell.radio == key.radio,
                                                   Cell.mcc == key.mcc,
                                                   Cell.mnc == key.mnc,
                                                   Cell.lac == key.lac,
                                                   Cell.cid == CELLID_LAC)
                if n < 1:
                    # last real cell gone -> drop the virtual LAC row
                    query.delete()
                else:
                    # NOTE(review): sets new_measures to the string '1'
                    # (coerced by the DB) rather than incrementing --
                    # confirm intent.
                    query.update({'new_measures': '1'})

            session.commit()
        return cells_removed
    except IntegrityError as exc:  # pragma: no cover
        self.heka_client.raven('error')
        return 0
    except Exception as exc:  # pragma: no cover
        raise self.retry(exc=exc)
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """Validate and store a batch of raw cell measure entries.

    Malformed entries and entries exceeding the per-cell intake limit
    are dropped (and counted in heka metrics).  Per-cell counters are
    upserted via ``update_cell_measure_count`` and the submitting user
    is credited for newly discovered cells.  Returns the list of
    accepted ``CellMeasure`` objects (also added to the session).
    """
    cell_count = defaultdict(int)
    cell_measures = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    dropped_malformed = 0
    dropped_overflow = 0
    # per-key cache of remaining intake capacity, so the DB is queried
    # at most once per distinct cell in this batch
    space_available = {}

    # process entries
    for entry in entries:
        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            dropped_malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in space_available:
            query = session.query(
                Cell.total_measures).filter(*join_cellkey(Cell, cell_key))
            curr = query.first()
            if curr is not None:
                space_available[cell_key] = max_measures_per_cell - curr[0]
            else:
                space_available[cell_key] = max_measures_per_cell

        if space_available[cell_key] > 0:
            space_available[cell_key] -= 1
        else:
            dropped_overflow += 1
            continue

        # Possibly drop measure if we're receiving them too
        # quickly for this cell.
        # NOTE(review): this re-queries total_measures per entry and
        # looks redundant with the space_available check above --
        # confirm before removing.
        query = session.query(
            Cell.total_measures).filter(*join_cellkey(Cell, cell_key))
        total_measures = query.first()
        if total_measures is not None:
            if total_measures[0] > max_measures_per_cell:
                dropped_overflow += 1
                continue

        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[cell_key] += 1

    heka_client = get_heka_client()
    if dropped_malformed != 0:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=dropped_malformed)

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=dropped_overflow)

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(cell_key, count,
                                               utcnow, session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(cell_measures))
    session.add_all(cell_measures)
    return cell_measures
def search_all_sources(session, api_name, data,
                       client_addr=None, geoip_db=None,
                       api_key_log=False, api_key_name=None,
                       result_type='position'):
    """
    Common code-path for all lookup APIs, using WiFi, cell, cell-lac
    and GeoIP data sources.

    :param session: A database session for queries.
    :param api_name: A string to use in metrics (for example "geolocate").
    :param data: A dict conforming to the search API.
    :param client_addr: The IP address the request came from.
    :param geoip_db: The geoip database.
    :param api_key_log: Enable additional api key specific logging?
    :param api_key_name: The metric friendly api key name.
    :param result_type: What kind of result to return, either a lat/lon
                        position or a country estimate.
    """
    if result_type not in ('country', 'position'):
        raise ValueError('Invalid result_type, must be one of '
                         'position or country')

    stats_client = get_stats_client()
    heka_client = get_heka_client()

    result = None
    result_metric = None

    # collected, normalized inputs and the networks resolved from them
    validated = {
        'wifi': [],
        'cell': [],
        'cell_lac': set(),
        'cell_network': [],
        'cell_lac_network': [],
    }

    # Pre-process wifi data
    for wifi in data.get('wifi', ()):
        wifi = normalized_wifi_dict(wifi)
        if wifi:
            validated['wifi'].append(wifi)

    # Pre-process cell data
    radio = RADIO_TYPE.get(data.get('radio', ''), -1)
    for cell in data.get('cell', ()):
        cell = normalized_cell_dict(cell, default_radio=radio)
        if cell:
            cell_key = to_cellkey(cell)
            validated['cell'].append(cell_key)
            validated['cell_lac'].add(cell_key)

    found_cells = []

    # Query all cells and OCID cells
    for model in Cell, OCIDCell, CellArea:
        cell_filter = []
        for key in validated['cell']:
            # create a list of 'and' criteria for cell keys
            criterion = join_cellkey(model, key)
            cell_filter.append(and_(*criterion))

        if cell_filter:
            # only do a query if we have cell results, or this will match
            # all rows in the table
            load_fields = ('radio', 'mcc', 'mnc', 'lac',
                           'lat', 'lon', 'range')
            query = (session.query(model).options(
                load_only(*load_fields)).filter(or_(*cell_filter)).filter(
                model.lat.isnot(None)).filter(model.lon.isnot(None)))

            try:
                found_cells.extend(query.all())
            except Exception:
                heka_client.raven(RAVEN_ERROR)

    if found_cells:
        # Group all found_cellss by location area
        lacs = defaultdict(list)
        for cell in found_cells:
            cellarea_key = (cell.radio, cell.mcc, cell.mnc, cell.lac)
            lacs[cellarea_key].append(cell)

        def sort_lac(v):
            # use the lac with the most values,
            # or the one with the smallest range
            return (len(v), -min([e.range for e in v]))

        # If we get data from multiple location areas, use the one with the
        # most data points in it. That way a lac with a cell hit will
        # have two entries and win over a lac with only the lac entry.
        lac = sorted(lacs.values(), key=sort_lac, reverse=True)

        for cell in lac[0]:
            # The first entry is the key,
            # used only to distinguish cell from lac
            network = Network(
                key=None,
                lat=cell.lat,
                lon=cell.lon,
                range=cell.range)
            if type(cell) is CellArea:
                validated['cell_lac_network'].append(network)
            else:
                validated['cell_network'].append(network)

    # Always do a GeoIP lookup because it is cheap and we want to
    # report geoip vs. other data mismatches. We may also use
    # the full GeoIP City-level estimate as well, if all else fails.
    (geoip_res, countries) = geoip_and_best_guess_country_codes(
        validated['cell'], api_name, client_addr, geoip_db, stats_client)

    # First we attempt a "zoom-in" from cell-lac, to cell
    # to wifi, tightening our estimate each step only so
    # long as it doesn't contradict the existing best-estimate
    # nor the possible countries of origin.
    for (data_field, object_field, metric_name, search_fn) in [
            ('cell_lac', 'cell_lac_network', 'cell_lac', search_cell_lac),
            ('cell', 'cell_network', 'cell', search_cell),
            ('wifi', 'wifi', 'wifi', search_wifi)]:
        if validated[data_field]:
            r = None
            try:
                r = search_fn(session, validated[object_field],
                              stats_client, api_name)
            except Exception:
                heka_client.raven(RAVEN_ERROR)
                stats_client.incr('%s.%s_error' %
                                  (api_name, metric_name))

            if r is None:
                stats_client.incr('%s.no_%s_found' %
                                  (api_name, metric_name))
            else:
                lat = float(r['lat'])
                lon = float(r['lon'])

                stats_client.incr('%s.%s_found' %
                                  (api_name, metric_name))

                # Skip any hit that matches none of the possible countries.
                country_match = False
                for country in countries:
                    if location_is_in_country(lat, lon, country, 1):
                        country_match = True
                        break

                if countries and not country_match:
                    stats_client.incr('%s.anomaly.%s_country_mismatch' %
                                      (api_name, metric_name))

                # Always accept the first result we get.
                if result is None:
                    result = r
                    result_metric = metric_name

                # Or any result that appears to be an improvement over the
                # existing best guess.
                elif (distance(float(result['lat']),
                               float(result['lon']),
                               lat, lon) * 1000
                      <= result['accuracy']):
                    result = r
                    result_metric = metric_name

                else:
                    stats_client.incr('%s.anomaly.%s_%s_mismatch' %
                                      (api_name, metric_name,
                                       result_metric))

    # Fall back to GeoIP if nothing has worked yet. We do not
    # include this in the "zoom-in" loop because GeoIP is
    # frequently _wrong_ at the city level; we only want to
    # accept that estimate if we got nothing better from cell
    # or wifi.
    if not result and geoip_res:
        result = geoip_res
        result_metric = 'geoip'

    # Do detailed logging for some api keys
    if api_key_log and api_key_name:
        api_log_metric = None
        wifi_keys = set([w['key'] for w in validated['wifi']])
        if wifi_keys and \
           len(filter_bssids_by_similarity(wifi_keys)) >= MIN_WIFIS_IN_QUERY:
            # Only count requests as WiFi-based if they contain enough
            # distinct WiFi networks to pass our filters
            if result_metric == 'wifi':
                api_log_metric = 'wifi_hit'
            else:
                api_log_metric = 'wifi_miss'
        elif validated['cell']:
            if result_metric == 'cell':
                api_log_metric = 'cell_hit'
            elif result_metric == 'cell_lac':
                api_log_metric = 'cell_lac_hit'
            else:
                api_log_metric = 'cell_miss'
        else:
            if geoip_res:
                api_log_metric = 'geoip_hit'
            else:
                api_log_metric = 'geoip_miss'

        if api_log_metric:
            stats_client.incr('%s.api_log.%s.%s' % (
                api_name, api_key_name, api_log_metric))

    if not result:
        stats_client.incr('%s.miss' % api_name)
        return None

    stats_client.incr('%s.%s_hit' % (api_name, result_metric))

    if result_type == 'position':
        rounded_result = {
            'lat': round(result['lat'], DEGREE_DECIMAL_PLACES),
            'lon': round(result['lon'], DEGREE_DECIMAL_PLACES),
            'accuracy': round(result['accuracy'],
                              DEGREE_DECIMAL_PLACES),
        }
        stats_client.timing('%s.accuracy.%s' % (api_name, result_metric),
                            rounded_result['accuracy'])
        return rounded_result
    elif result_type == 'country':
        if countries:
            country = iso3166.countries.get(countries[0])
            return {'country_name': country.name,
                    'country_code': country.alpha2}
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """Validate and store a batch of raw cell measure entries.

    Near-duplicate of the other ``process_cell_measures`` variant,
    differing only in formatting.  Malformed entries and entries
    exceeding the per-cell intake limit are dropped (and counted in
    heka metrics); per-cell counters are upserted and the user credited
    for new cells.  Returns the accepted ``CellMeasure`` objects.
    """
    cell_count = defaultdict(int)
    cell_measures = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    dropped_malformed = 0
    dropped_overflow = 0
    # per-key cache of remaining intake capacity, so the DB is queried
    # at most once per distinct cell in this batch
    space_available = {}

    # process entries
    for entry in entries:
        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            dropped_malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in space_available:
            query = session.query(Cell.total_measures).filter(
                *join_cellkey(Cell, cell_key))
            curr = query.first()
            if curr is not None:
                space_available[cell_key] = max_measures_per_cell - curr[0]
            else:
                space_available[cell_key] = max_measures_per_cell

        if space_available[cell_key] > 0:
            space_available[cell_key] -= 1
        else:
            dropped_overflow += 1
            continue

        # Possibly drop measure if we're receiving them too
        # quickly for this cell.
        # NOTE(review): this re-queries total_measures per entry and
        # looks redundant with the space_available check above --
        # confirm before removing.
        query = session.query(Cell.total_measures).filter(
            *join_cellkey(Cell, cell_key))
        total_measures = query.first()
        if total_measures is not None:
            if total_measures[0] > max_measures_per_cell:
                dropped_overflow += 1
                continue

        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[cell_key] += 1

    heka_client = get_heka_client()
    if dropped_malformed != 0:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=dropped_malformed)

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=dropped_overflow)

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(
            cell_key, count, utcnow, session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(cell_measures))
    session.add_all(cell_measures)
    return cell_measures