def blocklist_stations(self, moving):
    """Record the given moving stations in the blocklist.

    ``moving`` is an iterable of ``(station_key, block)`` pairs. A truthy
    ``block`` is updated in place: its ``time`` is set to ``self.utcnow``
    and its ``count`` is incremented. For a falsy ``block`` a new row is
    queued, built from ``CellBlocklist.to_hashkey(station_key)``, and all
    queued rows are inserted in batches of ``CellBlocklist._insert_batch``.

    If at least one key was processed, the ``data.station.blocklist``
    counter is incremented by the number of keys and the keys are passed
    to ``self.remove_task.delay``.
    """
    processed_keys = []
    pending_rows = []

    for station_key, block in moving:
        processed_keys.append(station_key)
        if not block:
            # no existing entry: queue a fresh row with an initial count
            hashkey = CellBlocklist.to_hashkey(station_key)
            pending_rows.append(dict(
                time=self.utcnow,
                count=1,
                **hashkey.__dict__
            ))
            continue
        # existing entry: refresh the timestamp and bump the counter
        block.time = self.utcnow
        block.count += 1

    if pending_rows:
        # one insert statement is reused for every batch
        insert_stmt = CellBlocklist.__table__.insert(
            mysql_on_duplicate='time = time'  # no-op
        )
        # the model decides how many rows go into a single statement
        chunk_size = CellBlocklist._insert_batch
        for offset in range(0, len(pending_rows), chunk_size):
            self.session.execute(
                insert_stmt.values(pending_rows[offset:offset + chunk_size]))

    if not processed_keys:
        return

    self.stats_client.incr(
        'data.station.blocklist',
        len(processed_keys),
        tags=['type:%s' % self.station_type,
              'action:add',
              'reason:moving'])
    self.remove_task.delay(processed_keys)
def new_stations(self, name, station_keys):
    """Return how many of ``station_keys`` are not yet known.

    Duplicate keys are counted once. For ``name == 'wifi'`` the keys are
    grouped by ``WifiShard.shard_model`` and every mac found in its shard
    is treated as known. For ``name == 'cell'`` keys found via
    ``Cell.iterkeys`` and then ``CellBlocklist.iterkeys`` are treated as
    known. For any other ``name`` no lookup happens, so every distinct
    key counts as unknown. Empty input returns 0.
    """
    if len(station_keys) == 0:
        return 0

    # start from the assumption that nothing is known yet
    remaining = set(station_keys)

    if name == 'wifi':
        # group the macs by the shard model responsible for them
        macs_by_shard = defaultdict(list)
        for mac in remaining:
            macs_by_shard[WifiShard.shard_model(mac)].append(mac)

        for model, macs in macs_by_shard.items():
            rows = (self.session.query(model.mac)
                                .filter(model.mac.in_(macs))
                                .all())
            remaining -= {row.mac for row in rows}
    elif name == 'cell':
        def known_hashkeys(model):
            # restrict the query to the columns that make up the hashkey
            rows = model.iterkeys(
                self.session,
                list(remaining),
                extra=lambda query: query.options(
                    load_only(*tuple(model._hashkey_cls._fields))))
            return {row.hashkey() for row in rows}

        # the station table is checked first
        remaining -= known_hashkeys(Cell)
        if len(remaining) == 0:  # pragma: no cover
            return 0

        # the blocklist is only consulted for keys that are still unknown
        remaining -= known_hashkeys(CellBlocklist)

    return len(remaining)
def blocklisted_stations(self, station_keys):
    """Map blocklist hashkeys to the result of ``blocklisted_station``.

    Every entry yielded by ``CellBlocklist.iterkeys`` for the given keys
    contributes one item: its ``hashkey()`` as the dictionary key and
    ``self.blocklisted_station(entry)`` as the value. Keys for which no
    entry is yielded do not appear in the result.
    """
    result = {}
    entries = CellBlocklist.iterkeys(self.session, list(station_keys))
    for entry in entries:
        status = self.blocklisted_station(entry)
        result[entry.hashkey()] = status
    return result