def test_grid_bytes(self):
    """Round-trip a scaled grid through the shard model's grid column."""
    lat, lon = 12000, 34000
    shard = DataMap.shard_model(lat, lon)
    self.session.add(shard(grid=encode_datamap_grid(lat, lon)))
    self.session.flush()
    row = self.session.query(shard).first()
    # the column type decodes the stored bytes back into a (lat, lon) tuple
    self.assertEqual(row.grid, (lat, lon))
def _queue(self, pairs):
    """Scale raw (lat, lon) pairs, group them by datamap shard and
    enqueue the encoded grid values on the matching data queues.

    :param pairs: iterable of raw (lat, lon) tuples.
    """
    grids = defaultdict(list)
    for lat, lon in pairs:
        lat, lon = DataMap.scale(lat, lon)
        shard_id = DataMap.shard_id(lat, lon)
        grids[shard_id].append(encode_datamap_grid(lat, lon))
    for shard_id, values in grids.items():
        queue = self.celery_app.data_queues['update_datamap_' + shard_id]
        # The encoded grid values are raw bytes and not JSON serializable;
        # disable JSON encoding, matching the other datamap queue writers.
        queue.enqueue(list(values), json=False)
def _queue(self, pairs):
    """Group scaled positions by shard and push encoded grids to queues.

    :param pairs: iterable of raw (lat, lon) tuples.
    """
    by_shard = defaultdict(list)
    for lat, lon in pairs:
        scaled_lat, scaled_lon = DataMap.scale(lat, lon)
        shard = DataMap.shard_id(scaled_lat, scaled_lon)
        by_shard[shard].append(encode_datamap_grid(scaled_lat, scaled_lon))
    for shard, encoded in by_shard.items():
        # raw bytes payload, so skip JSON encoding
        self.celery_app.data_queues['update_datamap_' + shard].enqueue(
            list(encoded), json=False)
def __call__(self, batch=1000):
    """Dequeue up to *batch* encoded grids for this shard and upsert them,
    stamping each with today's date.

    Reschedules itself while the queue still holds enough data.

    :param batch: maximum number of queue entries to process per run.
    :returns: number of distinct grids handled (0 if nothing to do).
    """
    queue = self.task.app.data_queues['update_datamap_' + self.shard_id]
    today = util.utcnow().date()
    grids = list(set(queue.dequeue(batch=batch, json=False)))
    if not (grids and self.shard):
        return 0

    # Fetch only the columns needed for the freshness check.
    rows = (self.session.query(self.shard)
            .filter(self.shard.grid.in_(grids))
            .options(load_only('grid', 'modified'))).all()

    fresh = set()
    stale = set()
    for row in rows:
        encoded = encode_datamap_grid(*row.grid)
        if row.modified == today:
            fresh.add(encoded)
        else:
            stale.add(encoded)

    inserts = []
    updates = []
    for grid in grids:
        if grid in fresh:
            # already stamped today, nothing to do
            continue
        if grid in stale:
            updates.append({'grid': grid, 'modified': today})
        else:
            inserts.append(
                {'grid': grid, 'created': today, 'modified': today})

    if inserts:
        # batch insert; the on-duplicate clause makes races a no-op
        stmt = self.shard.__table__.insert(
            mysql_on_duplicate='modified = modified')
        self.session.execute(stmt.values(inserts))
    if updates:
        # batch update of already existing grids
        self.session.bulk_update_mappings(self.shard, updates)

    if queue.enough_data(batch=batch):
        self.task.apply_async(
            kwargs={'batch': batch, 'shard_id': self.shard_id},
            countdown=2, expires=10)

    return len(grids)
def process_datamap(self, pipe, positions):
    """Scale valid positions, bucket them by shard and enqueue the
    encoded grids through the given Redis pipeline.

    :param pipe: pipeline passed through to the queue's enqueue call.
    :param positions: iterable of (lat, lon) tuples; entries with a
        ``None`` coordinate are skipped.
    """
    scaled = {DataMap.scale(lat, lon)
              for lat, lon in positions
              if lat is not None and lon is not None}
    per_shard = defaultdict(set)
    for lat, lon in scaled:
        per_shard[DataMap.shard_id(lat, lon)].add(
            encode_datamap_grid(lat, lon))
    for shard_id, encoded in per_shard.items():
        queue = self.task.app.data_queues['update_datamap_' + shard_id]
        queue.enqueue(list(encoded), pipe=pipe)
def process_datamap(self, pipe, positions):
    """Queue encoded datamap grids for every valid (lat, lon) position.

    :param pipe: pipeline forwarded to each queue's enqueue call.
    :param positions: iterable of (lat, lon) tuples; coordinates that
        are ``None`` are ignored.
    """
    unique_grids = set()
    for lat, lon in positions:
        if lat is None or lon is None:
            continue
        unique_grids.add(DataMap.scale(lat, lon))
    buckets = defaultdict(set)
    for lat, lon in unique_grids:
        buckets[DataMap.shard_id(lat, lon)].add(
            encode_datamap_grid(lat, lon))
    for shard_id, grids in buckets.items():
        self.task.app.data_queues['update_datamap_' + shard_id].enqueue(
            list(grids), pipe=pipe)
def test_encode_datamap_grid(self):
    """Spot-check raw and base64 encodings at the extremes and origin."""
    cases = [
        ((-90000, -180000), {}, b"\x00\x00\x00\x00\x00\x00\x00\x00"),
        ((-90000, -180000), {"codec": "base64"}, b"AAAAAAAAAAA="),
        ((0, 0), {}, b"\x00\x01_\x90\x00\x02\xbf "),
        ((0, 0), {"codec": "base64"}, b"AAFfkAACvyA="),
        ((90.0, 180.0), {"scale": True}, b"\x00\x02\xbf \x00\x05~@"),
        ((90000, 180000), {}, b"\x00\x02\xbf \x00\x05~@"),
        ((90000, 180000), {"codec": "base64"}, b"AAK/IAAFfkA="),
    ]
    for args, kwargs, expected in cases:
        assert encode_datamap_grid(*args, **kwargs) == expected
def test_encode_datamap_grid(self):
    """Check known raw and base64 encodings for corner coordinates."""
    encode = encode_datamap_grid
    assert encode(-90000, -180000) == b'\x00\x00\x00\x00\x00\x00\x00\x00'
    assert encode(-90000, -180000, codec='base64') == b'AAAAAAAAAAA='
    assert encode(0, 0) == b'\x00\x01_\x90\x00\x02\xbf '
    assert encode(0, 0, codec='base64') == b'AAFfkAACvyA='
    # scale=True converts float degrees before encoding
    assert encode(90.0, 180.0, scale=True) == b'\x00\x02\xbf \x00\x05~@'
    assert encode(90000, 180000) == b'\x00\x02\xbf \x00\x05~@'
    assert encode(90000, 180000, codec='base64') == b'AAK/IAAFfkA='
def _update_shards(self, session, grids):
    """Insert new grids and touch existing ones with today's date.

    :param session: database session used for the queries and writes.
    :param grids: iterable of encoded grid byte values.
    """
    today = util.utcnow().date()
    rows = (session.query(self.shard)
            .filter(self.shard.grid.in_(grids))
            .options(load_only('grid', 'modified'))).all()

    current = set()   # already stamped today
    stale = set()     # exist, but need their modified date bumped
    for row in rows:
        key = encode_datamap_grid(*row.grid)
        (current if row.modified == today else stale).add(key)

    to_insert = []
    to_update = []
    for grid in grids:
        if grid in current:
            continue
        if grid in stale:
            to_update.append({'grid': grid, 'modified': today})
        else:
            to_insert.append(
                {'grid': grid, 'created': today, 'modified': today})

    if to_insert:
        # on-duplicate clause is a no-op, so concurrent inserts don't error
        stmt = self.shard.__table__.insert(
            mysql_on_duplicate='modified = modified')
        session.execute(stmt.values(to_insert))
    if to_update:
        session.bulk_update_mappings(self.shard, to_update)
def _update_shards_with_session(self, session, grids):
    """Upsert *grids* into this shard's table, locking existing rows.

    :param session: database session used for the queries and writes.
    :param grids: iterable of encoded grid byte values.
    """
    today = util.utcnow().date()
    table = self.shard_table
    rows = session.execute(
        select([table.c.grid, table.c.modified])
        .where(table.c.grid.in_(grids))
        .with_for_update()).fetchall()

    stamped_today = set()
    needs_bump = set()
    for row in rows:
        key = encode_datamap_grid(*row.grid)
        if row.modified == today:
            stamped_today.add(key)
        else:
            needs_bump.add(key)

    inserts = []
    updates = []
    for grid in grids:
        if grid in stamped_today:
            continue
        if grid in needs_bump:
            updates.append({"grid": grid, "modified": today})
        else:
            inserts.append(
                {"grid": grid, "created": today, "modified": today})

    if inserts:
        # INSERT IGNORE: an unexpected duplicate becomes a warning,
        # not an error
        session.execute(
            self.shard.__table__.insert()
            .values(inserts)
            .prefix_with("IGNORE", dialect="mysql"))
    if updates:
        session.bulk_update_mappings(self.shard, updates)
def _update_shards(self, session, grids):
    """Write today's state for *grids*: bump stale rows, insert new ones.

    :param session: database session used for the queries and writes.
    :param grids: iterable of encoded grid byte values.
    """
    today = util.utcnow().date()
    query = (session.query(self.shard)
             .filter(self.shard.grid.in_(grids))
             .options(load_only('grid', 'modified')))

    seen_today = set()
    needs_touch = set()
    for row in query.all():
        encoded = encode_datamap_grid(*row.grid)
        target = seen_today if row.modified == today else needs_touch
        target.add(encoded)

    fresh_rows = []
    touched_rows = []
    for grid in grids:
        if grid in seen_today:
            continue
        elif grid in needs_touch:
            touched_rows.append({'grid': grid, 'modified': today})
        else:
            fresh_rows.append(
                {'grid': grid, 'created': today, 'modified': today})

    if fresh_rows:
        # duplicate-key clause is a no-op, so insert races are harmless
        session.execute(
            self.shard.__table__.insert(
                mysql_on_duplicate='modified = modified'
            ).values(fresh_rows))
    if touched_rows:
        session.bulk_update_mappings(self.shard, touched_rows)
def __init__(self, lat, lon):
    """Store the encoded, scaled grid for (lat, lon) with a zero count."""
    scaled = DataMap.scale(lat, lon)
    self.grid = encode_datamap_grid(*scaled)
    self.num = 0