import uuid
from collections import defaultdict

# encode_radio_dict, CellObservation and WifiObservation are imported
# from elsewhere in the codebase.


def _to_json_value(self):
    # Create a sparse representation of this instance, skipping any
    # column whose value is None. Radio-specific encoding is applied by
    # the CellReport override below, not here, so it is not run twice.
    dct = {}
    for name in self._column_names():
        value = getattr(self, name, None)
        if value is not None:
            dct[name] = value
    return dct
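
# A minimal standalone sketch of the sparse-serialization pattern used by
# _to_json_value() above; _SketchReport and its columns are hypothetical
# stand-ins, not the real report model.
class _SketchReport(object):
    _columns = ('lat', 'lon', 'accuracy')

    def __init__(self, **kw):
        for name in self._columns:
            setattr(self, name, kw.get(name))

    def _column_names(self):
        return self._columns

    def _to_json_value(self):
        # None values are dropped, yielding a sparse representation.
        return dict((name, getattr(self, name))
                    for name in self._column_names()
                    if getattr(self, name) is not None)


# accuracy is unset, so it is omitted from the serialized form.
assert _SketchReport(lat=51.5, lon=-0.1)._to_json_value() == \
    {'lat': 51.5, 'lon': -0.1}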
def process_reports(self, reports, userid=None):
    positions = set()
    cell_observations = []
    wifi_observations = []
    for report in reports:
        report['report_id'] = uuid.uuid1()
        cell, wifi = self.process_report(report)
        cell_observations.extend(cell)
        wifi_observations.extend(wifi)
        if (cell or wifi) and report.get('lat') and report.get('lon'):
            positions.add((report['lat'], report['lon']))

    if cell_observations:
        # group by and create task per cell key
        self.stats_client.incr('items.uploaded.cell_observations',
                               len(cell_observations))
        if self.api_key and self.api_key.log:
            self.stats_client.incr(
                'items.api_log.%s.uploaded.'
                'cell_observations' % self.api_key.name,
                len(cell_observations))

        cells = defaultdict(list)
        for obs in cell_observations:
            cells[CellObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 100 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        cells = list(cells.values())
        batch_size = 100
        countdown = 0
        for i in range(0, len(cells), batch_size):
            values = []
            for observations in cells[i:i + batch_size]:
                values.extend([encode_radio_dict(o) for o in observations])
            # Insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload. Also delay each
            # task by one more second, to get a more even workload and
            # avoid parallel updates of the same underlying stations.
            self.insert_cell_task.apply_async(
                args=[values],
                kwargs={'userid': userid},
                expires=21600,
                countdown=countdown)
            countdown += 1

    if wifi_observations:
        # group by WiFi key
        self.stats_client.incr('items.uploaded.wifi_observations',
                               len(wifi_observations))
        if self.api_key and self.api_key.log:
            self.stats_client.incr(
                'items.api_log.%s.uploaded.'
                'wifi_observations' % self.api_key.name,
                len(wifi_observations))

        wifis = defaultdict(list)
        for obs in wifi_observations:
            wifis[WifiObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 100 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few observations per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        wifis = list(wifis.values())
        batch_size = 100
        countdown = 0
        for i in range(0, len(wifis), batch_size):
            values = []
            for observations in wifis[i:i + batch_size]:
                values.extend(observations)
            # Insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload. Also delay each
            # task by one more second, to get a more even workload and
            # avoid parallel updates of the same underlying stations.
            self.insert_wifi_task.apply_async(
                args=[values],
                kwargs={'userid': userid},
                expires=21600,
                countdown=countdown)
            countdown += 1

    self.process_mapstat(positions)
    self.process_score(userid, positions)
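
# A standalone sketch of the batching pattern used twice in
# process_reports() above, with the task passed in as a parameter instead
# of insert_cell_task/insert_wifi_task. The chunk size, expiry and
# countdown staggering mirror process_reports; key_func stands in for
# CellObservation.to_hashkey / WifiObservation.to_hashkey.
def _dispatch_in_batches(observations, key_func, task, batch_size=100):
    # Group observations by station key, so all observations for one
    # station travel in the same task.
    grouped = defaultdict(list)
    for obs in observations:
        grouped[key_func(obs)].append(obs)

    groups = list(grouped.values())
    countdown = 0
    for i in range(0, len(groups), batch_size):
        # Chunk station groups into batches of batch_size keys to
        # amortize per-task overhead.
        values = []
        for group in groups[i:i + batch_size]:
            values.extend(group)
        # Expire after six hours and stagger each task by one more
        # second, as in process_reports.
        task.apply_async(args=[values], expires=21600, countdown=countdown)
        countdown += 1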
def _to_json_value(self):
    # On top of the sparse base representation, encode the radio field
    # into its JSON-serializable form.
    dct = super(CellReport, self)._to_json_value()
    dct = encode_radio_dict(dct)
    return dct
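
# encode_radio_dict is defined elsewhere in the codebase. A minimal
# sketch of what such an encoder could look like (an assumption, not the
# real implementation): coerce an enum-like radio value into a plain
# integer so the dict is JSON-serializable.
def _sketch_encode_radio_dict(dct):
    # Hypothetical helper, named with a _sketch_ prefix to avoid implying
    # it matches the real encode_radio_dict.
    if dct.get('radio') is not None:
        dct['radio'] = int(dct['radio'])
    return dct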