Esempio n. 1
0
    def process_report(self, data):
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        cell_observations = {}
        wifi_observations = {}

        if data.get('cell'):
            # flatten report / cell data into a single dict
            for cell in data['cell']:
                # only validate the additional fields
                cell = CellReport.validate(cell)
                if cell is None:
                    continue
                add_missing_dict_entries(cell, report_data)
                cell_key = CellObservation.to_hashkey(cell)
                if cell_key in cell_observations:
                    existing = cell_observations[cell_key]
                    if existing['ta'] > cell['ta'] or \
                       (existing['signal'] != 0 and
                        existing['signal'] < cell['signal']) or \
                       existing['asu'] < cell['asu']:
                        cell_observations[cell_key] = cell
                else:
                    cell_observations[cell_key] = cell
        cell_observations = cell_observations.values()

        # flatten report / wifi data into a single dict
        if data.get('wifi'):
            for wifi in data['wifi']:
                # only validate the additional fields
                wifi = WifiReport.validate(wifi)
                if wifi is None:
                    continue
                add_missing_dict_entries(wifi, report_data)
                wifi_key = WifiObservation.to_hashkey(wifi)
                if wifi_key in wifi_observations:
                    existing = wifi_observations[wifi_key]
                    if existing['signal'] != 0 and \
                       existing['signal'] < wifi['signal']:
                        wifi_observations[wifi_key] = wifi
                else:
                    wifi_observations[wifi_key] = wifi
            wifi_observations = wifi_observations.values()
        return (cell_observations, wifi_observations)
Esempio n. 2
0
    def process_report(self, data):
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        cell_observations = {}
        wifi_observations = {}

        if data.get('cell'):
            # flatten report / cell data into a single dict
            for cell in data['cell']:
                # only validate the additional fields
                cell = CellReport.validate(cell)
                if cell is None:
                    continue
                add_missing_dict_entries(cell, report_data)
                cell_key = CellObservation.to_hashkey(cell)
                if cell_key in cell_observations:
                    existing = cell_observations[cell_key]
                    if existing['ta'] > cell['ta'] or \
                       (existing['signal'] != 0 and
                        existing['signal'] < cell['signal']) or \
                       existing['asu'] < cell['asu']:
                        cell_observations[cell_key] = cell
                else:
                    cell_observations[cell_key] = cell
        cell_observations = cell_observations.values()

        # flatten report / wifi data into a single dict
        if data.get('wifi'):
            for wifi in data['wifi']:
                # only validate the additional fields
                wifi = WifiReport.validate(wifi)
                if wifi is None:
                    continue
                add_missing_dict_entries(wifi, report_data)
                wifi_key = WifiObservation.to_hashkey(wifi)
                if wifi_key in wifi_observations:
                    existing = wifi_observations[wifi_key]
                    if existing['signal'] != 0 and \
                       existing['signal'] < wifi['signal']:
                        wifi_observations[wifi_key] = wifi
                else:
                    wifi_observations[wifi_key] = wifi
            wifi_observations = wifi_observations.values()
        return (cell_observations, wifi_observations)
Esempio n. 3
0
    def process_reports(self, reports, userid=None):
        positions = []
        cell_observations = []
        wifi_observations = []
        for i, report in enumerate(reports):
            report['report_id'] = uuid.uuid1()
            cell, wifi = self.process_report(report)
            cell_observations.extend(cell)
            wifi_observations.extend(wifi)
            if cell or wifi:
                positions.append({
                    'lat': report['lat'],
                    'lon': report['lon'],
                })

        if cell_observations:
            # group by and create task per cell key
            self.stats_client.incr('items.uploaded.cell_observations',
                                   len(cell_observations))
            if self.api_key_log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'cell_observations' % self.api_key_name,
                    len(cell_observations))

            cells = defaultdict(list)
            for obs in cell_observations:
                cells[CellObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 5 cell keys at a time.
            # Grouping them helps in avoiding per-task overhead.
            cells = list(cells.values())
            batch_size = 5
            countdown = 0
            for i in range(0, len(cells), batch_size):
                values = []
                for observations in cells[i:i + batch_size]:
                    values.extend([encode_radio_dict(o) for o in observations])
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_cell_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        if wifi_observations:
            # group by WiFi key
            self.stats_client.incr('items.uploaded.wifi_observations',
                                   len(wifi_observations))
            if self.api_key_log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'wifi_observations' % self.api_key_name,
                    len(wifi_observations))

            wifis = defaultdict(list)
            for obs in wifi_observations:
                wifis[WifiObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 20 WiFi keys at a time.
            # We tend to get a huge number of unique WiFi networks per
            # batch upload, with one to very few observations per WiFi.
            # Grouping them helps in avoiding per-task overhead.
            wifis = list(wifis.values())
            batch_size = 20
            countdown = 0
            for i in range(0, len(wifis), batch_size):
                values = []
                for observations in wifis[i:i + batch_size]:
                    values.extend(observations)
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_wifi_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        if userid is not None:
            scorekey = Score.to_hashkey(
                userid=userid,
                key=ScoreKey.location,
                time=util.utcnow().date())
            Score.incr(self.session, scorekey, len(positions))
        if positions:
            self.process_mapstat(positions)
Esempio n. 4
0
    def process_reports(self, reports, userid=None):
        positions = set()
        cell_observations = []
        wifi_observations = []
        for i, report in enumerate(reports):
            report['report_id'] = uuid.uuid1()
            cell, wifi = self.process_report(report)
            cell_observations.extend(cell)
            wifi_observations.extend(wifi)
            if (cell or wifi) and report.get('lat') and report.get('lon'):
                positions.add((report['lat'], report['lon']))

        if cell_observations:
            # group by and create task per cell key
            self.stats_client.incr('items.uploaded.cell_observations',
                                   len(cell_observations))
            if self.api_key and self.api_key.log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'cell_observations' % self.api_key.name,
                    len(cell_observations))

            cells = defaultdict(list)
            for obs in cell_observations:
                cells[CellObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 100 cell keys at a time.
            # Grouping them helps in avoiding per-task overhead.
            cells = list(cells.values())
            batch_size = 100
            countdown = 0
            for i in range(0, len(cells), batch_size):
                values = []
                for observations in cells[i:i + batch_size]:
                    values.extend([encode_radio_dict(o) for o in observations])
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_cell_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        if wifi_observations:
            # group by WiFi key
            self.stats_client.incr('items.uploaded.wifi_observations',
                                   len(wifi_observations))
            if self.api_key and self.api_key.log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'wifi_observations' % self.api_key.name,
                    len(wifi_observations))

            wifis = defaultdict(list)
            for obs in wifi_observations:
                wifis[WifiObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 100 WiFi keys at a time.
            # We tend to get a huge number of unique WiFi networks per
            # batch upload, with one to very few observations per WiFi.
            # Grouping them helps in avoiding per-task overhead.
            wifis = list(wifis.values())
            batch_size = 100
            countdown = 0
            for i in range(0, len(wifis), batch_size):
                values = []
                for observations in wifis[i:i + batch_size]:
                    values.extend(observations)
                # insert observations, expire the task if it wasn't processed
                # after six hours to avoid queue overload, also delay
                # each task by one second more, to get a more even workload
                # and avoid parallel updates of the same underlying stations
                self.insert_wifi_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        self.process_mapstat(positions)
        self.process_score(userid, positions)
Esempio n. 5
0
    def process_report(self, data):
        def add_missing_dict_entries(dst, src):
            # x.update(y) overwrites entries in x with those in y;
            # We want to only add those not already present.
            # We also only want to copy the top-level base report data
            # and not any nested values like cell or wifi.
            for (key, value) in src.items():
                if key != 'radio' and key not in dst \
                   and not isinstance(value, (tuple, list, dict)):
                    dst[key] = value

        def better_cell_obs(new, old):
            comparators = [
                ('ta', operator.lt),
                ('signal', operator.gt),
                ('asu', operator.gt),
            ]
            for field, better in comparators:
                if (None not in (old[field], new[field]) and
                        better(new[field], old[field])):
                    return True
            return False

        def better_wifi_obs(new, old):
            if (None not in (old['signal'], new['signal']) and
                    new['signal'] > old['signal']):
                return True
            return False

        report_data = Report.validate(data)
        if report_data is None:
            return ([], [])

        cell_observations = {}
        wifi_observations = {}

        if data.get('cell'):
            # flatten report / cell data into a single dict
            for cell in data['cell']:
                # only validate the additional fields
                cell = CellReport.validate(cell)
                if cell is None:
                    continue
                add_missing_dict_entries(cell, report_data)
                cell_key = CellObservation.to_hashkey(cell)
                if cell_key in cell_observations:
                    existing = cell_observations[cell_key]
                    if better_cell_obs(cell, existing):
                        cell_observations[cell_key] = cell
                else:
                    cell_observations[cell_key] = cell
        cell_observations = cell_observations.values()

        # flatten report / wifi data into a single dict
        if data.get('wifi'):
            for wifi in data['wifi']:
                # only validate the additional fields
                wifi = WifiReport.validate(wifi)
                if wifi is None:
                    continue
                add_missing_dict_entries(wifi, report_data)
                wifi_key = WifiObservation.to_hashkey(wifi)
                if wifi_key in wifi_observations:
                    existing = wifi_observations[wifi_key]
                    if better_wifi_obs(wifi, existing):
                        wifi_observations[wifi_key] = wifi
                else:
                    wifi_observations[wifi_key] = wifi
            wifi_observations = wifi_observations.values()
        return (cell_observations, wifi_observations)
Esempio n. 6
0
def process_observations(observations,
                         session,
                         userid=None,
                         api_key_log=False,
                         api_key_name=None):
    stats_client = get_stats_client()
    positions = []
    cell_observations = []
    wifi_observations = []
    for i, obs in enumerate(observations):
        obs['report_id'] = uuid.uuid1()
        cell, wifi = process_observation(obs, session)
        cell_observations.extend(cell)
        wifi_observations.extend(wifi)
        if cell or wifi:
            positions.append({
                'lat': obs['lat'],
                'lon': obs['lon'],
            })

    if cell_observations:
        # group by and create task per cell key
        stats_client.incr('items.uploaded.cell_observations',
                          len(cell_observations))
        if api_key_log:
            stats_client.incr(
                'items.api_log.%s.uploaded.cell_observations' % api_key_name,
                len(cell_observations))

        cells = defaultdict(list)
        for obs in cell_observations:
            cells[CellObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 5 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        cells = list(cells.values())
        batch_size = 5
        countdown = 0
        for i in range(0, len(cells), batch_size):
            values = []
            for observations in cells[i:i + batch_size]:
                values.extend(observations)
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            insert_measures_cell.apply_async(args=[values],
                                             kwargs={'userid': userid},
                                             expires=21600,
                                             countdown=countdown)
            countdown += 1

    if wifi_observations:
        # group by WiFi key
        stats_client.incr('items.uploaded.wifi_observations',
                          len(wifi_observations))
        if api_key_log:
            stats_client.incr(
                'items.api_log.%s.uploaded.wifi_observations' % api_key_name,
                len(wifi_observations))

        wifis = defaultdict(list)
        for obs in wifi_observations:
            wifis[WifiObservation.to_hashkey(obs)].append(obs)

        # Create a task per group of 20 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few observations per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        wifis = list(wifis.values())
        batch_size = 20
        countdown = 0
        for i in range(0, len(wifis), batch_size):
            values = []
            for observations in wifis[i:i + batch_size]:
                values.extend(observations)
            # insert observations, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            insert_measures_wifi.apply_async(args=[values],
                                             kwargs={'userid': userid},
                                             expires=21600,
                                             countdown=countdown)
            countdown += 1

    if userid is not None:
        process_score(userid, len(positions), session)
    if positions:
        process_mapstat(session, positions)