Example #1
    def process_score(self, userid, positions, new_station_count):
        if userid is None or len(positions) <= 0:
            return

        queue = self.task.app.data_queues['update_score']
        scores = []

        key = Score.to_hashkey(
            userid=userid,
            key=ScoreKey.location,
            time=None)
        scores.append({'hashkey': key, 'value': len(positions)})

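        # Add one score entry per station type that reported new stations.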
        for name, score_key in (('cell', ScoreKey.new_cell),
                                ('wifi', ScoreKey.new_wifi)):
            count = new_station_count[name]
            if count <= 0:
                continue
            key = Score.to_hashkey(
                userid=userid,
                key=score_key,
                time=None)
            scores.append({'hashkey': key, 'value': count})

        queue.enqueue(scores)
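
The entries queued here mirror the direct-update path in Examples #2 and #6: each dict pairs a Score hashkey with a delta. A minimal consumer sketch (the function name is hypothetical; it assumes the same Score.incr(session, hashkey, value) API used in those examples):

    def apply_queued_scores(session, score_entries):
        # Hypothetical consumer for the 'update_score' queue: apply each
        # queued delta using Score.incr, as in Examples #2 and #6.
        for entry in score_entries:
            Score.incr(session, entry['hashkey'], entry['value'])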
Example #2
    def insert(self, entries, userid=None):
        all_observations = []
        drop_counter = defaultdict(int)
        new_stations = 0

        # Process entries and group by validated station key
        station_observations = defaultdict(list)
        for entry in entries:
            self.pre_process_entry(entry)

            obs = self.observation_model.create(**entry)
            if not obs:
                drop_counter['malformed'] += 1
                continue

            station_observations[obs.hashkey()].append(obs)

        # Process observations one station at a time
        for key, observations in station_observations.items():
            first_blacklisted = None
            incomplete = False
            station = self.station_model.querykey(self.session, key).first()

            if station is None:
                # Drop observations for blacklisted stations.
                blacklisted, first_blacklisted = self.blacklisted_station(key)
                if blacklisted:
                    drop_counter['blacklisted'] += len(observations)
                    continue

                incomplete = self.incomplete_observation(key)
                if not incomplete:
                    # We discovered an actual new complete station.
                    new_stations += 1

            # Accept all observations
            all_observations.extend(observations)
            num = len(observations)

            # Accept incomplete observations, just don't make stations for them
            # (station creation is a side effect of count-updating)
            if not incomplete and num > 0:
                self.create_or_update_station(station, key, num,
                                              first_blacklisted)

        # Credit the user with discovering any new stations.
        if userid is not None and new_stations > 0:
            scorekey = Score.to_hashkey(
                userid=userid,
                key=ScoreKey['new_' + self.station_type],
                time=self.utcnow.date())
            Score.incr(self.session, scorekey, new_stations)

        added = len(all_observations)
        self.emit_stats(added, drop_counter)

        self.session.add_all(all_observations)
        return added
Example #3
    def insert(self, entries, userid=None):
        all_observations = []
        drop_counter = defaultdict(int)
        new_stations = 0

        # Process entries and group by validated station key
        station_observations = defaultdict(list)
        for entry in entries:
            self.pre_process_entry(entry)

            obs = self.observation_model.create(**entry)
            if not obs:
                drop_counter['malformed'] += 1
                continue

            station_observations[obs.hashkey()].append(obs)

        # Process observations one station at a time
        for key, observations in station_observations.items():
            first_blacklisted = None
            incomplete = False
            station = self.station_model.getkey(self.session, key)

            if station is None:
                # Drop observations for blacklisted stations.
                blacklisted, first_blacklisted = self.blacklisted_station(key)
                if blacklisted:
                    drop_counter['blacklisted'] += len(observations)
                    continue

                incomplete = self.incomplete_observation(key)
                if not incomplete:
                    # We discovered an actual new complete station.
                    new_stations += 1

            # Accept all observations
            all_observations.extend(observations)
            num = len(observations)

            # Accept incomplete observations, just don't make stations for them
            # (station creation is a side effect of count-updating)
            if not incomplete and num > 0:
                self.create_or_update_station(station, key, num,
                                              first_blacklisted)

        # Credit the user with discovering any new stations.
        if userid is not None and new_stations > 0:
            scorekey = Score.to_hashkey(
                userid=userid,
                key=ScoreKey['new_' + self.station_type],
                time=self.utcnow.date())
            Score.incr(self.session, scorekey, new_stations)

        added = len(all_observations)
        self.emit_stats(added, drop_counter)

        self.session.add_all(all_observations)
        return added
Example #4
    def queue_scores(self, userid, new_stations):
        # Credit the user with discovering any new stations.
        if userid is None or new_stations <= 0:
            return

        queue = self.task.app.data_queues["update_score"]
        key = Score.to_hashkey(userid=userid, key=self.station_score, time=None)
        queue.enqueue([{"hashkey": key, "value": int(new_stations)}])
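
A hypothetical call, assuming an updater instance whose station_score is ScoreKey.new_cell (neither the instance nor that value is shown in this snippet):

    updater.queue_scores(userid=1, new_stations=3)
    # enqueues [{'hashkey': <new_cell hashkey for user 1>, 'value': 3}]
    # on the 'update_score' data queue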
Example #5
    def process_score(self, userid, positions):
        if userid is None or len(positions) <= 0:
            return

        queue = self.task.app.data_queues['update_score']
        key = Score.to_hashkey(
            userid=userid,
            key=ScoreKey.location,
            time=None)
        queue.enqueue([{'hashkey': key, 'value': len(positions)}])
Example #6
    def process_reports(self, reports, userid=None):
        positions = []
        cell_observations = []
        wifi_observations = []
        for report in reports:
            report['report_id'] = uuid.uuid1()
            cell, wifi = self.process_report(report)
            cell_observations.extend(cell)
            wifi_observations.extend(wifi)
            if cell or wifi:
                positions.append({
                    'lat': report['lat'],
                    'lon': report['lon'],
                })

        if cell_observations:
            # group by and create task per cell key
            self.stats_client.incr('items.uploaded.cell_observations',
                                   len(cell_observations))
            if self.api_key_log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'cell_observations' % self.api_key_name,
                    len(cell_observations))

            cells = defaultdict(list)
            for obs in cell_observations:
                cells[CellObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 5 cell keys at a time.
            # Grouping them helps in avoiding per-task overhead.
            cells = list(cells.values())
            batch_size = 5
            countdown = 0
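            # For example, 12 cell keys with batch_size=5 produce three
            # tasks: keys[0:5] (countdown=0), keys[5:10] (countdown=1),
            # keys[10:12] (countdown=2).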
            for i in range(0, len(cells), batch_size):
                values = []
                for observations in cells[i:i + batch_size]:
                    values.extend([encode_radio_dict(o) for o in observations])
                # Insert the observations; expire the task if it isn't
                # processed within six hours, to avoid queue overload.
                # Also delay each successive task by one second to even
                # out the workload and avoid parallel updates of the
                # same underlying stations.
                self.insert_cell_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        if wifi_observations:
            # group by WiFi key
            self.stats_client.incr('items.uploaded.wifi_observations',
                                   len(wifi_observations))
            if self.api_key_log:
                self.stats_client.incr(
                    'items.api_log.%s.uploaded.'
                    'wifi_observations' % self.api_key_name,
                    len(wifi_observations))

            wifis = defaultdict(list)
            for obs in wifi_observations:
                wifis[WifiObservation.to_hashkey(obs)].append(obs)

            # Create a task per group of 20 WiFi keys at a time.
            # We tend to get a huge number of unique WiFi networks per
            # batch upload, with one to very few observations per WiFi.
            # Grouping them helps in avoiding per-task overhead.
            wifis = list(wifis.values())
            batch_size = 20
            countdown = 0
            for i in range(0, len(wifis), batch_size):
                values = []
                for observations in wifis[i:i + batch_size]:
                    values.extend(observations)
                # Insert the observations; expire the task if it isn't
                # processed within six hours, to avoid queue overload.
                # Also delay each successive task by one second to even
                # out the workload and avoid parallel updates of the
                # same underlying stations.
                self.insert_wifi_task.apply_async(
                    args=[values],
                    kwargs={'userid': userid},
                    expires=21600,
                    countdown=countdown)
                countdown += 1

        if userid is not None:
            scorekey = Score.to_hashkey(
                userid=userid,
                key=ScoreKey.location,
                time=util.utcnow().date())
            Score.incr(self.session, scorekey, len(positions))
        if positions:
            self.process_mapstat(positions)
Example #7
    def test_insert_observations_invalid_lac(self):
        session = self.session
        schema = ValidCellKeySchema()
        time = util.utcnow() - timedelta(days=1)
        today = util.utcnow().date()

        session.add(
            Cell(radio=Radio.gsm,
                 mcc=FRANCE_MCC,
                 mnc=2,
                 lac=3,
                 cid=4,
                 new_measures=2,
                 total_measures=5))
        session.add(Score(key=ScoreKey.new_cell, userid=1, time=today,
                          value=7))
        session.flush()

        obs = dict(created=time,
                   lat=PARIS_LAT,
                   lon=PARIS_LON,
                   time=time,
                   accuracy=0,
                   altitude=0,
                   altitude_accuracy=0,
                   radio=int(Radio.gsm))
        entries = [
            {
                "mcc": FRANCE_MCC,
                "mnc": 2,
                "lac": constants.MAX_LAC_ALL + 1,
                "cid": constants.MAX_CID_ALL + 1,
                "psc": 5,
                "asu": 8
            },
            {
                "mcc": FRANCE_MCC,
                "mnc": 2,
                "lac": schema.fields['lac'].missing,
                "cid": schema.fields['cid'].missing,
                "psc": 5,
                "asu": 8
            },
        ]
        for e in entries:
            e.update(obs)

        result = insert_measures_cell.delay(entries, userid=1)
        self.assertEqual(result.get(), 2)

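        # Both observations should end up with the schema's 'missing'
        # defaults: the out-of-range lac/cid in the first entry are
        # expected to be discarded during validation.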
        observations = session.query(CellObservation).all()
        self.assertEqual(len(observations), 2)
        self.assertEqual(set([o.lac for o in observations]),
                         set([schema.fields['lac'].missing]))
        self.assertEqual(set([o.cid for o in observations]),
                         set([schema.fields['cid'].missing]))

        # Nothing should change in the initially created Cell record
        cells = session.query(Cell).all()
        self.assertEqual(len(cells), 1)
        self.assertEqual(set([c.new_measures for c in cells]), set([2]))
        self.assertEqual(set([c.total_measures for c in cells]), set([5]))