Example #1
0
def process_measure(report_id, data, session):
    """Flatten one submitted report into per-station measure dicts.

    The report-level fields (report id, position, heading, speed, time,
    accuracy and altitude values) are copied into every entry of
    ``data['cell']`` and ``data['wifi']`` that does not already define
    them. Each entry is then passed through
    ``normalized_cell_measure_dict`` / ``normalized_wifi_measure_dict``;
    entries for which those helpers return ``None`` are skipped. Multiple
    entries for the same station key within one report are collapsed into
    a single entry.

    Note that the entries of ``data['cell']`` and ``data['wifi']`` are
    modified in place while the report-level fields are merged in.

    :param report_id: identifier stored on every resulting measure.
    :param data: report dict; ``lat``, ``lon``, ``time`` and ``radio``
        are read unconditionally (``KeyError`` if absent), the other
        report-level fields fall back to defaults.
    :param session: not used by this function.
    :returns: ``(cell_measures, wifi_measures)`` as two lists of dicts.
        Both are always real lists, also when the report contains no
        cell or no wifi entries.
    """
    def add_missing_dict_entries(dst, src):
        # dst.update(src) would overwrite values already present in dst;
        # we only want to fill in the keys dst is missing.
        for (k, v) in src.items():
            if k not in dst:
                dst[k] = v

    measure_data = dict(
        report_id=report_id,
        lat=data['lat'],
        lon=data['lon'],
        heading=data.get('heading', -1.0),
        speed=data.get('speed', -1.0),
        time=encode_datetime(data['time']),
        accuracy=data.get('accuracy', 0),
        altitude=data.get('altitude', 0),
        altitude_accuracy=data.get('altitude_accuracy', 0),
    )
    # unknown radio names map to -1
    measure_radio = RADIO_TYPE.get(data['radio'], -1)

    # flatten measure / cell data into a single dict
    cell_measures = {}
    for c in data.get('cell') or ():
        add_missing_dict_entries(c, measure_data)
        c = normalized_cell_measure_dict(c, measure_radio)
        if c is None:
            continue
        key = to_cellkey_psc(c)
        existing = cell_measures.get(key)
        # Keep a single entry per cell key. A later duplicate replaces
        # the stored one when it has a smaller timing advance, a higher
        # signal (only if the stored signal is non-zero) or a higher asu.
        if existing is None or \
           existing['ta'] > c['ta'] or \
           (existing['signal'] != 0 and
                existing['signal'] < c['signal']) or \
           existing['asu'] < c['asu']:
            cell_measures[key] = c

    # flatten measure / wifi data into a single dict
    wifi_measures = {}
    for w in data.get('wifi') or ():
        add_missing_dict_entries(w, measure_data)
        w = normalized_wifi_measure_dict(w)
        if w is None:
            continue
        key = w['key']
        existing = wifi_measures.get(key)
        # A later duplicate only wins when the stored signal is non-zero
        # and lower than the new one.
        if existing is None or \
           (existing['signal'] != 0 and
                existing['signal'] < w['signal']):
            wifi_measures[key] = w

    # Always hand back plain lists, so callers get the same type whether
    # or not the report contained cell / wifi entries.
    return (list(cell_measures.values()), list(wifi_measures.values()))
Example #2
0
def process_measures(items, session, userid=None):
    """Process a batch of submitted reports and queue their measures.

    One ``Measure`` object is added to the session per item and the
    session is flushed so each object has an id, which is used as the
    report id. Every item is passed through ``process_time`` and
    ``process_measure``; the resulting cell and wifi measures are grouped
    by station key and each group is handed to ``insert_cell_measures`` /
    ``insert_wifi_measures`` through ``.delay``.

    :param items: sequence of report dicts.
    :param session: database session used for the ``Measure`` objects and
        passed on to the score / mapstat helpers.
    :param userid: optional user id; when given, ``process_score`` is
        called with the number of items.
    """
    now = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    cutoff = now - datetime.timedelta(days=60)

    # get enough auto-increment ids assigned
    reserved = []
    for _ in range(len(items)):
        row = Measure()
        reserved.append(row)
        session.add(row)
    # TODO switch unique measure id to a uuid, so we don't have to do
    # get these from a savepoint here
    session.flush()

    positions = []
    all_cells = []
    all_wifis = []
    for row, raw_item in zip(reserved, items):
        item = process_time(raw_item, now, cutoff)
        found_cells, found_wifis = process_measure(row.id, item, session)
        all_cells.extend(found_cells)
        all_wifis.extend(found_wifis)
        lat = to_precise_int(item['lat'])
        lon = to_precise_int(item['lon'])
        positions.append({'lat': lat, 'lon': lon})

    heka_client = get_heka_client()

    if all_cells:
        # one task per distinct cell key
        heka_client.incr("items.uploaded.cell_measures", len(all_cells))
        by_cell = defaultdict(list)
        for entry in all_cells:
            by_cell[to_cellkey_psc(entry)].append(entry)
        for group in by_cell.values():
            insert_cell_measures.delay(group, userid=userid)

    if all_wifis:
        # one task per distinct wifi key
        heka_client.incr("items.uploaded.wifi_measures", len(all_wifis))
        by_wifi = defaultdict(list)
        for entry in all_wifis:
            by_wifi[entry['key']].append(entry)
        for group in by_wifi.values():
            insert_wifi_measures.delay(group, userid=userid)

    if userid is not None:
        process_score(userid, len(items), session)
    if positions:
        process_mapstat(positions, session, userid=userid)
Example #3
0
def process_measures(items, session, userid=None):
    """Turn a batch of reports into queued per-station insert tasks.

    A ``Measure`` object is created and added to the session for each
    item; after the flush its id serves as the report id handed to
    ``process_measure``. Cell and wifi measures collected from all items
    are counted on the heka client, grouped by station key and each group
    is submitted through the matching task's ``.delay``.

    :param items: sequence of report dicts.
    :param session: database session.
    :param userid: optional user id; enables the ``process_score`` call.
    """
    def fan_out(stat_name, entries, key_of, task):
        # Count the uploaded entries, then submit one task per key.
        heka_client.incr(stat_name, len(entries))
        grouped = defaultdict(list)
        for entry in entries:
            grouped[key_of(entry)].append(entry)
        for group in grouped.values():
            task.delay(group, userid=userid)

    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    utcmin = utcnow - datetime.timedelta(60)

    # get enough auto-increment ids assigned
    measures = []
    for _ in range(len(items)):
        measures.append(Measure())
        session.add(measures[-1])
    # TODO switch unique measure id to a uuid, so we don't have to do
    # get these from a savepoint here
    session.flush()

    positions = []
    cell_measures = []
    wifi_measures = []
    for index, original in enumerate(items):
        item = process_time(original, utcnow, utcmin)
        cell, wifi = process_measure(measures[index].id, item, session)
        cell_measures.extend(cell)
        wifi_measures.extend(wifi)
        positions.append(dict(
            lat=to_precise_int(item['lat']),
            lon=to_precise_int(item['lon']),
        ))

    heka_client = get_heka_client()

    if cell_measures:
        fan_out("items.uploaded.cell_measures", cell_measures,
                to_cellkey_psc, insert_cell_measures)

    if wifi_measures:
        fan_out("items.uploaded.wifi_measures", wifi_measures,
                lambda measure: measure['key'], insert_wifi_measures)

    if userid is not None:
        process_score(userid, len(items), session)
    if positions:
        process_mapstat(positions, session, userid=userid)
Example #4
0
def process_measure(data, session):
    """Flatten one report into per-station cell and wifi measure dicts.

    The report is first passed through ``normalized_measure_dict``; if
    that returns ``None`` the report yields no measures. Otherwise the
    scalar top-level values of the normalized report are copied into each
    entry of ``data['cell']`` / ``data['wifi']`` that lacks them, each
    entry is passed through its own normalization helper, and duplicate
    entries for the same station key are collapsed to one.

    The entries of ``data['cell']`` and ``data['wifi']`` are modified in
    place while the report-level values are merged in.

    :param data: report dict; ``data['radio']`` is read unconditionally
        once the report passed normalization.
    :param session: not used by this function.
    :returns: ``(cell_measures, wifi_measures)`` as two lists of dicts.
        Both are always real lists, also when the report has no cell or
        no wifi entries.
    """
    def add_missing_dict_entries(dst, src):
        # x.update(y) overwrites entries in x with those in y;
        # We want to only add those not already present.
        # We also only want to copy the top-level base measure data
        # and not any nested values like cell or wifi.
        for (k, v) in src.items():
            if k != 'radio' and k not in dst \
               and not isinstance(v, (tuple, list, dict)):
                dst[k] = v

    measure_data = normalized_measure_dict(data)
    if measure_data is None:
        return ([], [])

    cell_measures = {}
    wifi_measures = {}
    # unknown radio names map to -1
    measure_radio = RADIO_TYPE.get(data['radio'], -1)

    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for c in data['cell']:
            add_missing_dict_entries(c, measure_data)
            c = normalized_cell_measure_dict(c, measure_radio)
            if c is None:  # pragma: no cover
                continue
            key = to_cellkey_psc(c)
            if key in cell_measures:  # pragma: no cover
                # Duplicate cell key: the later entry replaces the stored
                # one when it has a smaller ta, a higher signal (only if
                # the stored signal is non-zero) or a higher asu.
                existing = cell_measures[key]
                if existing['ta'] > c['ta'] or \
                   (existing['signal'] != 0 and
                    existing['signal'] < c['signal']) or \
                   existing['asu'] < c['asu']:
                    cell_measures[key] = c
            else:
                cell_measures[key] = c

    # flatten measure / wifi data into a single dict
    if data.get('wifi'):
        for w in data['wifi']:
            add_missing_dict_entries(w, measure_data)
            w = normalized_wifi_measure_dict(w)
            if w is None:
                continue
            key = w['key']
            if key in wifi_measures:  # pragma: no cover
                # Duplicate wifi key: replace only when the stored signal
                # is non-zero and lower than the new one.
                existing = wifi_measures[key]
                if existing['signal'] != 0 and \
                   existing['signal'] < w['signal']:
                    wifi_measures[key] = w
            else:
                wifi_measures[key] = w

    # Convert both results to plain lists here, so the return type does
    # not depend on whether the report had cell / wifi entries.
    return (list(cell_measures.values()), list(wifi_measures.values()))
Example #5
0
def process_measure(data, session):
    """Split a report into de-duplicated cell and wifi measure dicts.

    ``normalized_measure_dict`` is applied to the report first; a ``None``
    result means the report produces no measures. The scalar top-level
    values of the normalized report (everything except ``radio`` and
    nested tuples / lists / dicts) are merged into each cell and wifi
    entry that does not already carry them. Entries rejected by
    ``normalized_cell_measure_dict`` / ``normalized_wifi_measure_dict``
    (``None`` result) are skipped, and only one entry per station key is
    kept.

    The entries of ``data['cell']`` and ``data['wifi']`` are mutated in
    place by the merge step.

    :param data: report dict; ``data['radio']`` is read unconditionally
        once the report passed normalization.
    :param session: not used by this function.
    :returns: ``(cell_measures, wifi_measures)``, always two lists.
    """
    def add_missing_dict_entries(dst, src):
        # x.update(y) overwrites entries in x with those in y;
        # We want to only add those not already present.
        # We also only want to copy the top-level base measure data
        # and not any nested values like cell or wifi.
        for (k, v) in src.items():
            if k != 'radio' and k not in dst \
               and not isinstance(v, (tuple, list, dict)):
                dst[k] = v

    def prefer_new_cell(existing, new):
        # The new cell entry wins with a smaller ta, a higher signal
        # (only if the stored signal is non-zero) or a higher asu.
        return (existing['ta'] > new['ta'] or
                (existing['signal'] != 0 and
                 existing['signal'] < new['signal']) or
                existing['asu'] < new['asu'])

    def prefer_new_wifi(existing, new):
        # The new wifi entry wins only over a non-zero, lower signal.
        return (existing['signal'] != 0 and
                existing['signal'] < new['signal'])

    measure_data = normalized_measure_dict(data)
    if measure_data is None:
        return ([], [])

    cell_measures = {}
    wifi_measures = {}
    # unknown radio names map to -1
    measure_radio = RADIO_TYPE.get(data['radio'], -1)

    if data.get('cell'):
        # flatten measure / cell data into a single dict
        for c in data['cell']:
            add_missing_dict_entries(c, measure_data)
            c = normalized_cell_measure_dict(c, measure_radio)
            if c is None:  # pragma: no cover
                continue
            key = to_cellkey_psc(c)
            if key in cell_measures:  # pragma: no cover
                if not prefer_new_cell(cell_measures[key], c):
                    continue
            cell_measures[key] = c

    # flatten measure / wifi data into a single dict
    if data.get('wifi'):
        for w in data['wifi']:
            add_missing_dict_entries(w, measure_data)
            w = normalized_wifi_measure_dict(w)
            if w is None:
                continue
            key = w['key']
            if key in wifi_measures:  # pragma: no cover
                if not prefer_new_wifi(wifi_measures[key], w):
                    continue
            wifi_measures[key] = w

    # Return plain lists for both results; previously the wifi result was
    # a bare dict whenever the report had no wifi entries.
    return (list(cell_measures.values()), list(wifi_measures.values()))
Example #6
0
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """Create cell measures from ``entries`` and add them to the session.

    Each entry is passed to ``create_cell_measure``; entries for which it
    returns a falsy value are counted as malformed and skipped. For each
    distinct cell key the stored ``Cell.total_measures`` is looked up once
    and the remaining room below ``max_measures_per_cell`` is tracked;
    entries beyond that room are counted as overflow and skipped.

    Afterwards the per-cell counts are passed to
    ``update_cell_measure_count``, the sum of its return values is
    credited to the user via ``process_score`` (when ``userid`` is given
    and the sum is positive), and drop / insert counters are reported on
    the heka client.

    :param session: database session used for lookups and inserts.
    :param entries: iterable of raw measure entries.
    :param userid: optional id of the submitting user.
    :param max_measures_per_cell: upper bound of measures accepted per
        cell key, including those already counted in
        ``Cell.total_measures``.
    :returns: list of the measure objects added to the session.
    """
    cell_count = defaultdict(int)
    cell_measures = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    dropped_malformed = 0
    dropped_overflow = 0
    # remaining room per cell key, filled lazily with one query per key
    space_available = {}

    # process entries
    for entry in entries:

        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            dropped_malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in space_available:
            query = session.query(Cell.total_measures).filter(
                Cell.radio == cell_key.radio,
                Cell.mcc == cell_key.mcc,
                Cell.mnc == cell_key.mnc,
                Cell.lac == cell_key.lac,
                Cell.cid == cell_key.cid,
                Cell.psc == cell_key.psc)
            curr = query.first()
            if curr is not None:
                space_available[cell_key] = max_measures_per_cell - curr[0]
            else:
                space_available[cell_key] = max_measures_per_cell

        if space_available[cell_key] <= 0:
            dropped_overflow += 1
            continue
        space_available[cell_key] -= 1

        # No second per-entry lookup of Cell.total_measures here: an
        # entry only gets this far if the stored total is below the
        # maximum, so re-checking "total > maximum" cannot reject it and
        # would only cost one extra query per entry.

        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[cell_key] += 1

    heka_client = get_heka_client()

    if dropped_malformed != 0:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=dropped_malformed)

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=dropped_overflow)

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(
            cell_key, count, utcnow, session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(cell_measures))
    session.add_all(cell_measures)
    return cell_measures
Example #7
0
def process_cell_measures(session, entries, userid=None,
                          max_measures_per_cell=11000):
    """Create cell measures from ``entries`` and add them to the session.

    Entries for which ``create_cell_measure`` returns a falsy value are
    counted as malformed and skipped. Per distinct cell key, the stored
    ``Cell.total_measures`` is queried once (filtered through
    ``join_cellkey``) to work out how many more measures fit below
    ``max_measures_per_cell``; entries beyond that are counted as
    overflow and skipped.

    The accepted per-cell counts are then passed to
    ``update_cell_measure_count``; the sum of its return values is
    credited through ``process_score`` when ``userid`` is given and the
    sum is positive. Drop and insert counters are reported on the heka
    client.

    :param session: database session used for lookups and inserts.
    :param entries: iterable of raw measure entries.
    :param userid: optional id of the submitting user.
    :param max_measures_per_cell: upper bound of measures accepted per
        cell key, including those already counted in
        ``Cell.total_measures``.
    :returns: list of the measure objects added to the session.
    """
    cell_count = defaultdict(int)
    cell_measures = []
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)

    dropped_malformed = 0
    dropped_overflow = 0
    # remaining room per cell key, filled lazily with one query per key
    space_available = {}

    # process entries
    for entry in entries:

        cell_measure = create_cell_measure(utcnow, entry)
        if not cell_measure:
            dropped_malformed += 1
            continue

        cell_key = to_cellkey_psc(cell_measure)

        # check if there's space for new measurement within per-cell maximum
        # note: old measures gradually expire, so this is an intake-rate limit
        if cell_key not in space_available:
            curr = session.query(Cell.total_measures).filter(
                *join_cellkey(Cell, cell_key)).first()
            already_stored = curr[0] if curr is not None else 0
            space_available[cell_key] = (
                max_measures_per_cell - already_stored)

        if space_available[cell_key] <= 0:
            dropped_overflow += 1
            continue
        space_available[cell_key] -= 1

        # The stored total is deliberately not queried again per entry:
        # reaching this point already implies it is below the maximum,
        # so a repeated "total > maximum" check could never drop the
        # entry and would only add one query per entry.

        cell_measures.append(cell_measure)
        # group per unique cell
        cell_count[cell_key] += 1

    heka_client = get_heka_client()

    if dropped_malformed != 0:
        heka_client.incr("items.dropped.cell_ingress_malformed",
                         count=dropped_malformed)

    if dropped_overflow != 0:
        heka_client.incr("items.dropped.cell_ingress_overflow",
                         count=dropped_overflow)

    # update new/total measure counts
    new_cells = 0
    for cell_key, count in cell_count.items():
        new_cells += update_cell_measure_count(
            cell_key, count, utcnow, session)

    # update user score
    if userid is not None and new_cells > 0:
        process_score(userid, new_cells, session, key='new_cell')

    heka_client.incr("items.inserted.cell_measures",
                     count=len(cell_measures))
    session.add_all(cell_measures)
    return cell_measures
Example #8
0
def process_measures(items, session, userid=None):
    """Process a batch of reports and queue cell / wifi insert tasks.

    Every item is passed through ``process_time`` and, with a fresh
    ``uuid1`` hex report id, through ``process_measure``. A position is
    recorded only for items that produced at least one cell or wifi
    measure. Cell measures are queued as one task per cell key; wifi
    measures are queued in tasks covering five wifi keys each. All tasks
    are submitted with ``expires=7200``.

    :param items: iterable of report dicts.
    :param session: database session passed on to the helpers.
    :param userid: optional user id; when given, ``process_score`` is
        called with the number of recorded positions.
    """
    stats_client = get_stats_client()
    now = util.utcnow()
    cutoff = now - datetime.timedelta(days=60)

    positions = []
    all_cells = []
    all_wifis = []
    for raw_item in items:
        item = process_time(raw_item, now, cutoff)
        report_id = uuid.uuid1().hex
        found_cells, found_wifis = process_measure(report_id, item, session)
        all_cells.extend(found_cells)
        all_wifis.extend(found_wifis)
        if not (found_cells or found_wifis):
            continue
        positions.append(dict(lat=item['lat'], lon=item['lon']))

    if all_cells:
        # group by and create task per cell key
        stats_client.incr("items.uploaded.cell_measures", len(all_cells))
        by_cell = defaultdict(list)
        for entry in all_cells:
            by_cell[to_cellkey_psc(entry)].append(entry)

        for group in by_cell.values():
            # insert measures, expire the task if it wasn't processed
            # after two hours to avoid queue overload
            insert_cell_measures.apply_async(
                args=[group], kwargs={'userid': userid}, expires=7200)

    if all_wifis:
        # group by WiFi key
        stats_client.incr("items.uploaded.wifi_measures", len(all_wifis))
        by_wifi = defaultdict(list)
        for entry in all_wifis:
            by_wifi[entry['key']].append(entry)

        # Create a task per group of 5 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few measures per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        groups = list(by_wifi.values())
        keys_per_task = 5
        for start in range(0, len(groups), keys_per_task):
            batch = []
            for group in groups[start:start + keys_per_task]:
                batch += group
            # insert measures, expire the task if it wasn't processed
            # after two hours to avoid queue overload
            insert_wifi_measures.apply_async(
                args=[batch], kwargs={'userid': userid}, expires=7200)

    if userid is not None:
        process_score(userid, len(positions), session)
    if positions:
        process_mapstat(session, now, positions)
Example #9
0
def process_measures(items, session, userid=None):
    """Process a batch of reports and queue batched insert tasks.

    Each item gets a fresh ``uuid1`` hex value stored under its
    ``'report_id'`` key (the item dict is modified in place) and is passed
    to ``process_measure``. A position is recorded only for items that
    produced at least one cell or wifi measure. Measures are grouped by
    station key and queued in tasks covering 5 cell keys or 20 wifi keys
    each; every task is submitted with ``expires=21600`` and a countdown
    that grows by one per task.

    :param items: iterable of report dicts.
    :param session: database session passed on to the helpers.
    :param userid: optional user id; when given, ``process_score`` is
        called with the number of recorded positions.
    """
    def enqueue_batches(task, groups, batch_size):
        # insert measures, expire the task if it wasn't processed
        # after six hours to avoid queue overload, also delay
        # each task by one second more, to get a more even workload
        # and avoid parallel updates of the same underlying stations
        starts = range(0, len(groups), batch_size)
        for delay, start in enumerate(starts):
            batch = []
            for group in groups[start:start + batch_size]:
                batch.extend(group)
            task.apply_async(
                args=[batch],
                kwargs={'userid': userid},
                expires=21600,
                countdown=delay)

    stats_client = get_stats_client()
    positions = []
    all_cells = []
    all_wifis = []
    for item in items:
        item['report_id'] = uuid.uuid1().hex
        found_cells, found_wifis = process_measure(item, session)
        all_cells.extend(found_cells)
        all_wifis.extend(found_wifis)
        if found_cells or found_wifis:
            positions.append({'lat': item['lat'], 'lon': item['lon']})

    if all_cells:
        # group by cell key
        stats_client.incr("items.uploaded.cell_measures",
                          len(all_cells))
        by_cell = defaultdict(list)
        for entry in all_cells:
            by_cell[to_cellkey_psc(entry)].append(entry)

        # Create a task per group of 5 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        enqueue_batches(insert_measures_cell, list(by_cell.values()), 5)

    if all_wifis:
        # group by WiFi key
        stats_client.incr("items.uploaded.wifi_measures",
                          len(all_wifis))
        by_wifi = defaultdict(list)
        for entry in all_wifis:
            by_wifi[entry['key']].append(entry)

        # Create a task per group of 20 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few measures per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        enqueue_batches(insert_measures_wifi, list(by_wifi.values()), 20)

    if userid is not None:
        process_score(userid, len(positions), session)
    if positions:
        process_mapstat(session, positions)
Example #10
0
def process_measures(items, session, userid=None):
    """Queue insert tasks for the measures found in a batch of reports.

    A ``uuid1`` hex report id is written into each item (under
    ``'report_id'``, modifying the item in place) before it is passed to
    ``process_measure``. Items that yield at least one cell or wifi
    measure contribute a position. Cell measures are grouped by cell key
    and queued five keys per task; wifi measures are grouped by wifi key
    and queued twenty keys per task. Tasks are submitted with
    ``expires=21600`` and countdowns 0, 1, 2, ... within each kind.

    :param items: iterable of report dicts.
    :param session: database session passed on to the helpers.
    :param userid: optional user id; when given, ``process_score`` is
        called with the number of recorded positions.
    """
    stats_client = get_stats_client()
    positions = []
    cell_measures = []
    wifi_measures = []
    for report in items:
        report['report_id'] = uuid.uuid1().hex
        cell, wifi = process_measure(report, session)
        cell_measures.extend(cell)
        wifi_measures.extend(wifi)
        if not cell and not wifi:
            continue
        positions.append(dict(lat=report['lat'], lon=report['lon']))

    if cell_measures:
        # group by cell key
        stats_client.incr("items.uploaded.cell_measures", len(cell_measures))
        per_cell = defaultdict(list)
        for measure in cell_measures:
            per_cell[to_cellkey_psc(measure)].append(measure)

        # Create a task per group of 5 cell keys at a time.
        # Grouping them helps in avoiding per-task overhead.
        cell_groups = list(per_cell.values())
        step = 5
        offsets = range(0, len(cell_groups), step)
        for countdown, offset in enumerate(offsets):
            chunk = cell_groups[offset:offset + step]
            values = [measure for group in chunk for measure in group]
            # insert measures, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            insert_measures_cell.apply_async(args=[values],
                                             kwargs={'userid': userid},
                                             expires=21600,
                                             countdown=countdown)

    if wifi_measures:
        # group by WiFi key
        stats_client.incr("items.uploaded.wifi_measures", len(wifi_measures))
        per_wifi = defaultdict(list)
        for measure in wifi_measures:
            per_wifi[measure['key']].append(measure)

        # Create a task per group of 20 WiFi keys at a time.
        # We tend to get a huge number of unique WiFi networks per
        # batch upload, with one to very few measures per WiFi.
        # Grouping them helps in avoiding per-task overhead.
        wifi_groups = list(per_wifi.values())
        step = 20
        offsets = range(0, len(wifi_groups), step)
        for countdown, offset in enumerate(offsets):
            chunk = wifi_groups[offset:offset + step]
            values = [measure for group in chunk for measure in group]
            # insert measures, expire the task if it wasn't processed
            # after six hours to avoid queue overload, also delay
            # each task by one second more, to get a more even workload
            # and avoid parallel updates of the same underlying stations
            insert_measures_wifi.apply_async(args=[values],
                                             kwargs={'userid': userid},
                                             expires=21600,
                                             countdown=countdown)

    if userid is not None:
        process_score(userid, len(positions), session)
    if positions:
        process_mapstat(session, positions)