Example #1
0
def load_file(session, source_file, batch_size=100, userid=None):
    utcnow = util.utcnow()
    utcmin = utcnow - datetime.timedelta(120)

    with open(source_file, 'r') as fd:
        reader = csv.reader(fd, delimiter='\t', quotechar=None)

        counter = 0
        items = []
        for fields in reader:
            try:
                time = int(fields[0])
                if time == 0:  # pragma: no cover
                    # unknown time gets an old date
                    time = utcmin
                else:
                    # convert from unixtime to utc
                    time = datetime.datetime.utcfromtimestamp(time)

                key = normalized_wifi_key(str(fields[1]))
                if not valid_wifi_pattern(key):  # pragma: no cover
                    continue

                lat = float(fields[2])
                lon = float(fields[3])
                accuracy = int(fields[4])
                altitude = int(fields[5])
                altitude_accuracy = int(fields[6])
                channel = int(fields[7])
                signal = int(fields[8])

                wifi = dict(
                    key=key,
                    channel=channel,
                    signal=signal,
                )
                data = dict(
                    lat=lat,
                    lon=lon,
                    time=time,
                    accuracy=accuracy,
                    altitude=altitude,
                    altitude_accuracy=altitude_accuracy,
                    radio='',
                    cell=(),
                    wifi=[wifi],

                    # not sure if the importer has an actual file
                    # specification anywhere
                    heading=-255,
                    speed=-255,
                )
            except (ValueError, IndexError):
                continue

            items.append(data)
            counter += 1

            # flush every batch_size records
            if counter % batch_size == 0:
                process_measures(items, session, userid=userid)
                items = []
                session.flush()
                print('Added %s records.' % counter)

    # process the remaining items
    process_measures(items, session, userid=userid)
    print('Added %s records.' % counter)

    session.flush()
    return counter
Example #2
0
def search_wifi(session, data):

    # Estimate signal strength at -100 dBm if none is provided,
    # which is worse than the 99th percentile of wifi dBms we
    # see in practice (-98).
    def signal_strength(w):
        if 'signal' in w:
            return int(w['signal'])
        else:
            return -100

    wifi_signals = dict([(normalized_wifi_key(w['key']),
                          signal_strength(w))
                         for w in data['wifi']])
    wifi_keys = set(wifi_signals.keys())

    if not any(wifi_keys):
        # No valid normalized keys.
        return None
    if len(wifi_keys) < MIN_WIFIS_IN_QUERY:
        # We didn't get enough keys.
        return None
    query = session.query(Wifi.key, Wifi.lat, Wifi.lon, Wifi.range).filter(
        Wifi.key.in_(wifi_keys)).filter(
        Wifi.lat.isnot(None)).filter(
        Wifi.lon.isnot(None))
    wifis = query.all()
    if len(wifis) < MIN_WIFIS_IN_QUERY:
        # We didn't get enough matches.
        return None

    wifis = [Network(normalized_wifi_key(w[0]), w[1], w[2], w[3])
             for w in wifis]

    # Sort networks by signal strengths in query.
    wifis.sort(lambda a, b: cmp(wifi_signals[b.key],
                                wifi_signals[a.key]))

    clusters = []

    # The first loop forms a set of clusters by distance,
    # preferring the cluster with the stronger signal strength
    # if there's a tie.
    for w in wifis:

        # Try to assign w to a cluster (but at most one).
        for c in clusters:
            for n in c:
                if distance(quantize(n.lat),
                            quantize(n.lon),
                            quantize(w.lat),
                            quantize(w.lon)) <= MAX_WIFI_CLUSTER_KM:
                    c.append(w)
                    w = None
                    break

            if w is None:
                break

        # If w didn't adhere to any cluster, make a new one.
        if w is not None:
            clusters.append([w])

    # The second loop selects a cluster and estimates the position of that
    # cluster. The selected cluster is the one with the most points, larger
    # than MIN_WIFIS_IN_CLUSTER; its position is estimated taking up-to
    # MAX_WIFIS_IN_CLUSTER worth of points from the cluster, which is
    # pre-sorted in signal-strength order due to the way we built the
    # clusters.
    #
    # The reasoning here is that if we have >1 cluster at all, we probably
    # have some bad data -- likely an AP or set of APs associated with a
    # single antenna that moved -- since a user shouldn't be able to hear
    # multiple groups 500m apart.
    #
    # So we're trying to select a cluster that's most-likely good data,
    # which we assume to be the one with the most points in it.
    #
    # The reason we take a subset of those points when estimating location
    # is that we're doing a (non-weighted) centroid calculation, which is
    # itself unbalanced by distant elements. Even if we did a weighted
    # centroid here, using radio intensity as a proxy for distance has an
    # error that increases significantly with distance, so we'd have to
    # underweight pretty heavily.

    clusters = [c for c in clusters if len(c) > MIN_WIFIS_IN_CLUSTER]

    if len(clusters) == 0:
        return None

    clusters.sort(lambda a, b: cmp(len(b), len(a)))
    cluster = clusters[0]
    sample = cluster[:min(len(cluster), MAX_WIFIS_IN_CLUSTER)]
    length = len(sample)
    avg_lat = sum([n.lat for n in sample]) / length
    avg_lon = sum([n.lon for n in sample]) / length
    return {
        'lat': quantize(avg_lat),
        'lon': quantize(avg_lon),
        'accuracy': estimate_accuracy(avg_lat, avg_lon,
                                      sample, WIFI_MIN_ACCURACY),
    }
Example #3
0
def search_wifi(session, data):

    # estimate signal strength at -100 dBm if none is provided,
    # which is worse than the 99th percentile of wifi dBms we
    # see in practice (-98).
    def signal_strength(w):
        if 'signal' in w:
            return int(w['signal'])
        else:
            return -100

    wifi_signals = dict([(normalized_wifi_key(w['key']),
                          signal_strength(w))
                         for w in data['wifi']])
    wifi_keys = set(wifi_signals.keys())

    if not any(wifi_keys):
        # no valid normalized keys
        return None
    if len(wifi_keys) < 3:
        # we didn't even get three keys, bail out
        return None
    query = session.query(Wifi.key, Wifi.lat, Wifi.lon).filter(
        Wifi.key.in_(wifi_keys)).filter(
        Wifi.lat.isnot(None)).filter(
        Wifi.lon.isnot(None))
    wifis = query.all()
    if len(wifis) < 3:
        # we got fewer than three actual matches
        return None

    wifis = [Network(normalized_wifi_key(w[0]), w[1], w[2]) for w in wifis]

    # sort networks by signal strengths in query
    wifis.sort(lambda a, b: cmp(wifi_signals[b.key],
                                wifi_signals[a.key]))

    clusters = []

    for w in wifis:
        # try to assign w to a cluster (but at most one)
        for c in clusters:
            for n in c:
                if distance(quantize(n.lat), quantize(n.lon),
                            quantize(w.lat), quantize(w.lon)) <= MAX_DIST:
                    c.append(w)
                    w = None
                    break

            if len(c) >= 3:
                # if we have a cluster with more than 3
                # networks in it, return its centroid.
                length = len(c)
                avg_lat = sum([n.lat for n in c]) / length
                avg_lon = sum([n.lon for n in c]) / length
                return {
                    'lat': quantize(avg_lat),
                    'lon': quantize(avg_lon),
                    'accuracy': 500,
                }

            if w is None:
                break

        # if w didn't adhere to any cluster, make a new one
        if w is not None:
            clusters.append([w])

    # if we didn't get any clusters with >3 networks,
    # the query is a bunch of outliers; give up and
    # let the next location method try.
    return None