Example #1
    def __init__(self,
                 regions_file=REGIONS_FILE,
                 buffer_file=REGIONS_BUFFER_FILE):
        self._buffered_shapes = {}
        self._prepared_shapes = {}
        self._shapes = {}
        self._tree_ids = {}
        self._radii = {}

        with util.gzip_open(regions_file, 'r') as fd:
            regions_data = simplejson.load(fd)

        genc_regions = frozenset([rec.alpha2 for rec in genc.REGIONS])
        for feature in regions_data['features']:
            code = feature['properties']['alpha2']
            if code in genc_regions:
                shape = geometry.shape(feature['geometry'])
                self._shapes[code] = shape
                self._prepared_shapes[code] = prepared.prep(shape)
                self._radii[code] = feature['properties']['radius']

        with util.gzip_open(buffer_file, 'r') as fd:
            buffer_data = simplejson.load(fd)

        i = 0
        envelopes = []
        for feature in buffer_data['features']:
            code = feature['properties']['alpha2']
            if code in genc_regions:
                shape = geometry.shape(feature['geometry'])
                self._buffered_shapes[code] = prepared.prep(shape)
                # Collect rtree index entries, and maintain a separate id to
                # code mapping. We don't use index object support as it
                # requires un/pickling the object entries on each lookup.
                if isinstance(shape, geometry.base.BaseMultipartGeometry):
                    # Index bounding box of individual polygons instead of
                    # the multipolygon, to avoid issues with regions crossing
                    # the -180.0/+180.0 longitude boundary.
                    for geom in shape.geoms:
                        envelopes.append((i, geom.envelope.bounds, None))
                        self._tree_ids[i] = code
                        i += 1
                else:
                    envelopes.append((i, shape.envelope.bounds, None))
                    self._tree_ids[i] = code
                    i += 1

        props = index.Property()
        props.fill_factor = 0.9
        props.leaf_capacity = 20
        self._tree = index.Index(envelopes,
                                 interleaved=True, properties=props)
        self._valid_regions = frozenset(self._shapes.keys())
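
How such an index is queried is not part of the example, but a lookup typically narrows candidates through the rtree and then confirms hits against the prepared buffered shapes. A minimal sketch, assuming only the attributes built in __init__ (the method name is illustrative):

    def region_for_point(self, lat, lon):
        # Sketch, not from the source: the rtree narrows candidates by
        # bounding box, the prepared buffered geometry confirms containment.
        point = geometry.Point(lon, lat)  # shapely order is (x=lon, y=lat)
        for tree_id in self._tree.intersection(point.bounds):
            code = self._tree_ids[tree_id]
            if self._buffered_shapes[code].contains(point):
                return code
        return None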
Example #2
    def test_files(self, session):
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row["lat"], row["lon"])
            data = DataMap.shard_model(lat, lon)(
                grid=(lat, lon), created=row["time"], modified=row["time"]
            )
            session.add(data)
        session.flush()

        lines = []
        rows = 0

        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, "quadtrees")
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, "shapes")
            tiles = os.path.join(temp_dir, "tiles")

            for shard_id, shard in DataMap.shards().items():
                filename = "map_%s.csv.gz" % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(filepath, shard.__tablename__, _session=session)

                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, "r") as fd:
                    written = fd.read()
                lines.extend([line.split(",") for line in written.split()])

                encode_file(filename, temp_dir, quaddir)

                quadfolder = os.path.join(quaddir, "map_" + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2)
            assert sorted(os.listdir(tiles)) == ["0", "1", "2"]
            assert sorted(os.listdir(os.path.join(tiles, "0", "0"))) == [
                "0.png",
                "*****@*****.**",
            ]

        assert rows == 18
        assert len(lines) == 18
        lats = [round(float(line[0]), 2) for line in lines]
        longs = [round(float(line[1]), 2) for line in lines]
        assert set(lats) == set([-10.0, 0.0, 12.35])
        assert set(longs) == set([-11.0, 12.35])
Example #3
    def import_stations(self, session, pipe, filename):
        today = util.utcnow().date()

        on_duplicate = (
            '`modified` = values(`modified`)'
            ', `lat` = values(`lat`)'
            ', `lon` = values(`lon`)'
            ', `psc` = values(`psc`)'
            ', `max_lat` = values(`max_lat`)'
            ', `min_lat` = values(`min_lat`)'
            ', `max_lon` = values(`max_lon`)'
            ', `min_lon` = values(`min_lon`)'
            ', `radius` = values(`radius`)'
            ', `samples` = values(`samples`)'
        )

        table_insert = self.cell_model.__table__.insert(
            mysql_on_duplicate=on_duplicate)

        def commit_batch(rows):
            result = session.execute(table_insert, rows)
            count = result.rowcount
            # apply trick to avoid querying for existing rows,
            # MySQL claims 1 row for an inserted row, 2 for an updated row
            inserted_rows = 2 * len(rows) - count
            changed_rows = count - len(rows)
            assert inserted_rows + changed_rows == len(rows)
            StatCounter(self.stat_key, today).incr(pipe, inserted_rows)

        areaids = set()

        with util.gzip_open(filename, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                csv_reader = csv.reader(gzip_file)
                parse_row = partial(self.make_import_dict,
                                    self.cell_model.validate,
                                    self.import_spec)
                rows = []
                for row in csv_reader:
                    # skip any header row
                    if (csv_reader.line_num == 1 and
                            row[0] == 'radio'):  # pragma: no cover
                        continue

                    data = parse_row(row)
                    if data is not None:
                        rows.append(data)
                        areaids.add((int(data['radio']), data['mcc'],
                                    data['mnc'], data['lac']))

                    if len(rows) == self.batch_size:  # pragma: no cover
                        commit_batch(rows)
                        session.flush()
                        rows = []

                if rows:
                    commit_batch(rows)

        self.area_queue.enqueue(
            [encode_cellarea(*id_) for id_ in areaids], json=False)
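
The rowcount arithmetic in commit_batch deserves a worked example. With INSERT ... ON DUPLICATE KEY UPDATE, MySQL reports 1 affected row per fresh insert and 2 per update, so a batch of 10 rows reporting rowcount 13 decomposes like this (numbers are hypothetical):

batch_len, rowcount = 10, 13
inserted_rows = 2 * batch_len - rowcount  # 20 - 13 = 7 fresh inserts
changed_rows = rowcount - batch_len       # 13 - 10 = 3 updates
assert inserted_rows + changed_rows == batch_len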
Example #4
    def get_csv(self, lo=1, hi=10, time=1408604686):
        cell = self.cell
        line_template = ('UMTS,{mcc},{mnc},{lac},{cid},{psc},{lon:.7f},'
                         '{lat:.7f},1,1,1,{time},{time},')
        lines = [line_template.format(
            mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=i * 1010, psc='',
            lon=cell.lon + i * 0.002,
            lat=cell.lat + i * 0.001,
            time=time)
            for i in range(lo, hi)]
        # add bad lines
        lines.append(line_template.format(
            mcc=cell.mcc, mnc=cell.mnc,
            lac='', cid='', psc=12,
            lon=cell.lon, lat=cell.lat, time=time,
        ))
        lines.append(line_template.format(
            mcc=cell.mcc, mnc=cell.mnc,
            lac='', cid='', psc='',
            lon=cell.lon, lat=cell.lat, time=time,
        ))
        txt = '\n'.join(lines)

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'import.csv.gz')
            with util.gzip_open(path, 'w') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    gzip_file.write(txt)
            yield path
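
Because get_csv yields while util.selfdestruct_tempdir is still open, the gzipped file only exists until the generator resumes. A hypothetical consumer (process is a stand-in name):

gen = self.get_csv()
path = next(gen)   # import.csv.gz exists from here on
process(path)      # hypothetical consumer of the gzipped CSV
gen.close()        # resumes the generator, tearing the tempdir down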
Example #5
def main(argv, _db=None):
    parser = argparse.ArgumentParser(
        prog=argv[0],
        description=(
            "Import from public cell data into a local dev environment. "
            "See https://location.services.mozilla.com/downloads"),
    )
    parser.add_argument("filename", help="Path to the csv.gz import file.")

    args = parser.parse_args(argv[1:])

    if not settings("local_dev_env"):
        print("This script can only be run in a local dev environment.")
        print("Set LOCAL_DEV_ENV=True in your environment.")
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if not os.path.isfile(filename):
        print("File %s not found." % filename)
        return 1

    configure_logging()
    celery_app = get_eager_celery_app()
    init_worker(celery_app)
    cellarea_queue = celery_app.data_queues["update_cellarea"]

    with db_worker_session(celery_app.db, commit=False) as session:
        with gzip_open(filename, "r") as file_handle:
            read_stations_from_csv(session, file_handle,
                                   celery_app.redis_client, cellarea_queue)
    return 0
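
main returns a process-style exit code, so a thin wrapper suffices to run the module as a script (a sketch under that assumption):

import sys

if __name__ == "__main__":
    sys.exit(main(sys.argv))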
Example #6
    def test_files(self, db, session):  # pragma: no cover
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row['lat'], row['lon'])
            data = DataMap.shard_model(lat, lon)(grid=(lat, lon),
                                                 created=row['time'],
                                                 modified=row['time'])
            session.add(data)
        session.flush()

        lines = []
        rows = 0
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, 'quadtrees')
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, 'shapes')
            tiles = os.path.join(temp_dir, 'tiles')

            for shard_id, shard in DataMap.shards().items():
                filename = 'map_%s.csv.gz' % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(filepath,
                                     shard.__tablename__,
                                     _session=session)

                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, 'r') as fd:
                    written = fd.read()
                lines.extend([line.split(',') for line in written.split()])

                encode_file(filename, temp_dir, quaddir)

                quadfolder = os.path.join(quaddir, 'map_' + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2)
            assert (sorted(os.listdir(tiles)) == ['0', '1', '2'])
            assert (sorted(os.listdir(os.path.join(
                tiles, '0', '0'))) == ['0.png', '[email protected]'])

        assert rows == 18
        assert len(lines) == 18
        assert (set([round(float(l[0]), 2)
                     for l in lines]) == set([-10.0, 0.0, 12.35]))
        assert (set([round(float(l[1]), 2)
                     for l in lines]) == set([-11.0, 12.35]))
Example #7
    def test_files(self, db_rw, session):
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row['lat'], row['lon'])
            data = DataMap.shard_model(lat, lon)(
                grid=(lat, lon), created=row['time'], modified=row['time'])
            session.add(data)
        session.flush()

        lines = []
        rows = 0
        db_url = str(db_rw.engine.url)
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, 'quadtrees')
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, 'shapes')
            tiles = os.path.join(temp_dir, 'tiles')

            for shard_id, shard in DataMap.shards().items():
                filename = 'map_%s.csv.gz' % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(
                    db_url, filepath, shard.__tablename__,
                    _session=session)

                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, 'r') as fd:
                    written = fd.read()
                lines.extend([line.split(',') for line in written.split()])

                encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

                quadfolder = os.path.join(quaddir, 'map_' + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes, DATAMAPS_DIR)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
            assert (sorted(os.listdir(tiles)) == ['0', '1', '2'])
            assert (sorted(os.listdir(os.path.join(tiles, '0', '0'))) ==
                    ['0.png', '[email protected]'])

        assert rows == 36
        assert len(lines) == 36
        assert (set([round(float(l[0]), 2) for l in lines]) ==
                set([-10.0, 0.0, 12.35]))
        assert (set([round(float(l[1]), 2) for l in lines]) ==
                set([-11.0, 12.35]))
Example #8
    def test_local_export(self, celery, session):
        now = util.utcnow()
        today = now.date()
        long_ago = now - timedelta(days=367)
        cell_fixture_fields = ("radio", "cid", "lat", "lon", "mnc", "mcc", "lac")
        base_cell = CellShardFactory.build(radio=Radio.wcdma)
        cell_key = {
            "radio": Radio.wcdma,
            "mcc": base_cell.mcc,
            "mnc": base_cell.mnc,
            "lac": base_cell.lac,
        }
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid, lat=base_cell.lat, lon=base_cell.lon, **cell_key)
            CellShardFactory(**cell)
            cell["lat"] = "%.7f" % cell["lat"]
            cell["lon"] = "%.7f" % cell["lon"]

            cell["radio"] = "UMTS"
            cell_strings = [(field, str(value)) for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
        # add one really old cell
        CellShardFactory(
            cid=220,
            created=long_ago,
            modified=long_ago,
            last_seen=long_ago.date(),
            **cell_key,
        )
        session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, "export.csv.gz")
            write_stations_to_csv(session, path, today)

            with util.gzip_open(path, "r") as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = next(reader)
                    assert "area" in header.values()

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value)
                            for (field, value) in exported_cell.items()
                            if field in cell_fixture_fields
                        ]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    assert cells == exported_cells
Example #9
def dump_file(datatype, session, filename, lat=None, lon=None, radius=None):
    model = {
        'blue': BlueShard,
        'cell': CellShard,
        'wifi': WifiShard,
    }
    where = where_area(lat, lon, radius)
    with util.gzip_open(filename, 'w') as fd:
        dump_model(model[datatype], session, fd, where=where)
    return 0
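
With lat, lon and radius left as None, where_area presumably imposes no restriction and the whole table is dumped; passing all three limits the dump to an area, as Example #23 below does. An illustrative call (paths and coordinates are made up):

dump_file('wifi', session, '/tmp/wifi_full.csv.gz')
dump_file('wifi', session, '/tmp/wifi_area.csv.gz',
          lat=51.5, lon=-0.13, radius=25000)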
Example #10
    def __init__(self, json_file=JSON_FILE):
        self._buffered_shapes = {}
        self._prepared_shapes = {}
        self._shapes = {}
        self._tree_ids = {}
        self._radii = {}

        with util.gzip_open(json_file, 'r') as fd:
            data = simplejson.load(fd)

        genc_regions = frozenset([rec.alpha2 for rec in genc.REGIONS])
        for feature in data['features']:
            code = feature['properties']['alpha2']
            if code in genc_regions:
                shape = geometry.shape(feature['geometry'])
                self._shapes[code] = shape
                self._prepared_shapes[code] = prepared.prep(shape)
                self._radii[code] = feature['properties']['radius']

        i = 0
        envelopes = []
        for code, shape in self._shapes.items():
            # Build up region buffers, to create shapes that include all of
            # the coastal areas and boundaries of the regions and anywhere
            # a cell signal could still be recorded. The value is in decimal
            # degrees (1.0 == ~100km) but calculations don't take projection
            # / WSG84 into account.
            # After buffering remove any parts that crosses the -180.0/+180.0
            # longitude boundary to the east or west.
            buffered = (shape.buffer(0.5)
                             .difference(DATELINE_EAST)
                             .difference(DATELINE_WEST))
            self._buffered_shapes[code] = prepared.prep(buffered)

            # Collect rtree index entries, and maintain a separate id to
            # code mapping. We don't use index object support as it
            # requires un/pickling the object entries on each lookup.
            if isinstance(buffered, geometry.base.BaseMultipartGeometry):
                # Index bounding box of individual polygons instead of
                # the multipolygon, to avoid issues with regions crossing
                # the -180.0/+180.0 longitude boundary.
                for geom in buffered.geoms:
                    envelopes.append((i, geom.envelope.bounds, None))
                    self._tree_ids[i] = code
                    i += 1
            else:
                envelopes.append((i, buffered.envelope.bounds, None))
                self._tree_ids[i] = code
                i += 1

        props = index.Property()
        props.fill_factor = 0.9
        props.leaf_capacity = 20
        self._tree = index.Index(envelopes, interleaved=True, properties=props)
        self._valid_regions = frozenset(self._shapes.keys())
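
DATELINE_EAST and DATELINE_WEST are not shown in this example. Given that they are subtracted right after buffering, they are plausibly boxes covering everything beyond the antimeridian, along these lines:

# Assumed definitions, not taken from the source: clip boxes spanning
# the longitudes beyond +180.0 / -180.0, so that buffered shapes never
# wrap around the antimeridian.
DATELINE_EAST = geometry.box(180.0, -90.0, 270.0, 90.0)
DATELINE_WEST = geometry.box(-270.0, -90.0, -180.0, 90.0)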
Example #11
    def test_local_export(self):
        cell_fixture_fields = ('radio', 'cid', 'lat', 'lon', 'mnc', 'mcc',
                               'lac')
        base_cell = CellShardFactory.build(radio=Radio.wcdma)
        cell_key = {
            'radio': Radio.wcdma,
            'mcc': base_cell.mcc,
            'mnc': base_cell.mnc,
            'lac': base_cell.lac
        }
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid,
                        lat=base_cell.lat,
                        lon=base_cell.lon,
                        **cell_key)
            CellShardFactory(**cell)
            cell['lat'] = '%.7f' % cell['lat']
            cell['lon'] = '%.7f' % cell['lon']

            cell['radio'] = 'UMTS'
            cell_strings = [(field, str(value))
                            for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
        self.session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'export.csv.gz')
            write_stations_to_csv(self.session, path)

            with util.gzip_open(path, 'r') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = six.next(reader)
                    self.assertTrue('area' in header.values())

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value)
                            for (field, value) in exported_cell.items()
                            if field in cell_fixture_fields
                        ]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    self.assertEqual(cells, exported_cells)
Example #12
    def test_local_export(self, celery, session):
        now = util.utcnow()
        today = now.date()
        long_ago = now - timedelta(days=367)
        cell_fixture_fields = (
            'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
        base_cell = CellShardFactory.build(radio=Radio.wcdma)
        cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                    'mnc': base_cell.mnc, 'lac': base_cell.lac}
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid, lat=base_cell.lat,
                        lon=base_cell.lon, **cell_key)
            CellShardFactory(**cell)
            cell['lat'] = '%.7f' % cell['lat']
            cell['lon'] = '%.7f' % cell['lon']

            cell['radio'] = 'UMTS'
            cell_strings = [
                (field, str(value)) for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
        # add one really old cell
        CellShardFactory(cid=220, created=long_ago, modified=long_ago,
                         last_seen=long_ago.date(), **cell_key)
        session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'export.csv.gz')
            write_stations_to_csv(session, path, today)

            with util.gzip_open(path, 'r') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = next(reader)
                    assert 'area' in header.values()

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value) for (field, value)
                            in exported_cell.items()
                            if field in cell_fixture_fields]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    assert cells == exported_cells
Example #13
def import_stations(session, pipe, filename, fields):
    today = util.utcnow().date()

    def commit_batch(ins, rows, commit=True):
        result = session.execute(ins, rows)
        count = result.rowcount
        # apply trick to avoid querying for existing rows,
        # MySQL claims 1 row for an inserted row, 2 for an updated row
        inserted_rows = 2 * len(rows) - count
        changed_rows = count - len(rows)
        assert inserted_rows + changed_rows == len(rows)
        StatCounter(StatKey.unique_ocid_cell, today).incr(pipe, inserted_rows)
        if commit:
            session.commit()
        else:  # pragma: no cover
            session.flush()

    with util.gzip_open(filename, 'r') as gzip_file:
        csv_reader = csv.DictReader(gzip_file, fields)
        batch = 10000
        rows = []
        area_keys = set()
        ins = OCIDCell.__table__.insert(
            on_duplicate=((
                'changeable = values(changeable), '
                'modified = values(modified), '
                'total_measures = values(total_measures), '
                'lat = values(lat), '
                'lon = values(lon), '
                'psc = values(psc), '
                '`range` = values(`range`)')))

        for row in csv_reader:
            # skip any header row
            if csv_reader.line_num == 1 and \
               'radio' in row.values():  # pragma: no cover
                continue

            data = make_ocid_cell_import_dict(row)
            if data is not None:
                rows.append(data)
                area_keys.add(CellArea.to_hashkey(data))

            if len(rows) == batch:  # pragma: no cover
                commit_batch(ins, rows, commit=False)
                rows = []

        if rows:
            commit_batch(ins, rows)

        for area_key in area_keys:
            update_area.delay(area_key, cell_type='ocid')
Example #14
    def test_files(self):
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row["lat"], row["lon"])
            data = DataMap.shard_model(lat, lon)(grid=(lat, lon), created=row["time"], modified=row["time"])
            self.session.add(data)
        self.session.flush()

        lines = []
        rows = 0
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, "quadtrees")
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, "shapes")
            tiles = os.path.join(temp_dir, "tiles")

            for shard_id, shard in DATAMAP_SHARDS.items():
                filename = "map_%s.csv.gz" % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(None, filepath, shard.__tablename__, _db_rw=_make_db(), _session=self.session)

                if not result:
                    self.assertFalse(os.path.isfile(filepath))
                    continue

                rows += result
                with util.gzip_open(filepath, "r") as fd:
                    written = fd.read()
                lines.extend([line.split(",") for line in written.split()])

                encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

                quadfolder = os.path.join(quaddir, "map_" + shard_id)
                self.assertTrue(os.path.isdir(quadfolder))
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes, DATAMAPS_DIR)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
            self.assertEqual(sorted(os.listdir(tiles)), ["0", "1", "2"])
            self.assertEqual(sorted(os.listdir(os.path.join(tiles, "0", "0"))), ["0.png", "[email protected]"])

        self.assertEqual(rows, 36)
        self.assertEqual(len(lines), 36)
        self.assertEqual(set([round(float(l[0]), 2) for l in lines]), set([-10.0, 0.0, 12.35]))
        self.assertEqual(set([round(float(l[1]), 2) for l in lines]), set([-11.0, 12.35]))
Example #15
    def test_local_export(self, celery, session):
        now = util.utcnow()
        today = now.date()
        long_ago = now - timedelta(days=367)
        cell_fixture_fields = (
            'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
        base_cell = CellShardFactory.build(radio=Radio.wcdma)
        cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                    'mnc': base_cell.mnc, 'lac': base_cell.lac}
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid, lat=base_cell.lat,
                        lon=base_cell.lon, **cell_key)
            CellShardFactory(**cell)
            cell['lat'] = '%.7f' % cell['lat']
            cell['lon'] = '%.7f' % cell['lon']

            cell['radio'] = 'UMTS'
            cell_strings = [
                (field, str(value)) for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
        # add one really old cell
        CellShardFactory(cid=220, created=long_ago, modified=long_ago,
                         last_seen=long_ago.date(), **cell_key)
        session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'export.csv.gz')
            write_stations_to_csv(session, path, today)

            with util.gzip_open(path, 'r') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = six.next(reader)
                    assert 'area' in header.values()

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value) for (field, value)
                            in exported_cell.items()
                            if field in cell_fixture_fields]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    assert cells == exported_cells
Example #16
def main(argv):  # pragma: no cover
    os.system('ogr2ogr -f GeoJSON '
              '-select "%s" -segmentize 0.1 data/temp.geojson '
              'data/ne_50m_admin_0_map_subunits.dbf' % ', '.join(PROPERTIES))
    with open('data/temp.geojson', 'r') as fd:
        jsondata = fd.read()
    os.remove('data/temp.geojson')
    data = json.loads(jsondata)
    simplified = simplify(data['features'])
    output = to_geojson(simplified)
    with util.gzip_open('ichnaea/regions.geojson.gz', 'w',
                        compresslevel=7) as fd:
        fd.write(output)
Example #17
def main(argv):
    parser = argparse.ArgumentParser(
        prog=argv[0], description="Create region GeoJSON files.")

    # implicitly parse and react to -h/--help
    parser.parse_args(argv[1:])

    os.system("ogr2ogr -f GeoJSON "
              '-select "%s" -segmentize 0.1 data/temp.geojson '
              "data/ne_50m_admin_0_map_subunits.dbf" % ", ".join(PROPERTIES))
    with open("data/temp.geojson", "r") as fd:
        jsondata = fd.read()
    os.remove("data/temp.geojson")
    data = json.loads(jsondata)
    simplified = simplify(data["features"])
    region_collection, buffer_collection = to_geojson(simplified)

    with util.gzip_open(geocode.REGIONS_FILE, "w", compresslevel=7) as fd:
        fd.write(region_collection)

    with util.gzip_open(geocode.REGIONS_BUFFER_FILE, "w",
                        compresslevel=7) as fd:
        fd.write(buffer_collection)
Example #18
def main(argv):  # pragma: no cover
    parser = argparse.ArgumentParser(
        prog=argv[0], description='Create region GeoJSON files.')

    # implicitly parse and react to -h/--help
    parser.parse_args(argv[1:])

    os.system('ogr2ogr -f GeoJSON '
              '-select "%s" -segmentize 0.1 data/temp.geojson '
              'data/ne_50m_admin_0_map_subunits.dbf' % ', '.join(PROPERTIES))
    with open('data/temp.geojson', 'r') as fd:
        jsondata = fd.read()
    os.remove('data/temp.geojson')
    data = json.loads(jsondata)
    simplified = simplify(data['features'])
    region_collection, buffer_collection = to_geojson(simplified)

    with util.gzip_open(geocode.REGIONS_FILE, 'w', compresslevel=7) as fd:
        fd.write(region_collection)

    with util.gzip_open(geocode.REGIONS_BUFFER_FILE, 'w',
                        compresslevel=7) as fd:
        fd.write(buffer_collection)
Example #19
    def test_local_export(self):
        cell_fixture_fields = (
            'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
        base_cell = CellFactory.build(radio=Radio.wcdma)
        cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                    'mnc': base_cell.mnc, 'lac': base_cell.lac}
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid, lat=base_cell.lat,
                        lon=base_cell.lon, **cell_key)
            CellFactory(**cell)
            cell['lat'] = '%.7f' % cell['lat']
            cell['lon'] = '%.7f' % cell['lon']

            cell['radio'] = 'UMTS'
            cell_strings = [
                (field, str(value)) for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellFactory(cid=210, lat=None, lon=None, **cell_key)
        self.session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'export.csv.gz')
            write_stations_to_csv(self.session, path)

            with util.gzip_open(path, 'r') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = six.next(reader)
                    self.assertTrue('area' in header.values())
                    self.assertEqual(header, CELL_HEADER_DICT)

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value) for (field, value)
                            in exported_cell.items()
                            if field in cell_fixture_fields]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    self.assertEqual(cells, exported_cells)
Example #20
def export_file(filename, tablename, _db=None, _session=None):
    today = util.utcnow().date()
    one_year_ago = today - timedelta(days=365)
    one_year_ago = one_year_ago.strftime('%Y-%m-%d')
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE modified >= '{modified}'
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename, modified=one_year_ago).replace('\n', ' '))
    db = configure_db('ro', _db=_db)

    offset = 0
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
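
random_points is not shown here. Judging from its use, it maps one grid cell plus an age bucket num (roughly months since modification) to CSV lines, emitting fewer points for staler cells; the lat/lon from decode_datamap_grid appear to be scaled integers. A rough sketch of that contract, with all constants assumed rather than taken from the source:

import random

def random_points(lat, lon, num):
    # Sketch only: jitter inside the 1/1000-degree grid cell and emit
    # fewer sample lines the older the cell is.
    count = max(1, 6 - num)
    return ['%.6f,%.6f\n' % (lat / 1000.0 + random.random() / 1000.0,
                             lon / 1000.0 + random.random() / 1000.0)
            for _ in range(count)]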
Example #21
def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
'''.format(tablename=tablename).replace('\n', ' '))

    db = configure_db('ro', transport='sync', _db=_db)
    min_grid = b''
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
Example #22
def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text("""\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(tablename=tablename).replace("\n", " "))

    db = configure_db("ro", _db=_db, pool=False)
    min_grid = b""
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, "w", compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
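
Unlike the OFFSET variant in Example #20, this revision pages by the primary key (WHERE grid > :grid ORDER BY grid), so each query scans only one page instead of rescanning every previously skipped row. The pattern in miniature (fetch and handle are stand-in names):

min_grid = b''                     # smallest possible key
while True:
    page = fetch(min_grid, limit)  # one SELECT as in the statement above
    if not page:
        break
    handle(page)
    min_grid = page[-1].grid       # resume strictly after the last key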
Example #23
    def _export(self, session, datatype, expected_keys, restrict=False):
        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, datatype + ".tar.gz")
            if restrict:
                dump.dump_file(datatype,
                               session,
                               path,
                               lat=GB_LAT,
                               lon=GB_LON,
                               radius=25000)
            else:
                dump.dump_file(datatype, session, path)

            assert os.path.isfile(path)
            with util.gzip_open(path, "r") as fd:
                lines = fd.readlines()
                assert len(lines) == len(expected_keys) + 1
                for key in expected_keys:
                    assert [True for line in lines if key in line] == [True]
Example #24
def write_stations_to_csv(session, table, columns,
                          cond, path, make_dict, fields):
    with util.gzip_open(path, 'w') as gzip_file:
        writer = csv.DictWriter(gzip_file, fields, extrasaction='ignore')
        limit = 10000
        offset = 0
        # Write header row
        writer.writerow(CELL_HEADER_DICT)
        while True:
            query = (select(columns=columns).where(cond)
                                            .limit(limit)
                                            .offset(offset)
                                            .order_by(table.c.created))
            rows = session.execute(query).fetchall()
            if rows:
                writer.writerows([make_dict(row) for row in rows])
                offset += limit
            else:
                break
Example #25
def export_file(db_url, filename, tablename, _db_rw=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename).replace('\n', ' '))
    db = configure_db(db_url, _db=_db_rw)

    offset = 0
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.engine.pool.dispose()
    return result_rows
Example #26
def dump_file(datatype, session, filename, lat=None, lon=None, radius=None):
    model = {"blue": BlueShard, "cell": CellShard, "wifi": WifiShard}
    where = where_area(lat, lon, radius)
    with util.gzip_open(filename, "w") as fd:
        dump_model(model[datatype], session, fd, where=where)
    return 0
Example #27
    def import_stations(self, session, pipe, filename):
        today = util.utcnow().date()
        area_keys = set()

        def commit_batch(ins, rows, commit=True):
            result = session.execute(ins, rows)
            count = result.rowcount
            # apply trick to avoid querying for existing rows,
            # MySQL claims 1 row for an inserted row, 2 for an updated row
            inserted_rows = 2 * len(rows) - count
            changed_rows = count - len(rows)
            assert inserted_rows + changed_rows == len(rows)
            StatCounter(self.stat_key, today).incr(pipe, inserted_rows)
            if commit:
                session.commit()
            else:  # pragma: no cover
                session.flush()

        with util.gzip_open(filename, "r") as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                csv_reader = csv.DictReader(gzip_file, CELL_FIELDS)
                rows = []
                on_duplicate = (
                    "modified = values(modified), "
                    "total_measures = values(total_measures), "
                    "lat = values(lat), "
                    "lon = values(lon), "
                    "psc = values(psc), "
                    "`range` = values(`range`)"
                )
                if self.cell_type == "ocid":
                    on_duplicate += ", changeable = values(changeable)"
                elif self.cell_type == "cell":  # pragma: no cover
                    on_duplicate += (
                        ", max_lat = values(max_lat)"
                        ", min_lat = values(min_lat)"
                        ", max_lon = values(max_lon)"
                        ", min_lon = values(min_lon)"
                    )

                ins = self.cell_model.__table__.insert(mysql_on_duplicate=on_duplicate)

                for row in csv_reader:
                    # skip any header row
                    if csv_reader.line_num == 1 and "radio" in row.values():  # pragma: no cover
                        continue

                    data = self.make_import_dict(row)
                    if data is not None:
                        rows.append(data)
                        area_keys.add(self.area_model.to_hashkey(data))

                    if len(rows) == self.batch_size:  # pragma: no cover
                        commit_batch(ins, rows, commit=False)
                        rows = []

                if rows:
                    commit_batch(ins, rows)

        area_keys = list(area_keys)
        for i in range(0, len(area_keys), self.area_batch_size):
            area_batch = area_keys[i : i + self.area_batch_size]
            self.update_area_task.delay(area_batch, cell_type=self.cell_type)
Example #28
def write_stations_to_csv(session, path, today,
                          start_time=None, end_time=None):
    where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL'
    if start_time is not None and end_time is not None:
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))
    else:
        # limit to cells modified in the last 12 months
        one_year = today - timedelta(days=365)
        where = where + ' AND modified >= "%s"' % one_year.strftime('%Y-%m-%d')

    header_row = [
        'radio', 'mcc', 'net', 'area', 'cell', 'unit',
        'lon', 'lat', 'range', 'samples', 'changeable',
        'created', 'updated', 'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = '''SELECT
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`,
        `mnc`,
        `lac`,
        `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7),
        ROUND(`lat`, 7),
        COALESCE(`radius`, "0"),
        COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s
ORDER BY `cellid`
LIMIT :l
OFFSET :o
'''

    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                offset = 0
                limit = 25000
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(o=offset, l=limit)).fetchall()
                    if rows:
                        buf = '\r\n'.join([row.cell_value for row in rows])
                        if buf:
                            buf += '\r\n'
                        gzip_file.write(buf)
                        offset += limit
                    else:
                        break
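
The CASE radio mapping and the radio != 1 filter line up with an integer-valued Radio enum; assuming the values implied by these examples (Radio.wcdma is exported under the label 'UMTS'), the filter excludes CDMA:

from enum import IntEnum

class Radio(IntEnum):
    # Values inferred from the CASE mapping above, not confirmed by
    # the source; cdma (1) is the radio type filtered out.
    gsm = 0
    cdma = 1
    wcdma = 2
    lte = 3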
Example #29
def write_stations_to_csv(session, path, start_time=None, end_time=None):
    where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL'
    if None not in (start_time, end_time):
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))

    header_row = [
        'radio',
        'mcc',
        'net',
        'area',
        'cell',
        'unit',
        'lon',
        'lat',
        'range',
        'samples',
        'changeable',
        'created',
        'updated',
        'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = '''SELECT
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`,
        `mnc`,
        `lac`,
        `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7),
        ROUND(`lat`, 7),
        COALESCE(`radius`, "0"),
        COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s
ORDER BY `radio`, `mcc`, `mnc`, `lac`, `cid`
LIMIT :l
OFFSET :o
'''

    limit = 10000
    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                # Reset the offset for each shard table; carrying it
                # over from the previous table would silently skip rows.
                offset = 0
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(o=offset, l=limit)).fetchall()
                    if rows:
                        buf = '\r\n'.join([row.cell_value for row in rows])
                        if buf:
                            buf += '\r\n'
                        gzip_file.write(buf)
                        offset += limit
                    else:
                        break
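
The per-table loop above is plain LIMIT/OFFSET pagination. The same pattern isolated as a generator sketch, assuming a SQLAlchemy text() statement with :l and :o bind parameters as in the examples; the helper name is illustrative:

def offset_batches(session, table_stmt, limit=10000):
    # Fetch successive pages until an empty result ends the loop.
    # Note: OFFSET forces MySQL to scan and discard the skipped rows,
    # so late pages get progressively more expensive.
    offset = 0
    while True:
        rows = session.execute(
            table_stmt.bindparams(o=offset, l=limit)).fetchall()
        if not rows:
            break
        yield rows
        offset += limit
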
Example #36
    def import_stations(self, session, pipe, filename):
        today = util.utcnow().date()
        shards = self.cell_model.shards()

        on_duplicate = ('`modified` = values(`modified`)'
                        ', `lat` = values(`lat`)'
                        ', `lon` = values(`lon`)'
                        ', `psc` = values(`psc`)'
                        ', `max_lat` = values(`max_lat`)'
                        ', `min_lat` = values(`min_lat`)'
                        ', `max_lon` = values(`max_lon`)'
                        ', `min_lon` = values(`min_lon`)'
                        ', `radius` = values(`radius`)'
                        ', `samples` = values(`samples`)')

        def commit_batch(rows):
            all_inserted_rows = 0
            for shard_id, shard_rows in rows.items():
                table_insert = shards[shard_id].__table__.insert(
                    mysql_on_duplicate=on_duplicate)

                result = session.execute(table_insert, shard_rows)
                count = result.rowcount
                # Trick to avoid querying for existing rows: MySQL
                # reports a rowcount of 1 for each inserted row and
                # 2 for each updated row.
                inserted_rows = 2 * len(shard_rows) - count
                changed_rows = count - len(shard_rows)
                assert inserted_rows + changed_rows == len(shard_rows)
                all_inserted_rows += inserted_rows
            StatCounter(self.stat_key, today).incr(pipe, all_inserted_rows)

        areaids = set()

        with util.gzip_open(filename, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                cell_model = self.cell_model
                csv_reader = csv.reader(gzip_file)
                parse_row = partial(self.make_import_dict,
                                    self.cell_model.validate, self.import_spec)

                rows = defaultdict(list)
                row_count = 0
                for row in csv_reader:
                    # skip any header row
                    if (csv_reader.line_num == 1
                            and row[0] == 'radio'):  # pragma: no cover
                        continue

                    data = parse_row(row)
                    if data is not None:
                        rows[cell_model.shard_id(data['radio'])].append(data)
                        row_count += 1
                        areaids.add((int(data['radio']), data['mcc'],
                                     data['mnc'], data['lac']))

                    if row_count == self.batch_size:  # pragma: no cover
                        commit_batch(rows)
                        session.flush()
                        rows = defaultdict(list)
                        row_count = 0

                if rows:
                    commit_batch(rows)

        self.area_queue.enqueue([encode_cellarea(*id_) for id_ in areaids],
                                json=False)
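
To make the rowcount arithmetic in commit_batch concrete: with INSERT ... ON DUPLICATE KEY UPDATE, MySQL reports a rowcount of 1 for each newly inserted row and 2 for each updated row. A worked sketch with illustrative numbers:

batch_size = 10   # len(shard_rows)
rowcount = 13     # 7 inserts * 1 + 3 updates * 2 == 13
inserted_rows = 2 * batch_size - rowcount   # 20 - 13 == 7
changed_rows = rowcount - batch_size        # 13 - 10 == 3
assert inserted_rows + changed_rows == batch_size
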
Example #37
def write_stations_to_csv(session, path, start_time=None, end_time=None):
    where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL'
    if None not in (start_time, end_time):
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))

    header_row = [
        'radio', 'mcc', 'net', 'area', 'cell', 'unit',
        'lon', 'lat', 'range', 'samples', 'changeable',
        'created', 'updated', 'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    table = Cell.__tablename__
    stmt = '''SELECT
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`,
        `mnc`,
        `lac`,
        `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7),
        ROUND(`lat`, 7),
        COALESCE(`radius`, "0"),
        COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s
ORDER BY `radio`, `mcc`, `mnc`, `lac`, `cid`
LIMIT :l
OFFSET :o
''' % (table, where)
    stmt = text(stmt)

    limit = 10000
    offset = 0
    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            while True:
                rows = session.execute(
                    stmt.bindparams(o=offset, l=limit)).fetchall()
                if rows:
                    buf = '\r\n'.join([row.cell_value for row in rows])
                    if buf:
                        buf += '\r\n'
                    gzip_file.write(buf)
                    offset += limit
                else:
                    break
Example #38
def write_stations_to_csv(session,
                          path,
                          today,
                          start_time=None,
                          end_time=None):
    linesep = "\r\n"

    where = "lat IS NOT NULL AND lon IS NOT NULL"
    if start_time is not None and end_time is not None:
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = "%Y-%m-%d %H:%M:%S"
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))
    else:
        # limit to cells modified in the last 12 months
        one_year = today - timedelta(days=365)
        where = where + ' AND modified >= "%s"' % one_year.strftime("%Y-%m-%d")

    header_row = ",".join(_FIELD_NAMES) + linesep

    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = """SELECT
    `cellid`,
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`,
        `mnc`,
        `lac`,
        `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7),
        ROUND(`lat`, 7),
        COALESCE(`radius`, "0"),
        COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s AND `cellid` > :cellid
ORDER BY `cellid`
LIMIT :limit
"""

    with util.gzip_open(path, "w", compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                min_cellid = ""
                limit = 25000
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(limit=limit,
                                              cellid=min_cellid)).fetchall()
                    if rows:
                        buf = "".join(row.cell_value + linesep for row in rows)
                        gzip_file.write(buf)
                        min_cellid = rows[-1].cellid
                    else:
                        break
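
Unlike the earlier LIMIT/OFFSET variants, this version pages on cellid (keyset pagination): each query seeks past the last key seen instead of scanning and discarding the skipped rows, so the cost of late pages stays flat. The pattern isolated as a sketch, under the same assumptions as the example above (a text() statement filtering on cellid > :cellid and ordering by cellid):

def keyset_batches(session, table_stmt, limit=25000):
    # Yield row batches by repeatedly seeking past the last cellid
    # seen; an empty result ends the loop.
    min_cellid = ''
    while True:
        rows = session.execute(
            table_stmt.bindparams(limit=limit,
                                  cellid=min_cellid)).fetchall()
        if not rows:
            break
        yield rows
        min_cellid = rows[-1].cellid
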
Example #39
def write_stations_to_csv(session, path, start_time=None, end_time=None):
    where = "radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL"
    if None not in (start_time, end_time):
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = "%Y-%m-%d %H:%M:%S"
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))

    header_row = [
        "radio",
        "mcc",
        "net",
        "area",
        "cell",
        "unit",
        "lon",
        "lat",
        "range",
        "samples",
        "changeable",
        "created",
        "updated",
        "averageSignal",
    ]
    header_row = ",".join(header_row) + "\n"

    table = Cell.__tablename__
    stmt = """SELECT
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`,
        `mnc`,
        `lac`,
        `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7),
        ROUND(`lat`, 7),
        COALESCE(`range`, "0"),
        COALESCE(`total_measures`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s
ORDER BY `radio`, `mcc`, `mnc`, `lac`, `cid`
LIMIT :l
OFFSET :o
""" % (
        table,
        where,
    )
    stmt = text(stmt)

    limit = 10000
    offset = 0
    with util.gzip_open(path, "w", compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            while True:
                rows = session.execute(stmt.bindparams(o=offset, l=limit)).fetchall()
                if rows:
                    buf = "\r\n".join([row.cell_value for row in rows])
                    if buf:
                        buf += "\r\n"
                    gzip_file.write(buf)
                    offset += limit
                else:
                    break
Example #40
def write_stations_to_csv(session,
                          path,
                          today,
                          start_time=None,
                          end_time=None):
    where = 'lat IS NOT NULL AND lon IS NOT NULL'
    if start_time is not None and end_time is not None:
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))
    else:
        # limit to cells modified in the last 12 months
        one_year = today - timedelta(days=365)
        where = where + ' AND modified >= "%s"' % one_year.strftime('%Y-%m-%d')

    header_row = [
        'radio',
        'mcc',
        'net',
        'area',
        'cell',
        'unit',
        'lon',
        'lat',
        'range',
        'samples',
        'changeable',
        'created',
        'updated',
        'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = '''SELECT
    `cellid`,
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`,
        `mnc`,
        `lac`,
        `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7),
        ROUND(`lat`, 7),
        COALESCE(`radius`, "0"),
        COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s AND `cellid` > :cellid
ORDER BY `cellid`
LIMIT :limit
'''

    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                min_cellid = ''
                limit = 25000
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(limit=limit,
                                              cellid=min_cellid)).fetchall()
                    if rows:
                        buf = '\r\n'.join([row.cell_value for row in rows])
                        if buf:
                            buf += '\r\n'
                        gzip_file.write(buf)
                        min_cellid = rows[-1].cellid
                    else:
                        break