def test_files(self):
    today = util.utcnow().date()
    rows = [
        dict(time=today, lat=12.345, lon=12.345),
        dict(time=today, lat=0, lon=12.345),
        dict(time=today, lat=-10.000, lon=-11.000),
    ]
    # Scale each point to grid coordinates and store it in its shard model.
    for row in rows:
        lat, lon = DataMap.scale(row["lat"], row["lon"])
        data = DataMap.shard_model(lat, lon)(
            grid=(lat, lon), created=row["time"], modified=row["time"])
        self.session.add(data)
    self.session.flush()

    lines = []
    rows = 0  # re-used as a counter for the number of exported rows
    with util.selfdestruct_tempdir() as temp_dir:
        quaddir = os.path.join(temp_dir, "quadtrees")
        os.mkdir(quaddir)
        shapes = os.path.join(temp_dir, "shapes")
        tiles = os.path.join(temp_dir, "tiles")

        for shard_id, shard in DATAMAP_SHARDS.items():
            filename = "map_%s.csv.gz" % shard_id
            filepath = os.path.join(temp_dir, filename)
            result = export_file(
                None, filepath, shard.__tablename__,
                _db_rw=_make_db(), _session=self.session)

            # Empty shards export no rows and create no file.
            if not result:
                self.assertFalse(os.path.isfile(filepath))
                continue

            rows += result
            with util.gzip_open(filepath, "r") as fd:
                written = fd.read()
            lines.extend([line.split(",") for line in written.split()])

            # Encode the shard CSV into a quadtree directory.
            encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

            quadfolder = os.path.join(quaddir, "map_" + shard_id)
            self.assertTrue(os.path.isdir(quadfolder))
            self._check_quadtree(quadfolder)

        # Merge the per-shard quadtrees and render the tile pyramid.
        merge_files(quaddir, shapes, DATAMAPS_DIR)
        self._check_quadtree(shapes)

        render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
        self.assertEqual(sorted(os.listdir(tiles)), ["0", "1", "2"])
        self.assertEqual(
            sorted(os.listdir(os.path.join(tiles, "0", "0"))),
            ["0.png", "0@2x.png"])

    self.assertEqual(rows, 36)
    self.assertEqual(len(lines), 36)
    self.assertEqual(
        set([round(float(l[0]), 2) for l in lines]),
        set([-10.0, 0.0, 12.35]))
    self.assertEqual(
        set([round(float(l[1]), 2) for l in lines]),
        set([-11.0, 12.35]))
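
# Illustrative sketch, not part of the original test suite: the assertions in
# test_files imply each exported shard CSV is gzip-compressed, with two
# comma-separated columns per line, latitude then longitude. A minimal
# standalone reader under that assumption (read_datamap_csv is a hypothetical
# name) could look like this:
def read_datamap_csv(filepath):
    import gzip

    # Parse each non-empty line into a (lat, lon) float tuple.
    with gzip.open(filepath, "rt") as fd:
        return [
            tuple(float(value) for value in line.split(",")[:2])
            for line in fd.read().split()
        ]
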
def export_files(pool, db_url, csvdir):  # pragma: no cover
    jobs = []
    result_rows = 0
    for shard_id, shard in sorted(DATAMAP_SHARDS.items()):
        # Sorting the shards prefers the north, which contains more
        # data points than the south.
        filename = os.path.join(csvdir, "map_%s.csv.gz" % shard_id)
        jobs.append(pool.apply_async(
            export_file, (db_url, filename, shard.__tablename__)))

    for job in jobs:
        result_rows += job.get()

    return result_rows
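
# Usage sketch (illustrative, not part of the original module): export_files
# accepts any pool exposing apply_async(), e.g. multiprocessing.Pool. The
# database URL and CSV directory below are hypothetical placeholder values.
if __name__ == "__main__":  # pragma: no cover
    from multiprocessing import Pool

    with Pool(processes=4) as pool:
        total = export_files(
            pool, "mysql+pymysql://user:pw@localhost/location", "/tmp/csvdir")
    print("exported %d rows" % total)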