def test_decode_datamap_grid(self):
    self.assertEqual(
        decode_datamap_grid(b'\x00\x00\x00\x00\x00\x00\x00\x00'),
        (-90000, -180000))
    self.assertEqual(
        decode_datamap_grid(b'AAAAAAAAAAA=', codec='base64'),
        (-90000, -180000))
    self.assertEqual(
        decode_datamap_grid(b'\x00\x01_\x90\x00\x02\xbf '),
        (0, 0))
    self.assertEqual(
        decode_datamap_grid(b'AAFfkAACvyA=', codec='base64'),
        (0, 0))
    self.assertEqual(
        decode_datamap_grid(b'\x00\x02\xbf \x00\x05~@'),
        (90000, 180000))
    self.assertEqual(
        decode_datamap_grid(b'\x00\x02\xbf \x00\x05~@', scale=True),
        (90.0, 180.0))
    self.assertEqual(
        decode_datamap_grid(b'AAK/IAAFfkA=', codec='base64'),
        (90000, 180000))
    self.assertEqual(
        decode_datamap_grid(b'AAK/IAAFfkA=', scale=True, codec='base64'),
        (90.0, 180.0))
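# The fixtures above pin down the wire format: the raw grid is eight bytes,
# and the base64 variants decode to the same bytes. A minimal sketch of what
# decode_datamap_grid plausibly does, assuming two big-endian uint32 values
# storing (lat + 90000, lon + 180000) in 1/1000-degree units; this is an
# illustration consistent with the fixtures, not the project's implementation.
import base64
import struct

def decode_datamap_grid_sketch(value, scale=False, codec=None):
    # Hypothetical re-implementation for illustration only.
    if codec == 'base64':
        value = base64.b64decode(value)
    lat, lon = struct.unpack('>II', value)
    lat, lon = lat - 90000, lon - 180000
    if scale:
        # scale=True returns degrees as floats instead of 1/1000 degrees
        return lat / 1000.0, lon / 1000.0
    return lat, lon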
def export_file(filename, tablename, _db=None, _session=None):
    today = util.utcnow().date()
    one_year_ago = today - timedelta(days=365)
    one_year_ago = one_year_ago.strftime('%Y-%m-%d')
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE modified >= '{modified}'
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename, modified=one_year_ago).replace('\n', ' '))
    db = configure_db('ro', _db=_db)

    offset = 0
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
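# export_file relies on random_points() to expand each grid row into zero or
# more "lat,lon\n" CSV lines, where num is the row's age in months and older
# rows yield fewer points. A hedged sketch of that behavior; the point count,
# jitter range, and line format are illustrative guesses, not the real helper.
import random

def random_points_sketch(lat, lon, num):
    # Hypothetical stand-in: newer rows (smaller num) yield more points.
    # Coordinates arrive in 1/1000-degree units and are emitted as degrees.
    count = max(6 - num, 0)
    return [
        '%.6f,%.6f\n' % ((lat + random.randint(-10, 10)) / 1000.0,
                         (lon + random.randint(-10, 10)) / 1000.0)
        for _ in range(count)
    ]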
def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
'''.format(tablename=tablename).replace('\n', ' '))
    db = configure_db('ro', transport='sync', _db=_db)

    min_grid = b''
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
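# This revision swaps LIMIT/OFFSET paging for keyset pagination on the primary
# key: instead of making MySQL scan and discard :offset rows on every query, it
# remembers the last grid seen and seeks past it. A generic sketch of the
# pattern as a generator, independent of this codebase.
def keyset_paginate_sketch(session, stmt, limit=200000):
    # Each query resumes after the last key rather than re-counting from
    # the start, so cost per batch stays flat as the table grows.
    last_key = b''
    while True:
        rows = session.execute(
            stmt.bindparams(limit=limit, grid=last_key)).fetchall()
        if not rows:
            return
        yield rows
        last_key = rows[-1].grid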
def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text("""\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(tablename=tablename).replace("\n", " "))
    db = configure_db("ro", _db=_db, pool=False)

    min_grid = b""
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, "w", compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
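# pool=False suggests this worker opens plain, unpooled connections; in
# SQLAlchemy terms that plausibly maps to NullPool. A sketch of that
# assumption only; configure_db's actual internals are not shown here.
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool

def configure_db_sketch(db_url):
    # Hypothetical: an unpooled engine, so a short-lived worker process
    # opens one connection per use and drops it on close.
    return create_engine(db_url, poolclass=NullPool)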
def test_decode_datamap_grid(self):
    assert decode_datamap_grid(b"\x00\x00\x00\x00\x00\x00\x00\x00") == (
        -90000,
        -180000,
    )
    assert decode_datamap_grid(b"AAAAAAAAAAA=", codec="base64") == (-90000, -180000)
    assert decode_datamap_grid(b"\x00\x01_\x90\x00\x02\xbf ") == (0, 0)
    assert decode_datamap_grid(b"AAFfkAACvyA=", codec="base64") == (0, 0)
    assert decode_datamap_grid(b"\x00\x02\xbf \x00\x05~@") == (90000, 180000)
    assert decode_datamap_grid(b"\x00\x02\xbf \x00\x05~@", scale=True) == (
        90.0,
        180.0,
    )
    assert decode_datamap_grid(b"AAK/IAAFfkA=", codec="base64") == (90000, 180000)
    assert decode_datamap_grid(b"AAK/IAAFfkA=", scale=True, codec="base64") == (
        90.0,
        180.0,
    )
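# For symmetry, a hypothetical inverse of decode_datamap_grid under the same
# wire-format assumption (two big-endian uint32 values shifted by 90000 and
# 180000). Illustrative only; the project's encoder is not shown here.
import base64
import struct

def encode_datamap_grid_sketch(lat, lon, scale=False, codec=None):
    # Hypothetical inverse for illustration only.
    if scale:
        # Accept degrees as floats and convert to 1/1000-degree units.
        lat, lon = int(lat * 1000), int(lon * 1000)
    value = struct.pack(">II", lat + 90000, lon + 180000)
    if codec == "base64":
        value = base64.b64encode(value)
    return value

# Round-trips against the fixtures above, e.g.
# encode_datamap_grid_sketch(0, 0) == b"\x00\x01_\x90\x00\x02\xbf "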
def export_file(db_url, filename, tablename, _db_rw=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename).replace('\n', ' '))
    db = configure_db(db_url, _db=_db_rw)

    offset = 0
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.engine.pool.dispose()
    return result_rows
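# db_worker_session is used throughout as a context manager around a
# read-only session with commit=False. A minimal sketch of what such a
# helper could look like, assuming db exposes a session() factory; that
# factory and the commit/rollback policy are assumptions, not this
# project's API.
from contextlib import contextmanager

@contextmanager
def db_worker_session_sketch(db, commit=True):
    # Hypothetical: yield a session, commit on success unless commit=False,
    # roll back on error, always close.
    session = db.session()
    try:
        yield session
        if commit:
            session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()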
def export_to_csv(filename, csv_dir, tablename, row_limit=None, file_limit=None):
    """
    Export a datamap table to a CSV file.

    :param filename: An output file ending in .csv
    :param csv_dir: The output directory
    :param tablename: The name of the datamap table to export
    :param row_limit: The number of rows to fetch at a time
    :param file_limit: The number of output rows before rotating files
    :return: A tuple (rows exported, files created)

    Each database row is turned into 0 to 6 similar CSV rows by
    random_points(), based on how recently they were recorded.

    If file_limit is not reached, the output file will be the given
    filename. If file_limit is reached, the output files will have a
    serial number and be based on the filename. For example, "map.csv"
    will become "map_0001.csv", "map_0002.csv", etc.
    """
    stmt = text(
        """\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(
            tablename=tablename
        ).replace(
            "\n", " "
        )
    )
    db = configure_db("ro", pool=False)
    min_grid = b""
    row_limit = row_limit or 200_000
    file_limit = file_limit or 10_000_000

    result_rows = 0
    file_path = os.path.join(csv_dir, filename)
    fd = open(file_path, "w")
    file_count = 1
    file_rows = 0
    orig_filename = filename
    orig_file_path = file_path
    assert filename.endswith(".csv")
    try:
        with db_worker_session(db, commit=False) as session:
            while True:
                result = session.execute(
                    stmt.bindparams(limit=row_limit, grid=min_grid)
                )
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)

                # Rotate the file when needed; file_rows (not result_rows)
                # tracks the size of the current file
                file_rows += len(lines)
                if file_rows >= file_limit:
                    fd.close()
                    file_count += 1
                    file_rows = 0
                    filename = orig_filename.replace(
                        ".csv", f"_{file_count:04}.csv"
                    )
                    file_path = os.path.join(csv_dir, filename)
                    fd = open(file_path, "w")

                min_grid = rows[-1].grid
    finally:
        fd.close()

    if not file_rows:
        # The last rotated file is empty, remove it
        os.remove(file_path)
        file_count -= 1

    if file_count > 1:
        # Rename the first file to the serial CSV format
        filename = orig_filename.replace(".csv", "_0001.csv")
        file_path = os.path.join(csv_dir, filename)
        os.rename(orig_file_path, file_path)

    db.close()
    return result_rows, file_count
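# A hedged usage example of export_to_csv. The table name "datamap_ne",
# the output directory, and both limits are made up for illustration.
rows, files = export_to_csv(
    "map.csv", "/tmp/csv", "datamap_ne",
    row_limit=10_000, file_limit=1_000_000)
print(f"exported {rows} rows into {files} file(s)")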