Example #1
    def test_decode_datamap_grid(self):
        self.assertEqual(
            decode_datamap_grid(b'\x00\x00\x00\x00\x00\x00\x00\x00'),
            (-90000, -180000))
        self.assertEqual(
            decode_datamap_grid(b'AAAAAAAAAAA=', codec='base64'),
            (-90000, -180000))

        self.assertEqual(
            decode_datamap_grid(b'\x00\x01_\x90\x00\x02\xbf '),
            (0, 0))
        self.assertEqual(
            decode_datamap_grid(b'AAFfkAACvyA=', codec='base64'),
            (0, 0))

        self.assertEqual(
            decode_datamap_grid(b'\x00\x02\xbf \x00\x05~@'),
            (90000, 180000))
        self.assertEqual(
            decode_datamap_grid(b'\x00\x02\xbf \x00\x05~@', scale=True),
            (90.0, 180.0))
        self.assertEqual(
            decode_datamap_grid(b'AAK/IAAFfkA=', codec='base64'),
            (90000, 180000))
        self.assertEqual(
            decode_datamap_grid(b'AAK/IAAFfkA=', scale=True, codec='base64'),
            (90.0, 180.0))
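These assertions pin down the byte layout: (-90000, -180000) encodes as eight zero bytes, (0, 0) as 0x00015F90 0x0002BF20, and the base64 variants are the same bytes re-encoded. A minimal decoder consistent with those vectors, assuming the grid is two big-endian unsigned 32-bit integers offset so the minimum coordinate encodes as zero (a sketch only, not the library's actual implementation):

import base64
import struct

def decode_grid_sketch(value, scale=False, codec=None):
    # Sketch: two big-endian uint32s, offset by (90000, 180000) so that
    # the minimum coordinate (-90000, -180000) is eight zero bytes.
    if codec == 'base64':
        value = base64.b64decode(value)
    lat, lon = struct.unpack('>II', value)
    lat, lon = lat - 90000, lon - 180000
    if scale:
        # scale=True returns degrees; the integers are degrees * 1000
        return (lat / 1000.0, lon / 1000.0)
    return (lat, lon)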
Example #2
def export_file(filename, tablename, _db=None, _session=None):
    today = util.utcnow().date()
    one_year_ago = (today - timedelta(days=365)).strftime('%Y-%m-%d')
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE modified >= '{modified}'
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename, modified=one_year_ago).replace('\n', ' '))
    db = configure_db('ro', _db=_db)

    offset = 0
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
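A hypothetical invocation from a worker task; the table name and output path here are assumptions for illustration:

# Hypothetical call: writes gzip-compressed 'lat,lon' CSV lines for the
# given datamap table and returns the number of points written (0 means
# the empty file was removed again).
points = export_file('/tmp/datamap.csv.gz', 'datamap_ne')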
Example #3
def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
'''.format(tablename=tablename).replace('\n', ' '))

    db = configure_db('ro', transport='sync', _db=_db)
    min_grid = b''
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
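Unlike Example #2, which pages with LIMIT/OFFSET and forces the server to scan and discard `offset` rows on every batch, this variant uses keyset pagination: it remembers the last `grid` value seen and seeks directly past it, which stays fast however deep the export gets. The loop, distilled into a standalone sketch with a hypothetical fetch_batch callback standing in for the SQL query:

def paginate_by_key(fetch_batch, limit=200000):
    # fetch_batch(after_key, limit) is assumed to run the
    # "WHERE `grid` > :grid ORDER BY `grid` LIMIT :limit" query above
    # and return the resulting rows.
    after_key = b''  # empty bytes sort before every real grid key
    while True:
        batch = fetch_batch(after_key, limit)
        if not batch:
            break
        yield batch
        after_key = batch[-1].grid  # seek past the last row seen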
Example #4
def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text("""\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(tablename=tablename).replace("\n", " "))

    db = configure_db("ro", _db=_db, pool=False)
    min_grid = b""
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, "w", compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
Example #5
    def test_decode_datamap_grid(self):
        assert decode_datamap_grid(b"\x00\x00\x00\x00\x00\x00\x00\x00") == (
            -90000,
            -180000,
        )
        assert decode_datamap_grid(b"AAAAAAAAAAA=", codec="base64") == (-90000, -180000)

        assert decode_datamap_grid(b"\x00\x01_\x90\x00\x02\xbf ") == (0, 0)
        assert decode_datamap_grid(b"AAFfkAACvyA=", codec="base64") == (0, 0)

        assert decode_datamap_grid(b"\x00\x02\xbf \x00\x05~@") == (90000, 180000)
        assert decode_datamap_grid(b"\x00\x02\xbf \x00\x05~@", scale=True) == (
            90.0,
            180.0,
        )
        assert decode_datamap_grid(b"AAK/IAAFfkA=", codec="base64") == (90000, 180000)
        assert decode_datamap_grid(b"AAK/IAAFfkA=", scale=True, codec="base64") == (
            90.0,
            180.0,
        )
Example #6
def export_file(db_url, filename, tablename, _db_rw=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename).replace('\n', ' '))
    db = configure_db(db_url, _db=_db_rw)

    offset = 0
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.engine.pool.dispose()
    return result_rows
Example #7
def export_to_csv(filename, csv_dir, tablename, row_limit=None, file_limit=None):
    """
    Export a datamap table to a CSV file.

    :param filename: An output file ending in .csv
    :param csv_dir: The output directory
    :param tablename: The name of the datamap table to export
    :param row_limit: The number of rows to fetch at a time
    :param file_limit: The number of output rows before rotating files
    :return: A tuple (rows exported, files created)

    Each database row is turned into 0 to 6 similar CSV rows by
    random_points(), based on how recently it was recorded.

    If file_limit is not reached, the output is written to filename.
    If file_limit is reached, the output files get a serial number based
    on the filename. For example, "map.csv" becomes "map_0001.csv",
    "map_0002.csv", etc.
    """
    stmt = text(
        """\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(
            tablename=tablename
        ).replace(
            "\n", " "
        )
    )

    db = configure_db("ro", pool=False)
    min_grid = b""
    row_limit = row_limit or 200_000
    file_limit = file_limit or 10_000_000

    result_rows = 0
    assert filename.endswith(".csv")
    file_path = os.path.join(csv_dir, filename)
    fd = open(file_path, "w")
    file_count = 1
    file_rows = 0
    orig_filename = filename
    orig_file_path = file_path
    try:
        with db_worker_session(db, commit=False) as session:
            while True:
                result = session.execute(
                    stmt.bindparams(limit=row_limit, grid=min_grid)
                )
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)

                # Rotate the file when needed
                file_rows += len(lines)
                if file_rows >= file_limit:
                    fd.close()
                    file_count += 1
                    file_rows = 0
                    filename = orig_filename.replace(
                        ".csv", f"_{file_count:04}.csv"
                    )
                    file_path = os.path.join(csv_dir, filename)
                    fd = open(file_path, "w")

                min_grid = rows[-1].grid
    finally:
        fd.close()

    if not file_rows:
        os.remove(file_path)
        file_count -= 1

    if file_count > 1:
        # Rename the first file to the serial CSV format
        filename = orig_filename.replace(".csv", "_0001.csv")
        file_path = os.path.join(csv_dir, filename)
        os.rename(orig_file_path, file_path)

    db.close()
    return result_rows, file_count
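None of the examples include random_points() itself. Going only by the docstring in Example #7 (each row becomes 0 to 6 CSV rows, fewer the older it is), a hedged sketch of what such a helper could look like; the exact point count and jitter scheme are assumptions:

import random

def random_points_sketch(lat, lon, num):
    # Assumption: 6 points for a freshly modified row (num == 0), one
    # fewer per month of age, none at 6+ months. lat/lon are the integer
    # milli-degree values returned by decode_datamap_grid().
    count = min(6, max(0, 6 - num))
    lines = []
    for _ in range(count):
        # jitter inside the 0.001-degree grid cell, then convert to degrees
        plat = (lat + random.random()) / 1000.0
        plon = (lon + random.random()) / 1000.0
        lines.append("%.6f,%.6f\n" % (plat, plon))
    return lines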