Code example #1
    def run(self):
        if self.outfile and not self.use_stdout:
            with self.outfile.open("w"):
                pass  # create or truncate file, but don't write anything to it yet
        with sqlite3.connect(self.mbtiles) as conn:
            results = []
            if self.show_size:
                sql = "SELECT cnt, dups.tile_id, LENGTH(tile_data) FROM (" \
                      "  SELECT tile_id, COUNT(*) as cnt FROM map " \
                      "  GROUP BY tile_id HAVING cnt > ?" \
                      ") dups JOIN images ON images.tile_id = dups.tile_id"
                sql_opts = [self.min_dup_count]
                if self.zoom:
                    sql += f" WHERE zoom_level=?"
                    sql_opts.append(self.zoom)
            else:
                sql_opts = []
                sql = "SELECT COUNT(*) cnt, tile_id FROM map"
                if self.zoom:
                    sql += f" WHERE zoom_level=?"
                    sql_opts.append(self.zoom)
                sql += " GROUP BY tile_id HAVING cnt > ?"
                sql_opts.append(self.min_dup_count)
            for vals in query(conn, sql, sql_opts):
                results.append(vals)
            results.sort(reverse=True)
        size = None
        examples = None
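        # Note: conn is still usable below even though the "with" block has
        # ended; sqlite3's context manager handles transactions, not closing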
        for vals in results:
            if len(vals) == 3:
                count, tile_id, size = vals
            else:
                count, tile_id = vals
            if self.show_examples:
                example_sql = "select zoom_level, tile_column, tile_row from map " \
                              "where tile_id = ? limit 5"
                examples = [
                    f'{z}/{x}/{y}'
                    for z, x, y in query(conn, example_sql, [tile_id])
                ]
            if self.verbose:
                res = f"{tile_id} x {count:,}"
                if self.show_size:
                    res += f', {size:,} bytes'
                if self.show_examples:
                    res += ', examples: ' + ', '.join(examples)
                print_err(res)

        results = [v[1] for v in results]
        if self.use_stdout:
            for v in results:
                print(v)
        elif self.outfile:
            with self.outfile.open("a") as f:
                f.writelines([str(v) + '\n' for v in results])

        return results
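
The snippet above relies on two project helpers that this page does not show: query() and print_err(). A minimal sketch of what they plausibly look like, judging only from how they are called (the names come from the snippet; the exact behavior is an assumption):

import sys
import sqlite3
from typing import Iterator, List


def print_err(*args) -> None:
    # Write to stderr so diagnostics do not mix with data printed to stdout
    print(*args, file=sys.stderr)


def query(conn: sqlite3.Connection, sql: str, params: List) -> Iterator[tuple]:
    # Run a parameterized query and yield result rows one at a time
    cursor = conn.cursor()
    try:
        yield from cursor.execute(sql, params)
    finally:
        cursor.close()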
Code example #2
 def get_value(self, name):
     with sqlite3.connect(self.mbtiles) as conn:
         cursor = conn.cursor()
         cursor.execute("SELECT value FROM metadata WHERE name=?", [name])
         row = cursor.fetchone()
         if row is None:
             print_err(f"Metadata field '{name}' is not found")
             exit(1)
         print(row[0])
Code example #3
 async def load_hash(self, session: ClientSession, verbose: bool):
     if not self.url_hash:
         return
     try:
         if verbose:
             print(f"Getting md5 checksum from {self.url_hash}")
         hsh = (await fetch(session, self.url_hash)).strip().split(' ')[0]
         if not re.match(r'^[a-fA-F0-9]{32}$', hsh):
             raise ValueError(f"Invalid md5 hash '{hsh}'")
         self.hash = hsh
     except Exception as ex:
         print_err(f"Unable to load md5 hash for {self.to_str(True)}: {ex}")
Code example #4
 async def load_metadata(self, session: ClientSession, verbose: bool):
     if not self.url:
         return
     try:
         if verbose:
             print(f"Getting content length for {self.url}")
         async with session.head(self.url) as resp:
             if resp.status >= 400:
                 raise ValueError(f"Status={resp.status} for HEAD request")
             if 'Content-Length' in resp.headers:
                 self.file_len = int(resp.headers['Content-Length'])
     except Exception as ex:
         print_err(f"Unable to load metadata for {self}: {ex}")
Code example #5
 async def init(self, session: ClientSession, verbose: bool):
     """initialize the self.sources with the relevant Source objects
     by parsing the mirror's HTML page, and getting all <a> tags"""
     try:
         sources = await self.get_sources(session, verbose)
         if not sources:
             raise ValueError(f"No sources found")
         await load_sources(sources, session, verbose)
         if len(sources) > 1 and sources[0].hash == sources[1].hash:
             del sources[0]  # latest is the same as the last one
         self.sources = sources
     except Exception as ex:
         print_err(f"Unable to use {self.country} source {self.url}: {ex}")
Code example #6
    def parse_hrefs(self, items: List[tuple], verbose) -> List['Source']:
        """Convert a list of (name, href) tuples to a list of valid sources,
        including only the two most recent ones, plus the 'latest' if available."""
        all_sources: Dict[str, Source] = {}
        for name, href in sorted(items):
            m = self.re_name.match(name)
            if not m:
                if verbose:
                    print(f"Ignoring unexpected name '{name}' from {self.url}")
                continue
            try:
                url = href if '/' in href else (self.url + href)
                date = m.group(1)
                is_md5 = bool(m.group(2))
                dt = None if date == 'latest' else datetime.strptime(
                    date, '%y%m%d')
                if not is_md5:
                    if date in all_sources:
                        raise ValueError(f"{date} already already exists")
                    all_sources[date] = Source(name, url, dt, self)
                else:
                    if date not in all_sources:
                        raise ValueError(
                            "md5 file exists, but data file does not")
                    all_sources[date].url_hash = url
            except Exception as ex:
                print_err(f'WARN: {ex}, while parsing {name} from {self.url}')

        # get the last 2 sources that have dates in the name, as well as the "latest"
        latest = all_sources.pop('latest', None)
        result = [
            all_sources[k]
            for k in list(sorted(all_sources.keys(), reverse=True))[:2]
        ]
        if latest:
            result.insert(0, latest)
        return result
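
self.re_name is defined elsewhere in the class. Based on how group(1) (a yymmdd date or the literal 'latest') and group(2) (an optional md5 suffix) are used above, it plausibly resembles the pattern below; the exact file-name format is an assumption:

import re

# Hypothetical pattern matching names such as 'planet-210614.osm.pbf',
# 'planet-latest.osm.pbf', and their '.md5' checksum companions
re_name = re.compile(r'^[\w-]+?-(\d{6}|latest)\.osm\.pbf(\.md5)?$')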
Code example #7
    def run(self):
        with sqlite3.connect(self.mbtiles) as conn:
            limit_to_keys = not self.outfile
            if self.outfile and not self.use_stdout:
                with self.outfile.open("w"):
                    pass  # create or truncate file, but don't write anything to it yet
            keyed_tiles = 0
            nokey_tiles = 0
            cursor = conn.cursor()
            key_stats = self.keys
            for with_key, without_key in self.tile_batches(
                    conn, limit_to_keys):
                without_key.sort()
                if with_key:
                    with_key.sort()
                    for val in with_key:
                        key_stats[val[3]] += 1
                    cursor.executemany(
                        'INSERT OR IGNORE INTO map'
                        '(zoom_level, tile_column, tile_row, tile_id)'
                        ' VALUES(?,?,?,?)', with_key)
                    keyed_tiles += cursor.rowcount
                    conn.commit()
                if without_key:
                    if self.use_stdout:
                        for v in without_key:
                            print(v, end='')
                    else:
                        with self.outfile.open("a") as f:
                            f.writelines(without_key)
                    nokey_tiles += len(without_key)

            if self.verbose:
                for k, c in key_stats.items():
                    print_err(f"{k} - added {c:,}")
                print_err(f'Total imputed tiles: {keyed_tiles:,}')
                if nokey_tiles:
                    print_err(
                        f'Total tiles need to be generated: {nokey_tiles:,}')
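
Examples #1 and #7 both assume the deduplicated MBTiles layout, in which the map table maps tile coordinates to a tile_id and the images table stores each distinct tile blob exactly once. For reference, a minimal sketch of that schema:

import sqlite3

SCHEMA = """
CREATE TABLE IF NOT EXISTS map (
    zoom_level INTEGER,
    tile_column INTEGER,
    tile_row INTEGER,
    tile_id TEXT
);
CREATE TABLE IF NOT EXISTS images (
    tile_id TEXT,
    tile_data BLOB
);
-- the conventional 'tiles' view reassembles the standard MBTiles table
CREATE VIEW IF NOT EXISTS tiles AS
    SELECT zoom_level, tile_column, tile_row, tile_data
    FROM map JOIN images ON images.tile_id = map.tile_id;
"""

with sqlite3.connect("tiles.mbtiles") as conn:
    conn.executescript(SCHEMA)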
Code example #8
async def run_aria2c(aria2c_args, dry_run, md5, urls, args, area_id):
    params = ['aria2c']
    if md5:
        params.append(f'--checksum=md5={md5}')
    if len(urls) > 1 and not any(
        (v for v in aria2c_args if v == '-s' or v.startswith('--split'))):
        # user has not passed -s or --split, so use as many streams as urls
        params.append(f'--split={len(urls)}')
    if not any((v for v in aria2c_args if v.startswith('--http-accept-gzip'))):
        # user has not passed --http-accept-gzip, so always specify we accept gzip
        params.append('--http-accept-gzip')
    if not any(
        (v for v in aria2c_args if v == '-U' or v.startswith('--user-agent'))):
        # user has not set a custom user agent, set one
        params.append(f'--user-agent={USER_AGENT}')
    if args.output:
        assert_conflict_args("--output", aria2c_args, '-d', '--dir', '-o',
                             '--out', '-i', '--input-file',
                             '--auto-file-renaming', '-Z',
                             '--force-sequential', '--allow-overwrite')
        out_path = Path(args.output).resolve()
        out_path.parent.mkdir(parents=True, exist_ok=True)
        params.append(f'--dir={out_path.parent}')
        params.append(f'--out={out_path.name}')
        params.append('--auto-file-renaming=false')
        if args.force:
            params.append('--allow-overwrite=true')

    extra_env = None
    if args['--make-dc']:
        assert_conflict_args("--make-dc", aria2c_args,
                             '--on-download-complete')
        area_id, min_zoom, max_zoom, dc_ver = normalize_make_dc(
            area_id, args['--minzoom'], args['--maxzoom'], args['--dc-ver'])
        extra_env = {
            "DOWNLOAD_OSM_DC_FILE": str(Path(args['--make-dc'])),
            "OSM_AREA_NAME": str(area_id),
            "MIN_ZOOM": str(min_zoom),
            "MAX_ZOOM": str(max_zoom),
            "MAKE_DC_VERSION": str(dc_ver),
        }
        params.append("--on-download-complete")
        params.append(__file__)

    params.extend(aria2c_args)
    params.extend(urls)
    print(f"\n  {subprocess.list2cmdline(params)}")
    if args.verbose and extra_env:
        env_str = ', '.join((f'{k}={v}' for k, v in extra_env.items()))
        print(f"  Setting environment vars: {env_str}")
    capture_output = False
    for flag in ('--on-bt-download-complete', '--on-download-pause',
                 '--on-download-complete', '--on-download-start',
                 '--on-download-error', '--on-download-stop'):
        if any((v for v in params if v.startswith(flag))):
            capture_output = True
            break
    if args.verbose:
        if capture_output:
            print("  capturing stdout/stderr to wait for subprocess exit")
        else:
            print("  aria2c output will be printed directly to terminal")
    # Make sure to print/flush everything to STDOUT before running subprocess
    print("", flush=True)

    if not dry_run:
        # Use capture_output to ensure that callback finishes before run() returns
        # This is only needed if any callbacks are used
        if extra_env:
            env = os.environ.copy()
            env.update(extra_env)
        else:
            env = None
        res = subprocess.run(params, env=env, capture_output=capture_output)
        ret = res.returncode
        if capture_output:
            stdout = res.stdout.decode('utf-8')
            if stdout:
                print(stdout)
            stderr = res.stderr.decode('utf-8')
            if stderr:
                print_err(stderr)
            # Callbacks do not report errors, so detect it
            if ret == 0 and stderr and "Traceback (most recent call last)" in stderr:
                ret = 1
        return ret
    else:
        print("Data is not downloaded because of the --dry-run parameter")
        if args['--make-dc']:
            print("docker-compose file generation was skipped")
        return 0
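
assert_conflict_args() and normalize_make_dc() are project helpers not shown on this page. A guess at the former based purely on its call pattern (the message and the prefix-matching rule are assumptions):

import sys
from typing import List


def assert_conflict_args(arg_name: str, aria2c_args: List[str], *conflicts: str):
    # Abort if the user passed an aria2c flag that clashes with one we generate
    for flag in conflicts:
        if any(v.startswith(flag) for v in aria2c_args):
            sys.exit(f"{flag} cannot be combined with {arg_name}")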
Code example #9
File: pgutils.py Project: lazaa32/openmaptiles-tools
 def print_message(msg: asyncpg.PostgresLogMessage):
     try:
         # noinspection PyUnresolvedReferences
         print_err(f'  {msg.severity}: {msg.message} @ {msg.context}')
     except AttributeError:
         print_err(f'  {msg}')
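
print_message has the shape of an asyncpg log-message handler. A sketch of wiring it up via Connection.add_log_listener(), which invokes listeners with (connection, message); the connection parameters below are placeholders:

import asyncio
import asyncpg


async def main():
    conn = await asyncpg.connect(database='openmaptiles')  # placeholder DSN
    # Adapt asyncpg's (connection, message) listener signature to print_message
    conn.add_log_listener(lambda _conn, msg: print_message(msg))
    try:
        await conn.execute("SET client_min_messages TO notice")
    finally:
        await conn.close()

# asyncio.run(main())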