def batch(func, iterable, chunksize=1000, total=None, progress=True):
    """Apply ``func`` to every truthy item of ``iterable`` through a pool.

    Falsy items are skipped. The remaining items are buffered and handed
    to the executor in groups of 10000; ``chunksize`` is accepted but not
    consulted by this implementation. Each item is processed through
    ``collect_report`` and the reports it returns are merged into the
    reporter stored in the context. The progress bar, built with ``total``,
    is advanced once per processed item when ``progress`` is true.

    The executor is a ``ProcessPoolExecutor`` when the ``BATCH_EXECUTOR``
    setting equals ``'process'`` and a ``ThreadPoolExecutor`` otherwise;
    its size comes from the ``WORKERS`` setting (default:
    ``os.cpu_count()``).
    """
    # This is the main reporter instance.
    main_reporter = context.get('reporter')
    if config.get('BATCH_EXECUTOR') == 'process':
        executor_class = ProcessPoolExecutor
    else:
        executor_class = ThreadPoolExecutor
    bar = Bar(total=total, throttle=timedelta(seconds=1))
    max_workers = int(config.get('WORKERS', os.cpu_count()))

    def flush(executor, items):
        # Process one buffered group and fold its reports into the main
        # reporter as results come back.
        for reports in executor.map(collect_report, repeat(func), items):
            main_reporter.merge(reports)
            if progress:
                bar()

    with executor_class(max_workers=max_workers) as executor:
        pending = []
        for item in iterable:
            if not item:
                continue
            pending.append(item)
            if len(pending) == 10000:
                flush(executor, pending)
                pending = []
        # Do not forget the last, incomplete group.
        if pending:
            flush(executor, pending)
def connect(self):
    """Initialise the database from the current settings, then connect.

    Connection kwargs are resolved at connect time only: the db object is
    instantiated when Python parses the module, whereas the connection
    settings are only final once the command line has been parsed.
    """
    database = self.prefix + config.DB_NAME
    credentials = {
        'user': config.get('DB_USER'),
        'password': config.get('DB_PASSWORD'),
        'host': config.get('DB_HOST'),
        'port': config.get('DB_PORT'),
    }
    self.init(database, **credentials)
    super().connect()
def __getattr__(self, attr):
    """Delegate attribute access to the wrapped database object.

    While ``self.obj`` is falsy, a ``PostgresqlExtDatabase`` is first built
    from the current settings and handed to ``self.initialize``; the
    attribute is then looked up on ``self.obj``.
    """
    if self.obj:
        return getattr(self.obj, attr)
    database = PostgresqlExtDatabase(
        self.prefix + config.DB_NAME,
        user=config.get('DB_USER'),
        password=config.get('DB_PASSWORD'),
        host=config.get('DB_HOST'),
        port=config.get('DB_PORT'),
        autorollback=True,
    )
    self.initialize(database)
    return getattr(self.obj, attr)
def connect(self):
    """Configure the database with the current settings and connect to it.

    The db object has to exist before the connection kwargs are known (it
    is instantiated at Python parse time), so those kwargs are only read
    here, once the command line has been parsed.
    """
    database = self.prefix + config.DB_NAME
    options = (
        ('user', 'DB_USER'),
        ('password', 'DB_PASSWORD'),
        ('host', 'DB_HOST'),
        ('port', 'DB_PORT'),
    )
    kwargs = {keyword: config.get(setting) for keyword, setting in options}
    self.init(database, **kwargs)
    super().connect()
def batch(func, iterable, chunksize=1000, max_value=None):
    """Map ``func`` over ``iterable`` in a thread pool with a progress bar.

    The pool size comes from the ``WORKERS`` setting (default:
    ``os.cpu_count()``). The bar is updated with the zero-based index of
    each result as it is consumed; the results themselves are discarded.
    ``chunksize`` is accepted but not consulted by this implementation.
    """
    progress = Bar(max_value=max_value).start()
    pool_size = int(config.get("WORKERS", os.cpu_count()))
    with ThreadPoolExecutor(max_workers=pool_size) as executor:
        results = executor.map(func, iterable)
        for index, _ in enumerate(results):
            progress.update(index)
        progress.finish()
def __call__(self, *args, **kwargs):
    """Run command.

    A fresh ``Reporter`` is stored in the context before the command runs.
    A ``KeyboardInterrupt`` raised by the command is silenced. In every
    case the reporter is printed afterwards and, when the ``REPORT_TO``
    setting is set, also written to that path; a failure to write is
    printed rather than raised.
    """
    reporter = Reporter(config.get('VERBOSE'))
    context.set('reporter', reporter)

    def save(destination):
        # Best effort: a report that cannot be written must not mask the
        # outcome of the command itself.
        try:
            with Path(destination).open('w') as stream:
                stream.write(str(reporter))
        except OSError as error:  # IOError is an alias of OSError.
            print('Unable to write report to', destination)
            print(error)

    try:
        self.command(*args, **kwargs)
    except KeyboardInterrupt:
        pass
    finally:
        # Display reports, if any.
        print(reporter)
        destination = config.get('REPORT_TO')
        if destination:
            save(destination)
def reporting(self):
    """Write the collected reports to standard output.

    For each report name, its number of items is written; when the
    ``VERBOSE`` setting is truthy, every item whose level does not exceed
    that verbosity is listed as well.
    """
    write = sys.stdout.write
    if self._reports:
        write('\n# Reports:')
        for name, items in self._reports.items():
            write('\n- {}: {}'.format(name, len(items)))
            verbosity = config.get('VERBOSE')
            if not verbosity:
                continue
            for item, level in items:
                if verbosity >= level:
                    write('\n . {}'.format(item))
    # NOTE(review): the closing newline is written whether or not there are
    # reports -- confirm against the original layout.
    write('\n')
def collect_report(func, *args, **kwargs):
    """Call ``func`` and return the reports gathered while it ran.

    The reporter found in the context is used; when there is none, a new
    ``Reporter`` is created and stored in the context. After the call, a
    copy of the reporter's ``_reports`` is returned and the reporter is
    cleared.
    """
    # This is a process reporter instance.
    local_reporter = context.get('reporter')
    if not local_reporter:
        # In thread mode, reporter is not shared with subthreads.
        local_reporter = Reporter(config.get('VERBOSE'))
        context.set('reporter', local_reporter)
    func(*args, **kwargs)
    collected = local_reporter._reports.copy()
    local_reporter.clear()
    return collected
def report(self, name, item, level):
    """Record one report under ``name``.

    When the ``VERBOSE`` setting is truthy, the ``(item, level)`` pair is
    appended to the list kept for ``name``; otherwise only a running count
    is kept for ``name``.
    """
    if config.get('VERBOSE'):
        self._reports.setdefault(name, []).append((item, level))
    else:
        # Only track totals.
        self._reports[name] = self._reports.get(name, 0) + 1
def reporting(self):
    """Write the collected reports to standard output.

    When the ``VERBOSE`` setting is truthy, each report entry is treated as
    a list of ``(item, level)`` pairs: its length is written, followed by
    every item whose level does not exceed the verbosity. Otherwise the
    entry itself is written as the total.
    """
    write = sys.stdout.write
    if self._reports:
        write('\n# Reports:')
        for name, items in self._reports.items():
            verbosity = config.get('VERBOSE')
            if verbosity:
                write('\n- {}: {}'.format(name, len(items)))
                for item, level in items:
                    if verbosity >= level:
                        write('\n . {}'.format(item))
            else:
                write('\n- {}: {}'.format(name, items))
    # NOTE(review): the closing newline is written whether or not there are
    # reports -- confirm against the original layout.
    write('\n')
def session(func, *args, **kwargs):
    """Call ``func`` after storing a staff user's session in the context.

    The user is looked up among staff users, narrowed to the username
    given by the ``SESSION_USER`` setting when it is set. When no such
    user exists, ``abort`` is called with an explanatory message.

    Returns whatever ``func(*args, **kwargs)`` returns.
    """
    # TODO make configurable from command line
    # Filter on staff users: the condition must go through ``where`` (it
    # was previously passed to ``select``, which does not filter rows).
    # ``== True`` is intentional, it builds the query expression.
    qs = User.select().where(User.is_staff == True)  # noqa: E712
    username = config.get('SESSION_USER')
    if username:
        qs = qs.where(User.username == username)
    try:
        user = qs.get()
    except User.DoesNotExist:
        abort('Admin user not found {}'.format(username or ''))
    session = Session.create(user=user)
    context.set('session', session)
    return func(*args, **kwargs)
def session(func, *args, **kwargs):
    """Call ``func`` with a session available in the context.

    When the context holds no session yet, one is created with
    ``contributor_type='admin'`` for a staff user (narrowed to the
    ``SESSION_USER`` setting when it is set) and stored in the context.
    When no matching user exists, ``abort`` is called with an explanatory
    message.

    Returns whatever ``func(*args, **kwargs)`` returns.
    """
    if not context.get('session'):
        # ``== True`` is intentional, it builds the query expression.
        candidates = User.select().where(User.is_staff == True)  # noqa: E712
        username = config.get('SESSION_USER')
        if username:
            candidates = candidates.where(User.username == username)
        try:
            user = candidates.get()
        except User.DoesNotExist:
            abort('Admin user not found {}'.format(username or ''))
        created = Session.create(user=user, contributor_type='admin')
        context.set('session', created)
    return func(*args, **kwargs)
def __call__(self, *args, **kwargs):
    """Run command.

    A fresh ``Reporter`` is stored in the context, then every hook in
    ``_on_before_call`` is called with ``(self, args, kwargs)``. The hooks
    in ``_on_after_call`` only run when the command raised nothing; a
    ``KeyboardInterrupt`` is silenced and skips them. The reporter is
    printed in every case.
    """
    reporter = Reporter(config.get('VERBOSE'))
    context.set('reporter', reporter)
    for before_hook in self._on_before_call:
        before_hook(self, args, kwargs)
    try:
        self.command(*args, **kwargs)
    except KeyboardInterrupt:
        pass
    else:
        for after_hook in self._on_after_call:
            after_hook(self, args, kwargs)
    finally:
        # Display reports, if any.
        print(reporter)
def batch(func, iterable, chunksize=1000, total=None, progress=True):
    """Yield the results of ``func`` applied to ``iterable`` by a pool.

    The work is dispatched through ``ChunkedPool.imap_unordered`` with the
    given ``chunksize``; each value it produces is unpacked as a
    ``(results, reports)`` pair. The reports are merged into the reporter
    stored in the context, the progress bar advances by the number of
    results, and the results are yielded one by one.

    The pool size comes from the ``WORKERS`` setting (default:
    ``os.cpu_count()``). ``progress`` is accepted but not consulted by this
    implementation.

    Any ``Exception`` raised while iterating is printed and the pool is
    terminated; the exception is not propagated.
    """
    # This is the main reporter instance.
    reporter = context.get('reporter')
    bar = Bar(total=total, throttle=timedelta(seconds=1))
    workers = int(config.get('WORKERS', os.cpu_count()))
    with ChunkedPool(processes=workers) as pool:
        try:
            for results, reports in pool.imap_unordered(
                    func, iterable, chunksize):
                reporter.merge(reports)
                bar(step=len(results))
                yield from results
            bar.finish()
        except Exception as e:
            # ``e.args`` may be empty or hold a non-string value; building
            # the message must never raise, otherwise the pool would not
            # be terminated below.
            message = e.args[0] if e.args else e
            print("\n{}".format(message))
            pool.terminate()
def batch(func, iterable, chunksize=1000, total=None):
    """Apply ``func`` to every truthy item of ``iterable`` in a thread pool.

    Falsy items are skipped. The remaining items are buffered and mapped
    over the pool in groups of 10000; ``chunksize`` is accepted but not
    consulted by this implementation. The progress bar, built with
    ``total``, advances once per result; the results are discarded.

    The pool size comes from the ``WORKERS`` setting (default:
    ``os.cpu_count()``).
    """
    bar = Bar(total=total)
    workers = int(config.get('WORKERS', os.cpu_count()))
    with ThreadPoolExecutor(max_workers=workers) as executor:

        def drain(items):
            # Consume the results so that every call completes, ticking
            # the bar once per item.
            for _ in executor.map(func, items):
                bar()

        pending = []
        for item in iterable:
            if not item:
                continue
            pending.append(item)
            if len(pending) == 10000:
                drain(pending)
                pending = []
        # Do not forget the last, incomplete group.
        if pending:
            drain(pending)
def bbox():
    """Return the positions whose center lies inside the requested bbox.

    Read from ``request.args``:

    - the bounds, parsed by ``get_bbox`` into ``west``, ``south``, ``east``
      and ``north``;
    - ``limit``: maximum number of rows returned, defaulting to
      ``CollectionEndpoint.DEFAULT_LIMIT`` and capped to
      ``CollectionEndpoint.MAX_LIMIT``;
    - ``unique``: when equal to ``'true'``, the total counts distinct
      housenumbers and only the most recently modified, non deleted
      position of each housenumber is kept.

    Returns a ``(body, 200)`` tuple; ``body`` holds the ``collection`` of
    serialized positions and the ``total`` number of positions (or of
    distinct housenumbers) found in the bbox.
    """
    limit = min(
        int(request.args.get('limit', CollectionEndpoint.DEFAULT_LIMIT)),
        CollectionEndpoint.MAX_LIMIT)
    bounds = get_bbox(request.args)
    unique = request.args.get('unique') == 'true'
    params = {
        "west": bounds["west"],
        "south": bounds["south"],
        "east": bounds["east"],
        "north": bounds["north"],
    }

    counted = "distinct housenumber_id" if unique else "*"
    count_sql = (
        "SELECT count(" + counted + ") FROM position "
        "WHERE center && "
        "ST_MakeEnvelope(%(west)s, %(south)s, %(east)s, %(north)s)"
    )

    # Both modes share the same statement; the "unique" one only adds a
    # condition keeping the latest position of each housenumber.
    select_sql = """
        SELECT
            p.id as pos_id, p.name as pos_name,
            st_x(center) as pos_x, st_y(center) as pos_y,
            p.kind as pos_kind, p.positioning as pos_positioning,
            p.source as pos_source, p.source_kind as pos_source_kind,
            p.version as pos_version,
            h.id as hn_id, h.number as hn_number, h.ordinal as hn_ordinal,
            h.version as hn_version,
            po.id as post_id, po.name as post_name, po.code as post_code,
            g.id as group_id, g.addressing as group_addressing,
            g.alias as group_alias, g.fantoir as group_fantoir,
            g.ign as group_ign, g.kind as group_kind,
            g.laposte as group_laposte, g.name as group_name
        FROM position as p
        LEFT JOIN housenumber as h on (h.pk = p.housenumber_id)
        LEFT JOIN "group" as g on (h.parent_id = g.pk)
        LEFT JOIN postcode as po on (h.postcode_id = po.pk)
        WHERE center && ST_MakeEnvelope(
            %(west)s, %(south)s, %(east)s, %(north)s)
        AND p.deleted_at is null
        AND h.deleted_at is null
        and g.deleted_at is null"""
    if unique:
        select_sql += """
        AND p.modified_at = (
            select max(modified_at) from position
            where housenumber_id=h.pk and deleted_at is null)"""
    select_sql += """
        limit %(limit)s"""

    host = config.get('DB_HOST')
    if host is None:
        host = "localhost"
    # Keyword arguments instead of a hand-built DSN: the former template
    # had fewer placeholders than values, so the user and password ended
    # up in the host and port fields.
    conn = psycopg2.connect(
        dbname=config.DB_NAME,
        user=config.get('DB_USER'),
        password=config.get('DB_PASSWORD'),
        host=host,
        port=config.get('DB_PORT'),
    )
    try:
        # The cursor is closed when leaving the ``with`` block.
        with conn.cursor(
                cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(count_sql, params)
            response = {"collection": [], "total": cur.fetchone()["count"]}
            cur.execute(select_sql, dict(params, limit=limit))
            for row in cur:
                response["collection"].append({
                    "id": row["pos_id"],
                    "name": row["pos_name"],
                    "center": {
                        "type": "Point",
                        "coordinates": [row["pos_x"], row["pos_y"]],
                    },
                    "kind": row["pos_kind"],
                    "positioning": row["pos_positioning"],
                    "source": row["pos_source"],
                    "source_kind": row["pos_source_kind"],
                    "version": row["pos_version"],
                    "housenumber": {
                        "id": row["hn_id"],
                        "number": row["hn_number"],
                        "ordinal": row["hn_ordinal"],
                        "postcode": {
                            "id": row["post_id"],
                            "name": row["post_name"],
                            "code": row["post_code"],
                        },
                        "group": {
                            "id": row["group_id"],
                            "addressing": row["group_addressing"],
                            "alias": row["group_alias"],
                            "fantoir": row["group_fantoir"],
                            "ign": row["group_ign"],
                            "kind": row["group_kind"],
                            "laposte": row["group_laposte"],
                            "name": row["group_name"],
                        },
                        "version": row["hn_version"],
                    },
                })
    finally:
        # Release the connection even when a query fails; it used to be
        # left open on every request.
        conn.close()
    return response, 200