def main():
    """Entry point: update every configured repo using a worker pool.

    Reads settings, starts logging, then fans the repo list out to 10
    worker threads.  Logging is always stopped, even on failure.
    """
    settings = startup.read_settings()
    Log.start(settings.debug)
    try:
        # outbound=False: workers run for side effects only; results are not collected
        with Multithread(update_repo, threads=10, outbound=False) as multi:
            # randomize order so repeated/partial runs do not always start
            # with the same repos
            for repo in Random.combination(settings.param.repos):
                # NOTE(review): key is "repos" but the value is a single repo —
                # presumably matches update_repo's parameter name; confirm
                multi.execute([{"repos": repo, "settings": settings}])
    finally:
        Log.stop()
def main():
    """Entry point: run the full ETL under a single-instance lock.

    Reads settings, applies constant overrides, starts logging, and
    ensures only one copy of this ETL runs at a time before calling
    full_etl().  Logging is always stopped on the way out.
    """
    settings = startup.read_settings()
    constants.set(settings.constants)
    Log.start(settings.debug)
    # SingleInstance prevents a second copy of this process (same flavor_id)
    # from running concurrently
    with startup.SingleInstance(flavor_id=settings.args.filename):
        try:
            full_etl(settings)
        finally:
            Log.stop()
def main():
    """Entry point: ETL bug reviews from the source ES cluster to the destination.

    Probes both clusters for the remaining bug-id range (so a failed run can
    resume), then processes that range in parallel batches.  With --restart a
    fresh destination index is created and aliased into place at the end.
    """
    settings = startup.read_settings(defs={
        "name": ["--restart", "--reset", "--redo"],
        "help": "force a reprocessing of all data",
        "action": "store_true",
        "dest": "restart"
    })
    Log.start(settings.debug)
    try:
        with startup.SingleInstance(flavor_id=settings.args.filename):
            if settings.args.restart:
                # start from scratch: brand-new destination index
                reviews = Cluster(settings.destination).create_index(settings.destination)
            else:
                # continue filling the existing (not-yet-aliased) index
                reviews = Cluster(settings.destination).get_proto(settings.destination)
            bugs = Cluster(settings.source).get_index(settings.source)

            with FromES(bugs) as esq:
                es_max_bug = esq.query({
                    "from": "private_bugs",
                    "select": {"name": "max_bug", "value": "bug_id", "aggregate": "maximum"}
                })

            # PROBE WHAT RANGE OF BUGS IS LEFT TO DO (IN EVENT OF FAILURE)
            with FromES(reviews) as esq:
                es_min_bug = esq.query({
                    "from": "reviews",
                    "select": {"name": "min_bug", "value": "bug_id", "aggregate": "minimum"}
                })

            batch_size = coalesce(bugs.settings.batch_size, settings.size, 1000)
            threads = coalesce(settings.threads, 4)
            Log.note(str(settings.min_bug))  # NOTE(review): looks like leftover debug output
            min_bug = int(coalesce(settings.min_bug, 0))
            # cap the range at what the worker pool can cover in one pass
            max_bug = int(coalesce(settings.max_bug, Math.min(es_min_bug + batch_size * threads, es_max_bug)))

            # ThreadedQueue buffers inserts into the destination index
            with ThreadedQueue(reviews, batch_size=coalesce(reviews.settings.batch_size, 100)) as sink:
                func = functools.partial(full_etl, settings, sink)
                with Multithread(func, threads=threads) as m:
                    m.inbound.silent = True
                    Log.note("bugs from {{min}} to {{max}}, step {{step}}", {
                        "min": min_bug,
                        "max": max_bug,
                        "step": batch_size
                    })
                    # NOTE(review): intervals use a hard-coded size=1000 while the
                    # log message reports batch_size as the step — confirm intent
                    m.execute(reversed([
                        {"bugs": range(s, e)}
                        for s, e in qb.intervals(min_bug, max_bug, size=1000)
                    ]))

            if settings.args.restart:
                # cut the alias over to the freshly built index and drop old ones
                reviews.add_alias()
                reviews.delete_all_but_self()
    finally:
        Log.stop()
def test_read_blobber_file(self):
    """Fetch and verify one blobber log, with cProfile output captured.

    The profile is written to tests/results/ for offline inspection.
    """
    debug_settings = {
        "trace": True,
        "cprofile": {
            "enabled": True,
            "filename": "tests/results/test_read_blobber_file_profile.tab"
        }
    }
    Log.start(debug_settings)
    verify_blobber_file(
        0,
        "jetpack-package_raw.log",
        "http://mozilla-releng-blobs.s3.amazonaws.com/blobs/try/sha512/2d6892a08b84499c0e8cc0b81a32c830f6505fc2812a61e136ae4eb2ecfde0aac3e6358e9d27b76171869e0cc4368418e4dfca9378e69982681213354a2057ac"
    )
    Log.stop()
def main(num):
    """Benchmark several JSON encoders over `num` iterations and print a summary table.

    :param num: iteration count passed to each test_json() run
    """
    try:
        Log.start()
        results = []
        test_json(results, "jsons.encoder", json_encoder, num)
        # run twice to see warm-cache behavior
        test_json(results, "jsons.encoder (again)", json_encoder, num)
        test_json(results, "scrub before json.dumps", cPythonJSONEncoder().encode, num)
        test_json(results, "override JSONEncoder.default()", EnhancedJSONEncoder().encode, num)
        test_json(results, "default json.dumps", json.dumps, num)

        # WILL CRASH, CAN NOT HANDLE DIVERSITY OF TYPES
        try:
            test_json(results, "scrubbed ujson", ujson.dumps, num)
        except Exception:
            # best effort: ujson failure should not abort the benchmark
            pass

        Log.note("\n{{summary}}", summary=convert.list2tab(results))
    finally:
        Log.stop()
def test_51586(self):
    """Run the unittest ETL over a canned gzipped fixture, with profiling enabled.

    Output lines accumulate in tests/results/ for later comparison; the
    cProfile table is written alongside.
    """
    debug_settings = {
        "trace": True,
        "cprofile": {
            "enabled": True,
            "filename": "tests/results/test_51586_profile.tab"
        }
    }
    Log.start(debug_settings)
    source_key = "51586_5124145.52"
    content = File("tests/resources/51586_5124145.52.json.gz").read_bytes()
    # fake source object exposing the read_lines() interface the ETL expects
    source = Dict(read_lines=lambda: GzipLines(content))
    with Accumulator(File("tests/results/51586_5124145.52.json")) as destination:
        with Timer("ETL file"):
            process_unittest_in_s3(source_key, source, destination, please_stop=None)
    Log.stop()
def test_51586(self):
    """Run the unittest ETL over a canned gzipped fixture, with profiling enabled.

    Output lines accumulate in tests/results/ for later comparison; the
    cProfile table is written alongside.
    """
    debug_settings = {
        "trace": True,
        "cprofile": {
            "enabled": True,
            "filename": "tests/results/test_51586_profile.tab"
        }
    }
    Log.start(debug_settings)
    source_key = "51586_5124145.52"
    content = File("tests/resources/51586_5124145.52.json.gz").read_bytes()
    # fake source object exposing the read_lines() interface the ETL expects
    source = Dict(read_lines=lambda: GzipLines(content))
    with Accumulator(File("tests/results/51586_5124145.52.json")) as destination:
        with Timer("ETL file"):
            process_unittest_in_s3(source_key, source, destination, please_stop=None)
    Log.stop()
if job.settings.start_next: formula = next_run(job.settings.start_next) elif job.settings.start_interval: formula = "now|" + job.settings.start_interval + "+" + job.settings.start_interval else: Log.error("Expecting `start_next` or `start_interval` for job {{job}}", job=job.name) now = Date.now() next = Date(formula) if next < now: Log.error("{{formula|quote}} does not calculate a future date") return next def main(): try: config = startup.read_settings() constants.set(config.constants) Log.start(config.debug) please_stop = Signal("main stop signal") Thread.wait_for_shutdown_signal(please_stop) except Exception, e: Log.error("Problem with etl", cause=e) finally: Log.stop() if __name__ == "__main__": main()
def tearDown(self):
    """Shut down logging after each test."""
    Log.stop()
"access-control-allow-origin": "*", "content-type": "text/html" } ) if __name__ == "__main__": try: config = startup.read_settings() constants.set(config.constants) Log.start(config.debug) # SETUP TREEHERDER CACHE hg = HgMozillaOrg(use_cache=True, settings=config.hg) th = TreeherderService(hg, settings=config.treeherder) app.add_url_rule('/treeherder', None, th.get_treeherder_job, methods=['GET']) HeaderRewriterFix(app, remove_headers=['Date', 'Server']) app.run(**config.flask) except Exception, e: Log.error("Serious problem with service construction! Shutdown!", cause=e) finally: Log.stop() sys.exit(0)
def tearDownClass(cls):
    """Shut down logging once the whole test class has finished."""
    Log.stop()