def main():
    """
    Read the settings, prepare the utility list and run the spot manager.

    Inside a `SingleInstance` context every utility gets a default discount
    of 0 and every drive without a device is assigned a "/dev/xvd?" name.
    Failures are logged as a warning; logging and MAIN_THREAD are always
    stopped on the way out.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        with SingleInstance(flavor_id=settings.args.filename):
            settings.run_interval = Duration(settings.run_interval)

            for utility in settings.utility:
                utility.discount = coalesce(utility.discount, 0)

                # MARKUP drives WITH EXPECTED device MAPPING
                # Letters start at chr(98) == "b", offset by the number of
                # ephemeral volumes recorded for this instance type.
                ephemeral_count = ephemeral_storage[utility.instance_type]["num"]
                unmapped_drives = (
                    drive for drive in utility.drives if not drive.device
                )
                for position, drive in enumerate(unmapped_drives):
                    suffix = convert.ascii2char(98 + ephemeral_count + position)
                    drive.device = "/dev/xvd" + suffix

            settings.utility = UniqueIndex(
                ["instance_type"], data=settings.utility
            )
            instance_manager = new_instance(settings.instance)
            manager = SpotManager(instance_manager, kwargs=settings)

            if ENABLE_SIDE_EFFECTS:
                manager.update_spot_requests()

            if manager.watcher:
                manager.watcher.join()
    except Exception as problem:
        Log.warning("Problem with spot manager", cause=problem)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
def main():
    """
    Start the extraction worker threads and wait for a shutdown signal.

    Each worker opens its own MySQL connection and transaction, then pulls
    work items from `extractor.queue`; an item that fails is logged and
    put back on the queue. Any failure of the setup itself is logged as a
    warning, and logging is always stopped on exit.
    """
    try:
        settings = startup.read_settings()
        with startup.SingleInstance(settings.args.filename):
            constants.set(settings.constants)
            Log.start(settings.debug)

            extractor = Extract(settings)

            def extract(please_stop):
                # Worker body: one connection + transaction per thread
                with MySQL(**settings.snowflake.database) as db:
                    with db.transaction():
                        for kwargs in extractor.queue:
                            if please_stop:
                                break
                            try:
                                extractor.extract(
                                    db=db, please_stop=please_stop, **kwargs
                                )
                            except Exception as e:
                                Log.warning("Could not extract", cause=e)
                                # RETURN THE WORK TO THE QUEUE SO IT CAN BE RETRIED
                                extractor.queue.add(kwargs)

            for i in range(settings.extract.threads):
                Thread.run("extract #" + text_type(i), extract)

            please_stop = Signal()
            Thread.wait_for_shutdown_signal(
                please_stop=please_stop, allow_exit=True, wait_forever=False
            )
    except Exception as e:
        # Pass the exception explicitly as the cause, matching the inner handler
        Log.warning("Problem with data extraction", cause=e)
    finally:
        Log.stop()
def main():
    """
    Read the settings, prepare the utility list and run the spot manager.

    Logging is started before entering the `SingleInstance` context. Each
    utility gets a default discount of 0 and each drive without a device is
    given a "/dev/xvd?" name. The spot requests are updated with the
    utility required by the instance manager. Failures are logged as a
    warning; logging and MAIN_THREAD are always stopped.
    """
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        with SingleInstance(flavor_id=settings.args.filename):
            constants.set(settings.constants)
            settings.run_interval = Duration(settings.run_interval)

            for utility in settings.utility:
                utility.discount = coalesce(utility.discount, 0)

                # MARKUP drives WITH EXPECTED device MAPPING
                # Letters start at chr(98) == "b", offset by the number of
                # ephemeral volumes recorded for this instance type.
                ephemeral_count = ephemeral_storage[utility.instance_type]["num"]
                unmapped_drives = (
                    drive for drive in utility.drives if not drive.device
                )
                for position, drive in enumerate(unmapped_drives):
                    suffix = convert.ascii2char(98 + ephemeral_count + position)
                    drive.device = "/dev/xvd" + suffix

            settings.utility = UniqueIndex(
                ["instance_type"], data=settings.utility
            )
            instance_manager = new_instance(settings.instance)
            manager = SpotManager(instance_manager, kwargs=settings)

            if ENABLE_SIDE_EFFECTS:
                manager.update_spot_requests(instance_manager.required_utility())

            if manager.watcher:
                manager.watcher.join()
    except Exception as problem:
        Log.warning("Problem with spot manager", cause=problem)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
def run(self, *args, **kwargs):
    """
    Run the Flask application, making sure logging is stopped afterwards.

    All arguments are forwarded unchanged to `Flask.run`.
    """
    # ENSURE THE LOGGING IS CLEANED UP
    try:
        Flask.run(self, *args, **kwargs)
    except BaseException as shutdown_cause:
        # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY,
        # AND gunicorn WILL NOT REPORT
        Log.warning("TUID service shutdown!", cause=shutdown_cause)
    finally:
        Log.stop()
def tearDownClass(self):
    """
    Delete every index listed in `ESUtils.indexes`, then stop logging.

    Removal is best-effort: a failure to delete one index is reported as a
    warning (instead of being silently discarded) and the remaining
    indexes are still processed. `Log.stop()` runs even if the loop itself
    fails.
    """
    try:
        for i in ESUtils.indexes:
            try:
                self._es_cluster.delete_index(i)
                Log.note("remove index {{index}}", index=i)
            except Exception as e:
                # KEEP GOING, BUT DO NOT HIDE THE PROBLEM
                Log.warning("could not remove index {{index}}", index=i, cause=e)
    finally:
        Log.stop()
def run(self, *args, **kwargs):
    """
    Run the Flask application, making sure logging is stopped afterwards.

    All arguments are forwarded unchanged to `Flask.run`; whatever ends the
    run is reported with a message prefixed by APP_NAME.
    """
    # ENSURE THE LOGGING IS CLEANED UP
    try:
        Flask.run(self, *args, **kwargs)
    except BaseException as shutdown_cause:
        # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY,
        # AND gunicorn WILL NOT REPORT
        Log.warning(APP_NAME + " service shutdown!", cause=shutdown_cause)
    finally:
        Log.stop()
def tearDownClass(self):
    """
    Delete every index listed in `ESUtils.indexes`, then stop logging.

    A cluster handle is built from `test_jx.global_settings.backend_es`.
    Removal is best-effort: a failure to delete one index is reported as a
    warning (instead of being silently discarded) and the remaining
    indexes are still processed. `Log.stop()` runs even when the cluster
    cannot be constructed.
    """
    try:
        cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
        for i in ESUtils.indexes:
            try:
                cluster.delete_index(i)
                Log.note("remove index {{index}}", index=i)
            except Exception as e:
                # KEEP GOING, BUT DO NOT HIDE THE PROBLEM
                Log.warning("could not remove index {{index}}", index=i, cause=e)
    finally:
        Log.stop()
def main():
    """
    Read the settings, start logging and run the ETL setup step.

    A failure is logged as a warning; logging is always stopped.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        etl = ETL(settings)
        etl.setup(settings.instance, settings.utility)
    except Exception as problem:
        Log.warning("Problem with setup of ETL", cause=problem)
    finally:
        Log.stop()
def main():
    """
    Read the settings, start logging and run the ETL setup step.

    A failure is logged as a warning; logging is always stopped.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        etl = ETL(settings)
        etl.setup(settings.instance, settings.utility)
    except Exception as problem:
        Log.warning("Problem with setup of ETL", cause=problem)
    finally:
        Log.stop()
def start():
    """
    Read a JSON configuration from the first line of STDIN and run the
    command loop with it.

    Logging is started with `config.debug`, using a single "raw" log as
    the default. A failure is reported through `Log.error`; logging is
    always stopped.
    """
    try:
        first_line = STDIN.readline()
        config = json2value(first_line.decode('utf8'))
        constants.set(config.constants)

        log_settings = set_default(config.debug, {"logs": [{"type": "raw"}]})
        Log.start(log_settings)

        command_loop({"config": config})
    except Exception as problem:
        Log.error("problem staring worker", cause=problem)
    finally:
        Log.stop()
def main():
    """
    Read the settings, start logging and run the synch.

    Everything that happens after `Log.start` is inside the try block, so
    `Log.stop()` is guaranteed to run once logging has been started (the
    original left `constants.set` outside, which could leave logging
    running on failure). Failures are reported through `Log.error` with
    the exception given explicitly as the cause.
    """
    settings = startup.read_settings()
    Log.start(settings.debug)
    try:
        constants.set(settings.constants)
        _synch(settings)
    except Exception as e:
        Log.error("Problem with synch", cause=e)
    finally:
        Log.stop()
def run(self, *args, **kwargs):
    """
    Run the Flask application; always stop logging and the main thread.

    All arguments are forwarded unchanged to `Flask.run`. An exception
    whose first argument equals 0 is treated as a normal exit and is not
    reported; anything else is logged as a warning.
    """
    # ENSURE THE LOGGING IS CLEANED UP
    try:
        Flask.run(self, *args, **kwargs)
    except BaseException as shutdown_cause:
        # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY,
        # AND gunicorn WILL NOT REPORT
        if not (shutdown_cause.args and shutdown_cause.args[0] == 0):
            # ANYTHING OTHER THAN EXIT CODE 0 IS NOT A NORMAL EXIT
            Log.warning(
                "Serious problem with ActiveData service construction! Shutdown!",
                cause=shutdown_cause,
            )
    finally:
        Log.stop()
        stop_main_thread()
def start():
    """
    Read a JSON configuration from the first line of STDIN and run the
    command loop with it.

    Logging is started with `config.debug` and then switched to a
    `RawLogger`. A failure is reported through `Log.error`; logging is
    always stopped.
    """
    try:
        config = json2value(STDIN.readline().decode("utf8"))
        constants.set(config.constants)

        Log.start(config.debug)
        raw_logger = RawLogger()
        Log.set_logger(raw_logger)

        command_loop({"config": config})
    except Exception as problem:
        Log.error("problem staring worker", cause=problem)
    finally:
        Log.stop()
def run(self, force=False, restart=False, start=None, merge=False):
    """
    Load the settings from CONFIG_FILE, start logging and run `extract`.

    The four flags are passed through to `self.extract` unchanged. A
    failure is reported through `Log.error`; logging is always stopped.
    """
    try:
        # SETUP LOGGING
        config = startup.read_settings(filename=CONFIG_FILE)
        constants.set(config.constants)
        Log.start(config.debug)

        self.extract(config, force, restart, start, merge)
    except Exception as problem:
        Log.error("could not extract jobs", cause=problem)
    finally:
        Log.stop()
def main():
    """
    Run `move_shards` against a hard-coded Elasticsearch endpoint.

    The settings object is built inline and is also what gets passed to
    `Log.start`. Failures are reported through `Log.error` with the
    exception given explicitly as the cause; logging is always stopped.
    """
    try:
        settings = wrap({"elasticsearch": {
            "host": "http://activedata.allizom.org",
            "port": 9200,
            "debug": True
        }})

        Log.start(settings)
        move_shards(settings)
    except Exception as e:
        Log.error("Problem with assign of shards", cause=e)
    finally:
        Log.stop()
def main():
    """
    Fetch the branch list from hg and load it into an Elasticsearch index.

    The index is obtained (or created) from `settings.hg.branches`, given
    an alias, and filled with one document per branch whose id is
    "<name> <locale>". Failures are reported through `Log.error` with the
    exception given explicitly as the cause; logging is always stopped.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = _get_branches_from_hg(settings.hg)

        es = elasticsearch.Cluster(kwargs=settings.hg.branches).get_or_create_index(kwargs=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", cause=e)
    finally:
        Log.stop()
def main():
    """
    Patch ADR's `Configuration` with an `update()` method, apply the ADR
    settings, redirect loguru output to `_logging`, and run the schedulers.

    Any failure is logged as a warning; logging is always stopped.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        inject_secrets(config)

        with Timer("PATCH ADR: dd update() method to Configuration class"):

            def update(self, config):
                """
                Update the configuration object with new parameters

                :param config: dict of configuration
                """
                # NOTE(review): `!= None` (rather than `is not None`) looks
                # deliberate - presumably the values may be null-like objects
                # that compare equal to None; confirm before changing
                for k, v in config.items():
                    if v != None:
                        self._config[k] = v

                # De-duplicate the sources, expand "~" and keep them sorted
                self._config["sources"] = sorted(
                    map(os.path.expanduser, set(self._config["sources"]))
                )

                # Use the NullStore by default. This allows us to control whether
                # caching is enabled or not at runtime.
                self._config["cache"].setdefault("stores", {"null": {"driver": "null"}})
                # object.__setattr__ is used instead of plain assignment
                # (presumably Configuration customises attribute setting - TODO confirm)
                object.__setattr__(self, "cache", CacheManager(self._config["cache"]))
                self.cache.extend("null", lambda driver: NullStore())

            # Install the function above as Configuration.update
            setattr(Configuration, "update", update)

        # UPDATE ADR CONFIGURATION
        adr.config.update(config.adr)

        Log.start(config.debug)

        # SHUNT ADR LOGGING TO MAIN LOGGING
        # https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
        # Drop the existing loguru handler(s), then add `_logging` as the sink
        loguru.logger.remove()
        loguru.logger.add(
            _logging, level="DEBUG", format="{message}", filter=lambda r: True,
        )

        Schedulers(config).process()
    except Exception as e:
        Log.warning("Problem with etl! Shutting down.", cause=e)
    finally:
        Log.stop()
def main():
    """
    Parse the required --file/--source argument and convert that path.

    A failure is reported through `Log.error`; logging is always stopped.
    """
    try:
        source_argument = {
            "name": ["--file", "--source"],
            "help": "directory or file with *.json schema files",
            "type": str,
            "dest": "source",
            "required": True,
        }
        args = startup.argparse(source_argument)

        schema_source = File(args.source)
        _convert(schema_source)
    except Exception as problem:
        Log.error(
            "Serious problem with ActiveData service! Shutdown completed!",
            cause=problem,
        )
    finally:
        Log.stop()
def main():
    """
    Fetch the branch list from hg and load it into an Elasticsearch index.

    The index is obtained (or created) from `settings.hg.branches`, given
    an alias, and filled with one document per branch whose id is
    "<name> <locale>". Failures are reported through `Log.error` with the
    exception given explicitly as the cause; logging is always stopped.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = _get_branches_from_hg(settings.hg)

        es = elasticsearch.Cluster(kwargs=settings.hg.branches).get_or_create_index(kwargs=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", cause=e)
    finally:
        Log.stop()
def main(num):
    """
    Run `test_json` over several JSON encoders and log a tab-separated
    summary of the collected results.

    :param num: passed through to every `test_json` call
    """
    try:
        Log.start()
        results = []

        def measure(description, encode):
            # Every run appends to the same `results` list
            test_json(results, description, encode, num)

        measure("mo-json encoder", json_encoder)
        measure("mo-json encoder (again)", json_encoder)
        measure("scrub before json.dumps", cPythonJSONEncoder().encode)
        measure("override JSONEncoder.default()", EnhancedJSONEncoder().encode)
        # WILL CRASH, CAN NOT HANDLE DIVERSITY OF TYPES
        measure("default json.dumps", json.dumps)
        measure("typed json", typed_encoder.encode)

        # measure("scrubbed ujson", ujson.dumps)  # THIS PLAIN CRASHES

        summary = convert.list2tab(results)
        Log.note(u"\n{{summary}}", summary=summary)
    finally:
        Log.stop()
def main():
    """
    Walk the revision history from a fixed changeset, looking for the
    revision with the fewest changes per file in its diff.

    Only revisions touching more than MIN_FILES files are scored; each new
    minimum is logged. The parents of every visited revision are queued
    for inspection. Failures are reported through `Log.error` with the
    exception given explicitly as the cause; logging is always stopped.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        hg = HgMozillaOrg(settings)
        todo = Queue()
        todo.add("97160a734959")
        least = 100000  # BEST (LOWEST) SCORE SEEN SO FAR

        while todo:
            next_ = todo.pop()
            curr = hg.get_revision(
                wrap({
                    "changeset": {
                        "id": next_
                    },
                    "branch": {
                        "name": BRANCH
                    }
                }))
            if len(curr.changeset.files) > MIN_FILES:
                diff = hg._get_json_diff_from_hg(curr)
                num_changes = sum(len(d.changes) for d in diff)
                # SCORE IS THE AVERAGE NUMBER OF CHANGES PER DIFF ENTRY
                score = num_changes / len(diff)
                if score < least:
                    least = score
                    Log.note(
                        "smallest = {{rev}}, num_lines={{num}}, num_files={{files}}",
                        rev=curr.changeset.id,
                        num=num_changes,
                        files=len(diff))
            todo.extend(listwrap(curr.parents))
    except Exception as e:
        Log.error("Problem with scna", cause=e)
    finally:
        Log.stop()
def main():
    """
    Start logging, extend ADR's `Configuration` (and optionally
    `S3Store`), apply the ADR settings, then run the schedulers until done
    or until the MAX_RUNTIME timer fires.

    Any failure is logged as a warning; logging is always stopped.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        # SHUNT PYTHON LOGGING TO MAIN LOGGING
        capture_logging()
        # SHUNT ADR LOGGING TO MAIN LOGGING
        # https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
        capture_loguru()

        if config.taskcluster:
            inject_secrets(config)

        @extend(Configuration)
        def update(self, config):
            """
            Update the configuration object with new parameters

            :param config: dict of configuration
            """
            # NOTE(review): `!= None` (rather than `is not None`) looks
            # deliberate - presumably the values may be null-like objects
            # that compare equal to None; confirm before changing
            for k, v in config.items():
                if v != None:
                    self._config[k] = v

            # De-duplicate the sources, expand "~" and keep them sorted
            self._config["sources"] = sorted(
                map(os.path.expanduser, set(self._config["sources"])))

            # Use the NullStore by default. This allows us to control whether
            # caching is enabled or not at runtime.
            self._config["cache"].setdefault("stores", {"null": {
                "driver": "null"
            }})
            # object.__setattr__ is used instead of plain assignment
            # (presumably Configuration customises attribute setting - TODO confirm)
            object.__setattr__(self, "cache", CustomCacheManager(self._config))
            for _, store in self._config["cache"]["stores"].items():
                if store.path and not store.path.endswith("/"):
                    # REQUIRED, OTHERWISE FileStore._create_cache_directory() WILL LOOK AT PARENT DIRECTORY
                    store.path = store.path + "/"

        if SHOW_S3_CACHE_HIT:
            # Keep a reference to the original method so the wrapper can call it
            s3_get = S3Store._get

            @extend(S3Store)
            def _get(self, key):
                # The timer starts with verbose=False and is switched to
                # verbose only when the lookup returned something
                with Timer("get {{key}} from S3", {"key": key}, verbose=False) as timer:
                    output = s3_get(self, key)
                    if output is not None:
                        timer.verbose = True
                    return output

        # UPDATE ADR CONFIGURATION
        with Repeat("waiting for ADR", every="10second"):
            adr.config.update(config.adr)
            # DUMMY TO TRIGGER CACHE
            make_push_objects(from_date=Date.today().format(), to_date=Date.now().format(), branch="autoland")

        # Timer that fires after MAX_RUNTIME; it logs an alert and is handed
        # to the schedulers (presumably as their stop signal - TODO confirm)
        outatime = Till(seconds=Duration(MAX_RUNTIME).total_seconds())
        outatime.then(lambda: Log.alert("Out of time, exit early"))
        Schedulers(config).process(outatime)
    except Exception as e:
        Log.warning("Problem with etl! Shutting down.", cause=e)
    finally:
        Log.stop()
# Route every path, and the root, to the relay handlers: GET requests go to
# relay_get and POST requests go to relay_post.
# NOTE(review): `any_flask_app` is not defined in the visible text; these four
# calls presumably belong to the body of a function whose header is out of view
any_flask_app.add_url_rule(str("/<path:path>"), None, relay_get, methods=[str("GET")])
any_flask_app.add_url_rule(str("/<path:path>"), None, relay_post, methods=[str("POST")])
any_flask_app.add_url_rule(str("/"), None, relay_get, methods=[str("GET")])
any_flask_app.add_url_rule(str("/"), None, relay_post, methods=[str("POST")])

# Script entry point: build the app, configure it, then run Flask
if __name__ in ("__main__",):
    Log.note("Starting " + APP_NAME + " Service App...")
    flask_app = RelayApp(__name__)

    try:
        # The settings file name is taken from the HG_RELAY_CONFIG environment variable
        config = startup.read_settings(filename=os.environ.get("HG_RELAY_CONFIG"))
        constants.set(config.constants)
        Log.start(config.debug)

        add(flask_app)
        Log.note("Started " + APP_NAME + " Service")
    except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
        # Report the construction failure, and stop logging even if reporting raises
        try:
            Log.error(
                "Serious problem with " + APP_NAME + " service construction! Shutdown!",
                cause=e
            )
        finally:
            Log.stop()

    if config.flask:
        # Offset the port by the process number when both are configured
        if config.flask.port and config.args.process_num:
            config.flask.port += config.args.process_num
        Log.note("Running Flask...")
        flask_app.run(**config.flask)
def tearDownClass(cls):
    """Stop logging once the tests of this class are finished."""
    Log.stop()
def extract(self, settings, force, restart, merge):
    """
    Copy alert summaries from the MySQL source to the BigQuery destination
    in chunks, recording progress in Redis after each chunk.

    :param settings: configuration; reads `extractor`, `source` and `destination`
    :param force: when the generated SQL differs from the stored one, warn and continue instead of raising an error
    :param restart: ignore the stored state and begin again from (0, 0)
    :param merge: merge the destination shards before extracting

    Errors during extraction or during the final merge are logged as
    warnings. Logging is stopped at the end of the method.
    """
    if not settings.extractor.app_name:
        Log.error("Expecting an extractor.app_name in config file")

    # SETUP DESTINATION
    destination = bigquery.Dataset(
        dataset=settings.extractor.app_name,
        kwargs=settings.destination).get_or_create_table(
            settings.destination)

    try:
        if merge:
            with Timer("merge shards"):
                destination.merge_shards()

        # RECOVER LAST SQL STATE
        redis = Redis.from_url(REDIS_URL)
        state = redis.get(settings.extractor.key)

        if restart or not state:
            # No usable state: start from the beginning and record that
            state = (0, 0)
            redis.set(settings.extractor.key, value2json(state).encode("utf8"))
        else:
            state = json2value(state.decode("utf8"))

        last_modified, alert_id = state
        last_modified = Date(last_modified)

        # SCAN SCHEMA, GENERATE EXTRACTION SQL
        extractor = MySqlSnowflakeExtractor(settings.source)
        canonical_sql = extractor.get_sql(SQL("SELECT 0"))

        # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
        # The SQL generated for a constant query is compared with the copy
        # stored in Redis by the previous run
        old_sql = redis.get(settings.extractor.sql)
        if old_sql and old_sql.decode("utf8") != canonical_sql.sql:
            if force:
                Log.warning("Schema has changed")
            else:
                Log.error("Schema has changed")
        redis.set(settings.extractor.sql, canonical_sql.sql.encode("utf8"))

        # SETUP SOURCE
        source = MySQL(settings.source.database)

        while True:
            Log.note(
                "Extracting alerts for last_modified={{last_modified|datetime|quote}}, alert.id={{alert_id}}",
                last_modified=last_modified,
                alert_id=alert_id,
            )
            last_year = Date.today(
            ) - YEAR + DAY  # ONLY YOUNG RECORDS CAN GO INTO BIGQUERY

            # Ids of the summaries created within the last year that were
            # updated (directly, or through one of their alerts) after
            # last_modified; at most chunk_size ids per round
            get_ids = SQL(
                "SELECT s.id " +
                "\nFROM treeherder.performance_alert_summary s" +
                "\nLEFT JOIN treeherder.performance_alert a ON s.id=a.summary_id" +
                "\nWHERE s.created>" + quote_value(last_year).sql +
                " AND (s.last_updated > " + quote_value(last_modified).sql +
                "\nOR a.last_updated > " + quote_value(last_modified).sql + ")" +
                "\nGROUP BY s.id" +
                "\nORDER BY s.id" +
                "\nLIMIT " + quote_value(settings.extractor.chunk_size).sql)

            sql = extractor.get_sql(get_ids)

            # PULL FROM source, AND PUSH TO destination
            acc = []
            with source.transaction():
                cursor = source.query(sql, stream=True, row_tuples=True)
                extractor.construct_docs(cursor, acc.append, False)
            if not acc:
                # Nothing new: the extraction is complete
                break
            destination.extend(acc)

            # RECORD THE STATE
            last_doc = acc[-1]
            last_modified, alert_id = last_doc.created, last_doc.id
            redis.set(
                settings.extractor.key,
                value2json((last_modified, alert_id)).encode("utf8"),
            )

            if len(acc) < settings.extractor.chunk_size:
                # A short chunk means there is nothing left to fetch
                break

    except Exception as e:
        Log.warning("problem with extraction", cause=e)

    Log.note("done alert extraction")

    # The final merge is attempted even when the extraction above failed
    try:
        with Timer("merge shards"):
            destination.merge_shards()
    except Exception as e:
        Log.warning("problem with merge", cause=e)

    Log.note("done alert merge")
    Log.stop()
# Construct the TUID service, then run Flask.
# NOTE(review): `flask_app` is not defined in the visible text; this fragment
# presumably sits inside a larger script section - confirm against the full file
try:
    # The settings file name is taken from the TUID_CONFIG environment variable
    config = startup.read_settings(
        filename=os.environ.get('TUID_CONFIG')
    )
    constants.set(config.constants)
    Log.start(config.debug)

    service = TUIDService(config.tuid)

    # Log memory info while running
    # objgraph.growth is called twice, each time with its own peak_stats
    # dict; the second dict is kept on the stats daemon (presumably as the
    # baseline for later growth reports - TODO confirm)
    initial_growth = {}
    objgraph.growth(peak_stats={})
    objgraph.growth(peak_stats=initial_growth)
    service.statsdaemon.initial_growth = initial_growth

    Log.note("Started TUID Service")
    Log.note("Current free memory: {{mem}} Mb", mem=service.statsdaemon.get_free_memory())
except BaseException as e:  # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
    # Report the construction failure, and stop logging even if reporting raises
    try:
        Log.error("Serious problem with TUID service construction! Shutdown!", cause=e)
    finally:
        Log.stop()

if config.flask:
    # Offset the port by the process number when both are configured
    if config.flask.port and config.args.process_num:
        config.flask.port += config.args.process_num
    Log.note("Running Flask...")
    flask_app.run(**config.flask)
def run(self, force=False, restart=False, merge=False):
    """
    Copy alert summaries from the MySQL source to the BigQuery destination
    in chunks, recording progress in Redis after each chunk.

    :param force: when the generated SQL differs from the stored one, warn and continue instead of raising an error
    :param restart: ignore the stored state and begin again from (0, 0)
    :param merge: merge the destination shards before extracting

    Settings are read from CONFIG_FILE. Errors during extraction or during
    the final merge are logged as warnings. Logging is stopped at the end
    of the method.
    """
    # SETUP LOGGING
    settings = startup.read_settings(filename=CONFIG_FILE)
    constants.set(settings.constants)
    Log.start(settings.debug)

    if not settings.extractor.app_name:
        Log.error("Expecting an extractor.app_name in config file")

    # SETUP DESTINATION
    destination = bigquery.Dataset(
        dataset=settings.extractor.app_name, kwargs=settings.destination
    ).get_or_create_table(settings.destination)

    try:
        if merge:
            with Timer("merge shards"):
                destination.merge_shards()

        # RECOVER LAST SQL STATE
        redis = Redis()
        state = redis.get(settings.extractor.key)

        if restart or not state:
            # No usable state: start from the beginning and record that
            state = (0, 0)
            redis.set(settings.extractor.key, value2json(state).encode("utf8"))
        else:
            state = json2value(state.decode("utf8"))

        last_modified, alert_id = state
        last_modified = parse(last_modified)

        # SCAN SCHEMA, GENERATE EXTRACTION SQL
        extractor = MySqlSnowflakeExtractor(settings.source)
        canonical_sql = extractor.get_sql(SQL("SELECT 0"))

        # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
        # The SQL generated for a constant query is compared with the copy
        # stored in Redis by the previous run
        old_sql = redis.get(settings.extractor.sql)
        if old_sql and old_sql.decode("utf8") != canonical_sql.sql:
            if force:
                Log.warning("Schema has changed")
            else:
                Log.error("Schema has changed")
        redis.set(settings.extractor.sql, canonical_sql.sql.encode("utf8"))

        # SETUP SOURCE
        source = MySQL(settings.source.database)

        while True:
            Log.note(
                "Extracting alerts for last_modified={{last_modified|datetime|quote}}, alert.id={{alert_id}}",
                last_modified=last_modified,
                alert_id=alert_id,
            )
            last_year = (
                Date.today() - YEAR + DAY
            )  # ONLY YOUNG RECORDS CAN GO INTO BIGQUERY

            # The ORM expression below is meant to produce this query:
            #
            # SELECT
            #     s.id
            # FROM
            #     treeherder.performance_alert_summary s
            # LEFT JOIN
            #     treeherder.performance_alert a ON s.id=a.summary_id
            # WHERE
            #     s.created>{last_year} AND (s.last_updated>{last_modified} OR a.last_updated>{last_modified})
            # GROUP BY
            #     s.id
            # ORDER BY
            #     s.id
            # LIMIT
            #     {settings.extractor.chunk_size}
            get_ids = SQL(
                str(
                    (
                        PerformanceAlertSummary.objects.filter(
                            Q(created__gt=last_year.datetime)
                            & (
                                Q(last_updated__gt=last_modified.datetime)
                                | Q(alerts__last_updated__gt=last_modified.datetime)
                            )
                        )
                        .annotate()
                        .values("id")
                        .order_by("id")[: settings.extractor.chunk_size]
                    ).query
                )
            )

            sql = extractor.get_sql(get_ids)

            # PULL FROM source, AND PUSH TO destination
            acc = []
            with source.transaction():
                cursor = source.query(sql, stream=True, row_tuples=True)
                extractor.construct_docs(cursor, acc.append, False)
            if not acc:
                # Nothing new: the extraction is complete
                break
            destination.extend(acc)

            # RECORD THE STATE
            last_doc = acc[-1]
            last_modified, alert_id = last_doc.created, last_doc.id
            redis.set(
                settings.extractor.key,
                value2json((last_modified, alert_id)).encode("utf8"),
            )

            if len(acc) < settings.extractor.chunk_size:
                # A short chunk means there is nothing left to fetch
                break

    except Exception as e:
        Log.warning("problem with extraction", cause=e)

    Log.note("done alert extraction")

    # The final merge is attempted even when the extraction above failed
    try:
        with Timer("merge shards"):
            destination.merge_shards()
    except Exception as e:
        Log.warning("problem with merge", cause=e)

    Log.note("done alert merge")
    Log.stop()
def tearDownClass(cls):
    """Close the broker, then stop logging."""
    broker.close()
    Log.stop()
def tearDown(self):
    """Stop logging after each test."""
    Log.stop()