def extract_alert_settings(env_setup):
    """
    Load the alert-extraction settings and initialise constants and logging.

    :param env_setup: not read here (presumably a fixture dependency; confirm
        against the caller)
    :return: the loaded settings, with `source.database.ssl` cleared
    """
    test_settings = startup.read_settings(
        filename=extract_alerts.CONFIG_FILE,
        complain=False,
    )
    # SSL IS NOT REQUIRED FOR THE TEST DATABASE
    test_settings.source.database.ssl = None
    constants.set(test_settings.constants)
    Log.start(test_settings.debug)
    return test_settings
def setUpClass(cls):
    """
    Read tests/config.json, keep it on the class as `config`, then apply its
    constants and start logging. Any failure is reported through Log.error.
    """
    try:
        loaded = startup.read_settings(filename="tests/config.json")
        cls.config = loaded
        constants.set(cls.config.constants)
        Log.start(cls.config.debug)
    except Exception as cause:
        Log.error("Problem with etl", cause)
def main():
    """
    Spot manager entry point.

    Reads settings, normalises each utility entry (default discount, device
    names for drives that have none), builds the SpotManager, optionally
    updates spot requests, and waits on the watcher if there is one.
    Logging and the main thread are always stopped on the way out.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        with SingleInstance(flavor_id=config.args.filename):
            config.run_interval = Duration(config.run_interval)
            for utility in config.utility:
                utility.discount = coalesce(utility.discount, 0)
                # MARK UP drives WITH THE EXPECTED device MAPPING:
                # LETTERS START AT chr(98) ('b'), SHIFTED BY THE EPHEMERAL VOLUME COUNT
                first_code = 98 + ephemeral_storage[utility.instance_type]["num"]
                unmapped = (drive for drive in utility.drives if not drive.device)
                for code, drive in enumerate(unmapped, first_code):
                    drive.device = "/dev/xvd" + convert.ascii2char(code)

            config.utility = UniqueIndex(["instance_type"], data=config.utility)
            manager = SpotManager(new_instance(config.instance), kwargs=config)

            if ENABLE_SIDE_EFFECTS:
                manager.update_spot_requests()

            if manager.watcher:
                manager.watcher.join()
    except Exception as cause:
        Log.warning("Problem with spot manager", cause=cause)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
def main():
    """
    Deployment entry point.

    Parses the command line (`--all`, `--dir`), applies constants, starts
    logging, then deploys either every sub-project found under the directory
    (`--all`) or just the directory itself. Failures are logged as warnings.
    """
    try:
        settings = startup.read_settings(
            defs=[
                {
                    "name": ["--all", "-a"],
                    "action": 'store_true',
                    "help": 'process all mo-* subdirectories',
                    "dest": "all",
                    "required": False
                },
                {
                    # NOTE(review): "required": True alongside a "default"
                    # looks contradictory - confirm which one is intended
                    "name": ["--dir", "--directory", "-d"],
                    "help": 'directory to deploy',
                    "type": str,
                    "dest": "directory",
                    "required": True,
                    "default": "."
                }
            ]
        )
        constants.set(settings.constants)
        Log.start(settings.debug)

        if settings.args.all:
            deploy_all(File(settings.args.directory), settings.prefix, settings)
        else:
            Deploy(File(settings.args.directory), kwargs=settings).deploy()
    except Exception as e:  # `except X, e` IS A SYNTAX ERROR ON PYTHON 3
        Log.warning("Problem with etl", cause=e)
def main():
    """
    Spot manager entry point (variant that passes the required utility to
    `update_spot_requests`).

    Logging is started before the single-instance guard is taken; constants
    are applied inside it. Logging and the main thread are always stopped
    on exit.
    """
    try:
        config = startup.read_settings()
        Log.start(config.debug)
        with SingleInstance(flavor_id=config.args.filename):
            constants.set(config.constants)
            config.run_interval = Duration(config.run_interval)
            for utility in config.utility:
                utility.discount = coalesce(utility.discount, 0)
                # MARK UP drives WITH THE EXPECTED device MAPPING:
                # LETTERS START AT chr(98) ('b'), SHIFTED BY THE EPHEMERAL VOLUME COUNT
                first_code = 98 + ephemeral_storage[utility.instance_type]["num"]
                unmapped = (drive for drive in utility.drives if not drive.device)
                for code, drive in enumerate(unmapped, first_code):
                    drive.device = "/dev/xvd" + convert.ascii2char(code)

            config.utility = UniqueIndex(["instance_type"], data=config.utility)
            instance_manager = new_instance(config.instance)
            manager = SpotManager(instance_manager, kwargs=config)

            if ENABLE_SIDE_EFFECTS:
                required = instance_manager.required_utility()
                manager.update_spot_requests(required)

            if manager.watcher:
                manager.watcher.join()
    except Exception as cause:
        Log.warning("Problem with spot manager", cause=cause)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
def main():
    """
    Extraction entry point.

    Starts `settings.extract.threads` worker threads; each opens its own MySQL
    connection and transaction and processes items from the extractor queue.
    An item that fails is logged and put back on the queue. The main thread
    then waits for a shutdown signal.
    """
    try:
        settings = startup.read_settings()
        with startup.SingleInstance(settings.args.filename):
            constants.set(settings.constants)
            Log.start(settings.debug)

            extractor = Extract(settings)

            def extract(please_stop):
                # ONE CONNECTION AND ONE TRANSACTION PER WORKER
                with MySQL(**settings.snowflake.database) as db, db.transaction():
                    for work in extractor.queue:
                        if please_stop:
                            break
                        try:
                            extractor.extract(db=db, please_stop=please_stop, **work)
                        except Exception as cause:
                            Log.warning("Could not extract", cause=cause)
                            # PUT THE FAILED ITEM BACK ON THE QUEUE
                            extractor.queue.add(work)

            for worker_num in range(settings.extract.threads):
                Thread.run("extract #" + text_type(worker_num), extract)

            please_stop = Signal()
            Thread.wait_for_shutdown_signal(
                please_stop=please_stop,
                allow_exit=True,
                wait_forever=False,
            )
    except Exception as e:
        Log.warning("Problem with data extraction", e)
    finally:
        Log.stop()
def main():
    """
    Read settings, apply constants, start logging, then block until a
    shutdown signal arrives. Any failure is raised through Log.error.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:  # `except X, e` IS A SYNTAX ERROR ON PYTHON 3
        Log.error("Problem with etl", cause=e)
def setup(settings=None):
    """
    Configure the ActiveData Flask application.

    :param settings: passed to `startup.read_settings` as `filename`
    :return: the configured `app`
    Any failure during construction is raised through Log.error.
    """
    global config

    try:
        config = startup.read_settings(
            defs={
                "name": ["--process_num", "--process"],
                "help": "Additional port offset (for multiple Flask processes",
                "type": int,
                "dest": "process_num",
                "default": 0,
                "required": False
            },
            filename=settings
        )

        constants.set(config.constants)
        Log.start(config.debug)

        # EACH EXTRA PROCESS LISTENS ON ITS OWN PORT
        if config.args.process_num and config.flask.port:
            config.flask.port += config.args.process_num

        # PIPE REQUEST LOGS TO ES DEBUG
        if config.request_logs:
            request_logger = elasticsearch.Cluster(
                config.request_logs).get_or_create_index(config.request_logs)
            active_data.request_log_queue = request_logger.threaded_queue(
                max_size=2000)

        # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY
        containers.config.default = {
            "type": "elasticsearch",
            "settings": config.elasticsearch.copy()
        }

        # TURN ON /exit FOR WINDOWS DEBUGGING
        if config.flask.debug or config.flask.allow_exit:
            config.flask.allow_exit = None
            Log.warning("ActiveData is in debug mode")
            app.add_url_rule('/exit', 'exit', _exit)

        # TRIGGER FIRST INSTANCE
        FromESMetadata(config.elasticsearch)
        if config.saved_queries:
            setattr(save_query, "query_finder", SaveQueries(config.saved_queries))
        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])

        if config.flask.ssl_context:
            if config.args.process_num:
                Log.error(
                    "can not serve ssl and multiple Flask instances at once")
            setup_ssl()

        return app
    except Exception as e:  # `except X, e` IS A SYNTAX ERROR ON PYTHON 3
        Log.error(
            "Serious problem with ActiveData service construction! Shutdown!",
            cause=e)
def main():
    """
    Read settings, apply constants, start logging, and run the ETL setup for
    the configured instance and utility. Problems are logged as warnings;
    logging is always stopped.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        etl = ETL(config)
        etl.setup(config.instance, config.utility)
    except Exception as cause:
        Log.warning("Problem with setup of ETL", cause=cause)
    finally:
        Log.stop()
def main():
    """
    Run `_synch` with the loaded settings.

    Settings are read, logging started and constants applied before the
    guarded section, so failures in those steps are not routed through
    Log.error. Logging is stopped once `_synch` finishes or fails.
    """
    config = startup.read_settings()
    Log.start(config.debug)
    constants.set(config.constants)

    try:
        _synch(config)
    except Exception as cause:
        Log.error("Problem with synch", cause)
    finally:
        Log.stop()
def main():
    """
    Populate the module-level `config` and `hg`, then start logging.

    Note that `hg` is built before logging is started. Any failure is raised
    through Log.error.
    """
    global config, hg

    try:
        config = startup.read_settings()
        constants.set(config.constants)
        hg = HgMozillaOrg(config)
        Log.start(config.debug)
    except Exception as cause:
        Log.error("Problem with etl", cause)
def setUpClass(cls):
    """
    Prepare module-level `config` and `broker` for the tests: load
    tests/config/file.json, apply constants, start logging, delete the
    broker's backing directory, and construct a new Broker.
    """
    global config, broker

    try:
        config = startup.read_settings(filename="tests/config/file.json")
        constants.set(config.constants)
        Log.start(config.debug)

        # REMOVE THE BACKING DIRECTORY BEFORE THE BROKER IS BUILT
        backing = File(config.broker.backing.directory)
        backing.delete()
        broker = Broker(kwargs=config.broker)
    except Exception as cause:
        Log.error("could not setup for testing", cause=cause)
def run(self, force=False, restart=False, start=None, merge=False):
    """
    Load settings, initialise constants and logging, then delegate to
    `self.extract`.

    :param force: forwarded to `self.extract`
    :param restart: forwarded to `self.extract`
    :param start: forwarded to `self.extract`
    :param merge: forwarded to `self.extract`
    Failures are raised through Log.error; logging is always stopped.
    """
    try:
        # SETUP LOGGING
        config = startup.read_settings(filename=CONFIG_FILE)
        constants.set(config.constants)
        Log.start(config.debug)

        self.extract(config, force, restart, start, merge)
    except Exception as cause:
        Log.error("could not extract jobs", cause=cause)
    finally:
        Log.stop()
def main():
    """
    Populate the module-level `config` and `hg`, then run `_parse_diff` on one
    hard-coded mozilla-central changeset. The result is not used afterwards.
    Any failure is raised through Log.error.
    """
    global config, hg

    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        hg = HgMozillaOrg(config)

        sample_revision = Data(
            changeset={"id": "2d9d0bebb5c6"},
            branch={"url": "https://hg.mozilla.org/mozilla-central"},
        )
        parsed = _parse_diff(sample_revision)
    except Exception as cause:
        Log.error("Problem with etl", cause)
def main():
    """
    Scheduler entry point.

    Injects secrets, attaches an `update()` method to ADR's Configuration
    class, applies `config.adr` through it, starts logging, routes loguru
    output to `_logging`, and runs the schedulers. Problems are logged as
    warnings; logging is always stopped.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        inject_secrets(config)

        with Timer("PATCH ADR: dd update() method to Configuration class"):

            def update(self, config):
                """
                Update the configuration object with new parameters
                :param config: dict of configuration
                """
                for name, value in config.items():
                    # `!= None` (NOT `is not None`) IS KEPT AS WRITTEN
                    if value != None:
                        self._config[name] = value

                unique_sources = set(self._config["sources"])
                self._config["sources"] = sorted(
                    map(os.path.expanduser, unique_sources)
                )

                # Use the NullStore by default. This allows us to control whether
                # caching is enabled or not at runtime.
                cache_config = self._config["cache"]
                cache_config.setdefault("stores", {"null": {"driver": "null"}})
                object.__setattr__(self, "cache", CacheManager(self._config["cache"]))
                self.cache.extend("null", lambda driver: NullStore())

            Configuration.update = update

        # UPDATE ADR CONFIGURATION
        adr.config.update(config.adr)

        Log.start(config.debug)

        # SHUNT ADR LOGGING TO MAIN LOGGING
        # https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
        loguru.logger.remove()
        loguru.logger.add(
            _logging,
            level="DEBUG",
            format="{message}",
            filter=lambda r: True,
        )

        schedulers = Schedulers(config)
        schedulers.process()
    except Exception as cause:
        Log.warning("Problem with etl! Shutting down.", cause=cause)
    finally:
        Log.stop()
def main():
    """
    Fetch the branch list via `_get_branches_from_hg` and write it to the
    Elasticsearch index described by `settings.hg.branches`.

    Each document id is "<name> <locale>". Failures are raised through
    Log.error; logging is always stopped.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        branches = _get_branches_from_hg(config.hg)

        cluster = elasticsearch.Cluster(kwargs=config.hg.branches)
        index = cluster.get_or_create_index(kwargs=config.hg.branches)
        index.add_alias()
        index.extend(
            {"id": branch.name + " " + branch.locale, "value": branch}
            for branch in branches
        )
        Log.alert("DONE!")
    except Exception as cause:
        Log.error("Problem with etl", cause)
    finally:
        Log.stop()
def extract_job_settings():
    """
    Prepare the process environment for testing and load the job-extraction
    settings.

    Sets placeholder environment variables, rewrites DATABASE_URL to name the
    test database, then reads the settings, clears the database SSL option,
    and initialises constants and logging.

    :return: the loaded settings
    """
    # These values are not directly accessed during testing, but the code
    # requires that they be present.
    os.environ.update({
        "NEW_RELIC_APP_NAME": "testing",
        "BIGQUERY_PRIVATE_KEY_ID": "1",
        "BIGQUERY_PRIVATE_KEY": "1",
    })

    # USE THE TEST SCHEMA
    original_url = os.environ["DATABASE_URL"]
    test_db_name = DATABASES["default"]["TEST"]["NAME"]
    os.environ["DATABASE_URL"] = original_url.replace(
        strings.between(original_url, "/", None), test_db_name
    )

    test_settings = startup.read_settings(
        filename=extract_jobs.CONFIG_FILE, complain=False
    )
    # SSL IS NOT REQUIRED FOR THE TEST DATABASE
    test_settings.source.database.ssl = None
    constants.set(test_settings.constants)
    Log.start(test_settings.debug)
    return test_settings
def setup():
    """
    Configure the ActiveData service from the file named by the
    ACTIVEDATA_CONFIG environment variable: logging, optional request
    logging to Elasticsearch, optional dockerflow checks, the default query
    container, saved queries, and response-header rewriting.
    """
    global config

    process_num_arg = {
        "name": ["--process_num", "--process"],
        "help": "Additional port offset (for multiple Flask processes",
        "type": int,
        "dest": "process_num",
        "default": 0,
        "required": False
    }
    config = startup.read_settings(
        filename=os.environ.get('ACTIVEDATA_CONFIG'),
        defs=[process_num_arg]
    )
    constants.set(config.constants)
    Log.start(config.debug)

    # PIPE REQUEST LOGS TO ES DEBUG
    if config.request_logs:
        request_index = elasticsearch.Cluster(
            config.request_logs
        ).get_or_create_index(config.request_logs)
        active_data.request_log_queue = request_index.threaded_queue(max_size=2000)

    if config.dockerflow:
        def backend_check():
            es_url = config.elasticsearch.host + ":" + text_type(config.elasticsearch.port)
            http.get_json(es_url)

        dockerflow(flask_app, backend_check)

    # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY
    container.config.default = {
        "type": "elasticsearch",
        "settings": config.elasticsearch.copy()
    }

    # TRIGGER FIRST INSTANCE
    if config.saved_queries:
        save_query.query_finder = SaveQueries(config.saved_queries)

    HeaderRewriterFix(flask_app, remove_headers=['Date', 'Server'])
def main():
    """
    Back up saved queries: search the `config.saved_queries` index with a
    match-all query (up to 200000 hits) and write the result to the file
    named by the required `--file` argument. Failures are raised through
    Log.error.
    """
    try:
        config = startup.read_settings(defs=[{
            "name": ["--file"],
            "help": "file to save backup",
            "type": str,
            "dest": "file",
            "required": True
        }])
        constants.set(config.constants)
        Log.start(config.debug)

        sq = elasticsearch.Index(kwargs=config.saved_queries)
        result = sq.search({"query": {"match_all": {}}, "size": 200000})

        # NOTE(review): json2value is applied to each hit before joining as
        # text - confirm this is the intended conversion direction
        File(config.args.file).write("".join(
            map(convert.json2value, result.hits.hits)))
    except Exception as e:  # `except X, e` IS A SYNTAX ERROR ON PYTHON 3
        Log.error("Problem with etl", e)
def main():
    """
    Walk revision history from a hard-coded changeset, following parents, and
    log each revision whose changes-per-file score is the lowest seen so far.
    Only revisions touching more than MIN_FILES files are scored. Failures
    are raised through Log.error; logging is always stopped.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        hg = HgMozillaOrg(config)
        pending = Queue()
        pending.add("97160a734959")
        best_score = 100000

        while pending:
            rev_id = pending.pop()
            revision = hg.get_revision(wrap({
                "changeset": {"id": rev_id},
                "branch": {"name": BRANCH},
            }))

            if len(revision.changeset.files) > MIN_FILES:
                file_diffs = hg._get_json_diff_from_hg(revision)
                total_changes = sum(len(file_diff.changes) for file_diff in file_diffs)
                score = total_changes / len(file_diffs)
                if score < best_score:
                    best_score = score
                    Log.note(
                        "smallest = {{rev}}, num_lines={{num}}, num_files={{files}}",
                        rev=revision.changeset.id,
                        num=total_changes,
                        files=len(file_diffs),
                    )

            # KEEP WALKING BACK THROUGH THE PARENTS
            pending.extend(listwrap(revision.parents))
    except Exception as cause:
        Log.error("Problem with scna", cause)
    finally:
        Log.stop()
# COMMAND LINE DEFINITION
# The first two options have their own shape. The rest all share
# nargs="?" / type=int / action="store" and differ only in names, dest,
# const and help text, so they are generated from a table.
config = startup.read_settings(
    [
        {
            "name": ["--id", "--key", "--ids", "--keys"],
            "dest": "id",
            "nargs": "*",
            "type": int,
            "help": "show specific signatures",
        },
        {
            "name": "--download",
            "dest": "download",
            "help": "download deviance to CSV local file",
            "nargs": "?",
            "const": "deviant_stats.csv",
            "type": str,
            "action": "store",
        },
    ]
    + [
        {
            "name": flag_names,
            "dest": flag_dest,
            "nargs": "?",
            "const": flag_const,
            "type": int,
            "help": flag_help,
            "action": "store",
        }
        for flag_names, flag_dest, flag_const, flag_help in (
            (
                ["--dev", "--deviant", "--deviance"],
                "deviant",
                10,
                "show number of top deviant series",
            ),
            (["--modal"], "modal", 10, "show number of top modal series"),
            (["--outliers"], "outliers", 10, "show number of top outliers series"),
            (["--skewed", "--skew"], "skewed", 10, "show number of top skewed series"),
            (["--ok"], "ok", 10, "show number of top worst OK series"),
            (["--noise", "--noisy"], "noise", 10, "show number of top noisiest series"),
            (
                ["--extra", "-e"],
                "extra",
                10,
                "show number of series that are missing perfherder alerts",
            ),
            (
                ["--missing", "--miss", "-m"],
                "missing",
                10,
                "show number of series which are missing alerts over perfherder",
            ),
            (
                ["--pathological", "--pathological", "--pathology", "-p"],
                "pathological",
                3,
                "show number of series that have most edges",
            ),
        )
    ]
)
def run(self, force=False, restart=False, merge=False):
    """
    Copy job records from the MySQL source to the BigQuery destination in
    chunks, resuming from the (last_modified, id) pair stored in Redis.

    :param force: when the generated extraction SQL differs from the one
        stored in Redis, warn and continue instead of raising an error
    :param restart: ignore the stored state and start again from (0, 0)
    :param merge: merge destination shards before extracting

    Extraction problems are logged as warnings. Shards are merged once more
    at the end, whether or not extraction succeeded.
    """
    # SETUP LOGGING
    config = startup.read_settings(filename=CONFIG_FILE)
    constants.set(config.constants)
    Log.start(config.debug)

    if not config.extractor.app_name:
        Log.error("Expecting an extractor.app_name in config file")

    # SETUP DESTINATION
    dataset = bigquery.Dataset(
        dataset=config.extractor.app_name, kwargs=config.destination
    )
    destination = dataset.get_or_create_table(config.destination)

    try:
        if merge:
            with Timer("merge shards"):
                destination.merge_shards()

        # RECOVER LAST SQL STATE
        redis = Redis()
        saved_state = redis.get(config.extractor.key)

        if saved_state and not restart:
            last_modified, job_id = json2value(saved_state.decode("utf8"))
        else:
            initial_state = (0, 0)
            redis.set(config.extractor.key, value2json(initial_state).encode("utf8"))
            last_modified, job_id = initial_state

        # SCAN SCHEMA, GENERATE EXTRACTION SQL
        extractor = MySqlSnowflakeExtractor(config.source)
        canonical_sql = extractor.get_sql(SQL("SELECT 0"))

        # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
        previous_sql = redis.get(config.extractor.sql)
        if previous_sql and previous_sql.decode("utf8") != canonical_sql.sql:
            if not force:
                Log.error("Schema has changed")
            else:
                Log.warning("Schema has changed")
        redis.set(config.extractor.sql, canonical_sql.sql.encode("utf8"))

        # SETUP SOURCE
        source = MySQL(config.source.database)

        while True:
            Log.note(
                "Extracting jobs for last_modified={{last_modified|datetime|quote}}, job.id={{job_id}}",
                last_modified=last_modified,
                job_id=job_id,
            )

            # NEXT CHUNK OF JOB IDS: EITHER STRICTLY NEWER THAN THE LAST ONE
            # SEEN, OR SAME last_modified WITH A LARGER id.
            # THE ORM IS USED ONLY TO RENDER THE SQL TEXT.
            since = parse(last_modified).datetime
            strictly_newer = Q(last_modified__gt=since)
            same_time_larger_id = Q(last_modified=since) & Q(id__gt=job_id)
            id_query = (
                Job.objects.filter(strictly_newer | same_time_larger_id)
                .annotate()
                .values("id")
                .order_by("last_modified", "id")[: config.extractor.chunk_size]
            )
            get_ids = SQL(str(id_query.query))

            sql = extractor.get_sql(get_ids)

            # PULL FROM source, AND PUSH TO destination
            docs = []
            with source.transaction():
                cursor = source.query(sql, stream=True, row_tuples=True)
                extractor.construct_docs(cursor, docs.append, False)
            if not docs:
                break
            destination.extend(docs)

            # RECORD THE STATE
            newest = docs[-1]
            last_modified, job_id = newest.last_modified, newest.id
            redis.set(
                config.extractor.key,
                value2json((last_modified, job_id)).encode("utf8"),
            )

            # A SHORT CHUNK MEANS THE SOURCE IS EXHAUSTED
            if len(docs) < config.extractor.chunk_size:
                break

    except Exception as cause:
        Log.warning("problem with extraction", cause=cause)

    Log.note("done job extraction")

    try:
        with Timer("merge shards"):
            destination.merge_shards()
    except Exception as cause:
        Log.warning("problem with merge", cause=cause)

    Log.note("done job merge")
if __name__ in ("__main__", ): Log.note("Starting TUID Service App...") flask_app = TUIDApp(__name__) flask_app.add_url_rule(str('/'), None, tuid_endpoint, defaults={'path': ''}, methods=[str('GET'), str('POST')]) flask_app.add_url_rule(str('/<path:path>'), None, tuid_endpoint, methods=[str('GET'), str('POST')]) try: config = startup.read_settings(filename=os.environ.get('TUID_CONFIG')) constants.set(config.constants) Log.start(config.debug) service = TUIDService(config.tuid) Log.note("Started TUID Service") except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT try: Log.error( "Serious problem with TUID service construction! Shutdown!", cause=e) finally: Log.stop() if config.flask: if config.flask.port and config.args.process_num:
headers={ "Content-Type": "text/html" } ) if __name__ in ("__main__",): Log.note("Starting TUID Service App...") flask_app = TUIDApp(__name__) flask_app.add_url_rule(str('/'), None, tuid_endpoint, defaults={'path': ''}, methods=[str('GET'), str('POST')]) flask_app.add_url_rule(str('/<path:path>'), None, tuid_endpoint, methods=[str('GET'), str('POST')]) try: config = startup.read_settings( filename=os.environ.get('TUID_CONFIG') ) constants.set(config.constants) Log.start(config.debug) service = TUIDService(config.tuid) # Log memory info while running initial_growth = {} objgraph.growth(peak_stats={}) objgraph.growth(peak_stats=initial_growth) service.statsdaemon.initial_growth = initial_growth Log.note("Started TUID Service") Log.note("Current free memory: {{mem}} Mb", mem=service.statsdaemon.get_free_memory()) except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT
def __enter__(self):
    """
    Read the settings, keep them on `self.config`, apply constants, start
    logging, and hand the settings to the `with` block.
    """
    loaded = startup.read_settings()
    self.config = loaded

    from mo_logs import constants

    constants.set(loaded.constants)
    Log.start(loaded.debug)
    return loaded
def main():
    """
    Scheduler entry point.

    Starts logging and captures Python/loguru logging, optionally injects
    secrets, extends ADR's Configuration with an `update()` method,
    optionally wraps `S3Store._get` with a timer, applies the ADR
    configuration, and runs the schedulers with a MAX_RUNTIME limit.
    Problems are logged as warnings; logging is always stopped.
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        # SHUNT PYTHON LOGGING TO MAIN LOGGING
        capture_logging()
        # SHUNT ADR LOGGING TO MAIN LOGGING
        # https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
        capture_loguru()

        if config.taskcluster:
            inject_secrets(config)

        @extend(Configuration)
        def update(self, config):
            """
            Update the configuration object with new parameters
            :param config: dict of configuration
            """
            for name, value in config.items():
                # `!= None` (NOT `is not None`) IS KEPT AS WRITTEN
                if value != None:
                    self._config[name] = value

            unique_sources = set(self._config["sources"])
            self._config["sources"] = sorted(
                map(os.path.expanduser, unique_sources)
            )

            # Use the NullStore by default. This allows us to control whether
            # caching is enabled or not at runtime.
            self._config["cache"].setdefault(
                "stores", {"null": {"driver": "null"}}
            )
            object.__setattr__(self, "cache", CustomCacheManager(self._config))
            for _, store in self._config["cache"]["stores"].items():
                needs_slash = store.path and not store.path.endswith("/")
                if needs_slash:
                    # REQUIRED, OTHERWISE FileStore._create_cache_directory()
                    # WILL LOOK AT PARENT DIRECTORY
                    store.path = store.path + "/"

        if SHOW_S3_CACHE_HIT:
            # KEEP THE ORIGINAL SO THE REPLACEMENT CAN DELEGATE TO IT
            s3_get = S3Store._get

            @extend(S3Store)
            def _get(self, key):
                with Timer(
                    "get {{key}} from S3", {"key": key}, verbose=False
                ) as timer:
                    output = s3_get(self, key)
                    # ONLY REPORT TIMING WHEN SOMETHING WAS FOUND
                    if output is not None:
                        timer.verbose = True
                    return output

        # UPDATE ADR CONFIGURATION
        with Repeat("waiting for ADR", every="10second"):
            adr.config.update(config.adr)
            # DUMMY TO TRIGGER CACHE
            make_push_objects(
                from_date=Date.today().format(),
                to_date=Date.now().format(),
                branch="autoland",
            )

        max_seconds = Duration(MAX_RUNTIME).total_seconds()
        outatime = Till(seconds=max_seconds)
        outatime.then(lambda: Log.alert("Out of time, exit early"))
        Schedulers(config).process(outatime)
    except Exception as cause:
        Log.warning("Problem with etl! Shutting down.", cause=cause)
    finally:
        Log.stop()
def run(self, force=False, restart=False, merge=False):
    """
    Copy performance datum records from the MySQL source to the BigQuery
    destination in chunks, resuming from the last id stored in Redis.

    :param force: when the generated extraction SQL differs from the one
        stored in Redis, warn and continue instead of raising an error
    :param restart: ignore the stored state and start from the built-in
        starting id
    :param merge: merge destination shards before extracting

    Extraction problems are logged as warnings. Shards are merged once more
    at the end, whether or not extraction succeeded.
    """
    # SETUP LOGGING
    config = startup.read_settings(filename=CONFIG_FILE)
    constants.set(config.constants)
    Log.start(config.debug)

    if not config.extractor.app_name:
        Log.error("Expecting an extractor.app_name in config file")

    # SETUP DESTINATION
    dataset = bigquery.Dataset(
        dataset=config.extractor.app_name, kwargs=config.destination
    )
    destination = dataset.get_or_create_table(config.destination)

    try:
        if merge:
            with Timer("merge shards"):
                destination.merge_shards()

        # RECOVER LAST SQL STATE
        redis = Redis()
        saved_state = redis.get(config.extractor.key)

        if saved_state and not restart:
            perf_id = json2value(saved_state.decode("utf8"))
        else:
            perf_id = 916850000
            redis.set(config.extractor.key, value2json(perf_id).encode("utf8"))

        # SCAN SCHEMA, GENERATE EXTRACTION SQL
        extractor = MySqlSnowflakeExtractor(config.source)
        canonical_sql = extractor.get_sql(SQL("SELECT 0"))

        # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
        previous_sql = redis.get(config.extractor.sql)
        if previous_sql and previous_sql.decode("utf8") != canonical_sql.sql:
            if not force:
                Log.error("Schema has changed")
            else:
                Log.warning("Schema has changed")
        redis.set(config.extractor.sql, canonical_sql.sql.encode("utf8"))

        # SETUP SOURCE
        source = MySQL(config.source.database)

        while True:
            Log.note("Extracting perfs for perf.id={{perf_id}}", perf_id=perf_id)

            # NEXT CHUNK OF IDS ABOVE THE LAST ONE SEEN, IN id ORDER.
            # THE ORM IS USED ONLY TO RENDER THE SQL TEXT.
            id_query = (
                PerformanceDatum.objects.filter(id__gt=perf_id)
                .values("id")
                .order_by("id")[: config.extractor.chunk_size]
            )
            get_ids = SQL(str(id_query.query))

            sql = extractor.get_sql(get_ids)

            # PULL FROM source, AND PUSH TO destination
            docs = []
            with source.transaction():
                cursor = source.query(sql, stream=True, row_tuples=True)
                extractor.construct_docs(cursor, docs.append, False)
            if not docs:
                break

            # TODO: Remove me July 2021
            # OLD PERF RECORDS HAVE NO CORRESPONDING JOB
            # ADD job.submit_time FOR PARTITIONING
            for doc in docs:
                if not doc.job.submit_time:
                    doc.job.submit_time = doc.push_timestamp

            destination.extend(docs)

            # RECORD THE STATE
            perf_id = docs[-1].id
            redis.set(config.extractor.key, value2json(perf_id).encode("utf8"))

            # A SHORT CHUNK MEANS THE SOURCE IS EXHAUSTED
            if len(docs) < config.extractor.chunk_size:
                break

    except Exception as cause:
        Log.warning("problem with extraction", cause=cause)

    Log.note("done perf extraction")

    try:
        with Timer("merge shards"):
            destination.merge_shards()
    except Exception as cause:
        Log.warning("problem with merge", cause=cause)

    Log.note("done perf merge")
# from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals from mo_files import File from mo_logs import Log, startup, constants from mo_testing.fuzzytestcase import FuzzyTestCase from mo_times.timer import Timer from mo_dots import set_default, wrap, Null from mysql_to_s3.extract import Extract from pyLibrary.sql.mysql import MySQL settings = startup.read_settings(filename="tests/resources/config/test.json") constants.set(settings.constants) class TestExtract(FuzzyTestCase): @classmethod def setUpClass(cls): Log.start(settings.debug) with Timer("setup database"): try: with MySQL(schema=None, kwargs=settings.database) as db: db.query("drop database testing") except Exception as e: if "Can't drop database " in e: pass else:
def config():
    """
    Load settings from the file named by the TUID_CONFIG environment
    variable, apply constants, start logging, and return the settings.
    """
    config_path = os.environ.get('TUID_CONFIG')
    loaded = startup.read_settings(filename=config_path)
    constants.set(loaded.constants)
    Log.start(loaded.debug)
    return loaded
def add(any_flask_app): global cache cache = Cache(config.cache) any_flask_app.add_url_rule(str("/<path:path>"), None, relay_get, methods=[str("GET")]) any_flask_app.add_url_rule(str("/<path:path>"), None, relay_post, methods=[str("POST")]) any_flask_app.add_url_rule(str("/"), None, relay_get, methods=[str("GET")]) any_flask_app.add_url_rule(str("/"), None, relay_post, methods=[str("POST")]) if __name__ in ("__main__",): Log.note("Starting " + APP_NAME + " Service App...") flask_app = RelayApp(__name__) try: config = startup.read_settings(filename=os.environ.get("HG_RELAY_CONFIG")) constants.set(config.constants) Log.start(config.debug) add(flask_app) Log.note("Started " + APP_NAME + " Service") except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT try: Log.error( "Serious problem with " + APP_NAME + " service construction! Shutdown!", cause=e ) finally: Log.stop() if config.flask: if config.flask.port and config.args.process_num:
Log.note("Skipping try revision.") queue.commit() continue now = Date.now().unix if time_offset is None: time_offset = now - request.meta.request_time next_request = request.meta.request_time + time_offset if next_request > now: Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now)) Till(till=next_request).wait() Thread.run("request "+text_type(request_count), one_request, request) request_count += 1 queue.commit() if __name__ == '__main__': try: tmp_signal = Signal() config = startup.read_settings() constants.set(config.constants) Log.start(config.debug) queue_consumer(kwargs=config, please_stop=tmp_signal) worker = Thread.run("sqs consumer", queue_consumer, kwargs=config) MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True, please_stop=worker.stopped) except BaseException as e: Log.error("Serious problem with consumer construction! Shutdown!", cause=e)
def setup():
    """
    Configure the ActiveData service. The default settings file is named by
    the ACTIVEDATA_CONFIG environment variable.

    Covers logging, the bulk S3 config, the pid file, optional request
    logging to Elasticsearch, dockerflow (or a plain version endpoint), the
    default query container, saved queries, query stats, the per-process
    port offset, the debug-only /exit route, SSL, and shutdown wiring.
    """
    global config

    process_num_arg = {
        "name": ["--process_num", "--process"],
        "help": "Additional port offset (for multiple Flask processes",
        "type": int,
        "dest": "process_num",
        "default": 0,
        "required": False
    }
    config = startup.read_settings(
        default_filename=os.environ.get('ACTIVEDATA_CONFIG'),
        defs=[process_num_arg]
    )
    constants.set(config.constants)
    Log.start(config.debug)

    agg_bulk.S3_CONFIG = config.bulk.s3

    pid_file = File.new_instance("activedata.pid")
    pid_file.write(text(machine_metadata.pid))

    # PIPE REQUEST LOGS TO ES DEBUG
    if config.request_logs:
        request_index = elasticsearch.Cluster(
            config.request_logs
        ).get_or_create_index(config.request_logs)
        active_data.request_log_queue = request_index.threaded_queue(
            max_size=2000, period=1
        )

    if not config.dockerflow:
        # IF NOT USING DOCKERFLOW, THEN RESPOND WITH A SIMPLER __version__
        add_version(flask_app)
    else:
        def backend_check():
            es_url = config.elasticsearch.host + ":" + text(config.elasticsearch.port)
            http.get_json(es_url)

        dockerflow(flask_app, backend_check)

    # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY
    container.config.default = {
        "type": "elasticsearch",
        "settings": config.elasticsearch.copy()
    }

    # TRIGGER FIRST INSTANCE
    if config.saved_queries:
        save_query.query_finder = SaveQueries(config.saved_queries)

    # STARTUP QUERY STATS
    QueryStats(elasticsearch.Cluster(config.elasticsearch))

    # EACH EXTRA PROCESS LISTENS ON ITS OWN PORT
    if config.flask.port and config.args.process_num:
        config.flask.port += config.args.process_num

    # TURN ON /exit FOR WINDOWS DEBUGGING
    if config.flask.debug or config.flask.allow_exit:
        config.flask.allow_exit = None
        Log.warning("ActiveData is in debug mode")
        flask_app.add_url_rule('/exit', 'exit', _exit)

    if config.flask.ssl_context:
        if config.args.process_num:
            Log.error("can not serve ssl and multiple Flask instances at once")
        setup_flask_ssl()

    # ENSURE MAIN THREAD SHUTDOWN TRIGGERS Flask SHUTDOWN
    MAIN_THREAD.stopped.then(exit)
now = Date.now().unix if time_offset is None: time_offset = now - request.meta.request_time next_request = request.meta.request_time + time_offset if next_request > now: Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now)) Till(till=next_request).wait() Thread.run("request " + text_type(request_count), one_request, request) request_count += 1 queue.commit() if __name__ == '__main__': try: tmp_signal = Signal() config = startup.read_settings() constants.set(config.constants) Log.start(config.debug) queue_consumer(kwargs=config, please_stop=tmp_signal) worker = Thread.run("sqs consumer", queue_consumer, kwargs=config) MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True, please_stop=worker.stopped) except BaseException as e: Log.error("Serious problem with consumer construction! Shutdown!", cause=e)
global cache cache = Cache(config.cache) any_flask_app.add_url_rule(str('/<path:path>'), None, relay_get, methods=[str('GET')]) any_flask_app.add_url_rule(str('/<path:path>'), None, relay_post, methods=[str('POST')]) any_flask_app.add_url_rule(str('/'), None, relay_get, methods=[str('GET')]) any_flask_app.add_url_rule(str('/'), None, relay_post, methods=[str('POST')]) if __name__ in ("__main__",): Log.note("Starting " + APP_NAME + " Service App...") flask_app = RelayApp(__name__) try: config = startup.read_settings( filename=os.environ.get('HG_RELAY_CONFIG') ) constants.set(config.constants) Log.start(config.debug) add(flask_app) Log.note("Started " + APP_NAME + " Service") except BaseException as e: # MUST CATCH BaseException BECAUSE argparse LIKES TO EXIT THAT WAY, AND gunicorn WILL NOT REPORT try: Log.error("Serious problem with " + APP_NAME + " service construction! Shutdown!", cause=e) finally: Log.stop() if config.flask: if config.flask.port and config.args.process_num: config.flask.port += config.args.process_num