def main():
    """
    Reset every managed EC2 instance: reconfigure fabric for the instance and
    refresh its ETL software.  An instance whose refresh fails is terminated.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)

        for i in instances:
            # FIX: keyword was misspelled "insance_id", so the {{instance_id}}
            # placeholder in the template was never substituted
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", instance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            try:
                _refresh_etl()  # TODO: UPON FAILURE, TERMINATE INSTANCE AND SPOT REQUEST
            except Exception as e:
                # instance is in an unknown state; terminate and move on
                ec2_conn.terminate_instances([i.id])
                Log.warning("Problem resetting {{instance}}, terminated", instance=i.id, cause=e)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Pulse-to-S3 ETL: consume pulse messages into a persistent local queue,
    # write them to an S3 bucket, and keep synchronization state in S3 so a
    # restart can resume where it left off.
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        constants.set(settings.constants)
        with startup.SingleInstance(flavor_id=settings.args.filename):
            with aws.s3.Bucket(settings.destination) as bucket:

                if settings.param.debug:
                    # debug mode is incompatible with a durable queue
                    if settings.source.durable:
                        Log.error("Can not run in debug mode with a durable queue")
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                else:
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                    if settings.source.durable:
                        # durable mode: recover synchronization state from S3
                        synch.startup()

                queue = PersistentQueue(settings.param.queue_file)
                if queue:
                    # resume numbering after the last item already in the queue
                    last_item = queue[len(queue) - 1]
                    synch.source_key = last_item._meta.count + 1

                with pulse.Consumer(settings=settings.source, target=None, target_queue=queue, start=synch.source_key):
                    Thread.run("pulse log loop", log_loop, settings, synch, queue, bucket)
                    Thread.wait_for_shutdown_signal(allow_exit=True)
                    Log.warning("starting shutdown")

                queue.close()
                Log.note("write shutdown state to S3")
                synch.shutdown()
    except Exception, e:
        Log.error("Problem with etl", e)
def main():
    # Entry point: run a one-off ETL when --id is given, otherwise start a
    # pool of ETL worker threads and block until shutdown is requested.
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id(s) to process. Use \"..\" for a range.",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        if settings.args.id:
            # single-shot mode
            etl_one(settings)
            return

        hg = HgMozillaOrg(settings=settings.hg)
        resources = Dict(hg=dictwrap(hg))
        stopper = Signal()

        num_threads = coalesce(settings.param.threads, 1)
        for n in range(num_threads):
            ETL(
                name="ETL Loop " + unicode(n),
                work_queue=settings.work_queue,
                resources=resources,
                workers=settings.workers,
                settings=settings.param,
                please_stop=stopper
            )

        Thread.wait_for_shutdown_signal(stopper, allow_exit=True)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Query ActiveData for recent, unclassified unittest failures, then ask
    # TreeHerder to classify each one.
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        query = {
            "from": "unittest",
            "select": [
                {"name": "branch", "value": "build.branch"},
                {"name": "revision", "value": "build.revision12"},
                {"name": "suite", "value": "run.suite"},
                {"name": "chunk", "value": "run.chunk"},
                {"name": "test", "value": "result.test"}
            ],
            "where": {"and": [
                {"eq": {"result.ok": False}},
                {"gt": {"run.timestamp": Date.today() - WEEK}},
                {"missing": "treeherder.job.note"}
            ]},
            "format": "list",
            "limit": 10
        }
        some_failures = http.post_json("http://activedata.allizom.org/query", data=query)

        th = TreeHerder(settings={})

        # th.get_job_classification("mozilla-inbound", "7380457b8ba0")
        for f in some_failures.data:
            th.get_job_classification(f.branch, f.revision)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # With --id: process only those ids and exit.  Otherwise: launch ETL
    # workers and wait for the shutdown signal.
    id_arg = {
        "name": ["--id"],
        "help": "id(s) to process. Use \"..\" for a range.",
        "type": str,
        "dest": "id",
        "required": False
    }
    try:
        settings = startup.read_settings(defs=[id_arg])
        constants.set(settings.constants)
        Log.start(settings.debug)

        if settings.args.id:
            etl_one(settings)
            return

        hg = HgMozillaOrg(settings=settings.hg)
        resources = Dict(hg=dictwrap(hg))
        stopper = Signal()
        for worker_num in range(coalesce(settings.param.threads, 1)):
            ETL(
                name="ETL Loop " + unicode(worker_num),
                work_queue=settings.work_queue,
                resources=resources,
                workers=settings.workers,
                settings=settings.param,
                please_stop=stopper
            )
        Thread.wait_for_shutdown_signal(stopper, allow_exit=True)
    except Exception as e:
        Log.error("Problem with etl", e)
def start(cls, settings=None):
    """
    RUN ME FIRST TO SETUP THE THREADED LOGGING
    http://victorlin.me/2012/08/good-logging-practice-in-python/

    log        - LIST OF PARAMETERS FOR LOGGER(S)
    trace      - SHOW MORE DETAILS IN EVERY LOG LINE (default False)
    cprofile   - True==ENABLE THE C-PROFILER THAT COMES WITH PYTHON (default False)
                 USE THE LONG FORM TO SET THE FILENAME {"enabled": True, "filename": "cprofile.tab"}
    profile    - True==ENABLE pyLibrary SIMPLE PROFILING (default False) (eg with Profiler("some description"):)
                 USE THE LONG FORM TO SET FILENAME {"enabled": True, "filename": "profile.tab"}
    constants  - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE)
    """
    if not settings:
        return
    settings = wrap(settings)
    cls.settings = settings
    # once trace is on, it stays on for the life of the process
    cls.trace = cls.trace | coalesce(settings.trace, False)
    if cls.trace:
        from pyLibrary.thread.threads import Thread

    # normalize cprofile to the long (dict) form, then enable the profiler
    if settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled):
        if isinstance(settings.cprofile, bool):
            settings.cprofile = {
                "enabled": True,
                "filename": "cprofile.tab"
            }
        import cProfile
        cls.cprofiler = cProfile.Profile()
        cls.cprofiler.enable()

    if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
        from pyLibrary.debugs import profiles
        if isinstance(settings.profile, bool):
            profiles.ON = True
            settings.profile = {"enabled": True, "filename": "profile.tab"}
        if settings.profile.enabled:
            profiles.ON = True

    if settings.constants:
        constants.set(settings.constants)

    if not settings.log:
        return

    # route all logging through one thread, fanned out to each configured log
    cls.logging_multi = Log_usingMulti()
    if cls.main_log:
        cls.main_log.stop()
    cls.main_log = Log_usingThread(cls.logging_multi)

    for log in listwrap(settings.log):
        Log.add_log(Log.new_instance(log))
def start(cls, settings=None):
    """
    RUN ME FIRST TO SETUP THE THREADED LOGGING
    http://victorlin.me/2012/08/good-logging-practice-in-python/

    log        - LIST OF PARAMETERS FOR LOGGER(S)
    trace      - SHOW MORE DETAILS IN EVERY LOG LINE (default False)
    cprofile   - True==ENABLE THE C-PROFILER THAT COMES WITH PYTHON (default False)
                 USE THE LONG FORM TO SET THE FILENAME {"enabled": True, "filename": "cprofile.tab"}
    profile    - True==ENABLE pyLibrary SIMPLE PROFILING (default False) (eg with Profiler("some description"):)
                 USE THE LONG FORM TO SET FILENAME {"enabled": True, "filename": "profile.tab"}
    constants  - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE)
    """
    global _Thread
    if not settings:
        return
    settings = wrap(settings)
    cls.settings = settings
    # once trace is on, it stays on for the life of the process
    cls.trace = cls.trace | coalesce(settings.trace, False)
    if cls.trace:
        from pyLibrary.thread.threads import Thread as _Thread

    # normalize cprofile to the long (dict) form, then enable the profiler
    if settings.cprofile is False:
        settings.cprofile = {"enabled": False}
    elif settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled):
        if isinstance(settings.cprofile, bool):
            settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}
        import cProfile
        cls.cprofiler = cProfile.Profile()
        cls.cprofiler.enable()

    if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
        from pyLibrary.debugs import profiles
        if isinstance(settings.profile, bool):
            profiles.ON = True
            settings.profile = {"enabled": True, "filename": "profile.tab"}
        if settings.profile.enabled:
            profiles.ON = True

    if settings.constants:
        constants.set(settings.constants)

    if settings.log:
        # route all logging through one thread, fanned out to each configured log
        cls.logging_multi = TextLog_usingMulti()
        if cls.main_log:
            cls.main_log.stop()
        cls.main_log = TextLog_usingThread(cls.logging_multi)
        for log in listwrap(settings.log):
            Log.add_log(Log.new_instance(log))

    if settings.cprofile.enabled == True:
        # FIX: template was garbled "{(unknown)}"; use the moustache form so
        # the filename keyword is actually substituted into the message
        Log.alert("cprofiling is enabled, writing to {{filename}}", filename=os.path.abspath(settings.cprofile.filename))
def main():
    # Fetch recent unclassified test failures from ActiveData and run each
    # one through TreeHerder job classification.
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        select_clause = [
            {"name": "branch", "value": "build.branch"},
            {"name": "revision", "value": "build.revision12"},
            {"name": "suite", "value": "run.suite"},
            {"name": "chunk", "value": "run.chunk"},
            {"name": "test", "value": "result.test"}
        ]
        where_clause = {"and": [
            {"eq": {"result.ok": False}},
            {"gt": {"run.timestamp": Date.today() - WEEK}},
            {"missing": "treeherder.job.note"}
        ]}
        some_failures = http.post_json("http://activedata.allizom.org/query", data={
            "from": "unittest",
            "select": select_clause,
            "where": where_clause,
            "format": "list",
            "limit": 10
        })

        th = TreeHerder(settings={})

        # th.get_job_classification("mozilla-inbound", "7380457b8ba0")
        for failure in some_failures.data:
            th.get_job_classification(failure.branch, failure.revision)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Load configuration, start logging, then simply block until a shutdown
    # signal arrives.
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:
        Log.error("Problem with etl", cause=e)
def main():
    # Run the full ETL exactly once, guaranteeing only a single instance of
    # this process runs at a time, and always stopping the log on exit.
    settings = startup.read_settings()
    constants.set(settings.constants)
    Log.start(settings.debug)
    with startup.SingleInstance(flavor_id=settings.args.filename):
        try:
            full_etl(settings)
        finally:
            Log.stop()
def setup(settings=None):
    # Construct and configure the ActiveData Flask app; returns the app on
    # success, otherwise raises (via Log.error) with a construction failure.
    global config
    try:
        config = startup.read_settings(defs={
            "name": ["--process_num", "--process"],
            "help": "Additional port offset (for multiple Flask processes",
            "type": int,
            "dest": "process_num",
            "default": 0,
            "required": False
        }, filename=settings)
        constants.set(config.constants)
        Log.start(config.debug)

        # each extra Flask process listens on its own port offset
        if config.args.process_num and config.flask.port:
            config.flask.port += config.args.process_num

        # PIPE REQUEST LOGS TO ES DEBUG
        if config.request_logs:
            request_logger = elasticsearch.Cluster(config.request_logs).get_or_create_index(config.request_logs)
            active_data.request_log_queue = request_logger.threaded_queue(max_size=2000)

        # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY
        containers.config.default = {
            "type": "elasticsearch",
            "settings": config.elasticsearch.copy()
        }

        # TURN ON /exit FOR WINDOWS DEBUGGING
        if config.flask.debug or config.flask.allow_exit:
            config.flask.allow_exit = None
            Log.warning("ActiveData is in debug mode")
            app.add_url_rule('/exit', 'exit', _exit)

        # TRIGGER FIRST INSTANCE
        FromESMetadata(config.elasticsearch)

        if config.saved_queries:
            setattr(save_query, "query_finder", SaveQueries(config.saved_queries))

        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])

        # ssl cannot be combined with multiple Flask processes
        if config.flask.ssl_context:
            if config.args.process_num:
                Log.error("can not serve ssl and multiple Flask instances at once")
            setup_ssl()

        return app
    except Exception, e:
        Log.error("Serious problem with ActiveData service construction! Shutdown!", cause=e)
def main():
    # Copy ETL results into elasticsearch: several worker threads drain a
    # work queue of keys, while a monitor thread reports remaining work.
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id (prefix, really) to process",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        # default container so queries have something to run against
        queries.config.default = {
            "type": "elasticsearch",
            "settings": settings.elasticsearch.copy()
        }

        if settings.args.id:
            # explicit list of ids on the command line (debugging aid)
            work_queue = Queue("local work queue")
            work_queue.extend(parse_id_argument(settings.args.id))
        else:
            # normal operation: pull work from the AWS queue
            work_queue = aws.Queue(settings=settings.work_queue)

        Log.note("Listen to queue {{queue}}, and read off of {{s3}}", queue=settings.work_queue.name, s3=settings.source.bucket)

        es = MultiDayIndex(settings.elasticsearch, queue_size=100000)

        threads = []
        please_stop = Signal()
        for _ in range(settings.threads):
            p = Thread.run("copy to es", copy2es, es, settings, work_queue, please_stop=please_stop)
            threads.append(p)

        def monitor_progress(please_stop):
            # periodic progress report until shutdown is requested
            while not please_stop:
                Log.note("Remaining: {{num}}", num=len(work_queue))
                Thread.sleep(seconds=10)

        Thread.run(name="monitor progress", target=monitor_progress, please_stop=please_stop)

        aws.capture_termination_signal(please_stop)
        Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True)
        please_stop.go()
        Log.note("Shutdown started")
    except Exception, e:
        Log.error("Problem with etl", e)
def main():
    # Configure logging, raise the allowed string size, then pull bug data.
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        big_data.MAX_STRING_SIZE = 100 * 1000 * 1000

        # get_active_data(settings)
        get_bugs(settings)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Connect to hg.mozilla.org and TreeHerder, then go look for work.
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        hg = HgMozillaOrg(settings.hg)
        th = TreeHerder(settings=settings)
        find_some_work(th)
    except Exception as e:
        Log.error("Problem with etl", e)
def start():
    # Process entry point: load config, start logging, optionally connect to
    # hg.mozilla.org, then hand off to main().
    global hg
    global config

    _ = wrap  # no-op reference (presumably keeps the wrap import in use -- confirm)

    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        if config.hg:
            hg = HgMozillaOrg(config.hg)
        main()
    except Exception as e:
        Log.error("Problems exist", e)
def main():
    # Start the MoDataSubmission Flask service with credentials from config.
    global all_creds
    global config
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        all_creds = config.users
        app.run(**config.flask)
    except Exception as e:
        Log.error("Serious problem with MoDataSubmission service! Shutdown completed!", cause=e)
def main():
    # Run the diff, with an optional --id prefix restriction.
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id (prefix, really) to process",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        diff(settings)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Pull the branch list from hg and publish it to the branches index.
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = _get_branches_from_hg(settings.hg)

        es = elasticsearch.Cluster(settings=settings.hg.branches).get_or_create_index(settings=settings.hg.branches)
        es.add_alias()

        docs = ({"id": b.name + " " + b.locale, "value": b} for b in branches)
        es.extend(docs)

        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Test-failure aggregator service: single-instance, runs the daily
    # aggregation loop in a thread until shutdown.
    global config
    try:
        config = startup.read_settings()
        with startup.SingleInstance(flavor_id=config.args.filename):
            constants.set(config.constants)
            Log.start(config.debug)

            destination = elasticsearch.Cluster(config.destination).get_or_create_index(config.destination)

            please_stop = Signal()
            Thread.run("aggregator", loop_all_days, destination, please_stop=please_stop)
            Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True)
    except Exception as e:
        Log.error("Serious problem with Test Failure Aggregator service! Shutdown completed!", cause=e)
def main():
    # Run the diff; --id limits processing to the given prefix.
    id_arg = {
        "name": ["--id"],
        "help": "id (prefix, really) to process",
        "type": str,
        "dest": "id",
        "required": False
    }
    try:
        settings = startup.read_settings(defs=[id_arg])
        constants.set(settings.constants)
        Log.start(settings.debug)
        diff(settings)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Publish the hg branch list to the branches elasticsearch index.
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = get_branches(settings.hg)

        cluster = elasticsearch.Cluster(settings=settings.hg.branches)
        es = cluster.get_or_create_index(settings=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)

        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Dump all saved queries from elasticsearch into a local backup file.
    try:
        config = startup.read_settings(defs=[{
            "name": ["--file"],
            "help": "file to save backup",
            "type": str,
            "dest": "file",
            "required": True
        }])
        constants.set(config.constants)
        Log.start(config.debug)

        sq = elasticsearch.Index(settings=config.saved_queries)
        result = sq.search({"query": {"match_all": {}}, "size": 200000})

        # NOTE(review): json2value parses JSON *into* values, yet the results
        # are joined as if they were strings -- for a backup file this looks
        # like it should serialize (convert.value2json); confirm intent
        File(config.args.file).write("".join(map(convert.json2value, result.hits.hits)))
    except Exception, e:
        Log.error("Problem with etl", e)
def main():
    """
    Reset every managed EC2 instance: configure fabric for the instance and
    refresh the indexer software running on it.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)

        for i in instances:
            # FIX: keyword was misspelled "insance_id", so the {{instance_id}}
            # placeholder in the template was never substituted
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", instance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            _refresh_indexer()
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    # Code-coverage summarizer: copy the schema from the source index, create
    # the destination summary index, and run the processing loop to shutdown.
    try:
        config = startup.read_settings()
        with startup.SingleInstance(flavor_id=config.args.filename):
            constants.set(config.constants)
            Log.start(config.debug)

            please_stop = Signal("main stop signal")

            coverage_index = elasticsearch.Cluster(config.source).get_index(settings=config.source)
            config.destination.schema = coverage_index.get_schema()

            coverage_summary_index = elasticsearch.Cluster(config.destination).get_or_create_index(read_only=False, settings=config.destination)
            coverage_summary_index.add_alias(config.destination.index)

            Thread.run(
                "processing loop",
                loop,
                config.source,
                coverage_summary_index,
                config,
                please_stop=please_stop
            )
            Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:
        Log.error("Problem with code coverage score calculation", cause=e)
def main():
    """
    Reset each managed EC2 instance by configuring fabric and refreshing the
    indexer software on it.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)

        for i in instances:
            # FIX: keyword was misspelled "insance_id"; the {{instance_id}}
            # placeholder never received a value
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", instance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            _refresh_indexer()
    except Exception as e:
        Log.error("Problem with etl", e)
def setUp(self):
    # Load the test configuration, start logging, and point Flask at it.
    config = startup.read_settings(filename=CONFIG_FILE)
    Log.start(config.debug)
    constants.set(config.constants)
    app.config = config
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST'])
@app.route('/<path:path>', methods=['GET', 'POST'])
def catch_all(path):
    # Reject anything not explicitly routed with an empty 400 response.
    headers = {
        "access-control-allow-origin": "*",
        "content-type": "text/html"
    }
    return Response(b"", status=400, headers=headers)


if __name__ == "__main__":
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        # SETUP TREEHERDER CACHE
        hg = HgMozillaOrg(use_cache=True, settings=config.hg)
        th = TreeherderService(hg, settings=config.treeherder)
        app.add_url_rule('/treeherder', None, th.get_treeherder_job, methods=['GET'])

        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])

        app.run(**config.flask)
    except Exception as e:
        Log.error("Serious problem with service construction! Shutdown!", cause=e)
    finally:
        Log.stop()
import datetime from kombu import Connection, Producer, Exchange from pytz import timezone from mozillapulse.utils import time_to_string from pyLibrary.debugs import constants from pyLibrary import jsons from pyLibrary.debugs.logs import Log, Except from pyLibrary.dot import wrap, coalesce, Dict, set_default from pyLibrary.meta import use_settings from pyLibrary.thread.threads import Thread from mozillapulse.consumers import GenericConsumer constants.set({"mozillapulse": {"consumers": {"logging": Log}}}) class Consumer(Thread): @use_settings def __init__( self, exchange, # name of the Pulse exchange topic, # message name pattern to subscribe to ('#' is wildcard) target=None, # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETE$ED WITHOUT EXCEPTION target_queue=None, # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS host='pulse.mozilla.org', # url to connect, port=5671, # tcp port user=None, password=None, vhost="/", start=0, # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST']) @app.route('/<path:path>', methods=['GET', 'POST']) def catch_all(path): return Response(b"", status=400, headers={ "access-control-allow-origin": "*", "content-type": "text/html" }) if __name__ == "__main__": try: config = startup.read_settings() constants.set(config.constants) Log.start(config.debug) # SETUP TREEHERDER CACHE hg = HgMozillaOrg(use_cache=True, settings=config.hg) th = TreeherderService(hg, settings=config.treeherder) app.add_url_rule('/treeherder', None, th.get_treeherder_job, methods=['GET']) HeaderRewriterFix(app, remove_headers=['Date', 'Server']) app.run(**config.flask) except Exception, e: Log.error("Serious problem with service construction! Shutdown!",
import datetime from kombu import Connection, Producer, Exchange from pytz import timezone from mozillapulse.utils import time_to_string from pyLibrary.debugs import constants from pyLibrary import jsons from pyLibrary.debugs.logs import Log, Except from pyLibrary.dot import wrap, coalesce, Dict, set_default from pyLibrary.meta import use_settings from pyLibrary.thread.threads import Thread from mozillapulse.consumers import GenericConsumer constants.set({"mozillapulse": {"consumers": {"logging": Log}}}) class Consumer(Thread): @use_settings def __init__( self, exchange, # name of the Pulse exchange topic, # message name pattern to subscribe to ('#' is wildcard) target=None, # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETE$ED WITHOUT EXCEPTION target_queue=None, # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS host='pulse.mozilla.org', # url to connect, port=5671, # tcp port user=None, password=None, vhost="/",