def test_queue_speed(self):
    """
    PUSH MANY ITEMS THROUGH A ThreadedQueue AND ASSERT IT DRAINS QUICKLY
    (UNDER 1.5 SECONDS FOR 10K ITEMS)
    """
    SCALE = 1000*10

    done = Signal("done")  # TRIGGERED WHEN THE CONSUMER HAS DRAINED EVERYTHING
    slow = Queue()
    q = ThreadedQueue("test queue", queue=slow)

    def empty(please_stop):
        # CONSUMER: POP UNTIL THE STOP MARKER ARRIVES
        while not please_stop:
            item = q.pop()
            if item is THREAD_STOP:
                break
        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(THREAD_STOP)  # SENTINEL TELLS CONSUMER TO EXIT
        Log.note("Done insert")
        done.wait()  # TIME INCLUDES FULL DRAIN, NOT JUST INSERTS

    self.assertLess(timer.duration.seconds, 1.5, "Expecting queue to be fast")
def __init__(self, name, target, *args, **kwargs):
    """
    WRAP target SO IT CAN BE RUN IN ITS OWN THREAD

    :param name: HUMAN-READABLE NAME FOR DEBUGGING
    :param target: FUNCTION TO RUN; WILL RECEIVE A please_stop Signal KWARG
    :param args: PASSED TO target
    :param kwargs: PASSED TO target (please_stop AND parent_thread ARE SPECIAL)
    """
    self.id = -1  # NOT ASSIGNED UNTIL THE OS THREAD EXISTS
    self.name = name
    self.target = target
    self.end_of_thread = None
    self.synch_lock = Lock("response synch lock")
    self.args = args

    # ENSURE THERE IS A SHARED please_stop SIGNAL
    # NOTE: copy() SO CALLER'S kwargs DICT IS NOT MUTATED BELOW
    self.kwargs = copy(kwargs)
    self.kwargs["please_stop"] = self.kwargs.get("please_stop", Signal("please_stop for " + self.name))
    self.please_stop = self.kwargs["please_stop"]

    self.thread = None
    self.stopped = Signal("stopped signal for " + self.name)
    self.cprofiler = None
    self.children = []  # CHILD THREADS, FOR ORDERLY SHUTDOWN

    if "parent_thread" in kwargs:
        # EXPLICIT PARENT: REMOVE FROM kwargs SO target DOES NOT SEE IT
        del self.kwargs["parent_thread"]
        self.parent = kwargs["parent_thread"]
    else:
        # DEFAULT PARENT IS THE SPAWNING THREAD; REGISTER AS ITS CHILD
        self.parent = Thread.current()
        self.parent.add_child(self)
def test_loop(self):
    """
    RUN A BACKGROUND LOOP, LET IT COLLECT TIMESTAMPS, THEN VERIFY IT
    REALLY DID LOOP A REASONABLE NUMBER OF TIMES
    """
    samples = []
    has_started = Signal()

    def work(please_stop):
        has_started.go()
        while not please_stop:
            samples.append(Date.now().unix)
            Till(seconds=0.1).wait()

    looper = Thread.run("loop", work)
    has_started.wait()

    # WAIT UNTIL THE WORKER HAS PRODUCED AT LEAST TEN SAMPLES
    while not len(samples) >= 10:
        Till(seconds=0.1).wait()
    looper.stop()
    looper.join()

    # TEN EXPECTED, BUT NINE IS ACCEPTABLE PROOF OF LOOPING
    num = len(samples)
    self.assertGreater(
        num,
        9,
        "Expecting some reasonable number of entries to prove there was looping, not " + text(num),
    )
def __init__(self, conn=None, tuid_service=None, kwargs=None):
    """
    SETUP THE CHANGESET LOG (csetLog) DATABASE, PRE-FILL IT, AND START
    THE BACKGROUND WORKER THREADS

    :param conn: OPTIONAL DATABASE CONNECTION (DEFAULT: NEW sql.Sql)
    :param tuid_service: OPTIONAL SERVICE (DEFAULT: NEW TUIDService)
    :param kwargs: CONFIGURATION
    """
    try:
        self.config = kwargs
        self.conn = conn if conn else sql.Sql(self.config.database.name)
        self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache, use_cache=True) if self.config.hg_cache else Null
        self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
            database=None, hg=None, kwargs=self.config, conn=self.conn, clogger=self
        )
        self.rev_locker = Lock()
        self.working_locker = Lock()

        self.init_db()
        # NEXT revnum IS max+1, OR 1 FOR AN EMPTY TABLE
        self.next_revnum = coalesce(self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
        self.csets_todo_backwards = Queue(name="Clogger.csets_todo_backwards")
        self.deletions_todo = Queue(name="Clogger.deletions_todo")
        self.maintenance_signal = Signal(name="Clogger.maintenance_signal")
        # NOTE: self.config IS REBOUND TO THE tuid SUB-CONFIG FROM HERE ON
        self.config = self.config.tuid
        self.disable_backfilling = False
        self.disable_tipfilling = False
        self.disable_deletion = False
        self.disable_maintenance = False

        # Make sure we are filled before allowing queries
        numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
        if numrevs < MINIMUM_PERMANENT_CSETS:
            Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
            oldest_rev = 'tip'
            with self.conn.transaction() as t:
                # revision OF THE SMALLEST revnum, IF ANY ROWS EXIST
                tmp = t.query("SELECT min(revnum), revision FROM csetLog").data[0][1]
                if tmp:
                    oldest_rev = tmp
            self._fill_in_range(MINIMUM_PERMANENT_CSETS - numrevs, oldest_rev, timestamp=False)

        Log.note("Table is filled with atleast {{minim}} entries. Starting workers...", minim=MINIMUM_PERMANENT_CSETS)

        Thread.run('clogger-tip', self.fill_forward_continuous)
        Thread.run('clogger-backfill', self.fill_backward_with_list)
        Thread.run('clogger-maintenance', self.csetLog_maintenance)
        Thread.run('clogger-deleter', self.csetLog_deleter)

        Log.note("Started clogger workers.")
    except Exception as e:
        # NOTE(review): str(e) DROPS THE TRACEBACK; cause=e WOULD PRESERVE IT
        Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))
def test_memory_cleanup_with_till(self):
    """
    CREATE MANY Till TIMERS CHAINED WITH |, FIRE THEM, AND VERIFY THE
    TIMER DAEMON RELEASES THE OBJECTS (NO LEAK)
    """
    objgraph.growth()  # BASELINE SNAPSHOT; RETURN VALUE DISCARDED

    root = Signal()
    for i in range(100000):
        if i % 1000 == 0:
            Log.note("at {{num}} tills", num=i)
        root = root | Till(seconds=100000)
        mid_mem = psutil.Process(os.getpid()).memory_info().rss
        # STOP EARLY ONCE WE PASS ~1GB RSS
        if mid_mem > 1000 * 1000 * 1000:
            Log.note("{{num}} Till triggers created", num=i)
            break
    trigger = Signal()
    root = root | trigger

    growth = objgraph.growth(limit=4)  # (type, count, delta) SINCE BASELINE
    growth and Log.note("More object\n{{growth}}", growth=growth)

    trigger.go()
    root.wait()  # THERE SHOULD BE NO DELAY HERE

    # RETRY UP TO 20 TIMES, GIVING THE TIMER DAEMON TIME TO CLEAN UP
    for _ in range(0, 20):
        try:
            Till(seconds=0.1).wait()  # LET TIMER DAEMON CLEANUP
            current = [(t, objgraph.count(t), objgraph.count(t) - c) for t, c, d in growth]
            Log.note("Object count\n{{current}}", current=current)

            # NUMBER OF OBJECTS CLEANED UP SHOULD MATCH NUMBER OF OBJECTS CREATED
            for (_, _, cd), (_, _, gd) in zip(current, growth):
                self.assertAlmostEqual(-cd, gd, places=2)
            return
        except Exception as e:
            # DELIBERATE: COUNTS MAY NOT HAVE DROPPED YET; RETRY
            pass
    Log.error("object counts did not go down")
def __init__(self, backing, database, kwargs=None):
    """
    ATTACH A BACKING STORE (LOCAL DIRECTORY OR S3 BUCKET) AND A SQLITE
    DATABASE, THEN START THE CLEANER THREAD
    """
    # CHOOSE BACKING STORE FROM CONFIG
    self.backing = DirectoryBacking(kwargs=backing) if backing.directory else s3.Bucket(kwargs=backing)
    self.db = Sqlite(database)

    # ENSURE DATABASE IS SETUP
    if not self.db.about(VERSION_TABLE):
        schema.setup(self)

    self.next_id = id_generator(db=self.db, version_table=VERSION_TABLE)
    self.queues = []
    self.please_stop = Signal()
    self.cleaner = Thread.run("cleaner", self._cleaner)
def test_till_in_loop(self):
    """
    VERIFY THAT WAITING ON (Till | please_stop) IN A TIGHT LOOP DOES NOT
    ACCUMULATE PENDING JOBS ON THE please_stop SIGNAL
    """
    def loop(please_stop):
        num_ticks = 0
        while not please_stop:
            (Till(seconds=0.001) | please_stop).wait()
            num_ticks += 1
        Log.note("{{count}}", count=num_ticks)

    please_stop = Signal("please_stop")
    Thread.run("loop", loop, please_stop=please_stop)
    Till(seconds=1).wait()

    # EACH ITERATION SHOULD CLEAN UP ITS JOB; AT MOST ONE MAY BE PENDING
    with please_stop.lock:
        self.assertLessEqual(
            len(please_stop.job_queue), 1, "Expecting only one pending job on go"
        )
    please_stop.go()
def _setup():
    """
    BUILD AN IN-MEMORY SQLITE DB AND TWO NAMED WORKER THREADS, EACH WITH
    FOUR begin/done SIGNAL PAIRS; RETURN (db, threads, signals)
    """
    db = Sqlite()
    db.query("CREATE TABLE my_table (value TEXT)")

    threads = Data()
    signals = Data()
    for worker_name in ["a", "b"]:
        # FOUR HANDSHAKE PAIRS PER WORKER
        signals[worker_name] = [
            {"begin": Signal(), "done": Signal()}
            for _ in range(4)
        ]
        threads[worker_name] = Thread.run(worker_name, _work, worker_name, db, signals[worker_name])
    return db, threads, signals
def query(self, command):
    """
    SEND command TO THE SQLITE WORKER THREAD, BLOCKING THE CALLER UNTIL
    IT COMPLETES
    :param command: COMMAND FOR SQLITE
    :return: list OF RESULTS
    """
    # LAZY-START THE SINGLE WORKER THREAD
    if not self.worker:
        self.worker = Thread.run("sqlite db thread", self._worker)

    is_done = Signal()
    output = Data()
    self.queue.add((command, output, is_done, None))
    is_done.wait()

    if output.exception:
        Log.error("Problem with Sqlite call", cause=output.exception)
    return output
def main():
    """
    READ SETTINGS, START EXTRACTION WORKER THREADS, AND WAIT FOR A
    SHUTDOWN SIGNAL
    """
    try:
        settings = startup.read_settings()
        with startup.SingleInstance(settings.args.filename):
            constants.set(settings.constants)
            Log.start(settings.debug)

            extractor = Extract(settings)

            def extract(please_stop):
                with MySQL(**settings.snowflake.database) as db:
                    with db.transaction():
                        for kwargs in extractor.queue:
                            if please_stop:
                                break
                            try:
                                extractor.extract(db=db, please_stop=please_stop, **kwargs)
                            except Exception as e:
                                Log.warning("Could not extract", cause=e)
                                # RE-QUEUE FOR ANOTHER ATTEMPT
                                extractor.queue.add(kwargs)

            for i in range(settings.extract.threads):
                Thread.run("extract #" + text_type(i), extract)

            please_stop = Signal()
            Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True, wait_forever=False)
    except Exception as e:
        # FIX: e WAS PASSED POSITIONALLY (AS TEMPLATE PARAMS); cause= PRESERVES THE TRACEBACK
        Log.warning("Problem with data extraction", cause=e)
    finally:
        Log.stop()
def __init__(self, instance_manager, disable_prices=False, kwargs=None):
    """
    CONNECT TO EC2/VPC, NORMALIZE UPTIME SETTINGS, AND OPTIONALLY START
    THE LIFECYCLE WATCHER AND PRICING

    :param instance_manager: KNOWS HOW TO SETUP/TEARDOWN INSTANCES
    :param disable_prices: SET True TO SKIP INITIAL PRICING CALL
    :param kwargs: CONFIGURATION (AWS CREDENTIALS, uptime, BUDGET, ...)
    """
    self.settings = kwargs
    self.instance_manager = instance_manager
    aws_args = dict(
        region_name=kwargs.aws.region,
        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
        aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key)
    )
    self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
    self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
    self.price_locker = Lock()
    self.prices = None
    self.price_lookup = None
    self.done_spot_requests = Signal()
    self.net_new_locker = Lock()
    self.net_new_spot_requests = UniqueIndex(("id",))  # SPOT REQUESTS FOR THIS SESSION
    self.watcher = None
    self.active = None
    # APPLY DEFAULTS TO THE uptime SETTINGS
    self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile)
    self.settings.uptime.history = coalesce(Date(self.settings.uptime.history), DAY)
    # NOTE(review): Date("5minute") AS A Duration DEFAULT LOOKS LIKE IT SHOULD BE Duration("5minute") - confirm
    self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Date("5minute"))
    self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1)

    if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
        self._start_life_cycle_watcher()
    if not disable_prices:
        self.pricing()
def __init__(self, conn=None, tuid_service=None, start_workers=True, new_table=False, kwargs=None):
    """
    SETUP THE CHANGESET LOG (csetLog) DATABASE AND OPTIONALLY START THE
    BACKGROUND WORKERS

    :param conn: OPTIONAL DATABASE CONNECTION (DEFAULT: NEW sql.Sql)
    :param tuid_service: OPTIONAL SERVICE (DEFAULT: NEW TUIDService)
    :param start_workers: SET False TO SKIP STARTING WORKER THREADS (TESTS)
    :param new_table: SET True TO DROP AND RECREATE csetLog
    :param kwargs: CONFIGURATION
    """
    try:
        self.config = kwargs
        self.conn = conn if conn else sql.Sql(self.config.database.name)
        self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache, use_cache=True) if self.config.hg_cache else Null
        self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
            kwargs=self.config.tuid, conn=self.conn, clogger=self
        )
        self.rev_locker = Lock()
        self.working_locker = Lock()

        if new_table:
            with self.conn.transaction() as t:
                t.execute("DROP TABLE IF EXISTS csetLog")

        self.init_db()
        # NEXT revnum IS max+1, OR 1 FOR AN EMPTY TABLE
        self.next_revnum = coalesce(self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
        self.csets_todo_backwards = Queue(name="Clogger.csets_todo_backwards")
        self.deletions_todo = Queue(name="Clogger.deletions_todo")
        self.maintenance_signal = Signal(name="Clogger.maintenance_signal")

        # NOTE: self.config IS REBOUND TO THE tuid SUB-CONFIG, IF PRESENT
        if 'tuid' in self.config:
            self.config = self.config.tuid

        self.disable_backfilling = False
        self.disable_tipfilling = False
        self.disable_deletion = False
        self.disable_maintenance = False

        self.backfill_thread = None
        self.tipfill_thread = None
        self.deletion_thread = None
        self.maintenance_thread = None

        # Make sure we are filled before allowing queries
        numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
        if numrevs < MINIMUM_PERMANENT_CSETS:
            Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
            oldest_rev = 'tip'
            with self.conn.transaction() as t:
                # revision OF THE SMALLEST revnum, IF ANY ROWS EXIST
                tmp = t.query("SELECT min(revnum), revision FROM csetLog").data[0][1]
                if tmp:
                    oldest_rev = tmp
            self._fill_in_range(
                MINIMUM_PERMANENT_CSETS - numrevs, oldest_rev, timestamp=False
            )

        Log.note(
            "Table is filled with atleast {{minim}} entries.", minim=MINIMUM_PERMANENT_CSETS
        )

        if start_workers:
            self.start_workers()
    except Exception as e:
        # NOTE(review): str(e) DROPS THE TRACEBACK; cause=e WOULD PRESERVE IT
        Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))
def test_daemon(service):
    """
    EXERCISE service._daemon WITH A SIGNAL THAT IS NEVER TRIGGERED, SO THE
    DAEMON RUNS UNTIL IT HAS UPDATED ALL KNOWN FILES (MAY TAKE A WHILE)
    """
    from mo_threads import Signal

    never_stop = Signal()
    service._daemon(never_stop)
def main():
    """
    READ SETTINGS, START LOGGING, AND BLOCK UNTIL SHUTDOWN IS REQUESTED
    """
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    # FIX: "except Exception, e" IS PYTHON-2-ONLY SYNTAX; THE REST OF THIS
    # CODEBASE USES THE PY2/PY3-COMPATIBLE "as e" FORM
    except Exception as e:
        Log.error("Problem with etl", cause=e)
def _test_queue_speed(self, test=False):
    """
    PUSH 10K ITEMS THROUGH A ThreadedQueue INTO slow, DRAIN IT, AND
    (OPTIONALLY) ASSERT THE ELAPSED TIME IS UNDER A PLATFORM-SPECIFIC LIMIT

    :param test: SET True TO ENFORCE THE TIMING ASSERTION
    """
    SCALE = 1000 * 10

    done = Signal("done")  # TRIGGERED WHEN THE CONSUMER HAS DRAINED EVERYTHING
    slow = Queue()
    q = ThreadedQueue("test queue", slow_queue=slow)

    def empty(please_stop):
        # CONSUMER: POP FROM THE DOWNSTREAM QUEUE UNTIL THE STOP MARKER
        while not please_stop:
            item = slow.pop()
            if item is THREAD_STOP:
                break
        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(THREAD_STOP)  # SENTINEL TELLS CONSUMER TO EXIT
        Log.note("Done insert")
        done.wait()  # TIME INCLUDES FULL DRAIN, NOT JUST INSERTS

    Log.note(
        "{{num}} items through queue in {{seconds|round(3)}} seconds",
        num=SCALE,
        seconds=timer.duration.seconds,
    )
    # PLATFORM-DEPENDENT TIME LIMITS
    if PY2 and "windows" not in platform.system().lower():
        expected_time = 15  # LINUX PY2 IS CRAZY SLOW
    elif PY3 and "windows" not in platform.system().lower():
        expected_time = 6  # LINUX PY3 IS SLOW
    else:
        expected_time = 6
    if test:
        self.assertLess(
            timer.duration.seconds,
            expected_time,
            "Expecting queue to be fast, not " + text(timer.duration.seconds) + " seconds",
        )
def _execute(self, command):
    """
    SEND ONE command TO THE SUBPROCESS AND BLOCK UNTIL ITS RESPONSE (OR
    ERROR) ARRIVES; ONLY ONE COMMAND MAY BE IN FLIGHT AT A TIME
    """
    with self.lock:
        # WAIT FOR ANY IN-FLIGHT COMMAND TO FINISH, THEN CLAIM THE SLOT
        if self.current_task is not None:
            self.current_task.wait()
        self.current_task = Signal()
        self.current_response = None
        self.current_error = None
    self.process.stdin.add(value2json(command))
    # RESPONSE HANDLER (ELSEWHERE) FILLS current_response/current_error AND TRIGGERS current_task
    self.current_task.wait()
    with self.lock:
        try:
            if self.current_error:
                Log.error("problem with process call", cause=Except.new_instance(self.current_error))
            else:
                return self.current_response
        finally:
            # RELEASE THE SLOT FOR THE NEXT CALLER
            self.current_task = None
            self.current_response = None
            self.current_error = None
def request(self, method, path, headers):
    """
    SERVE path FROM THE IN-MEMORY CACHE, THEN THE SQLITE CACHE, AND ONLY
    THEN QUEUE A NETWORK REQUEST; CONCURRENT REQUESTS FOR THE SAME path
    SHARE ONE ready SIGNAL SO THE FETCH HAPPENS ONCE

    :param method: HTTP METHOD (ONLY USED FOR THE NETWORK FALLBACK)
    :param path: CACHE KEY
    :param headers: REQUEST HEADERS (REBOUND TO CACHED RESPONSE HEADERS BELOW)
    :return: Response
    """
    now = Date.now()
    self.inbound_rate.add(now)
    ready = Signal(path)

    # TEST CACHE
    with self.cache_locker:
        pair = self.cache.get(path)
        if pair is None:
            # RESERVE THE SLOT SO CONCURRENT CALLERS WAIT ON ready
            self.cache[path] = (ready, None, None, now)

    if pair is not None:
        # REQUEST IS IN THE QUEUE ALREADY, WAIT
        ready, headers, response, then = pair
        if response is None:
            ready.wait()  # ANOTHER THREAD IS FETCHING; WAIT FOR IT
            with self.cache_locker:
                ready, headers, response, timestamp = self.cache.get(path)
        # TOUCH THE DB TIMESTAMP (ONLY IF OLDER THAN now)
        with self.db.transaction() as t:
            t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))
        return Response(
            response,
            status=200,
            headers=json.loads(headers)
        )

    # TEST DB
    db_response = self.db.query("SELECT headers, response FROM cache WHERE path=" + quote_value(path)).data
    if db_response:
        headers, response = db_response[0]
        with self.db.transaction() as t:
            t.execute("UPDATE cache SET timestamp=" + quote_value(now) + " WHERE path=" + quote_value(path) + " AND timestamp<" + quote_value(now))
        with self.cache_locker:
            # NOTE(review): response IS CACHED AS latin1 BYTES BUT RETURNED AS-IS - confirm intended
            self.cache[path] = (ready, headers, response.encode('latin1'), now)
        ready.go()  # RELEASE ANY WAITERS
        return Response(
            response,
            status=200,
            headers=json.loads(headers)
        )

    # MAKE A NETWORK REQUEST
    self.todo.add((ready, method, path, headers, now))
    ready.wait()  # WORKER FILLS self.cache[path] AND TRIGGERS ready
    with self.cache_locker:
        ready, headers, response, timestamp = self.cache[path]
    return Response(
        response,
        status=200,
        headers=json.loads(headers)
    )
def __init__(self, _file):
    """
    file - USES FILE FOR PERSISTENCE

    REPLAY THE JOURNAL OF JSON DELTAS IN _file TO REBUILD QUEUE STATE,
    SCRUBBING ANY ENTRIES OLDER THAN THE RECORDED start POSITION
    """
    self.file = File.new_instance(_file)
    self.lock = Lock("lock for persistent queue using file " + self.file.name)
    self.please_stop = Signal()
    self.db = Data()
    self.pending = []

    if self.file.exists:
        # REPLAY JOURNAL: EACH LINE IS A JSON DELTA APPLIED TO self.db
        for line in self.file:
            with suppress_exception:
                delta = mo_json.json2value(line)
                apply_delta(self.db, delta)
        if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
            self.db.status.start = 0
        self.start = self.db.status.start

        # SCRUB LOST VALUES
        lost = 0
        for k in self.db.keys():
            with suppress_exception:
                # int(k) RAISES FOR NON-NUMERIC KEYS (e.g. "status"); SUPPRESSED
                if k != "status" and int(k) < self.start:
                    self.db[k] = None
                    lost += 1
        # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
        if lost:
            Log.warning("queue file had {{num}} items lost", num=lost)
        if DEBUG:
            Log.note("Persistent queue {{name}} found with {{num}} items", name=self.file.abspath, num=len(self))
    else:
        # FRESH QUEUE: EMPTY STATUS RECORD
        self.db.status = Data(start=0, end=0)
        self.start = self.db.status.start
        if DEBUG:
            Log.note("New persistent queue {{name}}", name=self.file.abspath)
def wait_for_shutdown_signal(
    please_stop=False,  # ASSIGN SIGNAL TO STOP EARLY
    allow_exit=False,  # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
    wait_forever=True  # IGNORE CHILD THREADS, NEVER EXIT.  False -> IF NO CHILD THREADS LEFT, THEN EXIT
):
    """
    FOR USE BY PROCESSES NOT EXPECTED TO EVER COMPLETE UNTIL EXTERNAL
    SHUTDOWN IS REQUESTED

    SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"

    :param please_stop: SIGNAL TO TRIGGER SHUTDOWN (A NEW ONE IS MADE IF NOT A Signal)
    :param allow_exit: ACCEPT "exit" ON THE CONSOLE
    :param wait_forever: Assume all needed threads have been launched. When done
    """
    if not isinstance(please_stop, Signal):
        please_stop = Signal()

    # STOP THE MAIN THREAD WHEN please_stop TRIGGERS
    please_stop.on_go(lambda: thread.start_new_thread(_stop_main_thread, ()))

    self_thread = Thread.current()
    if self_thread != MAIN_THREAD:
        Log.error("Only the main thread can sleep forever (waiting for KeyboardInterrupt)")

    if not wait_forever:
        # TRIGGER SIGNAL WHEN ALL EXITING THREADS ARE DONE
        pending = copy(self_thread.children)
        # FIX: RENAMED FROM `all`, WHICH SHADOWED THE BUILTIN
        all_done = AndSignals(please_stop, len(pending))
        for p in pending:
            p.stopped.on_go(all_done.done)

    try:
        if allow_exit:
            _wait_for_exit(please_stop)
        else:
            _wait_for_interrupt(please_stop)
    # FIX: "except (...), _" IS PYTHON-2-ONLY SYNTAX; USE "as" FORM
    except (KeyboardInterrupt, SystemExit) as _:
        Log.alert("SIGINT Detected!  Stopping...")
def _execute(self, command):
    """
    SEND ONE command TO THE PYTHON SUBPROCESS AND BLOCK UNTIL ITS RESPONSE
    (OR ERROR, OR SERVICE DEATH) ARRIVES; ONE COMMAND IN FLIGHT AT A TIME
    """
    with self.lock:
        # WAIT FOR PREVIOUS COMMAND TO FINISH, THEN CLAIM THE SLOT
        self.current_task.wait()
        self.current_task = Signal()
        self.current_response = None
        self.current_error = None
    if self.process.service_stopped:
        Log.error("python is not running")
    self.process.stdin.add(value2json(command))
    # WAKE ON RESPONSE *OR* ON SERVICE DEATH, SO WE NEVER HANG FOREVER
    (self.current_task | self.process.service_stopped).wait()
    try:
        if self.current_error:
            Log.error("problem with process call", cause=Except.new_instance(self.current_error))
        else:
            return self.current_response
    finally:
        # MARK SLOT FREE (DONE IS A PRE-TRIGGERED SIGNAL)
        self.current_task = DONE
        self.current_response = None
        self.current_error = None
def test_job_queue_in_signal(self):
    """
    BUILD 10K DEPENDENT SIGNALS, DELETE THEM, AND VERIFY MEMORY RETURNS
    CLOSE TO THE STARTING LEVEL
    """
    def rss():
        # CURRENT RESIDENT SET SIZE OF THIS PROCESS
        return psutil.Process(os.getpid()).memory_info().rss

    gc.collect()
    start_mem = rss()
    Log.note("Start memory {{mem|comma}}", mem=start_mem)

    main = Signal()
    combos = [main | Signal() for _ in range(10000)]

    mid_mem = rss()
    Log.note("Mid memory {{mem|comma}}", mem=mid_mem)

    del combos
    gc.collect()
    end_mem = rss()
    Log.note("End memory {{mem|comma}}", mem=end_mem)

    main.go()  # NOT NEEDED, BUT INTERESTING

    self.assertLess(end_mem, (start_mem + mid_mem) / 2, "end memory should be closer to start")
def __init__(self, name, max=None, silent=False, unique=False, allow_add_after_close=False):
    """
    max - LIMIT THE NUMBER IN THE QUEUE, IF TOO MANY add() AND extend() WILL BLOCK
    silent - COMPLAIN IF THE READERS ARE TOO SLOW
    unique - SET True IF YOU WANT ONLY ONE INSTANCE IN THE QUEUE AT A TIME
    allow_add_after_close - SET True TO ACCEPT add() AFTER THE QUEUE IS CLOSED
    """
    if not _Log:
        _late_import()  # AVOID CIRCULAR IMPORT AT MODULE LOAD

    self.name = name
    self.max = coalesce(max, 2**10)  # DEFAULT CAPACITY: 1024
    self.silent = silent
    self.allow_add_after_close = allow_add_after_close
    self.unique = unique
    self.please_stop = Signal("stop signal for " + name)
    self.lock = Lock("lock for queue " + name)
    self.queue = deque()
    self.next_warning = time()  # FOR DEBUGGING
def __init__(self, instance_manager, disable_prices=False, kwargs=None):
    """
    CONNECT TO EC2/VPC, TRACK ZONES WITH NO CAPACITY, NORMALIZE UPTIME
    SETTINGS, AND OPTIONALLY START THE LIFECYCLE WATCHER AND PRICING

    :param instance_manager: KNOWS HOW TO SETUP/TEARDOWN INSTANCES
    :param disable_prices: SET True TO SKIP INITIAL PRICING CALL
    :param kwargs: CONFIGURATION (AWS CREDENTIALS, uptime, BUDGET, ...)
    """
    self.settings = kwargs
    self.instance_manager = instance_manager
    aws_args = dict(
        region_name=kwargs.aws.region,
        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
        aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key)
    )
    self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
    self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
    self.price_locker = Lock()
    self.prices = None
    self.price_lookup = None
    self.no_capacity = {}  # ZONES/TYPES CURRENTLY REPORTING NO CAPACITY
    self.no_capacity_file = File(kwargs.price_file).parent / "no capacity.json"
    self.done_making_new_spot_requests = Signal()
    self.net_new_locker = Lock()
    self.net_new_spot_requests = UniqueIndex(("id",))  # SPOT REQUESTS FOR THIS SESSION
    self.watcher = None
    self.active = None
    # APPLY DEFAULTS TO THE uptime SETTINGS
    self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile)
    self.settings.uptime.history = coalesce(Date(self.settings.uptime.history), DAY)
    # NOTE(review): Date("5minute") AS A Duration DEFAULT LOOKS LIKE IT SHOULD BE Duration("5minute") - confirm
    self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Date("5minute"))
    self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1)

    if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
        self._start_life_cycle_watcher()
    if not disable_prices:
        self.pricing()
def execute(self, command):
    """
    QUEUE command FOR THE SQLITE WORKER; COMMANDS RUN IN THE ORDER GIVEN
    BUT MAY INTERLEAVE WITH COMMANDS FROM OTHER THREADS
    :param command: COMMAND FOR SQLITE
    :return: Signal FOR IF YOU WANT TO BE NOTIFIED WHEN DONE
    """
    if DEBUG_EXECUTE:  # EXECUTE IMMEDIATELY FOR BETTER STACK TRACE
        self.query(command)
        return DONE

    # CAPTURE CALLER'S STACK ONLY WHEN TRACING IS ENABLED
    trace = extract_stack(1) if self.get_trace else None

    done_signal = Signal()
    self.queue.add((command, None, done_signal, trace))
    return done_signal
def __init__(self, _file):
    """
    file - USES FILE FOR PERSISTENCE

    REPLAY THE JOURNAL OF JSON DELTAS IN _file TO REBUILD QUEUE STATE,
    SCRUBBING ANY ENTRIES OLDER THAN THE RECORDED start POSITION
    """
    self.file = File.new_instance(_file)
    self.lock = Lock("lock for persistent queue using file " + self.file.name)
    self.please_stop = Signal()
    self.db = Data()
    self.pending = []

    if self.file.exists:
        # REPLAY JOURNAL: EACH LINE IS A JSON DELTA APPLIED TO self.db
        for line in self.file:
            with suppress_exception:
                delta = mo_json.json2value(line)
                apply_delta(self.db, delta)
        if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
            self.db.status.start = 0
        self.start = self.db.status.start

        # SCRUB LOST VALUES
        lost = 0
        for k in self.db.keys():
            with suppress_exception:
                # int(k) RAISES FOR NON-NUMERIC KEYS; SUPPRESSED
                if k!="status" and int(k) < self.start:
                    self.db[k] = None
                    lost += 1
        # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
        if lost:
            Log.warning("queue file had {{num}} items lost", num= lost)
        DEBUG and Log.note("Persistent queue {{name}} found with {{num}} items", name=self.file.abspath, num=len(self))
    else:
        # FRESH QUEUE: EMPTY STATUS RECORD
        self.db.status = Data(
            start=0,
            end=0
        )
        self.start = self.db.status.start
        DEBUG and Log.note("New persistent queue {{name}}", name=self.file.abspath)
now = Date.now().unix if time_offset is None: time_offset = now - request.meta.request_time next_request = request.meta.request_time + time_offset if next_request > now: Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now)) Till(till=next_request).wait() Thread.run("request " + text_type(request_count), one_request, request) request_count += 1 queue.commit() if __name__ == '__main__': try: tmp_signal = Signal() config = startup.read_settings() constants.set(config.constants) Log.start(config.debug) queue_consumer(kwargs=config, please_stop=tmp_signal) worker = Thread.run("sqs consumer", queue_consumer, kwargs=config) MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True, please_stop=worker.stopped) except BaseException as e: Log.error("Serious problem with consumer construction! Shutdown!", cause=e)
class SpotManager(object): @override def __init__(self, instance_manager, disable_prices=False, kwargs=None): self.settings = kwargs self.instance_manager = instance_manager aws_args = dict( region_name=kwargs.aws.region, aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id), aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key) ) self.ec2_conn = boto.ec2.connect_to_region(**aws_args) self.vpc_conn = boto.vpc.connect_to_region(**aws_args) self.price_locker = Lock() self.prices = None self.price_lookup = None self.done_spot_requests = Signal() self.net_new_locker = Lock() self.net_new_spot_requests = UniqueIndex(("id",)) # SPOT REQUESTS FOR THIS SESSION self.watcher = None self.active = None self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile) self.settings.uptime.history = coalesce(Date(self.settings.uptime.history), DAY) self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Date("5minute")) self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1) if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required(): self._start_life_cycle_watcher() if not disable_prices: self.pricing() def update_spot_requests(self, utility_required): spot_requests = self._get_managed_spot_requests() # ADD UP THE CURRENT REQUESTED INSTANCES all_instances = UniqueIndex("id", data=self._get_managed_instances()) self.active = active = wrap([r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN]) for a in active.copy(): if a.status.code == "request-canceled-and-instance-running" and all_instances[a.instance_id] == None: active.remove(a) used_budget = 0 current_spending = 0 for a in active: about = self.price_lookup[a.launch_specification.instance_type, a.launch_specification.placement] discount = coalesce(about.type.discount, 0) Log.note( "Active Spot Request {{id}}: 
{{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}", id=a.id, type=a.launch_specification.instance_type, zone=a.launch_specification.placement, instance_id=a.instance_id, price=a.price - discount ) used_budget += a.price - discount current_spending += coalesce(about.current_price, a.price) - discount Log.note( "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)", budget=used_budget, current=current_spending ) remaining_budget = self.settings.budget - used_budget current_utility = coalesce(SUM(self.price_lookup[r.launch_specification.instance_type, r.launch_specification.placement].type.utility for r in active), 0) net_new_utility = utility_required - current_utility Log.note("have {{current_utility}} utility running; need {{need_utility}} more utility", current_utility=current_utility, need_utility=net_new_utility) if remaining_budget < 0: remaining_budget, net_new_utility = self.save_money(remaining_budget, net_new_utility) if net_new_utility < 0: if self.settings.allowed_overage: net_new_utility = Math.min(net_new_utility + self.settings.allowed_overage * utility_required, 0) net_new_utility = self.remove_instances(net_new_utility) if net_new_utility > 0: net_new_utility = Math.min(net_new_utility, self.settings.max_new_utility) net_new_utility, remaining_budget = self.add_instances(net_new_utility, remaining_budget) if net_new_utility > 0: Log.alert( "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour). Remaining budget is ${{budget|round(decimal=2)}} ", num=net_new_utility, expected=self.settings.max_utility_price, budget=remaining_budget ) # Give EC2 a chance to notice the new requests before tagging them. 
Till(timeout=3).wait() with self.net_new_locker: for req in self.net_new_spot_requests: req.add_tag("Name", self.settings.ec2.instance.name) Log.note("All requests for new utility have been made") self.done_spot_requests.go() def add_instances(self, net_new_utility, remaining_budget): prices = self.pricing() for p in prices: if net_new_utility <= 0 or remaining_budget <= 0: break if p.current_price == None: Log.note("{{type}} has no current price", type=p.type.instance_type ) continue if self.settings.utility[p.type.instance_type].blacklist or \ p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones): Log.note("{{type}} in {{zone}} skipped due to blacklist", type=p.type.instance_type, zone=p.availability_zone) continue # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount max_bid = Math.min(p.higher_price, max_acceptable_price, remaining_budget) min_bid = p.price_80 if min_bid > max_acceptable_price: Log.note( "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour", type=p.type.instance_type, price=min_bid, remaining=max_acceptable_price ) continue elif min_bid > remaining_budget: Log.note( "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour", type=p.type.instance_type, bid=min_bid, remaining_budget=remaining_budget ) continue elif min_bid > max_bid: Log.error("not expected") naive_number_needed = int(Math.round(float(net_new_utility) / float(p.type.utility), decimal=0)) limit_total = None if self.settings.max_percent_per_type < 1: current_count = sum(1 for a in self.active if a.launch_specification.instance_type == p.type.instance_type and a.launch_specification.placement == p.availability_zone) all_count = sum(1 for a in self.active if a.launch_specification.placement == p.availability_zone) all_count = max(all_count, naive_number_needed) limit_total = 
int(Math.floor((all_count * self.settings.max_percent_per_type - current_count) / (1 - self.settings.max_percent_per_type))) num = Math.min(naive_number_needed, limit_total, self.settings.max_requests_per_type) if num < 0: Log.note( "{{type}} is over {{limit|percent}} of instances, no more requested", limit=self.settings.max_percent_per_type, type=p.type.instance_type ) continue elif num == 1: min_bid = Math.min(Math.max(p.current_price * 1.1, min_bid), max_acceptable_price) price_interval = 0 else: price_interval = Math.min(min_bid / 10, (max_bid - min_bid) / (num - 1)) for i in range(num): bid_per_machine = min_bid + (i * price_interval) if bid_per_machine < p.current_price: Log.note( "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour", type=p.type.instance_type, bid=bid_per_machine - p.type.discount, current_price=p.current_price ) continue if bid_per_machine - p.type.discount > remaining_budget: Log.note( "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour", type=p.type.instance_type, bid=bid_per_machine - p.type.discount, remaining=remaining_budget ) continue try: if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1: Log.error("Spot Manager can only request machine one-at-a-time") new_requests = self._request_spot_instances( price=bid_per_machine, availability_zone_group=p.availability_zone, instance_type=p.type.instance_type, kwargs=copy(self.settings.ec2.request) ) Log.note( "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour", num=len(new_requests), type=p.type.instance_type, zone=p.availability_zone, utility=p.type.utility, price=bid_per_machine ) net_new_utility -= p.type.utility * len(new_requests) remaining_budget -= (bid_per_machine - p.type.discount) * len(new_requests) with self.net_new_locker: for ii in new_requests: self.net_new_spot_requests.add(ii) except Exception as e: Log.warning( "Request instance {{type}} 
failed because {{reason}}", type=p.type.instance_type, reason=e.message, cause=e ) if "Max spot instance count exceeded" in e.message: Log.note("No further spot requests will be attempted.") return net_new_utility, remaining_budget return net_new_utility, remaining_budget def remove_instances(self, net_new_utility): instances = self.running_instances() # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE remove_list = [] for acceptable_error in range(0, 8): remaining_utility = -net_new_utility remove_list = FlatList() for s in instances: utility = coalesce(s.markup.type.utility, 0) if utility <= remaining_utility + acceptable_error: remove_list.append(s) remaining_utility -= utility if remaining_utility <= 0: net_new_utility = -remaining_utility break if not remove_list: return net_new_utility # SEND SHUTDOWN TO EACH INSTANCE Log.note("Shutdown {{instances}}", instances=remove_list.id) for i in remove_list: try: self.instance_manager.teardown(i) except Exception as e: Log.warning("Teardown of {{id}} failed", id=i.id, cause=e) remove_spot_requests = remove_list.spot_instance_request_id # TERMINATE INSTANCES self.ec2_conn.terminate_instances(instance_ids=remove_list.id) # TERMINATE SPOT REQUESTS self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests) return net_new_utility def running_instances(self): # FIND THE BIGGEST, MOST EXPENSIVE REQUESTS instances = self._get_managed_instances() for r in instances: try: r.markup = self.price_lookup[r.instance_type, r.placement] except Exception as e: r.markup = self.price_lookup[r.instance_type, r.placement] Log.error("No pricing!!!", e) instances = jx.sort(instances, [ {"value": "markup.type.utility", "sort": -1}, {"value": "markup.estimated_value", "sort": 1} ]) return instances def save_money(self, remaining_budget, net_new_utility): remove_spot_requests = wrap([]) # FIRST CANCEL THE PENDING REQUESTS if remaining_budget < 0: requests = self._get_managed_spot_requests() for r in requests: if 
r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN: remove_spot_requests.append(r.id) net_new_utility += self.settings.utility[r.launch_specification.instance_type].utility remaining_budget += r.price instances = jx.sort(self.running_instances(), "markup.estimated_value") remove_list = wrap([]) for s in instances: if remaining_budget >= 0: break remove_list.append(s) net_new_utility += coalesce(s.markup.type.utility, 0) remaining_budget += coalesce(s.request.bid_price, s.markup.price_80, s.markup.current_price) if not remove_list: return remaining_budget, net_new_utility # SEND SHUTDOWN TO EACH INSTANCE Log.warning("Shutdown {{instances}} to save money!", instances=remove_list.id) for i in remove_list: try: self.instance_manager.teardown(i) except Exception as e: Log.warning("Teardown of {{id}} failed", id=i.id, cause=e) remove_spot_requests.extend(remove_list.spot_instance_request_id) # TERMINATE INSTANCES self.ec2_conn.terminate_instances(instance_ids=remove_list.id) # TERMINATE SPOT REQUESTS self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests) return remaining_budget, net_new_utility @cache(duration=5 * SECOND) def _get_managed_spot_requests(self): output = wrap([datawrap(r) for r in self.ec2_conn.get_all_spot_instance_requests() if not r.tags.get("Name") or r.tags.get("Name").startswith(self.settings.ec2.instance.name)]) return output def _get_managed_instances(self): requests = UniqueIndex(["instance_id"], data=self._get_managed_spot_requests().filter(lambda r: r.instance_id!=None)) reservations = self.ec2_conn.get_all_instances() output = [] for res in reservations: for instance in res.instances: if instance.tags.get('Name', '').startswith(self.settings.ec2.instance.name) and instance._state.name == "running": instance.request = requests[instance.id] output.append(datawrap(instance)) return wrap(output) def _start_life_cycle_watcher(self): def life_cycle_watcher(please_stop): failed_attempts=Data() while 
not please_stop: spot_requests = self._get_managed_spot_requests() last_get = Date.now() instances = wrap({i.id: i for r in self.ec2_conn.get_all_instances() for i in r.instances}) # INSTANCES THAT REQUIRE SETUP time_to_stop_trying = {} please_setup = [ (i, r) for i, r in [(instances[r.instance_id], r) for r in spot_requests] if i.id and not i.tags.get("Name") and i._state.name == "running" and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP ] for i, r in please_setup: try: p = self.settings.utility[i.instance_type] if p == None: try: self.ec2_conn.terminate_instances(instance_ids=[i.id]) with self.net_new_locker: self.net_new_spot_requests.remove(r.id) finally: Log.error("Can not setup unknown {{instance_id}} of type {{type}}", instance_id=i.id, type=i.instance_type) i.markup = p try: self.instance_manager.setup(i, coalesce(p, 0)) except Exception as e: e = Except.wrap(e) failed_attempts[r.id] += [e] Log.error(ERROR_ON_CALL_TO_SETUP, e) i.add_tag("Name", self.settings.ec2.instance.name + " (running)") with self.net_new_locker: self.net_new_spot_requests.remove(r.id) except Exception as e: if not time_to_stop_trying.get(i.id): time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN if Date.now() > time_to_stop_trying[i.id]: # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE self.ec2_conn.terminate_instances(instance_ids=[i.id]) with self.net_new_locker: self.net_new_spot_requests.remove(r.id) Log.warning("Problem with setup of {{instance_id}}. Time is up. 
Instance TERMINATED!", instance_id=i.id, cause=e) elif "Can not setup unknown " in e: Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e) elif ERROR_ON_CALL_TO_SETUP in e: if len(failed_attempts[r.id]) > 2: Log.warning("Problem with setup() of {{instance_id}}", instance_id=i.id, cause=failed_attempts[r.id]) else: Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e) if Date.now() - last_get > 5 * SECOND: # REFRESH STALE spot_requests = self._get_managed_spot_requests() last_get = Date.now() pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES]) give_up = wrap([r for r in spot_requests if r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES]) ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN]) # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT if self.done_spot_requests: with self.net_new_locker: expired = Date.now() - self.settings.run_interval + 2 * MINUTE for ii in list(self.net_new_spot_requests): if Date(ii.create_time) < expired: ## SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS self.net_new_spot_requests.remove(ii) for g in give_up: self.net_new_spot_requests.remove(g.id) for g in ignore: self.net_new_spot_requests.remove(g.id) pending = UniqueIndex(("id",), data=pending) pending = pending | self.net_new_spot_requests if give_up: self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id) Log.note("Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code) if not pending and not time_to_stop_trying and self.done_spot_requests: Log.note("No more pending spot requests") please_stop.go() break elif pending: Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending]) (Till(seconds=10) | please_stop).wait() Log.note("life cycle watcher has stopped") # Log.warning("lifecycle watcher is disabled") timeout = Till(seconds=self.settings.run_interval.seconds - 60) self.watcher = 
Thread.run("lifecycle watcher", life_cycle_watcher, please_stop=timeout) def _get_valid_availability_zones(self): subnets = list(self.vpc_conn.get_all_subnets(subnet_ids=self.settings.ec2.request.network_interfaces.subnet_id)) zones_with_interfaces = [s.availability_zone for s in subnets] if self.settings.availability_zone: # If they pass a list of zones, constrain it by zones we have an # interface for. return set(zones_with_interfaces) & set(listwrap(self.settings.availability_zone)) else: # Otherwise, use all available zones. return zones_with_interfaces @override def _request_spot_instances(self, price, availability_zone_group, instance_type, kwargs): kwargs.kwargs = None # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP if instance_type.startswith("m3."): kwargs.placement_group = None kwargs.network_interfaces = NetworkInterfaceCollection(*( NetworkInterfaceSpecification(**i) for i in listwrap(kwargs.network_interfaces) if self.vpc_conn.get_all_subnets(subnet_ids=i.subnet_id, filters={"availabilityZone": availability_zone_group}) )) if len(kwargs.network_interfaces) == 0: Log.error("No network interface specifications found for {{availability_zone}}!", availability_zone=kwargs.availability_zone_group) block_device_map = BlockDeviceMapping() # GENERIC BLOCK DEVICE MAPPING for dev, dev_settings in kwargs.block_device_map.items(): block_device_map[dev] = BlockDeviceType( delete_on_termination=True, **dev_settings ) kwargs.block_device_map = block_device_map # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping num_ephemeral_volumes = ephemeral_storage[instance_type]["num"] for i in range(num_ephemeral_volumes): letter = convert.ascii2char(98 + i) # START AT "b" kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType( ephemeral_name='ephemeral' + unicode(i), delete_on_termination=True ) if kwargs.expiration: kwargs.valid_until = (Date.now() + Duration(kwargs.expiration)).format(ISO8601) kwargs.expiration = None # ATTACH NEW EBS VOLUMES for i, drive in 
enumerate(self.settings.utility[instance_type].drives): letter = convert.ascii2char(98 + i + num_ephemeral_volumes) device = drive.device = coalesce(drive.device, "/dev/sd" + letter) d = drive.copy() d.path = None # path AND device PROPERTY IS NOT ALLOWED IN THE BlockDeviceType d.device = None if d.size: kwargs.block_device_map[device] = BlockDeviceType( delete_on_termination=True, **d ) output = list(self.ec2_conn.request_spot_instances(**kwargs)) return output def pricing(self): with self.price_locker: if self.prices: return self.prices prices = self._get_spot_prices_from_aws() now = Date.now() expressions.ALLOW_SCRIPTING = True with Timer("processing pricing data"): hourly_pricing = jx.run({ "from": { # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE "from": prices, "window": [ { "name": "expire", "value": {"coalesce": [{"rows": {"timestamp": 1}}, {"date": "eod"}]}, "edges": ["availability_zone", "instance_type"], "sort": "timestamp" }, { # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME "name": "effective", "value": {"sub": {"timestamp": self.settings.uptime.duration.seconds}} } ] }, "edges": [ "availability_zone", "instance_type", { "name": "time", "range": {"min": "effective", "max": "expire", "mode": "inclusive"}, "allowNulls": False, "domain": {"type": "time", "min": now.floor(HOUR) - self.settings.uptime.history, "max": Date.now().floor(HOUR)+HOUR, "interval": "hour"} } ], "select": [ {"value": "price", "aggregate": "max"}, {"aggregate": "count"} ], "where": {"gt": {"expire": now.floor(HOUR) - self.settings.uptime.history}}, "window": [ { "name": "current_price", "value": "rows.last.price", "edges": ["availability_zone", "instance_type"], "sort": "time" } ] }).data bid80 = jx.run({ "from": hourly_pricing, "edges": [ { "value": "availability_zone", "allowNulls": False }, { "name": "type", "value": "instance_type", "allowNulls": False, "domain": {"type": "set", "key": "instance_type", "partitions": 
self.settings.utility} } ], "select": [ {"name": "price_80", "value": "price", "aggregate": "percentile", "percentile": self.settings.uptime.bid_percentile}, {"name": "max_price", "value": "price", "aggregate": "max"}, {"aggregate": "count"}, {"value": "current_price", "aggregate": "one"}, {"name": "all_price", "value": "price", "aggregate": "list"} ], "window": [ {"name": "estimated_value", "value": {"div": ["type.utility", "price_80"]}}, {"name": "higher_price", "value": lambda row, rownum, rows: find_higher(row.all_price, row.price_80)} # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}} ] }) output = jx.run({ "from": bid80, "sort": {"value": "estimated_value", "sort": -1} }) self.prices = wrap(output.data) self.price_lookup = UniqueIndex(("type.instance_type", "availability_zone"), data=self.prices) return self.prices def _get_spot_prices_from_aws(self): with Timer("Read pricing file"): try: content = File(self.settings.price_file).read() cache = convert.json2value(content, flexible=False, leaves=False) except Exception as e: cache = FlatList() most_recents = jx.run({ "from": cache, "edges": ["instance_type", "availability_zone"], "select": {"value": "timestamp", "aggregate": "max"} }) zones = self._get_valid_availability_zones() prices = set(cache) with Timer("Get pricing from AWS"): for instance_type in self.settings.utility.keys(): for zone in zones: if cache: most_recent = most_recents[{ "instance_type": instance_type, "availability_zone": zone }].timestamp start_at = MAX([Date(most_recent), Date.today() - WEEK]) else: start_at = Date.today() - WEEK if DEBUG_PRICING: Log.note("get pricing for {{instance_type}} starting at {{start_at}}", instance_type=instance_type, start_at=start_at ) next_token = None while True: resultset = self.ec2_conn.get_spot_price_history( product_description=coalesce(self.settings.product, "Linux/UNIX (Amazon VPC)"), instance_type=instance_type, availability_zone=zone, 
start_time=start_at.format(ISO8601), next_token=next_token ) next_token = resultset.next_token for p in resultset: prices.add(wrap({ "availability_zone": p.availability_zone, "instance_type": p.instance_type, "price": p.price, "product_description": p.product_description, "region": p.region.name, "timestamp": Date(p.timestamp).unix })) if not next_token: break with Timer("Save prices to file"): new_prices = jx.filter(prices, {"gte": {"timestamp": {"date": "today-2day"}}}) def stream(): # IT'S A LOT OF PRICES, STREAM THEM TO FILE prefix = "[\n" for p in new_prices: yield prefix yield convert.value2json(p) prefix = ",\n" yield "]" File(self.settings.price_file).write(stream()) return prices
def test_signal_is_boolean(self):
    """A Signal is falsy before go() and truthy afterwards."""
    signal = Signal()
    self.assertEqual(bool(signal), False)  # NOT YET TRIGGERED
    signal.go()
    self.assertEqual(bool(signal), True)  # TRIGGERED, NOW TRUTHY
def test_signal_is_not_null(self):
    """A Signal never compares equal to None, triggered or not."""
    signal = Signal()
    self.assertNotEqual(signal, None)  # UNTRIGGERED SIGNAL IS NOT None
    signal.go()
    self.assertNotEqual(signal, None)  # TRIGGERED SIGNAL IS STILL NOT None
class Queue(object):
    """
    SIMPLE MESSAGE QUEUE, multiprocessing.Queue REQUIRES SERIALIZATION, WHICH
    IS DIFFICULT TO USE JUST BETWEEN THREADS (SERIALIZATION REQUIRED)

    A deque protected by a Lock; THREAD_STOP is used as an in-band sentinel
    to close the queue.
    """

    def __init__(self, name, max=None, silent=False, unique=False, allow_add_after_close=False):
        """
        max - LIMIT THE NUMBER IN THE QUEUE, IF TOO MANY add() AND extend() WILL BLOCK
        silent - COMPLAIN IF THE READERS ARE TOO SLOW
        unique - SET True IF YOU WANT ONLY ONE INSTANCE IN THE QUEUE AT A TIME
        allow_add_after_close - SET True TO ALLOW add() EVEN AFTER THE QUEUE IS CLOSED
        """
        if not _Log:
            _late_import()  # LATE IMPORT BREAKS CIRCULAR DEPENDENCY WITH LOGGING
        self.name = name
        self.max = coalesce(max, 2 ** 10)
        self.silent = silent
        self.allow_add_after_close = allow_add_after_close
        self.unique = unique
        self.please_stop = Signal("stop signal for " + name)
        self.lock = Lock("lock for queue " + name)
        self.queue = deque()
        self.next_warning = time()  # FOR DEBUGGING

    def __iter__(self):
        """BLOCKING ITERATOR; ENDS WHEN THE QUEUE IS CLOSED (THREAD_STOP SEEN)."""
        try:
            while True:
                value = self.pop(self.please_stop)
                if value is THREAD_STOP:
                    break
                if value is not None:
                    yield value
        except Exception as e:
            _Log.warning("Tell me about what happened here", e)
        if not self.silent:
            _Log.note("queue iterator is done")

    def add(self, value, timeout=None):
        """
        ADD value TO THE END OF THE QUEUE; BLOCKS (UP TO timeout) WHEN FULL.
        ADDING THREAD_STOP CLOSES THE QUEUE.
        """
        with self.lock:
            if value is THREAD_STOP:
                # INSIDE THE lock SO THAT EXITING WILL RELEASE wait()
                self.queue.append(value)
                self.please_stop.go()
                return

            self._wait_for_queue_space(timeout=timeout)
            if self.please_stop and not self.allow_add_after_close:
                _Log.error("Do not add to closed queue")
            else:
                if self.unique:
                    if value not in self.queue:
                        self.queue.append(value)
                else:
                    self.queue.append(value)
        return self

    def push(self, value):
        """
        SNEAK value TO FRONT OF THE QUEUE
        """
        if self.please_stop and not self.allow_add_after_close:
            _Log.error("Do not push to closed queue")

        with self.lock:
            self._wait_for_queue_space()
            if not self.please_stop:
                self.queue.appendleft(value)
        return self

    def pop_message(self, till=None):
        """
        RETURN TUPLE (message, payload) CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE
        DUMMY IMPLEMENTATION FOR DEBUGGING
        """
        if till is not None and not isinstance(till, Signal):
            _Log.error("Expecting a signal")
        return Null, self.pop(till=till)

    def extend(self, values):
        """ADD MANY values AT ONCE, UNDER A SINGLE LOCK ACQUISITION."""
        if self.please_stop and not self.allow_add_after_close:
            _Log.error("Do not push to closed queue")

        with self.lock:
            # ONCE THE queue IS BELOW LIMIT, ALLOW ADDING MORE
            self._wait_for_queue_space()
            if not self.please_stop:
                if self.unique:
                    for v in values:
                        if v is THREAD_STOP:
                            self.please_stop.go()
                            continue
                        if v not in self.queue:
                            self.queue.append(v)
                else:
                    for v in values:
                        if v is THREAD_STOP:
                            self.please_stop.go()
                            continue
                        self.queue.append(v)
        return self

    def _wait_for_queue_space(self, timeout=DEFAULT_WAIT_TIME):
        """
        EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE

        :param timeout: SECONDS TO WAIT BEFORE RAISING A TIMEOUT ERROR; None WAITS FOREVER
        """
        wait_time = 5

        if DEBUG and len(self.queue) > 1 * 1000 * 1000:
            # FIXED: WAS `Log.warning(...)` (UNDEFINED NAME IN THIS MODULE) AND
            # THE {{name}} TEMPLATE PARAMETER WAS NEVER SUPPLIED
            _Log.warning("Queue {{name}} has over a million items", name=self.name)

        now = time()
        if timeout != None:
            time_to_stop_waiting = now + timeout
        else:
            time_to_stop_waiting = Null  # Null COMPARISONS ARE FALSY, SO NO DEADLINE

        if self.next_warning < now:
            self.next_warning = now + wait_time

        while not self.please_stop and len(self.queue) >= self.max:
            if now > time_to_stop_waiting:
                if not _Log:
                    _late_import()
                _Log.error(THREAD_TIMEOUT)

            if self.silent:
                self.lock.wait(Till(till=time_to_stop_waiting))
            else:
                self.lock.wait(Till(timeout=wait_time))

                if len(self.queue) >= self.max:
                    now = time()
                    if self.next_warning < now:
                        # THROTTLE THE ALERT; AT MOST ONE EVERY wait_time SECONDS
                        self.next_warning = now + wait_time
                        _Log.alert(
                            "Queue by name of {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
                            name=self.name,
                            num=len(self.queue),
                            wait_time=wait_time
                        )

    def __len__(self):
        with self.lock:
            return len(self.queue)

    def __nonzero__(self):
        # TRUTHY ONLY IF THERE IS A REAL (NON-SENTINEL) ITEM
        with self.lock:
            return any(r != THREAD_STOP for r in self.queue)

    def pop(self, till=None):
        """
        WAIT FOR NEXT ITEM ON THE QUEUE
        RETURN THREAD_STOP IF QUEUE IS CLOSED
        RETURN None IF till IS REACHED AND QUEUE IS STILL EMPTY

        :param till:  A `Signal` to stop waiting and return None
        :return:  A value, or a THREAD_STOP or None
        """
        if till is not None and not isinstance(till, Signal):
            _Log.error("expecting a signal")

        with self.lock:
            while True:
                if self.queue:
                    value = self.queue.popleft()
                    return value
                if self.please_stop:
                    break
                if not self.lock.wait(till=till | self.please_stop):
                    if self.please_stop:
                        break
                    return None  # till FIRED BEFORE ANY ITEM ARRIVED
        if DEBUG or not self.silent:
            _Log.note(self.name + " queue stopped")
        return THREAD_STOP

    def pop_all(self):
        """
        NON-BLOCKING POP ALL IN QUEUE, IF ANY
        """
        with self.lock:
            output = list(self.queue)
            self.queue.clear()
        return output

    def pop_one(self):
        """
        NON-BLOCKING POP IN QUEUE, IF ANY
        """
        with self.lock:
            if self.please_stop:
                return [THREAD_STOP]
            elif not self.queue:
                return None
            else:
                # NOTE(review): pops from the RIGHT end (LIFO), while pop()
                # uses popleft() (FIFO) — looks inconsistent; confirm intent
                v = self.queue.pop()
                if v is THREAD_STOP:  # SENDING A STOP INTO THE QUEUE IS ALSO AN OPTION
                    self.please_stop.go()
                return v

    def close(self):
        with self.lock:
            self.please_stop.go()

    def commit(self):
        # NO-OP; PRESENT FOR INTERFACE COMPATIBILITY WITH PersistentQueue
        pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
class Python(object):
    """
    PROXY TO A mo_threads python_worker.py SUBPROCESS; COMMANDS ARE SENT AS
    JSON LINES ON stdin, RESPONSES READ FROM stdout BY A DAEMON THREAD.
    """

    def __init__(self, name, config):
        config = wrap(config)
        if config.debug.logs:
            Log.error("not allowed to configure logging on other process")

        self.process = Process(name, [PYTHON, "mo_threads" + os.sep + "python_worker.py"], shell=True)
        # FIRST MESSAGE IS THE WORKER CONFIGURATION
        self.process.stdin.add(value2json(set_default({"debug": {"trace": True}}, config)))

        self.lock = Lock("wait for response from " + name)
        self.current_task = None       # Signal SET WHILE A COMMAND IS IN FLIGHT
        self.current_response = None   # FILLED BY _daemon ON "out"
        self.current_error = None      # FILLED BY _daemon ON "err"

        self.daemon = Thread.run("", self._daemon)
        self.errors = Thread.run("", self._stderr)

    def _execute(self, command):
        """
        SEND ONE COMMAND AND BLOCK UNTIL THE WORKER RESPONDS.
        ONLY ONE COMMAND IN FLIGHT AT A TIME.
        """
        with self.lock:
            if self.current_task is not None:
                self.current_task.wait()  # WAIT FOR PREVIOUS COMMAND TO FINISH
            self.current_task = Signal()
            self.current_response = None
            self.current_error = None

        self.process.stdin.add(value2json(command))
        self.current_task.wait()

        with self.lock:
            try:
                if self.current_error:
                    Log.error("problem with process call", cause=Except.new_instance(self.current_error))
                else:
                    return self.current_response
            finally:
                # CLEAR STATE SO THE NEXT COMMAND STARTS FRESH
                self.current_task = None
                self.current_response = None
                self.current_error = None

    def _daemon(self, please_stop):
        """READ WORKER stdout; DISPATCH log/out/err MESSAGES."""
        while not please_stop:
            line = self.process.stdout.pop(till=please_stop)
            if line == THREAD_STOP:
                break
            try:
                data = json2value(line.decode('utf8'))
                if "log" in data:
                    Log.main_log.write(*data.log)
                elif "out" in data:
                    with self.lock:
                        self.current_response = data.out
                        self.current_task.go()
                elif "err" in data:
                    with self.lock:
                        self.current_error = data.err
                        self.current_task.go()
            except Exception:
                # NON-PROTOCOL OUTPUT IS LOGGED, NOT FATAL
                Log.note("non-json line: {{line}}", line=line)
        DEBUG and Log.note("stdout reader is done")

    def _stderr(self, please_stop):
        """RELAY WORKER stderr LINES INTO OUR LOG."""
        while not please_stop:
            try:
                line = self.process.stderr.pop(till=please_stop)
                if line == THREAD_STOP:
                    please_stop.go()
                    break
                Log.note("Error line from {{name}}({{pid}}): {{line}}", line=line, name=self.process.name, pid=self.process.pid)
            except Exception as e:
                Log.error("could not process line", cause=e)

    def import_module(self, module_name, var_names=None):
        """IMPORT A MODULE (OR SELECTED NAMES FROM IT) IN THE WORKER."""
        if var_names is None:
            self._execute({"import": module_name})
        else:
            self._execute({"import": {"from": module_name, "vars": var_names}})

    def set(self, var_name, value):
        """SET A VARIABLE IN THE WORKER."""
        # FIXED: WAS {"set": {var_name, value}} — A SET LITERAL, NOT A MAPPING,
        # WHICH CAN NOT BE JSON-SERIALIZED AND IS THE WRONG PROTOCOL SHAPE
        self._execute({"set": {var_name: value}})

    def get(self, var_name):
        """GET A VARIABLE FROM THE WORKER."""
        return self._execute({"get": var_name})

    def execute_script(self, script):
        """RUN AN ARBITRARY SCRIPT IN THE WORKER."""
        return self._execute({"exec": script})

    def __getattr__(self, item):
        # ANY OTHER ATTRIBUTE BECOMES A REMOTE PROCEDURE CALL
        def output(*args, **kwargs):
            if len(args):
                if len(kwargs.keys()):
                    Log.error("Not allowed to use both args and kwargs")
                return self._execute({item: args})
            else:
                return self._execute({item: kwargs})
        return output

    def stop(self):
        """ASK WORKER TO STOP, THEN SHUT DOWN READER THREADS."""
        self._execute({"stop": {}})
        self.process.join()
        self.daemon.stop()
        self.errors.stop()
class PersistentQueue(object):
    """
    THREAD-SAFE, PERSISTENT QUEUE

    CAN HANDLE MANY PRODUCERS, BUT THE pop(), commit() IDIOM CAN HANDLE ONLY
    ONE CONSUMER.

    IT IS IMPORTANT YOU commit() or close(), OTHERWISE NOTHING COMES OFF THE QUEUE

    State is a journal of JSON deltas appended to a file; self.db is the
    in-memory replay of that journal, with db.status.start/end marking the
    live window of items.
    """

    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " + self.file.name)
        self.please_stop = Signal()
        self.db = Data()
        self.pending = []  # DELTAS NOT YET WRITTEN TO FILE

        if self.file.exists:
            # REPLAY THE JOURNAL; BAD LINES ARE SKIPPED
            for line in self.file:
                with suppress_exception:
                    delta = json2value(line)
                    apply_delta(self.db, delta)
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                with suppress_exception:
                    # int(k) RAISES FOR NON-NUMERIC KEYS
                    # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
                    if k != "status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
            if lost:
                Log.warning("queue file had {{num}} items lost", num=lost)

            DEBUG and Log.note(
                "Persistent queue {{name}} found with {{num}} items",
                name=self.file.abspath,
                num=len(self))
        else:
            self.db.status = Data(start=0, end=0)
            self.start = self.db.status.start
            DEBUG and Log.note("New persistent queue {{name}}", name=self.file.abspath)

    def _add_pending(self, delta):
        # QUEUE A DELTA; NOT DURABLE UNTIL _commit()
        delta = to_data(delta)
        self.pending.append(delta)

    def _apply_pending(self):
        # FOLD ALL PENDING DELTAS INTO THE IN-MEMORY STATE
        for delta in self.pending:
            apply_delta(self.db, delta)
        self.pending = []

    def __iter__(self):
        """
        BLOCKING ITERATOR
        """
        while not self.please_stop:
            try:
                value = self.pop()
                if value is not THREAD_STOP:
                    yield value
            except Exception as e:
                Log.warning("Tell me about what happened here", cause=e)

    def add(self, value):
        """ADD value TO THE QUEUE AND PERSIST IMMEDIATELY; THREAD_STOP CLOSES."""
        with self.lock:
            if self.closed:
                Log.error("Queue is closed")

            if value is THREAD_STOP:
                DEBUG and Log.note("Stop is seen in persistent queue")
                self.please_stop.go()
                return

            self._add_pending({"add": {str(self.db.status.end): value}})
            self.db.status.end += 1
            self._add_pending({"add": {"status.end": self.db.status.end}})
            self._commit()
        return self

    def __len__(self):
        with self.lock:
            return self.db.status.end - self.start

    def __getitem__(self, item):
        # INDEX RELATIVE TO THE CONSUMER'S CURRENT POSITION
        return self.db[str(item + self.start)]

    def pop(self, timeout=None):
        """
        :param timeout: OPTIONAL DURATION
        :return: None, IF timeout PASSES
        """
        with self.lock:
            while not self.please_stop:
                if self.db.status.end > self.start:
                    value = self.db[str(self.start)]
                    self.start += 1  # NOT DURABLE UNTIL commit()
                    return value

                if timeout is not None:
                    with suppress_exception:
                        self.lock.wait(timeout=timeout)
                        if self.db.status.end <= self.start:
                            return None
                else:
                    self.lock.wait()
            DEBUG and Log.note("persistent queue already stopped")
            return THREAD_STOP

    def pop_all(self):
        """
        NON-BLOCKING POP ALL IN QUEUE, IF ANY
        """
        with self.lock:
            if self.please_stop:
                return [THREAD_STOP]
            if self.db.status.end == self.start:
                return []

            output = []
            for i in range(self.start, self.db.status.end):
                output.append(self.db[str(i)])

            self.start = self.db.status.end
            return output

    def rollback(self):
        """UNDO ALL UNCOMMITTED pop()s; CONSUMER RESTARTS AT LAST COMMIT."""
        with self.lock:
            if self.closed:
                return
            self.start = self.db.status.start
            self.pending = []

    def commit(self):
        """MAKE ALL pop()s SINCE LAST COMMIT DURABLE."""
        with self.lock:
            if self.closed:
                Log.error("Queue is closed, commit not allowed")

            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})

                if self.db.status.end - self.start < 10 or randoms.range(0, 1000) == 0:  # FORCE RE-WRITE TO LIMIT FILE SIZE
                    # SIMPLY RE-WRITE FILE
                    if DEBUG:
                        Log.note(
                            "Re-write {{num_keys}} keys to persistent queue",
                            num_keys=self.db.status.end - self.start)
                        # SANITY CHECK: NO KEYS SHOULD REMAIN BELOW status.start
                        for k in self.db.keys():
                            if k == "status" or int(k) >= self.db.status.start:
                                continue
                            Log.error("Not expecting {{key}}", key=k)
                    self._commit()
                    self.file.write(value2json({"add": self.db}) + "\n")
                else:
                    self._commit()
            except Exception as e:
                raise e

    def _commit(self):
        # APPEND PENDING DELTAS TO THE JOURNAL, THEN APPLY THEM IN MEMORY
        self.file.append("\n".join(value2json(p) for p in self.pending))
        self._apply_pending()

    def close(self):
        """FLUSH, THEN CLOSE; DELETES THE FILE IF THE QUEUE DRAINED CLEANLY."""
        self.please_stop.go()
        with self.lock:
            if self.db is None:
                return

            # NOTE(review): add() is called while self.lock is held and add()
            # also acquires self.lock — assumes the Lock is re-entrant (or that
            # the early THREAD_STOP path avoids re-entry); confirm
            self.add(THREAD_STOP)

            if self.db.status.end == self.start:
                DEBUG and Log.note("persistent queue clear and closed")
                self.file.delete()
            else:
                DEBUG and Log.note(
                    "persistent queue closed with {{num}} items left", num=len(self))
                try:
                    self._add_pending({"add": {"status.start": self.start}})
                    for i in range(self.db.status.start, self.start):
                        self._add_pending({"remove": str(i)})
                    self.file.write(
                        value2json({"add": self.db}) + "\n" +
                        ("\n".join(value2json(p) for p in self.pending)) + "\n")
                    self._apply_pending()
                except Exception as e:
                    raise e
            self.db = None  # MARKS THE QUEUE AS CLOSED

    @property
    def closed(self):
        # CLOSED WHEN close() HAS CLEARED self.db
        with self.lock:
            return self.db is None
class PersistentQueue(object):
    """
    THREAD-SAFE, PERSISTENT QUEUE

    CAN HANDLE MANY PRODUCERS, BUT THE pop(), commit() IDIOM CAN HANDLE ONLY
    ONE CONSUMER.

    IT IS IMPORTANT YOU commit() or close(), OTHERWISE NOTHING COMES OFF THE QUEUE

    State is a journal of JSON deltas appended to a file; self.db is the
    in-memory replay of that journal, with db.status.start/end marking the
    live window of items.
    """

    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " + self.file.name)
        self.please_stop = Signal()
        self.db = Data()
        self.pending = []  # DELTAS NOT YET WRITTEN TO FILE

        if self.file.exists:
            # REPLAY THE JOURNAL; BAD LINES ARE SKIPPED
            for line in self.file:
                with suppress_exception:
                    delta = mo_json.json2value(line)
                    apply_delta(self.db, delta)
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                with suppress_exception:
                    # int(k) RAISES FOR NON-NUMERIC KEYS
                    # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
                    if k != "status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
            if lost:
                Log.warning("queue file had {{num}} items lost", num=lost)

            DEBUG and Log.note(
                "Persistent queue {{name}} found with {{num}} items",
                name=self.file.abspath,
                num=len(self))
        else:
            self.db.status = Data(
                start=0,
                end=0
            )
            self.start = self.db.status.start
            DEBUG and Log.note("New persistent queue {{name}}", name=self.file.abspath)

    def _add_pending(self, delta):
        # QUEUE A DELTA; NOT DURABLE UNTIL _commit()
        delta = wrap(delta)
        self.pending.append(delta)

    def _apply_pending(self):
        # FOLD ALL PENDING DELTAS INTO THE IN-MEMORY STATE
        for delta in self.pending:
            apply_delta(self.db, delta)
        self.pending = []

    def __iter__(self):
        """
        BLOCKING ITERATOR
        """
        while not self.please_stop:
            try:
                value = self.pop()
                if value is not THREAD_STOP:
                    yield value
            except Exception as e:
                Log.warning("Tell me about what happened here", cause=e)

    def add(self, value):
        """ADD value TO THE QUEUE AND PERSIST IMMEDIATELY; THREAD_STOP CLOSES."""
        with self.lock:
            if self.closed:
                Log.error("Queue is closed")

            if value is THREAD_STOP:
                DEBUG and Log.note("Stop is seen in persistent queue")
                self.please_stop.go()
                return

            self._add_pending({"add": {str(self.db.status.end): value}})
            self.db.status.end += 1
            self._add_pending({"add": {"status.end": self.db.status.end}})
            self._commit()
        return self

    def __len__(self):
        with self.lock:
            return self.db.status.end - self.start

    def __getitem__(self, item):
        # INDEX RELATIVE TO THE CONSUMER'S CURRENT POSITION
        return self.db[str(item + self.start)]

    def pop(self, timeout=None):
        """
        :param timeout: OPTIONAL DURATION
        :return: None, IF timeout PASSES
        """
        with self.lock:
            while not self.please_stop:
                if self.db.status.end > self.start:
                    value = self.db[str(self.start)]
                    self.start += 1  # NOT DURABLE UNTIL commit()
                    return value

                if timeout is not None:
                    with suppress_exception:
                        self.lock.wait(timeout=timeout)
                        if self.db.status.end <= self.start:
                            return None
                else:
                    self.lock.wait()
            DEBUG and Log.note("persistent queue already stopped")
            return THREAD_STOP

    def pop_all(self):
        """
        NON-BLOCKING POP ALL IN QUEUE, IF ANY
        """
        with self.lock:
            if self.please_stop:
                return [THREAD_STOP]
            if self.db.status.end == self.start:
                return []

            output = []
            for i in range(self.start, self.db.status.end):
                output.append(self.db[str(i)])

            self.start = self.db.status.end
            return output

    def rollback(self):
        """UNDO ALL UNCOMMITTED pop()s; CONSUMER RESTARTS AT LAST COMMIT."""
        with self.lock:
            if self.closed:
                return
            self.start = self.db.status.start
            self.pending = []

    def commit(self):
        """MAKE ALL pop()s SINCE LAST COMMIT DURABLE."""
        with self.lock:
            if self.closed:
                Log.error("Queue is closed, commit not allowed")

            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})

                if self.db.status.end - self.start < 10 or Random.range(0, 1000) == 0:  # FORCE RE-WRITE TO LIMIT FILE SIZE
                    # SIMPLY RE-WRITE FILE
                    if DEBUG:
                        Log.note("Re-write {{num_keys}} keys to persistent queue", num_keys=self.db.status.end - self.start)
                        # SANITY CHECK: NO KEYS SHOULD REMAIN BELOW status.start
                        for k in self.db.keys():
                            if k == "status" or int(k) >= self.db.status.start:
                                continue
                            Log.error("Not expecting {{key}}", key=k)
                    self._commit()
                    self.file.write(mo_json.value2json({"add": self.db}) + "\n")
                else:
                    self._commit()
            except Exception as e:
                raise e

    def _commit(self):
        # APPEND PENDING DELTAS TO THE JOURNAL, THEN APPLY THEM IN MEMORY
        self.file.append("\n".join(mo_json.value2json(p) for p in self.pending))
        self._apply_pending()

    def close(self):
        """FLUSH, THEN CLOSE; DELETES THE FILE IF THE QUEUE DRAINED CLEANLY."""
        self.please_stop.go()
        with self.lock:
            if self.db is None:
                return

            # NOTE(review): add() is called while self.lock is held and add()
            # also acquires self.lock — assumes the Lock is re-entrant (or that
            # the early THREAD_STOP path avoids re-entry); confirm
            self.add(THREAD_STOP)

            if self.db.status.end == self.start:
                DEBUG and Log.note("persistent queue clear and closed")
                self.file.delete()
            else:
                DEBUG and Log.note("persistent queue closed with {{num}} items left", num=len(self))
                try:
                    self._add_pending({"add": {"status.start": self.start}})
                    for i in range(self.db.status.start, self.start):
                        self._add_pending({"remove": str(i)})
                    self.file.write(
                        mo_json.value2json({"add": self.db}) + "\n" +
                        ("\n".join(mo_json.value2json(p) for p in self.pending)) + "\n")
                    self._apply_pending()
                except Exception as e:
                    raise e
            self.db = None  # MARKS THE QUEUE AS CLOSED

    @property
    def closed(self):
        # CLOSED WHEN close() HAS CLEARED self.db
        with self.lock:
            return self.db is None
class Python(object):
    """
    PROXY TO A mo_threads python_worker.py SUBPROCESS; COMMANDS ARE SENT AS
    JSON LINES ON stdin, RESPONSES READ FROM stdout BY A DAEMON THREAD.
    """

    def __init__(self, name, config):
        config = to_data(config)
        if config.debug.logs:
            Log.error("not allowed to configure logging on other process")
        Log.note("begin process")
        # WINDOWS REQUIRED shell, WHILE LINUX NOT
        shell = "windows" in platform.system().lower()
        self.process = Process(
            name,
            [PYTHON, "-u", "mo_threads" + os.sep + "python_worker.py"],
            debug=False,
            cwd=os.getcwd(),
            shell=shell
        )
        # FIRST MESSAGE IS THE WORKER CONFIGURATION
        self.process.stdin.add(
            value2json(set_default({}, config, {"debug": {
                "trace": True
            }})))
        # WAIT FOR THE WORKER'S HANDSHAKE BEFORE ACCEPTING COMMANDS
        status = self.process.stdout.pop()
        if status != '{"out":"ok"}':
            Log.error("could not start python\n{{error|indent}}",
                      error=self.process.stderr.pop_all() + [status] +
                      self.process.stdin.pop_all())
        self.lock = Lock("wait for response from " + name)
        self.current_task = DONE       # Signal; DONE MEANS NO COMMAND IN FLIGHT
        self.current_response = None   # FILLED BY _daemon ON "out"
        self.current_error = None      # FILLED BY _daemon ON "err"
        self.daemon = Thread.run("", self._daemon)
        self.errors = Thread.run("", self._stderr)

    def _execute(self, command):
        """
        SEND ONE COMMAND AND BLOCK UNTIL THE WORKER RESPONDS (OR DIES).
        ONLY ONE COMMAND IN FLIGHT AT A TIME.
        """
        with self.lock:
            self.current_task.wait()  # WAIT FOR PREVIOUS COMMAND TO FINISH
            self.current_task = Signal()
            self.current_response = None
            self.current_error = None
        if self.process.service_stopped:
            Log.error("python is not running")
        self.process.stdin.add(value2json(command))
        # ALSO WAKE UP IF THE WORKER PROCESS DIES
        (self.current_task | self.process.service_stopped).wait()
        try:
            if self.current_error:
                Log.error("problem with process call", cause=Except.new_instance(self.current_error))
            else:
                return self.current_response
        finally:
            # CLEAR STATE SO THE NEXT COMMAND STARTS FRESH
            self.current_task = DONE
            self.current_response = None
            self.current_error = None

    def _daemon(self, please_stop):
        """READ WORKER stdout; DISPATCH log/out/err MESSAGES."""
        while not please_stop:
            line = self.process.stdout.pop(till=please_stop)
            if line == THREAD_STOP:
                break
            try:
                data = json2value(line)
                if "log" in data:
                    Log.main_log.write(*data.log)
                elif "out" in data:
                    self.current_response = data.out
                    self.current_task.go()
                elif "err" in data:
                    self.current_error = data.err
                    self.current_task.go()
            except Exception:
                # NON-PROTOCOL OUTPUT IS LOGGED, NOT FATAL
                Log.note("non-json line: {{line}}", line=line)
        DEBUG and Log.note("stdout reader is done")

    def _stderr(self, please_stop):
        """RELAY WORKER stderr LINES INTO OUR LOG."""
        while not please_stop:
            try:
                line = self.process.stderr.pop(till=please_stop)
                if line == THREAD_STOP:
                    please_stop.go()
                    break
                Log.note("Error line from {{name}}({{pid}}): {{line}}", line=line, name=self.process.name, pid=self.process.pid)
            except Exception as e:
                Log.error("could not process line", cause=e)

    def import_module(self, module_name, var_names=None):
        """IMPORT A MODULE (OR SELECTED NAMES FROM IT) IN THE WORKER."""
        if var_names is None:
            self._execute({"import": module_name})
        else:
            self._execute({"import": {"from": module_name, "vars": var_names}})

    def set(self, var_name, value):
        """SET A VARIABLE IN THE WORKER."""
        # FIXED: WAS {"set": {var_name, value}} — A SET LITERAL, NOT A MAPPING,
        # WHICH CAN NOT BE JSON-SERIALIZED AND IS THE WRONG PROTOCOL SHAPE
        self._execute({"set": {var_name: value}})

    def get(self, var_name):
        """GET A VARIABLE FROM THE WORKER."""
        return self._execute({"get": var_name})

    def execute_script(self, script):
        """RUN AN ARBITRARY SCRIPT IN THE WORKER."""
        return self._execute({"exec": script})

    def __getattr__(self, item):
        # ANY OTHER ATTRIBUTE BECOMES A REMOTE PROCEDURE CALL
        def output(*args, **kwargs):
            if len(args):
                if kwargs.keys():
                    Log.error("Not allowed to use both args and kwargs")
                return self._execute({item: args})
            else:
                return self._execute({item: kwargs})
        return output

    def stop(self):
        """ASK WORKER TO STOP, THEN SHUT DOWN READER THREADS."""
        self._execute({"stop": {}})
        self.process.join()
        self.daemon.stop()
        self.errors.stop()
def test_lock_and_till(self):
    """
    Two worker threads park inside `locker`; the main thread signals them,
    then keeps cycling the lock so both can wake, finish, and stop.
    """
    locker = Lock("prime lock")
    got_signal = Signal()
    a_is_ready = Signal("a lock")
    b_is_ready = Signal("b lock")

    Log.note("begin")

    def loop(is_ready, please_stop):
        with locker:
            while not got_signal:
                # SHORT till SO THE LOOP RE-CHECKS got_signal FREQUENTLY
                locker.wait(till=Till(seconds=0.01))
                is_ready.go()
                Log.note("{{thread}} is ready", thread=Thread.current().name)
            Log.note("outside loop")
            # FINAL wait RELEASES locker SO THE OTHER THREAD CAN PROCEED
            locker.wait()
            Log.note("thread is expected to get here")

    # release() SO join() BELOW DOES NOT COMPLAIN ABOUT UNJOINED THREADS
    thread_a = Thread.run("a", loop, a_is_ready).release()
    thread_b = Thread.run("b", loop, b_is_ready).release()

    a_is_ready.wait()
    b_is_ready.wait()

    timeout = Till(seconds=1)
    with locker:
        got_signal.go()
        while not thread_a.stopped:
            # WE MUST CONTINUE TO USE THE locker TO ENSURE THE OTHER THREADS ARE NOT ORPHANED IN THERE
            locker.wait(till=Till(seconds=0.1))
            Log.note("wait for a thread")
        while not thread_b.stopped:
            # WE MUST CONTINUE TO USE THE locker TO ENSURE THE OTHER THREADS ARE NOT ORPHANED IN THERE
            locker.wait(till=Till(seconds=0.1))
            Log.note("wait for b thread")
    thread_a.join()
    thread_b.join()
    # timeout IS TRUTHY ONLY IF THE ONE-SECOND Till FIRED
    if timeout:
        Log.error("Took too long")

    self.assertTrue(bool(thread_a.stopped), "Thread should be done by now")
    self.assertTrue(bool(thread_b.stopped), "Thread should be done by now")
class Clogger:
    '''
    Look-ahead scanner over the hg changelog.  Maintains the csetLog table
    (revnum, revision, timestamp) and runs four background workers:
    tip-filling, backfilling, maintenance (timestamping/trimming), and
    deletion.  Implemented as a singleton via __new__.
    '''
    # Singleton of the look-ahead scanner Clogger
    SINGLE_CLOGGER = None

    def __new__(cls, *args, **kwargs):
        # Always return the one shared instance
        if cls.SINGLE_CLOGGER is None:
            cls.SINGLE_CLOGGER = object.__new__(cls)
        return cls.SINGLE_CLOGGER

    def __init__(self, conn=None, tuid_service=None, start_workers=True, new_table=False, kwargs=None):
        '''
        :param conn: existing database connection, or None to open one from config
        :param tuid_service: existing TUIDService, or None to construct one
        :param start_workers: when True, launch the four background workers
        :param new_table: when True, drop and recreate the csetLog table
        :param kwargs: configuration (expects .database, .hg_cache, .tuid, .hg.branch)
        '''
        try:
            self.config = kwargs
            self.conn = conn if conn else sql.Sql(self.config.database.name)
            self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache, use_cache=True) if self.config.hg_cache else Null
            self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
                kwargs=self.config.tuid, conn=self.conn, clogger=self
            )
            self.rev_locker = Lock()
            self.working_locker = Lock()

            if new_table:
                with self.conn.transaction() as t:
                    t.execute("DROP TABLE IF EXISTS csetLog")
            self.init_db()

            self.next_revnum = coalesce(self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
            self.csets_todo_backwards = Queue(name="Clogger.csets_todo_backwards")
            self.deletions_todo = Queue(name="Clogger.deletions_todo")
            self.maintenance_signal = Signal(name="Clogger.maintenance_signal")

            if 'tuid' in self.config:
                self.config = self.config.tuid

            self.disable_backfilling = False
            self.disable_tipfilling = False
            self.disable_deletion = False
            self.disable_maintenance = False

            self.backfill_thread = None
            self.tipfill_thread = None
            self.deletion_thread = None
            self.maintenance_thread = None

            # Make sure we are filled before allowing queries
            numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
            if numrevs < MINIMUM_PERMANENT_CSETS:
                Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
                oldest_rev = 'tip'
                with self.conn.transaction() as t:
                    tmp = t.query("SELECT min(revnum), revision FROM csetLog").data[0][1]
                    if tmp:
                        oldest_rev = tmp
                self._fill_in_range(
                    MINIMUM_PERMANENT_CSETS - numrevs,
                    oldest_rev,
                    timestamp=False
                )
            Log.note(
                "Table is filled with atleast {{minim}} entries.",
                minim=MINIMUM_PERMANENT_CSETS
            )

            if start_workers:
                self.start_workers()
        except Exception as e:
            Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))

    def start_backfilling(self):
        # Idempotent: only one backfill worker is ever launched
        if not self.backfill_thread:
            self.backfill_thread = Thread.run('clogger-backfill', self.fill_backward_with_list)

    def start_tipfillling(self):
        # Idempotent: only one tip-filling worker is ever launched
        if not self.tipfill_thread:
            self.tipfill_thread = Thread.run('clogger-tip', self.fill_forward_continuous)

    # FIX: correctly-spelled alias; original (misspelled) name kept for callers
    start_tipfilling = start_tipfillling

    def start_maintenance(self):
        # Idempotent: only one maintenance worker is ever launched
        if not self.maintenance_thread:
            self.maintenance_thread = Thread.run('clogger-maintenance', self.csetLog_maintenance)

    def start_deleter(self):
        # Idempotent: only one deletion worker is ever launched
        if not self.deletion_thread:
            self.deletion_thread = Thread.run('clogger-deleter', self.csetLog_deleter)

    def start_workers(self):
        # Launch all four background workers
        self.start_tipfillling()
        self.start_backfilling()
        self.start_maintenance()
        self.start_deleter()
        Log.note("Started clogger workers.")

    def init_db(self):
        # Create the changeset log table if it does not already exist
        with self.conn.transaction() as t:
            t.execute('''
            CREATE TABLE IF NOT EXISTS csetLog (
                revnum         INTEGER PRIMARY KEY,
                revision       CHAR(12) NOT NULL,
                timestamp      INTEGER
            );''')

    def disable_all(self):
        # Pause every background worker (they poll these flags)
        self.disable_tipfilling = True
        self.disable_backfilling = True
        self.disable_maintenance = True
        self.disable_deletion = True

    def revnum(self):
        '''
        :return: max revnum that was added
        '''
        return coalesce(self.conn.get_one("SELECT max(revnum) as revnum FROM csetLog")[0], 0)

    def get_tip(self, transaction):
        # (revnum, revision) of the newest known changeset
        return transaction.get_one(
            "SELECT max(revnum) as revnum, revision FROM csetLog"
        )

    def get_tail(self, transaction):
        # (revnum, revision) of the oldest known changeset
        return transaction.get_one(
            "SELECT min(revnum) as revnum, revision FROM csetLog"
        )

    def _get_clog(self, clog_url):
        # Fetch one page of the json changelog; Log.error raises on failure
        try:
            Log.note("Searching through changelog {{url}}", url=clog_url)
            clog_obj = http.get_json(clog_url, retry=RETRY)
            return clog_obj
        except Exception as e:
            Log.error(
                "Unexpected error getting changset-log for {{url}}: {{error}}",
                url=clog_url,
                error=e
            )

    def _get_one_revision(self, transaction, cset_entry):
        # Returns a single revision if it exists
        _, rev, _ = cset_entry
        return transaction.get_one("SELECT revision FROM csetLog WHERE revision=?", (rev,))

    def _get_one_revnum(self, transaction, rev):
        # Returns a single revnum if it exists
        return transaction.get_one("SELECT revnum FROM csetLog WHERE revision=?", (rev,))

    def _get_revnum_range(self, transaction, revnum1, revnum2):
        # Returns a range of revision numbers (that is inclusive)
        high_num = max(revnum1, revnum2)
        low_num = min(revnum1, revnum2)
        return transaction.query(
            "SELECT revnum, revision FROM csetLog WHERE "
            "revnum >= " + str(low_num) + " AND revnum <= " + str(high_num)
        ).data

    def recompute_table_revnums(self):
        '''
        Recomputes the revnums for the csetLog table
        by creating a new table, and copying csetLog to
        it. The INTEGER PRIMARY KEY in the temp table auto increments
        as rows are added.

        IMPORTANT: Only call this after acquiring the lock `self.working_locker`.
        :return:
        '''
        with self.conn.transaction() as t:
            t.execute('''
            CREATE TABLE temp (
                revnum         INTEGER PRIMARY KEY,
                revision       CHAR(12) NOT NULL,
                timestamp      INTEGER
            );''')

            t.execute(
                "INSERT INTO temp (revision, timestamp) "
                "SELECT revision, timestamp FROM csetlog ORDER BY revnum ASC"
            )

            t.execute("DROP TABLE csetLog;")
            t.execute("ALTER TABLE temp RENAME TO csetLog;")

    def check_for_maintenance(self):
        '''
        Returns True if the maintenance worker should be run now,
        and False otherwise.
        :return:
        '''
        numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
        Log.note("Number of csets in csetLog table: {{num}}", num=numrevs)
        if numrevs >= SIGNAL_MAINTENANCE_CSETS:
            return True
        return False

    def add_cset_entries(self, ordered_rev_list, timestamp=False, number_forward=True):
        '''
        Adds a list of revisions to the table. Assumes ordered_rev_list is an ordered
        based on how changesets are found in the changelog. Going forwards or backwards is dealt
        with by flipping the list
        :param ordered_cset_list: Order given from changeset log searching.
        :param timestamp: If false, records are kept indefinitely
                          but if holes exist: (delete, None, delete, None)
                          those delete's with None's around them
                          will not be deleted.
        :param numbered: If True, this function will number the revision list
                         by going forward from max(revNum), else it'll go backwards
                         from revNum, then add X to all revnums and self.next_revnum
                         where X is the length of ordered_rev_list
        :return:
        '''
        with self.conn.transaction() as t:
            current_min = t.get_one("SELECT min(revnum) FROM csetlog")[0]
            current_max = t.get_one("SELECT max(revnum) FROM csetlog")[0]
            if not current_min or not current_max:
                current_min = 0
                current_max = 0

            direction = -1
            start = current_min - 1
            if number_forward:
                direction = 1
                start = current_max + 1
                # changelog pages are newest-first; flip to oldest-first
                ordered_rev_list = ordered_rev_list[::-1]

            insert_list = [
                (
                    start + direction * count,
                    rev,
                    int(time.time()) if timestamp else -1
                )
                for count, rev in enumerate(ordered_rev_list)
            ]

            # In case of overlapping requests
            fmt_insert_list = []
            for cset_entry in insert_list:
                tmp = self._get_one_revision(t, cset_entry)
                if not tmp:
                    fmt_insert_list.append(cset_entry)

            for _, tmp_insert_list in jx.groupby(fmt_insert_list, size=SQL_CSET_BATCH_SIZE):
                t.execute(
                    "INSERT INTO csetLog (revnum, revision, timestamp)" +
                    " VALUES " +
                    sql_list(
                        quote_set((revnum, revision, timestamp))
                        for revnum, revision, timestamp in tmp_insert_list
                    )
                )

        # Move the revision numbers forward if needed
        self.recompute_table_revnums()

        # Start a maintenance run if needed
        if self.check_for_maintenance():
            Log.note("Scheduling maintenance run on clogger.")
            self.maintenance_signal.go()

    def _fill_in_range(self, parent_cset, child_cset, timestamp=False, number_forward=True):
        '''
        Fills cset logs in a certain range. 'parent_cset' can be an int and in
        that case, we get that many changesets instead. If parent_cset is an int, then
        we consider that we are going backwards (number_forward is False) and we ignore the first
        changeset of the first log, and we ignore the setting for number_forward.
        Otherwise, we continue until we find the given 'parent_cset'.
        :param parent_cset:
        :param child_cset:
        :param timestamp:
        :param number_forward:
        :return:
        '''
        csets_to_add = []
        found_parent = False
        find_parent = False
        if type(parent_cset) != int:
            find_parent = True
        elif parent_cset >= MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG:
            Log.warning(
                "Requested number of new changesets {{num}} is too high. "
                "Max number that can be requested is {{maxnum}}.",
                num=parent_cset,
                maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG
            )
            return None

        csets_found = 0
        clogs_seen = 0
        final_rev = child_cset
        while not found_parent and clogs_seen < MAX_BACKFILL_CLOGS:
            clog_url = str(HG_URL) + "/" + self.config.hg.branch + "/json-log/" + final_rev
            clog_obj = self._get_clog(clog_url)
            clog_csets_list = list(clog_obj['changesets'])
            # last entry is the first of the next page, so skip it here
            for clog_cset in clog_csets_list[:-1]:
                if not number_forward and csets_found <= 0:
                    # Skip this entry it already exists
                    csets_found += 1
                    continue

                nodes_cset = clog_cset['node'][:12]
                if find_parent:
                    if nodes_cset == parent_cset:
                        found_parent = True
                        if not number_forward:
                            # When going forward this entry is
                            # the given parent
                            csets_to_add.append(nodes_cset)
                        break
                else:
                    if csets_found + 1 > parent_cset:
                        found_parent = True
                        if not number_forward:
                            # When going forward this entry is
                            # the given parent (which is supposed
                            # to already exist)
                            csets_to_add.append(nodes_cset)
                        break
                    csets_found += 1
                csets_to_add.append(nodes_cset)
            # FIX: idiomatic truth test (was `found_parent == True`)
            if found_parent:
                break

            clogs_seen += 1
            final_rev = clog_csets_list[-1]['node'][:12]

        if found_parent:
            self.add_cset_entries(csets_to_add, timestamp=timestamp, number_forward=number_forward)
        else:
            Log.warning(
                "Couldn't find the end of the request for {{request}}. "
                "Max number that can be requested through _fill_in_range is {{maxnum}}.",
                request={
                    'parent_cset': parent_cset,
                    'child_cset': child_cset,
                    'number_forward': number_forward
                },
                maxnum=MAX_BACKFILL_CLOGS * CHANGESETS_PER_CLOG
            )
            return None
        return csets_to_add

    def initialize_to_range(self, old_rev, new_rev, delete_old=True):
        '''
        Used in service testing to get to very old changesets quickly.
        :param old_rev: The oldest revision to keep
        :param new_rev: The revision to start searching from
        :return:
        '''
        old_settings = [
            self.disable_tipfilling,
            self.disable_backfilling,
            self.disable_maintenance,
            self.disable_deletion
        ]
        self.disable_tipfilling = True
        self.disable_backfilling = True
        self.disable_maintenance = True
        self.disable_deletion = True

        old_rev = old_rev[:12]
        new_rev = new_rev[:12]

        with self.working_locker:
            if delete_old:
                with self.conn.transaction() as t:
                    t.execute("DELETE FROM csetLog")
            with self.conn.transaction() as t:
                t.execute(
                    "INSERT INTO csetLog (revision, timestamp) VALUES " +
                    quote_set((new_rev, -1))
                )
            self._fill_in_range(old_rev, new_rev, timestamp=True, number_forward=False)

        # restore the worker-disable flags to their previous state
        self.disable_tipfilling = old_settings[0]
        self.disable_backfilling = old_settings[1]
        self.disable_maintenance = old_settings[2]
        self.disable_deletion = old_settings[3]

    def fill_backward_with_list(self, please_stop=None):
        '''
        Expects requests of the tuple form: (parent_cset, timestamp)
        parent_cset can be an int X to go back by X changesets, or
        a string to search for going backwards in time. If timestamp
        is false, no timestamps will be added to the entries.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                request = self.csets_todo_backwards.pop(till=please_stop)
                if please_stop:
                    break

                # If backfilling is disabled, all requests
                # are ignored.
                if self.disable_backfilling:
                    # FIX: was Till(till=...); `till` expects an absolute
                    # deadline, `seconds` a duration (matches other waits here)
                    Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()
                    continue

                if request:
                    parent_cset, timestamp = request
                else:
                    continue

                with self.working_locker:
                    with self.conn.transaction() as t:
                        parent_revnum = self._get_one_revnum(t, parent_cset)
                    if parent_revnum:
                        # already in the table; nothing to backfill
                        continue

                    with self.conn.transaction() as t:
                        _, oldest_revision = self.get_tail(t)

                    self._fill_in_range(
                        parent_cset,
                        oldest_revision,
                        timestamp=timestamp,
                        number_forward=False
                    )
                Log.note("Finished {{cset}}", cset=parent_cset)
            except Exception as e:
                Log.warning("Unknown error occurred during backfill: ", cause=e)

    def update_tip(self):
        '''
        Returns False if the tip is already at the newest, or True
        if an update has taken place.
        :return:
        '''
        clog_obj = self._get_clog(
            str(HG_URL) + "/" + self.config.hg.branch + "/json-log/tip"
        )

        # Get current tip in DB
        with self.conn.transaction() as t:
            _, newest_known_rev = self.get_tip(t)

        # If we are still at the newest, wait for CSET_TIP_WAIT_TIME seconds
        # before checking again.
        first_clog_entry = clog_obj['changesets'][0]['node'][:12]
        if newest_known_rev == first_clog_entry:
            return False

        csets_to_gather = None
        if not newest_known_rev:
            Log.note(
                "No revisions found in table, adding {{minim}} entries...",
                minim=MINIMUM_PERMANENT_CSETS
            )
            csets_to_gather = MINIMUM_PERMANENT_CSETS

        found_newest_known = False
        csets_to_add = []
        csets_found = 0
        clogs_seen = 0
        Log.note("Found new revisions. Updating csetLog tip to {{rev}}...", rev=first_clog_entry)
        while not found_newest_known and clogs_seen < MAX_TIPFILL_CLOGS:
            clog_csets_list = list(clog_obj['changesets'])
            for clog_cset in clog_csets_list[:-1]:
                nodes_cset = clog_cset['node'][:12]
                if not csets_to_gather:
                    if nodes_cset == newest_known_rev:
                        found_newest_known = True
                        break
                else:
                    if csets_found >= csets_to_gather:
                        found_newest_known = True
                        break
                csets_found += 1
                csets_to_add.append(nodes_cset)
            if not found_newest_known:
                # Get the next page
                clogs_seen += 1
                final_rev = clog_csets_list[-1]['node'][:12]
                clog_url = str(HG_URL) + "/" + self.config.hg.branch + "/json-log/" + final_rev
                clog_obj = self._get_clog(clog_url)

        if clogs_seen >= MAX_TIPFILL_CLOGS:
            Log.error(
                "Too many changesets, can't find last tip or the number is too high: {{rev}}. "
                "Maximum possible to request is {{maxnum}}",
                rev=coalesce(newest_known_rev, csets_to_gather),
                maxnum=MAX_TIPFILL_CLOGS * CHANGESETS_PER_CLOG
            )
            return False

        with self.working_locker:
            Log.note("Adding {{csets}}", csets=csets_to_add)
            self.add_cset_entries(csets_to_add, timestamp=False)
        return True

    def fill_forward_continuous(self, please_stop=None):
        # Worker: keep the tip current, pausing between update rounds
        while not please_stop:
            try:
                while not please_stop and not self.disable_tipfilling and self.update_tip():
                    pass
                (please_stop | Till(seconds=CSET_TIP_WAIT_TIME)).wait()
            except Exception as e:
                Log.warning("Unknown error occurred during tip filling:", cause=e)

    def csetLog_maintenance(self, please_stop=None):
        '''
        Handles deleting old csetLog entries and timestamping
        revisions once they pass the length for permanent
        storage for deletion later.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                # Wait until something signals the maintenance cycle
                # to begin (or end).
                (self.maintenance_signal | please_stop).wait()

                if please_stop:
                    break
                if self.disable_maintenance:
                    continue

                Log.warning(
                    "Starting clog maintenance. Since this doesn't start often, "
                    "we need to explicitly see when it's started with this warning."
                )

                # Reset signal so we don't request
                # maintenance infinitely.
                with self.maintenance_signal.lock:
                    self.maintenance_signal._go = False

                with self.working_locker:
                    all_data = None
                    with self.conn.transaction() as t:
                        all_data = sorted(
                            t.get("SELECT revnum, revision, timestamp FROM csetLog"),
                            key=lambda x: int(x[0])
                        )

                    # Restore maximum permanents (if overflowing)
                    new_data = []
                    modified = False
                    for count, (revnum, revision, timestamp) in enumerate(all_data[::-1]):
                        if count < MINIMUM_PERMANENT_CSETS:
                            if timestamp != -1:
                                modified = True
                                new_data.append((revnum, revision, -1))
                            else:
                                new_data.append((revnum, revision, timestamp))
                        elif type(timestamp) != int or timestamp == -1:
                            modified = True
                            new_data.append((revnum, revision, int(time.time())))
                        else:
                            new_data.append((revnum, revision, timestamp))

                    # Delete annotations at revisions with timestamps
                    # that are too old. The csetLog entries will have
                    # their timestamps reset here.
                    new_data1 = []
                    annrevs_to_del = []
                    current_time = time.time()
                    for count, (revnum, revision, timestamp) in enumerate(new_data[::-1]):
                        new_timestamp = timestamp
                        if timestamp != -1:
                            if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
                                modified = True
                                new_timestamp = current_time
                                annrevs_to_del.append(revision)
                        new_data1.append((revnum, revision, new_timestamp))

                    if len(annrevs_to_del) > 0:
                        # Delete any latestFileMod and annotation entries
                        # that are too old.
                        Log.note(
                            "Deleting annotations and latestFileMod for revisions for being "
                            "older than {{oldest}}: {{revisions}}",
                            oldest=TIME_TO_KEEP_ANNOTATIONS,
                            revisions=annrevs_to_del
                        )
                        with self.conn.transaction() as t:
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN " +
                                quote_set(annrevs_to_del)
                            )
                            t.execute(
                                "DELETE FROM annotations WHERE revision IN " +
                                quote_set(annrevs_to_del)
                            )

                    # Delete any overflowing entries
                    new_data2 = new_data1
                    reved_all_data = all_data[::-1]
                    deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
                    delete_overflowing_revstart = None
                    if len(deleted_data) > 0:
                        _, delete_overflowing_revstart, _ = deleted_data[0]
                        new_data2 = set(all_data) - set(deleted_data)

                        # Update old frontiers if requested, otherwise
                        # they will all get deleted by the csetLog_deleter
                        # worker
                        if UPDATE_VERY_OLD_FRONTIERS:
                            _, max_revision, _ = all_data[-1]
                            for _, revision, _ in deleted_data:
                                with self.conn.transaction() as t:
                                    old_files = t.get(
                                        "SELECT file FROM latestFileMod WHERE revision=?",
                                        (revision,)
                                    )
                                if old_files is None or len(old_files) <= 0:
                                    continue

                                self.tuid_service.get_tuids_from_files(
                                    old_files,
                                    max_revision,
                                    going_forward=True,
                                )

                                still_exist = True
                                while still_exist and not please_stop:
                                    Till(seconds=TUID_EXISTENCE_WAIT_TIME).wait()
                                    with self.conn.transaction() as t:
                                        old_files = t.get(
                                            "SELECT file FROM latestFileMod WHERE revision=?",
                                            (revision,)
                                        )
                                    if old_files is None or len(old_files) <= 0:
                                        still_exist = False

                    # Update table and schedule a deletion
                    if modified:
                        with self.conn.transaction() as t:
                            insert_into_db_chunked(
                                t,
                                new_data2,
                                "INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES "
                            )
                    if not deleted_data:
                        continue

                    Log.note("Scheduling {{num_csets}} for deletion", num_csets=len(deleted_data))
                    self.deletions_todo.add(delete_overflowing_revstart)
            except Exception as e:
                Log.warning("Unexpected error occured while maintaining csetLog, continuing to try: ", cause=e)
        return

    def csetLog_deleter(self, please_stop=None):
        '''
        Deletes changesets from the csetLog table
        and also changesets from the annotation table
        that have revisions matching the given changesets.
        Accepts lists of csets from self.deletions_todo.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                request = self.deletions_todo.pop(till=please_stop)
                if please_stop:
                    break

                # If deletion is disabled, ignore the current
                # request - it will need to be re-requested.
                if self.disable_deletion:
                    # FIX: was Till(till=...); `till` expects an absolute
                    # deadline, `seconds` a duration (matches wait below)
                    Till(seconds=CSET_DELETION_WAIT_TIME).wait()
                    continue

                with self.working_locker:
                    first_cset = request

                    # Since we are deleting and moving stuff around in the
                    # TUID tables, we need everything to be contained in
                    # one transaction with no interruptions.
                    with self.conn.transaction() as t:
                        revnum = self._get_one_revnum(t, first_cset)[0]
                        csets_to_del = t.get(
                            "SELECT revnum, revision FROM csetLog WHERE revnum <= ?",
                            (revnum,)
                        )
                        csets_to_del = [cset for _, cset in csets_to_del]
                        existing_frontiers = t.query(
                            "SELECT revision FROM latestFileMod WHERE revision IN " +
                            quote_set(csets_to_del)
                        ).data

                        existing_frontiers = [existing_frontiers[i][0] for i, _ in enumerate(existing_frontiers)]
                        Log.note(
                            "Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
                            csets=csets_to_del
                        )

                        if len(existing_frontiers) > 0:
                            # This handles files which no longer exist anymore in
                            # the main branch.
                            Log.note(
                                "Deleting existing frontiers for revisions: {{revisions}}",
                                revisions=existing_frontiers
                            )
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN " +
                                quote_set(existing_frontiers)
                            )

                        Log.note("Deleting annotations...")
                        t.execute(
                            "DELETE FROM annotations WHERE revision IN " +
                            quote_set(csets_to_del)
                        )

                        Log.note(
                            "Deleting {{num_entries}} csetLog entries...",
                            num_entries=len(csets_to_del)
                        )
                        t.execute(
                            "DELETE FROM csetLog WHERE revision IN " +
                            quote_set(csets_to_del)
                        )

                    # Recalculate the revnums
                    self.recompute_table_revnums()
            except Exception as e:
                Log.warning("Unexpected error occured while deleting from csetLog:", cause=e)
                Till(seconds=CSET_DELETION_WAIT_TIME).wait()
        return

    def get_old_cset_revnum(self, revision):
        '''
        Request a backfill to the given revision, then poll until its
        revnum appears (or the timeout fires).
        :param revision: changeset to find
        :return: (revnum,) row, or None if never found before timeout
        '''
        self.csets_todo_backwards.add((revision, True))

        revnum = None
        timeout = Till(seconds=BACKFILL_REVNUM_TIMEOUT)
        while not timeout:
            with self.conn.transaction() as t:
                revnum = self._get_one_revnum(t, revision)

            if revnum and revnum[0] >= 0:
                break
            # FIX: guard before indexing — revnum is None while the revision
            # is not yet in the table (original raised TypeError here)
            elif revnum and revnum[0] < 0:
                Log.note("Waiting for table to recompute...")
            else:
                Log.note("Waiting for backfill to complete...")
            Till(seconds=CSET_BACKFILL_WAIT_TIME).wait()

        if timeout:
            Log.error(
                "Cannot find revision {{rev}} after waiting {{timeout}} seconds",
                rev=revision,
                timeout=BACKFILL_REVNUM_TIMEOUT
            )
        return revnum

    def get_revnnums_from_range(self, revision1, revision2):
        # Resolve both revisions to revnums (tip-updating/backfilling as
        # needed), then return the inclusive range sorted by revnum
        with self.conn.transaction() as t:
            revnum1 = self._get_one_revnum(t, revision1)
            revnum2 = self._get_one_revnum(t, revision2)
        if not revnum1 or not revnum2:
            did_an_update = self.update_tip()
            if did_an_update:
                with self.conn.transaction() as t:
                    revnum1 = self._get_one_revnum(t, revision1)
                    revnum2 = self._get_one_revnum(t, revision2)

            if not revnum1:
                revnum1 = self.get_old_cset_revnum(revision1)
                # Refresh the second entry
                with self.conn.transaction() as t:
                    revnum2 = self._get_one_revnum(t, revision2)

            if not revnum2:
                revnum2 = self.get_old_cset_revnum(revision2)
                # The first revnum might change also
                with self.conn.transaction() as t:
                    revnum1 = self._get_one_revnum(t, revision1)

        with self.conn.transaction() as t:
            result = self._get_revnum_range(t, revnum1[0], revnum2[0])
        return sorted(
            result,
            key=lambda x: int(x[0])
        )