def write_profiles(main_thread_profile):
    if cprofiler_stats is None:
        return

    from pyLibrary import convert
    from mo_files import File

    cprofiler_stats.add(pstats.Stats(main_thread_profile.cprofiler))
    stats = cprofiler_stats.pop_all()

    Log.note("aggregating {{num}} profile stats", num=len(stats))
    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    stats = [
        {
            "num_calls": d[1],
            "self_time": d[2],
            "total_time": d[3],
            "self_time_per_call": d[2] / d[1],
            "total_time_per_call": d[3] / d[1],
            "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
            "line": f[1],
            "method": f[2].lstrip("<").rstrip(">")
        }
        for f, d in iteritems(acc.stats)
    ]
    stats_file = File(FILENAME, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    stats_file.write(convert.list2tab(stats))
    Log.note("profile written to {{filename}}", filename=stats_file.abspath)
def format_file_in_place(src, mode):
    """Format the file at `src` path in place.

    The file content is reformatted via :func:`format_str` using `mode`
    options, then written back. Always returns True.
    """
    file = File(src)
    file.write(format_str(file.read(), mode=mode))
    return True
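# A minimal usage sketch (not from the source): "example.py" is a hypothetical
# path to an existing file, and FileMode is assumed to be the options object
# behind `format_str` (this helper appears to mirror black's API):
from black import FileMode

format_file_in_place("example.py", mode=FileMode())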
def __deploy__():
    # ONLY MEANT TO BE RUN FOR DEPLOYMENT
    from mo_files import File

    source_file = File("moz_sql_parser/sql_parser.py")
    lines = source_file.read().split("\n")
    lines = [
        "sys.setrecursionlimit(1500)" if line.startswith("sys.setrecursionlimit") else line
        for line in lines
    ]
    source_file.write("\n".join(lines))
class FakeES():
    @override
    def __init__(self, filename, host="fake", index="fake", kwargs=None):
        self.settings = kwargs
        self.file = File(filename)
        self.cluster = Null
        try:
            self.data = mo_json.json2value(self.file.read())
        except Exception:
            self.data = Data()

    def search(self, query):
        query = wrap(query)
        f = jx.get(query.query.filtered.filter)
        filtered = wrap([{"_id": i, "_source": d} for i, d in self.data.items() if f(d)])
        if query.fields:
            return wrap({
                "hits": {
                    "total": len(filtered),
                    "hits": [
                        {
                            "_id": d._id,
                            "fields": unwrap(jx.select([unwrap(d._source)], query.fields)[0])
                        }
                        for d in filtered
                    ]
                }
            })
        else:
            return wrap({"hits": {"total": len(filtered), "hits": filtered}})

    def extend(self, records):
        """
        JUST SO WE MODEL A Queue
        """
        records = {
            v["id"]: v["value"] if "value" in v else mo_json.json2value(v['json'])
            for v in records
        }
        for r in records.values():
            try:
                del r['etl']
            except Exception:
                pass

        unwrap(self.data).update(records)
        self.refresh()
        Log.note("{{num}} documents added", num=len(records))

    def add(self, record):
        if is_list(record):
            Log.error("no longer accepting lists, use extend()")
        return self.extend([record])

    def delete_record(self, filter):
        f = esfilter2where(filter)
        self.data = wrap({k: v for k, v in self.data.items() if not f(v)})

    def refresh(self, *args, **kwargs):
        data_as_json = mo_json.value2json(self.data, pretty=True)
        self.file.write(data_as_json)

    def set_refresh_interval(self, seconds):
        pass
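# A minimal usage sketch (the file name and document are hypothetical):
# `add` wraps one record and delegates to `extend`, which expects dicts
# carrying an "id" plus either a parsed "value" or a raw "json" string.
es = FakeES("temp/fake_index.json")
es.add({"id": "doc1", "value": {"name": "example"}})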
def _set_mtu(self, mtu=1500):
    # SET RIGHT NOW
    sudo("ifconfig eth0 mtu " + unicode(mtu))

    # DESPITE THE FILE CHANGE, THE MTU VALUE DOES NOT STICK
    local_file = File("./results/temp/ifcfg-eth0")
    local_file.delete()
    get("/etc/sysconfig/network-scripts/ifcfg-eth0", "./results/temp/ifcfg-eth0", use_sudo=True)
    lines = local_file.read()
    if lines.find("MTU=" + unicode(mtu)) == -1:  # USE THE REQUESTED mtu, NOT A HARDCODED 1500
        lines += "\nMTU=" + unicode(mtu)
        local_file.write(lines)
        put("./results/temp/ifcfg-eth0", "/etc/sysconfig/network-scripts/ifcfg-eth0", use_sudo=True)
def write_profile(profile_settings, stats):
    from pyLibrary import convert
    from mo_files import File

    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    stats = [
        {
            "num_calls": d[1],
            "self_time": d[2],
            "total_time": d[3],
            "self_time_per_call": d[2] / d[1],
            "total_time_per_call": d[3] / d[1],
            "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
            "line": f[1],
            "method": f[2].lstrip("<").rstrip(">")
        }
        for f, d in acc.stats.iteritems()
    ]
    stats_file = File(
        profile_settings.filename,
        suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S")
    )
    stats_file.write(convert.list2tab(stats))
def write(profile_settings):
    from mo_files import File
    from mo_logs.convert import datetime2string
    from mo_math import MAX
    from pyLibrary.convert import list2tab

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    stats = [
        {
            "description": p.description,
            "num_calls": p.stats.count,
            "total_time": p.stats.count * p.stats.mean,
            "total_time_per_call": p.stats.mean
        }
        for p in profs
        if p.stats.count > 0
    ]
    stats_file = File(profile_settings.filename, suffix=datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    if stats:
        stats_file.write(list2tab(stats))
    else:
        stats_file.write("<no profiles>")

    stats_file2 = File(
        profile_settings.filename,
        suffix=datetime2string(datetime.now(), "_series_%Y%m%d_%H%M%S")
    )
    if not profs:
        return
    max_samples = MAX([len(p.samples) for p in profs if p.samples])
    if not max_samples:
        return

    r = range(max_samples)
    profs.insert(0, Data(description="index", samples=r))
    stats = [{p.description: wrap(p.samples)[i] for p in profs if p.samples} for i in r]
    if stats:
        stats_file2.write(list2tab(stats))
def extract(self, db, start_point, first_value, data, please_stop):
    Log.note(
        "Starting scan of {{table}} at {{id}} and sending to batch {{start_point}}",
        table=self.settings.snowflake.fact_table,
        id=first_value,
        start_point=start_point
    )

    id = quote_column(self._extract.field.last())
    ids = (
        SQL_SELECT + id +
        SQL_FROM + self.settings.snowflake.fact_table +
        SQL_WHERE + id + " in " + sql_iso(sql_list(map(db.quote_value, data)))
    )
    sql = self.schema.get_sql(ids)

    with Timer("Sending SQL"):
        cursor = db.query(sql, stream=True, row_tuples=True)

    extract = self.settings.extract
    fact_table = self.settings.snowflake.fact_table

    with TempFile() as temp_file:
        parent_etl = None
        for s in start_point:
            parent_etl = {"id": s, "source": parent_etl}
        parent_etl["revision"] = get_git_revision()
        parent_etl["machine"] = machine_metadata

        def append(value, i):
            """
            :param value: THE DOCUMENT TO ADD
            :return: PleaseStop
            """
            temp_file.append(convert.value2json({
                fact_table: elasticsearch.scrub(value),
                "etl": {
                    "id": i,
                    "source": parent_etl,
                    "timestamp": Date.now()
                }
            }))

        with Timer("assemble data"):
            self.construct_docs(cursor, append, please_stop)

        # WRITE TO S3
        s3_file_name = ".".join(map(text_type, start_point))
        with Timer("write to destination {{filename}}", param={"filename": s3_file_name}):
            if not isinstance(self.settings.destination, text_type):
                destination = self.bucket.get_key(s3_file_name, must_exist=False)
                destination.write_lines(temp_file)
            else:
                destination = File(self.settings.destination)
                destination.write(convert.value2json(
                    [convert.json2value(o) for o in temp_file],
                    pretty=True
                ))
                return False

    # NOTIFY SQS
    now = Date.now()
    self.notify.add({
        "bucket": self.settings.destination.bucket,
        "key": s3_file_name,
        "timestamp": now.unix,
        "date/time": now.format()
    })

    # SUCCESS!!
    File(extract.last).write(convert.value2json([start_point, first_value]))
class SpotManager(object):
    @override
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(
            region_name=kwargs.aws.region,
            aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key)
        )
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.no_capacity = {}
        self.no_capacity_file = File(kwargs.price_file).parent / "no capacity.json"
        self.done_making_new_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(("id",))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(Date(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Date("5minute"))
        self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()

    def update_spot_requests(self):
        spot_requests = self._get_managed_spot_requests()

        # ADD UP THE CURRENT REQUESTED INSTANCES
        all_instances = UniqueIndex("id", data=self._get_managed_instances())
        self.active = active = wrap([
            r
            for r in spot_requests
            if r.status.code in RUNNING_STATUS_CODES | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN
        ])

        for a in active.copy():
            if a.status.code == "request-canceled-and-instance-running" and all_instances[a.instance_id] == None:
                active.remove(a)

        used_budget = 0
        current_spending = 0
        for a in active:
            about = self.price_lookup[a.launch_specification.instance_type, a.launch_specification.placement]
            discount = coalesce(about.type.discount, 0)
            Log.note(
                "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
                id=a.id,
                type=a.launch_specification.instance_type,
                zone=a.launch_specification.placement,
                instance_id=a.instance_id,
                price=a.price - discount
            )
            used_budget += a.price - discount
            current_spending += coalesce(about.current_price, a.price) - discount

        Log.note(
            "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
            budget=used_budget,
            current=current_spending
        )

        remaining_budget = self.settings.budget - used_budget

        current_utility = coalesce(SUM(
            self.price_lookup[r.launch_specification.instance_type, r.launch_specification.placement].type.utility
            for r in active
        ), 0)
        utility_required = self.instance_manager.required_utility(current_utility)
        net_new_utility = utility_required - current_utility

        Log.note(
            "have {{current_utility}} utility running; need {{need_utility}} more utility",
            current_utility=current_utility,
            need_utility=net_new_utility
        )

        if remaining_budget < 0:
            remaining_budget, net_new_utility = self.save_money(remaining_budget, net_new_utility)

        if net_new_utility < 0:
            if self.settings.allowed_overage:
                net_new_utility = mo_math.min(net_new_utility + self.settings.allowed_overage * utility_required, 0)
            net_new_utility = self.remove_instances(net_new_utility)

        if net_new_utility > 0:
            net_new_utility = mo_math.min(net_new_utility, self.settings.max_new_utility)
            net_new_utility, remaining_budget = self.add_instances(net_new_utility, remaining_budget)

        if net_new_utility > 0:
            Log.alert(
                "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour). Remaining budget is ${{budget|round(decimal=2)}} ",
                num=net_new_utility,
                expected=self.settings.max_utility_price,
                budget=remaining_budget
            )

        # Give EC2 a chance to notice the new requests before tagging them.
        Till(seconds=3).wait()
        with self.net_new_locker:
            for req in self.net_new_spot_requests:
                req.add_tag("Name", self.settings.ec2.instance.name)

        Log.note("All requests for new utility have been made")
        self.done_making_new_spot_requests.go()

    def add_instances(self, net_new_utility, remaining_budget):
        prices = self.pricing()

        for p in prices:
            if net_new_utility <= 0 or remaining_budget <= 0:
                break

            if p.current_price == None:
                Log.note("{{type}} has no current price", type=p.type.instance_type)
                continue

            if self.settings.utility[p.type.instance_type].blacklist or \
                    p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones):
                Log.note("{{type}} in {{zone}} skipped due to blacklist", type=p.type.instance_type, zone=p.availability_zone)
                continue

            # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
            max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
            max_bid = mo_math.min(p.higher_price, max_acceptable_price, remaining_budget)
            min_bid = p.price_80

            if min_bid > max_acceptable_price:
                Log.note(
                    "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                    type=p.type.instance_type,
                    price=min_bid,
                    remaining=max_acceptable_price
                )
                continue
            elif min_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                    type=p.type.instance_type,
                    bid=min_bid,
                    remaining_budget=remaining_budget
                )
                continue
            elif min_bid > max_bid:
                Log.error("not expected")

            naive_number_needed = int(mo_math.round(float(net_new_utility) / float(p.type.utility), decimal=0))
            limit_total = None
            if self.settings.max_percent_per_type < 1:
                current_count = sum(
                    1
                    for a in self.active
                    if a.launch_specification.instance_type == p.type.instance_type
                    and a.launch_specification.placement == p.availability_zone
                )
                all_count = sum(1 for a in self.active if a.launch_specification.placement == p.availability_zone)
                all_count = max(all_count, naive_number_needed)
                limit_total = int(mo_math.floor(
                    (all_count * self.settings.max_percent_per_type - current_count)
                    / (1 - self.settings.max_percent_per_type)
                ))

            num = mo_math.min(naive_number_needed, limit_total, self.settings.max_requests_per_type)
            if num < 0:
                Log.note(
                    "{{type}} is over {{limit|percent}} of instances, no more requested",
                    limit=self.settings.max_percent_per_type,
                    type=p.type.instance_type
                )
                continue
            elif num == 1:
                min_bid = mo_math.min(mo_math.max(p.current_price * 1.1, min_bid), max_acceptable_price)
                price_interval = 0
            else:
                price_interval = mo_math.min(min_bid / 10, (max_bid - min_bid) / (num - 1))

            for i in range(num):
                bid_per_machine = min_bid + (i * price_interval)
                if bid_per_machine < p.current_price:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        current_price=p.current_price
                    )
                    continue
                if bid_per_machine - p.type.discount > remaining_budget:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        remaining=remaining_budget
                    )
                    continue

                last_no_capacity_message = self.no_capacity.get(p.type.instance_type, Null)
                if last_no_capacity_message > Date.now() - CAPACITY_NOT_AVAILABLE_RETRY:
                    Log.note(
                        "Did not bid on {{type}}: \"No capacity\" last seen at {{last_time|datetime}}",
                        type=p.type.instance_type,
                        last_time=last_no_capacity_message
                    )
                    continue

                try:
                    if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1:
                        Log.error("Spot Manager can only request machine one-at-a-time")

                    new_requests = self._request_spot_instances(
                        price=bid_per_machine,
                        availability_zone_group=p.availability_zone,
                        instance_type=p.type.instance_type,
                        kwargs=copy(self.settings.ec2.request)
                    )
                    Log.note(
                        "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                        num=len(new_requests),
                        type=p.type.instance_type,
                        zone=p.availability_zone,
                        utility=p.type.utility,
                        price=bid_per_machine
                    )
                    net_new_utility -= p.type.utility * len(new_requests)
                    remaining_budget -= (bid_per_machine - p.type.discount) * len(new_requests)
                    with self.net_new_locker:
                        for ii in new_requests:
                            self.net_new_spot_requests.add(ii)
                except Exception as e:
                    Log.warning(
                        "Request instance {{type}} failed because {{reason}}",
                        type=p.type.instance_type,
                        reason=e.message,
                        cause=e
                    )

                    if "Max spot instance count exceeded" in e.message:
                        Log.note("No further spot requests will be attempted.")
                        return net_new_utility, remaining_budget

        return net_new_utility, remaining_budget

    def remove_instances(self, net_new_utility):
        instances = self.running_instances()

        # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE
        remove_list = []
        for acceptable_error in range(0, 8):
            remaining_utility = -net_new_utility
            remove_list = FlatList()
            for s in instances:
                utility = coalesce(s.markup.type.utility, 0)
                if utility <= remaining_utility + acceptable_error:
                    remove_list.append(s)
                    remaining_utility -= utility

            if remaining_utility <= 0:
                net_new_utility = -remaining_utility
                break

        if not remove_list:
            return net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.note("Shutdown {{instances}}", instances=remove_list.id)
        remove_threads = [
            Thread.run("teardown for " + text(i.id), self.instance_manager.teardown, i)
            for i in remove_list
        ]
        for t in remove_threads:
            try:
                t.join()
            except Exception as e:
                Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

        remove_spot_requests = remove_list.spot_instance_request_id

        # TERMINATE INSTANCES
        self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)
        return net_new_utility

    def running_instances(self):
        # FIND THE BIGGEST, MOST EXPENSIVE REQUESTS
        instances = self._get_managed_instances()
        for r in instances:
            try:
                r.markup = self.price_lookup[r.instance_type, r.placement]
            except Exception as e:
                r.markup = self.price_lookup[r.instance_type, r.placement]
                Log.error("No pricing!!!", e)
        instances = jx.sort(instances, [
            {"value": "markup.type.utility", "sort": -1},
            {"value": "markup.estimated_value", "sort": 1}
        ])
        return instances

    def save_money(self, remaining_budget, net_new_utility):
        remove_spot_requests = wrap([])

        # FIRST CANCEL THE PENDING REQUESTS
        if remaining_budget < 0:
            requests = self._get_managed_spot_requests()
            for r in requests:
                if r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN:
                    remove_spot_requests.append(r.id)
                    net_new_utility += self.settings.utility[r.launch_specification.instance_type].utility
                    remaining_budget += r.price

        instances = jx.sort(self.running_instances(), "markup.estimated_value")

        remove_list = wrap([])
        for s in instances:
            if remaining_budget >= 0:
                break
            remove_list.append(s)
            net_new_utility += coalesce(s.markup.type.utility, 0)
            remaining_budget += coalesce(s.request.bid_price, s.markup.price_80, s.markup.current_price)

        if not remove_list:
            return remaining_budget, net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.warning("Shutdown {{instances}} to save money!", instances=remove_list.id)
        if ALLOW_SHUTDOWN:
            for g, removals in jx.chunk(remove_list, size=20):
                for i, t in [
                    (i, Thread.run("teardown " + i.id, self.instance_manager.teardown, i, please_stop=False))
                    for i in removals
                ]:
                    try:
                        t.join()
                    except Exception:
                        Log.note("Problem with shutdown of {{id}}", id=i.id)

            remove_spot_requests.extend(remove_list.spot_instance_request_id)

            # TERMINATE INSTANCES
            self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

            # TERMINATE SPOT REQUESTS
            self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)

        return remaining_budget, net_new_utility

    @cache(duration=5 * SECOND)
    def _get_managed_spot_requests(self):
        output = wrap([
            datawrap(r)
            for r in self.ec2_conn.get_all_spot_instance_requests()
            if not r.tags.get("Name") or r.tags.get("Name").startswith(self.settings.ec2.instance.name)
        ])
        return output

    def _get_managed_instances(self):
        requests = UniqueIndex(
            ["instance_id"],
            data=self._get_managed_spot_requests().filter(lambda r: r.instance_id != None)
        )
        reservations = self.ec2_conn.get_all_instances()

        output = []
        for res in reservations:
            for instance in res.instances:
                if instance.tags.get('Name', '').startswith(self.settings.ec2.instance.name) and instance._state.name == "running":
                    instance.request = requests[instance.id]
                    output.append(datawrap(instance))
        return wrap(output)

    def _start_life_cycle_watcher(self):
        failed_locker = Lock()
        failed_attempts = Data()

        def track_setup(
            instance_setup_function,
            request,
            instance,   # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
            utility,    # THE utility OBJECT FOUND IN CONFIG
            please_stop
        ):
            try:
                instance_setup_function(instance, utility, please_stop)
                instance.add_tag("Name", self.settings.ec2.instance.name + " (running)")
                with self.net_new_locker:
                    self.net_new_spot_requests.remove(request.id)
            except Exception as e:
                e = Except.wrap(e)
                instance.add_tag("Name", "")
                with failed_locker:
                    failed_attempts[request.id] += [e]

                if "Can not setup unknown " in e:
                    Log.warning("Unexpected failure on startup", instance_id=instance.id, cause=e)
                elif ERROR_ON_CALL_TO_SETUP in e:
                    with failed_locker:
                        causes = failed_attempts[request.id]
                    if len(causes) > 2:
                        Log.warning("Problem with setup() of {{instance_id}}", instance_id=instance.id, cause=causes)
                else:
                    Log.warning("Unexpected failure on startup", instance_id=instance.id, cause=e)

        def life_cycle_watcher(please_stop):
            bad_requests = Data()
            setup_threads = []
            last_get = Date.now()
            setup_in_progress = set()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                instances = wrap({
                    i.id: i
                    for r in self.ec2_conn.get_all_instances()
                    for i in r.instances
                })

                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r)
                    for i, r in [(instances[r.instance_id], r) for r in spot_requests]
                    if i.id
                    and (not i.tags.get("Name") or i.tags.get("Name") == self.settings.ec2.instance.name + " (setup)")
                    and i.id not in setup_in_progress
                    and i._state.name == "running"
                    and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]

                for i, r in please_setup:
                    if not time_to_stop_trying.get(i.id):
                        time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                    if Date.now() > time_to_stop_trying[i.id]:
                        # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                        self.ec2_conn.terminate_instances(instance_ids=[i.id])
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                        Log.warning(
                            "Problem with setup of {{instance_id}}. Time is up. Instance TERMINATED!",
                            instance_id=i.id
                        )
                        continue

                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            try:
                                self.ec2_conn.terminate_instances(instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error(
                                    "Can not setup unknown {{instance_id}} of type {{type}}",
                                    instance_id=i.id,
                                    type=i.instance_type
                                )

                        i.markup = p
                        i.add_tag("Name", self.settings.ec2.instance.name + " (setup)")
                        setup_in_progress.add(i.id)
                        t = Thread.run("setup for " + text(i.id), track_setup, self.instance_manager.setup, r, i, p)
                        if SINGLE_THREAD_SETUP:
                            t.join()
                        setup_threads.append(t)
                    except Exception as e:
                        i.add_tag("Name", "")
                        Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES])
                give_up = wrap([
                    r
                    for r in spot_requests
                    if (r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES) and r.id not in bad_requests
                ])
                ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_making_new_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                # SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)
                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id",), data=pending)
                        pending = pending | self.net_new_spot_requests

                if give_up:
                    self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id)
                    Log.note("Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code)

                    for g in give_up:
                        bad_requests[g.id] += 1
                        if g.id in self.net_new_spot_requests:
                            self.net_new_spot_requests.remove(g.id)
                        if g.status.code == "capacity-not-available":
                            self.no_capacity[g.launch_specification.instance_type] = Date.now()
                        if g.status.code == "bad-parameters":
                            self.no_capacity[g.launch_specification.instance_type] = Date.now()
                            Log.warning("bad parameters while requesting type {{type}}", type=g.launch_specification.instance_type)

                if not pending and self.done_making_new_spot_requests:
                    Log.note("No more pending spot requests")
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            with Timer("Save no capacity to file"):
                table = [
                    {"instance_type": k, "last_failure": v}
                    for k, v in self.no_capacity.items()
                ]
                self.no_capacity_file.write(value2json(table, pretty=True))

            # WAIT FOR SETUP TO COMPLETE
            for t in setup_threads:
                t.join()

            Log.note("life cycle watcher has stopped")

        # Log.warning("lifecycle watcher is disabled")
        timeout = Till(seconds=self.settings.run_interval.seconds - 60)
        self.watcher = Thread.run("lifecycle watcher", life_cycle_watcher, please_stop=timeout)

    def _get_valid_availability_zones(self):
        subnets = list(self.vpc_conn.get_all_subnets(subnet_ids=self.settings.ec2.request.network_interfaces.subnet_id))
        zones_with_interfaces = [s.availability_zone for s in subnets]

        if self.settings.availability_zone:
            # If they pass a list of zones, constrain it by zones we have an
            # interface for.
            return set(zones_with_interfaces) & set(listwrap(self.settings.availability_zone))
        else:
            # Otherwise, use all available zones.
            return zones_with_interfaces

    @override
    def _request_spot_instances(self, price, availability_zone_group, instance_type, kwargs):
        kwargs.self = None
        kwargs.kwargs = None

        # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP
        if instance_type.startswith("m3."):
            kwargs.placement_group = None

        kwargs.network_interfaces = NetworkInterfaceCollection(*(
            NetworkInterfaceSpecification(**i)
            for i in listwrap(kwargs.network_interfaces)
            if self.vpc_conn.get_all_subnets(subnet_ids=i.subnet_id, filters={"availabilityZone": availability_zone_group})
        ))

        if len(kwargs.network_interfaces) == 0:
            Log.error(
                "No network interface specifications found for {{availability_zone}}!",
                availability_zone=kwargs.availability_zone_group
            )

        block_device_map = BlockDeviceMapping()

        # GENERIC BLOCK DEVICE MAPPING
        for dev, dev_settings in kwargs.block_device_map.items():
            block_device_map[dev] = BlockDeviceType(delete_on_termination=True, **dev_settings)

        kwargs.block_device_map = block_device_map

        # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping
        num_ephemeral_volumes = ephemeral_storage[instance_type]["num"]
        for i in range(num_ephemeral_volumes):
            letter = convert.ascii2char(98 + i)  # START AT "b"
            kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType(
                ephemeral_name='ephemeral' + text(i),
                delete_on_termination=True
            )

        if kwargs.expiration:
            kwargs.valid_until = (Date.now() + Duration(kwargs.expiration)).format(ISO8601)
            kwargs.expiration = None

        # ATTACH NEW EBS VOLUMES
        for i, drive in enumerate(self.settings.utility[instance_type].drives):
            letter = convert.ascii2char(98 + i + num_ephemeral_volumes)
            device = drive.device = coalesce(drive.device, "/dev/sd" + letter)
            d = drive.copy()
            d.path = None  # path AND device PROPERTY IS NOT ALLOWED IN THE BlockDeviceType
            d.device = None
            if d.size:
                kwargs.block_device_map[device] = BlockDeviceType(delete_on_termination=True, **d)

        output = list(self.ec2_conn.request_spot_instances(**kwargs))
        return output

    def pricing(self):
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()
            now = Date.now()

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from": prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {"coalesce": [{"rows": {"timestamp": 1}}, {"date": "eod"}]},
                                "edges": ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {
                                # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {"sub": {"timestamp": self.settings.uptime.duration.seconds}}
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone",
                        "instance_type",
                        {
                            "name": "time",
                            "range": {"min": "effective", "max": "expire", "mode": "inclusive"},
                            "allowNulls": False,
                            "domain": {
                                "type": "time",
                                "min": now.floor(HOUR) - self.settings.uptime.history,
                                "max": Date.now().floor(HOUR) + HOUR,
                                "interval": "hour"
                            }
                        }
                    ],
                    "select": [
                        {"value": "price", "aggregate": "max"},
                        {"aggregate": "count"}
                    ],
                    "where": {"gt": {"expire": now.floor(HOUR) - self.settings.uptime.history}},
                    "window": [
                        {
                            "name": "current_price",
                            "value": "rows.last.price",
                            "edges": ["availability_zone", "instance_type"],
                            "sort": "time"
                        }
                    ]
                }).data

                bid80 = jx.run({
                    "from": ListContainer(name=None, data=hourly_pricing),
                    "edges": [
                        {"value": "availability_zone", "allowNulls": False},
                        {
                            "name": "type",
                            "value": "instance_type",
                            "allowNulls": False,
                            "domain": {"type": "set", "key": "instance_type", "partitions": self.settings.utility}
                        }
                    ],
                    "select": [
                        {"name": "price_80", "value": "price", "aggregate": "percentile", "percentile": self.settings.uptime.bid_percentile},
                        {"name": "max_price", "value": "price", "aggregate": "max"},
                        {"aggregate": "count"},
                        {"value": "current_price", "aggregate": "one"},
                        {"name": "all_price", "value": "price", "aggregate": "list"}
                    ],
                    "window": [
                        {"name": "estimated_value", "value": {"div": ["type.utility", "price_80"]}},
                        {
                            "name": "higher_price",
                            "value": lambda row, rownum, rows: find_higher(row.all_price, row.price_80)
                        }  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.sort(bid80.values(), {"value": "estimated_value", "sort": -1})

                self.prices = wrap(output)
                self.price_lookup = UniqueIndex(("type.instance_type", "availability_zone"), data=self.prices)
            return self.prices

    def _get_spot_prices_from_aws(self):
        with Timer("Read no capacity file"):
            try:
                # FILE IS LIST OF {instance_type, last_failure} OBJECTS
                content = self.no_capacity_file.read()
                self.no_capacity = dict(
                    (r.instance_type, r.last_failure)
                    for r in convert.json2value(content, flexible=False, leaves=False)
                )
            except Exception as e:
                self.no_capacity = {}

        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content, flexible=False, leaves=False)
            except Exception as e:
                cache = FlatList()

        cache = ListContainer(name=None, data=cache)
        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {"value": "timestamp", "aggregate": "max"}
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX([Date(most_recent), Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note(
                            "get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at
                        )

                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(self.settings.product, "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token
                        )
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(wrap({
                                "availability_zone": p.availability_zone,
                                "instance_type": p.instance_type,
                                "price": p.price,
                                "product_description": p.product_description,
                                "region": p.region.name,
                                "timestamp": Date(p.timestamp).unix
                            }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(prices, {"gte": {"timestamp": {"date": "today-2day"}}})

            def stream():
                # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                yield "]"

            File(self.settings.price_file).write(stream())

        return ListContainer(name="prices", data=prices)
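# A minimal usage sketch (not from the source): `my_instance_manager` and
# `settings` are hypothetical; `kwargs` is normally injected by the @override
# decorator from a JSON configuration file.
manager = SpotManager(my_instance_manager, kwargs=settings)
manager.update_spot_requests()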