def get_schema_from_list(table_name, frum):
    """
    SCAN THE LIST FOR COLUMN TYPES

    :param table_name: name given to the resulting Schema
    :param frum: the list to scan; handed to _get_schema_from_list unchanged
    :return: Schema built from table_name and the columns collected
    """
    # THE SCANNER FILLS THIS INDEX IN-PLACE
    found_columns = UniqueIndex(keys=("names.\\.",))
    _get_schema_from_list(
        frum,
        ".",
        prefix_path=[],
        nested_path=ROOT_PATH,
        columns=found_columns
    )
    return Schema(table_name=table_name, columns=found_columns)
def __init__(self, instance_manager, disable_prices=False, kwargs=None):
    """
    Connect to EC2/VPC, initialize bookkeeping, fill in default settings,
    and optionally start the life cycle watcher and load pricing.

    :param instance_manager: object consulted via setup_required(); kept on self
    :param disable_prices: when True, self.pricing() is NOT called here
    :param kwargs: settings object; kept (and mutated) as self.settings
    """
    settings = self.settings = kwargs
    self.instance_manager = instance_manager

    # SAME CREDENTIALS ARE USED FOR BOTH CONNECTIONS
    aws = kwargs.aws
    aws_args = {
        "region_name": aws.region,
        "aws_access_key_id": unwrap(aws.aws_access_key_id),
        "aws_secret_access_key": unwrap(aws.aws_secret_access_key)
    }
    self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
    self.vpc_conn = boto.vpc.connect_to_region(**aws_args)

    # PRICING STATE
    self.price_locker = Lock()
    self.prices = None
    self.price_lookup = None

    # SPOT REQUESTS FOR THIS SESSION
    self.done_spot_requests = Signal()
    self.net_new_locker = Lock()
    self.net_new_spot_requests = UniqueIndex(("id",))

    self.watcher = None
    self.active = None

    # FILL IN DEFAULTS FOR MISSING SETTINGS
    settings.uptime.bid_percentile = coalesce(
        settings.uptime.bid_percentile,
        settings.bid_percentile
    )
    settings.uptime.history = coalesce(Date(settings.uptime.history), DAY)
    settings.uptime.duration = coalesce(
        Duration(settings.uptime.duration),
        Date("5minute")
    )
    settings.max_percent_per_type = coalesce(settings.max_percent_per_type, 1)

    watcher_required = (
        ENABLE_SIDE_EFFECTS
        and instance_manager
        and instance_manager.setup_required()
    )
    if watcher_required:
        self._start_life_cycle_watcher()

    if not disable_prices:
        self.pricing()
def main():
    """
    Entry point: read settings, assign device names to drives that have
    none, then build a SpotManager and (when side effects are enabled)
    update the spot requests.  Logging and the main thread are always
    stopped on the way out.
    """
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        with SingleInstance(flavor_id=settings.args.filename):
            settings.run_interval = Duration(settings.run_interval)

            for utility in settings.utility:
                utility.discount = coalesce(utility.discount, 0)

                # MARKUP drives WITH EXPECTED device MAPPING
                ephemeral_count = ephemeral_storage[utility.instance_type]["num"]
                # LAZY ON PURPOSE: SAME EVALUATION ORDER AS ASSIGNMENT BELOW
                unnamed_drives = (
                    drive for drive in utility.drives if not drive.device
                )
                for offset, drive in enumerate(unnamed_drives):
                    suffix = convert.ascii2char(98 + ephemeral_count + offset)
                    drive.device = "/dev/xvd" + suffix

            settings.utility = UniqueIndex(
                ["instance_type"],
                data=settings.utility
            )
            manager = SpotManager(
                new_instance(settings.instance),
                kwargs=settings
            )

            if ENABLE_SIDE_EFFECTS:
                manager.update_spot_requests()

            if manager.watcher:
                manager.watcher.join()
    except Exception as cause:
        Log.warning("Problem with spot manager", cause=cause)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
def get_branches(hg, branches, kwargs=None):
    """
    Return a UniqueIndex (keyed on name, locale) of known branches.

    Branches are read from the ES index described by `branches`; when the
    most recent etl.timestamp found is missing, or older than OLD_BRANCH,
    they are pulled from hg instead and written back to the index.  When
    the index can not be found it is created and this function is retried.

    :param hg: settings passed to _get_branches_from_hg
    :param branches: settings for the ES cluster/index
    :param kwargs: forwarded when retrying
    """
    # TRY ES
    try:
        index = elasticsearch.Cluster(kwargs=branches).get_index(
            kwargs=branches,
            read_only=False
        )
        everything = {
            "query": {"match_all": {}},
            "size": 10000
        }
        found_branches = index.search(everything).hits.hits._source

        # IF IT IS TOO OLD, THEN PULL FROM HG
        # (MAX OF THE TIMESTAMPS, SO THIS IS THE MOST RECENT RECORD)
        latest = Date(MAX(found_branches.etl.timestamp))
        if latest == None or Date.now() - latest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            records = (
                {"id": b.name + " " + b.locale, "value": b}
                for b in found_branches
            )
            index.extend(records)
            index.flush()

        try:
            return UniqueIndex(
                ["name", "locale"],
                data=found_branches,
                fail_on_dup=False
            )
        except Exception as cause:
            Log.error("Bad branch in ES index", cause=cause)
    except Exception as e:
        # NOTE(review): `in e` assumes e supports containment (project
        # exception type) - confirm plain exceptions can not reach here
        if "Can not find index " in e:
            set_default(branches, {"schema": branches_schema})
            index = elasticsearch.Cluster(kwargs=branches).get_or_create_index(
                kwargs=branches
            )
            index.add_alias()
            return get_branches(kwargs=kwargs)
        Log.error("problem getting branches", cause=e)
def get_branches(hg, branches, kwargs=None):
    """
    Return a UniqueIndex (keyed on name, locale) of known branches.

    Branches are queried from ES; when the most recent etl.timestamp found
    is missing, or older than OLD_BRANCH, they are pulled from hg instead
    and written back to the index.  When the index can not be found it is
    created and this function is retried.

    :param hg: settings passed to _get_branches_from_hg
    :param branches: settings for the ES cluster/index
    :param kwargs: forwarded when retrying
    """
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        index = cluster.get_index(kwargs=branches, read_only=False)
        container = jx_elasticsearch.new_instance(branches)
        all_branches_query = {
            "from": "branches",
            "format": "list",
            "limit": 10000
        }
        found_branches = container.query(all_branches_query).data

        # IF IT IS TOO OLD, THEN PULL FROM HG
        # (MAX OF THE TIMESTAMPS, SO THIS IS THE MOST RECENT RECORD)
        latest = Date(MAX(found_branches.etl.timestamp))
        if latest == None or Date.now() - latest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            records = (
                {"id": b.name + " " + b.locale, "value": b}
                for b in found_branches
            )
            index.extend(records)
            index.flush()

        try:
            return UniqueIndex(
                ["name", "locale"],
                data=found_branches,
                fail_on_dup=False
            )
        except Exception as cause:
            Log.error("Bad branch in ES index", cause=cause)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            set_default(branches, {"schema": branches_schema})
            index = cluster.get_or_create_index(branches)
            index.add_alias()
            return get_branches(kwargs)
        Log.error("problem getting branches", cause=e)
def __init__(self, instance_manager, disable_prices=False, kwargs=None):
    """
    Connect to EC2/VPC, initialize bookkeeping, fill in default settings,
    and optionally start the life cycle watcher and load pricing.

    :param instance_manager: object consulted via setup_required(); kept on self
    :param disable_prices: when True, self.pricing() is NOT called here
    :param kwargs: settings object; kept (and mutated) as self.settings
    """
    settings = self.settings = kwargs
    self.instance_manager = instance_manager

    # SAME CREDENTIALS ARE USED FOR BOTH CONNECTIONS
    aws = kwargs.aws
    aws_args = {
        "region_name": aws.region,
        "aws_access_key_id": unwrap(aws.aws_access_key_id),
        "aws_secret_access_key": unwrap(aws.aws_secret_access_key)
    }
    self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
    self.vpc_conn = boto.vpc.connect_to_region(**aws_args)

    # PRICING STATE
    self.price_locker = Lock()
    self.prices = None
    self.price_lookup = None

    # "no capacity.json" LIVES BESIDE THE PRICE FILE
    self.no_capacity = {}
    price_dir = File(kwargs.price_file).parent
    self.no_capacity_file = price_dir / "no capacity.json"

    # SPOT REQUESTS FOR THIS SESSION
    self.done_making_new_spot_requests = Signal()
    self.net_new_locker = Lock()
    self.net_new_spot_requests = UniqueIndex(("id", ))

    self.watcher = None
    self.active = None

    # FILL IN DEFAULTS FOR MISSING SETTINGS
    settings.uptime.bid_percentile = coalesce(
        settings.uptime.bid_percentile,
        settings.bid_percentile
    )
    settings.uptime.history = coalesce(Date(settings.uptime.history), DAY)
    settings.uptime.duration = coalesce(
        Duration(settings.uptime.duration),
        Date("5minute")
    )
    settings.max_percent_per_type = coalesce(settings.max_percent_per_type, 1)

    watcher_required = (
        ENABLE_SIDE_EFFECTS
        and instance_manager
        and instance_manager.setup_required()
    )
    if watcher_required:
        self._start_life_cycle_watcher()

    if not disable_prices:
        self.pricing()
def _get_managed_instances(self):
    """
    Return the running instances whose Name tag starts with the configured
    instance name; each one has its spot request (looked up by instance id)
    attached as `.request`.
    """
    # ONLY SPOT REQUESTS THAT ALREADY HAVE AN INSTANCE CAN BE INDEXED
    fulfilled = self._get_managed_spot_requests().filter(
        lambda r: r.instance_id != None
    )
    request_by_instance = UniqueIndex(["instance_id"], data=fulfilled)

    managed = []
    for reservation in self.ec2_conn.get_all_instances():
        for candidate in reservation.instances:
            tag_name = candidate.tags.get('Name', '')
            if not tag_name.startswith(self.settings.ec2.instance.name):
                continue
            if not (candidate._state.name == "running"):
                continue
            candidate.request = request_by_instance[candidate.id]
            managed.append(datawrap(candidate))
    return wrap(managed)
def get_schema_from_list(table_name, frum, native_type_to_json_type=python_type_to_json_type):
    """
    SCAN THE LIST FOR COLUMN TYPES

    :param table_name: name given to the resulting Schema
    :param frum: the list to scan; handed to _get_schema_from_list unchanged
    :param native_type_to_json_type: type mapping forwarded to the scanner
    :return: Schema built from table_name and a list of the columns collected
    """
    # THE SCANNER FILLS THIS INDEX IN-PLACE
    found_columns = UniqueIndex(keys=("name", ))
    scan_options = dict(
        parent=".",
        nested_path=ROOT_PATH,
        columns=found_columns,
        native_type_to_json_type=native_type_to_json_type
    )
    _get_schema_from_list(frum, ".", **scan_options)
    return Schema(table_name=table_name, columns=list(found_columns))
def _get_branches_from_hg(kwarg):
    """
    Scrape the repository listing at kwarg.url and return a UniqueIndex
    (keyed on name, locale) of branches, including the "release-*" and
    "comm-aurora" aliases added below.  Every branch is sanity-checked
    before returning.

    :param kwarg: settings; `url` is fetched, and the whole object is passed
                  on to _get_single_branch_from_hg
    """
    # GET MAIN PAGE
    page = http.get(kwarg.url)
    soup = BeautifulSoup(page.all_content, "html.parser")

    # THE SECOND TABLE ON THE PAGE IS USED; ONE ROW PER REPOSITORY
    repo_table = soup("table")[1]
    branches = UniqueIndex(["name", "locale"], fail_on_dup=False)
    for row in repo_table("tr"):
        repo_dir, repo_name = [cell.text.strip() for cell in row("td")]
        found = _get_single_branch_from_hg(kwarg, repo_name, repo_dir.lstrip("/"))
        branches.extend(found)

    # ALIASES: THE COPY IS MADE BEFORE THE SOURCE url IS OVERWRITTEN
    for beta in list(branches[("mozilla-beta", )]):
        alias = set_default({"name": "release-mozilla-beta"}, beta)  # THIS IS THE l10n "name"
        branches.add(alias)
        beta.url = "https://hg.mozilla.org/releases/mozilla-beta"

    for release in list(branches[("mozilla-release", )]):
        alias = set_default({"name": "release-mozilla-release"}, release)
        branches.add(alias)

    for aurora in list(branches[("mozilla-aurora", )]):
        if aurora.locale == "en-US":
            continue
        alias = set_default({"name": "comm-aurora"}, aurora)
        branches.add(alias)

    for esr in list(branches):
        if not esr.name.startswith("mozilla-esr"):
            continue
        alias = set_default({"name": "release-" + esr.name}, esr)  # THIS IS THE l10n "name"
        branches.add(alias)
        esr.url = "https://hg.mozilla.org/releases/" + esr.name

    # CHECKS
    for branch in branches:
        if branch.name != branch.name.lower():
            Log.error("Expecting lowercase name")
        if not branch.locale:
            Log.error("Not expected")
        if not branch.url.startswith("http"):
            Log.error("Expecting a valid url")
        if not branch.etl.timestamp:
            Log.error("Expecting a timestamp")

    return branches
class SpotManager(object):
    """
    Manage EC2 spot requests and the instances they produce.

    The manager prices spot capacity (pricing), adds or removes instances so
    the running utility matches what is required while staying inside the
    budget (update_spot_requests), and runs a watcher thread that sets up new
    instances as they start (_start_life_cycle_watcher).
    """

    @override
    def __init__(self, instance_manager, disable_prices=False, kwargs=None):
        """
        :param instance_manager: provides setup_required(), setup() and teardown()
        :param disable_prices: when True, pricing() is NOT called here
        :param kwargs: settings object; kept (and mutated) as self.settings
        """
        self.settings = kwargs
        self.instance_manager = instance_manager
        aws_args = dict(
            region_name=kwargs.aws.region,
            aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key)
        )
        self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
        self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
        self.price_locker = Lock()
        self.prices = None
        self.price_lookup = None
        self.done_spot_requests = Signal()
        self.net_new_locker = Lock()
        self.net_new_spot_requests = UniqueIndex(("id",))  # SPOT REQUESTS FOR THIS SESSION
        self.watcher = None
        self.active = None

        # FILL IN DEFAULTS FOR MISSING SETTINGS
        self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile)
        self.settings.uptime.history = coalesce(Date(self.settings.uptime.history), DAY)
        self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Date("5minute"))
        self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1)

        if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
            self._start_life_cycle_watcher()
        if not disable_prices:
            self.pricing()

    def update_spot_requests(self, utility_required):
        """
        Reconcile the active spot requests with `utility_required`.

        Tallies the budget used by active requests, then (in order) calls
        save_money() when over budget, remove_instances() when there is too
        much utility, and add_instances() when there is too little.  New
        requests are tagged with the configured instance name and
        done_spot_requests is signalled at the end.

        :param utility_required: total utility that should be running
        """
        spot_requests = self._get_managed_spot_requests()

        # ADD UP THE CURRENT REQUESTED INSTANCES
        all_instances = UniqueIndex("id", data=self._get_managed_instances())
        self.active = active = wrap([r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN])

        # A CANCELLED REQUEST ONLY COUNTS WHILE ITS INSTANCE IS STILL MANAGED
        for a in active.copy():
            if a.status.code == "request-canceled-and-instance-running" and all_instances[a.instance_id] == None:
                active.remove(a)

        used_budget = 0
        current_spending = 0
        for a in active:
            about = self.price_lookup[a.launch_specification.instance_type, a.launch_specification.placement]
            discount = coalesce(about.type.discount, 0)
            Log.note(
                "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
                id=a.id,
                type=a.launch_specification.instance_type,
                zone=a.launch_specification.placement,
                instance_id=a.instance_id,
                price=a.price - discount
            )
            used_budget += a.price - discount
            current_spending += coalesce(about.current_price, a.price) - discount

        Log.note(
            "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
            budget=used_budget,
            current=current_spending
        )

        remaining_budget = self.settings.budget - used_budget

        current_utility = coalesce(SUM(self.price_lookup[r.launch_specification.instance_type, r.launch_specification.placement].type.utility for r in active), 0)
        net_new_utility = utility_required - current_utility

        Log.note("have {{current_utility}} utility running; need {{need_utility}} more utility", current_utility=current_utility, need_utility=net_new_utility)

        if remaining_budget < 0:
            remaining_budget, net_new_utility = self.save_money(remaining_budget, net_new_utility)

        if net_new_utility < 0:
            if self.settings.allowed_overage:
                # TOLERATE SOME EXCESS UTILITY BEFORE REMOVING INSTANCES
                net_new_utility = Math.min(net_new_utility + self.settings.allowed_overage * utility_required, 0)

            net_new_utility = self.remove_instances(net_new_utility)

        if net_new_utility > 0:
            net_new_utility = Math.min(net_new_utility, self.settings.max_new_utility)
            net_new_utility, remaining_budget = self.add_instances(net_new_utility, remaining_budget)

        if net_new_utility > 0:
            Log.alert(
                "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour). Remaining budget is ${{budget|round(decimal=2)}} ",
                num=net_new_utility,
                expected=self.settings.max_utility_price,
                budget=remaining_budget
            )

        # Give EC2 a chance to notice the new requests before tagging them.
        Till(timeout=3).wait()
        with self.net_new_locker:
            for req in self.net_new_spot_requests:
                req.add_tag("Name", self.settings.ec2.instance.name)

        Log.note("All requests for new utility have been made")
        self.done_spot_requests.go()

    def add_instances(self, net_new_utility, remaining_budget):
        """
        Place spot requests, walking the price list from pricing() in order,
        until the utility is covered or the budget is spent.

        :param net_new_utility: utility still needed
        :param remaining_budget: budget ($/hour) still available
        :return: (net_new_utility, remaining_budget) after the requests made
        """
        prices = self.pricing()

        for p in prices:
            if net_new_utility <= 0 or remaining_budget <= 0:
                break

            if p.current_price == None:
                Log.note("{{type}} has no current price", type=p.type.instance_type)
                continue

            if self.settings.utility[p.type.instance_type].blacklist or \
                    p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones):
                Log.note("{{type}} in {{zone}} skipped due to blacklist", type=p.type.instance_type, zone=p.availability_zone)
                continue

            # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
            max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
            max_bid = Math.min(p.higher_price, max_acceptable_price, remaining_budget)
            min_bid = p.price_80

            if min_bid > max_acceptable_price:
                Log.note(
                    "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                    type=p.type.instance_type,
                    price=min_bid,
                    remaining=max_acceptable_price
                )
                continue
            elif min_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                    type=p.type.instance_type,
                    bid=min_bid,
                    remaining_budget=remaining_budget
                )
                continue
            elif min_bid > max_bid:
                Log.error("not expected")

            naive_number_needed = int(Math.round(float(net_new_utility) / float(p.type.utility), decimal=0))
            limit_total = None
            if self.settings.max_percent_per_type < 1:
                # CAP THE SHARE ONE INSTANCE TYPE MAY HAVE WITHIN A ZONE
                current_count = sum(1 for a in self.active if a.launch_specification.instance_type == p.type.instance_type and a.launch_specification.placement == p.availability_zone)
                all_count = sum(1 for a in self.active if a.launch_specification.placement == p.availability_zone)
                all_count = max(all_count, naive_number_needed)
                limit_total = int(Math.floor((all_count * self.settings.max_percent_per_type - current_count) / (1 - self.settings.max_percent_per_type)))

            num = Math.min(naive_number_needed, limit_total, self.settings.max_requests_per_type)
            if num < 0:
                Log.note(
                    "{{type}} is over {{limit|percent}} of instances, no more requested",
                    limit=self.settings.max_percent_per_type,
                    type=p.type.instance_type
                )
                continue
            elif num == 1:
                min_bid = Math.min(Math.max(p.current_price * 1.1, min_bid), max_acceptable_price)
                price_interval = 0
            else:
                # SPREAD THE BIDS BETWEEN min_bid AND max_bid
                price_interval = Math.min(min_bid / 10, (max_bid - min_bid) / (num - 1))

            for i in range(num):
                bid_per_machine = min_bid + (i * price_interval)
                if bid_per_machine < p.current_price:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        current_price=p.current_price
                    )
                    continue
                if bid_per_machine - p.type.discount > remaining_budget:
                    Log.note(
                        "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                        type=p.type.instance_type,
                        bid=bid_per_machine - p.type.discount,
                        remaining=remaining_budget
                    )
                    continue

                try:
                    if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1:
                        Log.error("Spot Manager can only request machine one-at-a-time")

                    new_requests = self._request_spot_instances(
                        price=bid_per_machine,
                        availability_zone_group=p.availability_zone,
                        instance_type=p.type.instance_type,
                        kwargs=copy(self.settings.ec2.request)
                    )
                    Log.note(
                        "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                        num=len(new_requests),
                        type=p.type.instance_type,
                        zone=p.availability_zone,
                        utility=p.type.utility,
                        price=bid_per_machine
                    )
                    net_new_utility -= p.type.utility * len(new_requests)
                    remaining_budget -= (bid_per_machine - p.type.discount) * len(new_requests)
                    with self.net_new_locker:
                        for ii in new_requests:
                            self.net_new_spot_requests.add(ii)
                except Exception as e:
                    Log.warning(
                        "Request instance {{type}} failed because {{reason}}",
                        type=p.type.instance_type,
                        reason=e.message,
                        cause=e
                    )

                    if "Max spot instance count exceeded" in e.message:
                        Log.note("No further spot requests will be attempted.")
                        return net_new_utility, remaining_budget

        return net_new_utility, remaining_budget

    def remove_instances(self, net_new_utility):
        """
        Shut down running instances to shed excess utility.

        :param net_new_utility: negative number; the utility to remove
        :return: net_new_utility after the removal (unchanged when no
                 combination of instances was selected)
        """
        instances = self.running_instances()

        # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE
        remove_list = []
        for acceptable_error in range(0, 8):
            remaining_utility = -net_new_utility
            remove_list = FlatList()
            for s in instances:
                utility = coalesce(s.markup.type.utility, 0)
                if utility <= remaining_utility + acceptable_error:
                    remove_list.append(s)
                    remaining_utility -= utility
            if remaining_utility <= 0:
                net_new_utility = -remaining_utility
                break

        if not remove_list:
            return net_new_utility

        # SEND SHUTDOWN TO EACH INSTANCE
        Log.note("Shutdown {{instances}}", instances=remove_list.id)
        for i in remove_list:
            try:
                self.instance_manager.teardown(i)
            except Exception as e:
                Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

        remove_spot_requests = remove_list.spot_instance_request_id

        # TERMINATE INSTANCES
        self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)

        return net_new_utility

    def running_instances(self):
        """
        Return the managed instances, each annotated with `.markup` (its
        price_lookup entry), sorted by utility (descending) then by
        estimated value (ascending).
        """
        # FIND THE BIGGEST, MOST EXPENSIVE REQUESTS
        instances = self._get_managed_instances()
        for r in instances:
            try:
                r.markup = self.price_lookup[r.instance_type, r.placement]
            except Exception as e:
                # NOTE(review): the lookup is repeated before the error is
                # reported; looks like a debugging aid - confirm it is wanted
                r.markup = self.price_lookup[r.instance_type, r.placement]
                Log.error("No pricing!!!", e)
        instances = jx.sort(instances, [
            {"value": "markup.type.utility", "sort": -1},
            {"value": "markup.estimated_value", "sort": 1}
        ])
        return instances

    def save_money(self, remaining_budget, net_new_utility):
        """
        Get back under budget: cancel not-yet-running spot requests first,
        then shut down running instances (lowest estimated value first)
        until remaining_budget is no longer negative.

        :param remaining_budget: budget ($/hour) left; negative when over
        :param net_new_utility: utility still needed
        :return: (remaining_budget, net_new_utility) after the cancellations
        """
        remove_spot_requests = wrap([])

        # FIRST CANCEL THE PENDING REQUESTS
        if remaining_budget < 0:
            requests = self._get_managed_spot_requests()
            for r in requests:
                if r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN:
                    remove_spot_requests.append(r.id)
                    net_new_utility += self.settings.utility[r.launch_specification.instance_type].utility
                    remaining_budget += r.price

        instances = jx.sort(self.running_instances(), "markup.estimated_value")

        remove_list = wrap([])
        for s in instances:
            if remaining_budget >= 0:
                break
            remove_list.append(s)
            net_new_utility += coalesce(s.markup.type.utility, 0)
            remaining_budget += coalesce(s.request.bid_price, s.markup.price_80, s.markup.current_price)

        # NOTHING TO SHUT DOWN AND NOTHING TO CANCEL
        if not remove_list and not remove_spot_requests:
            return remaining_budget, net_new_utility

        if remove_list:
            # SEND SHUTDOWN TO EACH INSTANCE
            Log.warning("Shutdown {{instances}} to save money!", instances=remove_list.id)
            for i in remove_list:
                try:
                    self.instance_manager.teardown(i)
                except Exception as e:
                    Log.warning("Teardown of {{id}} failed", id=i.id, cause=e)

            remove_spot_requests.extend(remove_list.spot_instance_request_id)

            # TERMINATE INSTANCES
            self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

        # TERMINATE SPOT REQUESTS
        # THE PENDING REQUESTS WERE ALREADY CREDITED TO THE BUDGET ABOVE, SO
        # THEY MUST BE CANCELLED EVEN WHEN NO INSTANCE IS SHUT DOWN
        self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)
        return remaining_budget, net_new_utility

    @cache(duration=5 * SECOND)
    def _get_managed_spot_requests(self):
        """
        Return the spot requests that either have no Name tag, or whose Name
        tag starts with the configured instance name.
        """
        output = wrap([datawrap(r) for r in self.ec2_conn.get_all_spot_instance_requests() if not r.tags.get("Name") or r.tags.get("Name").startswith(self.settings.ec2.instance.name)])
        return output

    def _get_managed_instances(self):
        """
        Return the running instances whose Name tag starts with the
        configured instance name; each has its spot request (looked up by
        instance id) attached as `.request`.
        """
        requests = UniqueIndex(["instance_id"], data=self._get_managed_spot_requests().filter(lambda r: r.instance_id!=None))
        reservations = self.ec2_conn.get_all_instances()

        output = []
        for res in reservations:
            for instance in res.instances:
                if instance.tags.get('Name', '').startswith(self.settings.ec2.instance.name) and instance._state.name == "running":
                    instance.request = requests[instance.id]
                    output.append(datawrap(instance))
        return wrap(output)

    def _start_life_cycle_watcher(self):
        """
        Start the "lifecycle watcher" thread (kept as self.watcher).

        The thread repeatedly sets up running, untagged instances, cancels
        spot requests that are not going to be filled, and stops once there
        is nothing left pending or when the timeout (run_interval less 60
        seconds) fires.
        """
        def life_cycle_watcher(please_stop):
            failed_attempts=Data()

            while not please_stop:
                spot_requests = self._get_managed_spot_requests()
                last_get = Date.now()
                instances = wrap({i.id: i for r in self.ec2_conn.get_all_instances() for i in r.instances})
                # INSTANCES THAT REQUIRE SETUP
                time_to_stop_trying = {}
                please_setup = [
                    (i, r) for i, r in [(instances[r.instance_id], r) for r in spot_requests]
                    if i.id and not i.tags.get("Name") and i._state.name == "running" and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
                ]
                for i, r in please_setup:
                    try:
                        p = self.settings.utility[i.instance_type]
                        if p == None:
                            # NOT AN INSTANCE TYPE WE KNOW HOW TO SETUP
                            try:
                                self.ec2_conn.terminate_instances(instance_ids=[i.id])
                                with self.net_new_locker:
                                    self.net_new_spot_requests.remove(r.id)
                            finally:
                                Log.error("Can not setup unknown {{instance_id}} of type {{type}}", instance_id=i.id, type=i.instance_type)

                        i.markup = p
                        try:
                            self.instance_manager.setup(i, coalesce(p, 0))
                        except Exception as e:
                            e = Except.wrap(e)
                            failed_attempts[r.id] += [e]
                            Log.error(ERROR_ON_CALL_TO_SETUP, e)
                        i.add_tag("Name", self.settings.ec2.instance.name + " (running)")
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                    except Exception as e:
                        if not time_to_stop_trying.get(i.id):
                            time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                        if Date.now() > time_to_stop_trying[i.id]:
                            # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                            self.ec2_conn.terminate_instances(instance_ids=[i.id])
                            with self.net_new_locker:
                                self.net_new_spot_requests.remove(r.id)
                            Log.warning("Problem with setup of {{instance_id}}. Time is up. Instance TERMINATED!", instance_id=i.id, cause=e)
                        elif "Can not setup unknown " in e:
                            Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)
                        elif ERROR_ON_CALL_TO_SETUP in e:
                            # ONLY REPORT AFTER REPEATED setup() FAILURES
                            if len(failed_attempts[r.id]) > 2:
                                Log.warning("Problem with setup() of {{instance_id}}", instance_id=i.id, cause=failed_attempts[r.id])
                        else:
                            Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)

                if Date.now() - last_get > 5 * SECOND:
                    # REFRESH STALE
                    spot_requests = self._get_managed_spot_requests()
                    last_get = Date.now()

                pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES])
                give_up = wrap([r for r in spot_requests if r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES])
                ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

                if self.done_spot_requests:
                    with self.net_new_locker:
                        expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                        for ii in list(self.net_new_spot_requests):
                            if Date(ii.create_time) < expired:
                                ## SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                                self.net_new_spot_requests.remove(ii)
                        for g in give_up:
                            self.net_new_spot_requests.remove(g.id)
                        for g in ignore:
                            self.net_new_spot_requests.remove(g.id)
                        pending = UniqueIndex(("id",), data=pending)
                        pending = pending | self.net_new_spot_requests

                    if give_up:
                        self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id)
                        Log.note("Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code)

                if not pending and not time_to_stop_trying and self.done_spot_requests:
                    Log.note("No more pending spot requests")
                    please_stop.go()
                    break
                elif pending:
                    Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending])

                (Till(seconds=10) | please_stop).wait()

            Log.note("life cycle watcher has stopped")

        # Log.warning("lifecycle watcher is disabled")
        timeout = Till(seconds=self.settings.run_interval.seconds - 60)
        self.watcher = Thread.run("lifecycle watcher", life_cycle_watcher, please_stop=timeout)

    def _get_valid_availability_zones(self):
        """
        Return the availability zones of the configured subnets, restricted
        to settings.availability_zone when that is set.
        """
        subnets = list(self.vpc_conn.get_all_subnets(subnet_ids=self.settings.ec2.request.network_interfaces.subnet_id))
        zones_with_interfaces = [s.availability_zone for s in subnets]

        if self.settings.availability_zone:
            # If they pass a list of zones, constrain it by zones we have an
            # interface for.
            return set(zones_with_interfaces) & set(listwrap(self.settings.availability_zone))
        else:
            # Otherwise, use all available zones.
            return zones_with_interfaces

    @override
    def _request_spot_instances(self, price, availability_zone_group, instance_type, kwargs):
        """
        Build the network interface and block device parameters, then send
        the spot request to EC2.

        :param price: bid price for the request
        :param availability_zone_group: zone used to filter the subnets
        :param instance_type: EC2 instance type being requested
        :param kwargs: request settings; modified in place and passed on to
                       ec2_conn.request_spot_instances
        :return: list of the spot requests returned by EC2
        """
        kwargs.kwargs = None

        # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP
        if instance_type.startswith("m3."):
            kwargs.placement_group = None

        # KEEP ONLY THE INTERFACES WITH A SUBNET IN THE REQUESTED ZONE
        kwargs.network_interfaces = NetworkInterfaceCollection(*(
            NetworkInterfaceSpecification(**i)
            for i in listwrap(kwargs.network_interfaces)
            if self.vpc_conn.get_all_subnets(subnet_ids=i.subnet_id, filters={"availabilityZone": availability_zone_group})
        ))

        if len(kwargs.network_interfaces) == 0:
            Log.error("No network interface specifications found for {{availability_zone}}!", availability_zone=kwargs.availability_zone_group)

        block_device_map = BlockDeviceMapping()

        # GENERIC BLOCK DEVICE MAPPING
        for dev, dev_settings in kwargs.block_device_map.items():
            block_device_map[dev] = BlockDeviceType(
                delete_on_termination=True,
                **dev_settings
            )

        kwargs.block_device_map = block_device_map

        # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping
        num_ephemeral_volumes = ephemeral_storage[instance_type]["num"]
        for i in range(num_ephemeral_volumes):
            letter = convert.ascii2char(98 + i)  # START AT "b"
            kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType(
                ephemeral_name='ephemeral' + unicode(i),
                delete_on_termination=True
            )

        if kwargs.expiration:
            kwargs.valid_until = (Date.now() + Duration(kwargs.expiration)).format(ISO8601)
            kwargs.expiration = None

        # ATTACH NEW EBS VOLUMES
        for i, drive in enumerate(self.settings.utility[instance_type].drives):
            letter = convert.ascii2char(98 + i + num_ephemeral_volumes)
            device = drive.device = coalesce(drive.device, "/dev/sd" + letter)
            d = drive.copy()
            d.path = None  # path AND device PROPERTY IS NOT ALLOWED IN THE BlockDeviceType
            d.device = None
            if d.size:
                kwargs.block_device_map[device] = BlockDeviceType(
                    delete_on_termination=True,
                    **d
                )

        output = list(self.ec2_conn.request_spot_instances(**kwargs))
        return output

    def pricing(self):
        """
        Return the price list, one entry per (instance type, availability
        zone), sorted by estimated value (descending).

        The result is computed once and kept in self.prices; the matching
        self.price_lookup index is built at the same time.
        """
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()
            now = Date.now()
            expressions.ALLOW_SCRIPTING = True

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from": prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {"coalesce": [{"rows": {"timestamp": 1}}, {"date": "eod"}]},
                                "edges": ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {"sub": {"timestamp": self.settings.uptime.duration.seconds}}
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone",
                        "instance_type",
                        {
                            "name": "time",
                            "range": {"min": "effective", "max": "expire", "mode": "inclusive"},
                            "allowNulls": False,
                            "domain": {"type": "time", "min": now.floor(HOUR) - self.settings.uptime.history, "max": Date.now().floor(HOUR)+HOUR, "interval": "hour"}
                        }
                    ],
                    "select": [
                        {"value": "price", "aggregate": "max"},
                        {"aggregate": "count"}
                    ],
                    "where": {"gt": {"expire": now.floor(HOUR) - self.settings.uptime.history}},
                    "window": [
                        {
                            "name": "current_price",
                            "value": "rows.last.price",
                            "edges": ["availability_zone", "instance_type"],
                            "sort": "time"
                        }
                    ]
                }).data

                bid80 = jx.run({
                    "from": hourly_pricing,
                    "edges": [
                        {
                            "value": "availability_zone",
                            "allowNulls": False
                        },
                        {
                            "name": "type",
                            "value": "instance_type",
                            "allowNulls": False,
                            "domain": {"type": "set", "key": "instance_type", "partitions": self.settings.utility}
                        }
                    ],
                    "select": [
                        {"name": "price_80", "value": "price", "aggregate": "percentile", "percentile": self.settings.uptime.bid_percentile},
                        {"name": "max_price", "value": "price", "aggregate": "max"},
                        {"aggregate": "count"},
                        {"value": "current_price", "aggregate": "one"},
                        {"name": "all_price", "value": "price", "aggregate": "list"}
                    ],
                    "window": [
                        {"name": "estimated_value", "value": {"div": ["type.utility", "price_80"]}},
                        {"name": "higher_price", "value": lambda row, rownum, rows: find_higher(row.all_price, row.price_80)}  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.run({
                    "from": bid80,
                    "sort": {"value": "estimated_value", "sort": -1}
                })

                self.prices = wrap(output.data)
                self.price_lookup = UniqueIndex(("type.instance_type", "availability_zone"), data=self.prices)
            return self.prices

    def _get_spot_prices_from_aws(self):
        """
        Return the set of known spot prices: those cached in
        settings.price_file plus the history fetched from AWS since the most
        recent cached timestamp (at most one week back).  Prices from the
        last two days are written back to the price file.
        """
        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content, flexible=False, leaves=False)
            except Exception as e:
                # MISSING OR UNREADABLE FILE: START WITH NO CACHED PRICES
                cache = FlatList()

        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {"value": "timestamp", "aggregate": "max"}
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX([Date(most_recent), Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note("get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at
                        )

                    # PAGE THROUGH THE PRICE HISTORY UNTIL NO TOKEN IS RETURNED
                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(self.settings.product, "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token
                        )
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(wrap({
                                "availability_zone": p.availability_zone,
                                "instance_type": p.instance_type,
                                "price": p.price,
                                "product_description": p.product_description,
                                "region": p.region.name,
                                "timestamp": Date(p.timestamp).unix
                            }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(prices, {"gte": {"timestamp": {"date": "today-2day"}}})

            def stream():  # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                if prefix == "[\n":
                    # NO PRICES WRITTEN: STILL OPEN THE ARRAY SO THE FILE IS VALID JSON
                    yield prefix
                yield "]"
            File(self.settings.price_file).write(stream())

        return prices
def _scan_database(self):
    """
    READ THE DATABASE METADATA AND PLAN HOW TO WALK FROM THE FACT TABLE TO
    EVERY RELATED TABLE

    STEPS:
    1. READ FOREIGN KEY RELATIONS FROM information_schema, PLUS ANY EXTRA
       RELATIONS LISTED IN settings.add_relations ("a.b -> c.d" FORM)
    2. READ ALL TABLES, AND PICK ONE UNIQUE CONSTRAINT PER TABLE AS ITS id
    3. READ ALL COLUMNS, AND MARK EACH AS include / reference / foreign
    4. STARTING AT THE SYNTHETIC __ids__ TABLE, FOLLOW RELATIONS IN
       FIRST-IN-FIRST-OUT ORDER, RECORDING THE JOINS AND OUTPUT COLUMNS

    RESULTS ARE STORED ON self.all_nested_paths, self.nested_path_to_join
    AND self.columns; NOTHING IS RETURNED
    """
    # GET ALL RELATIONS
    raw_relations = self.db.query(
        """ SELECT table_schema, table_name, referenced_table_schema, referenced_table_name, referenced_column_name, constraint_name, column_name, ordinal_position FROM information_schema.key_column_usage WHERE referenced_column_name IS NOT NULL """,
        param=self.settings.database,
    )

    if not raw_relations:
        Log.error("No relations in the database")

    for r in self.settings.add_relations:
        try:
            lhs, rhs = map(strings.trim, r.split("->"))
            lhs = lhs.split(".")
            if len(lhs) == 2:
                # NO SCHEMA GIVEN, ASSUME THE CONFIGURED ONE
                lhs = [self.settings.database.schema] + lhs
            rhs = rhs.split(".")
            if len(rhs) == 2:
                rhs = [self.settings.database.schema] + rhs
            to_add = Data(
                ordinal_position=1,  # CAN ONLY HANDLE 1-COLUMN RELATIONS
                table_schema=lhs[0],
                table_name=lhs[1],
                column_name=lhs[2],
                referenced_table_schema=rhs[0],
                referenced_table_name=rhs[1],
                referenced_column_name=rhs[2],
            )

            # CHECK IF EXISTING
            if jx.filter(raw_relations, {"eq": to_add}):
                Log.note("Relation {{relation}} already exists", relation=r)
                continue

            to_add.constraint_name = Random.hex(20)
            raw_relations.append(to_add)
        except Exception as e:
            Log.error("Could not parse {{line|quote}}", line=r, cause=e)

    relations = jx.select(
        raw_relations,
        [
            {"name": "constraint.name", "value": "constraint_name"},
            {"name": "table.schema", "value": "table_schema"},
            {"name": "table.name", "value": "table_name"},
            {"name": "column.name", "value": "column_name"},
            {"name": "referenced.table.schema", "value": "referenced_table_schema"},
            {"name": "referenced.table.name", "value": "referenced_table_name"},
            {"name": "referenced.column.name", "value": "referenced_column_name"},
            {"name": "ordinal_position", "value": "ordinal_position"},
        ],
    )

    # GET ALL TABLES
    raw_tables = self.db.query(""" SELECT t.table_schema, t.table_name, c.constraint_name, c.constraint_type, k.column_name, k.ordinal_position FROM information_schema.tables t LEFT JOIN information_schema.table_constraints c on c.table_name=t.table_name AND 
c.table_schema=t.table_schema and (constraint_type='UNIQUE' or constraint_type='PRIMARY KEY') LEFT JOIN information_schema.key_column_usage k on k.constraint_name=c.constraint_name AND k.table_name=t.table_name and k.table_schema=t.table_schema ORDER BY t.table_schema, t.table_name, c.constraint_name, k.ordinal_position, k.column_name """)

    # ORGANIZE, AND PICK ONE UNIQUE CONSTRAINT FOR LINKING
    tables = UniqueIndex(keys=["name", "schema"])
    for t, c in jx.groupby(raw_tables, ["table_name", "table_schema"]):
        c = wrap(list(c))
        best_index = Null
        is_referenced = False
        is_primary = False
        for g, w in jx.groupby(c, "constraint_name"):
            if not g.constraint_name:
                continue
            w = list(w)
            ref = any(
                r.table.name == t.table_name
                and r.table.schema == t.table_schema
                and r.constraint.name == g.constraint_name
                for r in relations
            )
            # THE QUERY ABOVE ONLY RETURNS 'UNIQUE' AND 'PRIMARY KEY' CONSTRAINTS,
            # SO THE FULL 'PRIMARY KEY' LABEL MUST BE MATCHED
            is_prime = w[0].constraint_type == "PRIMARY KEY"

            reasons_this_one_is_better = [
                best_index == None,  # WE DO NOT HAVE A CANDIDATE YET
                is_prime and not is_primary,  # PRIMARY KEYS ARE GOOD TO HAVE
                is_primary == is_prime and ref and not is_referenced,  # REFERENCED UNIQUE TUPLES ARE GOOD TOO
                is_primary == is_prime and ref == is_referenced and len(w) < len(best_index),  # THE SHORTER THE TUPLE, THE BETTER
            ]
            if any(reasons_this_one_is_better):
                is_primary = is_prime
                is_referenced = ref
                best_index = w

        tables.add({
            "name": t.table_name,
            "schema": t.table_schema,
            "id": [b.column_name for b in best_index],
        })

    fact_table = tables[self.settings.fact_table, self.settings.database.schema]
    # SYNTHETIC TABLE OF FACT IDS; THE WALK BELOW STARTS HERE
    ids_table = {
        "alias": "t0",
        "name": "__ids__",
        "schema": fact_table.schema,
        "id": fact_table.id,
    }
    relations.extend(
        wrap({
            "constraint": {"name": "__link_ids_to_fact_table__"},
            "table": ids_table,
            "column": {"name": c},
            "referenced": {"table": fact_table, "column": {"name": c}},
            "ordinal_position": i,
        })
        for i, c in enumerate(fact_table.id)
    )
    tables.add(ids_table)

    # GET ALL COLUMNS
    raw_columns = self.db.query(""" SELECT column_name, table_schema, table_name, 
ordinal_position, data_type FROM information_schema.columns """)

    # "table.column" ENTRIES NAME SPECIFIC REFERENCE COLUMNS; BARE "table" ENTRIES COVER THE WHOLE TABLE
    reference_only_tables = [
        r.split(".")[0]
        for r in self.settings.reference_only
        if len(r.split(".")) == 2
    ]
    reference_all_tables = [
        r.split(".")[0]
        for r in self.settings.reference_only
        if len(r.split(".")) == 1
    ]
    foreign_column_table_schema_triples = {
        (r.column.name, r.table.name, r.table.schema)
        for r in relations
    }
    referenced_column_table_schema_triples = {
        (
            r.referenced.column.name,
            r.referenced.table.name,
            r.referenced.table.schema,
        )
        for r in relations
    }

    columns = UniqueIndex(["column.name", "table.name", "table.schema"])
    for c in raw_columns:
        if c.table_name in reference_only_tables:
            if c.table_name + "." + c.column_name in self.settings.reference_only:
                include = True
                reference = True
                foreign = False
            elif c.column_name in tables[(c.table_name, c.table_schema)].id:
                include = self.settings.show_foreign_keys
                reference = False
                foreign = False
            else:
                include = False
                reference = False
                foreign = False
        elif c.table_name in reference_all_tables:
            # TABLES USED FOR REFERENCE, NO NESTED DOCUMENTS EXPECTED
            if c.column_name in tables[(c.table_name, c.table_schema)].id:
                include = self.settings.show_foreign_keys
                reference = True
                foreign = False
            elif (
                c.column_name,
                c.table_name,
                c.table_schema,
            ) in foreign_column_table_schema_triples:
                include = False
                reference = False
                foreign = True
            else:
                include = True
                reference = False
                foreign = False
        elif c.column_name in tables[(c.table_name, c.table_schema)].id:
            include = self.settings.show_foreign_keys
            reference = False
            foreign = False
        elif (
            c.column_name,
            c.table_name,
            c.table_schema,
        ) in foreign_column_table_schema_triples:
            include = False
            reference = False
            foreign = True
        elif (
            c.column_name,
            c.table_name,
            c.table_schema,
        ) in referenced_column_table_schema_triples:
            include = self.settings.show_foreign_keys
            reference = False
            foreign = False
        else:
            include = True
            reference = False
            foreign = False

        rel = {
            "column": {"name": c.column_name, "type": c.data_type},
            "table": {"name": c.table_name, "schema": c.table_schema},
            "ordinal_position": c.ordinal_position,
            "is_id": c.column_name in tables[(c.table_name, c.table_schema)].id,
            "include": include,  # TRUE IF THIS COLUMN IS OUTPUTTED
            "reference": reference,  # TRUE IF THIS COLUMN REPRESENTS THE ROW
            "foreign": foreign,  # TRUE IF THIS COLUMN POINTS TO ANOTHER ROW
        }
        columns.add(rel)

    # ITERATE OVER ALL PATHS
    todo = FlatList()
    output_columns = FlatList()
    nested_path_to_join = {}
    all_nested_paths = [["."]]

    def follow_paths(position, path, nested_path, done_relations, no_nested_docs):
        # VISIT ONE TABLE (position) REACHED BY path: RECORD ITS JOINS AND
        # OUTPUT COLUMNS, THEN QUEUE EVERY TABLE REACHABLE FROM IT ON todo
        if position.name in self.settings.exclude:
            return

        if self.path_not_allowed(path):
            return
        if DEBUG:
            Log.note("Trace {{path}}", path=path)
        if position.name != "__ids__":
            # USED TO CONFIRM WE CAN ACCESS THE TABLE (WILL THROW ERROR IF IT FAILS)
            self.db.query(
                ConcatSQL(
                    SQL_SELECT,
                    SQL_STAR,
                    SQL_FROM,
                    quote_column(position.schema, position.name),
                    SQL_LIMIT,
                    SQL_ONE,
                ))

        if position.name in reference_all_tables:
            no_nested_docs = True
        if position.name in reference_only_tables:
            return
        # SNAPSHOT OF THE JOINS SO FAR; EACH NESTED PATH STARTS FROM A COPY OF IT
        curr_join_list = copy(nested_path_to_join[nested_path[0]])

        ###############################################################################
        # INNER OBJECTS
        ###############################################################################
        referenced_tables = list(
            sort_using_key(
                jx.groupby(
                    jx.filter(
                        relations,
                        {
                            "eq": {
                                "table.name": position.name,
                                "table.schema": position.schema,
                            }
                        },
                    ),
                    "constraint.name",
                ),
                key=lambda p: first(p[1]).column.name,
            ))
        for g, constraint_columns in referenced_tables:
            g = unwrap(g)
            constraint_columns = deepcopy(constraint_columns)
            if g["constraint.name"] in done_relations:
                continue
            if any(cc for cc in constraint_columns
                   if cc.referenced.table.name in self.settings.exclude):
                continue

            done_relations.add(g["constraint.name"])

            many_to_one_joins = nested_path_to_join[nested_path[0]]
            index = len(many_to_one_joins)
            alias = "t" + text(index)
            for c in constraint_columns:
                c.referenced.table.alias = alias
                c.table = position
            many_to_one_joins.append({
                "join_columns": constraint_columns,
                "path": path,
                "nested_path": nested_path,
            })

            # HANDLE THE COMMON *id SUFFIX
            name = []
            for cname, tname in zip(
                    constraint_columns.column.name,
                    constraint_columns.referenced.table.name,
            ):
                if cname.startswith(tname):
                    name.append(tname)
                elif cname.endswith("_id"):
                    name.append(cname[:-3])
                else:
                    name.append(cname)

            relation_string = many_to_one_string(constraint_columns[0])
            step = "/".join(name)
            if len(constraint_columns) == 1:
                step = self.name_relations.get(relation_string, step)

            referenced_column_path = concat_field(path, step)
            if self.path_not_allowed(referenced_column_path):
                continue
            if referenced_column_path in reference_only_tables:
                continue

            col_pointer_name = relative_field(referenced_column_path, nested_path[0])
            for col in columns:
                if (col.table.name == constraint_columns[0].referenced.table.name
                        and col.table.schema == constraint_columns[0].referenced.table.schema):
                    col_full_name = concat_field(
                        col_pointer_name, literal_field(col.column.name))

                    if (col.is_id and len(nested_path) == 1
                            and col.table.name == fact_table.name
                            and col.table.schema == fact_table.schema):
                        # ALWAYS SHOW THE ID OF THE FACT
                        c_index = len(output_columns)
                        output_columns.append({
                            "table_alias": alias,
                            "column_alias": "c" + text(c_index),
                            "column": col,
                            "sort": True,
                            "path": referenced_column_path,
                            "nested_path": nested_path,
                            "put": col_full_name,
                        })
                    elif col.column.name == constraint_columns[0].column.name:
                        c_index = len(output_columns)
                        output_columns.append({
                            "table_alias": alias,
                            "column_alias": "c" + text(c_index),
                            "column": col,
                            "sort": False,
                            "path": referenced_column_path,
                            "nested_path": nested_path,
                            "put": col_full_name if self.settings.show_foreign_keys else None,
                        })
                    elif col.is_id:
                        c_index = len(output_columns)
                        output_columns.append({
                            "table_alias": alias,
                            "column_alias": "c" + text(c_index),
                            "column": col,
                            "sort": False,
                            "path": referenced_column_path,
                            "nested_path": nested_path,
                            "put": col_full_name if self.settings.show_foreign_keys else None,
                        })
                    elif col.reference:
                        c_index = len(output_columns)
                        output_columns.append({
                            "table_alias": alias,
                            "column_alias": "c" + text(c_index),
                            "column": col,
                            "sort": False,
                            "path": referenced_column_path,
                            "nested_path": nested_path,
                            "put": col_pointer_name if not self.settings.show_foreign_keys else col_full_name,  # REFERENCE FIELDS CAN REPLACE THE WHOLE OBJECT BEING REFERENCED
                        })
                    elif col.include:
                        c_index = len(output_columns)
                        output_columns.append({
                            "table_alias": alias,
                            "column_alias": "c" + text(c_index),
                            "column": col,
                            "sort": False,
                            "path": referenced_column_path,
                            "nested_path": nested_path,
                            "put": col_full_name,
                        })

            if position.name in reference_only_tables:
                continue

            todo.append(
                Data(
                    position=copy(constraint_columns[0].referenced.table),
                    path=referenced_column_path,
                    nested_path=nested_path,
                    done_relations=copy(done_relations),
                    no_nested_docs=no_nested_docs,
                ))

        ###############################################################################
        # NESTED OBJECTS
        ###############################################################################
        if not no_nested_docs:
            nesting_tables = list(
                sort_using_key(
                    jx.groupby(
                        jx.filter(
                            relations,
                            {
                                "eq": {
                                    "referenced.table.name": position.name,
                                    "referenced.table.schema": position.schema,
                                }
                            },
                        ),
                        "constraint.name",
                    ),
                    key=lambda p: [(r.table.name, r.column.name)
                                   for r in [first(p[1])]][0],
                ))

            for g, constraint_columns in nesting_tables:
                g = unwrap(g)
                constraint_columns = deepcopy(constraint_columns)
                if g["constraint.name"] in done_relations:
                    continue
                done_relations.add(g["constraint.name"])

                many_table = set(constraint_columns.table.name)
                if not (many_table - self.settings.exclude):
                    continue

                relation_string = one_to_many_string(constraint_columns[0])
                step = "/".join(many_table)
                if len(constraint_columns) == 1:
                    step = self.name_relations.get(relation_string, step)

                referenced_column_path = concat_field(path, step)
                if self.path_not_allowed(referenced_column_path):
                    continue

                new_nested_path = [referenced_column_path] + nested_path
                all_nested_paths.append(new_nested_path)

                if referenced_column_path in nested_path_to_join:
                    Log.error(
                        "{{path}} already exists, try adding entry to name_relations",
                        path=referenced_column_path,
                    )
                one_to_many_joins = nested_path_to_join[
                    referenced_column_path] = copy(curr_join_list)
                index = len(one_to_many_joins)
                alias = "t" + text(index)
                for c in constraint_columns:
                    c.table.alias = alias
                    c.referenced.table = position
                one_to_many_joins.append(
                    set_default(
                        {},
                        g,
                        {
                            "children": True,
                            "join_columns": constraint_columns,
                            "path": path,
                            "nested_path": nested_path,
                        },
                    ))
                for col in columns:
                    if (col.table.name == constraint_columns[0].table.name
                            and col.table.schema == constraint_columns[0].table.schema):
                        col_full_name = join_field(
                            split_field(referenced_column_path)
                            [len(split_field(new_nested_path[0])):] +
                            [literal_field(col.column.name)])

                        if col.column.name == constraint_columns[0].column.name:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias": alias,
                                "column_alias": "c" + text(c_index),
                                "column": col,
                                "sort": col.is_id,
                                "path": referenced_column_path,
                                "nested_path": new_nested_path,
                                "put": col_full_name if self.settings.show_foreign_keys else None,
                            })
                        elif col.is_id:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias": alias,
                                "column_alias": "c" + text(c_index),
                                "column": col,
                                "sort": col.is_id,
                                "path": referenced_column_path,
                                "nested_path": new_nested_path,
                                "put": col_full_name if self.settings.show_foreign_keys else None,
                            })
                        else:
                            c_index = len(output_columns)
                            output_columns.append({
                                "table_alias": alias,
                                "column_alias": "c" + text(c_index),
                                "column": col,
                                "sort": col.is_id,
                                "path": referenced_column_path,
                                "nested_path": new_nested_path,
                                "put": col_full_name if col.include else None,
                            })

                todo.append(
                    Data(
                        position=constraint_columns[0].table,
                        path=referenced_column_path,
                        nested_path=new_nested_path,
                        done_relations=copy(done_relations),
                        no_nested_docs=no_nested_docs,
                    ))

    path = "."
    nested_path = [path]
    nested_path_to_join["."] = [{
        "path": path,
        "join_columns": [{"referenced": {"table": ids_table}}],
        "nested_path": nested_path,
    }]

    todo.append(
        Data(
            position=ids_table,
            path=path,
            nested_path=nested_path,
            done_relations=set(),
            no_nested_docs=False,
        ))

    # FIRST-IN-FIRST-OUT: follow_paths() APPENDS MORE WORK TO todo AS IT GOES
    while todo:
        item = todo.pop(0)
        follow_paths(**item)

    self.all_nested_paths = all_nested_paths
    self.nested_path_to_join = nested_path_to_join
    self.columns = output_columns
return self def get_schema(self, name): if self.name != name: Log.error("This container only has table by name of {{name}}", name=name) return self.schema def get_table(self, name): if self is name or self.name == name: return self Log.error("This container only has table by name of {{name}}", name=name) def _exec(code): try: temp = None exec("temp = " + code) return temp except Exception as e: Log.error("Could not execute {{code|quote}}", code=code, cause=e) DUAL = ListContainer(name="dual", data=[{}], schema=Schema(table_name="dual", columns=UniqueIndex(keys=("name", )))) export("jx_base.container", ListContainer)
def add(self, value):
    """
    APPEND value TO THE UNDERLYING DATA LIST
    """
    self.data.append(value)


def __getitem__(self, item):
    """
    RETURN THE ITEM AT INDEX item, OR Null WHEN item IS NEGATIVE OR PAST THE END
    """
    if item < 0 or len(self.data) <= item:
        return Null
    return self.data[item]


def __iter__(self):
    """
    ITERATE OVER THE DATA, WRAPPING EACH ITEM
    """
    return (wrap(d) for d in self.data)


def __len__(self):
    return len(self.data)


def _exec(code):
    """
    EVALUATE THE PYTHON EXPRESSION IN code AND RETURN ITS VALUE

    NOTE(review): THIS EXECUTES ARBITRARY CODE, ONLY PASS TRUSTED INPUT

    :param code: SOURCE TEXT OF A PYTHON EXPRESSION
    :return: THE VALUE OF THE EXPRESSION; FAILURES ARE REPORTED WITH Log.error
    """
    try:
        # THE CALL FORM OF exec PARSES UNDER BOTH PYTHON 2 AND PYTHON 3
        # (THE STATEMENT FORM IS A SyntaxError IN PYTHON 3); THE RESULT IS
        # CAPTURED IN AN EXPLICIT NAMESPACE BECAUSE PYTHON 3 CAN NOT REBIND
        # THE LOCAL VARIABLES OF A FUNCTION THROUGH exec
        scope = {}
        exec("temp = " + code, globals(), scope)
        return scope["temp"]
    except Exception as e:
        Log.error("Could not execute {{code|quote}}", code=code, cause=e)


from pyLibrary.queries import Schema, jx

# SINGLE-ROW CONTAINER WITH ONE EMPTY RECORD
DUAL = ListContainer(name="dual", data=[{}], schema=Schema(table_name="dual", columns=UniqueIndex(keys=("names.\\.", ))))
Log.error("This container only has table by name of {{name}}", name=name) return self def get_schema(self, name): if self.name != name: Log.error("This container only has table by name of {{name}}", name=name) return self.schema def get_table(self, name): if self is name or self.name == name: return self Log.error("This container only has table by name of {{name}}", name=name) def _exec(code): try: temp = None exec("temp = " + code) return temp except Exception as e: Log.error("Could not execute {{code|quote}}", code=code, cause=e) from jx_python import jx DUAL = ListContainer( name="dual", data=[{}], schema=Schema(table_name="dual", columns=UniqueIndex(keys=("name",))) )
def update_spot_requests(self):
    """
    COMPARE WHAT IS REQUESTED/RUNNING AGAINST THE BUDGET AND THE UTILITY
    REQUIRED BY THE instance_manager, THEN SHED OR ADD INSTANCES TO MATCH

    SIDE EFFECTS: SETS self.active, MAY CALL save_money(), remove_instances()
    AND add_instances(), TAGS THE NEW SPOT REQUESTS, AND FIRES
    self.done_making_new_spot_requests
    """
    managed_requests = self._get_managed_spot_requests()

    # ADD UP THE CURRENT REQUESTED INSTANCES
    instance_index = UniqueIndex("id", data=self._get_managed_instances())
    tracked = []
    for request in managed_requests:
        if request.status.code in RUNNING_STATUS_CODES | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN:
            tracked.append(request)
    active = wrap(tracked)
    self.active = active

    # A CANCELED REQUEST THAT CLAIMS A RUNNING INSTANCE WE CAN NOT FIND IS NOT ACTIVE
    for candidate in active.copy():
        if candidate.status.code != "request-canceled-and-instance-running":
            continue
        if instance_index[candidate.instance_id] == None:
            active.remove(candidate)

    used_budget = 0
    current_spending = 0
    for request in active:
        spec = request.launch_specification
        about = self.price_lookup[spec.instance_type, spec.placement]
        discount = coalesce(about.type.discount, 0)
        net_bid = request.price - discount
        Log.note(
            "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
            id=request.id,
            type=spec.instance_type,
            zone=spec.placement,
            instance_id=request.instance_id,
            price=net_bid
        )
        used_budget += net_bid
        current_spending += coalesce(about.current_price, request.price) - discount

    Log.note(
        "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
        budget=used_budget,
        current=current_spending
    )

    remaining_budget = self.settings.budget - used_budget

    utilities = (
        self.price_lookup[
            r.launch_specification.instance_type,
            r.launch_specification.placement
        ].type.utility
        for r in active
    )
    current_utility = coalesce(SUM(utilities), 0)
    utility_required = self.instance_manager.required_utility(current_utility)
    net_new_utility = utility_required - current_utility

    Log.note(
        "have {{current_utility}} utility running; need {{need_utility}} more utility",
        current_utility=current_utility,
        need_utility=net_new_utility
    )

    # OVER BUDGET: GIVE UP REQUESTS/INSTANCES FIRST
    if remaining_budget < 0:
        remaining_budget, net_new_utility = self.save_money(remaining_budget, net_new_utility)

    # TOO MUCH UTILITY: TOLERATE THE ALLOWED OVERAGE, REMOVE THE REST
    if net_new_utility < 0:
        overage = self.settings.allowed_overage
        if overage:
            net_new_utility = mo_math.min(net_new_utility + overage * utility_required, 0)

        net_new_utility = self.remove_instances(net_new_utility)

    # TOO LITTLE UTILITY: REQUEST MORE, CAPPED BY max_new_utility
    if net_new_utility > 0:
        net_new_utility = mo_math.min(net_new_utility, self.settings.max_new_utility)
        net_new_utility, remaining_budget = self.add_instances(net_new_utility, remaining_budget)

    if net_new_utility > 0:
        Log.alert(
            "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour). Remaining budget is ${{budget|round(decimal=2)}} ",
            num=net_new_utility,
            expected=self.settings.max_utility_price,
            budget=remaining_budget
        )

    # Give EC2 a chance to notice the new requests before tagging them.
    Till(seconds=3).wait()
    with self.net_new_locker:
        for new_request in self.net_new_spot_requests:
            new_request.add_tag("Name", self.settings.ec2.instance.name)

    Log.note("All requests for new utility have been made")
    self.done_making_new_spot_requests.go()
def pricing(self):
    """
    RETURN THE PRICE SUMMARY FOR EVERY (instance_type, availability_zone),
    SORTED BY estimated_value, DESCENDING

    THE RESULT IS KEPT IN self.prices (AND INDEXED IN self.price_lookup);
    LATER CALLS RETURN THE STORED VALUE AS LONG AS IT IS NOT EMPTY.
    ALL WORK IS DONE WHILE HOLDING self.price_locker
    """
    with self.price_locker:
        if self.prices:
            return self.prices

        prices = self._get_spot_prices_from_aws()

        # ONE CLOCK READING FOR THE WHOLE QUERY, SO THE TIME DOMAIN AND THE
        # where CLAUSE CAN NOT END UP ON DIFFERENT SIDES OF AN HOUR BOUNDARY
        this_hour = Date.now().floor(HOUR)
        oldest = this_hour - self.settings.uptime.history

        with Timer("processing pricing data"):
            hourly_pricing = jx.run({
                "from": {
                    # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                    "from": prices,
                    "window": [
                        {
                            "name": "expire",
                            "value": {"coalesce": [{"rows": {"timestamp": 1}}, {"date": "eod"}]},
                            "edges": ["availability_zone", "instance_type"],
                            "sort": "timestamp"
                        },
                        {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                            "name": "effective",
                            "value": {"sub": {"timestamp": self.settings.uptime.duration.seconds}}
                        }
                    ]
                },
                "edges": [
                    "availability_zone",
                    "instance_type",
                    {
                        "name": "time",
                        "range": {"min": "effective", "max": "expire", "mode": "inclusive"},
                        "allowNulls": False,
                        "domain": {
                            "type": "time",
                            "min": oldest,
                            "max": this_hour + HOUR,
                            "interval": "hour"
                        }
                    }
                ],
                "select": [
                    {"value": "price", "aggregate": "max"},
                    {"aggregate": "count"}
                ],
                "where": {"gt": {"expire": oldest}},
                "window": [{
                    "name": "current_price",
                    "value": "rows.last.price",
                    "edges": ["availability_zone", "instance_type"],
                    "sort": "time"
                }]
            }).data

            # SUMMARIZE THE HOURLY PRICES FOR EACH ZONE AND CONFIGURED INSTANCE TYPE
            bid80 = jx.run({
                "from": ListContainer(name=None, data=hourly_pricing),
                "edges": [
                    {"value": "availability_zone", "allowNulls": False},
                    {
                        "name": "type",
                        "value": "instance_type",
                        "allowNulls": False,
                        "domain": {
                            "type": "set",
                            "key": "instance_type",
                            "partitions": self.settings.utility
                        }
                    }
                ],
                "select": [
                    {
                        "name": "price_80",
                        "value": "price",
                        "aggregate": "percentile",
                        "percentile": self.settings.uptime.bid_percentile
                    },
                    {"name": "max_price", "value": "price", "aggregate": "max"},
                    {"aggregate": "count"},
                    {"value": "current_price", "aggregate": "one"},
                    {"name": "all_price", "value": "price", "aggregate": "list"}
                ],
                "window": [
                    {"name": "estimated_value", "value": {"div": ["type.utility", "price_80"]}},
                    {
                        "name": "higher_price",
                        "value": lambda row, rownum, rows: find_higher(row.all_price, row.price_80)
                    }  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                ]
            })

            # BEST VALUE (UTILITY PER DOLLAR) FIRST
            output = jx.sort(bid80.values(), {"value": "estimated_value", "sort": -1})

            self.prices = wrap(output)
            self.price_lookup = UniqueIndex(
                ("type.instance_type", "availability_zone"),
                data=self.prices
            )
        return self.prices
class SpotManager(object): @override def __init__(self, instance_manager, disable_prices=False, kwargs=None): self.settings = kwargs self.instance_manager = instance_manager aws_args = dict(region_name=kwargs.aws.region, aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id), aws_secret_access_key=unwrap( kwargs.aws.aws_secret_access_key)) self.ec2_conn = boto.ec2.connect_to_region(**aws_args) self.vpc_conn = boto.vpc.connect_to_region(**aws_args) self.price_locker = Lock() self.prices = None self.price_lookup = None self.no_capacity = {} self.no_capacity_file = File( kwargs.price_file).parent / "no capacity.json" self.done_making_new_spot_requests = Signal() self.net_new_locker = Lock() self.net_new_spot_requests = UniqueIndex( ("id", )) # SPOT REQUESTS FOR THIS SESSION self.watcher = None self.active = None self.settings.uptime.bid_percentile = coalesce( self.settings.uptime.bid_percentile, self.settings.bid_percentile) self.settings.uptime.history = coalesce( Date(self.settings.uptime.history), DAY) self.settings.uptime.duration = coalesce( Duration(self.settings.uptime.duration), Date("5minute")) self.settings.max_percent_per_type = coalesce( self.settings.max_percent_per_type, 1) if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required( ): self._start_life_cycle_watcher() if not disable_prices: self.pricing() def update_spot_requests(self): spot_requests = self._get_managed_spot_requests() # ADD UP THE CURRENT REQUESTED INSTANCES all_instances = UniqueIndex("id", data=self._get_managed_instances()) self.active = active = wrap([ r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN ]) for a in active.copy(): if a.status.code == "request-canceled-and-instance-running" and all_instances[ a.instance_id] == None: active.remove(a) used_budget = 0 current_spending = 0 for a in active: about = self.price_lookup[a.launch_specification.instance_type, 
a.launch_specification.placement] discount = coalesce(about.type.discount, 0) Log.note( "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}", id=a.id, type=a.launch_specification.instance_type, zone=a.launch_specification.placement, instance_id=a.instance_id, price=a.price - discount) used_budget += a.price - discount current_spending += coalesce(about.current_price, a.price) - discount Log.note( "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)", budget=used_budget, current=current_spending) remaining_budget = self.settings.budget - used_budget current_utility = coalesce( SUM(self.price_lookup[ r.launch_specification.instance_type, r.launch_specification.placement].type.utility for r in active), 0) utility_required = self.instance_manager.required_utility( current_utility) net_new_utility = utility_required - current_utility Log.note( "have {{current_utility}} utility running; need {{need_utility}} more utility", current_utility=current_utility, need_utility=net_new_utility) if remaining_budget < 0: remaining_budget, net_new_utility = self.save_money( remaining_budget, net_new_utility) if net_new_utility < 0: if self.settings.allowed_overage: net_new_utility = mo_math.min( net_new_utility + self.settings.allowed_overage * utility_required, 0) net_new_utility = self.remove_instances(net_new_utility) if net_new_utility > 0: net_new_utility = mo_math.min(net_new_utility, self.settings.max_new_utility) net_new_utility, remaining_budget = self.add_instances( net_new_utility, remaining_budget) if net_new_utility > 0: Log.alert( "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour). Remaining budget is ${{budget|round(decimal=2)}} ", num=net_new_utility, expected=self.settings.max_utility_price, budget=remaining_budget) # Give EC2 a chance to notice the new requests before tagging them. 
Till(seconds=3).wait() with self.net_new_locker: for req in self.net_new_spot_requests: req.add_tag("Name", self.settings.ec2.instance.name) Log.note("All requests for new utility have been made") self.done_making_new_spot_requests.go() def add_instances(self, net_new_utility, remaining_budget): prices = self.pricing() for p in prices: if net_new_utility <= 0 or remaining_budget <= 0: break if p.current_price == None: Log.note("{{type}} has no current price", type=p.type.instance_type) continue if self.settings.utility[p.type.instance_type].blacklist or \ p.availability_zone in listwrap(self.settings.utility[p.type.instance_type].blacklist_zones): Log.note("{{type}} in {{zone}} skipped due to blacklist", type=p.type.instance_type, zone=p.availability_zone) continue # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount max_bid = mo_math.min(p.higher_price, max_acceptable_price, remaining_budget) min_bid = p.price_80 if min_bid > max_acceptable_price: Log.note( "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour", type=p.type.instance_type, price=min_bid, remaining=max_acceptable_price) continue elif min_bid > remaining_budget: Log.note( "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour", type=p.type.instance_type, bid=min_bid, remaining_budget=remaining_budget) continue elif min_bid > max_bid: Log.error("not expected") naive_number_needed = int( mo_math.round(float(net_new_utility) / float(p.type.utility), decimal=0)) limit_total = None if self.settings.max_percent_per_type < 1: current_count = sum( 1 for a in self.active if a.launch_specification.instance_type == p.type.instance_type and a.launch_specification.placement == p.availability_zone) all_count = sum( 1 for a in self.active if a.launch_specification.placement == p.availability_zone) all_count = max(all_count, naive_number_needed) limit_total = 
int( mo_math.floor( (all_count * self.settings.max_percent_per_type - current_count) / (1 - self.settings.max_percent_per_type))) num = mo_math.min(naive_number_needed, limit_total, self.settings.max_requests_per_type) if num < 0: Log.note( "{{type}} is over {{limit|percent}} of instances, no more requested", limit=self.settings.max_percent_per_type, type=p.type.instance_type) continue elif num == 1: min_bid = mo_math.min( mo_math.max(p.current_price * 1.1, min_bid), max_acceptable_price) price_interval = 0 else: price_interval = mo_math.min(min_bid / 10, (max_bid - min_bid) / (num - 1)) for i in range(num): bid_per_machine = min_bid + (i * price_interval) if bid_per_machine < p.current_price: Log.note( "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour", type=p.type.instance_type, bid=bid_per_machine - p.type.discount, current_price=p.current_price) continue if bid_per_machine - p.type.discount > remaining_budget: Log.note( "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour", type=p.type.instance_type, bid=bid_per_machine - p.type.discount, remaining=remaining_budget) continue last_no_capacity_message = self.no_capacity.get( p.type.instance_type, Null) if last_no_capacity_message > Date.now( ) - CAPACITY_NOT_AVAILABLE_RETRY: Log.note( "Did not bid on {{type}}: \"No capacity\" last seen at {{last_time|datetime}}", type=p.type.instance_type, last_time=last_no_capacity_message) continue try: if self.settings.ec2.request.count == None or self.settings.ec2.request.count != 1: Log.error( "Spot Manager can only request machine one-at-a-time" ) new_requests = self._request_spot_instances( price=bid_per_machine, availability_zone_group=p.availability_zone, instance_type=p.type.instance_type, kwargs=copy(self.settings.ec2.request)) Log.note( "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour", num=len(new_requests), type=p.type.instance_type, 
zone=p.availability_zone, utility=p.type.utility, price=bid_per_machine) net_new_utility -= p.type.utility * len(new_requests) remaining_budget -= (bid_per_machine - p.type.discount) * len(new_requests) with self.net_new_locker: for ii in new_requests: self.net_new_spot_requests.add(ii) except Exception as e: Log.warning( "Request instance {{type}} failed because {{reason}}", type=p.type.instance_type, reason=e.message, cause=e) if "Max spot instance count exceeded" in e.message: Log.note("No further spot requests will be attempted.") return net_new_utility, remaining_budget return net_new_utility, remaining_budget def remove_instances(self, net_new_utility): instances = self.running_instances() # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE remove_list = [] for acceptable_error in range(0, 8): remaining_utility = -net_new_utility remove_list = FlatList() for s in instances: utility = coalesce(s.markup.type.utility, 0) if utility <= remaining_utility + acceptable_error: remove_list.append(s) remaining_utility -= utility if remaining_utility <= 0: net_new_utility = -remaining_utility break if not remove_list: return net_new_utility # SEND SHUTDOWN TO EACH INSTANCE Log.note("Shutdown {{instances}}", instances=remove_list.id) remove_threads = [ Thread.run("teardown for " + text(i.id), self.instance_manager.teardown, i) for i in remove_list ] for t in remove_threads: try: t.join() except Exception as e: Log.warning("Teardown of {{id}} failed", id=i.id, cause=e) remove_spot_requests = remove_list.spot_instance_request_id # TERMINATE INSTANCES self.ec2_conn.terminate_instances(instance_ids=remove_list.id) # TERMINATE SPOT REQUESTS self.ec2_conn.cancel_spot_instance_requests( request_ids=remove_spot_requests) return net_new_utility def running_instances(self): # FIND THE BIGGEST, MOST EXPENSIVE REQUESTS instances = self._get_managed_instances() for r in instances: try: r.markup = self.price_lookup[r.instance_type, r.placement] except Exception as e: 
r.markup = self.price_lookup[r.instance_type, r.placement] Log.error("No pricing!!!", e) instances = jx.sort(instances, [{ "value": "markup.type.utility", "sort": -1 }, { "value": "markup.estimated_value", "sort": 1 }]) return instances def save_money(self, remaining_budget, net_new_utility): remove_spot_requests = wrap([]) # FIRST CANCEL THE PENDING REQUESTS if remaining_budget < 0: requests = self._get_managed_spot_requests() for r in requests: if r.status.code in PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN: remove_spot_requests.append(r.id) net_new_utility += self.settings.utility[ r.launch_specification.instance_type].utility remaining_budget += r.price instances = jx.sort(self.running_instances(), "markup.estimated_value") remove_list = wrap([]) for s in instances: if remaining_budget >= 0: break remove_list.append(s) net_new_utility += coalesce(s.markup.type.utility, 0) remaining_budget += coalesce(s.request.bid_price, s.markup.price_80, s.markup.current_price) if not remove_list: return remaining_budget, net_new_utility # SEND SHUTDOWN TO EACH INSTANCE Log.warning("Shutdown {{instances}} to save money!", instances=remove_list.id) if ALLOW_SHUTDOWN: for g, removals in jx.chunk(remove_list, size=20): for i, t in [(i, Thread.run("teardown " + i.id, self.instance_manager.teardown, i, please_stop=False)) for i in removals]: try: t.join() except Exception: Log.note("Problem with shutdown of {{id}}", id=i.id) remove_spot_requests.extend(remove_list.spot_instance_request_id) # TERMINATE INSTANCES self.ec2_conn.terminate_instances(instance_ids=remove_list.id) # TERMINATE SPOT REQUESTS self.ec2_conn.cancel_spot_instance_requests( request_ids=remove_spot_requests) return remaining_budget, net_new_utility @cache(duration=5 * SECOND) def _get_managed_spot_requests(self): output = wrap([ datawrap(r) for r in self.ec2_conn.get_all_spot_instance_requests() if not r.tags.get("Name") or r.tags.get("Name").startswith(self.settings.ec2.instance.name) ]) 
return output def _get_managed_instances(self): requests = UniqueIndex(["instance_id"], data=self._get_managed_spot_requests().filter( lambda r: r.instance_id != None)) reservations = self.ec2_conn.get_all_instances() output = [] for res in reservations: for instance in res.instances: if instance.tags.get('Name', '').startswith( self.settings.ec2.instance.name ) and instance._state.name == "running": instance.request = requests[instance.id] output.append(datawrap(instance)) return wrap(output) def _start_life_cycle_watcher(self): failed_locker = Lock() failed_attempts = Data() def track_setup( instance_setup_function, request, instance, # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP utility, # THE utility OBJECT FOUND IN CONFIG please_stop): try: instance_setup_function(instance, utility, please_stop) instance.add_tag( "Name", self.settings.ec2.instance.name + " (running)") with self.net_new_locker: self.net_new_spot_requests.remove(request.id) except Exception as e: e = Except.wrap(e) instance.add_tag("Name", "") with failed_locker: failed_attempts[request.id] += [e] if "Can not setup unknown " in e: Log.warning("Unexpected failure on startup", instance_id=instance.id, cause=e) elif ERROR_ON_CALL_TO_SETUP in e: with failed_locker: causes = failed_attempts[request.id] if len(causes) > 2: Log.warning("Problem with setup() of {{instance_id}}", instance_id=instance.id, cause=causes) else: Log.warning("Unexpected failure on startup", instance_id=instance.id, cause=e) def life_cycle_watcher(please_stop): bad_requests = Data() setup_threads = [] last_get = Date.now() setup_in_progress = set() while not please_stop: spot_requests = self._get_managed_spot_requests() instances = wrap({ i.id: i for r in self.ec2_conn.get_all_instances() for i in r.instances }) # INSTANCES THAT REQUIRE SETUP time_to_stop_trying = {} please_setup = [ (i, r) for i, r in [(instances[r.instance_id], r) for r in spot_requests] if i.id and (not i.tags.get("Name") or i.tags.get( "Name") == 
self.settings.ec2.instance.name + " (setup)") and i.id not in setup_in_progress and i._state.name == "running" and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP ] for i, r in please_setup: if not time_to_stop_trying.get(i.id): time_to_stop_trying[ i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN if Date.now() > time_to_stop_trying[i.id]: # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE self.ec2_conn.terminate_instances(instance_ids=[i.id]) with self.net_new_locker: self.net_new_spot_requests.remove(r.id) Log.warning( "Problem with setup of {{instance_id}}. Time is up. Instance TERMINATED!", instance_id=i.id) continue try: p = self.settings.utility[i.instance_type] if p == None: try: self.ec2_conn.terminate_instances( instance_ids=[i.id]) with self.net_new_locker: self.net_new_spot_requests.remove(r.id) finally: Log.error( "Can not setup unknown {{instance_id}} of type {{type}}", instance_id=i.id, type=i.instance_type) i.markup = p i.add_tag("Name", self.settings.ec2.instance.name + " (setup)") setup_in_progress.add(i.id) t = Thread.run("setup for " + text(i.id), track_setup, self.instance_manager.setup, r, i, p) if SINGLE_THREAD_SETUP: t.join() setup_threads.append(t) except Exception as e: i.add_tag("Name", "") Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e) if Date.now() - last_get > 5 * SECOND: # REFRESH STALE spot_requests = self._get_managed_spot_requests() last_get = Date.now() pending = wrap([ r for r in spot_requests if r.status.code in PENDING_STATUS_CODES ]) give_up = wrap([ r for r in spot_requests if (r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES) and r.id not in bad_requests ]) ignore = wrap([ r for r in spot_requests if r.status.code in MIGHT_HAPPEN ]) # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT if self.done_making_new_spot_requests: with self.net_new_locker: expired = Date.now( ) - self.settings.run_interval + 2 * MINUTE for ii in list(self.net_new_spot_requests): if Date(ii.create_time) < 
expired: # SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS self.net_new_spot_requests.remove(ii) for g in ignore: self.net_new_spot_requests.remove(g.id) pending = UniqueIndex(("id", ), data=pending) pending = pending | self.net_new_spot_requests if give_up: self.ec2_conn.cancel_spot_instance_requests( request_ids=give_up.id) Log.note( "Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code) for g in give_up: bad_requests[g.id] += 1 if g.id in self.net_new_spot_requests: self.net_new_spot_requests.remove(g.id) if g.status.code == "capacity-not-available": self.no_capacity[ g.launch_specification. instance_type] = Date.now() if g.status.code == "bad-parameters": self.no_capacity[ g.launch_specification. instance_type] = Date.now() Log.warning( "bad parameters while requesting type {{type}}", type=g.launch_specification. instance_type) if not pending and self.done_making_new_spot_requests: Log.note("No more pending spot requests") break elif pending: Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending]) (Till(seconds=10) | please_stop).wait() with Timer("Save no capacity to file"): table = [{ "instance_type": k, "last_failure": v } for k, v in self.no_capacity.items()] self.no_capacity_file.write(value2json(table, pretty=True)) # WAIT FOR SETUP TO COMPLETE for t in setup_threads: t.join() Log.note("life cycle watcher has stopped") # Log.warning("lifecycle watcher is disabled") timeout = Till(seconds=self.settings.run_interval.seconds - 60) self.watcher = Thread.run("lifecycle watcher", life_cycle_watcher, please_stop=timeout) def _get_valid_availability_zones(self): subnets = list( self.vpc_conn.get_all_subnets(subnet_ids=self.settings.ec2.request. network_interfaces.subnet_id)) zones_with_interfaces = [s.availability_zone for s in subnets] if self.settings.availability_zone: # If they pass a list of zones, constrain it by zones we have an # interface for. 
return set(zones_with_interfaces) & set( listwrap(self.settings.availability_zone)) else: # Otherwise, use all available zones. return zones_with_interfaces @override def _request_spot_instances(self, price, availability_zone_group, instance_type, kwargs): kwargs.self = None kwargs.kwargs = None # m3 INSTANCES ARE NOT ALLOWED PLACEMENT GROUP if instance_type.startswith("m3."): kwargs.placement_group = None kwargs.network_interfaces = NetworkInterfaceCollection( *(NetworkInterfaceSpecification(**i) for i in listwrap(kwargs.network_interfaces) if self.vpc_conn.get_all_subnets( subnet_ids=i.subnet_id, filters={"availabilityZone": availability_zone_group}))) if len(kwargs.network_interfaces) == 0: Log.error( "No network interface specifications found for {{availability_zone}}!", availability_zone=kwargs.availability_zone_group) block_device_map = BlockDeviceMapping() # GENERIC BLOCK DEVICE MAPPING for dev, dev_settings in kwargs.block_device_map.items(): block_device_map[dev] = BlockDeviceType(delete_on_termination=True, **dev_settings) kwargs.block_device_map = block_device_map # INCLUDE EPHEMERAL STORAGE IN BlockDeviceMapping num_ephemeral_volumes = ephemeral_storage[instance_type]["num"] for i in range(num_ephemeral_volumes): letter = convert.ascii2char(98 + i) # START AT "b" kwargs.block_device_map["/dev/sd" + letter] = BlockDeviceType( ephemeral_name='ephemeral' + text(i), delete_on_termination=True) if kwargs.expiration: kwargs.valid_until = (Date.now() + Duration(kwargs.expiration)).format(ISO8601) kwargs.expiration = None # ATTACH NEW EBS VOLUMES for i, drive in enumerate(self.settings.utility[instance_type].drives): letter = convert.ascii2char(98 + i + num_ephemeral_volumes) device = drive.device = coalesce(drive.device, "/dev/sd" + letter) d = drive.copy() d.path = None # path AND device PROPERTY IS NOT ALLOWED IN THE BlockDeviceType d.device = None if d.size: kwargs.block_device_map[device] = BlockDeviceType( delete_on_termination=True, **d) output = 
list(self.ec2_conn.request_spot_instances(**kwargs)) return output def pricing(self): with self.price_locker: if self.prices: return self.prices prices = self._get_spot_prices_from_aws() now = Date.now() with Timer("processing pricing data"): hourly_pricing = jx.run({ "from": { # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE "from": prices, "window": [ { "name": "expire", "value": { "coalesce": [{ "rows": { "timestamp": 1 } }, { "date": "eod" }] }, "edges": ["availability_zone", "instance_type"], "sort": "timestamp" }, { # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME "name": "effective", "value": { "sub": { "timestamp": self.settings.uptime.duration.seconds } } } ] }, "edges": [ "availability_zone", "instance_type", { "name": "time", "range": { "min": "effective", "max": "expire", "mode": "inclusive" }, "allowNulls": False, "domain": { "type": "time", "min": now.floor(HOUR) - self.settings.uptime.history, "max": Date.now().floor(HOUR) + HOUR, "interval": "hour" } } ], "select": [{ "value": "price", "aggregate": "max" }, { "aggregate": "count" }], "where": { "gt": { "expire": now.floor(HOUR) - self.settings.uptime.history } }, "window": [{ "name": "current_price", "value": "rows.last.price", "edges": ["availability_zone", "instance_type"], "sort": "time" }] }).data bid80 = jx.run({ "from": ListContainer(name=None, data=hourly_pricing), "edges": [{ "value": "availability_zone", "allowNulls": False }, { "name": "type", "value": "instance_type", "allowNulls": False, "domain": { "type": "set", "key": "instance_type", "partitions": self.settings.utility } }], "select": [{ "name": "price_80", "value": "price", "aggregate": "percentile", "percentile": self.settings.uptime.bid_percentile }, { "name": "max_price", "value": "price", "aggregate": "max" }, { "aggregate": "count" }, { "value": "current_price", "aggregate": "one" }, { "name": "all_price", "value": "price", "aggregate": "list" }], "window": [ { "name": 
"estimated_value", "value": { "div": ["type.utility", "price_80"] } }, { "name": "higher_price", "value": lambda row, rownum, rows: find_higher( row.all_price, row.price_80) } # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}} ] }) output = jx.sort(bid80.values(), { "value": "estimated_value", "sort": -1 }) self.prices = wrap(output) self.price_lookup = UniqueIndex( ("type.instance_type", "availability_zone"), data=self.prices) return self.prices def _get_spot_prices_from_aws(self): with Timer("Read no capacity file"): try: # FILE IS LIST OF {instance_type, last_failure} OBJECTS content = self.no_capacity_file.read() self.no_capacity = dict( (r.instance_type, r.last_failure) for r in convert.json2value( content, flexible=False, leaves=False)) except Exception as e: self.no_capacity = {} with Timer("Read pricing file"): try: content = File(self.settings.price_file).read() cache = convert.json2value(content, flexible=False, leaves=False) except Exception as e: cache = FlatList() cache = ListContainer(name=None, data=cache) most_recents = jx.run({ "from": cache, "edges": ["instance_type", "availability_zone"], "select": { "value": "timestamp", "aggregate": "max" } }) zones = self._get_valid_availability_zones() prices = set(cache) with Timer("Get pricing from AWS"): for instance_type in self.settings.utility.keys(): for zone in zones: if cache: most_recent = most_recents[{ "instance_type": instance_type, "availability_zone": zone }].timestamp start_at = MAX( [Date(most_recent), Date.today() - WEEK]) else: start_at = Date.today() - WEEK if DEBUG_PRICING: Log.note( "get pricing for {{instance_type}} starting at {{start_at}}", instance_type=instance_type, start_at=start_at) next_token = None while True: resultset = self.ec2_conn.get_spot_price_history( product_description=coalesce( self.settings.product, "Linux/UNIX (Amazon VPC)"), instance_type=instance_type, availability_zone=zone, start_time=start_at.format(ISO8601), 
next_token=next_token) next_token = resultset.next_token for p in resultset: prices.add( wrap({ "availability_zone": p.availability_zone, "instance_type": p.instance_type, "price": p.price, "product_description": p.product_description, "region": p.region.name, "timestamp": Date(p.timestamp).unix })) if not next_token: break with Timer("Save prices to file"): new_prices = jx.filter( prices, {"gte": { "timestamp": { "date": "today-2day" } }}) def stream(): # IT'S A LOT OF PRICES, STREAM THEM TO FILE prefix = "[\n" for p in new_prices: yield prefix yield convert.value2json(p) prefix = ",\n" yield "]" File(self.settings.price_file).write(stream()) return ListContainer(name="prices", data=prices)
def life_cycle_watcher(please_stop):
    """
    Poll spot requests until `please_stop` is signalled, or until no request
    is pending and no more will be made.

    Each poll: start setup on running instances that are un-named or tagged
    "(setup)", terminate instances whose setup could not be started in time,
    and cancel requests that will not be filled.  On exit, the no-capacity
    table is written to file and all setup threads are joined.

    NOTE(review): relies on `self` and `track_setup` from an enclosing scope;
    block nesting was rebuilt from text that had lost its indentation.
    """
    bad_requests = Data()
    setup_threads = []
    last_get = Date.now()
    setup_in_progress = set()
    # DEADLINES MUST SURVIVE BETWEEN POLLS; IF RESET EVERY POLL THE
    # "TIME IS UP" BRANCH BELOW CAN NEVER FIRE
    time_to_stop_trying = {}

    while not please_stop:
        spot_requests = self._get_managed_spot_requests()
        instances = wrap({
            i.id: i
            for r in self.ec2_conn.get_all_instances()
            for i in r.instances
        })

        # INSTANCES THAT REQUIRE SETUP
        please_setup = [
            (i, r)
            for i, r in [(instances[r.instance_id], r) for r in spot_requests]
            if i.id
            and (not i.tags.get("Name") or i.tags.get("Name") == self.settings.ec2.instance.name + " (setup)")
            and i.id not in setup_in_progress
            and i._state.name == "running"
            and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
        ]

        for i, r in please_setup:
            if not time_to_stop_trying.get(i.id):
                time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
            if Date.now() > time_to_stop_trying[i.id]:
                # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                self.ec2_conn.terminate_instances(instance_ids=[i.id])
                with self.net_new_locker:
                    self.net_new_spot_requests.remove(r.id)
                Log.warning(
                    "Problem with setup of {{instance_id}}. Time is up. Instance TERMINATED!",
                    instance_id=i.id
                )
                continue

            try:
                p = self.settings.utility[i.instance_type]
                if p == None:
                    try:
                        self.ec2_conn.terminate_instances(instance_ids=[i.id])
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                    finally:
                        # Log.error RAISES; THE OUTER except REPORTS IT
                        Log.error(
                            "Can not setup unknown {{instance_id}} of type {{type}}",
                            instance_id=i.id,
                            type=i.instance_type
                        )

                i.markup = p
                i.add_tag("Name", self.settings.ec2.instance.name + " (setup)")
                setup_in_progress.add(i.id)
                t = Thread.run("setup for " + text(i.id), track_setup, self.instance_manager.setup, r, i, p)
                if SINGLE_THREAD_SETUP:
                    t.join()
                setup_threads.append(t)
            except Exception as e:
                i.add_tag("Name", "")
                Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)

        if Date.now() - last_get > 5 * SECOND:
            # REFRESH STALE
            spot_requests = self._get_managed_spot_requests()
            last_get = Date.now()

        pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES])
        give_up = wrap([
            r
            for r in spot_requests
            if (r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES) and r.id not in bad_requests
        ])
        # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT
        ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN])

        if self.done_making_new_spot_requests:
            with self.net_new_locker:
                expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                for ii in list(self.net_new_spot_requests):
                    if Date(ii.create_time) < expired:
                        # SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                        self.net_new_spot_requests.remove(ii)

                for g in ignore:
                    self.net_new_spot_requests.remove(g.id)
                pending = UniqueIndex(("id",), data=pending)
                pending = pending | self.net_new_spot_requests

            if give_up:
                self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id)
                Log.note(
                    "Cancelled spot requests {{spots}}, {{reasons}}",
                    spots=give_up.id,
                    reasons=give_up.status.code
                )

                for g in give_up:
                    bad_requests[g.id] += 1
                    if g.id in self.net_new_spot_requests:
                        self.net_new_spot_requests.remove(g.id)
                        if g.status.code == "capacity-not-available":
                            self.no_capacity[g.launch_specification.instance_type] = Date.now()
                        if g.status.code == "bad-parameters":
                            self.no_capacity[g.launch_specification.instance_type] = Date.now()
                            Log.warning(
                                "bad parameters while requesting type {{type}}",
                                type=g.launch_specification.instance_type
                            )

        if not pending and self.done_making_new_spot_requests:
            Log.note("No more pending spot requests")
            break
        elif pending:
            Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending])

        (Till(seconds=10) | please_stop).wait()

    with Timer("Save no capacity to file"):
        table = [
            {"instance_type": k, "last_failure": v}
            for k, v in self.no_capacity.items()
        ]
        self.no_capacity_file.write(value2json(table, pretty=True))

    # WAIT FOR SETUP TO COMPLETE
    for t in setup_threads:
        t.join()

    Log.note("life cycle watcher has stopped")
def _scan_database(self):
    """
    Read relations, tables and columns from information_schema, then walk the
    relations outward from the fact table to decide which columns are output
    and where each is placed in the (possibly nested) document.

    Results are stored on the instance:
      * self.all_nested_paths    - every nested path found (starting with ["."])
      * self.nested_path_to_join - join list for each nested path
      * self.columns             - the output column descriptors

    NOTE(review): block nesting was rebuilt from text that had lost its
    indentation - confirm against version control.
    """
    # GET ALL RELATIONS
    raw_relations = self.db.query("""
        SELECT
            table_schema,
            table_name,
            referenced_table_schema,
            referenced_table_name,
            referenced_column_name,
            constraint_name,
            column_name,
            ordinal_position
        FROM
            information_schema.key_column_usage
        WHERE
            referenced_column_name IS NOT NULL
    """, param=self.settings.database)

    if not raw_relations:
        Log.error("No relations in the database")

    # EXTRA RELATIONS FROM CONFIG, WRITTEN AS "schema.table.column -> schema.table.column"
    for r in self.settings.add_relations:
        try:
            a, b = map(strings.trim, r.split("->"))
            a = a.split(".")
            b = b.split(".")
            raw_relations.append(Data(
                table_schema=a[0],
                table_name=a[1],
                referenced_table_schema=b[0],
                referenced_table_name=b[1],
                referenced_column_name=b[2],
                constraint_name=Random.hex(20),
                column_name=a[2],
                ordinal_position=1
            ))
        except Exception as e:
            Log.error("Could not parse {{line|quote}}", line=r, cause=e)

    relations = jx.select(raw_relations, [
        {"name": "constraint.name", "value": "constraint_name"},
        {"name": "table.schema", "value": "table_schema"},
        {"name": "table.name", "value": "table_name"},
        {"name": "column.name", "value": "column_name"},
        {"name": "referenced.table.schema", "value": "referenced_table_schema"},
        {"name": "referenced.table.name", "value": "referenced_table_name"},
        {"name": "referenced.column.name", "value": "referenced_column_name"},
        {"name": "ordinal_position", "value": "ordinal_position"}
    ])

    # GET ALL TABLES
    raw_tables = self.db.query("""
        SELECT
            t.table_schema,
            t.table_name,
            c.constraint_name,
            c.constraint_type,
            k.column_name,
            k.ordinal_position
        FROM
            information_schema.tables t
        LEFT JOIN
            information_schema.table_constraints c on c.table_name=t.table_name AND c.table_schema=t.table_schema and (constraint_type='UNIQUE' or constraint_type='PRIMARY KEY')
        LEFT JOIN
            information_schema.key_column_usage k on k.constraint_name=c.constraint_name AND k.table_name=t.table_name and k.table_schema=t.table_schema
        ORDER BY
            t.table_schema,
            t.table_name,
            c.constraint_name,
            k.ordinal_position,
            k.column_name
    """, param=self.settings.database)

    # ORGANIZE, AND PICK ONE UNIQUE CONSTRAINT FOR LINKING
    tables = UniqueIndex(keys=["name", "schema"])
    for t, c in jx.groupby(raw_tables, ["table_name", "table_schema"]):
        c = wrap(list(c))
        best_index = Null
        is_referenced = False
        is_primary = False
        for g, w in jx.groupby(c, "constraint_name"):
            if not g.constraint_name:
                continue

            w = list(w)
            ref = any(
                r.table.name == t.table_name and r.table.schema == t.table_schema and r.constraint.name == g.constraint_name
                for r in relations
            )
            # THE QUERY ABOVE ONLY RETURNS 'UNIQUE' AND 'PRIMARY KEY' CONSTRAINT TYPES
            is_prime = w[0].constraint_type == "PRIMARY KEY"

            reasons_this_one_is_better = [
                best_index == None,  # WE DO NOT HAVE A CANDIDATE YET
                is_prime and not is_primary,  # PRIMARY KEYS ARE GOOD TO HAVE
                is_primary == is_prime and ref and not is_referenced,  # REFERENCED UNIQUE TUPLES ARE GOOD TOO
                is_primary == is_prime and ref == is_referenced and len(w) < len(best_index)  # THE SHORTER THE TUPLE, THE BETTER
            ]
            if any(reasons_this_one_is_better):
                is_primary = is_prime
                is_referenced = ref
                best_index = w

        tables.add({
            "name": t.table_name,
            "schema": t.table_schema,
            "id": [b.column_name for b in best_index]
        })

    fact_table = tables[self.settings.fact_table, self.settings.database.schema]
    # A VIRTUAL TABLE OF IDS; IT LINKS TO THE FACT TABLE THROUGH THE FACT TABLE'S ID COLUMNS
    ids_table = {
        "alias": "t0",
        "name": "__ids__",
        "schema": fact_table.schema,
        "id": fact_table.id
    }
    relations.extend(
        wrap({
            "constraint": {"name": "__link_ids_to_fact_table__"},
            "table": ids_table,
            "column": {"name": c},
            "referenced": {
                "table": fact_table,
                "column": {"name": c}
            },
            "ordinal_position": i
        })
        for i, c in enumerate(fact_table.id)
    )
    tables.add(ids_table)

    # GET ALL COLUMNS
    raw_columns = self.db.query("""
        SELECT
            column_name,
            table_schema,
            table_name,
            ordinal_position,
            data_type
        FROM
            information_schema.columns
    """, param=self.settings.database)

    # reference_only ENTRIES ARE EITHER "table.column" (TWO PARTS) OR "table" (ONE PART)
    reference_only_tables = [
        r.split(".")[0]
        for r in self.settings.reference_only
        if len(r.split(".")) == 2
    ]
    reference_all_tables = [
        r.split(".")[0]
        for r in self.settings.reference_only
        if len(r.split(".")) == 1
    ]
    foreign_column_table_schema_triples = {
        (r.column.name, r.table.name, r.table.schema)
        for r in relations
    }
    referenced_column_table_schema_triples = {
        (r.referenced.column.name, r.referenced.table.name, r.referenced.table.schema)
        for r in relations
    }

    columns = UniqueIndex(["column.name", "table.name", "table.schema"])
    show_foreign_keys = self.settings.show_foreign_keys
    for c in raw_columns:
        is_id = c.column_name in tables[(c.table_name, c.table_schema)].id
        triple = (c.column_name, c.table_name, c.table_schema)

        if c.table_name in reference_only_tables:
            if c.table_name + "." + c.column_name in self.settings.reference_only:
                include, reference, foreign = True, True, False
            elif is_id:
                include, reference, foreign = show_foreign_keys, False, False
            else:
                include, reference, foreign = False, False, False
        elif c.table_name in reference_all_tables:
            # TABLES USED FOR REFERENCE, NO NESTED DOCUMENTS EXPECTED
            if is_id:
                include, reference, foreign = show_foreign_keys, True, False
            elif triple in foreign_column_table_schema_triples:
                include, reference, foreign = False, False, True
            else:
                include, reference, foreign = True, False, False
        elif is_id:
            include, reference, foreign = show_foreign_keys, False, False
        elif triple in foreign_column_table_schema_triples:
            include, reference, foreign = False, False, True
        elif triple in referenced_column_table_schema_triples:
            include, reference, foreign = show_foreign_keys, False, False
        else:
            include, reference, foreign = True, False, False

        rel = {
            "column": {
                "name": c.column_name,
                "type": c.data_type
            },
            "table": {
                "name": c.table_name,
                "schema": c.table_schema
            },
            "ordinal_position": c.ordinal_position,
            "is_id": is_id,
            "include": include,  # TRUE IF THIS COLUMN IS OUTPUTTED
            "reference": reference,  # TRUE IF THIS COLUMN REPRESENTS THE ROW
            "foreign": foreign  # TRUE IF THIS COLUMN POINTS TO ANOTHER ROW
        }
        columns.add(rel)

    # ITERATE OVER ALL PATHS
    todo = FlatList()
    output_columns = FlatList()
    nested_path_to_join = {}
    all_nested_paths = [["."]]

    def add_output_column(table_alias, col, sort, path, nested_path, put):
        # APPEND ONE OUTPUT COLUMN; ITS ALIAS IS ITS POSITION IN output_columns
        output_columns.append({
            "table_alias": table_alias,
            "column_alias": "c" + text_type(len(output_columns)),
            "column": col,
            "sort": sort,
            "path": path,
            "nested_path": nested_path,
            "put": put
        })

    def follow_paths(position, path, nested_path, done_relations, no_nested_docs):
        if position.name in self.settings.exclude:
            return
        if DEBUG:
            Log.note("Trace {{path}}", path=path)
        if position.name != "__ids__":
            # USED TO CONFIRM WE CAN ACCESS THE TABLE (WILL THROW ERROR WHEN IF IT FAILS)
            self.db.query("SELECT * FROM " + quote_column(position.name, position.schema) + " LIMIT 1")

        if position.name in reference_all_tables:
            no_nested_docs = True
        if position.name in reference_only_tables:
            return
        # SNAPSHOT TAKEN BEFORE THE INNER OBJECTS BELOW ADD THEIR JOINS
        curr_join_list = copy(nested_path_to_join[nested_path[0]])

        # INNER OBJECTS
        referenced_tables = list(jx.groupby(
            jx.filter(relations, {"eq": {"table.name": position.name, "table.schema": position.schema}}),
            "constraint.name"
        ))
        for g, constraint_columns in referenced_tables:
            g = unwrap(g)
            constraint_columns = deepcopy(constraint_columns)
            if g["constraint.name"] in done_relations:
                continue
            if any(cc for cc in constraint_columns if cc.referenced.table.name in self.settings.exclude):
                continue

            done_relations.add(g["constraint.name"])

            many_to_one_joins = nested_path_to_join[nested_path[0]]
            index = len(many_to_one_joins)

            alias = "t" + text_type(index)
            for c in constraint_columns:
                c.referenced.table.alias = alias
                c.table = position
            many_to_one_joins.append({
                "join_columns": constraint_columns,
                "path": path,
                "nested_path": nested_path
            })

            # HANDLE THE COMMON *id SUFFIX
            name = []
            for a, b in zip(constraint_columns.column.name, constraint_columns.referenced.table.name):
                if a.startswith(b):
                    name.append(b)
                elif a.endswith("_id"):
                    name.append(a[:-3])
                else:
                    name.append(a)
            referenced_column_path = join_field(split_field(path) + ["/".join(name)])
            col_pointer_name = relative_field(referenced_column_path, nested_path[0])
            # insert into nested1 VALUES (100, 10, 'aaa', -1);
            # id.about.time.nested1 .ref=10
            # id.about.time.nested1 .ref.name
            for col in columns:
                if col.table.name == constraint_columns[0].referenced.table.name and col.table.schema == constraint_columns[0].referenced.table.schema:
                    col_full_name = concat_field(col_pointer_name, literal_field(col.column.name))

                    if col.is_id and col.table.name == fact_table.name and col.table.schema == fact_table.schema:
                        # ALWAYS SHOW THE ID OF THE FACT
                        add_output_column(alias, col, True, referenced_column_path, nested_path, col_full_name)
                    elif col.column.name == constraint_columns[0].column.name or col.is_id:
                        add_output_column(
                            alias, col, False, referenced_column_path, nested_path,
                            col_full_name if self.settings.show_foreign_keys else None
                        )
                    elif col.reference:
                        # REFERENCE FIELDS CAN REPLACE THE WHOLE OBJECT BEING REFERENCED
                        add_output_column(
                            alias, col, False, referenced_column_path, nested_path,
                            col_pointer_name if not self.settings.show_foreign_keys else col_full_name
                        )
                    elif col.include:
                        add_output_column(alias, col, False, referenced_column_path, nested_path, col_full_name)

            # NOTE(review): UNREACHABLE - THE SAME TEST ALREADY RETURNED ABOVE;
            # POSSIBLY MEANT TO TEST THE REFERENCED TABLE - CONFIRM
            if position.name in reference_only_tables:
                continue

            todo.append(Data(
                position=copy(constraint_columns[0].referenced.table),
                path=referenced_column_path,
                nested_path=nested_path,
                done_relations=copy(done_relations),
                no_nested_docs=no_nested_docs
            ))

        # NESTED OBJECTS
        if not no_nested_docs:
            for g, constraint_columns in jx.groupby(
                jx.filter(relations, {"eq": {"referenced.table.name": position.name, "referenced.table.schema": position.schema}}),
                "constraint.name"
            ):
                g = unwrap(g)
                constraint_columns = deepcopy(constraint_columns)
                if g["constraint.name"] in done_relations:
                    continue
                done_relations.add(g["constraint.name"])

                many_table = set(constraint_columns.table.name)
                # NOTE(review): SET DIFFERENCE ASSUMES settings.exclude BEHAVES LIKE A SET - CONFIRM
                if not (many_table - self.settings.exclude):
                    continue

                referenced_column_path = join_field(split_field(path) + ["/".join(many_table)])
                new_nested_path = [referenced_column_path] + nested_path
                all_nested_paths.append(new_nested_path)

                one_to_many_joins = nested_path_to_join[referenced_column_path] = copy(curr_join_list)
                index = len(one_to_many_joins)
                alias = "t" + text_type(index)
                for c in constraint_columns:
                    c.table.alias = alias
                    c.referenced.table = position
                one_to_many_joins.append(set_default({}, g, {
                    "children": True,
                    "join_columns": constraint_columns,
                    "path": path,
                    "nested_path": nested_path
                }))
                # insert into nested1 VALUES (100, 10, 'aaa', -1);
                # id.about.time.nested1 .ref=10
                # id.about.time.nested1 .ref.name
                for col in columns:
                    if col.table.name == constraint_columns[0].table.name and col.table.schema == constraint_columns[0].table.schema:
                        col_full_name = join_field(
                            split_field(referenced_column_path)[len(split_field(new_nested_path[0])):] +
                            [literal_field(col.column.name)]
                        )

                        if col.column.name == constraint_columns[0].column.name or col.is_id:
                            put = col_full_name if self.settings.show_foreign_keys else None
                        else:
                            put = col_full_name if col.include else None
                        add_output_column(alias, col, col.is_id, referenced_column_path, new_nested_path, put)

                todo.append(Data(
                    position=constraint_columns[0].table,
                    path=referenced_column_path,
                    nested_path=new_nested_path,
                    done_relations=copy(done_relations),
                    no_nested_docs=no_nested_docs
                ))

    path = "."
    nested_path = [path]
    nested_path_to_join["."] = [{
        "path": path,
        "join_columns": [{"referenced": {"table": ids_table}}],
        "nested_path": nested_path
    }]

    todo.append(Data(
        position=ids_table,
        path=path,
        nested_path=nested_path,
        done_relations=set(),
        no_nested_docs=False
    ))

    # BREADTH-FIRST WALK: follow_paths() APPENDS MORE WORK TO todo AS IT GOES
    while todo:
        item = todo.pop(0)
        follow_paths(**item)

    self.all_nested_paths = all_nested_paths
    self.nested_path_to_join = nested_path_to_join
    self.columns = output_columns